scripts/decodetree.py

   1 #!/usr/bin/env python3
   2 # Copyright (c) 2018 Linaro Limited
   3 #
   4 # This library is free software; you can redistribute it and/or
   5 # modify it under the terms of the GNU Lesser General Public
   6 # License as published by the Free Software Foundation; either
   7 # version 2.1 of the License, or (at your option) any later version.
   8 #
   9 # This library is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 # Lesser General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Lesser General Public
  15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16 #
  17
  18 #
  19 # Generate a decoding tree from a specification file.
  20 # See the syntax and semantics in docs/devel/decodetree.rst.
  21 #
  22
  23 import io
  24 import os
  25 import re
  26 import sys
  27 import getopt
  28
  29 insnwidth = 32
  30 bitop_width = 32
  31 insnmask = 0xffffffff
  32 variablewidth = False
  33 fields = {}
  34 arguments = {}
  35 formats = {}
  36 allpatterns = []
  37 anyextern = False
  38 testforerror = False
  39
  40 translate_prefix = 'trans'
  41 translate_scope = 'static '
  42 input_file = ''
  43 output_file = None
  44 output_fd = None
  45 output_null = False
  46 insntype = 'uint32_t'
  47 decode_function = 'decode'
  48
  49 # An identifier for C.
  50 re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
  51
  52 # Identifiers for Arguments, Fields, Formats and Patterns.
  53 re_arg_ident = '&[a-zA-Z0-9_]*'
  54 re_fld_ident = '%[a-zA-Z0-9_]*'
  55 re_fmt_ident = '@[a-zA-Z0-9_]*'
  56 re_pat_ident = '[a-zA-Z0-9_]*'
  57
  58 # Local implementation of a topological sort. We use the same API that
  59 # the Python graphlib does, so that when QEMU moves forward to a
  60 # baseline of Python 3.9 or newer this code can all be dropped and
  61 # replaced with:
  62 #    from graphlib import TopologicalSorter, CycleError
  63 #
  64 # https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
  65 #
  66 # We only implement the parts of TopologicalSorter we care about:
  67 #  ts = TopologicalSorter(graph=None)
  68 #    create the sorter. graph is a dictionary whose keys are
  69 #    nodes and whose values are lists of the predecessors of that node.
  70 #    (That is, if graph contains "A" -> ["B", "C"] then we must output
  71 #    B and C before A.)
  72 #  ts.static_order()
  73 #    returns a list of all the nodes in sorted order, or raises CycleError
  74 #  CycleError
  75 #    exception raised if there are cycles in the graph. The second
  76 #    element in the args attribute is a list of nodes which form a
  77 #    cycle; the first and last element are the same, eg [a, b, c, a]
  78 #    (Our implementation doesn't give the order correctly.)
  79 #
  80 # For our purposes we can assume that the data set is always small
  81 # (typically 10 nodes or less, actual links in the graph very rare),
  82 # so we don't need to worry about efficiency of implementation.
  83 #
  84 # The core of this implementation is from
  85 # https://code.activestate.com/recipes/578272-topological-sort/
  86 # (but updated to Python 3), and is under the MIT license.
  87
  88 class CycleError(ValueError):
  89     """Subclass of ValueError raised if cycles exist in the graph"""
  90     pass
  91
  92 class TopologicalSorter:
  93     """Topologically sort a graph"""
  94     def __init__(self, graph=None):
  95         self.graph = graph
  96
  97     def static_order(self):
  98         # We do the sort right here, unlike the stdlib version
  99         from functools import reduce
 100         data = {}
 101         r = []
 102
 103         if not self.graph:
 104             return []
 105
 106         # This code wants the values in the dict to be specifically sets
 107         for k, v in self.graph.items():
 108             data[k] = set(v)
 109
 110         # Find all items that don't depend on anything.
 111         extra_items_in_deps = (reduce(set.union, data.values())
 112                                - set(data.keys()))
 113         # Add empty dependencies where needed
 114         data.update({item:{} for item in extra_items_in_deps})
 115         while True:
 116             ordered = set(item for item, dep in data.items() if not dep)
 117             if not ordered:
 118                 break
 119             r.extend(ordered)
 120             data = {item: (dep - ordered)
 121                     for item, dep in data.items()
 122                         if item not in ordered}
 123         if data:
 124             # This doesn't give as nice results as the stdlib, which
 125             # gives you the cycle by listing the nodes in order. Here
 126             # we only know the nodes in the cycle but not their order.
 127             raise CycleError(f'nodes are in a cycle', list(data.keys()))
 128
 129         return r
 130 # end TopologicalSorter
 131
 132 def error_with_file(file, lineno, *args):
 133     """Print an error message from file:line and args and exit."""
 134     global output_file
 135     global output_fd
 136
 137     # For the test suite expected-errors case, don't print the
 138     # string "error: ", so they don't turn up as false positives
 139     # if you grep the meson logs for strings like that.
 140     end = 'error: ' if not testforerror else 'detected: '
 141     prefix = ''
 142     if file:
 143         prefix += f'{file}:'
 144     if lineno:
 145         prefix += f'{lineno}:'
 146     if prefix:
 147         prefix += ' '
 148     print(prefix, end=end, file=sys.stderr)
 149     print(*args, file=sys.stderr)
 150
 151     if output_file and output_fd:
 152         output_fd.close()
 153         os.remove(output_file)
 154     exit(0 if testforerror else 1)
 155 # end error_with_file
 156
 157
 158 def error(lineno, *args):
 159     error_with_file(input_file, lineno, *args)
 160 # end error
 161
 162
 163 def output(*args):
 164     global output_fd
 165     for a in args:
 166         output_fd.write(a)
 167
 168
 169 def output_autogen():
 170     output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
 171
 172
 173 def str_indent(c):
 174     """Return a string with C spaces"""
 175     return ' ' * c
 176
 177
 178 def str_fields(fields):
 179     """Return a string uniquely identifying FIELDS"""
 180     r = ''
 181     for n in sorted(fields.keys()):
 182         r += '_' + n
 183     return r[1:]
 184
 185
 186 def whex(val):
 187     """Return a hex string for val padded for insnwidth"""
 188     global insnwidth
 189     return f'0x{val:0{insnwidth // 4}x}'
 190
 191
 192 def whexC(val):
 193     """Return a hex string for val padded for insnwidth,
 194        and with the proper suffix for a C constant."""
 195     suffix = ''
 196     if val >= 0x100000000:
 197         suffix = 'ull'
 198     elif val >= 0x80000000:
 199         suffix = 'u'
 200     return whex(val) + suffix
 201
 202
 203 def str_match_bits(bits, mask):
 204     """Return a string pretty-printing BITS/MASK"""
 205     global insnwidth
 206
 207     i = 1 << (insnwidth - 1)
 208     space = 0x01010100
 209     r = ''
 210     while i != 0:
 211         if i & mask:
 212             if i & bits:
 213                 r += '1'
 214             else:
 215                 r += '0'
 216         else:
 217             r += '.'
 218         if i & space:
 219             r += ' '
 220         i >>= 1
 221     return r
 222
 223
 224 def is_pow2(x):
 225     """Return true iff X is equal to a power of 2."""
 226     return (x & (x - 1)) == 0
 227
 228
 229 def ctz(x):
 230     """Return the number of times 2 factors into X."""
 231     assert x != 0
 232     r = 0
 233     while ((x >> r) & 1) == 0:
 234         r += 1
 235     return r
 236
 237
 238 def is_contiguous(bits):
 239     if bits == 0:
 240         return -1
 241     shift = ctz(bits)
 242     if is_pow2((bits >> shift) + 1):
 243         return shift
 244     else:
 245         return -1
 246
 247
 248 def eq_fields_for_args(flds_a, arg):
 249     if len(flds_a) != len(arg.fields):
 250         return False
 251     # Only allow inference on default types
 252     for t in arg.types:
 253         if t != 'int':
 254             return False
 255     for k, a in flds_a.items():
 256         if k not in arg.fields:
 257             return False
 258     return True
 259
 260
 261 def eq_fields_for_fmts(flds_a, flds_b):
 262     if len(flds_a) != len(flds_b):
 263         return False
 264     for k, a in flds_a.items():
 265         if k not in flds_b:
 266             return False
 267         b = flds_b[k]
 268         if a.__class__ != b.__class__ or a != b:
 269             return False
 270     return True
 271
 272
 273 class Field:
 274     """Class representing a simple instruction field"""
 275     def __init__(self, sign, pos, len):
 276         self.sign = sign
 277         self.pos = pos
 278         self.len = len
 279         self.mask = ((1 << len) - 1) << pos
 280
 281     def __str__(self):
 282         if self.sign:
 283             s = 's'
 284         else:
 285             s = ''
 286         return str(self.pos) + ':' + s + str(self.len)
 287
 288     def str_extract(self, lvalue_formatter):
 289         global bitop_width
 290         s = 's' if self.sign else ''
 291         return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
 292
 293     def referenced_fields(self):
 294         return []
 295
 296     def __eq__(self, other):
 297         return self.sign == other.sign and self.mask == other.mask
 298
 299     def __ne__(self, other):
 300         return not self.__eq__(other)
 301 # end Field
 302
 303
 304 class MultiField:
 305     """Class representing a compound instruction field"""
 306     def __init__(self, subs, mask):
 307         self.subs = subs
 308         self.sign = subs[0].sign
 309         self.mask = mask
 310
 311     def __str__(self):
 312         return str(self.subs)
 313
 314     def str_extract(self, lvalue_formatter):
 315         global bitop_width
 316         ret = '0'
 317         pos = 0
 318         for f in reversed(self.subs):
 319             ext = f.str_extract(lvalue_formatter)
 320             if pos == 0:
 321                 ret = ext
 322             else:
 323                 ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
 324             pos += f.len
 325         return ret
 326
 327     def referenced_fields(self):
 328         l = []
 329         for f in self.subs:
 330             l.extend(f.referenced_fields())
 331         return l
 332
 333     def __ne__(self, other):
 334         if len(self.subs) != len(other.subs):
 335             return True
 336         for a, b in zip(self.subs, other.subs):
 337             if a.__class__ != b.__class__ or a != b:
 338                 return True
 339         return False
 340
 341     def __eq__(self, other):
 342         return not self.__ne__(other)
 343 # end MultiField
 344
 345
 346 class ConstField:
 347     """Class representing an argument field with constant value"""
 348     def __init__(self, value):
 349         self.value = value
 350         self.mask = 0
 351         self.sign = value < 0
 352
 353     def __str__(self):
 354         return str(self.value)
 355
 356     def str_extract(self, lvalue_formatter):
 357         return str(self.value)
 358
 359     def referenced_fields(self):
 360         return []
 361
 362     def __cmp__(self, other):
 363         return self.value - other.value
 364 # end ConstField
 365
 366
 367 class FunctionField:
 368     """Class representing a field passed through a function"""
 369     def __init__(self, func, base):
 370         self.mask = base.mask
 371         self.sign = base.sign
 372         self.base = base
 373         self.func = func
 374
 375     def __str__(self):
 376         return self.func + '(' + str(self.base) + ')'
 377
 378     def str_extract(self, lvalue_formatter):
 379         return (self.func + '(ctx, '
 380                 + self.base.str_extract(lvalue_formatter) + ')')
 381
 382     def referenced_fields(self):
 383         return self.base.referenced_fields()
 384
 385     def __eq__(self, other):
 386         return self.func == other.func and self.base == other.base
 387
 388     def __ne__(self, other):
 389         return not self.__eq__(other)
 390 # end FunctionField
 391
 392
 393 class ParameterField:
 394     """Class representing a pseudo-field read from a function"""
 395     def __init__(self, func):
 396         self.mask = 0
 397         self.sign = 0
 398         self.func = func
 399
 400     def __str__(self):
 401         return self.func
 402
 403     def str_extract(self, lvalue_formatter):
 404         return self.func + '(ctx)'
 405
 406     def referenced_fields(self):
 407         return []
 408
 409     def __eq__(self, other):
 410         return self.func == other.func
 411
 412     def __ne__(self, other):
 413         return not self.__eq__(other)
 414 # end ParameterField
 415
 416 class NamedField:
 417     """Class representing a field already named in the pattern"""
 418     def __init__(self, name, sign, len):
 419         self.mask = 0
 420         self.sign = sign
 421         self.len = len
 422         self.name = name
 423
 424     def __str__(self):
 425         return self.name
 426
 427     def str_extract(self, lvalue_formatter):
 428         global bitop_width
 429         s = 's' if self.sign else ''
 430         lvalue = lvalue_formatter(self.name)
 431         return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
 432
 433     def referenced_fields(self):
 434         return [self.name]
 435
 436     def __eq__(self, other):
 437         return self.name == other.name
 438
 439     def __ne__(self, other):
 440         return not self.__eq__(other)
 441 # end NamedField
 442
 443 class Arguments:
 444     """Class representing the extracted fields of a format"""
 445     def __init__(self, nm, flds, types, extern):
 446         self.name = nm
 447         self.extern = extern
 448         self.fields = flds
 449         self.types = types
 450
 451     def __str__(self):
 452         return self.name + ' ' + str(self.fields)
 453
 454     def struct_name(self):
 455         return 'arg_' + self.name
 456
 457     def output_def(self):
 458         if not self.extern:
 459             output('typedef struct {\n')
 460             for (n, t) in zip(self.fields, self.types):
 461                 output(f'    {t} {n};\n')
 462             output('} ', self.struct_name(), ';\n\n')
 463 # end Arguments
 464
 465 class General:
 466     """Common code between instruction formats and instruction patterns"""
 467     def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
 468         self.name = name
 469         self.file = input_file
 470         self.lineno = lineno
 471         self.base = base
 472         self.fixedbits = fixb
 473         self.fixedmask = fixm
 474         self.undefmask = udfm
 475         self.fieldmask = fldm
 476         self.fields = flds
 477         self.width = w
 478         self.dangling = None
 479
 480     def __str__(self):
 481         return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
 482
 483     def str1(self, i):
 484         return str_indent(i) + self.__str__()
 485
 486     def dangling_references(self):
 487         # Return a list of all named references which aren't satisfied
 488         # directly by this format/pattern. This will be either:
 489         #  * a format referring to a field which is specified by the
 490         #    pattern(s) using it
 491         #  * a pattern referring to a field which is specified by the
 492         #    format it uses
 493         #  * a user error (referring to a field that doesn't exist at all)
 494         if self.dangling is None:
 495             # Compute this once and cache the answer
 496             dangling = []
 497             for n, f in self.fields.items():
 498                 for r in f.referenced_fields():
 499                     if r not in self.fields:
 500                         dangling.append(r)
 501             self.dangling = dangling
 502         return self.dangling
 503
 504     def output_fields(self, indent, lvalue_formatter):
 505         # We use a topological sort to ensure that any use of NamedField
 506         # comes after the initialization of the field it is referencing.
 507         graph = {}
 508         for n, f in self.fields.items():
 509             refs = f.referenced_fields()
 510             graph[n] = refs
 511
 512         try:
 513             ts = TopologicalSorter(graph)
 514             for n in ts.static_order():
 515                 # We only want to emit assignments for the keys
 516                 # in our fields list, not for anything that ends up
 517                 # in the tsort graph only because it was referenced as
 518                 # a NamedField.
 519                 try:
 520                     f = self.fields[n]
 521                     output(indent, lvalue_formatter(n), ' = ',
 522                            f.str_extract(lvalue_formatter), ';\n')
 523                 except KeyError:
 524                     pass
 525         except CycleError as e:
 526             # The second element of args is a list of nodes which form
 527             # a cycle (there might be others too, but only one is reported).
 528             # Pretty-print it to tell the user.
 529             cycle = ' => '.join(e.args[1])
 530             error(self.lineno, 'field definitions form a cycle: ' + cycle)
 531 # end General
 532
 533
 534 class Format(General):
 535     """Class representing an instruction format"""
 536
 537     def extract_name(self):
 538         global decode_function
 539         return decode_function + '_extract_' + self.name
 540
 541     def output_extract(self):
 542         output('static void ', self.extract_name(), '(DisasContext *ctx, ',
 543                self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
 544         self.output_fields(str_indent(4), lambda n: 'a->' + n)
 545         output('}\n\n')
 546 # end Format
 547
 548
 549 class Pattern(General):
 550     """Class representing an instruction pattern"""
 551
 552     def output_decl(self):
 553         global translate_scope
 554         global translate_prefix
 555         output('typedef ', self.base.base.struct_name(),
 556                ' arg_', self.name, ';\n')
 557         output(translate_scope, 'bool ', translate_prefix, '_', self.name,
 558                '(DisasContext *ctx, arg_', self.name, ' *a);\n')
 559
 560     def output_code(self, i, extracted, outerbits, outermask):
 561         global translate_prefix
 562         ind = str_indent(i)
 563         arg = self.base.base.name
 564         output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
 565         # We might have named references in the format that refer to fields
 566         # in the pattern, or named references in the pattern that refer
 567         # to fields in the format. This affects whether we extract the fields
 568         # for the format before or after the ones for the pattern.
 569         # For simplicity we don't allow cross references in both directions.
 570         # This is also where we catch the syntax error of referring to
 571         # a nonexistent field.
 572         fmt_refs = self.base.dangling_references()
 573         for r in fmt_refs:
 574             if r not in self.fields:
 575                 error(self.lineno, f'format refers to undefined field {r}')
 576         pat_refs = self.dangling_references()
 577         for r in pat_refs:
 578             if r not in self.base.fields:
 579                 error(self.lineno, f'pattern refers to undefined field {r}')
 580         if pat_refs and fmt_refs:
 581             error(self.lineno, ('pattern that uses fields defined in format '
 582                                 'cannot use format that uses fields defined '
 583                                 'in pattern'))
 584         if fmt_refs:
 585             # pattern fields first
 586             self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
 587             assert not extracted, "dangling fmt refs but it was already extracted"
 588         if not extracted:
 589             output(ind, self.base.extract_name(),
 590                    '(ctx, &u.f_', arg, ', insn);\n')
 591         if not fmt_refs:
 592             # pattern fields last
 593             self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
 594
 595         output(ind, 'if (', translate_prefix, '_', self.name,
 596                '(ctx, &u.f_', arg, ')) return true;\n')
 597
 598     # Normal patterns do not have children.
 599     def build_tree(self):
 600         return
 601     def prop_masks(self):
 602         return
 603     def prop_format(self):
 604         return
 605     def prop_width(self):
 606         return
 607
 608 # end Pattern
 609
 610
 611 class MultiPattern(General):
 612     """Class representing a set of instruction patterns"""
 613
 614     def __init__(self, lineno):
 615         self.file = input_file
 616         self.lineno = lineno
 617         self.pats = []
 618         self.base = None
 619         self.fixedbits = 0
 620         self.fixedmask = 0
 621         self.undefmask = 0
 622         self.width = None
 623
 624     def __str__(self):
 625         r = 'group'
 626         if self.fixedbits is not None:
 627             r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
 628         return r
 629
 630     def output_decl(self):
 631         for p in self.pats:
 632             p.output_decl()
 633
 634     def prop_masks(self):
 635         global insnmask
 636
 637         fixedmask = insnmask
 638         undefmask = insnmask
 639
 640         # Collect fixedmask/undefmask for all of the children.
 641         for p in self.pats:
 642             p.prop_masks()
 643             fixedmask &= p.fixedmask
 644             undefmask &= p.undefmask
 645
 646         # Widen fixedmask until all fixedbits match
 647         repeat = True
 648         fixedbits = 0
 649         while repeat and fixedmask != 0:
 650             fixedbits = None
 651             for p in self.pats:
 652                 thisbits = p.fixedbits & fixedmask
 653                 if fixedbits is None:
 654                     fixedbits = thisbits
 655                 elif fixedbits != thisbits:
 656                     fixedmask &= ~(fixedbits ^ thisbits)
 657                     break
 658             else:
 659                 repeat = False
 660
 661         self.fixedbits = fixedbits
 662         self.fixedmask = fixedmask
 663         self.undefmask = undefmask
 664
 665     def build_tree(self):
 666         for p in self.pats:
 667             p.build_tree()
 668
 669     def prop_format(self):
 670         for p in self.pats:
 671             p.prop_format()
 672
 673     def prop_width(self):
 674         width = None
 675         for p in self.pats:
 676             p.prop_width()
 677             if width is None:
 678                 width = p.width
 679             elif width != p.width:
 680                 error_with_file(self.file, self.lineno,
 681                                 'width mismatch in patterns within braces')
 682         self.width = width
 683
 684 # end MultiPattern
 685
 686
 687 class IncMultiPattern(MultiPattern):
 688     """Class representing an overlapping set of instruction patterns"""
 689
 690     def output_code(self, i, extracted, outerbits, outermask):
 691         global translate_prefix
 692         ind = str_indent(i)
 693         for p in self.pats:
 694             if outermask != p.fixedmask:
 695                 innermask = p.fixedmask & ~outermask
 696                 innerbits = p.fixedbits & ~outermask
 697                 output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
 698                 output(ind, f'    /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
 699                 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
 700                 output(ind, '}\n')
 701             else:
 702                 p.output_code(i, extracted, p.fixedbits, p.fixedmask)
 703
 704     def build_tree(self):
 705         if not self.pats:
 706             error_with_file(self.file, self.lineno, 'empty pattern group')
 707         super().build_tree()
 708
 709 #end IncMultiPattern
 710
 711
 712 class Tree:
 713     """Class representing a node in a decode tree"""
 714
 715     def __init__(self, fm, tm):
 716         self.fixedmask = fm
 717         self.thismask = tm
 718         self.subs = []
 719         self.base = None
 720
 721     def str1(self, i):
 722         ind = str_indent(i)
 723         r = ind + whex(self.fixedmask)
 724         if self.format:
 725             r += ' ' + self.format.name
 726         r += ' [\n'
 727         for (b, s) in self.subs:
 728             r += ind + f'  {whex(b)}:\n'
 729             r += s.str1(i + 4) + '\n'
 730         r += ind + ']'
 731         return r
 732
 733     def __str__(self):
 734         return self.str1(0)
 735
 736     def output_code(self, i, extracted, outerbits, outermask):
 737         ind = str_indent(i)
 738
 739         # If we identified all nodes below have the same format,
 740         # extract the fields now. But don't do it if the format relies
 741         # on named fields from the insn pattern, as those won't have
 742         # been initialised at this point.
 743         if not extracted and self.base and not self.base.dangling_references():
 744             output(ind, self.base.extract_name(),
 745                    '(ctx, &u.f_', self.base.base.name, ', insn);\n')
 746             extracted = True
 747
 748         # Attempt to aid the compiler in producing compact switch statements.
 749         # If the bits in the mask are contiguous, extract them.
 750         sh = is_contiguous(self.thismask)
 751         if sh > 0:
 752             # Propagate SH down into the local functions.
 753             def str_switch(b, sh=sh):
 754                 return f'(insn >> {sh}) & {b >> sh:#x}'
 755
 756             def str_case(b, sh=sh):
 757                 return hex(b >> sh)
 758         else:
 759             def str_switch(b):
 760                 return f'insn & {whexC(b)}'
 761
 762             def str_case(b):
 763                 return whexC(b)
 764
 765         output(ind, 'switch (', str_switch(self.thismask), ') {\n')
 766         for b, s in sorted(self.subs):
 767             assert (self.thismask & ~s.fixedmask) == 0
 768             innermask = outermask | self.thismask
 769             innerbits = outerbits | b
 770             output(ind, 'case ', str_case(b), ':\n')
 771             output(ind, '    /* ',
 772                    str_match_bits(innerbits, innermask), ' */\n')
 773             s.output_code(i + 4, extracted, innerbits, innermask)
 774             output(ind, '    break;\n')
 775         output(ind, '}\n')
 776 # end Tree
 777
 778
 779 class ExcMultiPattern(MultiPattern):
 780     """Class representing a non-overlapping set of instruction patterns"""
 781
 782     def output_code(self, i, extracted, outerbits, outermask):
 783         # Defer everything to our decomposed Tree node
 784         self.tree.output_code(i, extracted, outerbits, outermask)
 785
 786     @staticmethod
 787     def __build_tree(pats, outerbits, outermask):
 788         # Find the intersection of all remaining fixedmask.
 789         innermask = ~outermask & insnmask
 790         for i in pats:
 791             innermask &= i.fixedmask
 792
 793         if innermask == 0:
 794             # Edge condition: One pattern covers the entire insnmask
 795             if len(pats) == 1:
 796                 t = Tree(outermask, innermask)
 797                 t.subs.append((0, pats[0]))
 798                 return t
 799
 800             text = 'overlapping patterns:'
 801             for p in pats:
 802                 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
 803             error_with_file(pats[0].file, pats[0].lineno, text)
 804
 805         fullmask = outermask | innermask
 806
 807         # Sort each element of pats into the bin selected by the mask.
 808         bins = {}
 809         for i in pats:
 810             fb = i.fixedbits & innermask
 811             if fb in bins:
 812                 bins[fb].append(i)
 813             else:
 814                 bins[fb] = [i]
 815
 816         # We must recurse if any bin has more than one element or if
 817         # the single element in the bin has not been fully matched.
 818         t = Tree(fullmask, innermask)
 819
 820         for b, l in bins.items():
 821             s = l[0]
 822             if len(l) > 1 or s.fixedmask & ~fullmask != 0:
 823                 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
 824             t.subs.append((b, s))
 825
 826         return t
 827
 828     def build_tree(self):
 829         super().build_tree()
 830         self.tree = self.__build_tree(self.pats, self.fixedbits,
 831                                       self.fixedmask)
 832
 833     @staticmethod
 834     def __prop_format(tree):
 835         """Propagate Format objects into the decode tree"""
 836
 837         # Depth first search.
 838         for (b, s) in tree.subs:
 839             if isinstance(s, Tree):
 840                 ExcMultiPattern.__prop_format(s)
 841
 842         # If all entries in SUBS have the same format, then
 843         # propagate that into the tree.
 844         f = None
 845         for (b, s) in tree.subs:
 846             if f is None:
 847                 f = s.base
 848                 if f is None:
 849                     return
 850             if f is not s.base:
 851                 return
 852         tree.base = f
 853
 854     def prop_format(self):
 855         super().prop_format()
 856         self.__prop_format(self.tree)
 857
 858 # end ExcMultiPattern
 859
 860
 861 def parse_field(lineno, name, toks):
 862     """Parse one instruction field from TOKS at LINENO"""
 863     global fields
 864     global insnwidth
 865     global re_C_ident
 866
 867     # A "simple" field will have only one entry;
 868     # a "multifield" will have several.
 869     subs = []
 870     width = 0
 871     func = None
 872     for t in toks:
 873         if re.match('^!function=', t):
 874             if func:
 875                 error(lineno, 'duplicate function')
 876             func = t.split('=')
 877             func = func[1]
 878             continue
 879
 880         if re.fullmatch(re_C_ident + ':s[0-9]+', t):
 881             # Signed named field
 882             subtoks = t.split(':')
 883             n = subtoks[0]
 884             le = int(subtoks[1])
 885             f = NamedField(n, True, le)
 886             subs.append(f)
 887             width += le
 888             continue
 889         if re.fullmatch(re_C_ident + ':[0-9]+', t):
 890             # Unsigned named field
 891             subtoks = t.split(':')
 892             n = subtoks[0]
 893             le = int(subtoks[1])
 894             f = NamedField(n, False, le)
 895             subs.append(f)
 896             width += le
 897             continue
 898
 899         if re.fullmatch('[0-9]+:s[0-9]+', t):
 900             # Signed field extract
 901             subtoks = t.split(':s')
 902             sign = True
 903         elif re.fullmatch('[0-9]+:[0-9]+', t):
 904             # Unsigned field extract
 905             subtoks = t.split(':')
 906             sign = False
 907         else:
 908             error(lineno, f'invalid field token "{t}"')
 909         po = int(subtoks[0])
 910         le = int(subtoks[1])
 911         if po + le > insnwidth:
 912             error(lineno, f'field {t} too large')
 913         f = Field(sign, po, le)
 914         subs.append(f)
 915         width += le
 916
 917     if width > insnwidth:
 918         error(lineno, 'field too large')
 919     if len(subs) == 0:
 920         if func:
 921             f = ParameterField(func)
 922         else:
 923             error(lineno, 'field with no value')
 924     else:
 925         if len(subs) == 1:
 926             f = subs[0]
 927         else:
 928             mask = 0
 929             for s in subs:
 930                 if mask & s.mask:
 931                     error(lineno, 'field components overlap')
 932                 mask |= s.mask
 933             f = MultiField(subs, mask)
 934         if func:
 935             f = FunctionField(func, f)
 936
 937     if name in fields:
 938         error(lineno, 'duplicate field', name)
 939     fields[name] = f
 940 # end parse_field
 941
 942
 943 def parse_arguments(lineno, name, toks):
 944     """Parse one argument set from TOKS at LINENO"""
 945     global arguments
 946     global re_C_ident
 947     global anyextern
 948
 949     flds = []
 950     types = []
 951     extern = False
 952     for n in toks:
 953         if re.fullmatch('!extern', n):
 954             extern = True
 955             anyextern = True
 956             continue
 957         if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
 958             (n, t) = n.split(':')
 959         elif re.fullmatch(re_C_ident, n):
 960             t = 'int'
 961         else:
 962             error(lineno, f'invalid argument set token "{n}"')
 963         if n in flds:
 964             error(lineno, f'duplicate argument "{n}"')
 965         flds.append(n)
 966         types.append(t)
 967
 968     if name in arguments:
 969         error(lineno, 'duplicate argument set', name)
 970     arguments[name] = Arguments(name, flds, types, extern)
 971 # end parse_arguments
 972
 973
 974 def lookup_field(lineno, name):
 975     global fields
 976     if name in fields:
 977         return fields[name]
 978     error(lineno, 'undefined field', name)
 979
 980
 981 def add_field(lineno, flds, new_name, f):
 982     if new_name in flds:
 983         error(lineno, 'duplicate field', new_name)
 984     flds[new_name] = f
 985     return flds
 986
 987
 988 def add_field_byname(lineno, flds, new_name, old_name):
 989     return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
 990
 991
 992 def infer_argument_set(flds):
 993     global arguments
 994     global decode_function
 995
 996     for arg in arguments.values():
 997         if eq_fields_for_args(flds, arg):
 998             return arg
 999
1000     name = decode_function + str(len(arguments))
1001     arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
1002     arguments[name] = arg
1003     return arg
1004
1005
1006 def infer_format(arg, fieldmask, flds, width):
1007     global arguments
1008     global formats
1009     global decode_function
1010
1011     const_flds = {}
1012     var_flds = {}
1013     for n, c in flds.items():
1014         if c is ConstField:
1015             const_flds[n] = c
1016         else:
1017             var_flds[n] = c
1018
1019     # Look for an existing format with the same argument set and fields
1020     for fmt in formats.values():
1021         if arg and fmt.base != arg:
1022             continue
1023         if fieldmask != fmt.fieldmask:
1024             continue
1025         if width != fmt.width:
1026             continue
1027         if not eq_fields_for_fmts(flds, fmt.fields):
1028             continue
1029         return (fmt, const_flds)
1030
1031     name = decode_function + '_Fmt_' + str(len(formats))
1032     if not arg:
1033         arg = infer_argument_set(flds)
1034
1035     fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
1036     formats[name] = fmt
1037
1038     return (fmt, const_flds)
1039 # end infer_format
1040
1041
1042 def parse_generic(lineno, parent_pat, name, toks):
1043     """Parse one instruction format from TOKS at LINENO"""
1044     global fields
1045     global arguments
1046     global formats
1047     global allpatterns
1048     global re_arg_ident
1049     global re_fld_ident
1050     global re_fmt_ident
1051     global re_C_ident
1052     global insnwidth
1053     global insnmask
1054     global variablewidth
1055
1056     is_format = parent_pat is None
1057
1058     fixedmask = 0
1059     fixedbits = 0
1060     undefmask = 0
1061     width = 0
1062     flds = {}
1063     arg = None
1064     fmt = None
1065     for t in toks:
1066         # '&Foo' gives a format an explicit argument set.
1067         if re.fullmatch(re_arg_ident, t):
1068             tt = t[1:]
1069             if arg:
1070                 error(lineno, 'multiple argument sets')
1071             if tt in arguments:
1072                 arg = arguments[tt]
1073             else:
1074                 error(lineno, 'undefined argument set', t)
1075             continue
1076
1077         # '@Foo' gives a pattern an explicit format.
1078         if re.fullmatch(re_fmt_ident, t):
1079             tt = t[1:]
1080             if fmt:
1081                 error(lineno, 'multiple formats')
1082             if tt in formats:
1083                 fmt = formats[tt]
1084             else:
1085                 error(lineno, 'undefined format', t)
1086             continue
1087
1088         # '%Foo' imports a field.
1089         if re.fullmatch(re_fld_ident, t):
1090             tt = t[1:]
1091             flds = add_field_byname(lineno, flds, tt, tt)
1092             continue
1093
1094         # 'Foo=%Bar' imports a field with a different name.
1095         if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
1096             (fname, iname) = t.split('=%')
1097             flds = add_field_byname(lineno, flds, fname, iname)
1098             continue
1099
1100         # 'Foo=number' sets an argument field to a constant value
1101         if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
1102             (fname, value) = t.split('=')
1103             value = int(value)
1104             flds = add_field(lineno, flds, fname, ConstField(value))
1105             continue
1106
1107         # Pattern of 0s, 1s, dots and dashes indicate required zeros,
1108         # required ones, or dont-cares.
1109         if re.fullmatch('[01.-]+', t):
1110             shift = len(t)
1111             fms = t.replace('0', '1')
1112             fms = fms.replace('.', '0')
1113             fms = fms.replace('-', '0')
1114             fbs = t.replace('.', '0')
1115             fbs = fbs.replace('-', '0')
1116             ubm = t.replace('1', '0')
1117             ubm = ubm.replace('.', '0')
1118             ubm = ubm.replace('-', '1')
1119             fms = int(fms, 2)
1120             fbs = int(fbs, 2)
1121             ubm = int(ubm, 2)
1122             fixedbits = (fixedbits << shift) | fbs
1123             fixedmask = (fixedmask << shift) | fms
1124             undefmask = (undefmask << shift) | ubm
1125         # Otherwise, fieldname:fieldwidth
1126         elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
1127             (fname, flen) = t.split(':')
1128             sign = False
1129             if flen[0] == 's':
1130                 sign = True
1131                 flen = flen[1:]
1132             shift = int(flen, 10)
1133             if shift + width > insnwidth:
1134                 error(lineno, f'field {fname} exceeds insnwidth')
1135             f = Field(sign, insnwidth - width - shift, shift)
1136             flds = add_field(lineno, flds, fname, f)
1137             fixedbits <<= shift
1138             fixedmask <<= shift
1139             undefmask <<= shift
1140         else:
1141             error(lineno, f'invalid token "{t}"')
1142         width += shift
1143
1144     if variablewidth and width < insnwidth and width % 8 == 0:
1145         shift = insnwidth - width
1146         fixedbits <<= shift
1147         fixedmask <<= shift
1148         undefmask <<= shift
1149         undefmask |= (1 << shift) - 1
1150
1151     # We should have filled in all of the bits of the instruction.
1152     elif not (is_format and width == 0) and width != insnwidth:
1153         error(lineno, f'definition has {width} bits')
1154
1155     # Do not check for fields overlapping fields; one valid usage
1156     # is to be able to duplicate fields via import.
1157     fieldmask = 0
1158     for f in flds.values():
1159         fieldmask |= f.mask
1160
1161     # Fix up what we've parsed to match either a format or a pattern.
1162     if is_format:
1163         # Formats cannot reference formats.
1164         if fmt:
1165             error(lineno, 'format referencing format')
1166         # If an argument set is given, then there should be no fields
1167         # without a place to store it.
1168         if arg:
1169             for f in flds.keys():
1170                 if f not in arg.fields:
1171                     error(lineno, f'field {f} not in argument set {arg.name}')
1172         else:
1173             arg = infer_argument_set(flds)
1174         if name in formats:
1175             error(lineno, 'duplicate format name', name)
1176         fmt = Format(name, lineno, arg, fixedbits, fixedmask,
1177                      undefmask, fieldmask, flds, width)
1178         formats[name] = fmt
1179     else:
1180         # Patterns can reference a format ...
1181         if fmt:
1182             # ... but not an argument simultaneously
1183             if arg:
1184                 error(lineno, 'pattern specifies both format and argument set')
1185             if fixedmask & fmt.fixedmask:
1186                 error(lineno, 'pattern fixed bits overlap format fixed bits')
1187             if width != fmt.width:
1188                 error(lineno, 'pattern uses format of different width')
1189             fieldmask |= fmt.fieldmask
1190             fixedbits |= fmt.fixedbits
1191             fixedmask |= fmt.fixedmask
1192             undefmask |= fmt.undefmask
1193         else:
1194             (fmt, flds) = infer_format(arg, fieldmask, flds, width)
1195         arg = fmt.base
1196         for f in flds.keys():
1197             if f not in arg.fields:
1198                 error(lineno, f'field {f} not in argument set {arg.name}')
1199             if f in fmt.fields.keys():
1200                 error(lineno, f'field {f} set by format and pattern')
1201         for f in arg.fields:
1202             if f not in flds.keys() and f not in fmt.fields.keys():
1203                 error(lineno, f'field {f} not initialized')
1204         pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
1205                       undefmask, fieldmask, flds, width)
1206         parent_pat.pats.append(pat)
1207         allpatterns.append(pat)
1208
1209     # Validate the masks that we have assembled.
1210     if fieldmask & fixedmask:
1211         error(lineno, 'fieldmask overlaps fixedmask ',
1212               f'({whex(fieldmask)} & {whex(fixedmask)})')
1213     if fieldmask & undefmask:
1214         error(lineno, 'fieldmask overlaps undefmask ',
1215               f'({whex(fieldmask)} & {whex(undefmask)})')
1216     if fixedmask & undefmask:
1217         error(lineno, 'fixedmask overlaps undefmask ',
1218               f'({whex(fixedmask)} & {whex(undefmask)})')
1219     if not is_format:
1220         allbits = fieldmask | fixedmask | undefmask
1221         if allbits != insnmask:
1222             error(lineno, 'bits left unspecified ',
1223                   f'({whex(allbits ^ insnmask)})')
1224 # end parse_general
1225
1226
1227 def parse_file(f, parent_pat):
1228     """Parse all of the patterns within a file"""
1229     global re_arg_ident
1230     global re_fld_ident
1231     global re_fmt_ident
1232     global re_pat_ident
1233
1234     # Read all of the lines of the file.  Concatenate lines
1235     # ending in backslash; discard empty lines and comments.
1236     toks = []
1237     lineno = 0
1238     nesting = 0
1239     nesting_pats = []
1240
1241     for line in f:
1242         lineno += 1
1243
1244         # Expand and strip spaces, to find indent.
1245         line = line.rstrip()
1246         line = line.expandtabs()
1247         len1 = len(line)
1248         line = line.lstrip()
1249         len2 = len(line)
1250
1251         # Discard comments
1252         end = line.find('#')
1253         if end >= 0:
1254             line = line[:end]
1255
1256         t = line.split()
1257         if len(toks) != 0:
1258             # Next line after continuation
1259             toks.extend(t)
1260         else:
1261             # Allow completely blank lines.
1262             if len1 == 0:
1263                 continue
1264             indent = len1 - len2
1265             # Empty line due to comment.
1266             if len(t) == 0:
1267                 # Indentation must be correct, even for comment lines.
1268                 if indent != nesting:
1269                     error(lineno, 'indentation ', indent, ' != ', nesting)
1270                 continue
1271             start_lineno = lineno
1272             toks = t
1273
1274         # Continuation?
1275         if toks[-1] == '\\':
1276             toks.pop()
1277             continue
1278
1279         name = toks[0]
1280         del toks[0]
1281
1282         # End nesting?
1283         if name == '}' or name == ']':
1284             if len(toks) != 0:
1285                 error(start_lineno, 'extra tokens after close brace')
1286
1287             # Make sure { } and [ ] nest properly.
1288             if (name == '}') != isinstance(parent_pat, IncMultiPattern):
1289                 error(lineno, 'mismatched close brace')
1290
1291             try:
1292                 parent_pat = nesting_pats.pop()
1293             except:
1294                 error(lineno, 'extra close brace')
1295
1296             nesting -= 2
1297             if indent != nesting:
1298                 error(lineno, 'indentation ', indent, ' != ', nesting)
1299
1300             toks = []
1301             continue
1302
1303         # Everything else should have current indentation.
1304         if indent != nesting:
1305             error(start_lineno, 'indentation ', indent, ' != ', nesting)
1306
1307         # Start nesting?
1308         if name == '{' or name == '[':
1309             if len(toks) != 0:
1310                 error(start_lineno, 'extra tokens after open brace')
1311
1312             if name == '{':
1313                 nested_pat = IncMultiPattern(start_lineno)
1314             else:
1315                 nested_pat = ExcMultiPattern(start_lineno)
1316             parent_pat.pats.append(nested_pat)
1317             nesting_pats.append(parent_pat)
1318             parent_pat = nested_pat
1319
1320             nesting += 2
1321             toks = []
1322             continue
1323
1324         # Determine the type of object needing to be parsed.
1325         if re.fullmatch(re_fld_ident, name):
1326             parse_field(start_lineno, name[1:], toks)
1327         elif re.fullmatch(re_arg_ident, name):
1328             parse_arguments(start_lineno, name[1:], toks)
1329         elif re.fullmatch(re_fmt_ident, name):
1330             parse_generic(start_lineno, None, name[1:], toks)
1331         elif re.fullmatch(re_pat_ident, name):
1332             parse_generic(start_lineno, parent_pat, name, toks)
1333         else:
1334             error(lineno, f'invalid token "{name}"')
1335         toks = []
1336
1337     if nesting != 0:
1338         error(lineno, 'missing close brace')
1339 # end parse_file
1340
1341
1342 class SizeTree:
1343     """Class representing a node in a size decode tree"""
1344
1345     def __init__(self, m, w):
1346         self.mask = m
1347         self.subs = []
1348         self.base = None
1349         self.width = w
1350
1351     def str1(self, i):
1352         ind = str_indent(i)
1353         r = ind + whex(self.mask) + ' [\n'
1354         for (b, s) in self.subs:
1355             r += ind + f'  {whex(b)}:\n'
1356             r += s.str1(i + 4) + '\n'
1357         r += ind + ']'
1358         return r
1359
1360     def __str__(self):
1361         return self.str1(0)
1362
1363     def output_code(self, i, extracted, outerbits, outermask):
1364         ind = str_indent(i)
1365
1366         # If we need to load more bytes to test, do so now.
1367         if extracted < self.width:
1368             output(ind, f'insn = {decode_function}_load_bytes',
1369                    f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1370             extracted = self.width
1371
1372         # Attempt to aid the compiler in producing compact switch statements.
1373         # If the bits in the mask are contiguous, extract them.
1374         sh = is_contiguous(self.mask)
1375         if sh > 0:
1376             # Propagate SH down into the local functions.
1377             def str_switch(b, sh=sh):
1378                 return f'(insn >> {sh}) & {b >> sh:#x}'
1379
1380             def str_case(b, sh=sh):
1381                 return hex(b >> sh)
1382         else:
1383             def str_switch(b):
1384                 return f'insn & {whexC(b)}'
1385
1386             def str_case(b):
1387                 return whexC(b)
1388
1389         output(ind, 'switch (', str_switch(self.mask), ') {\n')
1390         for b, s in sorted(self.subs):
1391             innermask = outermask | self.mask
1392             innerbits = outerbits | b
1393             output(ind, 'case ', str_case(b), ':\n')
1394             output(ind, '    /* ',
1395                    str_match_bits(innerbits, innermask), ' */\n')
1396             s.output_code(i + 4, extracted, innerbits, innermask)
1397         output(ind, '}\n')
1398         output(ind, 'return insn;\n')
1399 # end SizeTree
1400
1401 class SizeLeaf:
1402     """Class representing a leaf node in a size decode tree"""
1403
1404     def __init__(self, m, w):
1405         self.mask = m
1406         self.width = w
1407
1408     def str1(self, i):
1409         return str_indent(i) + whex(self.mask)
1410
1411     def __str__(self):
1412         return self.str1(0)
1413
1414     def output_code(self, i, extracted, outerbits, outermask):
1415         global decode_function
1416         ind = str_indent(i)
1417
1418         # If we need to load more bytes, do so now.
1419         if extracted < self.width:
1420             output(ind, f'insn = {decode_function}_load_bytes',
1421                    f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1422             extracted = self.width
1423         output(ind, 'return insn;\n')
1424 # end SizeLeaf
1425
1426
1427 def build_size_tree(pats, width, outerbits, outermask):
1428     global insnwidth
1429
1430     # Collect the mask of bits that are fixed in this width
1431     innermask = 0xff << (insnwidth - width)
1432     innermask &= ~outermask
1433     minwidth = None
1434     onewidth = True
1435     for i in pats:
1436         innermask &= i.fixedmask
1437         if minwidth is None:
1438             minwidth = i.width
1439         elif minwidth != i.width:
1440             onewidth = False;
1441             if minwidth < i.width:
1442                 minwidth = i.width
1443
1444     if onewidth:
1445         return SizeLeaf(innermask, minwidth)
1446
1447     if innermask == 0:
1448         if width < minwidth:
1449             return build_size_tree(pats, width + 8, outerbits, outermask)
1450
1451         pnames = []
1452         for p in pats:
1453             pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
1454         error_with_file(pats[0].file, pats[0].lineno,
1455                         f'overlapping patterns size {width}:', pnames)
1456
1457     bins = {}
1458     for i in pats:
1459         fb = i.fixedbits & innermask
1460         if fb in bins:
1461             bins[fb].append(i)
1462         else:
1463             bins[fb] = [i]
1464
1465     fullmask = outermask | innermask
1466     lens = sorted(bins.keys())
1467     if len(lens) == 1:
1468         b = lens[0]
1469         return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
1470
1471     r = SizeTree(innermask, width)
1472     for b, l in bins.items():
1473         s = build_size_tree(l, width, b | outerbits, fullmask)
1474         r.subs.append((b, s))
1475     return r
1476 # end build_size_tree
1477
1478
1479 def prop_size(tree):
1480     """Propagate minimum widths up the decode size tree"""
1481
1482     if isinstance(tree, SizeTree):
1483         min = None
1484         for (b, s) in tree.subs:
1485             width = prop_size(s)
1486             if min is None or min > width:
1487                 min = width
1488         assert min >= tree.width
1489         tree.width = min
1490     else:
1491         min = tree.width
1492     return min
1493 # end prop_size
1494
1495
1496 def main():
1497     global arguments
1498     global formats
1499     global allpatterns
1500     global translate_scope
1501     global translate_prefix
1502     global output_fd
1503     global output_file
1504     global output_null
1505     global input_file
1506     global insnwidth
1507     global insntype
1508     global insnmask
1509     global decode_function
1510     global bitop_width
1511     global variablewidth
1512     global anyextern
1513     global testforerror
1514
1515     decode_scope = 'static '
1516
1517     long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
1518                  'static-decode=', 'varinsnwidth=', 'test-for-error',
1519                  'output-null']
1520     try:
1521         (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
1522     except getopt.GetoptError as err:
1523         error(0, err)
1524     for o, a in opts:
1525         if o in ('-o', '--output'):
1526             output_file = a
1527         elif o == '--decode':
1528             decode_function = a
1529             decode_scope = ''
1530         elif o == '--static-decode':
1531             decode_function = a
1532         elif o == '--translate':
1533             translate_prefix = a
1534             translate_scope = ''
1535         elif o in ('-w', '--insnwidth', '--varinsnwidth'):
1536             if o == '--varinsnwidth':
1537                 variablewidth = True
1538             insnwidth = int(a)
1539             if insnwidth == 16:
1540                 insntype = 'uint16_t'
1541                 insnmask = 0xffff
1542             elif insnwidth == 64:
1543                 insntype = 'uint64_t'
1544                 insnmask = 0xffffffffffffffff
1545                 bitop_width = 64
1546             elif insnwidth != 32:
1547                 error(0, 'cannot handle insns of width', insnwidth)
1548         elif o == '--test-for-error':
1549             testforerror = True
1550         elif o == '--output-null':
1551             output_null = True
1552         else:
1553             assert False, 'unhandled option'
1554
1555     if len(args) < 1:
1556         error(0, 'missing input file')
1557
1558     toppat = ExcMultiPattern(0)
1559
1560     for filename in args:
1561         input_file = filename
1562         f = open(filename, 'rt', encoding='utf-8')
1563         parse_file(f, toppat)
1564         f.close()
1565
1566     # We do not want to compute masks for toppat, because those masks
1567     # are used as a starting point for build_tree.  For toppat, we must
1568     # insist that decode begins from naught.
1569     for i in toppat.pats:
1570         i.prop_masks()
1571
1572     toppat.build_tree()
1573     toppat.prop_format()
1574
1575     if variablewidth:
1576         for i in toppat.pats:
1577             i.prop_width()
1578         stree = build_size_tree(toppat.pats, 8, 0, 0)
1579         prop_size(stree)
1580
1581     if output_null:
1582         output_fd = open(os.devnull, 'wt', encoding='utf-8', errors="ignore")
1583     elif output_file:
1584         output_fd = open(output_file, 'wt', encoding='utf-8')
1585     else:
1586         output_fd = io.TextIOWrapper(sys.stdout.buffer,
1587                                      encoding=sys.stdout.encoding,
1588                                      errors="ignore")
1589
1590     output_autogen()
1591     for n in sorted(arguments.keys()):
1592         f = arguments[n]
1593         f.output_def()
1594
1595     # A single translate function can be invoked for different patterns.
1596     # Make sure that the argument sets are the same, and declare the
1597     # function only once.
1598     #
1599     # If we're sharing formats, we're likely also sharing trans_* functions,
1600     # but we can't tell which ones.  Prevent issues from the compiler by
1601     # suppressing redundant declaration warnings.
1602     if anyextern:
1603         output("#pragma GCC diagnostic push\n",
1604                "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1605                "#ifdef __clang__\n"
1606                "#  pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1607                "#endif\n\n")
1608
1609     out_pats = {}
1610     for i in allpatterns:
1611         if i.name in out_pats:
1612             p = out_pats[i.name]
1613             if i.base.base != p.base.base:
1614                 error(0, i.name, ' has conflicting argument sets')
1615         else:
1616             i.output_decl()
1617             out_pats[i.name] = i
1618     output('\n')
1619
1620     if anyextern:
1621         output("#pragma GCC diagnostic pop\n\n")
1622
1623     for n in sorted(formats.keys()):
1624         f = formats[n]
1625         f.output_extract()
1626
1627     output(decode_scope, 'bool ', decode_function,
1628            '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
1629
1630     i4 = str_indent(4)
1631
1632     if len(allpatterns) != 0:
1633         output(i4, 'union {\n')
1634         for n in sorted(arguments.keys()):
1635             f = arguments[n]
1636             output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
1637         output(i4, '} u;\n\n')
1638         toppat.output_code(4, False, 0, 0)
1639
1640     output(i4, 'return false;\n')
1641     output('}\n')
1642
1643     if variablewidth:
1644         output('\n', decode_scope, insntype, ' ', decode_function,
1645                '_load(DisasContext *ctx)\n{\n',
1646                '    ', insntype, ' insn = 0;\n\n')
1647         stree.output_code(4, 0, 0, 0)
1648         output('}\n')
1649
1650     if output_file:
1651         output_fd.close()
1652     exit(1 if testforerror else 0)
1653 # end main
1654
1655
1656 if __name__ == '__main__':
1657     main()