2 # Copyright (c) 2018 Linaro Limited
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
# Prefix of the translate functions that the generated decoder declares
# and calls, as '<translate_prefix>_<pattern name>'.  The --translate
# command-line option replaces it.
translate_prefix = 'trans'
# Text emitted in front of each translate function declaration.  The
# --translate option resets it to the empty string.
translate_scope = 'static '
# Base name of the generated C decode function.  The same string also
# prefixes the generated '<decode_function>_extract_<format>' helpers and
# the '<decode_function>_load' / '<decode_function>_load_bytes' names.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
# Each kind is recognised by its leading sigil: '&' for argument sets,
# '%' for fields and '@' for formats; patterns carry no sigil.  Because
# of the '*' quantifier the name part may be empty as far as these
# expressions are concerned.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
# Local implementation of a topological sort. We use the same API that
# the Python graphlib does, so that when QEMU moves forward to a
# baseline of Python 3.9 or newer this code can all be dropped and
# replaced with:
#   from graphlib import TopologicalSorter, CycleError
#
# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
# We only implement the parts of TopologicalSorter we care about:
#  ts = TopologicalSorter(graph=None)
#    create the sorter. graph is a dictionary whose keys are
#    nodes and whose values are lists of the predecessors of that node.
#    (That is, if graph contains "A" -> ["B", "C"] then we must output
#    B and C before A.)
#  ts.static_order()
#    returns a list of all the nodes in sorted order, or raises CycleError
#  CycleError
#    exception raised if there are cycles in the graph. The second
#    element in the args attribute is a list of nodes which form a
#    cycle; the first and last element are the same, eg [a, b, c, a]
#    (Our implementation doesn't give the order correctly.)
80 # For our purposes we can assume that the data set is always small
81 # (typically 10 nodes or less, actual links in the graph very rare),
82 # so we don't need to worry about efficiency of implementation.
84 # The core of this implementation is from
85 # https://code.activestate.com/recipes/578272-topological-sort/
86 # (but updated to Python 3), and is under the MIT license.
class CycleError(ValueError):
    """Raised by the topological sort when the graph contains a cycle.

    It is raised with two arguments: a message and the list of nodes
    involved in the cycle, so the node list is available as args[1].
    """
    pass
92 class TopologicalSorter
:
93 """Topologically sort a graph"""
94 def __init__(self
, graph
=None):
97 def static_order(self
):
98 # We do the sort right here, unlike the stdlib version
99 from functools
import reduce
106 # This code wants the values in the dict to be specifically sets
107 for k
, v
in self
.graph
.items():
110 # Find all items that don't depend on anything.
111 extra_items_in_deps
= (reduce(set.union
, data
.values())
113 # Add empty dependencies where needed
114 data
.update({item
:{} for item
in extra_items_in_deps
})
116 ordered
= set(item
for item
, dep
in data
.items() if not dep
)
120 data
= {item
: (dep
- ordered
)
121 for item
, dep
in data
.items()
122 if item
not in ordered
}
124 # This doesn't give as nice results as the stdlib, which
125 # gives you the cycle by listing the nodes in order. Here
126 # we only know the nodes in the cycle but not their order.
127 raise CycleError(f
'nodes are in a cycle', list(data
.keys()))
130 # end TopologicalSorter
132 def error_with_file(file, lineno
, *args
):
133 """Print an error message from file:line and args and exit."""
141 prefix
+= f
'{lineno}:'
144 print(prefix
, end
='error: ', file=sys
.stderr
)
145 print(*args
, file=sys
.stderr
)
147 if output_file
and output_fd
:
149 os
.remove(output_file
)
150 exit(0 if testforerror
else 1)
151 # end error_with_file
def error(lineno, *args):
    """Report an error at LINENO of the current input file.

    Convenience wrapper: supplies the module-level input_file as the
    file name and hands everything to error_with_file().
    """
    where = (input_file, lineno)
    error_with_file(*where, *args)
def output_autogen():
    """Emit the C comment that marks the output as autogenerated."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
170 """Return a string with C spaces"""
174 def str_fields(fields
):
175 """Return a string uniquely identifying FIELDS"""
177 for n
in sorted(fields
.keys()):
183 """Return a hex string for val padded for insnwidth"""
185 return f
'0x{val:0{insnwidth // 4}x}'
189 """Return a hex string for val padded for insnwidth,
190 and with the proper suffix for a C constant."""
192 if val
>= 0x100000000:
194 elif val
>= 0x80000000:
196 return whex(val
) + suffix
199 def str_match_bits(bits
, mask
):
200 """Return a string pretty-printing BITS/MASK"""
203 i
= 1 << (insnwidth
- 1)
221 """Return true iff X is equal to a power of 2."""
222 return (x
& (x
- 1)) == 0
226 """Return the number of times 2 factors into X."""
229 while ((x
>> r
) & 1) == 0:
234 def is_contiguous(bits
):
238 if is_pow2((bits
>> shift
) + 1):
244 def eq_fields_for_args(flds_a
, arg
):
245 if len(flds_a
) != len(arg
.fields
):
247 # Only allow inference on default types
251 for k
, a
in flds_a
.items():
252 if k
not in arg
.fields
:
257 def eq_fields_for_fmts(flds_a
, flds_b
):
258 if len(flds_a
) != len(flds_b
):
260 for k
, a
in flds_a
.items():
264 if a
.__class
__ != b
.__class
__ or a
!= b
:
270 """Class representing a simple instruction field"""
271 def __init__(self
, sign
, pos
, len):
275 self
.mask
= ((1 << len) - 1) << pos
282 return str(self
.pos
) + ':' + s
+ str(self
.len)
284 def str_extract(self
, lvalue_formatter
):
286 s
= 's' if self
.sign
else ''
287 return f
'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
289 def referenced_fields(self
):
def __eq__(self, other):
    """Fields are equal when both their signedness and bit mask agree."""
    # Guard clause keeps the original short-circuit: the masks are only
    # looked at once the sign flags are known to match.
    if not (self.sign == other.sign):
        return False
    return self.mask == other.mask
def __ne__(self, other):
    """Inequality, defined as the exact negation of __eq__."""
    same = self.__eq__(other)
    return not same
301 """Class representing a compound instruction field"""
302 def __init__(self
, subs
, mask
):
304 self
.sign
= subs
[0].sign
308 return str(self
.subs
)
310 def str_extract(self
, lvalue_formatter
):
314 for f
in reversed(self
.subs
):
315 ext
= f
.str_extract(lvalue_formatter
)
319 ret
= f
'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
323 def referenced_fields(self
):
326 l
.extend(f
.referenced_fields())
329 def __ne__(self
, other
):
330 if len(self
.subs
) != len(other
.subs
):
332 for a
, b
in zip(self
.subs
, other
.subs
):
333 if a
.__class
__ != b
.__class
__ or a
!= b
:
def __eq__(self, other):
    """Equality, defined as the exact negation of __ne__.

    For this class the real comparison lives in __ne__; this method
    only inverts its answer.
    """
    differs = self.__ne__(other)
    return not differs
343 """Class representing an argument field with constant value"""
344 def __init__(self
, value
):
347 self
.sign
= value
< 0
350 return str(self
.value
)
def str_extract(self, lvalue_formatter):
    """Return the text that yields this field's value: the constant itself.

    LVALUE_FORMATTER is accepted for interface parity with the other
    field classes and is not used.
    """
    return f'{self.value!s}'
355 def referenced_fields(self
):
def __cmp__(self, other):
    """Legacy three-way comparison: negative, zero or positive by value.

    Python 3 never calls __cmp__ implicitly, so on its own this method
    does not make '==' or '!=' compare values.  It is kept so that any
    explicit caller keeps working; the operators are served by the
    __eq__/__ne__ methods below.
    """
    return self.value - other.value

def __eq__(self, other):
    """Two constant fields are equal when they hold the same value."""
    return self.value == other.value

def __ne__(self, other):
    """Inequality, defined as the exact negation of __eq__."""
    return not self.__eq__(other)

def __hash__(self):
    """Hash on the value so that equal constants hash alike.

    Defining __eq__ would otherwise make instances unhashable.
    """
    return hash(self.value)
364 """Class representing a field passed through a function"""
365 def __init__(self
, func
, base
):
366 self
.mask
= base
.mask
367 self
.sign
= base
.sign
372 return self
.func
+ '(' + str(self
.base
) + ')'
def str_extract(self, lvalue_formatter):
    """Return C text that calls self.func on the wrapped field's value.

    The result has the shape '<func>(ctx, <base extraction>)', where the
    inner text comes from the base field's own str_extract().
    """
    callee = self.func
    inner = self.base.str_extract(lvalue_formatter)
    return ''.join((callee, '(ctx, ', inner, ')'))
def referenced_fields(self):
    """Return the fields referenced by the wrapped base field."""
    wrapped = self.base
    return wrapped.referenced_fields()
def __eq__(self, other):
    """Equal when both the function name and the wrapped field match."""
    # The base fields are only compared once the function names agree,
    # as in the original short-circuit expression.
    if not (self.func == other.func):
        return False
    return self.base == other.base
def __ne__(self, other):
    """Inequality, defined as the exact negation of __eq__."""
    same = self.__eq__(other)
    return not same
389 class ParameterField
:
390 """Class representing a pseudo-field read from a function"""
391 def __init__(self
, func
):
def str_extract(self, lvalue_formatter):
    """Return C text that calls self.func with only the context argument.

    LVALUE_FORMATTER is accepted for interface parity with the other
    field classes and is not used.
    """
    callee = self.func
    return ''.join((callee, '(ctx)'))
402 def referenced_fields(self
):
def __eq__(self, other):
    """Equal when both pseudo-fields read from the same function."""
    mine, theirs = self.func, other.func
    return mine == theirs
def __ne__(self, other):
    """Inequality, defined as the exact negation of __eq__."""
    same = self.__eq__(other)
    return not same
413 """Class representing a field already named in the pattern"""
414 def __init__(self
, name
, sign
, len):
423 def str_extract(self
, lvalue_formatter
):
425 s
= 's' if self
.sign
else ''
426 lvalue
= lvalue_formatter(self
.name
)
427 return f
'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
429 def referenced_fields(self
):
def __eq__(self, other):
    """Equal when both refer to the same field name.

    Only the name takes part in the comparison.
    """
    mine, theirs = self.name, other.name
    return mine == theirs
def __ne__(self, other):
    """Inequality, defined as the exact negation of __eq__."""
    same = self.__eq__(other)
    return not same
440 """Class representing the extracted fields of a format"""
441 def __init__(self
, nm
, flds
, types
, extern
):
448 return self
.name
+ ' ' + str(self
.fields
)
def struct_name(self):
    """Return the C struct type name for this argument set: 'arg_<name>'."""
    return ''.join(('arg_', self.name))
453 def output_def(self
):
455 output('typedef struct {\n')
456 for (n
, t
) in zip(self
.fields
, self
.types
):
457 output(f
' {t} {n};\n')
458 output('} ', self
.struct_name(), ';\n\n')
462 """Common code between instruction formats and instruction patterns"""
463 def __init__(self
, name
, lineno
, base
, fixb
, fixm
, udfm
, fldm
, flds
, w
):
465 self
.file = input_file
468 self
.fixedbits
= fixb
469 self
.fixedmask
= fixm
470 self
.undefmask
= udfm
471 self
.fieldmask
= fldm
477 return self
.name
+ ' ' + str_match_bits(self
.fixedbits
, self
.fixedmask
)
480 return str_indent(i
) + self
.__str
__()
482 def dangling_references(self
):
483 # Return a list of all named references which aren't satisfied
484 # directly by this format/pattern. This will be either:
485 # * a format referring to a field which is specified by the
486 # pattern(s) using it
487 # * a pattern referring to a field which is specified by the
489 # * a user error (referring to a field that doesn't exist at all)
490 if self
.dangling
is None:
491 # Compute this once and cache the answer
493 for n
, f
in self
.fields
.items():
494 for r
in f
.referenced_fields():
495 if r
not in self
.fields
:
497 self
.dangling
= dangling
500 def output_fields(self
, indent
, lvalue_formatter
):
501 # We use a topological sort to ensure that any use of NamedField
502 # comes after the initialization of the field it is referencing.
504 for n
, f
in self
.fields
.items():
505 refs
= f
.referenced_fields()
509 ts
= TopologicalSorter(graph
)
510 for n
in ts
.static_order():
511 # We only want to emit assignments for the keys
512 # in our fields list, not for anything that ends up
513 # in the tsort graph only because it was referenced as
517 output(indent
, lvalue_formatter(n
), ' = ',
518 f
.str_extract(lvalue_formatter
), ';\n')
521 except CycleError
as e
:
522 # The second element of args is a list of nodes which form
523 # a cycle (there might be others too, but only one is reported).
524 # Pretty-print it to tell the user.
525 cycle
= ' => '.join(e
.args
[1])
526 error(self
.lineno
, 'field definitions form a cycle: ' + cycle
)
530 class Format(General
):
531 """Class representing an instruction format"""
def extract_name(self):
    """Return the name of the C function that extracts this format.

    The name is '<decode_function>_extract_<format name>', built from
    the module-level decode_function.
    """
    global decode_function
    pieces = (decode_function, '_extract_', self.name)
    return ''.join(pieces)
537 def output_extract(self
):
538 output('static void ', self
.extract_name(), '(DisasContext *ctx, ',
539 self
.base
.struct_name(), ' *a, ', insntype
, ' insn)\n{\n')
540 self
.output_fields(str_indent(4), lambda n
: 'a->' + n
)
545 class Pattern(General
):
546 """Class representing an instruction pattern"""
def output_decl(self):
    """Emit the C declarations that go with this pattern.

    Two pieces are written through output(): a typedef that makes
    'arg_<pattern>' an alias of the struct named by
    self.base.base.struct_name(), and the prototype of the translate
    function '<translate_prefix>_<pattern>' taking that type.
    """
    global translate_scope
    global translate_prefix
    struct = self.base.base.struct_name()
    output('typedef ', struct,
           ' arg_', self.name, ';\n')
    output(translate_scope, 'bool ', translate_prefix, '_', self.name,
           '(DisasContext *ctx, arg_', self.name, ' *a);\n')
556 def output_code(self
, i
, extracted
, outerbits
, outermask
):
557 global translate_prefix
559 arg
= self
.base
.base
.name
560 output(ind
, '/* ', self
.file, ':', str(self
.lineno
), ' */\n')
561 # We might have named references in the format that refer to fields
562 # in the pattern, or named references in the pattern that refer
563 # to fields in the format. This affects whether we extract the fields
564 # for the format before or after the ones for the pattern.
565 # For simplicity we don't allow cross references in both directions.
566 # This is also where we catch the syntax error of referring to
567 # a nonexistent field.
568 fmt_refs
= self
.base
.dangling_references()
570 if r
not in self
.fields
:
571 error(self
.lineno
, f
'format refers to undefined field {r}')
572 pat_refs
= self
.dangling_references()
574 if r
not in self
.base
.fields
:
575 error(self
.lineno
, f
'pattern refers to undefined field {r}')
576 if pat_refs
and fmt_refs
:
577 error(self
.lineno
, ('pattern that uses fields defined in format '
578 'cannot use format that uses fields defined '
581 # pattern fields first
582 self
.output_fields(ind
, lambda n
: 'u.f_' + arg
+ '.' + n
)
583 assert not extracted
, "dangling fmt refs but it was already extracted"
585 output(ind
, self
.base
.extract_name(),
586 '(ctx, &u.f_', arg
, ', insn);\n')
588 # pattern fields last
589 self
.output_fields(ind
, lambda n
: 'u.f_' + arg
+ '.' + n
)
591 output(ind
, 'if (', translate_prefix
, '_', self
.name
,
592 '(ctx, &u.f_', arg
, ')) return true;\n')
594 # Normal patterns do not have children.
595 def build_tree(self
):
597 def prop_masks(self
):
599 def prop_format(self
):
601 def prop_width(self
):
607 class MultiPattern(General
):
608 """Class representing a set of instruction patterns"""
610 def __init__(self
, lineno
):
611 self
.file = input_file
622 if self
.fixedbits
is not None:
623 r
+= ' ' + str_match_bits(self
.fixedbits
, self
.fixedmask
)
626 def output_decl(self
):
630 def prop_masks(self
):
636 # Collect fixedmask/undefmask for all of the children.
639 fixedmask
&= p
.fixedmask
640 undefmask
&= p
.undefmask
642 # Widen fixedmask until all fixedbits match
645 while repeat
and fixedmask
!= 0:
648 thisbits
= p
.fixedbits
& fixedmask
649 if fixedbits
is None:
651 elif fixedbits
!= thisbits
:
652 fixedmask
&= ~
(fixedbits ^ thisbits
)
657 self
.fixedbits
= fixedbits
658 self
.fixedmask
= fixedmask
659 self
.undefmask
= undefmask
661 def build_tree(self
):
665 def prop_format(self
):
669 def prop_width(self
):
675 elif width
!= p
.width
:
676 error_with_file(self
.file, self
.lineno
,
677 'width mismatch in patterns within braces')
683 class IncMultiPattern(MultiPattern
):
684 """Class representing an overlapping set of instruction patterns"""
686 def output_code(self
, i
, extracted
, outerbits
, outermask
):
687 global translate_prefix
690 if outermask
!= p
.fixedmask
:
691 innermask
= p
.fixedmask
& ~outermask
692 innerbits
= p
.fixedbits
& ~outermask
693 output(ind
, f
'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
694 output(ind
, f
' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
695 p
.output_code(i
+ 4, extracted
, p
.fixedbits
, p
.fixedmask
)
698 p
.output_code(i
, extracted
, p
.fixedbits
, p
.fixedmask
)
700 def build_tree(self
):
702 error_with_file(self
.file, self
.lineno
, 'empty pattern group')
709 """Class representing a node in a decode tree"""
711 def __init__(self
, fm
, tm
):
719 r
= ind
+ whex(self
.fixedmask
)
721 r
+= ' ' + self
.format
.name
723 for (b
, s
) in self
.subs
:
724 r
+= ind
+ f
' {whex(b)}:\n'
725 r
+= s
.str1(i
+ 4) + '\n'
732 def output_code(self
, i
, extracted
, outerbits
, outermask
):
735 # If we identified all nodes below have the same format,
736 # extract the fields now. But don't do it if the format relies
737 # on named fields from the insn pattern, as those won't have
738 # been initialised at this point.
739 if not extracted
and self
.base
and not self
.base
.dangling_references():
740 output(ind
, self
.base
.extract_name(),
741 '(ctx, &u.f_', self
.base
.base
.name
, ', insn);\n')
744 # Attempt to aid the compiler in producing compact switch statements.
745 # If the bits in the mask are contiguous, extract them.
746 sh
= is_contiguous(self
.thismask
)
748 # Propagate SH down into the local functions.
749 def str_switch(b
, sh
=sh
):
750 return f
'(insn >> {sh}) & {b >> sh:#x}'
752 def str_case(b
, sh
=sh
):
756 return f
'insn & {whexC(b)}'
761 output(ind
, 'switch (', str_switch(self
.thismask
), ') {\n')
762 for b
, s
in sorted(self
.subs
):
763 assert (self
.thismask
& ~s
.fixedmask
) == 0
764 innermask
= outermask | self
.thismask
765 innerbits
= outerbits | b
766 output(ind
, 'case ', str_case(b
), ':\n')
768 str_match_bits(innerbits
, innermask
), ' */\n')
769 s
.output_code(i
+ 4, extracted
, innerbits
, innermask
)
770 output(ind
, ' break;\n')
775 class ExcMultiPattern(MultiPattern
):
776 """Class representing a non-overlapping set of instruction patterns"""
def output_code(self, i, extracted, outerbits, outermask):
    """Emit the decode code for this group of patterns.

    All of the work is delegated, with unchanged arguments, to the
    decomposed Tree node stored in self.tree.
    """
    tree = self.tree
    tree.output_code(i, extracted, outerbits, outermask)
783 def __build_tree(pats
, outerbits
, outermask
):
784 # Find the intersection of all remaining fixedmask.
785 innermask
= ~outermask
& insnmask
787 innermask
&= i
.fixedmask
790 # Edge condition: One pattern covers the entire insnmask
792 t
= Tree(outermask
, innermask
)
793 t
.subs
.append((0, pats
[0]))
796 text
= 'overlapping patterns:'
798 text
+= '\n' + p
.file + ':' + str(p
.lineno
) + ': ' + str(p
)
799 error_with_file(pats
[0].file, pats
[0].lineno
, text
)
801 fullmask
= outermask | innermask
803 # Sort each element of pats into the bin selected by the mask.
806 fb
= i
.fixedbits
& innermask
812 # We must recurse if any bin has more than one element or if
813 # the single element in the bin has not been fully matched.
814 t
= Tree(fullmask
, innermask
)
816 for b
, l
in bins
.items():
818 if len(l
) > 1 or s
.fixedmask
& ~fullmask
!= 0:
819 s
= ExcMultiPattern
.__build
_tree
(l
, b | outerbits
, fullmask
)
820 t
.subs
.append((b
, s
))
824 def build_tree(self
):
826 self
.tree
= self
.__build
_tree
(self
.pats
, self
.fixedbits
,
830 def __prop_format(tree
):
831 """Propagate Format objects into the decode tree"""
833 # Depth first search.
834 for (b
, s
) in tree
.subs
:
835 if isinstance(s
, Tree
):
836 ExcMultiPattern
.__prop
_format
(s
)
838 # If all entries in SUBS have the same format, then
839 # propagate that into the tree.
841 for (b
, s
) in tree
.subs
:
def prop_format(self):
    """Propagate formats for this group, then into its decode tree.

    The parent class's prop_format() runs first; afterwards the private
    __prop_format helper is applied to self.tree.
    """
    super().prop_format()
    tree = self.tree
    self.__prop_format(tree)
854 # end ExcMultiPattern
857 def parse_field(lineno
, name
, toks
):
858 """Parse one instruction field from TOKS at LINENO"""
863 # A "simple" field will have only one entry;
864 # a "multifield" will have several.
869 if re
.match('^!function=', t
):
871 error(lineno
, 'duplicate function')
876 if re
.fullmatch(re_C_ident
+ ':s[0-9]+', t
):
878 subtoks
= t
.split(':')
881 f
= NamedField(n
, True, le
)
885 if re
.fullmatch(re_C_ident
+ ':[0-9]+', t
):
886 # Unsigned named field
887 subtoks
= t
.split(':')
890 f
= NamedField(n
, False, le
)
895 if re
.fullmatch('[0-9]+:s[0-9]+', t
):
896 # Signed field extract
897 subtoks
= t
.split(':s')
899 elif re
.fullmatch('[0-9]+:[0-9]+', t
):
900 # Unsigned field extract
901 subtoks
= t
.split(':')
904 error(lineno
, f
'invalid field token "{t}"')
907 if po
+ le
> insnwidth
:
908 error(lineno
, f
'field {t} too large')
909 f
= Field(sign
, po
, le
)
913 if width
> insnwidth
:
914 error(lineno
, 'field too large')
917 f
= ParameterField(func
)
919 error(lineno
, 'field with no value')
927 error(lineno
, 'field components overlap')
929 f
= MultiField(subs
, mask
)
931 f
= FunctionField(func
, f
)
934 error(lineno
, 'duplicate field', name
)
939 def parse_arguments(lineno
, name
, toks
):
940 """Parse one argument set from TOKS at LINENO"""
949 if re
.fullmatch('!extern', n
):
953 if re
.fullmatch(re_C_ident
+ ':' + re_C_ident
, n
):
954 (n
, t
) = n
.split(':')
955 elif re
.fullmatch(re_C_ident
, n
):
958 error(lineno
, f
'invalid argument set token "{n}"')
960 error(lineno
, f
'duplicate argument "{n}"')
964 if name
in arguments
:
965 error(lineno
, 'duplicate argument set', name
)
966 arguments
[name
] = Arguments(name
, flds
, types
, extern
)
967 # end parse_arguments
970 def lookup_field(lineno
, name
):
974 error(lineno
, 'undefined field', name
)
977 def add_field(lineno
, flds
, new_name
, f
):
979 error(lineno
, 'duplicate field', new_name
)
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the field known as OLD_NAME to FLDS under NEW_NAME.

    The field is resolved with lookup_field() and then handed to
    add_field(), whose result is returned.  LINENO is passed to both
    for their error reporting.
    """
    found = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, found)
988 def infer_argument_set(flds
):
990 global decode_function
992 for arg
in arguments
.values():
993 if eq_fields_for_args(flds
, arg
):
996 name
= decode_function
+ str(len(arguments
))
997 arg
= Arguments(name
, flds
.keys(), ['int'] * len(flds
), False)
998 arguments
[name
] = arg
1002 def infer_format(arg
, fieldmask
, flds
, width
):
1005 global decode_function
1009 for n
, c
in flds
.items():
1015 # Look for an existing format with the same argument set and fields
1016 for fmt
in formats
.values():
1017 if arg
and fmt
.base
!= arg
:
1019 if fieldmask
!= fmt
.fieldmask
:
1021 if width
!= fmt
.width
:
1023 if not eq_fields_for_fmts(flds
, fmt
.fields
):
1025 return (fmt
, const_flds
)
1027 name
= decode_function
+ '_Fmt_' + str(len(formats
))
1029 arg
= infer_argument_set(flds
)
1031 fmt
= Format(name
, 0, arg
, 0, 0, 0, fieldmask
, var_flds
, width
)
1034 return (fmt
, const_flds
)
1038 def parse_generic(lineno
, parent_pat
, name
, toks
):
1039 """Parse one instruction format from TOKS at LINENO"""
1050 global variablewidth
1052 is_format
= parent_pat
is None
1062 # '&Foo' gives a format an explicit argument set.
1063 if re
.fullmatch(re_arg_ident
, t
):
1066 error(lineno
, 'multiple argument sets')
1070 error(lineno
, 'undefined argument set', t
)
1073 # '@Foo' gives a pattern an explicit format.
1074 if re
.fullmatch(re_fmt_ident
, t
):
1077 error(lineno
, 'multiple formats')
1081 error(lineno
, 'undefined format', t
)
1084 # '%Foo' imports a field.
1085 if re
.fullmatch(re_fld_ident
, t
):
1087 flds
= add_field_byname(lineno
, flds
, tt
, tt
)
1090 # 'Foo=%Bar' imports a field with a different name.
1091 if re
.fullmatch(re_C_ident
+ '=' + re_fld_ident
, t
):
1092 (fname
, iname
) = t
.split('=%')
1093 flds
= add_field_byname(lineno
, flds
, fname
, iname
)
1096 # 'Foo=number' sets an argument field to a constant value
1097 if re
.fullmatch(re_C_ident
+ '=[+-]?[0-9]+', t
):
1098 (fname
, value
) = t
.split('=')
1100 flds
= add_field(lineno
, flds
, fname
, ConstField(value
))
1103 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
1104 # required ones, or dont-cares.
1105 if re
.fullmatch('[01.-]+', t
):
1107 fms
= t
.replace('0', '1')
1108 fms
= fms
.replace('.', '0')
1109 fms
= fms
.replace('-', '0')
1110 fbs
= t
.replace('.', '0')
1111 fbs
= fbs
.replace('-', '0')
1112 ubm
= t
.replace('1', '0')
1113 ubm
= ubm
.replace('.', '0')
1114 ubm
= ubm
.replace('-', '1')
1118 fixedbits
= (fixedbits
<< shift
) | fbs
1119 fixedmask
= (fixedmask
<< shift
) | fms
1120 undefmask
= (undefmask
<< shift
) | ubm
1121 # Otherwise, fieldname:fieldwidth
1122 elif re
.fullmatch(re_C_ident
+ ':s?[0-9]+', t
):
1123 (fname
, flen
) = t
.split(':')
1128 shift
= int(flen
, 10)
1129 if shift
+ width
> insnwidth
:
1130 error(lineno
, f
'field {fname} exceeds insnwidth')
1131 f
= Field(sign
, insnwidth
- width
- shift
, shift
)
1132 flds
= add_field(lineno
, flds
, fname
, f
)
1137 error(lineno
, f
'invalid token "{t}"')
1140 if variablewidth
and width
< insnwidth
and width
% 8 == 0:
1141 shift
= insnwidth
- width
1145 undefmask |
= (1 << shift
) - 1
1147 # We should have filled in all of the bits of the instruction.
1148 elif not (is_format
and width
== 0) and width
!= insnwidth
:
1149 error(lineno
, f
'definition has {width} bits')
1151 # Do not check for fields overlapping fields; one valid usage
1152 # is to be able to duplicate fields via import.
1154 for f
in flds
.values():
1157 # Fix up what we've parsed to match either a format or a pattern.
1159 # Formats cannot reference formats.
1161 error(lineno
, 'format referencing format')
1162 # If an argument set is given, then there should be no fields
1163 # without a place to store it.
1165 for f
in flds
.keys():
1166 if f
not in arg
.fields
:
1167 error(lineno
, f
'field {f} not in argument set {arg.name}')
1169 arg
= infer_argument_set(flds
)
1171 error(lineno
, 'duplicate format name', name
)
1172 fmt
= Format(name
, lineno
, arg
, fixedbits
, fixedmask
,
1173 undefmask
, fieldmask
, flds
, width
)
1176 # Patterns can reference a format ...
1178 # ... but not an argument simultaneously
1180 error(lineno
, 'pattern specifies both format and argument set')
1181 if fixedmask
& fmt
.fixedmask
:
1182 error(lineno
, 'pattern fixed bits overlap format fixed bits')
1183 if width
!= fmt
.width
:
1184 error(lineno
, 'pattern uses format of different width')
1185 fieldmask |
= fmt
.fieldmask
1186 fixedbits |
= fmt
.fixedbits
1187 fixedmask |
= fmt
.fixedmask
1188 undefmask |
= fmt
.undefmask
1190 (fmt
, flds
) = infer_format(arg
, fieldmask
, flds
, width
)
1192 for f
in flds
.keys():
1193 if f
not in arg
.fields
:
1194 error(lineno
, f
'field {f} not in argument set {arg.name}')
1195 if f
in fmt
.fields
.keys():
1196 error(lineno
, f
'field {f} set by format and pattern')
1197 for f
in arg
.fields
:
1198 if f
not in flds
.keys() and f
not in fmt
.fields
.keys():
1199 error(lineno
, f
'field {f} not initialized')
1200 pat
= Pattern(name
, lineno
, fmt
, fixedbits
, fixedmask
,
1201 undefmask
, fieldmask
, flds
, width
)
1202 parent_pat
.pats
.append(pat
)
1203 allpatterns
.append(pat
)
1205 # Validate the masks that we have assembled.
1206 if fieldmask
& fixedmask
:
1207 error(lineno
, 'fieldmask overlaps fixedmask ',
1208 f
'({whex(fieldmask)} & {whex(fixedmask)})')
1209 if fieldmask
& undefmask
:
1210 error(lineno
, 'fieldmask overlaps undefmask ',
1211 f
'({whex(fieldmask)} & {whex(undefmask)})')
1212 if fixedmask
& undefmask
:
1213 error(lineno
, 'fixedmask overlaps undefmask ',
1214 f
'({whex(fixedmask)} & {whex(undefmask)})')
1216 allbits
= fieldmask | fixedmask | undefmask
1217 if allbits
!= insnmask
:
1218 error(lineno
, 'bits left unspecified ',
1219 f
'({whex(allbits ^ insnmask)})')
1223 def parse_file(f
, parent_pat
):
1224 """Parse all of the patterns within a file"""
1230 # Read all of the lines of the file. Concatenate lines
1231 # ending in backslash; discard empty lines and comments.
1240 # Expand and strip spaces, to find indent.
1241 line
= line
.rstrip()
1242 line
= line
.expandtabs()
1244 line
= line
.lstrip()
1248 end
= line
.find('#')
1254 # Next line after continuation
1257 # Allow completely blank lines.
1260 indent
= len1
- len2
1261 # Empty line due to comment.
1263 # Indentation must be correct, even for comment lines.
1264 if indent
!= nesting
:
1265 error(lineno
, 'indentation ', indent
, ' != ', nesting
)
1267 start_lineno
= lineno
1271 if toks
[-1] == '\\':
1279 if name
== '}' or name
== ']':
1281 error(start_lineno
, 'extra tokens after close brace')
1283 # Make sure { } and [ ] nest properly.
1284 if (name
== '}') != isinstance(parent_pat
, IncMultiPattern
):
1285 error(lineno
, 'mismatched close brace')
1288 parent_pat
= nesting_pats
.pop()
1290 error(lineno
, 'extra close brace')
1293 if indent
!= nesting
:
1294 error(lineno
, 'indentation ', indent
, ' != ', nesting
)
1299 # Everything else should have current indentation.
1300 if indent
!= nesting
:
1301 error(start_lineno
, 'indentation ', indent
, ' != ', nesting
)
1304 if name
== '{' or name
== '[':
1306 error(start_lineno
, 'extra tokens after open brace')
1309 nested_pat
= IncMultiPattern(start_lineno
)
1311 nested_pat
= ExcMultiPattern(start_lineno
)
1312 parent_pat
.pats
.append(nested_pat
)
1313 nesting_pats
.append(parent_pat
)
1314 parent_pat
= nested_pat
1320 # Determine the type of object needing to be parsed.
1321 if re
.fullmatch(re_fld_ident
, name
):
1322 parse_field(start_lineno
, name
[1:], toks
)
1323 elif re
.fullmatch(re_arg_ident
, name
):
1324 parse_arguments(start_lineno
, name
[1:], toks
)
1325 elif re
.fullmatch(re_fmt_ident
, name
):
1326 parse_generic(start_lineno
, None, name
[1:], toks
)
1327 elif re
.fullmatch(re_pat_ident
, name
):
1328 parse_generic(start_lineno
, parent_pat
, name
, toks
)
1330 error(lineno
, f
'invalid token "{name}"')
1334 error(lineno
, 'missing close brace')
1339 """Class representing a node in a size decode tree"""
1341 def __init__(self
, m
, w
):
1349 r
= ind
+ whex(self
.mask
) + ' [\n'
1350 for (b
, s
) in self
.subs
:
1351 r
+= ind
+ f
' {whex(b)}:\n'
1352 r
+= s
.str1(i
+ 4) + '\n'
1359 def output_code(self
, i
, extracted
, outerbits
, outermask
):
1362 # If we need to load more bytes to test, do so now.
1363 if extracted
< self
.width
:
1364 output(ind
, f
'insn = {decode_function}_load_bytes',
1365 f
'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1366 extracted
= self
.width
1368 # Attempt to aid the compiler in producing compact switch statements.
1369 # If the bits in the mask are contiguous, extract them.
1370 sh
= is_contiguous(self
.mask
)
1372 # Propagate SH down into the local functions.
1373 def str_switch(b
, sh
=sh
):
1374 return f
'(insn >> {sh}) & {b >> sh:#x}'
1376 def str_case(b
, sh
=sh
):
1380 return f
'insn & {whexC(b)}'
1385 output(ind
, 'switch (', str_switch(self
.mask
), ') {\n')
1386 for b
, s
in sorted(self
.subs
):
1387 innermask
= outermask | self
.mask
1388 innerbits
= outerbits | b
1389 output(ind
, 'case ', str_case(b
), ':\n')
1391 str_match_bits(innerbits
, innermask
), ' */\n')
1392 s
.output_code(i
+ 4, extracted
, innerbits
, innermask
)
1394 output(ind
, 'return insn;\n')
1398 """Class representing a leaf node in a size decode tree"""
1400 def __init__(self
, m
, w
):
1405 return str_indent(i
) + whex(self
.mask
)
1410 def output_code(self
, i
, extracted
, outerbits
, outermask
):
1411 global decode_function
1414 # If we need to load more bytes, do so now.
1415 if extracted
< self
.width
:
1416 output(ind
, f
'insn = {decode_function}_load_bytes',
1417 f
'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1418 extracted
= self
.width
1419 output(ind
, 'return insn;\n')
1423 def build_size_tree(pats
, width
, outerbits
, outermask
):
1426 # Collect the mask of bits that are fixed in this width
1427 innermask
= 0xff << (insnwidth
- width
)
1428 innermask
&= ~outermask
1432 innermask
&= i
.fixedmask
1433 if minwidth
is None:
1435 elif minwidth
!= i
.width
:
1437 if minwidth
< i
.width
:
1441 return SizeLeaf(innermask
, minwidth
)
1444 if width
< minwidth
:
1445 return build_size_tree(pats
, width
+ 8, outerbits
, outermask
)
1449 pnames
.append(p
.name
+ ':' + p
.file + ':' + str(p
.lineno
))
1450 error_with_file(pats
[0].file, pats
[0].lineno
,
1451 f
'overlapping patterns size {width}:', pnames
)
1455 fb
= i
.fixedbits
& innermask
1461 fullmask
= outermask | innermask
1462 lens
= sorted(bins
.keys())
1465 return build_size_tree(bins
[b
], width
+ 8, b | outerbits
, fullmask
)
1467 r
= SizeTree(innermask
, width
)
1468 for b
, l
in bins
.items():
1469 s
= build_size_tree(l
, width
, b | outerbits
, fullmask
)
1470 r
.subs
.append((b
, s
))
1472 # end build_size_tree
1475 def prop_size(tree
):
1476 """Propagate minimum widths up the decode size tree"""
1478 if isinstance(tree
, SizeTree
):
1480 for (b
, s
) in tree
.subs
:
1481 width
= prop_size(s
)
1482 if min is None or min > width
:
1484 assert min >= tree
.width
1496 global translate_scope
1497 global translate_prefix
1505 global decode_function
1507 global variablewidth
1511 decode_scope
= 'static '
1513 long_opts
= ['decode=', 'translate=', 'output=', 'insnwidth=',
1514 'static-decode=', 'varinsnwidth=', 'test-for-error',
1517 (opts
, args
) = getopt
.gnu_getopt(sys
.argv
[1:], 'o:vw:', long_opts
)
1518 except getopt
.GetoptError
as err
:
1521 if o
in ('-o', '--output'):
1523 elif o
== '--decode':
1526 elif o
== '--static-decode':
1528 elif o
== '--translate':
1529 translate_prefix
= a
1530 translate_scope
= ''
1531 elif o
in ('-w', '--insnwidth', '--varinsnwidth'):
1532 if o
== '--varinsnwidth':
1533 variablewidth
= True
1536 insntype
= 'uint16_t'
1538 elif insnwidth
== 64:
1539 insntype
= 'uint64_t'
1540 insnmask
= 0xffffffffffffffff
1542 elif insnwidth
!= 32:
1543 error(0, 'cannot handle insns of width', insnwidth
)
1544 elif o
== '--test-for-error':
1546 elif o
== '--output-null':
1549 assert False, 'unhandled option'
1552 error(0, 'missing input file')
1554 toppat
= ExcMultiPattern(0)
1556 for filename
in args
:
1557 input_file
= filename
1558 f
= open(filename
, 'rt', encoding
='utf-8')
1559 parse_file(f
, toppat
)
1562 # We do not want to compute masks for toppat, because those masks
1563 # are used as a starting point for build_tree. For toppat, we must
1564 # insist that decode begins from naught.
1565 for i
in toppat
.pats
:
1569 toppat
.prop_format()
1572 for i
in toppat
.pats
:
1574 stree
= build_size_tree(toppat
.pats
, 8, 0, 0)
1578 output_fd
= open(os
.devnull
, 'wt', encoding
='utf-8', errors
="ignore")
1580 output_fd
= open(output_file
, 'wt', encoding
='utf-8')
1582 output_fd
= io
.TextIOWrapper(sys
.stdout
.buffer,
1583 encoding
=sys
.stdout
.encoding
,
1587 for n
in sorted(arguments
.keys()):
1591 # A single translate function can be invoked for different patterns.
1592 # Make sure that the argument sets are the same, and declare the
1593 # function only once.
1595 # If we're sharing formats, we're likely also sharing trans_* functions,
1596 # but we can't tell which ones. Prevent issues from the compiler by
1597 # suppressing redundant declaration warnings.
1599 output("#pragma GCC diagnostic push\n",
1600 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1601 "#ifdef __clang__\n"
1602 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1606 for i
in allpatterns
:
1607 if i
.name
in out_pats
:
1608 p
= out_pats
[i
.name
]
1609 if i
.base
.base
!= p
.base
.base
:
1610 error(0, i
.name
, ' has conflicting argument sets')
1613 out_pats
[i
.name
] = i
1617 output("#pragma GCC diagnostic pop\n\n")
1619 for n
in sorted(formats
.keys()):
1623 output(decode_scope
, 'bool ', decode_function
,
1624 '(DisasContext *ctx, ', insntype
, ' insn)\n{\n')
1628 if len(allpatterns
) != 0:
1629 output(i4
, 'union {\n')
1630 for n
in sorted(arguments
.keys()):
1632 output(i4
, i4
, f
.struct_name(), ' f_', f
.name
, ';\n')
1633 output(i4
, '} u;\n\n')
1634 toppat
.output_code(4, False, 0, 0)
1636 output(i4
, 'return false;\n')
1640 output('\n', decode_scope
, insntype
, ' ', decode_function
,
1641 '_load(DisasContext *ctx)\n{\n',
1642 ' ', insntype
, ' insn = 0;\n\n')
1643 stree
.output_code(4, 0, 0, 0)
1648 exit(1 if testforerror
else 0)
1652 if __name__
== '__main__':