2 # Copyright (c) 2018 Linaro Limited
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
# Prefix and linkage used for the per-pattern translate functions.
translate_prefix = "trans"
translate_scope = "static "
# Base name of the generated decode function and its helpers.
decode_function = "decode"

# An identifier for C.
re_C_ident = r"[a-zA-Z][a-zA-Z0-9_]*"

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = r"&[a-zA-Z0-9_]*"
re_fld_ident = r"%[a-zA-Z0-9_]*"
re_fmt_ident = r"@[a-zA-Z0-9_]*"
re_pat_ident = r"[a-zA-Z0-9_]*"
58 # Local implementation of a topological sort. We use the same API that
59 # the Python graphlib does, so that when QEMU moves forward to a
60 # baseline of Python 3.9 or newer this code can all be dropped and
62 # from graphlib import TopologicalSorter, CycleError
64 # https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
66 # We only implement the parts of TopologicalSorter we care about:
67 # ts = TopologicalSorter(graph=None)
68 # create the sorter. graph is a dictionary whose keys are
69 # nodes and whose values are lists of the predecessors of that node.
70 # (That is, if graph contains "A" -> ["B", "C"] then we must output
73 # returns a list of all the nodes in sorted order, or raises CycleError
75 # exception raised if there are cycles in the graph. The second
76 # element in the args attribute is a list of nodes which form a
77 # cycle; the first and last element are the same, eg [a, b, c, a]
78 # (Our implementation doesn't give the order correctly.)
80 # For our purposes we can assume that the data set is always small
81 # (typically 10 nodes or less, actual links in the graph very rare),
82 # so we don't need to worry about efficiency of implementation.
84 # The core of this implementation is from
85 # https://code.activestate.com/recipes/578272-topological-sort/
86 # (but updated to Python 3), and is under the MIT license.
class CycleError(ValueError):
    """Raised when the graph being sorted contains a cycle.

    A ValueError subclass; the second element of ``args`` is the list of
    nodes involved in the cycle.
    """
92 class TopologicalSorter
:
93 """Topologically sort a graph"""
94 def __init__(self
, graph
=None):
97 def static_order(self
):
98 # We do the sort right here, unlike the stdlib version
99 from functools
import reduce
106 # This code wants the values in the dict to be specifically sets
107 for k
, v
in self
.graph
.items():
110 # Find all items that don't depend on anything.
111 extra_items_in_deps
= (reduce(set.union
, data
.values())
113 # Add empty dependencies where needed
114 data
.update({item
:{} for item
in extra_items_in_deps
})
116 ordered
= set(item
for item
, dep
in data
.items() if not dep
)
120 data
= {item
: (dep
- ordered
)
121 for item
, dep
in data
.items()
122 if item
not in ordered
}
124 # This doesn't give as nice results as the stdlib, which
125 # gives you the cycle by listing the nodes in order. Here
126 # we only know the nodes in the cycle but not their order.
127 raise CycleError(f
'nodes are in a cycle', list(data
.keys()))
130 # end TopologicalSorter
132 def error_with_file(file, lineno
, *args
):
133 """Print an error message from file:line and args and exit."""
137 # For the test suite expected-errors case, don't print the
138 # string "error: ", so they don't turn up as false positives
139 # if you grep the meson logs for strings like that.
140 end
= 'error: ' if not testforerror
else 'detected: '
145 prefix
+= f
'{lineno}:'
148 print(prefix
, end
=end
, file=sys
.stderr
)
149 print(*args
, file=sys
.stderr
)
151 if output_file
and output_fd
:
153 os
.remove(output_file
)
154 exit(0 if testforerror
else 1)
155 # end error_with_file
def error(lineno, *args):
    """Report an error at LINENO of the current input file.

    Thin wrapper that forwards to error_with_file() using the
    module-level input_file as the file name.
    """
    location = (input_file, lineno)
    error_with_file(*location, *args)
def output_autogen():
    """Emit the "autogenerated file" banner comment to the output."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
174 """Return a string with C spaces"""
178 def str_fields(fields
):
179 """Return a string uniquely identifying FIELDS"""
181 for n
in sorted(fields
.keys()):
187 """Return a hex string for val padded for insnwidth"""
189 return f
'0x{val:0{insnwidth // 4}x}'
193 """Return a hex string for val padded for insnwidth,
194 and with the proper suffix for a C constant."""
196 if val
>= 0x100000000:
198 elif val
>= 0x80000000:
200 return whex(val
) + suffix
203 def str_match_bits(bits
, mask
):
204 """Return a string pretty-printing BITS/MASK"""
207 i
= 1 << (insnwidth
- 1)
225 """Return true iff X is equal to a power of 2."""
226 return (x
& (x
- 1)) == 0
230 """Return the number of times 2 factors into X."""
233 while ((x
>> r
) & 1) == 0:
238 def is_contiguous(bits
):
242 if is_pow2((bits
>> shift
) + 1):
248 def eq_fields_for_args(flds_a
, arg
):
249 if len(flds_a
) != len(arg
.fields
):
251 # Only allow inference on default types
255 for k
, a
in flds_a
.items():
256 if k
not in arg
.fields
:
261 def eq_fields_for_fmts(flds_a
, flds_b
):
262 if len(flds_a
) != len(flds_b
):
264 for k
, a
in flds_a
.items():
268 if a
.__class
__ != b
.__class
__ or a
!= b
:
274 """Class representing a simple instruction field"""
275 def __init__(self
, sign
, pos
, len):
279 self
.mask
= ((1 << len) - 1) << pos
286 return str(self
.pos
) + ':' + s
+ str(self
.len)
288 def str_extract(self
, lvalue_formatter
):
290 s
= 's' if self
.sign
else ''
291 return f
'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
293 def referenced_fields(self
):
def __eq__(self, other):
    """Return True when OTHER has the same signedness and bit mask."""
    # Check the sign first; the mask is only consulted when signs agree.
    if self.sign != other.sign:
        return False
    return self.mask == other.mask
def __ne__(self, other):
    """Return the logical negation of __eq__ for OTHER."""
    same = self.__eq__(other)
    return not same
305 """Class representing a compound instruction field"""
306 def __init__(self
, subs
, mask
):
308 self
.sign
= subs
[0].sign
312 return str(self
.subs
)
314 def str_extract(self
, lvalue_formatter
):
318 for f
in reversed(self
.subs
):
319 ext
= f
.str_extract(lvalue_formatter
)
323 ret
= f
'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
327 def referenced_fields(self
):
330 l
.extend(f
.referenced_fields())
333 def __ne__(self
, other
):
334 if len(self
.subs
) != len(other
.subs
):
336 for a
, b
in zip(self
.subs
, other
.subs
):
337 if a
.__class
__ != b
.__class
__ or a
!= b
:
def __eq__(self, other):
    """Return the logical negation of __ne__ for OTHER."""
    # Equality is defined in terms of __ne__, which does the real work.
    differs = self.__ne__(other)
    return not differs
347 """Class representing an argument field with constant value"""
348 def __init__(self
, value
):
351 self
.sign
= value
< 0
354 return str(self
.value
)
def str_extract(self, lvalue_formatter):
    """Return the constant value as text; LVALUE_FORMATTER is unused."""
    return f'{self.value!s}'
359 def referenced_fields(self
):
def __eq__(self, other):
    """Return True when OTHER carries the same constant value.

    Python 3 never calls __cmp__, so without this method two constant
    fields would only compare equal when they are the same object.
    Like the other field classes, defining __eq__ leaves instances
    unhashable.
    """
    return self.value == other.value

def __ne__(self, other):
    """Return the logical negation of __eq__ for OTHER."""
    return not self.__eq__(other)

def __cmp__(self, other):
    """Return a negative, zero or positive number ordering the values.

    Kept for any explicit caller; the interpreter itself ignores
    __cmp__ under Python 3.
    """
    return self.value - other.value
368 """Class representing a field passed through a function"""
369 def __init__(self
, func
, base
):
370 self
.mask
= base
.mask
371 self
.sign
= base
.sign
376 return self
.func
+ '(' + str(self
.base
) + ')'
def str_extract(self, lvalue_formatter):
    """Return C text calling self.func on the base field's extraction.

    The result has the form ``func(ctx, <base extraction>)``.
    """
    prefix = self.func + '(ctx, '
    inner = self.base.str_extract(lvalue_formatter)
    return prefix + inner + ')'
def referenced_fields(self):
    """Return whatever the wrapped base field reports as referenced."""
    wrapped = self.base
    return wrapped.referenced_fields()
def __eq__(self, other):
    """Return True when OTHER applies the same function to an equal base."""
    # Compare the function name first; bases are only compared on a match.
    if self.func != other.func:
        return False
    return self.base == other.base
def __ne__(self, other):
    """Return the logical negation of __eq__ for OTHER."""
    same = self.__eq__(other)
    return not same
393 class ParameterField
:
394 """Class representing a pseudo-field read from a function"""
395 def __init__(self
, func
):
def str_extract(self, lvalue_formatter):
    """Return C text calling self.func with only the context argument.

    LVALUE_FORMATTER is accepted for interface compatibility and unused.
    """
    call_suffix = '(ctx)'
    return self.func + call_suffix
406 def referenced_fields(self
):
409 def __eq__(self
, other
):
410 return self
.func
== other
.func
412 def __ne__(self
, other
):
413 return not self
.__eq
__(other
)
417 """Class representing a field already named in the pattern"""
418 def __init__(self
, name
, sign
, len):
427 def str_extract(self
, lvalue_formatter
):
429 s
= 's' if self
.sign
else ''
430 lvalue
= lvalue_formatter(self
.name
)
431 return f
'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
433 def referenced_fields(self
):
def __eq__(self, other):
    """Return True when OTHER refers to the same field name."""
    mine, theirs = self.name, other.name
    return mine == theirs
def __ne__(self, other):
    """Return the logical negation of __eq__ for OTHER."""
    same = self.__eq__(other)
    return not same
444 """Class representing the extracted fields of a format"""
445 def __init__(self
, nm
, flds
, types
, extern
):
452 return self
.name
+ ' ' + str(self
.fields
)
def struct_name(self):
    """Return the C struct type name for this set: 'arg_' plus its name."""
    prefix = 'arg_'
    return prefix + self.name
457 def output_def(self
):
459 output('typedef struct {\n')
460 for (n
, t
) in zip(self
.fields
, self
.types
):
461 output(f
' {t} {n};\n')
462 output('} ', self
.struct_name(), ';\n\n')
466 """Common code between instruction formats and instruction patterns"""
467 def __init__(self
, name
, lineno
, base
, fixb
, fixm
, udfm
, fldm
, flds
, w
):
469 self
.file = input_file
472 self
.fixedbits
= fixb
473 self
.fixedmask
= fixm
474 self
.undefmask
= udfm
475 self
.fieldmask
= fldm
481 return self
.name
+ ' ' + str_match_bits(self
.fixedbits
, self
.fixedmask
)
484 return str_indent(i
) + self
.__str
__()
486 def dangling_references(self
):
487 # Return a list of all named references which aren't satisfied
488 # directly by this format/pattern. This will be either:
489 # * a format referring to a field which is specified by the
490 # pattern(s) using it
491 # * a pattern referring to a field which is specified by the
493 # * a user error (referring to a field that doesn't exist at all)
494 if self
.dangling
is None:
495 # Compute this once and cache the answer
497 for n
, f
in self
.fields
.items():
498 for r
in f
.referenced_fields():
499 if r
not in self
.fields
:
501 self
.dangling
= dangling
504 def output_fields(self
, indent
, lvalue_formatter
):
505 # We use a topological sort to ensure that any use of NamedField
506 # comes after the initialization of the field it is referencing.
508 for n
, f
in self
.fields
.items():
509 refs
= f
.referenced_fields()
513 ts
= TopologicalSorter(graph
)
514 for n
in ts
.static_order():
515 # We only want to emit assignments for the keys
516 # in our fields list, not for anything that ends up
517 # in the tsort graph only because it was referenced as
521 output(indent
, lvalue_formatter(n
), ' = ',
522 f
.str_extract(lvalue_formatter
), ';\n')
525 except CycleError
as e
:
526 # The second element of args is a list of nodes which form
527 # a cycle (there might be others too, but only one is reported).
528 # Pretty-print it to tell the user.
529 cycle
= ' => '.join(e
.args
[1])
530 error(self
.lineno
, 'field definitions form a cycle: ' + cycle
)
534 class Format(General
):
535 """Class representing an instruction format"""
def extract_name(self):
    """Return the name of the C function that extracts this format.

    The name is the decode function name, '_extract_', then the
    format's own name.
    """
    global decode_function
    stem = decode_function + '_extract_'
    return stem + self.name
541 def output_extract(self
):
542 output('static void ', self
.extract_name(), '(DisasContext *ctx, ',
543 self
.base
.struct_name(), ' *a, ', insntype
, ' insn)\n{\n')
544 self
.output_fields(str_indent(4), lambda n
: 'a->' + n
)
549 class Pattern(General
):
550 """Class representing an instruction pattern"""
def output_decl(self):
    """Emit the C declarations needed to call this pattern's translator.

    Writes a typedef naming ``arg_<pattern>`` after the struct returned by
    ``self.base.base.struct_name()``, followed by the prototype of the
    ``<translate_prefix>_<pattern>`` function taking that type.
    """
    global translate_scope
    global translate_prefix
    # The pieces are passed to output() exactly as separate arguments.
    typedef_parts = ('typedef ', self.base.base.struct_name(),
                     ' arg_', self.name, ';\n')
    output(*typedef_parts)
    proto_parts = (translate_scope, 'bool ', translate_prefix, '_', self.name,
                   '(DisasContext *ctx, arg_', self.name, ' *a);\n')
    output(*proto_parts)
560 def output_code(self
, i
, extracted
, outerbits
, outermask
):
561 global translate_prefix
563 arg
= self
.base
.base
.name
564 output(ind
, '/* ', self
.file, ':', str(self
.lineno
), ' */\n')
565 # We might have named references in the format that refer to fields
566 # in the pattern, or named references in the pattern that refer
567 # to fields in the format. This affects whether we extract the fields
568 # for the format before or after the ones for the pattern.
569 # For simplicity we don't allow cross references in both directions.
570 # This is also where we catch the syntax error of referring to
571 # a nonexistent field.
572 fmt_refs
= self
.base
.dangling_references()
574 if r
not in self
.fields
:
575 error(self
.lineno
, f
'format refers to undefined field {r}')
576 pat_refs
= self
.dangling_references()
578 if r
not in self
.base
.fields
:
579 error(self
.lineno
, f
'pattern refers to undefined field {r}')
580 if pat_refs
and fmt_refs
:
581 error(self
.lineno
, ('pattern that uses fields defined in format '
582 'cannot use format that uses fields defined '
585 # pattern fields first
586 self
.output_fields(ind
, lambda n
: 'u.f_' + arg
+ '.' + n
)
587 assert not extracted
, "dangling fmt refs but it was already extracted"
589 output(ind
, self
.base
.extract_name(),
590 '(ctx, &u.f_', arg
, ', insn);\n')
592 # pattern fields last
593 self
.output_fields(ind
, lambda n
: 'u.f_' + arg
+ '.' + n
)
595 output(ind
, 'if (', translate_prefix
, '_', self
.name
,
596 '(ctx, &u.f_', arg
, ')) return true;\n')
598 # Normal patterns do not have children.
599 def build_tree(self
):
601 def prop_masks(self
):
603 def prop_format(self
):
605 def prop_width(self
):
611 class MultiPattern(General
):
612 """Class representing a set of instruction patterns"""
614 def __init__(self
, lineno
):
615 self
.file = input_file
626 if self
.fixedbits
is not None:
627 r
+= ' ' + str_match_bits(self
.fixedbits
, self
.fixedmask
)
630 def output_decl(self
):
634 def prop_masks(self
):
640 # Collect fixedmask/undefmask for all of the children.
643 fixedmask
&= p
.fixedmask
644 undefmask
&= p
.undefmask
646 # Widen fixedmask until all fixedbits match
649 while repeat
and fixedmask
!= 0:
652 thisbits
= p
.fixedbits
& fixedmask
653 if fixedbits
is None:
655 elif fixedbits
!= thisbits
:
656 fixedmask
&= ~
(fixedbits ^ thisbits
)
661 self
.fixedbits
= fixedbits
662 self
.fixedmask
= fixedmask
663 self
.undefmask
= undefmask
665 def build_tree(self
):
669 def prop_format(self
):
673 def prop_width(self
):
679 elif width
!= p
.width
:
680 error_with_file(self
.file, self
.lineno
,
681 'width mismatch in patterns within braces')
687 class IncMultiPattern(MultiPattern
):
688 """Class representing an overlapping set of instruction patterns"""
690 def output_code(self
, i
, extracted
, outerbits
, outermask
):
691 global translate_prefix
694 if outermask
!= p
.fixedmask
:
695 innermask
= p
.fixedmask
& ~outermask
696 innerbits
= p
.fixedbits
& ~outermask
697 output(ind
, f
'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
698 output(ind
, f
' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
699 p
.output_code(i
+ 4, extracted
, p
.fixedbits
, p
.fixedmask
)
702 p
.output_code(i
, extracted
, p
.fixedbits
, p
.fixedmask
)
704 def build_tree(self
):
706 error_with_file(self
.file, self
.lineno
, 'empty pattern group')
713 """Class representing a node in a decode tree"""
715 def __init__(self
, fm
, tm
):
723 r
= ind
+ whex(self
.fixedmask
)
725 r
+= ' ' + self
.format
.name
727 for (b
, s
) in self
.subs
:
728 r
+= ind
+ f
' {whex(b)}:\n'
729 r
+= s
.str1(i
+ 4) + '\n'
736 def output_code(self
, i
, extracted
, outerbits
, outermask
):
739 # If we identified all nodes below have the same format,
740 # extract the fields now. But don't do it if the format relies
741 # on named fields from the insn pattern, as those won't have
742 # been initialised at this point.
743 if not extracted
and self
.base
and not self
.base
.dangling_references():
744 output(ind
, self
.base
.extract_name(),
745 '(ctx, &u.f_', self
.base
.base
.name
, ', insn);\n')
748 # Attempt to aid the compiler in producing compact switch statements.
749 # If the bits in the mask are contiguous, extract them.
750 sh
= is_contiguous(self
.thismask
)
752 # Propagate SH down into the local functions.
753 def str_switch(b
, sh
=sh
):
754 return f
'(insn >> {sh}) & {b >> sh:#x}'
756 def str_case(b
, sh
=sh
):
760 return f
'insn & {whexC(b)}'
765 output(ind
, 'switch (', str_switch(self
.thismask
), ') {\n')
766 for b
, s
in sorted(self
.subs
):
767 assert (self
.thismask
& ~s
.fixedmask
) == 0
768 innermask
= outermask | self
.thismask
769 innerbits
= outerbits | b
770 output(ind
, 'case ', str_case(b
), ':\n')
772 str_match_bits(innerbits
, innermask
), ' */\n')
773 s
.output_code(i
+ 4, extracted
, innerbits
, innermask
)
774 output(ind
, ' break;\n')
779 class ExcMultiPattern(MultiPattern
):
780 """Class representing a non-overlapping set of instruction patterns"""
def output_code(self, i, extracted, outerbits, outermask):
    """Emit code for this group by delegating to its decomposed tree.

    All four arguments are passed through unchanged to
    ``self.tree.output_code``.
    """
    tree = self.tree
    tree.output_code(i, extracted, outerbits, outermask)
787 def __build_tree(pats
, outerbits
, outermask
):
788 # Find the intersection of all remaining fixedmask.
789 innermask
= ~outermask
& insnmask
791 innermask
&= i
.fixedmask
794 # Edge condition: One pattern covers the entire insnmask
796 t
= Tree(outermask
, innermask
)
797 t
.subs
.append((0, pats
[0]))
800 text
= 'overlapping patterns:'
802 text
+= '\n' + p
.file + ':' + str(p
.lineno
) + ': ' + str(p
)
803 error_with_file(pats
[0].file, pats
[0].lineno
, text
)
805 fullmask
= outermask | innermask
807 # Sort each element of pats into the bin selected by the mask.
810 fb
= i
.fixedbits
& innermask
816 # We must recurse if any bin has more than one element or if
817 # the single element in the bin has not been fully matched.
818 t
= Tree(fullmask
, innermask
)
820 for b
, l
in bins
.items():
822 if len(l
) > 1 or s
.fixedmask
& ~fullmask
!= 0:
823 s
= ExcMultiPattern
.__build
_tree
(l
, b | outerbits
, fullmask
)
824 t
.subs
.append((b
, s
))
828 def build_tree(self
):
830 self
.tree
= self
.__build
_tree
(self
.pats
, self
.fixedbits
,
834 def __prop_format(tree
):
835 """Propagate Format objects into the decode tree"""
837 # Depth first search.
838 for (b
, s
) in tree
.subs
:
839 if isinstance(s
, Tree
):
840 ExcMultiPattern
.__prop
_format
(s
)
842 # If all entries in SUBS have the same format, then
843 # propagate that into the tree.
845 for (b
, s
) in tree
.subs
:
def prop_format(self):
    """Propagate formats for this group, then into its decode tree."""
    # Run the parent class's propagation first.
    super().prop_format()
    # Then push Format objects down the tree built for this group.
    self.__prop_format(self.tree)
858 # end ExcMultiPattern
861 def parse_field(lineno
, name
, toks
):
862 """Parse one instruction field from TOKS at LINENO"""
867 # A "simple" field will have only one entry;
868 # a "multifield" will have several.
873 if re
.match('^!function=', t
):
875 error(lineno
, 'duplicate function')
880 if re
.fullmatch(re_C_ident
+ ':s[0-9]+', t
):
882 subtoks
= t
.split(':')
885 f
= NamedField(n
, True, le
)
889 if re
.fullmatch(re_C_ident
+ ':[0-9]+', t
):
890 # Unsigned named field
891 subtoks
= t
.split(':')
894 f
= NamedField(n
, False, le
)
899 if re
.fullmatch('[0-9]+:s[0-9]+', t
):
900 # Signed field extract
901 subtoks
= t
.split(':s')
903 elif re
.fullmatch('[0-9]+:[0-9]+', t
):
904 # Unsigned field extract
905 subtoks
= t
.split(':')
908 error(lineno
, f
'invalid field token "{t}"')
911 if po
+ le
> insnwidth
:
912 error(lineno
, f
'field {t} too large')
913 f
= Field(sign
, po
, le
)
917 if width
> insnwidth
:
918 error(lineno
, 'field too large')
921 f
= ParameterField(func
)
923 error(lineno
, 'field with no value')
931 error(lineno
, 'field components overlap')
933 f
= MultiField(subs
, mask
)
935 f
= FunctionField(func
, f
)
938 error(lineno
, 'duplicate field', name
)
943 def parse_arguments(lineno
, name
, toks
):
944 """Parse one argument set from TOKS at LINENO"""
953 if re
.fullmatch('!extern', n
):
957 if re
.fullmatch(re_C_ident
+ ':' + re_C_ident
, n
):
958 (n
, t
) = n
.split(':')
959 elif re
.fullmatch(re_C_ident
, n
):
962 error(lineno
, f
'invalid argument set token "{n}"')
964 error(lineno
, f
'duplicate argument "{n}"')
968 if name
in arguments
:
969 error(lineno
, 'duplicate argument set', name
)
970 arguments
[name
] = Arguments(name
, flds
, types
, extern
)
971 # end parse_arguments
974 def lookup_field(lineno
, name
):
978 error(lineno
, 'undefined field', name
)
981 def add_field(lineno
, flds
, new_name
, f
):
983 error(lineno
, 'duplicate field', new_name
)
def add_field_byname(lineno, flds, new_name, old_name):
    """Look up the field OLD_NAME and add it to FLDS as NEW_NAME.

    Returns whatever add_field() returns; LINENO is passed to both
    helpers for error reporting.
    """
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
992 def infer_argument_set(flds
):
994 global decode_function
996 for arg
in arguments
.values():
997 if eq_fields_for_args(flds
, arg
):
1000 name
= decode_function
+ str(len(arguments
))
1001 arg
= Arguments(name
, flds
.keys(), ['int'] * len(flds
), False)
1002 arguments
[name
] = arg
1006 def infer_format(arg
, fieldmask
, flds
, width
):
1009 global decode_function
1013 for n
, c
in flds
.items():
1019 # Look for an existing format with the same argument set and fields
1020 for fmt
in formats
.values():
1021 if arg
and fmt
.base
!= arg
:
1023 if fieldmask
!= fmt
.fieldmask
:
1025 if width
!= fmt
.width
:
1027 if not eq_fields_for_fmts(flds
, fmt
.fields
):
1029 return (fmt
, const_flds
)
1031 name
= decode_function
+ '_Fmt_' + str(len(formats
))
1033 arg
= infer_argument_set(flds
)
1035 fmt
= Format(name
, 0, arg
, 0, 0, 0, fieldmask
, var_flds
, width
)
1038 return (fmt
, const_flds
)
1042 def parse_generic(lineno
, parent_pat
, name
, toks
):
1043 """Parse one instruction format from TOKS at LINENO"""
1054 global variablewidth
1056 is_format
= parent_pat
is None
1066 # '&Foo' gives a format an explicit argument set.
1067 if re
.fullmatch(re_arg_ident
, t
):
1070 error(lineno
, 'multiple argument sets')
1074 error(lineno
, 'undefined argument set', t
)
1077 # '@Foo' gives a pattern an explicit format.
1078 if re
.fullmatch(re_fmt_ident
, t
):
1081 error(lineno
, 'multiple formats')
1085 error(lineno
, 'undefined format', t
)
1088 # '%Foo' imports a field.
1089 if re
.fullmatch(re_fld_ident
, t
):
1091 flds
= add_field_byname(lineno
, flds
, tt
, tt
)
1094 # 'Foo=%Bar' imports a field with a different name.
1095 if re
.fullmatch(re_C_ident
+ '=' + re_fld_ident
, t
):
1096 (fname
, iname
) = t
.split('=%')
1097 flds
= add_field_byname(lineno
, flds
, fname
, iname
)
1100 # 'Foo=number' sets an argument field to a constant value
1101 if re
.fullmatch(re_C_ident
+ '=[+-]?[0-9]+', t
):
1102 (fname
, value
) = t
.split('=')
1104 flds
= add_field(lineno
, flds
, fname
, ConstField(value
))
1107 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
1108 # required ones, or dont-cares.
1109 if re
.fullmatch('[01.-]+', t
):
1111 fms
= t
.replace('0', '1')
1112 fms
= fms
.replace('.', '0')
1113 fms
= fms
.replace('-', '0')
1114 fbs
= t
.replace('.', '0')
1115 fbs
= fbs
.replace('-', '0')
1116 ubm
= t
.replace('1', '0')
1117 ubm
= ubm
.replace('.', '0')
1118 ubm
= ubm
.replace('-', '1')
1122 fixedbits
= (fixedbits
<< shift
) | fbs
1123 fixedmask
= (fixedmask
<< shift
) | fms
1124 undefmask
= (undefmask
<< shift
) | ubm
1125 # Otherwise, fieldname:fieldwidth
1126 elif re
.fullmatch(re_C_ident
+ ':s?[0-9]+', t
):
1127 (fname
, flen
) = t
.split(':')
1132 shift
= int(flen
, 10)
1133 if shift
+ width
> insnwidth
:
1134 error(lineno
, f
'field {fname} exceeds insnwidth')
1135 f
= Field(sign
, insnwidth
- width
- shift
, shift
)
1136 flds
= add_field(lineno
, flds
, fname
, f
)
1141 error(lineno
, f
'invalid token "{t}"')
1144 if variablewidth
and width
< insnwidth
and width
% 8 == 0:
1145 shift
= insnwidth
- width
1149 undefmask |
= (1 << shift
) - 1
1151 # We should have filled in all of the bits of the instruction.
1152 elif not (is_format
and width
== 0) and width
!= insnwidth
:
1153 error(lineno
, f
'definition has {width} bits')
1155 # Do not check for fields overlapping fields; one valid usage
1156 # is to be able to duplicate fields via import.
1158 for f
in flds
.values():
1161 # Fix up what we've parsed to match either a format or a pattern.
1163 # Formats cannot reference formats.
1165 error(lineno
, 'format referencing format')
1166 # If an argument set is given, then there should be no fields
1167 # without a place to store it.
1169 for f
in flds
.keys():
1170 if f
not in arg
.fields
:
1171 error(lineno
, f
'field {f} not in argument set {arg.name}')
1173 arg
= infer_argument_set(flds
)
1175 error(lineno
, 'duplicate format name', name
)
1176 fmt
= Format(name
, lineno
, arg
, fixedbits
, fixedmask
,
1177 undefmask
, fieldmask
, flds
, width
)
1180 # Patterns can reference a format ...
1182 # ... but not an argument simultaneously
1184 error(lineno
, 'pattern specifies both format and argument set')
1185 if fixedmask
& fmt
.fixedmask
:
1186 error(lineno
, 'pattern fixed bits overlap format fixed bits')
1187 if width
!= fmt
.width
:
1188 error(lineno
, 'pattern uses format of different width')
1189 fieldmask |
= fmt
.fieldmask
1190 fixedbits |
= fmt
.fixedbits
1191 fixedmask |
= fmt
.fixedmask
1192 undefmask |
= fmt
.undefmask
1194 (fmt
, flds
) = infer_format(arg
, fieldmask
, flds
, width
)
1196 for f
in flds
.keys():
1197 if f
not in arg
.fields
:
1198 error(lineno
, f
'field {f} not in argument set {arg.name}')
1199 if f
in fmt
.fields
.keys():
1200 error(lineno
, f
'field {f} set by format and pattern')
1201 for f
in arg
.fields
:
1202 if f
not in flds
.keys() and f
not in fmt
.fields
.keys():
1203 error(lineno
, f
'field {f} not initialized')
1204 pat
= Pattern(name
, lineno
, fmt
, fixedbits
, fixedmask
,
1205 undefmask
, fieldmask
, flds
, width
)
1206 parent_pat
.pats
.append(pat
)
1207 allpatterns
.append(pat
)
1209 # Validate the masks that we have assembled.
1210 if fieldmask
& fixedmask
:
1211 error(lineno
, 'fieldmask overlaps fixedmask ',
1212 f
'({whex(fieldmask)} & {whex(fixedmask)})')
1213 if fieldmask
& undefmask
:
1214 error(lineno
, 'fieldmask overlaps undefmask ',
1215 f
'({whex(fieldmask)} & {whex(undefmask)})')
1216 if fixedmask
& undefmask
:
1217 error(lineno
, 'fixedmask overlaps undefmask ',
1218 f
'({whex(fixedmask)} & {whex(undefmask)})')
1220 allbits
= fieldmask | fixedmask | undefmask
1221 if allbits
!= insnmask
:
1222 error(lineno
, 'bits left unspecified ',
1223 f
'({whex(allbits ^ insnmask)})')
1227 def parse_file(f
, parent_pat
):
1228 """Parse all of the patterns within a file"""
1234 # Read all of the lines of the file. Concatenate lines
1235 # ending in backslash; discard empty lines and comments.
1244 # Expand and strip spaces, to find indent.
1245 line
= line
.rstrip()
1246 line
= line
.expandtabs()
1248 line
= line
.lstrip()
1252 end
= line
.find('#')
1258 # Next line after continuation
1261 # Allow completely blank lines.
1264 indent
= len1
- len2
1265 # Empty line due to comment.
1267 # Indentation must be correct, even for comment lines.
1268 if indent
!= nesting
:
1269 error(lineno
, 'indentation ', indent
, ' != ', nesting
)
1271 start_lineno
= lineno
1275 if toks
[-1] == '\\':
1283 if name
== '}' or name
== ']':
1285 error(start_lineno
, 'extra tokens after close brace')
1287 # Make sure { } and [ ] nest properly.
1288 if (name
== '}') != isinstance(parent_pat
, IncMultiPattern
):
1289 error(lineno
, 'mismatched close brace')
1292 parent_pat
= nesting_pats
.pop()
1294 error(lineno
, 'extra close brace')
1297 if indent
!= nesting
:
1298 error(lineno
, 'indentation ', indent
, ' != ', nesting
)
1303 # Everything else should have current indentation.
1304 if indent
!= nesting
:
1305 error(start_lineno
, 'indentation ', indent
, ' != ', nesting
)
1308 if name
== '{' or name
== '[':
1310 error(start_lineno
, 'extra tokens after open brace')
1313 nested_pat
= IncMultiPattern(start_lineno
)
1315 nested_pat
= ExcMultiPattern(start_lineno
)
1316 parent_pat
.pats
.append(nested_pat
)
1317 nesting_pats
.append(parent_pat
)
1318 parent_pat
= nested_pat
1324 # Determine the type of object needing to be parsed.
1325 if re
.fullmatch(re_fld_ident
, name
):
1326 parse_field(start_lineno
, name
[1:], toks
)
1327 elif re
.fullmatch(re_arg_ident
, name
):
1328 parse_arguments(start_lineno
, name
[1:], toks
)
1329 elif re
.fullmatch(re_fmt_ident
, name
):
1330 parse_generic(start_lineno
, None, name
[1:], toks
)
1331 elif re
.fullmatch(re_pat_ident
, name
):
1332 parse_generic(start_lineno
, parent_pat
, name
, toks
)
1334 error(lineno
, f
'invalid token "{name}"')
1338 error(lineno
, 'missing close brace')
1343 """Class representing a node in a size decode tree"""
1345 def __init__(self
, m
, w
):
1353 r
= ind
+ whex(self
.mask
) + ' [\n'
1354 for (b
, s
) in self
.subs
:
1355 r
+= ind
+ f
' {whex(b)}:\n'
1356 r
+= s
.str1(i
+ 4) + '\n'
1363 def output_code(self
, i
, extracted
, outerbits
, outermask
):
1366 # If we need to load more bytes to test, do so now.
1367 if extracted
< self
.width
:
1368 output(ind
, f
'insn = {decode_function}_load_bytes',
1369 f
'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1370 extracted
= self
.width
1372 # Attempt to aid the compiler in producing compact switch statements.
1373 # If the bits in the mask are contiguous, extract them.
1374 sh
= is_contiguous(self
.mask
)
1376 # Propagate SH down into the local functions.
1377 def str_switch(b
, sh
=sh
):
1378 return f
'(insn >> {sh}) & {b >> sh:#x}'
1380 def str_case(b
, sh
=sh
):
1384 return f
'insn & {whexC(b)}'
1389 output(ind
, 'switch (', str_switch(self
.mask
), ') {\n')
1390 for b
, s
in sorted(self
.subs
):
1391 innermask
= outermask | self
.mask
1392 innerbits
= outerbits | b
1393 output(ind
, 'case ', str_case(b
), ':\n')
1395 str_match_bits(innerbits
, innermask
), ' */\n')
1396 s
.output_code(i
+ 4, extracted
, innerbits
, innermask
)
1398 output(ind
, 'return insn;\n')
1402 """Class representing a leaf node in a size decode tree"""
1404 def __init__(self
, m
, w
):
1409 return str_indent(i
) + whex(self
.mask
)
1414 def output_code(self
, i
, extracted
, outerbits
, outermask
):
1415 global decode_function
1418 # If we need to load more bytes, do so now.
1419 if extracted
< self
.width
:
1420 output(ind
, f
'insn = {decode_function}_load_bytes',
1421 f
'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1422 extracted
= self
.width
1423 output(ind
, 'return insn;\n')
1427 def build_size_tree(pats
, width
, outerbits
, outermask
):
1430 # Collect the mask of bits that are fixed in this width
1431 innermask
= 0xff << (insnwidth
- width
)
1432 innermask
&= ~outermask
1436 innermask
&= i
.fixedmask
1437 if minwidth
is None:
1439 elif minwidth
!= i
.width
:
1441 if minwidth
< i
.width
:
1445 return SizeLeaf(innermask
, minwidth
)
1448 if width
< minwidth
:
1449 return build_size_tree(pats
, width
+ 8, outerbits
, outermask
)
1453 pnames
.append(p
.name
+ ':' + p
.file + ':' + str(p
.lineno
))
1454 error_with_file(pats
[0].file, pats
[0].lineno
,
1455 f
'overlapping patterns size {width}:', pnames
)
1459 fb
= i
.fixedbits
& innermask
1465 fullmask
= outermask | innermask
1466 lens
= sorted(bins
.keys())
1469 return build_size_tree(bins
[b
], width
+ 8, b | outerbits
, fullmask
)
1471 r
= SizeTree(innermask
, width
)
1472 for b
, l
in bins
.items():
1473 s
= build_size_tree(l
, width
, b | outerbits
, fullmask
)
1474 r
.subs
.append((b
, s
))
1476 # end build_size_tree
1479 def prop_size(tree
):
1480 """Propagate minimum widths up the decode size tree"""
1482 if isinstance(tree
, SizeTree
):
1484 for (b
, s
) in tree
.subs
:
1485 width
= prop_size(s
)
1486 if min is None or min > width
:
1488 assert min >= tree
.width
1500 global translate_scope
1501 global translate_prefix
1509 global decode_function
1511 global variablewidth
1515 decode_scope
= 'static '
1517 long_opts
= ['decode=', 'translate=', 'output=', 'insnwidth=',
1518 'static-decode=', 'varinsnwidth=', 'test-for-error',
1521 (opts
, args
) = getopt
.gnu_getopt(sys
.argv
[1:], 'o:vw:', long_opts
)
1522 except getopt
.GetoptError
as err
:
1525 if o
in ('-o', '--output'):
1527 elif o
== '--decode':
1530 elif o
== '--static-decode':
1532 elif o
== '--translate':
1533 translate_prefix
= a
1534 translate_scope
= ''
1535 elif o
in ('-w', '--insnwidth', '--varinsnwidth'):
1536 if o
== '--varinsnwidth':
1537 variablewidth
= True
1540 insntype
= 'uint16_t'
1542 elif insnwidth
== 64:
1543 insntype
= 'uint64_t'
1544 insnmask
= 0xffffffffffffffff
1546 elif insnwidth
!= 32:
1547 error(0, 'cannot handle insns of width', insnwidth
)
1548 elif o
== '--test-for-error':
1550 elif o
== '--output-null':
1553 assert False, 'unhandled option'
1556 error(0, 'missing input file')
1558 toppat
= ExcMultiPattern(0)
1560 for filename
in args
:
1561 input_file
= filename
1562 f
= open(filename
, 'rt', encoding
='utf-8')
1563 parse_file(f
, toppat
)
1566 # We do not want to compute masks for toppat, because those masks
1567 # are used as a starting point for build_tree. For toppat, we must
1568 # insist that decode begins from naught.
1569 for i
in toppat
.pats
:
1573 toppat
.prop_format()
1576 for i
in toppat
.pats
:
1578 stree
= build_size_tree(toppat
.pats
, 8, 0, 0)
1582 output_fd
= open(os
.devnull
, 'wt', encoding
='utf-8', errors
="ignore")
1584 output_fd
= open(output_file
, 'wt', encoding
='utf-8')
1586 output_fd
= io
.TextIOWrapper(sys
.stdout
.buffer,
1587 encoding
=sys
.stdout
.encoding
,
1591 for n
in sorted(arguments
.keys()):
1595 # A single translate function can be invoked for different patterns.
1596 # Make sure that the argument sets are the same, and declare the
1597 # function only once.
1599 # If we're sharing formats, we're likely also sharing trans_* functions,
1600 # but we can't tell which ones. Prevent issues from the compiler by
1601 # suppressing redundant declaration warnings.
1603 output("#pragma GCC diagnostic push\n",
1604 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1605 "#ifdef __clang__\n"
1606 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1610 for i
in allpatterns
:
1611 if i
.name
in out_pats
:
1612 p
= out_pats
[i
.name
]
1613 if i
.base
.base
!= p
.base
.base
:
1614 error(0, i
.name
, ' has conflicting argument sets')
1617 out_pats
[i
.name
] = i
1621 output("#pragma GCC diagnostic pop\n\n")
1623 for n
in sorted(formats
.keys()):
1627 output(decode_scope
, 'bool ', decode_function
,
1628 '(DisasContext *ctx, ', insntype
, ' insn)\n{\n')
1632 if len(allpatterns
) != 0:
1633 output(i4
, 'union {\n')
1634 for n
in sorted(arguments
.keys()):
1636 output(i4
, i4
, f
.struct_name(), ' f_', f
.name
, ';\n')
1637 output(i4
, '} u;\n\n')
1638 toppat
.output_code(4, False, 0, 0)
1640 output(i4
, 'return false;\n')
1644 output('\n', decode_scope
, insntype
, ' ', decode_function
,
1645 '_load(DisasContext *ctx)\n{\n',
1646 ' ', insntype
, ' insn = 0;\n\n')
1647 stree
.output_code(4, 0, 0, 0)
1652 exit(1 if testforerror
else 0)
1656 if __name__
== '__main__':