2 # Copyright (c) 2018 Linaro Limited
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
36 translate_prefix
= 'trans'
37 translate_scope
= 'static '
42 decode_function
= 'decode'
44 re_ident
= '[a-zA-Z][a-zA-Z0-9_]*'
47 def error_with_file(file, lineno
, *args
):
48 """Print an error message from file:line and args and exit."""
53 r
= '{0}:{1}: error:'.format(file, lineno
)
55 r
= '{0}: error:'.format(file)
62 if output_file
and output_fd
:
64 os
.remove(output_file
)
def error(lineno, *args):
    """Report an error against the current input file and exit.

    LINENO is the line within input_file that the message refers to
    (callers pass 0 when no particular line applies).  The remaining
    ARGS are the pieces of the message.
    """
    # error_with_file itself takes *args, so the pieces must be unpacked
    # here; forwarding the tuple object would hand it a single argument
    # (the tuple) instead of the individual message pieces.
    error_with_file(input_file, lineno, *args)
if sys.version_info >= (3, 4):
    re_fullmatch = re.fullmatch
else:
    def re_fullmatch(pat, str):
        """Return a match object if PAT matches all of STR, else None.

        Fallback used when the interpreter does not provide re.fullmatch.
        """
        # Wrap PAT in a non-capturing group so the end anchor applies to
        # the whole pattern and not just its last alternative, and use \Z
        # because '$' would also accept a trailing newline.  re.match
        # already anchors at the start of the string.
        return re.match('(?:' + pat + r')\Z', str)
84 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
88 """Return a string with C spaces"""
92 def str_fields(fields
):
93 """Return a string uniquely identifing FIELDS"""
95 for n
in sorted(fields
.keys()):
100 def str_match_bits(bits
, mask
):
101 """Return a string pretty-printing BITS/MASK"""
104 i
= 1 << (insnwidth
- 1)
122 """Return true iff X is equal to a power of 2."""
123 return (x
& (x
- 1)) == 0
127 """Return the number of times 2 factors into X."""
129 while ((x
>> r
) & 1) == 0:
134 def is_contiguous(bits
):
136 if is_pow2((bits
>> shift
) + 1):
142 def eq_fields_for_args(flds_a
, flds_b
):
143 if len(flds_a
) != len(flds_b
):
145 for k
, a
in flds_a
.items():
151 def eq_fields_for_fmts(flds_a
, flds_b
):
152 if len(flds_a
) != len(flds_b
):
154 for k
, a
in flds_a
.items():
158 if a
.__class
__ != b
.__class
__ or a
!= b
:
164 """Class representing a simple instruction field"""
165 def __init__(self
, sign
, pos
, len):
169 self
.mask
= ((1 << len) - 1) << pos
176 return str(self
.pos
) + ':' + s
+ str(self
.len)
178 def str_extract(self
):
183 return '{0}(insn, {1}, {2})'.format(extr
, self
.pos
, self
.len)
def __eq__(self, other):
    """Return True iff OTHER has the same signedness and the same mask."""
    # The mask is built from both pos and len in __init__, so sign plus
    # mask distinguishes fields.  Comparing sign twice (as before) made
    # any two fields of equal signedness compare equal.
    return self.sign == other.sign and self.mask == other.mask
def __ne__(self, other):
    """Return the logical negation of __eq__ for OTHER."""
    equal = self.__eq__(other)
    return not equal
194 """Class representing a compound instruction field"""
195 def __init__(self
, subs
, mask
):
197 self
.sign
= subs
[0].sign
201 return str(self
.subs
)
203 def str_extract(self
):
206 for f
in reversed(self
.subs
):
208 ret
= f
.str_extract()
210 ret
= 'deposit32({0}, {1}, {2}, {3})' \
211 .format(ret
, pos
, 32 - pos
, f
.str_extract())
215 def __ne__(self
, other
):
216 if len(self
.subs
) != len(other
.subs
):
218 for a
, b
in zip(self
.subs
, other
.subs
):
219 if a
.__class
__ != b
.__class
__ or a
!= b
:
def __eq__(self, other):
    """Return the logical negation of __ne__ for OTHER."""
    differs = self.__ne__(other)
    return not differs
229 """Class representing an argument field with constant value"""
230 def __init__(self
, value
):
233 self
.sign
= value
< 0
236 return str(self
.value
)
def str_extract(self):
    """Return the text standing in for an extraction: the constant itself."""
    text = str(self.value)
    return text
def __cmp__(self, other):
    """Return the signed difference between this value and OTHER's.

    The result is negative, zero or positive as self.value is less
    than, equal to or greater than other.value.
    """
    # NOTE(review): __cmp__ is only consulted by Python 2; Python 3
    # comparisons do not call it -- confirm whether that is intended.
    delta = self.value - other.value
    return delta
247 """Class representing a field passed through an expander"""
248 def __init__(self
, func
, base
):
249 self
.mask
= base
.mask
250 self
.sign
= base
.sign
255 return self
.func
+ '(' + str(self
.base
) + ')'
def str_extract(self):
    """Return the base field's extraction text wrapped in a call to func."""
    call_open = self.func + '('
    return call_open + self.base.str_extract() + ')'
def __eq__(self, other):
    """Return whether OTHER applies the same function to an equal base."""
    same_func = self.func == other.func
    if not same_func:
        # Different function: hand back that comparison result as-is.
        return same_func
    return self.base == other.base
def __ne__(self, other):
    """Return the logical negation of __eq__ for OTHER."""
    equal = self.__eq__(other)
    return not equal
269 """Class representing the extracted fields of a format"""
270 def __init__(self
, nm
, flds
, extern
):
273 self
.fields
= sorted(flds
)
276 return self
.name
+ ' ' + str(self
.fields
)
def struct_name(self):
    """Return this argument set's name prefixed with 'arg_'."""
    return ''.join(('arg_', self.name))
281 def output_def(self
):
283 output('typedef struct {\n')
284 for n
in self
.fields
:
285 output(' int ', n
, ';\n')
286 output('} ', self
.struct_name(), ';\n\n')
291 """Common code between instruction formats and instruction patterns"""
292 def __init__(self
, name
, lineno
, base
, fixb
, fixm
, udfm
, fldm
, flds
):
294 self
.file = input_file
297 self
.fixedbits
= fixb
298 self
.fixedmask
= fixm
299 self
.undefmask
= udfm
300 self
.fieldmask
= fldm
304 return self
.name
+ ' ' + str_match_bits(self
.fixedbits
, self
.fixedmask
)
307 return str_indent(i
) + self
.__str
__()
311 class Format(General
):
312 """Class representing an instruction format"""
def extract_name(self):
    """Return the name of the C extract function emitted for this format.

    The name is the module-level decode_function, then '_extract_', then
    this format's own name.
    """
    # decode_function is only read, so no 'global' declaration is needed.
    return '_extract_'.join((decode_function, self.name))
318 def output_extract(self
):
319 output('static void ', self
.extract_name(), '(',
320 self
.base
.struct_name(), ' *a, ', insntype
, ' insn)\n{\n')
321 for n
, f
in self
.fields
.items():
322 output(' a->', n
, ' = ', f
.str_extract(), ';\n')
327 class Pattern(General
):
328 """Class representing an instruction pattern"""
def output_decl(self):
    """Output the typedef and translator prototype for this pattern.

    The typedef aliases the argument-set struct of the pattern's format
    to 'arg_<name>'; the prototype declares '<prefix>_<name>' taking a
    DisasContext pointer and a pointer to that type.
    """
    # translate_scope and translate_prefix are module-level settings that
    # are only read here, so they need no 'global' declaration.
    struct = self.base.base.struct_name()
    output('typedef ', struct, ' arg_', self.name, ';\n')
    output(translate_scope, 'bool ', translate_prefix, '_', self.name,
           '(DisasContext *ctx, arg_', self.name, ' *a);\n')
338 def output_code(self
, i
, extracted
, outerbits
, outermask
):
339 global translate_prefix
341 arg
= self
.base
.base
.name
342 output(ind
, '/* ', self
.file, ':', str(self
.lineno
), ' */\n')
344 output(ind
, self
.base
.extract_name(), '(&u.f_', arg
, ', insn);\n')
345 for n
, f
in self
.fields
.items():
346 output(ind
, 'u.f_', arg
, '.', n
, ' = ', f
.str_extract(), ';\n')
347 output(ind
, 'if (', translate_prefix
, '_', self
.name
,
348 '(ctx, &u.f_', arg
, ')) return true;\n')
352 class MultiPattern(General
):
353 """Class representing an overlapping set of instruction patterns"""
355 def __init__(self
, lineno
, pats
, fixb
, fixm
, udfm
):
356 self
.file = input_file
360 self
.fixedbits
= fixb
361 self
.fixedmask
= fixm
362 self
.undefmask
= udfm
370 def output_decl(self
):
374 def output_code(self
, i
, extracted
, outerbits
, outermask
):
375 global translate_prefix
378 if outermask
!= p
.fixedmask
:
379 innermask
= p
.fixedmask
& ~outermask
380 innerbits
= p
.fixedbits
& ~outermask
381 output(ind
, 'if ((insn & ',
382 '0x{0:08x}) == 0x{1:08x}'.format(innermask
, innerbits
),
385 str_match_bits(p
.fixedbits
, p
.fixedmask
), ' */\n')
386 p
.output_code(i
+ 4, extracted
, p
.fixedbits
, p
.fixedmask
)
389 p
.output_code(i
, extracted
, p
.fixedbits
, p
.fixedmask
)
393 def parse_field(lineno
, name
, toks
):
394 """Parse one instruction field from TOKS at LINENO"""
399 # A "simple" field will have only one entry;
400 # a "multifield" will have several.
405 if re_fullmatch('!function=' + re_ident
, t
):
407 error(lineno
, 'duplicate function')
412 if re_fullmatch('[0-9]+:s[0-9]+', t
):
413 # Signed field extract
414 subtoks
= t
.split(':s')
416 elif re_fullmatch('[0-9]+:[0-9]+', t
):
417 # Unsigned field extract
418 subtoks
= t
.split(':')
421 error(lineno
, 'invalid field token "{0}"'.format(t
))
424 if po
+ le
> insnwidth
:
425 error(lineno
, 'field {0} too large'.format(t
))
426 f
= Field(sign
, po
, le
)
430 if width
> insnwidth
:
431 error(lineno
, 'field too large')
438 error(lineno
, 'field components overlap')
440 f
= MultiField(subs
, mask
)
442 f
= FunctionField(func
, f
)
445 error(lineno
, 'duplicate field', name
)
450 def parse_arguments(lineno
, name
, toks
):
451 """Parse one argument set from TOKS at LINENO"""
458 if re_fullmatch('!extern', t
):
461 if not re_fullmatch(re_ident
, t
):
462 error(lineno
, 'invalid argument set token "{0}"'.format(t
))
464 error(lineno
, 'duplicate argument "{0}"'.format(t
))
467 if name
in arguments
:
468 error(lineno
, 'duplicate argument set', name
)
469 arguments
[name
] = Arguments(name
, flds
, extern
)
470 # end parse_arguments
473 def lookup_field(lineno
, name
):
477 error(lineno
, 'undefined field', name
)
480 def add_field(lineno
, flds
, new_name
, f
):
482 error(lineno
, 'duplicate field', new_name
)
def add_field_byname(lineno, flds, new_name, old_name):
    """Look up the field OLD_NAME and add it to FLDS under NEW_NAME.

    LINENO is passed to both helpers for their error reports.  Returns
    whatever add_field returns.
    """
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
491 def infer_argument_set(flds
):
493 global decode_function
495 for arg
in arguments
.values():
496 if eq_fields_for_args(flds
, arg
.fields
):
499 name
= decode_function
+ str(len(arguments
))
500 arg
= Arguments(name
, flds
.keys(), False)
501 arguments
[name
] = arg
505 def infer_format(arg
, fieldmask
, flds
):
508 global decode_function
512 for n
, c
in flds
.items():
518 # Look for an existing format with the same argument set and fields
519 for fmt
in formats
.values():
520 if arg
and fmt
.base
!= arg
:
522 if fieldmask
!= fmt
.fieldmask
:
524 if not eq_fields_for_fmts(flds
, fmt
.fields
):
526 return (fmt
, const_flds
)
528 name
= decode_function
+ '_Fmt_' + str(len(formats
))
530 arg
= infer_argument_set(flds
)
532 fmt
= Format(name
, 0, arg
, 0, 0, 0, fieldmask
, var_flds
)
535 return (fmt
, const_flds
)
539 def parse_generic(lineno
, is_format
, name
, toks
):
540 """Parse one instruction format from TOKS at LINENO"""
# '&Foo' gives a format an explicit argument set.
562 error(lineno
, 'multiple argument sets')
566 error(lineno
, 'undefined argument set', t
)
569 # '@Foo' gives a pattern an explicit format.
573 error(lineno
, 'multiple formats')
577 error(lineno
, 'undefined format', t
)
580 # '%Foo' imports a field.
583 flds
= add_field_byname(lineno
, flds
, tt
, tt
)
586 # 'Foo=%Bar' imports a field with a different name.
587 if re_fullmatch(re_ident
+ '=%' + re_ident
, t
):
588 (fname
, iname
) = t
.split('=%')
589 flds
= add_field_byname(lineno
, flds
, fname
, iname
)
592 # 'Foo=number' sets an argument field to a constant value
593 if re_fullmatch(re_ident
+ '=[+-]?[0-9]+', t
):
594 (fname
, value
) = t
.split('=')
596 flds
= add_field(lineno
, flds
, fname
, ConstField(value
))
599 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
600 # required ones, or dont-cares.
601 if re_fullmatch('[01.-]+', t
):
603 fms
= t
.replace('0', '1')
604 fms
= fms
.replace('.', '0')
605 fms
= fms
.replace('-', '0')
606 fbs
= t
.replace('.', '0')
607 fbs
= fbs
.replace('-', '0')
608 ubm
= t
.replace('1', '0')
609 ubm
= ubm
.replace('.', '0')
610 ubm
= ubm
.replace('-', '1')
614 fixedbits
= (fixedbits
<< shift
) | fbs
615 fixedmask
= (fixedmask
<< shift
) | fms
616 undefmask
= (undefmask
<< shift
) | ubm
617 # Otherwise, fieldname:fieldwidth
618 elif re_fullmatch(re_ident
+ ':s?[0-9]+', t
):
619 (fname
, flen
) = t
.split(':')
624 shift
= int(flen
, 10)
625 if shift
+ width
> insnwidth
:
626 error(lineno
, 'field {0} exceeds insnwidth'.format(fname
))
627 f
= Field(sign
, insnwidth
- width
- shift
, shift
)
628 flds
= add_field(lineno
, flds
, fname
, f
)
633 error(lineno
, 'invalid token "{0}"'.format(t
))
636 # We should have filled in all of the bits of the instruction.
637 if not (is_format
and width
== 0) and width
!= insnwidth
:
638 error(lineno
, 'definition has {0} bits'.format(width
))
# Do not check for fields overlapping fields; one valid usage
641 # is to be able to duplicate fields via import.
643 for f
in flds
.values():
646 # Fix up what we've parsed to match either a format or a pattern.
648 # Formats cannot reference formats.
650 error(lineno
, 'format referencing format')
651 # If an argument set is given, then there should be no fields
652 # without a place to store it.
654 for f
in flds
.keys():
655 if f
not in arg
.fields
:
656 error(lineno
, 'field {0} not in argument set {1}'
657 .format(f
, arg
.name
))
659 arg
= infer_argument_set(flds
)
661 error(lineno
, 'duplicate format name', name
)
662 fmt
= Format(name
, lineno
, arg
, fixedbits
, fixedmask
,
663 undefmask
, fieldmask
, flds
)
666 # Patterns can reference a format ...
668 # ... but not an argument simultaneously
670 error(lineno
, 'pattern specifies both format and argument set')
671 if fixedmask
& fmt
.fixedmask
:
672 error(lineno
, 'pattern fixed bits overlap format fixed bits')
673 fieldmask |
= fmt
.fieldmask
674 fixedbits |
= fmt
.fixedbits
675 fixedmask |
= fmt
.fixedmask
676 undefmask |
= fmt
.undefmask
678 (fmt
, flds
) = infer_format(arg
, fieldmask
, flds
)
680 for f
in flds
.keys():
681 if f
not in arg
.fields
:
682 error(lineno
, 'field {0} not in argument set {1}'
683 .format(f
, arg
.name
))
684 if f
in fmt
.fields
.keys():
685 error(lineno
, 'field {0} set by format and pattern'.format(f
))
687 if f
not in flds
.keys() and f
not in fmt
.fields
.keys():
688 error(lineno
, 'field {0} not initialized'.format(f
))
689 pat
= Pattern(name
, lineno
, fmt
, fixedbits
, fixedmask
,
690 undefmask
, fieldmask
, flds
)
692 allpatterns
.append(pat
)
694 # Validate the masks that we have assembled.
695 if fieldmask
& fixedmask
:
696 error(lineno
, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
697 .format(fieldmask
, fixedmask
))
698 if fieldmask
& undefmask
:
699 error(lineno
, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
700 .format(fieldmask
, undefmask
))
701 if fixedmask
& undefmask
:
702 error(lineno
, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
703 .format(fixedmask
, undefmask
))
705 allbits
= fieldmask | fixedmask | undefmask
706 if allbits
!= insnmask
:
707 error(lineno
, 'bits left unspecified (0x{0:08x})'
708 .format(allbits ^ insnmask
))
711 def build_multi_pattern(lineno
, pats
):
712 """Validate the Patterns going into a MultiPattern."""
717 error(lineno
, 'less than two patterns within braces')
722 # Collect fixed/undefmask for all of the children.
723 # Move the defining lineno back to that of the first child.
725 fixedmask
&= p
.fixedmask
726 undefmask
&= p
.undefmask
727 if p
.lineno
< lineno
:
733 error(lineno
, 'no overlap in patterns within braces')
736 thisbits
= p
.fixedbits
& fixedmask
737 if fixedbits
is None:
739 elif fixedbits
!= thisbits
:
740 fixedmask
&= ~
(fixedbits ^ thisbits
)
745 mp
= MultiPattern(lineno
, pats
, fixedbits
, fixedmask
, undefmask
)
747 # end build_multi_pattern
750 """Parse all of the patterns within a file"""
754 # Read all of the lines of the file. Concatenate lines
755 # ending in backslash; discard empty lines and comments.
764 # Expand and strip spaces, to find indent.
766 line
= line
.expandtabs()
778 # Next line after continuation
781 # Allow completely blank lines.
785 # Empty line due to comment.
787 # Indentation must be correct, even for comment lines.
788 if indent
!= nesting
:
789 error(lineno
, 'indentation ', indent
, ' != ', nesting
)
791 start_lineno
= lineno
805 error(start_lineno
, 'mismatched close brace')
807 error(start_lineno
, 'extra tokens after close brace')
809 if indent
!= nesting
:
810 error(start_lineno
, 'indentation ', indent
, ' != ', nesting
)
812 patterns
= saved_pats
.pop()
813 build_multi_pattern(lineno
, pats
)
817 # Everything else should have current indentation.
818 if indent
!= nesting
:
819 error(start_lineno
, 'indentation ', indent
, ' != ', nesting
)
824 error(start_lineno
, 'extra tokens after open brace')
825 saved_pats
.append(patterns
)
831 # Determine the type of object needing to be parsed.
833 parse_field(start_lineno
, name
[1:], toks
)
835 parse_arguments(start_lineno
, name
[1:], toks
)
837 parse_generic(start_lineno
, True, name
[1:], toks
)
839 parse_generic(start_lineno
, False, name
, toks
)
845 """Class representing a node in a decode tree"""
847 def __init__(self
, fm
, tm
):
855 r
= '{0}{1:08x}'.format(ind
, self
.fixedmask
)
857 r
+= ' ' + self
.format
.name
859 for (b
, s
) in self
.subs
:
860 r
+= '{0} {1:08x}:\n'.format(ind
, b
)
861 r
+= s
.str1(i
+ 4) + '\n'
868 def output_code(self
, i
, extracted
, outerbits
, outermask
):
871 # If we identified all nodes below have the same format,
872 # extract the fields now.
873 if not extracted
and self
.base
:
874 output(ind
, self
.base
.extract_name(),
875 '(&u.f_', self
.base
.base
.name
, ', insn);\n')
878 # Attempt to aid the compiler in producing compact switch statements.
879 # If the bits in the mask are contiguous, extract them.
880 sh
= is_contiguous(self
.thismask
)
882 # Propagate SH down into the local functions.
883 def str_switch(b
, sh
=sh
):
884 return '(insn >> {0}) & 0x{1:x}'.format(sh
, b
>> sh
)
886 def str_case(b
, sh
=sh
):
887 return '0x{0:x}'.format(b
>> sh
)
890 return 'insn & 0x{0:08x}'.format(b
)
893 return '0x{0:08x}'.format(b
)
895 output(ind
, 'switch (', str_switch(self
.thismask
), ') {\n')
896 for b
, s
in sorted(self
.subs
):
897 assert (self
.thismask
& ~s
.fixedmask
) == 0
898 innermask
= outermask | self
.thismask
899 innerbits
= outerbits | b
900 output(ind
, 'case ', str_case(b
), ':\n')
902 str_match_bits(innerbits
, innermask
), ' */\n')
903 s
.output_code(i
+ 4, extracted
, innerbits
, innermask
)
904 output(ind
, ' return false;\n')
909 def build_tree(pats
, outerbits
, outermask
):
910 # Find the intersection of all remaining fixedmask.
911 innermask
= ~outermask
& insnmask
913 innermask
&= i
.fixedmask
916 text
= 'overlapping patterns:'
918 text
+= '\n' + p
.file + ':' + str(p
.lineno
) + ': ' + str(p
)
919 error_with_file(pats
[0].file, pats
[0].lineno
, text
)
921 fullmask
= outermask | innermask
923 # Sort each element of pats into the bin selected by the mask.
926 fb
= i
.fixedbits
& innermask
932 # We must recurse if any bin has more than one element or if
933 # the single element in the bin has not been fully matched.
934 t
= Tree(fullmask
, innermask
)
936 for b
, l
in bins
.items():
938 if len(l
) > 1 or s
.fixedmask
& ~fullmask
!= 0:
939 s
= build_tree(l
, b | outerbits
, fullmask
)
940 t
.subs
.append((b
, s
))
946 def prop_format(tree
):
947 """Propagate Format objects into the decode tree"""
949 # Depth first search.
950 for (b
, s
) in tree
.subs
:
951 if isinstance(s
, Tree
):
954 # If all entries in SUBS have the same format, then
955 # propagate that into the tree.
957 for (b
, s
) in tree
.subs
:
973 global translate_scope
974 global translate_prefix
981 global decode_function
983 decode_scope
= 'static '
985 long_opts
= ['decode=', 'translate=', 'output=', 'insnwidth=',
988 (opts
, args
) = getopt
.getopt(sys
.argv
[1:], 'o:w:', long_opts
)
989 except getopt
.GetoptError
as err
:
992 if o
in ('-o', '--output'):
994 elif o
== '--decode':
997 elif o
== '--static-decode':
999 elif o
== '--translate':
1000 translate_prefix
= a
1001 translate_scope
= ''
1002 elif o
in ('-w', '--insnwidth'):
1005 insntype
= 'uint16_t'
1007 elif insnwidth
!= 32:
1008 error(0, 'cannot handle insns of width', insnwidth
)
1010 assert False, 'unhandled option'
1013 error(0, 'missing input file')
1014 for filename
in args
:
1015 input_file
= filename
1016 f
= open(filename
, 'r')
1020 t
= build_tree(patterns
, 0, 0)
1024 output_fd
= open(output_file
, 'w')
1026 output_fd
= sys
.stdout
1029 for n
in sorted(arguments
.keys()):
1033 # A single translate function can be invoked for different patterns.
1034 # Make sure that the argument sets are the same, and declare the
1035 # function only once.
1037 for i
in allpatterns
:
1038 if i
.name
in out_pats
:
1039 p
= out_pats
[i
.name
]
1040 if i
.base
.base
!= p
.base
.base
:
1041 error(0, i
.name
, ' has conflicting argument sets')
1044 out_pats
[i
.name
] = i
1047 for n
in sorted(formats
.keys()):
1051 output(decode_scope
, 'bool ', decode_function
,
1052 '(DisasContext *ctx, ', insntype
, ' insn)\n{\n')
1056 if len(allpatterns
) != 0:
1057 output(i4
, 'union {\n')
1058 for n
in sorted(arguments
.keys()):
1060 output(i4
, i4
, f
.struct_name(), ' f_', f
.name
, ';\n')
1061 output(i4
, '} u;\n\n')
1062 t
.output_code(4, False, 0, 0)
1064 output(i4
, 'return false;\n')
1072 if __name__
== '__main__':