.github: point Repo Lockdown bot to GitLab repo
[qemu/kevin.git] / scripts / decodetree.py
blob4637b633e704be81d2a048540e516258d5805ff5
1 #!/usr/bin/env python3
2 # Copyright (c) 2018 Linaro Limited
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
23 import io
24 import os
25 import re
26 import sys
27 import getopt
# Width in bits of one instruction word and the mask covering those bits.
# main() overrides both when a 16-bit width is requested.
insnwidth = 32
insnmask = 0xffffffff
# Set by main() when --varinsnwidth is given.
variablewidth = False
# Registries filled in while parsing the input:
#   fields      name -> field object        (parse_field)
#   arguments   name -> Arguments           (parse_arguments,
#                                            infer_argument_set)
#   formats     name -> Format              (parse_generic, infer_format)
#   allpatterns list of every Pattern       (parse_generic)
fields = {}
arguments = {}
formats = {}
allpatterns = []
# Set by parse_arguments() once any argument set is marked !extern.
anyextern = False

# Name prefix and storage-class text used when declaring the translate
# functions; main() changes both for --translate.
translate_prefix = 'trans'
translate_scope = 'static '
# Input file name used by error(), and the output file name / handle that
# error_with_file() cleans up before exiting.
input_file = ''
output_file = None
output_fd = None
# C type of the instruction word and name stem of the generated functions.
insntype = 'uint32_t'
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO are each omitted from the message prefix when falsy.
    ARGS are printed space-separated after 'error: ' on stderr.  If an
    output file has been opened it is closed and removed, so that a
    partially written file is not left behind.  The process then exits
    with status 1.
    """
    global output_file
    global output_fd

    prefix = ''
    if file:
        prefix += '{0}:'.format(file)
    if lineno:
        prefix += '{0}:'.format(lineno)
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # Use sys.exit rather than the exit() builtin: the latter is injected
    # by the site module and is missing under "python -S".
    sys.exit(1)
# end error_with_file
def error(lineno, *args):
    """Report ARGS as an error at LINENO of the current input file.

    This is error_with_file() with the global input_file supplied as the
    file name; like that function it does not return.
    """
    error_with_file(input_file, lineno, *args)
# end error
def output(*args):
    """Write every string in ARGS, in order, to the global output_fd."""
    global output_fd
    for text in args:
        output_fd.write(text)
def output_autogen():
    """Emit the 'autogenerated' banner comment followed by a blank line."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
def str_indent(c):
    """Return a string of C space characters."""
    return c * ' '
def str_fields(fields):
    """Return a string uniquely identifying FIELDS.

    The result is the sorted key names of FIELDS joined with underscores.
    """
    return '_'.join(sorted(fields.keys()))
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK.

    Walking from the most significant instruction bit downwards, each
    position prints as '.' when it is outside MASK, otherwise as the
    corresponding bit of BITS.  A space follows bits 24, 16 and 8.
    """
    global insnwidth

    separators = 0x01010100
    probe = 1 << (insnwidth - 1)
    pieces = []
    while probe != 0:
        if not probe & mask:
            pieces.append('.')
        elif probe & bits:
            pieces.append('1')
        else:
            pieces.append('0')
        if probe & separators:
            pieces.append(' ')
        probe >>= 1
    return ''.join(pieces)
def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Zero and negative values are not powers of two; the bit trick alone
    would wrongly accept 0, so the sign is checked first.
    """
    return x > 0 and (x & (x - 1)) == 0
def ctz(x):
    """Return the number of times 2 factors into X.

    X must be non-zero.  The lowest set bit is isolated with X & -X and
    its position read off with bit_length().
    """
    assert x != 0
    lowest = x & -x
    return lowest.bit_length() - 1
def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one run, else -1.

    BITS is shifted down by its count of trailing zeros; what remains is
    a single run of ones exactly when adding 1 yields a power of two.
    An empty mask (0) also yields -1.
    """
    if bits == 0:
        return -1
    shift = ctz(bits)
    return shift if is_pow2((bits >> shift) + 1) else -1
def eq_fields_for_args(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B name the same set of fields.

    FLDS_A is a mapping keyed by field name; FLDS_B is any sized
    container of names.  Only the names are compared, never the values.
    """
    if len(flds_a) != len(flds_b):
        return False
    return all(name in flds_b for name in flds_a.keys())
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B hold equal fields under equal names.

    Both arguments are mappings from field name to field object.  Two
    entries match only when they are instances of the same class and do
    not compare unequal.
    """
    if len(flds_a) != len(flds_b):
        return False
    for name, mine in flds_a.items():
        if name not in flds_b:
            return False
        theirs = flds_b[name]
        if mine.__class__ != theirs.__class__:
            return False
        if mine != theirs:
            return False
    return True
class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        # sign: whether extraction is sign-extending
        # pos:  bit position of the field's least significant bit
        # len:  width of the field in bits
        self.sign = sign
        self.pos = pos
        self.len = len
        # Mask of the instruction bits covered by this field.
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return str(self.pos) + ':' + marker + str(self.len)

    def str_extract(self):
        """Return the C expression extracting this field from 'insn'."""
        helper = 'sextract32' if self.sign else 'extract32'
        return '{0}(insn, {1}, {2})'.format(helper, self.pos, self.len)

    def __eq__(self, other):
        # Fields are equal when they cover the same bits with the same
        # signedness.
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        # subs: component fields, first component is the most significant
        # mask: union of the instruction bits covered by the components
        self.subs = subs
        # The compound takes its signedness from the leading component.
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression assembling the value from its parts.

        Components are walked from the last to the first.  The first one
        visited is extracted directly; every later one is placed above
        the bits accumulated so far with deposit32().
        """
        expr = '0'
        filled = 0
        for part in reversed(self.subs):
            if filled == 0:
                expr = part.str_extract()
            else:
                expr = 'deposit32({0}, {1}, {2}, {3})' \
                       .format(expr, filled, 32 - filled, part.str_extract())
            filled += part.len
        return expr

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        # Any pair differing in class or value makes the whole differ.
        return any(a.__class__ != b.__class__ or a != b
                   for a, b in zip(self.subs, other.subs))

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant occupies no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for this field: the literal value."""
        return str(self.value)

    # Python 3 never calls __cmp__, so without the rich comparisons below
    # two ConstFields holding the same value would compare by identity
    # and always be considered different.
    def __eq__(self, other):
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __cmp__(self, other):
        # Retained for any caller that invokes it explicitly.
        return self.value - other.value
# end ConstField
class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        # func: name of the C function applied to the extracted value
        # base: the underlying field; mask and sign are taken from it
        self.func = func
        self.base = base
        self.sign = base.sign
        self.mask = base.mask

    def __str__(self):
        return ''.join([self.func, '(', str(self.base), ')'])

    def str_extract(self):
        """Return the C call wrapping the base field's extraction."""
        return ''.join([self.func, '(ctx, ', self.base.str_extract(), ')'])

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        # func: name of the C function that supplies the value
        self.func = func
        # No instruction bits are consumed by a parameter field.
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call that produces the value."""
        return '{0}(ctx)'.format(self.func)

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, extern):
        # nm:     name of the argument set
        # flds:   iterable of member names; stored as a sorted list
        # extern: when true, output_def() emits nothing for this set
        self.name = nm
        self.extern = extern
        self.fields = sorted(flds)

    def __str__(self):
        return '{0} {1}'.format(self.name, self.fields)

    def struct_name(self):
        """Return the name of the C struct type for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef of the struct, unless it is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member in self.fields:
            output(' int ', member, ';\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Where the definition came from, for diagnostics.
        self.name = name
        self.file = input_file
        self.lineno = lineno
        # For a Format this is its Arguments; for a Pattern, its Format.
        self.base = base
        # Bit-level description of the definition.
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        bits = str_match_bits(self.fixedbits, self.fixedmask)
        return self.name + ' ' + bits

    def str1(self, i):
        """Return str(self) indented by I spaces."""
        return str_indent(i) + self.__str__()
# end General
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the generated C extraction function."""
        global decode_function
        return '{0}_extract_{1}'.format(decode_function, self.name)

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        struct = self.base.struct_name()
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               struct, ' *a, ', insntype, ' insn)\n{\n')
        # One assignment per field of the format.
        for member, fld in self.fields.items():
            output(' a->', member, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the per-pattern arg typedef and translate prototype."""
        global translate_scope
        global translate_prefix
        struct = self.base.base.struct_name()
        output('typedef ', struct,
               ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that extracts arguments and calls translate.

        I is the indentation.  When EXTRACTED is false the format's
        extraction function is called first.  The pattern's own fields
        are always assigned afterwards.  OUTERBITS and OUTERMASK are
        accepted for interface symmetry and not used here.
        """
        global translate_prefix
        pad = str_indent(i)
        argname = self.base.base.name
        output(pad, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(pad, self.base.extract_name(),
                   '(ctx, &u.f_', argname, ', insn);\n')
        for member, fld in self.fields.items():
            output(pad, 'u.f_', argname, '.', member, ' = ',
                   fld.str_extract(), ';\n')
        output(pad, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', argname, ')) return true;\n')

    # Normal patterns do not have children, so the tree-walking hooks
    # have nothing to do.
    def build_tree(self):
        return

    def prop_masks(self):
        return

    def prop_format(self):
        return

    def prop_width(self):
        return

# end Pattern
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        # Deliberately does not call General.__init__: a group has no
        # name or fields of its own; its masks are computed later by
        # prop_masks() from its children.
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        """Emit the declarations of every child."""
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute fixedbits/fixedmask/undefmask common to all children."""
        global insnmask

        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Widen fixedmask until all fixedbits match
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    # Drop the disagreeing bits and rescan.
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                # The scan finished without disagreement.
                repeat = False

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        """Build the decode trees of every child."""
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        """Propagate formats within every child.

        This must recurse into prop_format(), not build_tree():
        otherwise nested groups get their trees rebuilt and never have
        formats propagated.
        """
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Set self.width to the common width of the children.

        Reports an error if the children disagree.
        """
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern


class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit each child in order, guarded by its remaining fixed bits.

        A child whose fixedmask equals OUTERMASK has nothing left to
        test and is emitted unguarded.
        """
        global translate_prefix
        ind = str_indent(i)
        for p in self.pats:
            if outermask != p.fixedmask:
                innermask = p.fixedmask & ~outermask
                innerbits = p.fixedbits & ~outermask
                output(ind, 'if ((insn & ',
                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
                       ') {\n')
                output(ind, ' /* ',
                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                output(ind, '}\n')
            else:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
#end IncMultiPattern


class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # fm: mask of all bits decided at or above this node
        # tm: mask of the bits this node switches on
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs.
        self.subs = []
        # Format shared by every child, once propagated; else None.
        self.base = None

    def str1(self, i):
        ind = str_indent(i)
        r = '{0}{1:08x}'.format(ind, self.fixedmask)
        # The shared format is stored in self.base; there is no
        # 'format' attribute on a Tree.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += '{0} {1:08x}:\n'.format(ind, b)
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over this node's bits, recursing into children."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)

            def str_case(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            def str_switch(b):
                return 'insn & 0x{0:08x}'.format(b)

            def str_case(b):
                return '0x{0:08x}'.format(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree


class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree distinguishing PATS by bits outside OUTERMASK."""
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for i in pats:
            innermask &= i.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for i in pats:
            fb = i.fixedbits & innermask
            if fb in bins:
                bins[fb].append(i)
            else:
                bins[fb] = [i]

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        """Build the children's trees, then the tree for this group."""
        # Children first, via the base class build_tree() hook.
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        """Propagate formats in the children, then in this group's tree.

        Requires build_tree() to have run, since it reads self.tree.
        """
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO.

    Each token is either '!function=NAME' or a 'pos:len' / 'pos:slen'
    bit range.  The resulting field object is stored in the global
    `fields` under NAME; a duplicate NAME is an error.
    """
    global fields
    global insnwidth

    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    parts = []
    total = 0
    func = None
    for t in toks:
        if re.match('^!function=', t):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            sign = True
            pieces = t.split(':s')
        elif re.fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            sign = False
            pieces = t.split(':')
        else:
            error(lineno, 'invalid field token "{0}"'.format(t))
        po = int(pieces[0])
        le = int(pieces[1])
        if po + le > insnwidth:
            error(lineno, 'field {0} too large'.format(t))
        parts.append(Field(sign, po, le))
        total += le

    if total > insnwidth:
        error(lineno, 'field too large')

    if parts:
        if len(parts) == 1:
            f = parts[0]
        else:
            # The components of a multifield must not share bits.
            mask = 0
            for s in parts:
                if mask & s.mask:
                    error(lineno, 'field components overlap')
                mask |= s.mask
            f = MultiField(parts, mask)
        if func:
            f = FunctionField(func, f)
    elif func:
        # No bit ranges at all: the value comes purely from the function.
        f = ParameterField(func)
    else:
        error(lineno, 'field with no value')

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO.

    Tokens are member names (C identifiers) or the flag '!extern'.  The
    resulting Arguments object is stored in the global `arguments`
    under NAME; a duplicate NAME is an error.
    """
    global arguments
    global re_C_ident
    global anyextern

    members = []
    extern = False
    for t in toks:
        if re.fullmatch('!extern', t):
            # Remember both per-set and file-wide that extern was used.
            extern = True
            anyextern = True
            continue
        if not re.fullmatch(re_C_ident, t):
            error(lineno, 'invalid argument set token "{0}"'.format(t))
        if t in members:
            error(lineno, 'duplicate argument "{0}"'.format(t))
        members.append(t)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, extern)
# end parse_arguments
def lookup_field(lineno, name):
    """Return the field registered as NAME, or report an error."""
    global fields
    if name not in fields:
        # error() exits, so the lookup below is only reached on success.
        error(lineno, 'undefined field', name)
    return fields[name]
def add_field(lineno, flds, new_name, f):
    """Store F in FLDS under NEW_NAME and return FLDS.

    Reports an error if NEW_NAME is already present.
    """
    already_present = new_name in flds
    if already_present:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the registered field OLD_NAME to FLDS under NEW_NAME."""
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
def infer_argument_set(flds):
    """Return an argument set whose members are exactly the names in FLDS.

    An already registered set with the same member names is reused;
    otherwise a new non-extern set is created, named after the decode
    function plus the current number of sets, and registered.
    """
    global arguments
    global decode_function

    for candidate in arguments.values():
        if eq_fields_for_args(flds, candidate.fields):
            return candidate

    fresh_name = decode_function + str(len(arguments))
    fresh = Arguments(fresh_name, flds.keys(), False)
    arguments[fresh_name] = fresh
    return fresh
def infer_format(arg, fieldmask, flds, width):
    """Find or create a Format suitable for a pattern without one.

    ARG is the pattern's explicit argument set or None, FIELDMASK the
    instruction bits its fields cover, FLDS its name -> field mapping
    and WIDTH its width in bits.

    Returns (fmt, const_flds).  FMT extracts the non-constant fields;
    CONST_FLDS holds the ConstField entries of FLDS, which the caller
    keeps on the pattern itself, so patterns that differ only in
    constants can share one format.
    """
    global arguments
    global formats
    global decode_function

    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        # C is an instance, so this must be an isinstance() test; an
        # identity comparison against the class is never true.
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg:
            if fmt.base != arg:
                continue
        elif not eq_fields_for_args(flds, fmt.base.fields):
            # With no explicit argument set the caller adopts fmt.base,
            # so it must hold exactly this pattern's fields; otherwise
            # members would be left uninitialized.
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        # Only the variable fields live in the format.
        if not eq_fields_for_fmts(var_flds, fmt.fields):
            continue
        return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        # The argument set must have room for the constants as well.
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
def parse_generic(lineno, parent_pat, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    When PARENT_PAT is None the definition is a format and is stored in
    the global `formats` under NAME.  Otherwise it is a pattern, which
    is appended both to PARENT_PAT.pats and to the global `allpatterns`.
    Any inconsistency is reported through error().
    """
    global fields
    global arguments
    global formats
    global allpatterns
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_C_ident
    global insnwidth
    global insnmask
    global variablewidth

    is_format = parent_pat is None

    # Masks are accumulated most-significant-bit first: each bit token
    # shifts what has been collected so far to the left.
    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, t):
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if re.fullmatch(re_fld_ident, t):
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re.fullmatch('[01.-]+', t):
            shift = len(t)
            # fms: 1 where the bit is fixed ('0' or '1').
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            # fbs: the required value of each fixed bit.
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            # ubm: 1 where the bit was written as '-'.
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
            # The field sits directly below the bits consumed so far.
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, 'invalid token "{0}"'.format(t))
        # Only bit tokens and inline fields reach here; both set `shift`.
        width += shift

    # A short, byte-multiple definition in variable-width mode is moved
    # to the top of the word and the unused low bits marked undefined.
    if variablewidth and width < insnwidth and width % 8 == 0:
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1

    # We should have filled in all of the bits of the instruction.
    elif not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, 'field {0} not in argument set {1}'
                                  .format(f, arg.name))
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            # The pattern inherits everything the format specifies.
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            # From here on FLDS holds only what infer_format() hands
            # back for the pattern itself.
            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, 'field {0} not in argument set {1}'
                              .format(f, arg.name))
            if f in fmt.fields.keys():
                error(lineno, 'field {0} set by format and pattern'.format(f))
        # Every member of the argument set must be set by someone.
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, 'field {0} not initialized'.format(f))
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, fixedmask))
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, undefmask))
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fixedmask, undefmask))
    if not is_format:
        # A pattern must account for every bit of the instruction.
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified (0x{0:08x})'
                          .format(allbits ^ insnmask))
# end parse_generic
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file.

    F is an iterable of text lines.  PARENT_PAT is the group that
    receives top-level patterns; '{' and '[' open nested groups, whose
    contents must be indented by two more columns per level.  Every
    logical line is dispatched on the sigil of its first token to
    parse_field(), parse_arguments() or parse_generic().
    """
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_pat_ident

    # Read all of the lines of the file.  Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if len(t) == 0:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack can make pop() fail; catch exactly
            # that rather than every exception.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, 'invalid token "{0}"'.format(name))
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
class SizeTree:
    """Class representing a node in a size decode tree"""

    def __init__(self, m, w):
        # m: mask of the bits this node switches on
        # w: instruction width, in bits, associated with this node
        self.mask = m
        self.width = w
        # List of (bits, child) pairs.
        self.subs = []
        self.base = None

    def str1(self, i):
        pad = str_indent(i)
        lines = '{0}{1:08x}'.format(pad, self.mask)
        lines += ' [\n'
        for (bits, child) in self.subs:
            lines += '{0} {1:08x}:\n'.format(pad, bits)
            lines += child.str1(i + 4) + '\n'
        lines += pad + ']'
        return lines

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over this node's bits, recursing into children.

        EXTRACTED is the number of instruction bits already loaded.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            load = '_load_bytes(ctx, insn, {0}, {1});\n' \
                   .format(extracted // 8, self.width // 8)
            output(pad, 'insn = ', decode_function, load)
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.mask)
        if sh > 0:
            selector = '(insn >> {0}) & 0x{1:x}'.format(sh, self.mask >> sh)

            def case_label(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            selector = 'insn & 0x{0:08x}'.format(self.mask)

            def case_label(b):
                return '0x{0:08x}'.format(b)

        output(pad, 'switch (', selector, ') {\n')
        for bits, child in sorted(self.subs):
            innermask = outermask | self.mask
            innerbits = outerbits | bits
            output(pad, 'case ', case_label(bits), ':\n')
            output(pad, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            child.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
class SizeLeaf:
    """Class representing a leaf node in a size decode tree"""

    def __init__(self, m, w):
        # m: mask recorded for this leaf
        # w: instruction width, in bits, decided at this leaf
        self.width = w
        self.mask = m

    def str1(self, i):
        return '{0}{1:08x}'.format(str_indent(i), self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit code loading any missing bytes, then 'return insn;'.

        EXTRACTED is the number of instruction bits already loaded.
        """
        global decode_function
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            load = '_load_bytes(ctx, insn, {0}, {1});\n' \
                   .format(extracted // 8, self.width // 8)
            output(pad, 'insn = ', decode_function, load)
            extracted = self.width
        output(pad, 'return insn;\n')
# end SizeLeaf
def build_size_tree(pats, width, outerbits, outermask):
    """Return a SizeTree or SizeLeaf deciding the width of PATS.

    WIDTH is the number of instruction bits examined so far; the byte
    ending at that width is the one inspected here.  OUTERBITS and
    OUTERMASK describe the bits already decided by enclosing nodes.
    """
    global insnwidth

    # Collect the mask of bits that are fixed in this width
    innermask = 0xff << (insnwidth - width)
    innermask &= ~outermask
    minwidth = None
    onewidth = True
    for pat in pats:
        innermask &= pat.fixedmask
        if minwidth is None:
            minwidth = pat.width
        elif minwidth != pat.width:
            onewidth = False
            if minwidth < pat.width:
                minwidth = pat.width

    # All patterns agree on the width: nothing more to decide.
    if onewidth:
        return SizeLeaf(innermask, minwidth)

    if innermask == 0:
        # This byte cannot tell the patterns apart; try the next one.
        if width < minwidth:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = []
        for p in pats:
            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
        error_with_file(pats[0].file, pats[0].lineno,
                        'overlapping patterns size {0}:'.format(width), pnames)

    # Group the patterns by the value of their fixed bits in this byte.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # Every pattern has the same bits here; move on to the next byte.
        (only,) = bins.keys()
        return build_size_tree(bins[only], width + 8, only | outerbits,
                               fullmask)

    node = SizeTree(innermask, width)
    for bits, group in bins.items():
        child = build_size_tree(group, width, bits | outerbits, fullmask)
        node.subs.append((bits, child))
    return node
# end build_size_tree
def prop_size(tree):
    """Propagate minimum widths up the decode size tree.

    For a SizeTree node, recurse into every entry of tree.subs, store
    the smallest width found in tree.width and return it.  Any other
    node simply reports its own width.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for _bits, sub in tree.subs:
        sub_width = prop_size(sub)
        if narrowest is None or narrowest > sub_width:
            narrowest = sub_width

    # A node may never become narrower than the width it was built with.
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
def main():
    """Parse the command line, read the input files and emit the decoder.

    Steps, in order:
      * parse options with getopt and update the module-level settings;
      * parse every input file into a top-level ExcMultiPattern;
      * propagate masks, build the decode tree and propagate formats;
      * for variable-width insns, build and size the size decode tree;
      * write the argument sets, translator declarations, format
        extractors, the decode function and, for variable-width insns,
        the <decode_function>_load function to the output.

    Output goes to the file named by -o/--output, or to stdout.
    Problems are reported through error().
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global variablewidth
    global anyextern

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            # A user-named decode function is emitted without 'static'.
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            # Report a non-numeric width like any other option error
            # instead of letting ValueError escape as a traceback.
            try:
                insnwidth = int(a)
            except ValueError:
                error(0, 'invalid insn width', a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            # NOTE(review): '-v' is accepted by the short option string
            # above but has no branch here, so it ends up on this assert.
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # The with-block closes the input even if parsing raises.
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if len(allpatterns) != 0:
        # One union member per argument set, shared by all patterns.
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
    else:
        # The stdout wrapper is never closed here; flush it explicitly
        # so buffered text does not depend on object finalization.
        output_fd.flush()
# end main


if __name__ == '__main__':
    main()