expressions: Major work to improve error messages.
[pspp.git] / src / language / expressions / generate.py
blobffe780a3c35ab9cbfdd07f016dcc857890605a06
1 #! /usr/bin/python3
2 # PSPP - a program for statistical analysis.
3 # Copyright (C) 2017, 2021 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 import enum
19 import getopt
20 import re
21 import sys
# Name this script was invoked as; used as a prefix in diagnostics.
argv0 = sys.argv[0]


def die(s):
    """Writes message 's' plus a newline to stderr, then exits with
    status 1."""
    print(s, file=sys.stderr)
    sys.exit(1)
def init_all_types():
    """Defines all our types.

    Builds every Type the generator knows about and stores them in the
    'types' global, keyed by type name, in the order they are listed
    here.

    """
    global types

    definitions = (
        # Common user-visible types used throughout evaluation trees.
        Type.new_any('number', 'double', 'number', 'n',
                     'number', 'ns', 'SYSMIS'),
        Type.new_any('string', 'struct substring', 'string', 's',
                     'string', 'ss', 'empty_string'),
        Type.new_any('boolean', 'double', 'number', 'n',
                     'boolean', 'ns', 'SYSMIS'),
        Type.new_any('integer', 'int', 'number', 'n',
                     'integer', 'ns', 'SYSMIS'),

        # Format types.
        Type.new_atom('format'),
        Type.new_leaf('ni_format', 'const struct fmt_spec *',
                      'format', 'f', 'num_input_format'),
        Type.new_leaf('no_format', 'const struct fmt_spec *',
                      'format', 'f', 'num_output_format'),

        # Integer types.
        Type.new_leaf('pos_int', 'int', 'integer', 'n',
                      'positive_integer_constant'),

        # Variable names.
        Type.new_atom('variable'),
        Type.new_leaf('num_var', 'const struct variable *',
                      'variable', 'Vn', 'num_variable'),
        Type.new_leaf('str_var', 'const struct variable *',
                      'variable', 'Vs', 'string_variable'),
        Type.new_leaf('var', 'const struct variable *',
                      'variable', 'V', 'variable'),

        # Vectors.
        Type.new_leaf('vector', 'const struct vector *',
                      'vector', 'v', 'vector'),
        Type.new_any('num_vec_elem', 'double', 'number', 'n',
                     'number', 'ns', 'SYSMIS'),

        # Types as leaves or auxiliary data.
        Type.new_leaf('expr_node', 'const struct expr_node *',
                      'expr_node', 'e', 'expr_node'),

        # Types that appear only as auxiliary data.
        Type.new_auxonly('expression', 'struct expression *', 'e'),
        Type.new_auxonly('case', 'const struct ccase *', 'c'),
        Type.new_auxonly('case_idx', 'size_t', 'case_idx'),
        Type.new_auxonly('dataset', 'struct dataset *', 'ds'),

        # One of these is emitted at the end of each expression as a
        # sentinel that tells expr_evaluate() to return the value on
        # the stack.
        Type.new_atom('return_number'),
        Type.new_atom('return_string'),

        # Used only for debugging purposes.
        Type.new_atom('operation'),
    )
    types = {type_.name: type_ for type_ in definitions}
class Type:
    """A type that an operand, argument, return value, or piece of
    auxiliary data can have.

    Instances are normally created through the new_*() factory
    functions below, which are called through the class (for example
    Type.new_atom('format')) rather than on an instance.

    """

    def __init__(self, name, role, human_name, c_type=None):
        self.name = name
        self.role = role
        self.human_name = human_name
        if c_type:
            # Store the C type in a form that can be glued directly in
            # front of an identifier: pointer types already end in
            # '*', anything else needs a separating space.
            self.c_type = c_type if c_type.endswith('*') else c_type + ' '

    def new_atom(name):
        """Creates and returns a new atom Type with the given 'name'.

        An atom isn't directly allowed as an operand or function
        argument type.  They are all exceptional cases in some way.

        """
        return Type(name, 'atom', name)

    def new_any(name, c_type, atom, mangle, human_name, stack,
                missing_value):
        """Creates and returns a new Type that can appear in any context, that
        is, it can be an operation's argument type or return type.

        'c_type' is the type used for C objects of this type.

        'atom' should be the name of the member of "union
        operation_data" that holds a value of this type.

        'mangle' should be a short string for name mangling purposes,
        to allow overloading functions with the same name but
        different argument types.  Use the same 'mangle' for two
        different types if those two types should not be overloaded.

        'human_name' should be a name to use when describing this type
        to the user (see Op.prototype()).

        'stack' is the name of the local variable in expr_evaluate()
        used for maintaining a stack of this type.

        'missing_value' is the expression used for a missing value of
        this type.

        """
        result = Type(name, 'any', human_name, c_type)
        result.atom, result.mangle = atom, mangle
        result.stack, result.missing_value = stack, missing_value
        return result

    def new_leaf(name, c_type, atom, mangle, human_name):
        """Creates and returns a new leaf Type.  A leaf type can appear in
        expressions as an operation's argument type, but not as a return type.
        (Thus, it only appears in a parse tree as a leaf node.)

        The other arguments are as for new_any().

        """
        result = Type(name, 'leaf', human_name, c_type)
        result.atom, result.mangle = atom, mangle
        return result

    def new_auxonly(name, c_type, auxonly_value):
        """Creates and returns a new auxiliary-only Type.  An auxiliary-only
        Type is one that gets passed into the evaluation function but
        isn't supplied directly by the user as an operand or argument.

        'c_type' is as in new_any().

        'auxonly_value' is the name of the local variable in
        expr_evaluate() that has the value of this auxiliary data.

        """
        result = Type(name, 'auxonly', name, c_type)
        result.auxonly_value = auxonly_value
        return result

    def parse():
        """If the current token is an identifier that names a type, returns
        the type and skips to the next token.  Otherwise, returns
        None.

        """
        if toktype != 'id':
            return None
        found = next((candidate for candidate in types.values()
                      if candidate.name == token), None)
        if found is not None:
            get_token()
        return found
class Category(enum.Enum):
    """The kind of operation that a definition declares."""

    # Introduced by the 'function' keyword in the definitions file.
    FUNCTION = enum.auto()
    # Introduced by the 'operator' keyword in the definitions file.
    OPERATOR = enum.auto()
class Op:
    """One operation (a function or an operator) from the definitions
    file, together with the flags that were parsed for it."""

    def __init__(self, name, category, returns, args, aux, expression,
                 block, min_valid, optimizable, unimplemented,
                 extension, perm_only, absorb_miss, no_abbrev):
        self.name = name
        self.category = category
        self.returns = returns
        self.args = args
        self.aux = aux
        self.expression = expression
        self.block = block
        self.min_valid = min_valid
        self.optimizable = optimizable
        self.unimplemented = unimplemented
        self.extension = extension
        self.perm_only = perm_only
        self.absorb_miss = absorb_miss
        self.no_abbrev = no_abbrev

        # C identifier for the operation.  Function names also get
        # the mangled argument types appended, so that functions with
        # the same name but different argument types stay distinct.
        opname = 'OP_' + name.replace('.', '_')
        if category == Category.FUNCTION:
            opname += '_' + ''.join(a.type_.mangle for a in args)
        self.opname = opname

    def array_arg(self):
        """If this operation has an array argument, returns it.  Otherwise,
        returns None.

        """
        if not self.args:
            return None
        last = self.args[-1]
        return last if last.idx is not None else None

    def sysmis_decl(self, min_valid_src):
        """Returns a declaration for a boolean variable called `force_sysmis',
        which will be true when this operation should be
        system-missing.  Returns None if there are no such
        circumstances.

        If this operation has a minimum number of valid arguments,
        'min_valid_src' should be an expression that evaluates to
        the minimum number of valid arguments for this operation.

        """
        conditions = []
        if not self.absorb_miss:
            for arg in self.args:
                c_name = 'arg_%s' % arg.name
                if arg.idx is not None:
                    # Array argument: every element must be valid.
                    if arg.type_.name == 'number':
                        count = 'arg_%s' % arg.idx
                        conditions.append('count_valid (%s, %s) < %s'
                                          % (c_name, count, count))
                elif arg.type_.name in ('number', 'boolean', 'integer'):
                    conditions.append('!is_valid (%s)' % c_name)
        elif self.min_valid > 0:
            array = self.args[-1]
            conditions.append('count_valid (arg_%s, arg_%s) < %s'
                              % (array.name, array.idx, min_valid_src))

        conditions.extend('!(%s)' % arg.condition
                          for arg in self.args
                          if arg.condition is not None)
        if not conditions:
            return None
        return 'bool force_sysmis = %s' % ' || '.join(conditions)

    def prototype(self):
        """Composes and returns a string that describes the function in a way
        suitable for a human to understand, something like a C
        function prototype, e.g. "ABS(number)" or "ANY(number,
        number[, number]...)".

        This doesn't make sense for operators so this function just
        returns None for them.

        """
        if self.category != Category.FUNCTION:
            return None

        required = [arg.type_.human_name
                    for arg in self.args if arg.idx is None]
        optional = []
        array = self.array_arg()
        if array is not None:
            element = array.type_.human_name
            if self.min_valid == 0:
                optional = [element] * array.times
                required += optional
            else:
                required += [element] * self.min_valid
                optional = [element]

        text = '%s(%s' % (self.name, ', '.join(required))
        if optional:
            text += '[, %s]...' % ', '.join(optional)
        return text + ')'

    def flags(self):
        """Returns the OPF_* flags that apply to 'self'."""
        has_expr_node = any(aux['TYPE'].name == 'expr_node'
                            for aux in self.aux)
        candidates = (
            (self.absorb_miss, 'OPF_ABSORB_MISS'),
            (self.array_arg(), 'OPF_ARRAY_OPERAND'),
            (self.min_valid > 0, 'OPF_MIN_VALID'),
            (not self.optimizable, 'OPF_NONOPTIMIZABLE'),
            (self.extension, 'OPF_EXTENSION'),
            (self.unimplemented, 'OPF_UNIMPLEMENTED'),
            (self.perm_only, 'OPF_PERM_ONLY'),
            (self.no_abbrev, 'OPF_NO_ABBREV'),
            (has_expr_node, 'OPF_EXPR_NODE'),
        )
        names = [flag for enabled, flag in candidates if enabled]
        return ' | '.join(names) if names else '0'
def parse_input():
    """Parses the entire input.

    Reads operation definitions from the already-open 'in_file' until
    end of file, validates each one (calling die() on the first error),
    and closes 'in_file' when done.

    Initializes the 'funcs' and 'opers' globals to the parsed functions
    and operators, each sorted by name, and 'order' to 'funcs' followed
    by 'opers'."""

    global token, toktype, line_number
    global funcs, opers, order

    token = None
    toktype = None
    line_number = 0
    get_line()
    get_token()

    ops = {}        # Maps each opname to its Op, to detect duplicates.
    funcs = []
    opers = []

    while toktype != 'eof':
        # Optional flags, in any order, before the return type.
        optimizable = True
        unimplemented = False
        extension = False
        perm_only = False
        absorb_miss = False
        no_abbrev = False
        while True:
            if match('extension'):
                extension = True
            elif match('no_opt'):
                optimizable = False
            elif match('absorb_miss'):
                absorb_miss = True
            elif match('perm_only'):
                perm_only = True
            elif match('no_abbrev'):
                no_abbrev = True
            else:
                break

        # The return type is optional and defaults to 'number'.
        return_type = Type.parse()
        if return_type is None:
            return_type = types['number']
        if return_type.name not in ['number', 'string', 'boolean',
                                    'num_vec_elem']:
            die('%s is not a valid return type' % return_type.name)

        if token == 'operator':
            category = Category.OPERATOR
        elif token == 'function':
            category = Category.FUNCTION
        else:
            die("'operator' or 'function' expected at '%s'" % token)
        get_token()

        name = force('id')
        if category == Category.FUNCTION and '_' in name:
            die("function name '%s' may not contain underscore" % name)
        elif category == Category.OPERATOR and '.' in name:
            die("operator name '%s' may not contain period" % name)

        # A trailing ".N" on the name is a minimum valid count, which
        # also implies absorb_miss.
        m = re.match(r'(.*)\.(\d+)$', name)
        if m:
            prefix, suffix = m.groups()
            name = prefix
            min_valid = int(suffix)
            absorb_miss = True
        else:
            min_valid = 0

        force_match('(')
        args = []
        while not match(')'):
            arg = Arg.parse()
            args += [arg]
            if arg.idx is not None:
                if match(')'):
                    break
                die('array must be last argument')
            if not match(','):
                force_match(')')
                break

        # Conditions are written in terms of the bare argument names;
        # rewrite them to use the arg_NAME variables of the generated
        # code.  Names are escaped because the tokenizer allows '.' in
        # identifiers and '.' is a regex metacharacter.
        if any(arg.condition is not None for arg in args):
            any_arg = '|'.join(re.escape(a.name) for a in args)
            for arg in args:
                if arg.condition is not None:
                    arg.condition = re.sub(r'\b(%s)\b' % any_arg,
                                           r'arg_\1', arg.condition)

        aux = []
        while toktype == 'id':
            type_ = Type.parse()
            if type_ is None:
                die('parse error')
            if type_.role not in ['leaf', 'auxonly']:
                die("'%s' is not allowed as auxiliary data" % type_.name)
            aux_name = force('id')
            aux += [{'TYPE': type_, 'NAME': aux_name}]
            force_match(';')

        if optimizable:
            if name.startswith('RV.'):
                die("random variate functions must be marked 'no_opt'")
            # 'aux' is a list of {'TYPE', 'NAME'} dicts, so the check
            # has to look at the type names rather than test a string
            # for membership in the list itself.
            aux_type_names = {a['TYPE'].name for a in aux}
            for key in ['case', 'case_idx']:
                if key in aux_type_names:
                    die("operators with %s aux data must be marked 'no_opt'"
                        % key.upper())

        if return_type.name == 'string' and not absorb_miss:
            for arg in args:
                if arg.type_.name in ['number', 'boolean']:
                    die("'%s' returns string and has double or bool "
                        "argument, but is not marked ABSORB_MISS" % name)
                if arg.condition is not None:
                    die("'%s' returns string but has "
                        "argument with condition" % name)

        if toktype == 'block':
            block = force('block')
            expression = None
        elif toktype == 'expression':
            # Start from None so that an 'unimplemented' operation does
            # not pick up the previous operation's expression (or fail
            # with an unbound name if it is the first operation).
            expression = None
            if token == 'unimplemented':
                unimplemented = True
            else:
                expression = token
            block = None
            get_token()
        else:
            die('block or expression expected')

        op = Op(name, category,
                return_type, args, aux,
                expression, block,
                min_valid,
                optimizable, unimplemented, extension, perm_only, absorb_miss,
                no_abbrev)

        if min_valid > 0:
            aa = op.array_arg()
            if aa is None:
                die("can't have minimum valid count without array arg")
            if aa.type_.name != 'number':
                die('minimum valid count allowed only with double array')
            if aa.times != 1:
                die("can't have minimum valid count if "
                    "array has multiplication factor")

        if op.opname in ops:
            die("duplicate operation name '%s'" % op.opname)
        ops[op.opname] = op
        if category == Category.FUNCTION:
            funcs += [op]
        else:
            opers += [op]

    in_file.close()

    funcs = sorted(funcs, key=lambda f: (f.name, f.opname))
    opers = sorted(opers, key=lambda o: o.name)
    order = funcs + opers
def get_token():
    """Reads the next token into 'token' and 'toktype'.

    The token text is removed from the front of 'line'.  'toktype'
    becomes one of 'id', 'int', 'punct', 'expression', 'block', or
    'eof'.  Anything else is reported as a bad character.

    """
    global line, token, toktype

    lookahead()
    if toktype == 'eof':
        return

    ident = re.match(r'([a-zA-Z_][a-zA-Z_.0-9]*)(.*)$', line)
    if ident:
        token, line = ident.groups()
        toktype = 'id'
        return

    number = re.match(r'([0-9]+)(.*)$', line)
    if number:
        digits, line = number.groups()
        token = int(digits)
        toktype = 'int'
        return

    punct = re.match(r'([][(),*;.])(.*)$', line)
    if punct:
        token, line = punct.groups()
        toktype = 'punct'
        return

    # "= EXPRESSION;" -- the token is everything up to the ';'.
    expr = re.match(r'=\s*(.*)$', line)
    if expr:
        toktype = 'expression'
        line = expr.group(1)
        token = accumulate_balanced(';')
        return

    # "{ BLOCK }" -- the token is everything up to the matching '}'.
    body = re.match(r'{(.*)$', line)
    if body:
        toktype = 'block'
        line = body.group(1)
        token = accumulate_balanced('}').rstrip('\n')
        return

    die("bad character '%s' in input" % line[0])
def lookahead():
    """Skip whitespace.

    Strips leading whitespace from 'line', reading further lines while
    the current one is empty.  If the input runs out, sets both
    'token' and 'toktype' to 'eof'.

    """
    global line, token, toktype

    if line is None:
        die('unexpected end of file')

    line = line.lstrip()
    while line == '':
        get_line()
        if line is None:
            token = toktype = 'eof'
            return
        line = line.lstrip()
def accumulate_balanced(end, swallow_end=True):
    """Accumulates input until a character in 'end' is encountered,
    except that balanced pairs of (), [], or {} cause 'end' to be
    ignored.  Returns the input read.

    Input is taken from 'line', reading further lines as needed; line
    breaks are kept in the result as newlines, except that leading and
    trailing ones are stripped.  On return, 'line' holds what follows
    the terminator, or starts with the terminator itself if
    'swallow_end' is false.

    Dies if a closing bracket has no matching opener, or if the input
    ends before the terminator is found.

    """
    global line

    pieces = []
    nest = 0
    while True:
        # get_line() sets 'line' to None at end of file; report that
        # as an input error rather than failing when iterating it.
        if line is None:
            die('unexpected end of file')
        for idx, c in enumerate(line):
            if c in end and nest == 0:
                line = line[idx + 1:] if swallow_end else line[idx:]
                return ''.join(pieces).strip('\r\n')
            elif c in '[({':
                nest += 1
            elif c in '])}':
                if nest > 0:
                    nest -= 1
                else:
                    die('unbalanced parentheses')
            pieces.append(c)
        pieces.append('\n')
        get_line()
def get_line():
    """Reads the next line from 'in_file' into 'line'.

    Increments 'line_number'.  The line terminator and anything from
    "//" onward are dropped.  At end of file 'line' becomes None.

    """
    global line, line_number

    raw = in_file.readline()
    line_number += 1
    if raw == '':
        line = None
        return

    # Keep only what precedes the first "//", if there is one.
    line = raw.rstrip('\r\n').partition('//')[0]
def force(type_):
    """Makes sure that 'toktype' equals 'type_', reads the next token, and
    returns the previous 'token'.

    """
    if toktype != type_:
        die("parse error at `%s' expecting %s" % (token, type_))
    previous = token
    get_token()
    return previous
def match(tok):
    """If 'token' equals 'tok', reads the next token and returns true.
    Otherwise, returns false."""
    if token != tok:
        return False
    get_token()
    return True
def force_match(tok):
    """If 'token' equals 'tok', reads the next token.  Otherwise, flags an
    error in the input.

    """
    if match(tok):
        return
    die("parse error at `%s' expecting `%s'" % (token, tok))
class Arg:
    """One argument of an operation.

    'idx' is None for an ordinary argument; for an array argument it
    is the name given between the brackets.  'times' is the array's
    multiplication factor (1 unless "*2" was given).  'condition' is
    the text of the argument's validity condition, or None.

    """

    def __init__(self, name, type_, idx, times, condition):
        self.name = name
        self.type_ = type_
        self.idx = idx
        self.times = times
        self.condition = condition

    def parse():
        """Parses and returns a function argument."""
        type_ = Type.parse()
        if type_ is None:
            type_ = types['number']

        if toktype != 'id':
            die("argument name expected at `%s'" % token)
        name = token

        # Peek at the first character after the name without
        # tokenizing it, because a condition has to be read as raw
        # text.
        lookahead()

        idx = None
        times = 1
        condition = None
        if line[0] not in '[,)':
            # Anything else after the name starts a condition, which
            # runs up to the next unnested ',' or ')'.
            condition = '%s %s' % (
                name, accumulate_balanced(',)', swallow_end=False))
            get_token()
        else:
            get_token()
            if match('['):
                if type_.name not in ('number', 'string'):
                    die('only double and string arrays supported')
                idx = force('id')
                if match('*'):
                    times = force('int')
                    if times != 2:
                        die('multiplication factor must be two')
                force_match(']')

        return Arg(name, type_, idx, times, condition)
def print_header():
    """Prints the output file header."""
    header = '/* Generated by generate.py. Do not modify! */\n'
    sys.stdout.write(header)
def print_trailer():
    """Prints the output file trailer.

    The trailer is an Emacs file-local-variables list.  It is wrapped
    in a C comment, because the output files are C source and the bare
    list would not compile.

    """
    sys.stdout.write("""\

/*
   Local Variables:
   mode: c
   buffer-read-only: t
   End:
*/
""")
def generate_evaluate_h():
    """Writes, for each implemented operation in 'order', a static
    inline C function named eval_OPNAME whose body is the operation's
    block or a return of its expression."""
    sys.stdout.write('#include "helpers.h"\n\n')

    for op in order:
        if op.unimplemented:
            continue

        params = []
        for arg in op.args:
            param = arg.type_.c_type + arg.name
            if arg.idx is None:
                params.append(param)
            else:
                # An array argument is passed as the array followed by
                # its element count.
                params.append(param + '[]')
                params.append('size_t %s' % arg.idx)
        params.extend(aux['TYPE'].c_type + aux['NAME'] for aux in op.aux)
        if not params:
            params = ['void']

        if op.block:
            body = op.block + '\n'
        else:
            body = ' return %s;\n' % op.expression

        sys.stdout.write('static inline %s\n'
                         'eval_%s (%s)\n'
                         '{\n'
                         '%s'
                         '}\n\n'
                         % (op.returns.c_type, op.opname,
                            ', '.join(params), body))
def generate_evaluate_inc():
    """Writes one C 'case' per operation in 'order'.  Each case fetches
    the operation's arguments and auxiliary data, calls eval_OPNAME,
    and pushes the result on the return type's stack.  Unimplemented
    operations get a NOT_REACHED () case instead."""
    emit = sys.stdout.write

    for op in order:
        if op.unimplemented:
            emit('case %s:\n' % op.opname)
            emit(' NOT_REACHED ();\n\n')
            continue

        decls = []
        call_args = []
        for arg in op.args:
            arg_type = arg.type_
            c_name = 'arg_%s' % arg.name

            # An argument whose C type is int is declared as double
            # here.  With absorb_miss, SYSMIS is handed to the eval
            # function as INT_MIN.
            if arg_type.c_type == 'int ':
                c_type = 'double '
                if op.absorb_miss:
                    call_args.append('%s == SYSMIS ? INT_MIN : %s'
                                     % (c_name, c_name))
                else:
                    call_args.append(c_name)
            else:
                c_type = arg_type.c_type
                call_args.append(c_name)

            if arg.idx is not None:
                # Array argument: its count comes from the operation
                # stream, then that many values are taken off the
                # stack.  Both go first, count before array.
                count = 'arg_%s' % arg.idx
                decls.insert(0, '%s*%s = %s -= %s'
                             % (c_type, c_name, arg_type.stack, count))
                decls.insert(0, 'size_t %s = op++->integer' % count)

                if arg.times == 1:
                    call_args.append(count)
                else:
                    call_args.append('%s / %s' % (count, arg.times))
            elif arg_type.role == 'any':
                # Stack values are popped, so each declaration goes in
                # front of the ones for earlier arguments.
                decls.insert(0, '%s%s = *--%s'
                             % (c_type, c_name, arg_type.stack))
            elif arg_type.role == 'leaf':
                decls.append('%s%s = op++->%s'
                             % (c_type, c_name, arg_type.atom))
            else:
                assert False

        for aux in op.aux:
            aux_type = aux['TYPE']
            aux_name = aux['NAME']
            if aux_type.role == 'leaf':
                decls.append('%saux_%s = op++->%s'
                             % (aux_type.c_type, aux_name, aux_type.atom))
                call_args.append('aux_%s' % aux_name)
            elif aux_type.name == 'expr_node':
                decls.append('%saux_%s = op++->node'
                             % (aux_type.c_type, aux_name))
                call_args.append('aux_%s' % aux_name)
            elif aux_type.role == 'auxonly':
                call_args.append(aux_type.auxonly_value)

        sysmis_cond = op.sysmis_decl('op++->integer')
        if sysmis_cond is not None:
            decls.append(sysmis_cond)

        result = 'eval_%s (%s)' % (op.opname, ', '.join(call_args))
        stack = op.returns.stack

        emit('case %s:\n' % op.opname)
        if not decls:
            emit(' *%s++ = %s;\n' % (stack, result))
        else:
            emit(' {\n')
            for decl in decls:
                emit(' %s;\n' % decl)
            if sysmis_cond is None:
                emit(' *%s++ = %s;\n' % (stack, result))
            else:
                emit(' *%s++ = force_sysmis ? %s : %s;\n'
                     % (stack, op.returns.missing_value, result))
            emit(' }\n')
        emit(' break;\n\n')
def generate_operations_h():
    """Writes the C enumeration of all atom, function, and operator
    codes, with first/last/count constants for each group, followed by
    the is_*() predicates that test which group a code belongs to."""
    sys.stdout.write('#include <stdlib.h>\n'
                     '#include <stdbool.h>\n\n')

    sys.stdout.write('typedef enum {\n')
    # Auxiliary-only types get no operation code.
    atoms = ['OP_%s' % type_.name
             for type_ in types.values()
             if type_.role != 'auxonly']

    print_operations('atom', 1, atoms)
    print_operations('function', 'OP_atom_last + 1',
                     [func.opname for func in funcs])
    print_operations('operator', 'OP_function_last + 1',
                     [oper.opname for oper in opers])
    print_range('OP_composite', 'OP_function_first', 'OP_operator_last')
    sys.stdout.write(',\n\n')
    print_range('OP', 'OP_atom_first', 'OP_composite_last')
    sys.stdout.write('\n }\n'
                     'operation_type, atom_type;\n')

    print_predicate('is_operation', 'OP')
    for group in ('atom', 'composite', 'function', 'operator'):
        print_predicate('is_%s' % group, 'OP_%s' % group)
def print_operations(type_, first, names):
    """Writes the enumerators for one group of operations.

    'type_' is the group name, 'first' is the value given to the first
    enumerator, and 'names' lists the enumerators in order.  The
    group's range constants follow.

    """
    sys.stdout.write(' /* %s types. */\n' % type_.title())
    enumerators = [' %s = %s,\n' % (names[0], first)]
    enumerators += [' %s,\n' % name for name in names[1:]]
    sys.stdout.write(''.join(enumerators))
    print_range('OP_%s' % type_, names[0], names[-1])
    sys.stdout.write(',\n\n')
def print_range(prefix, first, last):
    """Writes the PREFIX_first, PREFIX_last, and n_PREFIX enumerators
    for a range that runs from 'first' to 'last'.  The final
    enumerator is left without a trailing comma or newline."""
    sys.stdout.write(
        ' {p}_first = {lo},\n'
        ' {p}_last = {hi},\n'
        ' n_{p} = {p}_last - {p}_first + 1'.format(p=prefix,
                                                    lo=first, hi=last))
def print_predicate(function, category):
    """Writes a static inline C function named 'function' that reports
    whether an operation_type lies between CATEGORY_first and
    CATEGORY_last.  Every predicate except is_operation first asserts
    that its argument is an operation at all."""
    lines = ['\nstatic inline bool\n',
             '%s (operation_type op)\n' % function,
             '{\n']
    if function != 'is_operation':
        lines.append(' assert (is_operation (op));\n')
    lines.append(' return op >= %s_first && op <= %s_last;\n'
                 % (category, category))
    lines.append('}\n')
    sys.stdout.write(''.join(lines))
def generate_optimize_inc():
    """Writes one C 'case' per operation in 'order'.  Each case fetches
    the operation's arguments from 'node', calls eval_OPNAME, and
    returns the result through expr_allocate_TYPE.  Operations that
    are unimplemented or not optimizable get a NOT_REACHED () case
    instead."""
    emit = sys.stdout.write

    for op in order:
        if op.unimplemented or not op.optimizable:
            emit('case %s:\n' % op.opname)
            emit(' NOT_REACHED ();\n\n')
            continue

        decls = []
        call_args = []
        for position, arg in enumerate(op.args):
            arg_type = arg.type_
            c_name = 'arg_%s' % arg.name
            call_args.append(c_name)

            if arg.idx is None:
                decls.append('%s%s = get_%s_arg (node, %s)'
                             % (arg_type.c_type, c_name,
                                arg_type.atom, position))
                continue

            # Array argument: its count is however many of the node's
            # arguments remain after the ones that precede it.
            count = 'arg_%s' % arg.idx
            count_decl = 'size_t %s = node->n_args' % count
            if position > 0:
                count_decl += ' - %s' % position
            decls.append(count_decl)
            decls.append('%s*%s = get_%s_args (node, %s, %s, e)'
                         % (arg_type.c_type, c_name, arg_type.atom,
                            position, count))

            if arg.times == 1:
                call_args.append(count)
            else:
                call_args.append('%s / %s' % (count, arg.times))

        sysmis_cond = op.sysmis_decl('node->min_valid')
        if sysmis_cond is not None:
            decls.append(sysmis_cond)

        for aux in op.aux:
            aux_type = aux['TYPE']
            if aux_type.role == 'leaf':
                assert aux_type.name == 'expr_node'
                call_args.append('node')
            elif aux_type.role == 'auxonly':
                call_args.append(aux_type.auxonly_value)
            else:
                assert False

        result = 'eval_%s (%s)' % (op.opname, ', '.join(call_args))
        if decls and sysmis_cond is not None:
            decls.append('%sresult = force_sysmis ? %s : %s'
                         % (op.returns.c_type, op.returns.missing_value,
                            result))
            result = 'result'

        emit('case %s:\n' % op.opname)
        return_stmt = (' return expr_allocate_%s (e, %s);\n'
                       % (op.returns.name, result))
        if decls:
            emit(' {\n')
            for decl in decls:
                emit(' %s;\n' % decl)
            emit(return_stmt)
            emit(' }\n')
        else:
            emit(return_stmt)
        emit('\n')
def generate_parse_inc():
    """Writes a brace-enclosed C initializer row for a leading empty
    entry, then for each type that is not auxiliary-only, then for
    each operation in 'order'."""

    def emit_row(members):
        sys.stdout.write('{%s},\n' % ', '.join(members))

    # Leading all-empty row.
    emit_row(['""', '""', '0', '0', '0', '{}', '0', '0'])

    for type_ in types.values():
        if type_.role == 'auxonly':
            continue
        emit_row(['"%s"' % type_.name, '"%s"' % type_.human_name,
                  '0', 'OP_%s' % type_.name, '0', '{}', '0', '0'])

    for op in order:
        prototype = op.prototype()
        array = op.array_arg()
        arg_types = ', '.join('OP_%s' % arg.type_.name for arg in op.args)
        emit_row([
            '"%s"' % op.name,
            '"%s"' % prototype if prototype else 'NULL',
            op.flags(),
            'OP_%s' % op.returns.name,
            '%s' % len(op.args),
            '{%s}' % arg_types,
            '%s' % op.min_valid,
            '%s' % (array.times if array else 0),
        ])
def usage():
    """Prints a help message to stdout, then exits with status 0."""
    help_text = """\
%s, for generating expression parsers and evaluators from definitions
usage: generate.py -o OUTPUT_TYPE [-i INPUT] [-h] > OUTPUT
-i INPUT input file containing definitions (default: operations.def)
-o OUTPUT output file type, one of: evaluate.h, evaluate.inc,
operations.h, optimize.inc, parse.inc
-h display this help message
"""
    sys.stdout.write(help_text % argv0 + '\n')
    sys.exit(0)
# Script entry point: parse the command line, read the definitions,
# and write the requested output file to stdout.
if __name__ == '__main__':
    try:
        # In getopt, a long option that takes an argument is marked
        # with a trailing '='.  (The 'NAME=s' form is Perl
        # Getopt::Long syntax, which getopt treats as an option that
        # takes no argument, so "--input=FILE" would be rejected.)
        options, args = getopt.gnu_getopt(sys.argv[1:], 'hi:o:',
                                          ['input=',
                                           'output=',
                                           'help'])
    except getopt.GetoptError as geo:
        die('%s: %s' % (argv0, geo.msg))

    in_file_name = 'operations.def'
    out_file_name = None
    for key, value in options:
        if key in ['-h', '--help']:
            usage()
        elif key in ['-i', '--input']:
            in_file_name = value
        elif key in ['-o', '--output']:
            out_file_name = value
        else:
            sys.exit(0)

    if out_file_name is None:
        die('%s: output file must be specified '
            '(use --help for help)' % argv0)

    # Resolve the output type before reading or writing anything, so
    # that a bad type doesn't leave a header-only output behind.
    generators = {
        'evaluate.h': generate_evaluate_h,
        'evaluate.inc': generate_evaluate_inc,
        'operations.h': generate_operations_h,
        'optimize.inc': generate_optimize_inc,
        'parse.inc': generate_parse_inc,
    }
    generate = generators.get(out_file_name)
    if generate is None:
        die('%s: unknown output type' % argv0)

    # 'in_file' is read by get_line() and closed by parse_input().
    try:
        in_file = open(in_file_name, 'r')
    except OSError as e:
        die('%s: %s: %s' % (argv0, in_file_name, e.strerror))

    init_all_types()
    parse_input()

    print_header()
    generate()
    print_trailer()