1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include "data/case.h"
27 #include "data/dictionary.h"
28 #include "data/settings.h"
29 #include "data/variable.h"
30 #include "language/expressions/helpers.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/i18n.h"
37 #include "libpspp/message.h"
38 #include "libpspp/misc.h"
39 #include "libpspp/pool.h"
40 #include "libpspp/str.h"
42 #include "gl/c-strcase.h"
43 #include "gl/minmax.h"
44 #include "gl/xalloc.h"
48 /* Recursive descent parser in order of increasing precedence. */
49 typedef struct expr_node
*parse_recursively_func (struct lexer
*, struct expression
*);
50 static parse_recursively_func parse_or
, parse_and
, parse_not
;
51 static parse_recursively_func parse_rel
, parse_add
, parse_mul
;
52 static parse_recursively_func parse_neg
, parse_exp
;
53 static parse_recursively_func parse_primary
;
54 static parse_recursively_func parse_vector_element
, parse_function
;
56 /* Utility functions. */
57 static struct expression
*expr_create (struct dataset
*ds
);
58 atom_type
expr_node_returns (const struct expr_node
*);
60 static const char *atom_type_name (atom_type
);
61 static struct expression
*finish_expression (struct expr_node
*,
63 static bool type_check (const struct expr_node
*, enum val_type expected_type
);
64 static struct expr_node
*allocate_unary_variable (struct expression
*,
65 const struct variable
*);
67 /* Public functions. */
69 /* Parses an expression of the given TYPE. If DS is nonnull then variables and
70 vectors within it may be referenced within the expression; otherwise, the
71 expression must not reference any variables or vectors. Returns the new
72 expression if successful or a null pointer otherwise. */
74 expr_parse (struct lexer
*lexer
, struct dataset
*ds
, enum val_type type
)
76 assert (val_type_is_valid (type
));
78 struct expression
*e
= expr_create (ds
);
79 struct expr_node
*n
= parse_or (lexer
, e
);
80 if (!n
|| !type_check (n
, type
))
86 return finish_expression (expr_optimize (n
, e
), e
);
89 /* Parses a boolean expression, otherwise similar to expr_parse(). */
91 expr_parse_bool (struct lexer
*lexer
, struct dataset
*ds
)
93 struct expression
*e
= expr_create (ds
);
94 struct expr_node
*n
= parse_or (lexer
, e
);
101 atom_type actual_type
= expr_node_returns (n
);
102 if (actual_type
== OP_number
)
103 n
= expr_allocate_unary (e
, OP_NUM_TO_BOOLEAN
, n
);
104 else if (actual_type
!= OP_boolean
)
106 msg (SE
, _("Type mismatch: expression has %s type, "
107 "but a boolean value is required here."),
108 atom_type_name (actual_type
));
113 return finish_expression (expr_optimize (n
, e
), e
);
116 /* Parses a numeric expression that is intended to be assigned to newly created
117 variable NEW_VAR_NAME. (This allows for a better error message if the
118 expression is not numeric.) Otherwise similar to expr_parse(). */
120 expr_parse_new_variable (struct lexer
*lexer
, struct dataset
*ds
,
121 const char *new_var_name
)
123 struct expression
*e
= expr_create (ds
);
124 struct expr_node
*n
= parse_or (lexer
, e
);
131 atom_type actual_type
= expr_node_returns (n
);
132 if (actual_type
!= OP_number
&& actual_type
!= OP_boolean
)
134 msg (SE
, _("This command tries to create a new variable %s by assigning a "
135 "string value to it, but this is not supported. Use "
136 "the STRING command to create the new variable with the "
137 "correct width before assigning to it, e.g. STRING %s(A20)."),
138 new_var_name
, new_var_name
);
143 return finish_expression (expr_optimize (n
, e
), e
);
146 /* Free expression E. */
148 expr_free (struct expression
*e
)
151 pool_destroy (e
->expr_pool
);
155 expr_parse_any (struct lexer
*lexer
, struct dataset
*ds
, bool optimize
)
158 struct expression
*e
;
160 e
= expr_create (ds
);
161 n
= parse_or (lexer
, e
);
169 n
= expr_optimize (n
, e
);
170 return finish_expression (n
, e
);
173 /* Finishing up expression building. */
175 /* Height of an expression's stacks. */
178 int number_height
; /* Height of number stack. */
179 int string_height
; /* Height of string stack. */
182 /* Stack heights used by different kinds of arguments. */
183 static const struct stack_heights on_number_stack
= {1, 0};
184 static const struct stack_heights on_string_stack
= {0, 1};
185 static const struct stack_heights not_on_stack
= {0, 0};
187 /* Returns the stack heights used by an atom of the given
189 static const struct stack_heights
*
190 atom_type_stack (atom_type type
)
192 assert (is_atom (type
));
198 return &on_number_stack
;
201 return &on_string_stack
;
211 return ¬_on_stack
;
218 /* Measures the stack height needed for node N, supposing that
219 the stack height is initially *HEIGHT and updating *HEIGHT to
220 the final stack height. Updates *MAX, if necessary, to
221 reflect the maximum intermediate or final height. */
223 measure_stack (const struct expr_node
*n
,
224 struct stack_heights
*height
, struct stack_heights
*max
)
226 const struct stack_heights
*return_height
;
228 if (is_composite (n
->type
))
230 struct stack_heights args
;
234 for (i
= 0; i
< n
->n_args
; i
++)
235 measure_stack (n
->args
[i
], &args
, max
);
237 return_height
= atom_type_stack (operations
[n
->type
].returns
);
240 return_height
= atom_type_stack (n
->type
);
242 height
->number_height
+= return_height
->number_height
;
243 height
->string_height
+= return_height
->string_height
;
245 if (height
->number_height
> max
->number_height
)
246 max
->number_height
= height
->number_height
;
247 if (height
->string_height
> max
->string_height
)
248 max
->string_height
= height
->string_height
;
251 /* Allocates stacks within E sufficient for evaluating node N. */
253 allocate_stacks (struct expr_node
*n
, struct expression
*e
)
255 struct stack_heights initial
= {0, 0};
256 struct stack_heights max
= {0, 0};
258 measure_stack (n
, &initial
, &max
);
259 e
->number_stack
= pool_alloc (e
->expr_pool
,
260 sizeof *e
->number_stack
* max
.number_height
);
261 e
->string_stack
= pool_alloc (e
->expr_pool
,
262 sizeof *e
->string_stack
* max
.string_height
);
265 /* Finalizes expression E for evaluating node N. */
266 static struct expression
*
267 finish_expression (struct expr_node
*n
, struct expression
*e
)
269 /* Allocate stacks. */
270 allocate_stacks (n
, e
);
272 /* Output postfix representation. */
275 /* The eval_pool might have been used for allocating strings
276 during optimization. We need to keep those strings around
277 for all subsequent evaluations, so start a new eval_pool. */
278 e
->eval_pool
= pool_create_subpool (e
->expr_pool
);
283 /* Verifies that expression E, whose root node is *N, can be
284 converted to type EXPECTED_TYPE, inserting a conversion at *N
285 if necessary. Returns true if successful, false on failure. */
287 type_check (const struct expr_node
*n
, enum val_type expected_type
)
289 atom_type actual_type
= expr_node_returns (n
);
291 switch (expected_type
)
294 if (actual_type
!= OP_number
&& actual_type
!= OP_boolean
)
296 msg (SE
, _("Type mismatch: expression has %s type, "
297 "but a numeric value is required here."),
298 atom_type_name (actual_type
));
304 if (actual_type
!= OP_string
)
306 msg (SE
, _("Type mismatch: expression has %s type, "
307 "but a string value is required here."),
308 atom_type_name (actual_type
));
320 /* Recursive-descent expression parser. */
323 free_msg_location (void *loc_
)
325 struct msg_location
*loc
= loc_
;
326 msg_location_destroy (loc
);
330 expr_location__ (struct expression
*e
,
331 const struct expr_node
*node
,
332 const struct msg_location
**minp
,
333 const struct msg_location
**maxp
)
335 struct msg_location
*loc
= node
->location
;
338 const struct msg_location
*min
= *minp
;
341 || loc
->start
.line
< min
->start
.line
342 || (loc
->start
.line
== min
->start
.line
343 && loc
->start
.column
< min
->start
.column
)))
346 const struct msg_location
*max
= *maxp
;
349 || loc
->end
.line
> max
->end
.line
350 || (loc
->end
.line
== max
->end
.line
351 && loc
->end
.column
> max
->end
.column
)))
357 if (is_composite (node
->type
))
358 for (size_t i
= 0; i
< node
->n_args
; i
++)
359 expr_location__ (e
, node
->args
[i
], minp
, maxp
);
362 /* Returns the source code location corresponding to expression NODE, computing
363 it lazily if needed. */
364 static const struct msg_location
*
365 expr_location (struct expression
*e
, const struct expr_node
*node_
)
367 struct expr_node
*node
= CONST_CAST (struct expr_node
*, node_
);
373 const struct msg_location
*min
= NULL
;
374 const struct msg_location
*max
= NULL
;
375 expr_location__ (e
, node
, &min
, &max
);
378 node
->location
= msg_location_dup (min
);
379 node
->location
->end
= max
->end
;
380 pool_register (e
->expr_pool
, free_msg_location
, node
->location
);
383 return node
->location
;
386 /* Sets e->location to the tokens in S's lexer from offset START_OFS to the
387 token before the current one. Has no effect if E already has a location or
390 expr_add_location (struct lexer
*lexer
, struct expression
*e
,
391 int start_ofs
, struct expr_node
*node
)
393 if (node
&& !node
->location
)
395 node
->location
= lex_ofs_location (lexer
, start_ofs
, lex_ofs (lexer
) - 1);
396 pool_register (e
->expr_pool
, free_msg_location
, node
->location
);
401 type_coercion__ (struct expression
*e
, struct expr_node
*node
, size_t arg_idx
,
404 assert (!!do_coercion
== (e
!= NULL
));
409 struct expr_node
**argp
= &node
->args
[arg_idx
];
410 struct expr_node
*arg
= *argp
;
414 const struct operation
*op
= &operations
[node
->type
];
415 atom_type required_type
= op
->args
[MIN (arg_idx
, op
->n_args
- 1)];
416 atom_type actual_type
= expr_node_returns (arg
);
417 if (actual_type
== required_type
)
423 switch (required_type
)
426 if (actual_type
== OP_boolean
)
428 /* To enforce strict typing rules, insert Boolean to
429 numeric "conversion". This conversion is a no-op,
430 so it will be removed later. */
432 *argp
= expr_allocate_unary (e
, OP_BOOLEAN_TO_NUM
, arg
);
438 /* No coercion to string. */
442 if (actual_type
== OP_number
)
444 /* Convert numeric to boolean. */
446 *argp
= expr_allocate_unary (e
, OP_NUM_TO_BOOLEAN
, arg
);
456 if (arg
->type
== OP_format
457 && fmt_check_input (&arg
->format
)
458 && fmt_check_type_compat (&arg
->format
, VAL_NUMERIC
))
462 arg
->type
= OP_ni_format
;
470 if (arg
->type
== OP_format
471 && fmt_check_output (&arg
->format
)
472 && fmt_check_type_compat (&arg
->format
, VAL_NUMERIC
))
476 arg
->type
= OP_no_format
;
483 if (arg
->type
== OP_NUM_VAR
)
486 *argp
= arg
->args
[0];
492 if (arg
->type
== OP_STR_VAR
)
495 *argp
= arg
->args
[0];
501 if (arg
->type
== OP_NUM_VAR
|| arg
->type
== OP_STR_VAR
)
504 *argp
= arg
->args
[0];
510 if (arg
->type
== OP_number
511 && floor (arg
->number
) == arg
->number
512 && arg
->number
> 0 && arg
->number
< INT_MAX
)
515 *argp
= expr_allocate_pos_int (e
, arg
->number
);
527 type_coercion (struct expression
*e
, struct expr_node
*node
, size_t arg_idx
)
529 return type_coercion__ (e
, node
, arg_idx
, true);
533 is_coercible (const struct expr_node
*node_
, size_t arg_idx
)
535 struct expr_node
*node
= CONST_CAST (struct expr_node
*, node_
);
536 return type_coercion__ (NULL
, node
, arg_idx
, false);
539 /* How to parse an operator. */
542 enum token_type token
; /* Operator token. */
543 operation_type type
; /* Operation. */
546 /* Attempts to match the current token against the tokens for the
547 OP_CNT operators in OPS[]. If successful, returns true
548 and, if OPERATOR is non-null, sets *OPERATOR to the operator.
549 On failure, returns false and, if OPERATOR is non-null, sets
550 *OPERATOR to a null pointer. */
551 static const struct operator *
552 match_operator (struct lexer
*lexer
, const struct operator ops
[], size_t n_ops
,
553 const struct expr_node
*lhs
)
555 bool lhs_is_numeric
= operations
[lhs
->type
].returns
!= OP_string
;
556 for (const struct operator *op
= ops
; op
< ops
+ n_ops
; op
++)
557 if (lex_token (lexer
) == op
->token
)
559 bool op_is_numeric
= operations
[op
->type
].args
[0] != OP_string
;
560 if (op_is_numeric
== lhs_is_numeric
)
562 if (op
->token
!= T_NEG_NUM
)
571 operator_name (const struct operator *op
)
573 return op
->token
== T_NEG_NUM
? "-" : token_type_to_string (op
->token
);
576 static struct expr_node
*
577 parse_binary_operators__ (struct lexer
*lexer
, struct expression
*e
,
578 const struct operator ops
[], size_t n_ops
,
579 parse_recursively_func
*parse_next_level
,
580 const char *chain_warning
, struct expr_node
*lhs
)
582 for (int op_count
= 0; ; op_count
++)
584 const struct operator *operator = match_operator (lexer
, ops
, n_ops
, lhs
);
587 if (op_count
> 1 && chain_warning
)
588 msg_at (SW
, expr_location (e
, lhs
), "%s", chain_warning
);
593 struct expr_node
*rhs
= parse_next_level (lexer
, e
);
597 struct expr_node
*node
= expr_allocate_binary (e
, operator->type
,
599 bool lhs_ok
= type_coercion (e
, node
, 0);
600 bool rhs_ok
= type_coercion (e
, node
, 1);
602 if (!lhs_ok
|| !rhs_ok
)
605 for (size_t i
= 0; i
< n_ops
; i
++)
606 if (ops
[i
].token
== operator->token
)
609 const char *name
= operator_name (operator);
611 msg_at (SE
, expr_location (e
, node
),
612 _("The operands of %s must have the same type."), name
);
613 else if (operations
[node
->type
].args
[0] != OP_string
)
614 msg_at (SE
, expr_location (e
, node
),
615 _("Both operands of %s must be numeric."), name
);
617 msg_at (SE
, expr_location (e
, node
),
618 _("Both operands of %s must be strings."), name
);
620 msg_at (SN
, expr_location (e
, node
->args
[0]),
621 _("The left-hand operand of %s has type '%s'."),
622 name
, atom_type_name (expr_node_returns (node
->args
[0])));
623 msg_at (SN
, expr_location (e
, node
->args
[1]),
624 _("The right-hand operand of %s has type '%s'."),
625 name
, atom_type_name (expr_node_returns (node
->args
[1])));
634 static struct expr_node
*
635 parse_binary_operators (struct lexer
*lexer
, struct expression
*e
,
636 const struct operator ops
[], size_t n_ops
,
637 parse_recursively_func
*parse_next_level
,
638 const char *chain_warning
)
640 struct expr_node
*lhs
= parse_next_level (lexer
, e
);
644 return parse_binary_operators__ (lexer
, e
, ops
, n_ops
, parse_next_level
,
648 static struct expr_node
*
649 parse_inverting_unary_operator (struct lexer
*lexer
, struct expression
*e
,
650 const struct operator *op
,
651 parse_recursively_func
*parse_next_level
)
653 int start_ofs
= lex_ofs (lexer
);
654 unsigned int op_count
= 0;
655 while (lex_match (lexer
, op
->token
))
658 struct expr_node
*inner
= parse_next_level (lexer
, e
);
659 if (!inner
|| !op_count
)
662 struct expr_node
*outer
= expr_allocate_unary (e
, op
->type
, inner
);
663 expr_add_location (lexer
, e
, start_ofs
, outer
);
665 if (!type_coercion (e
, outer
, 0))
667 assert (operations
[outer
->type
].args
[0] != OP_string
);
669 const char *name
= operator_name (op
);
670 msg_at (SE
, expr_location (e
, outer
),
671 _("The unary %s operator requires a numeric operand."), name
);
673 msg_at (SN
, expr_location (e
, outer
->args
[0]),
674 _("The operand of %s has type '%s'."),
675 name
, atom_type_name (expr_node_returns (outer
->args
[0])));
680 return op_count
% 2 ? outer
: outer
->args
[0];
683 /* Parses the OR level. */
684 static struct expr_node
*
685 parse_or (struct lexer
*lexer
, struct expression
*e
)
687 static const struct operator op
= { T_OR
, OP_OR
};
688 return parse_binary_operators (lexer
, e
, &op
, 1, parse_and
, NULL
);
691 /* Parses the AND level. */
692 static struct expr_node
*
693 parse_and (struct lexer
*lexer
, struct expression
*e
)
695 static const struct operator op
= { T_AND
, OP_AND
};
697 return parse_binary_operators (lexer
, e
, &op
, 1, parse_not
, NULL
);
700 /* Parses the NOT level. */
701 static struct expr_node
*
702 parse_not (struct lexer
*lexer
, struct expression
*e
)
704 static const struct operator op
= { T_NOT
, OP_NOT
};
705 return parse_inverting_unary_operator (lexer
, e
, &op
, parse_rel
);
708 /* Parse relational operators. */
709 static struct expr_node
*
710 parse_rel (struct lexer
*lexer
, struct expression
*e
)
712 const char *chain_warning
=
713 _("Chaining relational operators (e.g. `a < b < c') will "
714 "not produce the mathematically expected result. "
715 "Use the AND logical operator to fix the problem "
716 "(e.g. `a < b AND b < c'). "
717 "If chaining is really intended, parentheses will disable "
718 "this warning (e.g. `(a < b) < c'.)");
720 static const struct operator ops
[] =
722 /* Numeric operators. */
731 /* String operators. */
732 { T_EQUALS
, OP_EQ_STRING
},
733 { T_EQ
, OP_EQ_STRING
},
734 { T_GE
, OP_GE_STRING
},
735 { T_GT
, OP_GT_STRING
},
736 { T_LE
, OP_LE_STRING
},
737 { T_LT
, OP_LT_STRING
},
738 { T_NE
, OP_NE_STRING
},
741 return parse_binary_operators (lexer
, e
, ops
, sizeof ops
/ sizeof *ops
,
742 parse_add
, chain_warning
);
745 /* Parses the addition and subtraction level. */
746 static struct expr_node
*
747 parse_add (struct lexer
*lexer
, struct expression
*e
)
749 static const struct operator ops
[] =
753 { T_NEG_NUM
, OP_ADD
},
756 return parse_binary_operators (lexer
, e
, ops
, sizeof ops
/ sizeof *ops
,
760 /* Parses the multiplication and division level. */
761 static struct expr_node
*
762 parse_mul (struct lexer
*lexer
, struct expression
*e
)
764 static const struct operator ops
[] =
766 { T_ASTERISK
, OP_MUL
},
770 return parse_binary_operators (lexer
, e
, ops
, sizeof ops
/ sizeof *ops
,
774 /* Parses the unary minus level. */
775 static struct expr_node
*
776 parse_neg (struct lexer
*lexer
, struct expression
*e
)
778 static const struct operator op
= { T_DASH
, OP_NEG
};
779 return parse_inverting_unary_operator (lexer
, e
, &op
, parse_exp
);
782 static struct expr_node
*
783 parse_exp (struct lexer
*lexer
, struct expression
*e
)
785 static const struct operator op
= { T_EXP
, OP_POW
};
787 const char *chain_warning
=
788 _("The exponentiation operator (`**') is left-associative, "
789 "even though right-associative semantics are more useful. "
790 "That is, `a**b**c' equals `(a**b)**c', not as `a**(b**c)'. "
791 "To disable this warning, insert parentheses.");
793 if (lex_token (lexer
) != T_NEG_NUM
|| lex_next_token (lexer
, 1) != T_EXP
)
794 return parse_binary_operators (lexer
, e
, &op
, 1,
795 parse_primary
, chain_warning
);
797 /* Special case for situations like "-5**6", which must be parsed as
800 int start_ofs
= lex_ofs (lexer
);
801 struct expr_node
*lhs
= expr_allocate_number (e
, -lex_tokval (lexer
));
804 struct expr_node
*node
= parse_binary_operators__ (
805 lexer
, e
, &op
, 1, parse_primary
, chain_warning
, lhs
);
809 node
= expr_allocate_unary (e
, OP_NEG
, node
);
810 expr_add_location (lexer
, e
, start_ofs
, node
);
814 /* Parses system variables. */
815 static struct expr_node
*
816 parse_sysvar (struct lexer
*lexer
, struct expression
*e
)
818 if (lex_match_id (lexer
, "$CASENUM"))
819 return expr_allocate_nullary (e
, OP_CASENUM
);
820 else if (lex_match_id (lexer
, "$DATE"))
822 static const char *months
[12] =
824 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
825 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
828 time_t last_proc_time
= time_of_last_procedure (e
->ds
);
833 time
= localtime (&last_proc_time
);
834 sprintf (temp_buf
, "%02d %s %02d", abs (time
->tm_mday
) % 100,
835 months
[abs (time
->tm_mon
) % 12], abs (time
->tm_year
) % 100);
837 ss_alloc_substring (&s
, ss_cstr (temp_buf
));
838 return expr_allocate_string (e
, s
);
840 else if (lex_match_id (lexer
, "$TRUE"))
841 return expr_allocate_boolean (e
, 1.0);
842 else if (lex_match_id (lexer
, "$FALSE"))
843 return expr_allocate_boolean (e
, 0.0);
844 else if (lex_match_id (lexer
, "$SYSMIS"))
845 return expr_allocate_number (e
, SYSMIS
);
846 else if (lex_match_id (lexer
, "$JDATE"))
848 time_t time
= time_of_last_procedure (e
->ds
);
849 struct tm
*tm
= localtime (&time
);
850 return expr_allocate_number (e
, expr_ymd_to_ofs (tm
->tm_year
+ 1900,
854 else if (lex_match_id (lexer
, "$TIME"))
856 time_t time
= time_of_last_procedure (e
->ds
);
857 struct tm
*tm
= localtime (&time
);
858 return expr_allocate_number (e
,
859 expr_ymd_to_date (tm
->tm_year
+ 1900,
862 + tm
->tm_hour
* 60 * 60.
866 else if (lex_match_id (lexer
, "$LENGTH"))
867 return expr_allocate_number (e
, settings_get_viewlength ());
868 else if (lex_match_id (lexer
, "$WIDTH"))
869 return expr_allocate_number (e
, settings_get_viewwidth ());
872 msg (SE
, _("Unknown system variable %s."), lex_tokcstr (lexer
));
877 /* Parses numbers, varnames, etc. */
878 static struct expr_node
*
879 parse_primary__ (struct lexer
*lexer
, struct expression
*e
)
881 switch (lex_token (lexer
))
884 if (lex_next_token (lexer
, 1) == T_LPAREN
)
886 /* An identifier followed by a left parenthesis may be
887 a vector element reference. If not, it's a function
889 if (e
->ds
!= NULL
&& dict_lookup_vector (dataset_dict (e
->ds
), lex_tokcstr (lexer
)) != NULL
)
890 return parse_vector_element (lexer
, e
);
892 return parse_function (lexer
, e
);
894 else if (lex_tokcstr (lexer
)[0] == '$')
896 /* $ at the beginning indicates a system variable. */
897 return parse_sysvar (lexer
, e
);
899 else if (e
->ds
!= NULL
&& dict_lookup_var (dataset_dict (e
->ds
), lex_tokcstr (lexer
)))
901 /* It looks like a user variable.
902 (It could be a format specifier, but we'll assume
903 it's a variable unless proven otherwise. */
904 return allocate_unary_variable (e
, parse_variable (lexer
, dataset_dict (e
->ds
)));
908 /* Try to parse it as a format specifier. */
913 ok
= parse_format_specifier (lexer
, &fmt
);
917 return expr_allocate_format (e
, &fmt
);
919 /* All attempts failed. */
920 msg (SE
, _("Unknown identifier %s."), lex_tokcstr (lexer
));
928 struct expr_node
*node
= expr_allocate_number (e
, lex_tokval (lexer
));
935 const char *dict_encoding
;
936 struct expr_node
*node
;
939 dict_encoding
= (e
->ds
!= NULL
940 ? dict_get_encoding (dataset_dict (e
->ds
))
942 s
= recode_string_pool (dict_encoding
, "UTF-8", lex_tokcstr (lexer
),
943 ss_length (lex_tokss (lexer
)), e
->expr_pool
);
944 node
= expr_allocate_string (e
, ss_cstr (s
));
952 /* Count number of left parentheses so that we can match them against
953 an equal number of right parentheses. This defeats trivial attempts
954 to exhaust the stack with a lot of left parentheses. (More
955 sophisticated attacks will still succeed.) */
957 while (lex_match (lexer
, T_LPAREN
))
960 struct expr_node
*node
= parse_or (lexer
, e
);
964 for (size_t i
= 0; i
< n
; i
++)
965 if (!lex_force_match (lexer
, T_RPAREN
))
972 lex_error (lexer
, NULL
);
977 static struct expr_node
*
978 parse_primary (struct lexer
*lexer
, struct expression
*e
)
980 int start_ofs
= lex_ofs (lexer
);
981 struct expr_node
*node
= parse_primary__ (lexer
, e
);
982 expr_add_location (lexer
, e
, start_ofs
, node
);
986 static struct expr_node
*
987 parse_vector_element (struct lexer
*lexer
, struct expression
*e
)
989 int vector_start_ofs
= lex_ofs (lexer
);
991 /* Find vector, skip token.
992 The caller must already have verified that the current token
993 is the name of a vector. */
994 const struct vector
*vector
= dict_lookup_vector (dataset_dict (e
->ds
),
995 lex_tokcstr (lexer
));
996 assert (vector
!= NULL
);
999 /* Skip left parenthesis token.
1000 The caller must have verified that the lookahead is a left
1002 assert (lex_token (lexer
) == T_LPAREN
);
1005 int element_start_ofs
= lex_ofs (lexer
);
1006 struct expr_node
*element
= parse_or (lexer
, e
);
1009 expr_add_location (lexer
, e
, element_start_ofs
, element
);
1011 if (!lex_match (lexer
, T_RPAREN
))
1014 operation_type type
= (vector_get_type (vector
) == VAL_NUMERIC
1015 ? OP_VEC_ELEM_NUM
: OP_VEC_ELEM_STR
);
1016 struct expr_node
*node
= expr_allocate_binary (
1017 e
, type
, element
, expr_allocate_vector (e
, vector
));
1018 expr_add_location (lexer
, e
, vector_start_ofs
, node
);
1020 if (!type_coercion (e
, node
, 1))
1022 msg_at (SE
, expr_location (e
, node
),
1023 _("A vector index must be numeric."));
1025 msg_at (SN
, expr_location (e
, node
->args
[0]),
1026 _("This vector index has type '%s'."),
1027 atom_type_name (expr_node_returns (node
->args
[0])));
1035 /* Individual function parsing. */
1037 const struct operation operations
[OP_first
+ n_OP
] = {
1038 #include "parse.inc"
1042 word_matches (const char **test
, const char **name
)
1044 size_t test_len
= strcspn (*test
, ".");
1045 size_t name_len
= strcspn (*name
, ".");
1046 if (test_len
== name_len
)
1048 if (buf_compare_case (*test
, *name
, test_len
))
1051 else if (test_len
< 3 || test_len
> name_len
)
1055 if (buf_compare_case (*test
, *name
, test_len
))
1061 if (**test
!= **name
)
1072 /* Returns 0 if TOKEN and FUNC do not match,
1073 1 if TOKEN is an acceptable abbreviation for FUNC,
1074 2 if TOKEN equals FUNC. */
1076 compare_function_names (const char *token_
, const char *func_
)
1078 const char *token
= token_
;
1079 const char *func
= func_
;
1080 while (*token
|| *func
)
1081 if (!word_matches (&token
, &func
))
1083 return !c_strcasecmp (token_
, func_
) ? 2 : 1;
1087 lookup_function (const char *token
,
1088 const struct operation
**first
,
1089 const struct operation
**last
)
1091 *first
= *last
= NULL
;
1092 const struct operation
*best
= NULL
;
1094 for (const struct operation
*f
= operations
+ OP_function_first
;
1095 f
<= operations
+ OP_function_last
; f
++)
1097 int score
= compare_function_names (token
, f
->name
);
1103 else if (score
== 1 && !(f
->flags
& OPF_NO_ABBREV
) && !best
)
1112 const struct operation
*f
= best
;
1113 while (f
<= operations
+ OP_function_last
1114 && !c_strcasecmp (f
->name
, best
->name
))
1122 extract_min_valid (const char *s
)
1124 char *p
= strrchr (s
, '.');
1126 || p
[1] < '0' || p
[1] > '9'
1127 || strspn (p
+ 1, "0123456789") != strlen (p
+ 1))
1130 return atoi (p
+ 1);
1134 match_function__ (struct expr_node
*node
, const struct operation
*f
)
1136 if (node
->n_args
< f
->n_args
1137 || (node
->n_args
> f
->n_args
&& (f
->flags
& OPF_ARRAY_OPERAND
) == 0)
1138 || node
->n_args
- (f
->n_args
- 1) < f
->array_min_elems
)
1141 for (size_t i
= 0; i
< node
->n_args
; i
++)
1142 if (!is_coercible (node
, i
))
1148 static const struct operation
*
1149 match_function (struct expr_node
*node
,
1150 const struct operation
*first
, const struct operation
*last
)
1152 for (const struct operation
*f
= first
; f
< last
; f
++)
1153 if (match_function__ (node
, f
))
1159 validate_function_args (const struct operation
*f
, int n_args
, int min_valid
)
1161 /* Count the function arguments that go into the trailing array (if any). We
1162 know that there must be at least the minimum number because
1163 match_function() already checked. */
1164 int array_n_args
= n_args
- (f
->n_args
- 1);
1165 assert (array_n_args
>= f
->array_min_elems
);
1167 if ((f
->flags
& OPF_ARRAY_OPERAND
)
1168 && array_n_args
% f
->array_granularity
!= 0)
1170 /* RANGE is the only case we have so far. It has paired arguments with
1171 one initial argument, and that's the only special case we deal with
1173 assert (f
->array_granularity
== 2);
1174 assert (n_args
% 2 == 0);
1175 msg (SE
, _("%s must have an odd number of arguments."), f
->prototype
);
1179 if (min_valid
!= -1)
1181 if (f
->array_min_elems
== 0)
1183 assert ((f
->flags
& OPF_MIN_VALID
) == 0);
1184 msg (SE
, _("%s function cannot accept suffix .%d to specify the "
1185 "minimum number of valid arguments."),
1186 f
->prototype
, min_valid
);
1191 assert (f
->flags
& OPF_MIN_VALID
);
1192 if (min_valid
> array_n_args
)
1194 msg (SE
, _("For %s with %d arguments, at most %d (not %d) may be "
1195 "required to be valid."),
1196 f
->prototype
, n_args
, array_n_args
, min_valid
);
1206 add_arg (struct expr_node
***args
, size_t *n_args
, size_t *allocated_args
,
1207 struct expr_node
*arg
,
1208 struct expression
*e
, struct lexer
*lexer
, int arg_start_ofs
)
1210 if (*n_args
>= *allocated_args
)
1211 *args
= x2nrealloc (*args
, allocated_args
, sizeof **args
);
1213 expr_add_location (lexer
, e
, arg_start_ofs
, arg
);
1214 (*args
)[(*n_args
)++] = arg
;
1218 put_invocation (struct string
*s
,
1219 const char *func_name
, struct expr_node
**args
, size_t n_args
)
1223 ds_put_format (s
, "%s(", func_name
);
1224 for (i
= 0; i
< n_args
; i
++)
1227 ds_put_cstr (s
, ", ");
1228 ds_put_cstr (s
, operations
[expr_node_returns (args
[i
])].prototype
);
1230 ds_put_byte (s
, ')');
1234 no_match (const char *func_name
,
1235 struct expr_node
**args
, size_t n_args
,
1236 const struct operation
*first
, const struct operation
*last
)
1239 const struct operation
*f
;
1243 if (last
- first
== 1)
1245 ds_put_format (&s
, _("Type mismatch invoking %s as "), first
->prototype
);
1246 put_invocation (&s
, func_name
, args
, n_args
);
1250 ds_put_cstr (&s
, _("Function invocation "));
1251 put_invocation (&s
, func_name
, args
, n_args
);
1252 ds_put_cstr (&s
, _(" does not match any known function. Candidates are:"));
1254 for (f
= first
; f
< last
; f
++)
1255 ds_put_format (&s
, "\n%s", f
->prototype
);
1257 ds_put_byte (&s
, '.');
1259 msg (SE
, "%s", ds_cstr (&s
));
1264 static struct expr_node
*
1265 parse_function (struct lexer
*lexer
, struct expression
*e
)
1267 struct string func_name
;
1268 ds_init_substring (&func_name
, lex_tokss (lexer
));
1270 int min_valid
= extract_min_valid (lex_tokcstr (lexer
));
1272 const struct operation
*first
, *last
;
1273 if (!lookup_function (lex_tokcstr (lexer
), &first
, &last
))
1275 msg (SE
, _("No function or vector named %s."), lex_tokcstr (lexer
));
1276 ds_destroy (&func_name
);
1280 int func_start_ofs
= lex_ofs (lexer
);
1282 if (!lex_force_match (lexer
, T_LPAREN
))
1284 ds_destroy (&func_name
);
1288 struct expr_node
**args
= NULL
;
1290 size_t allocated_args
= 0;
1291 if (lex_token (lexer
) != T_RPAREN
)
1294 int arg_start_ofs
= lex_ofs (lexer
);
1295 if (lex_token (lexer
) == T_ID
1296 && lex_next_token (lexer
, 1) == T_TO
)
1298 const struct variable
**vars
;
1301 if (!parse_variables_const (lexer
, dataset_dict (e
->ds
),
1302 &vars
, &n_vars
, PV_SINGLE
))
1304 for (size_t i
= 0; i
< n_vars
; i
++)
1305 add_arg (&args
, &n_args
, &allocated_args
,
1306 allocate_unary_variable (e
, vars
[i
]),
1307 e
, lexer
, arg_start_ofs
);
1312 struct expr_node
*arg
= parse_or (lexer
, e
);
1316 add_arg (&args
, &n_args
, &allocated_args
, arg
,
1317 e
, lexer
, arg_start_ofs
);
1319 if (lex_match (lexer
, T_RPAREN
))
1321 else if (!lex_match (lexer
, T_COMMA
))
1323 lex_error_expecting (lexer
, "`,'", "`)'");
1328 struct expr_node
*n
= expr_allocate_composite (e
, first
- operations
,
1330 expr_add_location (lexer
, e
, func_start_ofs
, n
);
1331 const struct operation
*f
= match_function (n
, first
, last
);
1334 no_match (ds_cstr (&func_name
), args
, n_args
, first
, last
);
1337 n
->type
= f
- operations
;
1338 n
->min_valid
= min_valid
!= -1 ? min_valid
: f
->array_min_elems
;
1340 for (size_t i
= 0; i
< n_args
; i
++)
1341 if (!type_coercion (e
, n
, i
))
1343 /* Unreachable because match_function already checked that the
1344 arguments were coercible. */
1347 if (!validate_function_args (f
, n_args
, min_valid
))
1350 if ((f
->flags
& OPF_EXTENSION
) && settings_get_syntax () == COMPATIBLE
)
1351 msg (SW
, _("%s is a PSPP extension."), f
->prototype
);
1352 if (f
->flags
& OPF_UNIMPLEMENTED
)
1354 msg (SE
, _("%s is not available in this version of PSPP."),
1358 if ((f
->flags
& OPF_PERM_ONLY
) &&
1359 proc_in_temporary_transformations (e
->ds
))
1361 msg (SE
, _("%s may not appear after %s."), f
->prototype
, "TEMPORARY");
1365 if (n
->type
== OP_LAG_Vn
|| n
->type
== OP_LAG_Vs
)
1366 dataset_need_lag (e
->ds
, 1);
1367 else if (n
->type
== OP_LAG_Vnn
|| n
->type
== OP_LAG_Vsn
)
1369 assert (n
->n_args
== 2);
1370 assert (n
->args
[1]->type
== OP_pos_int
);
1371 dataset_need_lag (e
->ds
, n
->args
[1]->integer
);
1375 ds_destroy (&func_name
);
1380 ds_destroy (&func_name
);
1384 /* Utility functions. */
1386 static struct expression
*
1387 expr_create (struct dataset
*ds
)
1389 struct pool
*pool
= pool_create ();
1390 struct expression
*e
= pool_alloc (pool
, sizeof *e
);
1391 *e
= (struct expression
) {
1394 .eval_pool
= pool_create_subpool (pool
),
1400 expr_node_returns (const struct expr_node
*n
)
1403 assert (is_operation (n
->type
));
1404 if (is_atom (n
->type
))
1406 else if (is_composite (n
->type
))
1407 return operations
[n
->type
].returns
;
1413 atom_type_name (atom_type type
)
1415 assert (is_atom (type
));
1416 return operations
[type
].name
;
1420 expr_allocate_nullary (struct expression
*e
, operation_type op
)
1422 return expr_allocate_composite (e
, op
, NULL
, 0);
1426 expr_allocate_unary (struct expression
*e
, operation_type op
,
1427 struct expr_node
*arg0
)
1429 return expr_allocate_composite (e
, op
, &arg0
, 1);
1433 expr_allocate_binary (struct expression
*e
, operation_type op
,
1434 struct expr_node
*arg0
, struct expr_node
*arg1
)
1436 struct expr_node
*args
[2];
1439 return expr_allocate_composite (e
, op
, args
, 2);
1443 expr_allocate_composite (struct expression
*e
, operation_type op
,
1444 struct expr_node
**args
, size_t n_args
)
1446 for (size_t i
= 0; i
< n_args
; i
++)
1450 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1451 *n
= (struct expr_node
) {
1454 .args
= pool_clone (e
->expr_pool
, args
, sizeof *n
->args
* n_args
),
1460 expr_allocate_number (struct expression
*e
, double d
)
1462 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1463 *n
= (struct expr_node
) { .type
= OP_number
, .number
= d
};
1468 expr_allocate_boolean (struct expression
*e
, double b
)
1470 assert (b
== 0.0 || b
== 1.0 || b
== SYSMIS
);
1472 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1473 *n
= (struct expr_node
) { .type
= OP_boolean
, .number
= b
};
1478 expr_allocate_integer (struct expression
*e
, int i
)
1480 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1481 *n
= (struct expr_node
) { .type
= OP_integer
, .integer
= i
};
1486 expr_allocate_pos_int (struct expression
*e
, int i
)
1490 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1491 *n
= (struct expr_node
) { .type
= OP_pos_int
, .integer
= i
};
1496 expr_allocate_vector (struct expression
*e
, const struct vector
*vector
)
1498 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1499 *n
= (struct expr_node
) { .type
= OP_vector
, .vector
= vector
};
1504 expr_allocate_string (struct expression
*e
, struct substring s
)
1506 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1507 *n
= (struct expr_node
) { .type
= OP_string
, .string
= s
};
1512 expr_allocate_variable (struct expression
*e
, const struct variable
*v
)
1514 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1515 *n
= (struct expr_node
) {
1516 .type
= var_is_numeric (v
) ? OP_num_var
: OP_str_var
,
1523 expr_allocate_format (struct expression
*e
, const struct fmt_spec
*format
)
1525 struct expr_node
*n
= pool_alloc (e
->expr_pool
, sizeof *n
);
1526 *n
= (struct expr_node
) { .type
= OP_format
, .format
= *format
};
1530 /* Allocates a unary composite node that represents the value of
1531 variable V in expression E. */
1532 static struct expr_node
*
1533 allocate_unary_variable (struct expression
*e
, const struct variable
*v
)
1536 return expr_allocate_unary (e
, var_is_numeric (v
) ? OP_NUM_VAR
: OP_STR_VAR
,
1537 expr_allocate_variable (e
, v
));
1540 /* Export function details to other modules. */
1542 /* Returns the operation structure for the function with the
1544 const struct operation
*
1545 expr_get_function (size_t idx
)
1547 assert (idx
< n_OP_function
);
1548 return &operations
[OP_function_first
+ idx
];
1551 /* Returns the number of expression functions. */
1553 expr_get_n_functions (void)
1555 return n_OP_function
;
1558 /* Returns the name of operation OP. */
1560 expr_operation_get_name (const struct operation
*op
)
1565 /* Returns the human-readable prototype for operation OP. */
1567 expr_operation_get_prototype (const struct operation
*op
)
1569 return op
->prototype
;
1572 /* Returns the number of arguments for operation OP. */
1574 expr_operation_get_n_args (const struct operation
*op
)