expressions: Convert SYSMIS into int as INT_MIN during optimization too.
[pspp.git] / src / language / expressions / optimize.c
blob129dbc30e65056f8164ba0dae78812fe4fe0f60e
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "language/expressions/private.h"
21 #include <math.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <stdlib.h>
26 #include "data/calendar.h"
27 #include "data/data-in.h"
28 #include "data/variable.h"
29 #include "evaluate.h"
30 #include "language/expressions/helpers.h"
31 #include "language/expressions/public.h"
32 #include "libpspp/assertion.h"
33 #include "libpspp/message.h"
34 #include "libpspp/misc.h"
35 #include "libpspp/pool.h"
36 #include "libpspp/str.h"
38 #include "gl/xalloc.h"
40 static struct expr_node *evaluate_tree (struct expr_node *, struct expression *);
41 static struct expr_node *optimize_tree (struct expr_node *, struct expression *);
43 struct expr_node *
44 expr_optimize (struct expr_node *node, struct expression *e)
46 int n_nonconst = 0; /* Number of nonconstant children. */
47 int n_sysmis = 0; /* Number of system-missing children. */
48 const struct operation *op;
49 int i;
51 /* We can't optimize an atom. */
52 if (is_atom (node->type))
53 return node;
55 /* Start by optimizing all the children. */
56 for (i = 0; i < node->n_args; i++)
58 node->args[i] = expr_optimize (node->args[i], e);
59 if (node->args[i]->type == OP_number)
61 if (node->args[i]->number == SYSMIS)
62 n_sysmis++;
65 if (!is_atom (node->args[i]->type))
66 n_nonconst++;
69 op = &operations[node->type];
71 struct expr_node *new;
72 if (n_sysmis && (op->flags & OPF_ABSORB_MISS) == 0)
74 /* Most operations produce SYSMIS given any SYSMIS
75 argument. */
76 assert (op->returns == OP_number || op->returns == OP_boolean);
77 new = (op->returns == OP_number
78 ? expr_allocate_number (e, SYSMIS)
79 : expr_allocate_boolean (e, SYSMIS));
81 else if (!n_nonconst && (op->flags & OPF_NONOPTIMIZABLE) == 0)
83 /* Evaluate constant expressions. */
84 new = evaluate_tree (node, e);
86 else
88 /* A few optimization possibilities are still left. */
89 new = optimize_tree (node, e);
92 if (new != node && !new->location)
94 const struct msg_location *loc = expr_location (e, node);
95 new->location = CONST_CAST (struct msg_location *, loc);
97 return new;
100 static int
101 eq_double (struct expr_node *node, double n)
103 return node->type == OP_number && node->number == n;
106 static struct expr_node *
107 optimize_tree (struct expr_node *n, struct expression *e)
109 assert (is_composite (n->type));
111 /* If you add to these optimizations, please also add a
112 correctness test in tests/expressions/expressions.sh. */
114 /* x+0, x-0, 0+x => x. */
115 if ((n->type == OP_ADD || n->type == OP_SUB) && eq_double (n->args[1], 0.))
116 return n->args[0];
117 else if (n->type == OP_ADD && eq_double (n->args[0], 0.))
118 return n->args[1];
120 /* x*1, x/1, 1*x => x. */
121 else if ((n->type == OP_MUL || n->type == OP_DIV)
122 && eq_double (n->args[1], 1.))
123 return n->args[0];
124 else if (n->type == OP_MUL && eq_double (n->args[0], 1.))
125 return n->args[1];
127 /* 0*x, 0/x, x*0, MOD(0,x) => 0. */
128 else if (((n->type == OP_MUL || n->type == OP_DIV || n->type == OP_MOD_nn)
129 && eq_double (n->args[0], 0.))
130 || (n->type == OP_MUL && eq_double (n->args[1], 0.)))
131 return expr_allocate_number (e, 0.);
133 /* x**1 => x. */
134 else if (n->type == OP_POW && eq_double (n->args[1], 1))
135 return n->args[0];
137 /* x**2 => SQUARE(x). */
138 else if (n->type == OP_POW && eq_double (n->args[1], 2))
139 return expr_allocate_unary (e, OP_SQUARE, n->args[0]);
141 /* Otherwise, nothing to do. */
142 else
143 return n;
146 static double
147 get_number_arg (struct expr_node *n, size_t arg_idx)
149 assert (arg_idx < n->n_args);
150 assert (n->args[arg_idx]->type == OP_number
151 || n->args[arg_idx]->type == OP_boolean
152 || n->args[arg_idx]->type == OP_integer);
153 return n->args[arg_idx]->number;
156 static double *
157 get_number_args (struct expr_node *n, size_t arg_idx, size_t n_args,
158 struct expression *e)
160 double *d = pool_alloc (e->expr_pool, sizeof *d * n_args);
161 for (size_t i = 0; i < n_args; i++)
162 d[i] = get_number_arg (n, i + arg_idx);
163 return d;
166 static int
167 get_integer_arg (struct expr_node *n, size_t arg_idx)
169 double number = n->args[arg_idx]->number;
170 return number == SYSMIS ? INT_MIN : number;
173 static struct substring
174 get_string_arg (struct expr_node *n, size_t arg_idx)
176 assert (arg_idx < n->n_args);
177 assert (n->args[arg_idx]->type == OP_string);
178 return n->args[arg_idx]->string;
181 static struct substring *
182 get_string_args (struct expr_node *n, size_t arg_idx, size_t n_args,
183 struct expression *e)
185 struct substring *s;
186 size_t i;
188 s = pool_alloc (e->expr_pool, sizeof *s * n_args);
189 for (i = 0; i < n_args; i++)
190 s[i] = get_string_arg (n, i + arg_idx);
191 return s;
194 static const struct fmt_spec *
195 get_format_arg (struct expr_node *n, size_t arg_idx)
197 assert (arg_idx < n->n_args);
198 assert (n->args[arg_idx]->type == OP_ni_format
199 || n->args[arg_idx]->type == OP_no_format);
200 return &n->args[arg_idx]->format;
203 static const struct expr_node *
204 get_expr_node_arg (struct expr_node *n, size_t arg_idx)
206 assert (arg_idx < n->n_args);
207 assert (n->args[arg_idx]->type == OP_expr_node);
208 return n->args[arg_idx]->expr_node;
211 static struct expr_node *
212 evaluate_tree (struct expr_node *node, struct expression *e)
214 switch (node->type)
216 #include "optimize.inc"
218 default:
219 NOT_REACHED ();
222 NOT_REACHED ();
225 /* Expression flattening. */
227 static union operation_data *allocate_aux (struct expression *,
228 operation_type);
229 static void flatten_node (struct expr_node *, struct expression *);
231 static void
232 emit_operation (struct expression *e, operation_type type)
234 allocate_aux (e, OP_operation)->operation = type;
237 static void
238 emit_number (struct expression *e, double n)
240 allocate_aux (e, OP_number)->number = n;
243 static void
244 emit_string (struct expression *e, struct substring s)
246 allocate_aux (e, OP_string)->string = s;
249 static void
250 emit_format (struct expression *e, const struct fmt_spec *f)
252 allocate_aux (e, OP_format)->format = pool_clone (e->expr_pool,
253 f, sizeof *f);
256 static void
257 emit_variable (struct expression *e, const struct variable *v)
259 allocate_aux (e, OP_variable)->variable = v;
262 static void
263 emit_vector (struct expression *e, const struct vector *v)
265 allocate_aux (e, OP_vector)->vector = v;
268 static void
269 emit_integer (struct expression *e, int i)
271 allocate_aux (e, OP_integer)->integer = i;
274 void
275 expr_flatten (struct expr_node *n, struct expression *e)
277 flatten_node (n, e);
278 e->type = expr_node_returns (n);
279 emit_operation (e, (e->type == OP_string
280 ? OP_return_string : OP_return_number));
283 static void
284 flatten_atom (struct expr_node *n, struct expression *e)
286 switch (n->type)
288 case OP_number:
289 case OP_boolean:
290 emit_operation (e, OP_number);
291 emit_number (e, n->number);
292 break;
294 case OP_string:
295 emit_operation (e, OP_string);
296 emit_string (e, n->string);
297 break;
299 case OP_num_var:
300 case OP_str_var:
301 case OP_vector:
302 case OP_no_format:
303 case OP_ni_format:
304 case OP_pos_int:
305 case OP_expr_node:
306 /* These are passed as aux data following the
307 operation. */
308 break;
310 default:
311 NOT_REACHED ();
315 static void
316 flatten_composite (struct expr_node *n, struct expression *e)
318 const struct operation *op = &operations[n->type];
319 size_t i;
321 for (i = 0; i < n->n_args; i++)
322 flatten_node (n->args[i], e);
324 if (n->type != OP_BOOLEAN_TO_NUM)
325 emit_operation (e, n->type);
327 for (i = 0; i < n->n_args; i++)
329 struct expr_node *arg = n->args[i];
330 switch (arg->type)
332 case OP_num_var:
333 case OP_str_var:
334 emit_variable (e, arg->variable);
335 break;
337 case OP_vector:
338 emit_vector (e, arg->vector);
339 break;
341 case OP_ni_format:
342 case OP_no_format:
343 emit_format (e, &arg->format);
344 break;
346 case OP_pos_int:
347 emit_integer (e, arg->integer);
348 break;
350 case OP_expr_node:
351 allocate_aux (e, OP_expr_node)->expr_node = arg->expr_node;
352 break;
354 default:
355 /* Nothing to do. */
356 break;
360 if (op->flags & OPF_ARRAY_OPERAND)
361 emit_integer (e, n->n_args - op->n_args + 1);
362 if (op->flags & OPF_MIN_VALID)
363 emit_integer (e, n->min_valid);
364 if (op->flags & OPF_EXPR_NODE)
365 allocate_aux (e, OP_expr_node)->expr_node = n;
368 void
369 flatten_node (struct expr_node *n, struct expression *e)
371 assert (is_operation (n->type));
373 if (is_atom (n->type))
374 flatten_atom (n, e);
375 else if (is_composite (n->type))
376 flatten_composite (n, e);
377 else
378 NOT_REACHED ();
381 static union operation_data *
382 allocate_aux (struct expression *e, operation_type type)
384 if (e->n_ops >= e->allocated_ops)
386 e->allocated_ops = (e->allocated_ops + 8) * 3 / 2;
387 e->ops = pool_realloc (e->expr_pool, e->ops,
388 sizeof *e->ops * e->allocated_ops);
389 e->op_types = pool_realloc (e->expr_pool, e->op_types,
390 sizeof *e->op_types * e->allocated_ops);
393 e->op_types[e->n_ops] = type;
394 return &e->ops[e->n_ops++];