Cache regex compilation for another autoconf speedup.
[m4/ericb.git] / src / eval.c
blobd35364cae71bfedad56589dde010e0f5ab4043f8
1 /* GNU m4 -- A simple macro processor
3 Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007
4 Free Software Foundation, Inc.
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 /* This file contains the functions to evaluate integer expressions for
23 the "eval" macro. It is a little, fairly self-contained module, with
24 its own scanner, and a recursive descent parser. The only entry point
25 is evaluate (). */
27 #include "m4.h"
29 /* Evaluates token types. */
31 typedef enum eval_token
33 ERROR, BADOP,
34 PLUS, MINUS,
35 EXPONENT,
36 TIMES, DIVIDE, MODULO,
37 ASSIGN, EQ, NOTEQ, GT, GTEQ, LS, LSEQ,
38 LSHIFT, RSHIFT,
39 LNOT, LAND, LOR,
40 NOT, AND, OR, XOR,
41 LEFTP, RIGHTP,
42 NUMBER, EOTEXT
44 eval_token;
46 /* Error types. */
48 typedef enum eval_error
50 NO_ERROR,
51 DIVIDE_ZERO,
52 MODULO_ZERO,
53 NEGATIVE_EXPONENT,
54 /* All errors prior to SYNTAX_ERROR can be ignored in a dead
55 branch of && and ||. All errors after are just more details
56 about a syntax error. */
57 SYNTAX_ERROR,
58 MISSING_RIGHT,
59 UNKNOWN_INPUT,
60 EXCESS_INPUT,
61 INVALID_OPERATOR
63 eval_error;
65 static eval_error logical_or_term (eval_token, int32_t *);
66 static eval_error logical_and_term (eval_token, int32_t *);
67 static eval_error or_term (eval_token, int32_t *);
68 static eval_error xor_term (eval_token, int32_t *);
69 static eval_error and_term (eval_token, int32_t *);
70 static eval_error equality_term (eval_token, int32_t *);
71 static eval_error cmp_term (eval_token, int32_t *);
72 static eval_error shift_term (eval_token, int32_t *);
73 static eval_error add_term (eval_token, int32_t *);
74 static eval_error mult_term (eval_token, int32_t *);
75 static eval_error exp_term (eval_token, int32_t *);
76 static eval_error unary_term (eval_token, int32_t *);
77 static eval_error simple_term (eval_token, int32_t *);
79 /*--------------------.
80 | Lexical functions. |
81 `--------------------*/
83 /* Pointer to next character of input text. */
84 static const char *eval_text;
86 /* Value of eval_text, from before last call of eval_lex (). This is so we
87 can back up, if we have read too much. */
88 static const char *last_text;
90 static void
91 eval_init_lex (const char *text)
93 eval_text = text;
94 last_text = NULL;
97 static void
98 eval_undo (void)
100 eval_text = last_text;
103 /* VAL is numerical value, if any. */
105 static eval_token
106 eval_lex (int32_t *val)
108 while (isspace (to_uchar (*eval_text)))
109 eval_text++;
111 last_text = eval_text;
113 if (*eval_text == '\0')
114 return EOTEXT;
116 if (isdigit (to_uchar (*eval_text)))
118 int base, digit;
120 if (*eval_text == '0')
122 eval_text++;
123 switch (*eval_text)
125 case 'x':
126 case 'X':
127 base = 16;
128 eval_text++;
129 break;
131 case 'b':
132 case 'B':
133 base = 2;
134 eval_text++;
135 break;
137 case 'r':
138 case 'R':
139 base = 0;
140 eval_text++;
141 while (isdigit (to_uchar (*eval_text)) && base <= 36)
142 base = 10 * base + *eval_text++ - '0';
143 if (base == 0 || base > 36 || *eval_text != ':')
144 return ERROR;
145 eval_text++;
146 break;
148 default:
149 base = 8;
152 else
153 base = 10;
155 /* FIXME - this calculation can overflow. Consider xstrtol. */
156 *val = 0;
157 for (; *eval_text; eval_text++)
159 if (isdigit (to_uchar (*eval_text)))
160 digit = *eval_text - '0';
161 else if (islower (to_uchar (*eval_text)))
162 digit = *eval_text - 'a' + 10;
163 else if (isupper (to_uchar (*eval_text)))
164 digit = *eval_text - 'A' + 10;
165 else
166 break;
168 if (base == 1)
170 if (digit == 1)
171 (*val)++;
172 else if (digit == 0 && !*val)
173 continue;
174 else
175 break;
177 else if (digit >= base)
178 break;
179 else
180 *val = *val * base + digit;
182 return NUMBER;
185 switch (*eval_text++)
187 case '+':
188 if (*eval_text == '+' || *eval_text == '=')
189 return BADOP;
190 return PLUS;
191 case '-':
192 if (*eval_text == '-' || *eval_text == '=')
193 return BADOP;
194 return MINUS;
195 case '*':
196 if (*eval_text == '*')
198 eval_text++;
199 return EXPONENT;
201 else if (*eval_text == '=')
202 return BADOP;
203 return TIMES;
204 case '/':
205 if (*eval_text == '=')
206 return BADOP;
207 return DIVIDE;
208 case '%':
209 if (*eval_text == '=')
210 return BADOP;
211 return MODULO;
212 case '=':
213 if (*eval_text == '=')
215 eval_text++;
216 return EQ;
218 return ASSIGN;
219 case '!':
220 if (*eval_text == '=')
222 eval_text++;
223 return NOTEQ;
225 return LNOT;
226 case '>':
227 if (*eval_text == '=')
229 eval_text++;
230 return GTEQ;
232 else if (*eval_text == '>')
234 if (*++eval_text == '=')
235 return BADOP;
236 return RSHIFT;
238 return GT;
239 case '<':
240 if (*eval_text == '=')
242 eval_text++;
243 return LSEQ;
245 else if (*eval_text == '<')
247 if (*++eval_text == '=')
248 return BADOP;
249 return LSHIFT;
251 return LS;
252 case '^':
253 if (*eval_text == '=')
254 return BADOP;
255 return XOR;
256 case '~':
257 return NOT;
258 case '&':
259 if (*eval_text == '&')
261 eval_text++;
262 return LAND;
264 else if (*eval_text == '=')
265 return BADOP;
266 return AND;
267 case '|':
268 if (*eval_text == '|')
270 eval_text++;
271 return LOR;
273 else if (*eval_text == '=')
274 return BADOP;
275 return OR;
276 case '(':
277 return LEFTP;
278 case ')':
279 return RIGHTP;
280 default:
281 return ERROR;
285 /*---------------------------------------.
286 | Main entry point, called from "eval". |
287 `---------------------------------------*/
289 bool
290 evaluate (const char *expr, int32_t *val)
292 eval_token et;
293 eval_error err;
295 eval_init_lex (expr);
296 et = eval_lex (val);
297 err = logical_or_term (et, val);
299 if (err == NO_ERROR && *eval_text != '\0')
301 if (eval_lex (val) == BADOP)
302 err = INVALID_OPERATOR;
303 else
304 err = EXCESS_INPUT;
307 switch (err)
309 case NO_ERROR:
310 break;
312 case MISSING_RIGHT:
313 M4ERROR ((warning_status, 0,
314 "bad expression in eval (missing right parenthesis): %s",
315 expr));
316 break;
318 case SYNTAX_ERROR:
319 M4ERROR ((warning_status, 0,
320 "bad expression in eval: %s", expr));
321 break;
323 case UNKNOWN_INPUT:
324 M4ERROR ((warning_status, 0,
325 "bad expression in eval (bad input): %s", expr));
326 break;
328 case EXCESS_INPUT:
329 M4ERROR ((warning_status, 0,
330 "bad expression in eval (excess input): %s", expr));
331 break;
333 case INVALID_OPERATOR:
334 M4ERROR ((warning_status, 0,
335 "invalid operator in eval: %s", expr));
336 retcode = EXIT_FAILURE;
337 break;
339 case DIVIDE_ZERO:
340 M4ERROR ((warning_status, 0,
341 "divide by zero in eval: %s", expr));
342 break;
344 case MODULO_ZERO:
345 M4ERROR ((warning_status, 0,
346 "modulo by zero in eval: %s", expr));
347 break;
349 case NEGATIVE_EXPONENT:
350 M4ERROR ((warning_status, 0,
351 "negative exponent in eval: %s", expr));
352 break;
354 default:
355 M4ERROR ((warning_status, 0,
356 "INTERNAL ERROR: bad error code in evaluate ()"));
357 abort ();
360 return err != NO_ERROR;
363 /*---------------------------.
364 | Recursive descent parser. |
365 `---------------------------*/
367 static eval_error
368 logical_or_term (eval_token et, int32_t *v1)
370 int32_t v2;
371 eval_error er;
373 if ((er = logical_and_term (et, v1)) != NO_ERROR)
374 return er;
376 while ((et = eval_lex (&v2)) == LOR)
378 et = eval_lex (&v2);
379 if (et == ERROR)
380 return UNKNOWN_INPUT;
382 /* Implement short-circuiting of valid syntax. */
383 er = logical_and_term (et, &v2);
384 if (er == NO_ERROR)
385 *v1 = *v1 || v2;
386 else if (*v1 != 0 && er < SYNTAX_ERROR)
387 *v1 = 1;
388 else
389 return er;
391 if (et == ERROR)
392 return UNKNOWN_INPUT;
394 eval_undo ();
395 return NO_ERROR;
398 static eval_error
399 logical_and_term (eval_token et, int32_t *v1)
401 int32_t v2;
402 eval_error er;
404 if ((er = or_term (et, v1)) != NO_ERROR)
405 return er;
407 while ((et = eval_lex (&v2)) == LAND)
409 et = eval_lex (&v2);
410 if (et == ERROR)
411 return UNKNOWN_INPUT;
413 /* Implement short-circuiting of valid syntax. */
414 er = or_term (et, &v2);
415 if (er == NO_ERROR)
416 *v1 = *v1 && v2;
417 else if (*v1 == 0 && er < SYNTAX_ERROR)
418 ; /* v1 is already 0 */
419 else
420 return er;
422 if (et == ERROR)
423 return UNKNOWN_INPUT;
425 eval_undo ();
426 return NO_ERROR;
429 static eval_error
430 or_term (eval_token et, int32_t *v1)
432 int32_t v2;
433 eval_error er;
435 if ((er = xor_term (et, v1)) != NO_ERROR)
436 return er;
438 while ((et = eval_lex (&v2)) == OR)
440 et = eval_lex (&v2);
441 if (et == ERROR)
442 return UNKNOWN_INPUT;
444 if ((er = xor_term (et, &v2)) != NO_ERROR)
445 return er;
447 *v1 |= v2;
449 if (et == ERROR)
450 return UNKNOWN_INPUT;
452 eval_undo ();
453 return NO_ERROR;
456 static eval_error
457 xor_term (eval_token et, int32_t *v1)
459 int32_t v2;
460 eval_error er;
462 if ((er = and_term (et, v1)) != NO_ERROR)
463 return er;
465 while ((et = eval_lex (&v2)) == XOR)
467 et = eval_lex (&v2);
468 if (et == ERROR)
469 return UNKNOWN_INPUT;
471 if ((er = and_term (et, &v2)) != NO_ERROR)
472 return er;
474 *v1 ^= v2;
476 if (et == ERROR)
477 return UNKNOWN_INPUT;
479 eval_undo ();
480 return NO_ERROR;
483 static eval_error
484 and_term (eval_token et, int32_t *v1)
486 int32_t v2;
487 eval_error er;
489 if ((er = equality_term (et, v1)) != NO_ERROR)
490 return er;
492 while ((et = eval_lex (&v2)) == AND)
494 et = eval_lex (&v2);
495 if (et == ERROR)
496 return UNKNOWN_INPUT;
498 if ((er = equality_term (et, &v2)) != NO_ERROR)
499 return er;
501 *v1 &= v2;
503 if (et == ERROR)
504 return UNKNOWN_INPUT;
506 eval_undo ();
507 return NO_ERROR;
510 static eval_error
511 equality_term (eval_token et, int32_t *v1)
513 eval_token op;
514 int32_t v2;
515 eval_error er;
517 if ((er = cmp_term (et, v1)) != NO_ERROR)
518 return er;
520 /* In the 1.4.x series, we maintain the traditional behavior that
521 '=' is a synonym for '=='; however, this is contrary to POSIX and
522 we hope to convert '=' to mean assignment in 2.0. */
523 while ((op = eval_lex (&v2)) == EQ || op == NOTEQ || op == ASSIGN)
525 et = eval_lex (&v2);
526 if (et == ERROR)
527 return UNKNOWN_INPUT;
529 if ((er = cmp_term (et, &v2)) != NO_ERROR)
530 return er;
532 if (op == ASSIGN)
534 M4ERROR ((warning_status, 0, "\
535 Warning: recommend ==, not =, for equality operator"));
536 op = EQ;
538 *v1 = (op == EQ) == (*v1 == v2);
540 if (op == ERROR)
541 return UNKNOWN_INPUT;
543 eval_undo ();
544 return NO_ERROR;
547 static eval_error
548 cmp_term (eval_token et, int32_t *v1)
550 eval_token op;
551 int32_t v2;
552 eval_error er;
554 if ((er = shift_term (et, v1)) != NO_ERROR)
555 return er;
557 while ((op = eval_lex (&v2)) == GT || op == GTEQ
558 || op == LS || op == LSEQ)
561 et = eval_lex (&v2);
562 if (et == ERROR)
563 return UNKNOWN_INPUT;
565 if ((er = shift_term (et, &v2)) != NO_ERROR)
566 return er;
568 switch (op)
570 case GT:
571 *v1 = *v1 > v2;
572 break;
574 case GTEQ:
575 *v1 = *v1 >= v2;
576 break;
578 case LS:
579 *v1 = *v1 < v2;
580 break;
582 case LSEQ:
583 *v1 = *v1 <= v2;
584 break;
586 default:
587 M4ERROR ((warning_status, 0,
588 "INTERNAL ERROR: bad comparison operator in cmp_term ()"));
589 abort ();
592 if (op == ERROR)
593 return UNKNOWN_INPUT;
595 eval_undo ();
596 return NO_ERROR;
599 static eval_error
600 shift_term (eval_token et, int32_t *v1)
602 eval_token op;
603 int32_t v2;
604 uint32_t u1;
605 eval_error er;
607 if ((er = add_term (et, v1)) != NO_ERROR)
608 return er;
610 while ((op = eval_lex (&v2)) == LSHIFT || op == RSHIFT)
613 et = eval_lex (&v2);
614 if (et == ERROR)
615 return UNKNOWN_INPUT;
617 if ((er = add_term (et, &v2)) != NO_ERROR)
618 return er;
620 /* Minimize undefined C behavior (shifting by a negative number,
621 shifting by the width or greater, left shift overflow, or
622 right shift of a negative number). Implement Java 32-bit
623 wrap-around semantics. This code assumes that the
624 implementation-defined overflow when casting unsigned to
625 signed is a silent twos-complement wrap-around. */
626 switch (op)
628 case LSHIFT:
629 u1 = *v1;
630 u1 <<= (uint32_t) (v2 & 0x1f);
631 *v1 = u1;
632 break;
634 case RSHIFT:
635 u1 = *v1 < 0 ? ~*v1 : *v1;
636 u1 >>= (uint32_t) (v2 & 0x1f);
637 *v1 = *v1 < 0 ? ~u1 : u1;
638 break;
640 default:
641 M4ERROR ((warning_status, 0,
642 "INTERNAL ERROR: bad shift operator in shift_term ()"));
643 abort ();
646 if (op == ERROR)
647 return UNKNOWN_INPUT;
649 eval_undo ();
650 return NO_ERROR;
653 static eval_error
654 add_term (eval_token et, int32_t *v1)
656 eval_token op;
657 int32_t v2;
658 eval_error er;
660 if ((er = mult_term (et, v1)) != NO_ERROR)
661 return er;
663 while ((op = eval_lex (&v2)) == PLUS || op == MINUS)
665 et = eval_lex (&v2);
666 if (et == ERROR)
667 return UNKNOWN_INPUT;
669 if ((er = mult_term (et, &v2)) != NO_ERROR)
670 return er;
672 /* Minimize undefined C behavior on overflow. This code assumes
673 that the implementation-defined overflow when casting
674 unsigned to signed is a silent twos-complement
675 wrap-around. */
676 if (op == PLUS)
677 *v1 = (int32_t) ((uint32_t) *v1 + (uint32_t) v2);
678 else
679 *v1 = (int32_t) ((uint32_t) *v1 - (uint32_t) v2);
681 if (op == ERROR)
682 return UNKNOWN_INPUT;
684 eval_undo ();
685 return NO_ERROR;
688 static eval_error
689 mult_term (eval_token et, int32_t *v1)
691 eval_token op;
692 int32_t v2;
693 eval_error er;
695 if ((er = exp_term (et, v1)) != NO_ERROR)
696 return er;
698 while ((op = eval_lex (&v2)) == TIMES || op == DIVIDE || op == MODULO)
700 et = eval_lex (&v2);
701 if (et == ERROR)
702 return UNKNOWN_INPUT;
704 if ((er = exp_term (et, &v2)) != NO_ERROR)
705 return er;
707 /* Minimize undefined C behavior on overflow. This code assumes
708 that the implementation-defined overflow when casting
709 unsigned to signed is a silent twos-complement
710 wrap-around. */
711 switch (op)
713 case TIMES:
714 *v1 = (int32_t) ((uint32_t) *v1 * (uint32_t) v2);
715 break;
717 case DIVIDE:
718 if (v2 == 0)
719 return DIVIDE_ZERO;
720 else if (v2 == -1)
721 /* Avoid overflow, and the x86 SIGFPE on INT_MIN / -1. */
722 *v1 = (int32_t) -(uint32_t) *v1;
723 else
724 *v1 /= v2;
725 break;
727 case MODULO:
728 if (v2 == 0)
729 return MODULO_ZERO;
730 else if (v2 == -1)
731 /* Avoid the x86 SIGFPE on INT_MIN % -1. */
732 *v1 = 0;
733 else
734 *v1 %= v2;
735 break;
737 default:
738 M4ERROR ((warning_status, 0,
739 "INTERNAL ERROR: bad operator in mult_term ()"));
740 abort ();
743 if (op == ERROR)
744 return UNKNOWN_INPUT;
746 eval_undo ();
747 return NO_ERROR;
750 static eval_error
751 exp_term (eval_token et, int32_t *v1)
753 uint32_t result;
754 int32_t v2;
755 eval_error er;
757 if ((er = unary_term (et, v1)) != NO_ERROR)
758 return er;
760 while ((et = eval_lex (&v2)) == EXPONENT)
762 et = eval_lex (&v2);
763 if (et == ERROR)
764 return UNKNOWN_INPUT;
766 if ((er = exp_term (et, &v2)) != NO_ERROR)
767 return er;
769 /* Minimize undefined C behavior on overflow. This code assumes
770 that the implementation-defined overflow when casting
771 unsigned to signed is a silent twos-complement
772 wrap-around. */
773 result = 1;
774 if (v2 < 0)
775 return NEGATIVE_EXPONENT;
776 if (*v1 == 0 && v2 == 0)
777 return DIVIDE_ZERO;
778 while (v2-- > 0)
779 result *= (uint32_t) *v1;
780 *v1 = result;
782 if (et == ERROR)
783 return UNKNOWN_INPUT;
785 eval_undo ();
786 return NO_ERROR;
789 static eval_error
790 unary_term (eval_token et, int32_t *v1)
792 eval_token et2 = et;
793 eval_error er;
795 if (et == PLUS || et == MINUS || et == NOT || et == LNOT)
797 et2 = eval_lex (v1);
798 if (et2 == ERROR)
799 return UNKNOWN_INPUT;
801 if ((er = unary_term (et2, v1)) != NO_ERROR)
802 return er;
804 /* Minimize undefined C behavior on overflow. This code assumes
805 that the implementation-defined overflow when casting
806 unsigned to signed is a silent twos-complement
807 wrap-around. */
808 if (et == MINUS)
809 *v1 = (int32_t) -(uint32_t) *v1;
810 else if (et == NOT)
811 *v1 = ~*v1;
812 else if (et == LNOT)
813 *v1 = *v1 == 0 ? 1 : 0;
815 else if ((er = simple_term (et, v1)) != NO_ERROR)
816 return er;
818 return NO_ERROR;
821 static eval_error
822 simple_term (eval_token et, int32_t *v1)
824 int32_t v2;
825 eval_error er;
827 switch (et)
829 case LEFTP:
830 et = eval_lex (v1);
831 if (et == ERROR)
832 return UNKNOWN_INPUT;
834 if ((er = logical_or_term (et, v1)) != NO_ERROR)
835 return er;
837 et = eval_lex (&v2);
838 if (et == ERROR)
839 return UNKNOWN_INPUT;
841 if (et != RIGHTP)
842 return MISSING_RIGHT;
844 break;
846 case NUMBER:
847 break;
849 case BADOP:
850 return INVALID_OPERATOR;
852 default:
853 return SYNTAX_ERROR;
855 return NO_ERROR;