maint: update all copyright year number ranges
[coreutils.git] / src / expr.c
blob3448fc82224a54a0d8ec9476bce7788e5816b2aa
1 /* expr -- evaluate expressions.
2 Copyright (C) 1986-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a separate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
31 #include <config.h>
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include "system.h"
36 #include <regex.h>
37 #include "die.h"
38 #include "error.h"
39 #include "long-options.h"
40 #include "strnumcmp.h"
41 #include "xstrtol.h"
43 /* Various parts of this code assume size_t fits into unsigned long
44 int, the widest unsigned type that GMP supports. */
45 verify (SIZE_MAX <= ULONG_MAX);
47 #ifndef HAVE_GMP
48 # define HAVE_GMP 0
49 #endif
51 #if HAVE_GMP
52 # include <gmp.h>
53 #else
54 static void integer_overflow (char) ATTRIBUTE_NORETURN;
55 /* Approximate gmp.h well enough for expr.c's purposes. */
56 typedef intmax_t mpz_t[1];
57 static void mpz_clear (mpz_t z) { (void) z; }
58 static void mpz_init_set_ui (mpz_t z, unsigned long int i) { z[0] = i; }
59 static int
60 mpz_init_set_str (mpz_t z, char *s, int base)
62 return xstrtoimax (s, NULL, base, z, NULL) == LONGINT_OK ? 0 : -1;
64 static void
65 mpz_add (mpz_t r, mpz_t a0, mpz_t b0)
67 intmax_t a = a0[0];
68 intmax_t b = b0[0];
69 intmax_t val = a + b;
70 if ((val < a) != (b < 0))
71 integer_overflow ('+');
72 r[0] = val;
74 static void
75 mpz_sub (mpz_t r, mpz_t a0, mpz_t b0)
77 intmax_t a = a0[0];
78 intmax_t b = b0[0];
79 intmax_t val = a - b;
80 if ((a < val) != (b < 0))
81 integer_overflow ('-');
82 r[0] = val;
84 static void
85 mpz_mul (mpz_t r, mpz_t a0, mpz_t b0)
87 intmax_t a = a0[0];
88 intmax_t b = b0[0];
89 intmax_t val = a * b;
90 if (! (a == 0 || b == 0
91 || ((val < 0) == ((a < 0) ^ (b < 0)) && val / a == b)))
92 integer_overflow ('*');
93 r[0] = val;
95 static void
96 mpz_tdiv_q (mpz_t r, mpz_t a0, mpz_t b0)
98 intmax_t a = a0[0];
99 intmax_t b = b0[0];
101 /* Some x86-style hosts raise an exception for INT_MIN / -1. */
102 if (a < - INTMAX_MAX && b == -1)
103 integer_overflow ('/');
104 r[0] = a / b;
106 static void
107 mpz_tdiv_r (mpz_t r, mpz_t a0, mpz_t b0)
109 intmax_t a = a0[0];
110 intmax_t b = b0[0];
112 /* Some x86-style hosts raise an exception for INT_MIN % -1. */
113 r[0] = a < - INTMAX_MAX && b == -1 ? 0 : a % b;
115 static char *
116 mpz_get_str (char const *str, int base, mpz_t z)
118 (void) str; (void) base;
119 char buf[INT_BUFSIZE_BOUND (intmax_t)];
120 return xstrdup (imaxtostr (z[0], buf));
122 static int
123 mpz_sgn (mpz_t z)
125 return z[0] < 0 ? -1 : 0 < z[0];
127 static int
128 mpz_fits_ulong_p (mpz_t z)
130 return 0 <= z[0] && z[0] <= ULONG_MAX;
132 static unsigned long int
133 mpz_get_ui (mpz_t z)
135 return z[0];
137 static int
138 mpz_out_str (FILE *stream, int base, mpz_t z)
140 (void) base;
141 char buf[INT_BUFSIZE_BOUND (intmax_t)];
142 return fputs (imaxtostr (z[0], buf), stream) != EOF;
144 #endif
146 /* The official name of this program (e.g., no 'g' prefix). */
147 #define PROGRAM_NAME "expr"
149 #define AUTHORS \
150 proper_name ("Mike Parker"), \
151 proper_name ("James Youngman"), \
152 proper_name ("Paul Eggert")
154 /* Exit statuses. */
155 enum
157 /* Invalid expression: e.g., its form does not conform to the
158 grammar for expressions. Our grammar is an extension of the
159 POSIX grammar. */
160 EXPR_INVALID = 2,
162 /* An internal error occurred, e.g., arithmetic overflow, storage
163 exhaustion. */
164 EXPR_FAILURE
167 /* The kinds of value we can have. */
168 enum valtype
170 integer,
171 string
173 typedef enum valtype TYPE;
175 /* A value is.... */
176 struct valinfo
178 TYPE type; /* Which kind. */
179 union
180 { /* The value itself. */
181 mpz_t i;
182 char *s;
183 } u;
185 typedef struct valinfo VALUE;
187 /* The arguments given to the program, minus the program name. */
188 static char **args;
190 static VALUE *eval (bool);
191 static bool nomoreargs (void);
192 static bool null (VALUE *v);
193 static void printv (VALUE *v);
195 void
196 usage (int status)
198 if (status != EXIT_SUCCESS)
199 emit_try_help ();
200 else
202 printf (_("\
203 Usage: %s EXPRESSION\n\
204 or: %s OPTION\n\
206 program_name, program_name);
207 putchar ('\n');
208 fputs (HELP_OPTION_DESCRIPTION, stdout);
209 fputs (VERSION_OPTION_DESCRIPTION, stdout);
210 fputs (_("\
212 Print the value of EXPRESSION to standard output. A blank line below\n\
213 separates increasing precedence groups. EXPRESSION may be:\n\
215 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
217 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
218 "), stdout);
219 fputs (_("\
221 ARG1 < ARG2 ARG1 is less than ARG2\n\
222 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
223 ARG1 = ARG2 ARG1 is equal to ARG2\n\
224 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
225 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
226 ARG1 > ARG2 ARG1 is greater than ARG2\n\
227 "), stdout);
228 fputs (_("\
230 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
231 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
232 "), stdout);
233 /* Tell xgettext that the "% A" below is not a printf-style
234 format string: xgettext:no-c-format */
235 fputs (_("\
237 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
238 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
239 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
240 "), stdout);
241 fputs (_("\
243 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
245 match STRING REGEXP same as STRING : REGEXP\n\
246 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
247 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
248 length STRING length of STRING\n\
249 "), stdout);
250 fputs (_("\
251 + TOKEN interpret TOKEN as a string, even if it is a\n\
252 keyword like 'match' or an operator like '/'\n\
254 ( EXPRESSION ) value of EXPRESSION\n\
255 "), stdout);
256 fputs (_("\
258 Beware that many operators need to be escaped or quoted for shells.\n\
259 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
260 Pattern matches return the string matched between \\( and \\) or null; if\n\
261 \\( and \\) are not used, they return the number of characters matched or 0.\n\
262 "), stdout);
263 fputs (_("\
265 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
266 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
267 "), stdout);
268 emit_ancillary_info (PROGRAM_NAME);
270 exit (status);
273 /* Report a syntax error and exit. */
274 static void
275 syntax_error (void)
277 die (EXPR_INVALID, 0, _("syntax error"));
280 #if ! HAVE_GMP
281 /* Report an integer overflow for operation OP and exit. */
282 static void
283 integer_overflow (char op)
285 die (EXPR_FAILURE, ERANGE, "%c", op);
287 #endif
290 main (int argc, char **argv)
292 VALUE *v;
294 initialize_main (&argc, &argv);
295 set_program_name (argv[0]);
296 setlocale (LC_ALL, "");
297 bindtextdomain (PACKAGE, LOCALEDIR);
298 textdomain (PACKAGE);
300 initialize_exit_failure (EXPR_FAILURE);
301 atexit (close_stdout);
303 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE_NAME, VERSION,
304 usage, AUTHORS, (char const *) NULL);
306 /* The above handles --help and --version.
307 Since there is no other invocation of getopt, handle '--' here. */
308 unsigned int u_argc = argc;
309 if (1 < u_argc && STREQ (argv[1], "--"))
311 --u_argc;
312 ++argv;
315 if (u_argc <= 1)
317 error (0, 0, _("missing operand"));
318 usage (EXPR_INVALID);
321 args = argv + 1;
323 v = eval (true);
324 if (!nomoreargs ())
325 syntax_error ();
326 printv (v);
328 return null (v);
331 /* Return a VALUE for I. */
333 static VALUE *
334 int_value (unsigned long int i)
336 VALUE *v = xmalloc (sizeof *v);
337 v->type = integer;
338 mpz_init_set_ui (v->u.i, i);
339 return v;
342 /* Return a VALUE for S. */
344 static VALUE *
345 str_value (char const *s)
347 VALUE *v = xmalloc (sizeof *v);
348 v->type = string;
349 v->u.s = xstrdup (s);
350 return v;
353 /* Free VALUE V, including structure components. */
355 static void
356 freev (VALUE *v)
358 if (v->type == string)
359 free (v->u.s);
360 else
361 mpz_clear (v->u.i);
362 free (v);
365 /* Print VALUE V. */
367 static void
368 printv (VALUE *v)
370 switch (v->type)
372 case integer:
373 mpz_out_str (stdout, 10, v->u.i);
374 putchar ('\n');
375 break;
376 case string:
377 puts (v->u.s);
378 break;
379 default:
380 abort ();
384 /* Return true if V is a null-string or zero-number. */
386 static bool _GL_ATTRIBUTE_PURE
387 null (VALUE *v)
389 switch (v->type)
391 case integer:
392 return mpz_sgn (v->u.i) == 0;
393 case string:
395 char const *cp = v->u.s;
396 if (*cp == '\0')
397 return true;
399 cp += (*cp == '-');
403 if (*cp != '0')
404 return false;
406 while (*++cp);
408 return true;
410 default:
411 abort ();
415 /* Return true if CP takes the form of an integer. */
417 static bool _GL_ATTRIBUTE_PURE
418 looks_like_integer (char const *cp)
420 cp += (*cp == '-');
423 if (! ISDIGIT (*cp))
424 return false;
425 while (*++cp);
427 return true;
430 /* Coerce V to a string value (can't fail). */
432 static void
433 tostring (VALUE *v)
435 switch (v->type)
437 case integer:
439 char *s = mpz_get_str (NULL, 10, v->u.i);
440 mpz_clear (v->u.i);
441 v->u.s = s;
442 v->type = string;
444 break;
445 case string:
446 break;
447 default:
448 abort ();
452 /* Coerce V to an integer value. Return true on success, false on failure. */
454 static bool
455 toarith (VALUE *v)
457 switch (v->type)
459 case integer:
460 return true;
461 case string:
463 char *s = v->u.s;
465 if (! looks_like_integer (s))
466 return false;
467 if (mpz_init_set_str (v->u.i, s, 10) != 0 && !HAVE_GMP)
468 die (EXPR_FAILURE, ERANGE, "%s", (s));
469 free (s);
470 v->type = integer;
471 return true;
473 default:
474 abort ();
478 /* Extract a size_t value from an integer value I.
479 If the value is negative, return SIZE_MAX.
480 If the value is too large, return SIZE_MAX - 1. */
481 static size_t
482 getsize (mpz_t i)
484 if (mpz_sgn (i) < 0)
485 return SIZE_MAX;
486 if (mpz_fits_ulong_p (i))
488 unsigned long int ul = mpz_get_ui (i);
489 if (ul < SIZE_MAX)
490 return ul;
492 return SIZE_MAX - 1;
495 /* Return true and advance if the next token matches STR exactly.
496 STR must not be NULL. */
498 static bool
499 nextarg (char const *str)
501 if (*args == NULL)
502 return false;
503 else
505 bool r = STREQ (*args, str);
506 args += r;
507 return r;
511 /* Return true if there no more tokens. */
513 static bool
514 nomoreargs (void)
516 return *args == 0;
519 #ifdef EVAL_TRACE
520 /* Print evaluation trace and args remaining. */
522 static void
523 trace (fxn)
524 char *fxn;
526 char **a;
528 printf ("%s:", fxn);
529 for (a = args; *a; a++)
530 printf (" %s", *a);
531 putchar ('\n');
533 #endif
535 /* Do the : operator.
536 SV is the VALUE for the lhs (the string),
537 PV is the VALUE for the rhs (the pattern). */
539 static VALUE *
540 docolon (VALUE *sv, VALUE *pv)
542 VALUE *v IF_LINT ( = NULL);
543 const char *errmsg;
544 struct re_pattern_buffer re_buffer;
545 char fastmap[UCHAR_MAX + 1];
546 struct re_registers re_regs;
547 regoff_t matchlen;
549 tostring (sv);
550 tostring (pv);
552 re_regs.num_regs = 0;
553 re_regs.start = NULL;
554 re_regs.end = NULL;
556 re_buffer.buffer = NULL;
557 re_buffer.allocated = 0;
558 re_buffer.fastmap = fastmap;
559 re_buffer.translate = NULL;
560 re_syntax_options =
561 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
562 errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer);
563 if (errmsg)
564 die (EXPR_INVALID, 0, "%s", (errmsg));
565 re_buffer.newline_anchor = 0;
567 matchlen = re_match (&re_buffer, sv->u.s, strlen (sv->u.s), 0, &re_regs);
568 if (0 <= matchlen)
570 /* Were \(...\) used? */
571 if (re_buffer.re_nsub > 0)
573 sv->u.s[re_regs.end[1]] = '\0';
574 v = str_value (sv->u.s + re_regs.start[1]);
576 else
577 v = int_value (matchlen);
579 else if (matchlen == -1)
581 /* Match failed -- return the right kind of null. */
582 if (re_buffer.re_nsub > 0)
583 v = str_value ("");
584 else
585 v = int_value (0);
587 else
588 die (EXPR_FAILURE,
589 (matchlen == -2 ? errno : EOVERFLOW),
590 _("error in regular expression matcher"));
592 if (0 < re_regs.num_regs)
594 free (re_regs.start);
595 free (re_regs.end);
597 re_buffer.fastmap = NULL;
598 regfree (&re_buffer);
599 return v;
602 /* Handle bare operands and ( expr ) syntax. */
604 static VALUE *
605 eval7 (bool evaluate)
607 VALUE *v;
609 #ifdef EVAL_TRACE
610 trace ("eval7");
611 #endif
612 if (nomoreargs ())
613 syntax_error ();
615 if (nextarg ("("))
617 v = eval (evaluate);
618 if (!nextarg (")"))
619 syntax_error ();
620 return v;
623 if (nextarg (")"))
624 syntax_error ();
626 return str_value (*args++);
629 /* Handle match, substr, index, and length keywords, and quoting "+". */
631 static VALUE *
632 eval6 (bool evaluate)
634 VALUE *l;
635 VALUE *r;
636 VALUE *v;
637 VALUE *i1;
638 VALUE *i2;
640 #ifdef EVAL_TRACE
641 trace ("eval6");
642 #endif
643 if (nextarg ("+"))
645 if (nomoreargs ())
646 syntax_error ();
647 return str_value (*args++);
649 else if (nextarg ("length"))
651 r = eval6 (evaluate);
652 tostring (r);
653 v = int_value (strlen (r->u.s));
654 freev (r);
655 return v;
657 else if (nextarg ("match"))
659 l = eval6 (evaluate);
660 r = eval6 (evaluate);
661 if (evaluate)
663 v = docolon (l, r);
664 freev (l);
666 else
667 v = l;
668 freev (r);
669 return v;
671 else if (nextarg ("index"))
673 size_t pos;
675 l = eval6 (evaluate);
676 r = eval6 (evaluate);
677 tostring (l);
678 tostring (r);
679 pos = strcspn (l->u.s, r->u.s);
680 v = int_value (l->u.s[pos] ? pos + 1 : 0);
681 freev (l);
682 freev (r);
683 return v;
685 else if (nextarg ("substr"))
687 size_t llen;
688 l = eval6 (evaluate);
689 i1 = eval6 (evaluate);
690 i2 = eval6 (evaluate);
691 tostring (l);
692 llen = strlen (l->u.s);
694 if (!toarith (i1) || !toarith (i2))
695 v = str_value ("");
696 else
698 size_t pos = getsize (i1->u.i);
699 size_t len = getsize (i2->u.i);
701 if (llen < pos || pos == 0 || len == 0 || len == SIZE_MAX)
702 v = str_value ("");
703 else
705 size_t vlen = MIN (len, llen - pos + 1);
706 char *vlim;
707 v = xmalloc (sizeof *v);
708 v->type = string;
709 v->u.s = xmalloc (vlen + 1);
710 vlim = mempcpy (v->u.s, l->u.s + pos - 1, vlen);
711 *vlim = '\0';
714 freev (l);
715 freev (i1);
716 freev (i2);
717 return v;
719 else
720 return eval7 (evaluate);
723 /* Handle : operator (pattern matching).
724 Calls docolon to do the real work. */
726 static VALUE *
727 eval5 (bool evaluate)
729 VALUE *l;
730 VALUE *r;
731 VALUE *v;
733 #ifdef EVAL_TRACE
734 trace ("eval5");
735 #endif
736 l = eval6 (evaluate);
737 while (1)
739 if (nextarg (":"))
741 r = eval6 (evaluate);
742 if (evaluate)
744 v = docolon (l, r);
745 freev (l);
746 l = v;
748 freev (r);
750 else
751 return l;
755 /* Handle *, /, % operators. */
757 static VALUE *
758 eval4 (bool evaluate)
760 VALUE *l;
761 VALUE *r;
762 enum { multiply, divide, mod } fxn;
764 #ifdef EVAL_TRACE
765 trace ("eval4");
766 #endif
767 l = eval5 (evaluate);
768 while (1)
770 if (nextarg ("*"))
771 fxn = multiply;
772 else if (nextarg ("/"))
773 fxn = divide;
774 else if (nextarg ("%"))
775 fxn = mod;
776 else
777 return l;
778 r = eval5 (evaluate);
779 if (evaluate)
781 if (!toarith (l) || !toarith (r))
782 die (EXPR_INVALID, 0, _("non-integer argument"));
783 if (fxn != multiply && mpz_sgn (r->u.i) == 0)
784 die (EXPR_INVALID, 0, _("division by zero"));
785 ((fxn == multiply ? mpz_mul
786 : fxn == divide ? mpz_tdiv_q
787 : mpz_tdiv_r)
788 (l->u.i, l->u.i, r->u.i));
790 freev (r);
794 /* Handle +, - operators. */
796 static VALUE *
797 eval3 (bool evaluate)
799 VALUE *l;
800 VALUE *r;
801 enum { plus, minus } fxn;
803 #ifdef EVAL_TRACE
804 trace ("eval3");
805 #endif
806 l = eval4 (evaluate);
807 while (1)
809 if (nextarg ("+"))
810 fxn = plus;
811 else if (nextarg ("-"))
812 fxn = minus;
813 else
814 return l;
815 r = eval4 (evaluate);
816 if (evaluate)
818 if (!toarith (l) || !toarith (r))
819 die (EXPR_INVALID, 0, _("non-integer argument"));
820 (fxn == plus ? mpz_add : mpz_sub) (l->u.i, l->u.i, r->u.i);
822 freev (r);
826 /* Handle comparisons. */
828 static VALUE *
829 eval2 (bool evaluate)
831 VALUE *l;
833 #ifdef EVAL_TRACE
834 trace ("eval2");
835 #endif
836 l = eval3 (evaluate);
837 while (1)
839 VALUE *r;
840 enum
842 less_than, less_equal, equal, not_equal, greater_equal, greater_than
843 } fxn;
844 bool val = false;
846 if (nextarg ("<"))
847 fxn = less_than;
848 else if (nextarg ("<="))
849 fxn = less_equal;
850 else if (nextarg ("=") || nextarg ("=="))
851 fxn = equal;
852 else if (nextarg ("!="))
853 fxn = not_equal;
854 else if (nextarg (">="))
855 fxn = greater_equal;
856 else if (nextarg (">"))
857 fxn = greater_than;
858 else
859 return l;
860 r = eval3 (evaluate);
862 if (evaluate)
864 int cmp;
865 tostring (l);
866 tostring (r);
868 if (looks_like_integer (l->u.s) && looks_like_integer (r->u.s))
869 cmp = strintcmp (l->u.s, r->u.s);
870 else
872 errno = 0;
873 cmp = strcoll (l->u.s, r->u.s);
875 if (errno)
877 error (0, errno, _("string comparison failed"));
878 error (0, 0, _("set LC_ALL='C' to work around the problem"));
879 die (EXPR_INVALID, 0,
880 _("the strings compared were %s and %s"),
881 quotearg_n_style (0, locale_quoting_style, l->u.s),
882 quotearg_n_style (1, locale_quoting_style, r->u.s));
886 switch (fxn)
888 case less_than: val = (cmp < 0); break;
889 case less_equal: val = (cmp <= 0); break;
890 case equal: val = (cmp == 0); break;
891 case not_equal: val = (cmp != 0); break;
892 case greater_equal: val = (cmp >= 0); break;
893 case greater_than: val = (cmp > 0); break;
894 default: abort ();
898 freev (l);
899 freev (r);
900 l = int_value (val);
904 /* Handle &. */
906 static VALUE *
907 eval1 (bool evaluate)
909 VALUE *l;
910 VALUE *r;
912 #ifdef EVAL_TRACE
913 trace ("eval1");
914 #endif
915 l = eval2 (evaluate);
916 while (1)
918 if (nextarg ("&"))
920 r = eval2 (evaluate && !null (l));
921 if (null (l) || null (r))
923 freev (l);
924 freev (r);
925 l = int_value (0);
927 else
928 freev (r);
930 else
931 return l;
935 /* Handle |. */
937 static VALUE *
938 eval (bool evaluate)
940 VALUE *l;
941 VALUE *r;
943 #ifdef EVAL_TRACE
944 trace ("eval");
945 #endif
946 l = eval1 (evaluate);
947 while (1)
949 if (nextarg ("|"))
951 r = eval1 (evaluate && null (l));
952 if (null (l))
954 freev (l);
955 l = r;
956 if (null (l))
958 freev (l);
959 l = int_value (0);
962 else
963 freev (r);
965 else
966 return l;