1 /* expr -- evaluate expressions.
2 Copyright (C) 1986-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a separate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
33 #include <sys/types.h>
40 #include "long-options.h"
42 #include "strnumcmp.h"
45 /* Various parts of this code assume size_t fits into unsigned long
46 int, the widest unsigned type that GMP supports. */
47 static_assert (SIZE_MAX
<= ULONG_MAX
);
49 /* The official name of this program (e.g., no 'g' prefix). */
50 #define PROGRAM_NAME "expr"
53 proper_name ("Mike Parker"), \
54 proper_name ("James Youngman"), \
55 proper_name ("Paul Eggert")
60 /* Invalid expression: e.g., its form does not conform to the
61 grammar for expressions. Our grammar is an extension of the
65 /* An internal error occurred, e.g., arithmetic overflow, storage
70 /* The kinds of value we can have. */
76 typedef enum valtype TYPE
;
81 TYPE type
; /* Which kind. */
83 { /* The value itself. */
88 typedef struct valinfo VALUE
;
90 /* The arguments given to the program, minus the program name. */
93 static VALUE
*eval (bool);
94 static bool nomoreargs (void);
95 static bool null (VALUE
*v
);
96 static void printv (VALUE
*v
);
100 Find the first occurrence in the character string STRING of any character
101 in the character string ACCEPT.
103 Copied from gnulib's mbscspn, with two differences:
104 1. Returns 1-based position of first found character, or zero if not found.
105 2. Returned value is the logical character index, NOT byte offset.
108 mbs_logical_cspn ('hello','a') => 0
109 mbs_logical_cspn ('hello','h') => 1
110 mbs_logical_cspn ('hello','oe') => 2
111 mbs_logical_cspn ('hello','lo') => 3
113 In UTF-8 \xCE\xB1 is a single character (greek alpha):
114 mbs_logical_cspn ('\xCE\xB1bc','\xCE\xB1') => 1
115 mbs_logical_cspn ('\xCE\xB1bc','c') => 3 */
117 mbs_logical_cspn (char const *s
, char const *accept
)
121 if (accept
[0] == '\0')
127 mbui_iterator_t iter
;
129 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
132 if (mb_len (mbui_cur (iter
)) == 1)
134 if (mbschr (accept
, *mbui_cur_ptr (iter
)))
139 mbui_iterator_t aiter
;
141 for (mbui_init (aiter
, accept
);
143 mbui_advance (aiter
))
144 if (mb_equal (mbui_cur (aiter
), mbui_cur (iter
)))
154 /* single-byte locale,
155 convert returned byte offset to 1-based index or zero if not found. */
156 size_t i
= strcspn (s
, accept
);
157 return (s
[i
] ? i
+ 1 : 0);
161 /* Extract the substring of S, from logical character
162 position POS and LEN characters.
163 first character position is 1.
164 POS and LEN refer to logical characters, not octets.
166 Returns the new string in freshly allocated storage.
167 The new string might be empty if POS/LEN are invalid. */
169 mbs_logical_substr (char const *s
, size_t pos
, size_t len
)
173 size_t blen
= strlen (s
); /* byte length */
174 size_t llen
= (MB_CUR_MAX
> 1) ? mbslen (s
) : blen
; /* logical length */
176 if (llen
< pos
|| pos
== 0 || len
== 0 || len
== SIZE_MAX
)
179 /* characters to copy */
180 size_t vlen
= MIN (len
, llen
- pos
+ 1);
184 /* Single-byte case */
185 v
= xmalloc (vlen
+ 1);
186 vlim
= mempcpy (v
, s
+ pos
- 1, vlen
);
192 /* FIXME: this is wasteful. Some memory can be saved by counting
193 how many bytes the matching characters occupy. */
194 vlim
= v
= xmalloc (blen
+ 1);
196 mbui_iterator_t iter
;
198 for (mbui_init (iter
, s
);
199 mbui_avail (iter
) && vlen
> 0;
200 mbui_advance (iter
), ++idx
)
202 /* Skip until we reach the starting position */
206 /* Copy one character */
208 vlim
= mempcpy (vlim
, mbui_cur_ptr (iter
), mb_len (mbui_cur (iter
)));
215 /* Return the number of logical characters (possibly multibyte)
216 that are in string S in the first OFS octets.
219 "\xE2\x9D\xA7" is "U+2767 ROTATED FLORAL HEART BULLET".
220 In the string below, there are only two characters
221 up to the first 4 bytes (The U+2767 which occupies 3 bytes and 'x'):
222 mbs_offset_to_chars ("\xE2\x9D\xA7xyz", 4) => 2 */
224 mbs_offset_to_chars (char const *s
, size_t ofs
)
226 mbui_iterator_t iter
;
228 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
230 ptrdiff_t d
= mbui_cur_ptr (iter
) - s
;
243 if (status
!= EXIT_SUCCESS
)
248 Usage: %s EXPRESSION\n\
251 program_name
, program_name
);
253 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
254 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
257 Print the value of EXPRESSION to standard output. A blank line below\n\
258 separates increasing precedence groups. EXPRESSION may be:\n\
260 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
262 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
266 ARG1 < ARG2 ARG1 is less than ARG2\n\
267 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
268 ARG1 = ARG2 ARG1 is equal to ARG2\n\
269 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
270 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
271 ARG1 > ARG2 ARG1 is greater than ARG2\n\
275 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
276 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
278 /* Tell xgettext that the "% A" below is not a printf-style
279 format string: xgettext:no-c-format */
282 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
283 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
284 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
288 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
290 match STRING REGEXP same as STRING : REGEXP\n\
291 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
292 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
293 length STRING length of STRING\n\
296 + TOKEN interpret TOKEN as a string, even if it is a\n\
297 keyword like 'match' or an operator like '/'\n\
299 ( EXPRESSION ) value of EXPRESSION\n\
303 Beware that many operators need to be escaped or quoted for shells.\n\
304 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
305 Pattern matches return the string matched between \\( and \\) or null; if\n\
306 \\( and \\) are not used, they return the number of characters matched or 0.\n\
310 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
311 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
313 emit_ancillary_info (PROGRAM_NAME
);
320 main (int argc
, char **argv
)
324 initialize_main (&argc
, &argv
);
325 set_program_name (argv
[0]);
326 setlocale (LC_ALL
, "");
327 bindtextdomain (PACKAGE
, LOCALEDIR
);
328 textdomain (PACKAGE
);
330 initialize_exit_failure (EXPR_FAILURE
);
331 atexit (close_stdout
);
333 parse_long_options (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
, VERSION
,
334 usage
, AUTHORS
, (char const *) NULL
);
336 /* The above handles --help and --version.
337 Since there is no other invocation of getopt, handle '--' here. */
338 unsigned int u_argc
= argc
;
339 if (1 < u_argc
&& STREQ (argv
[1], "--"))
347 error (0, 0, _("missing operand"));
348 usage (EXPR_INVALID
);
355 die (EXPR_INVALID
, 0, _("syntax error: unexpected argument %s"),
356 quotearg_n_style (0, locale_quoting_style
, *args
));
360 main_exit (null (v
));
363 /* Return a VALUE for I. */
366 int_value (unsigned long int i
)
368 VALUE
*v
= xmalloc (sizeof *v
);
370 mpz_init_set_ui (v
->u
.i
, i
);
374 /* Return a VALUE for S. */
377 str_value (char const *s
)
379 VALUE
*v
= xmalloc (sizeof *v
);
381 v
->u
.s
= xstrdup (s
);
385 /* Free VALUE V, including structure components. */
390 if (v
->type
== string
)
405 mpz_out_str (stdout
, 10, v
->u
.i
);
416 /* Return true if V is a null-string or zero-number. */
425 return mpz_sgn (v
->u
.i
) == 0;
428 char const *cp
= v
->u
.s
;
448 /* Return true if CP takes the form of an integer. */
452 looks_like_integer (char const *cp
)
464 /* Coerce V to a string value (can't fail). */
473 char *s
= mpz_get_str (NULL
, 10, v
->u
.i
);
486 /* Coerce V to an integer value. Return true on success, false on failure. */
499 if (! looks_like_integer (s
))
501 if (mpz_init_set_str (v
->u
.i
, s
, 10) != 0)
502 die (EXPR_FAILURE
, ERANGE
, "%s", (s
));
512 /* Extract a size_t value from an integer value I.
513 If the value is negative, return SIZE_MAX.
514 If the value is too large, return SIZE_MAX - 1. */
520 if (mpz_fits_ulong_p (i
))
522 unsigned long int ul
= mpz_get_ui (i
);
529 /* Return true and advance if the next token matches STR exactly.
530 STR must not be NULL. */
533 nextarg (char const *str
)
539 bool r
= STREQ (*args
, str
);
545 /* Return true if there are no more tokens. */
553 /* Report missing operand.
554 There is an implicit assumption that there was a previous argument,
555 and (args-1) is valid. */
557 require_more_args (void)
560 die (EXPR_INVALID
, 0, _("syntax error: missing argument after %s"),
561 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
566 /* Print evaluation trace and args remaining. */
575 for (a
= args
; *a
; a
++)
581 /* Do the : operator.
582 SV is the VALUE for the lhs (the string),
583 PV is the VALUE for the rhs (the pattern). */
586 docolon (VALUE
*sv
, VALUE
*pv
)
590 struct re_pattern_buffer re_buffer
;
591 char fastmap
[UCHAR_MAX
+ 1];
592 struct re_registers re_regs
;
598 re_regs
.num_regs
= 0;
599 re_regs
.start
= NULL
;
602 re_buffer
.buffer
= NULL
;
603 re_buffer
.allocated
= 0;
604 re_buffer
.fastmap
= fastmap
;
605 re_buffer
.translate
= NULL
;
607 RE_SYNTAX_POSIX_BASIC
& ~RE_CONTEXT_INVALID_DUP
& ~RE_NO_EMPTY_RANGES
;
608 errmsg
= re_compile_pattern (pv
->u
.s
, strlen (pv
->u
.s
), &re_buffer
);
610 die (EXPR_INVALID
, 0, "%s", (errmsg
));
611 re_buffer
.newline_anchor
= 0;
613 matchlen
= re_match (&re_buffer
, sv
->u
.s
, strlen (sv
->u
.s
), 0, &re_regs
);
616 /* Were \(...\) used? */
617 if (re_buffer
.re_nsub
> 0)
619 if (re_regs
.end
[1] < 0)
623 sv
->u
.s
[re_regs
.end
[1]] = '\0';
624 v
= str_value (sv
->u
.s
+ re_regs
.start
[1]);
629 /* In multibyte locales, convert the matched offset (=number of bytes)
630 to the number of matched characters. */
631 size_t i
= (MB_CUR_MAX
== 1
633 : mbs_offset_to_chars (sv
->u
.s
, matchlen
));
637 else if (matchlen
== -1)
639 /* Match failed -- return the right kind of null. */
640 if (re_buffer
.re_nsub
> 0)
647 (matchlen
== -2 ? errno
: EOVERFLOW
),
648 _("error in regular expression matcher"));
650 if (0 < re_regs
.num_regs
)
652 free (re_regs
.start
);
655 re_buffer
.fastmap
= NULL
;
656 regfree (&re_buffer
);
660 /* Handle bare operands and ( expr ) syntax. */
663 eval7 (bool evaluate
)
670 require_more_args ();
676 die (EXPR_INVALID
, 0, _("syntax error: expecting ')' after %s"),
677 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
679 die (EXPR_INVALID
, 0, _("syntax error: expecting ')' instead of %s"),
680 quotearg_n_style (0, locale_quoting_style
, *args
));
685 die (EXPR_INVALID
, 0, _("syntax error: unexpected ')'"));
687 return str_value (*args
++);
690 /* Handle match, substr, index, and length keywords, and quoting "+". */
693 eval6 (bool evaluate
)
706 require_more_args ();
707 return str_value (*args
++);
709 else if (nextarg ("length"))
711 r
= eval6 (evaluate
);
713 v
= int_value (mbslen (r
->u
.s
));
717 else if (nextarg ("match"))
719 l
= eval6 (evaluate
);
720 r
= eval6 (evaluate
);
731 else if (nextarg ("index"))
735 l
= eval6 (evaluate
);
736 r
= eval6 (evaluate
);
739 pos
= mbs_logical_cspn (l
->u
.s
, r
->u
.s
);
745 else if (nextarg ("substr"))
747 l
= eval6 (evaluate
);
748 i1
= eval6 (evaluate
);
749 i2
= eval6 (evaluate
);
752 if (!toarith (i1
) || !toarith (i2
))
756 size_t pos
= getsize (i1
->u
.i
);
757 size_t len
= getsize (i2
->u
.i
);
759 char *s
= mbs_logical_substr (l
->u
.s
, pos
, len
);
769 return eval7 (evaluate
);
772 /* Handle : operator (pattern matching).
773 Calls docolon to do the real work. */
776 eval5 (bool evaluate
)
785 l
= eval6 (evaluate
);
790 r
= eval6 (evaluate
);
804 /* Handle *, /, % operators. */
807 eval4 (bool evaluate
)
811 enum { multiply
, divide
, mod
} fxn
;
816 l
= eval5 (evaluate
);
821 else if (nextarg ("/"))
823 else if (nextarg ("%"))
827 r
= eval5 (evaluate
);
830 if (!toarith (l
) || !toarith (r
))
831 die (EXPR_INVALID
, 0, _("non-integer argument"));
832 if (fxn
!= multiply
&& mpz_sgn (r
->u
.i
) == 0)
833 die (EXPR_INVALID
, 0, _("division by zero"));
834 ((fxn
== multiply
? mpz_mul
835 : fxn
== divide
? mpz_tdiv_q
837 (l
->u
.i
, l
->u
.i
, r
->u
.i
));
843 /* Handle +, - operators. */
846 eval3 (bool evaluate
)
850 enum { plus
, minus
} fxn
;
855 l
= eval4 (evaluate
);
860 else if (nextarg ("-"))
864 r
= eval4 (evaluate
);
867 if (!toarith (l
) || !toarith (r
))
868 die (EXPR_INVALID
, 0, _("non-integer argument"));
869 (fxn
== plus
? mpz_add
: mpz_sub
) (l
->u
.i
, l
->u
.i
, r
->u
.i
);
875 /* Handle comparisons. */
878 eval2 (bool evaluate
)
885 l
= eval3 (evaluate
);
891 less_than
, less_equal
, equal
, not_equal
, greater_equal
, greater_than
897 else if (nextarg ("<="))
899 else if (nextarg ("=") || nextarg ("=="))
901 else if (nextarg ("!="))
903 else if (nextarg (">="))
905 else if (nextarg (">"))
909 r
= eval3 (evaluate
);
917 if (looks_like_integer (l
->u
.s
) && looks_like_integer (r
->u
.s
))
918 cmp
= strintcmp (l
->u
.s
, r
->u
.s
);
922 cmp
= strcoll (l
->u
.s
, r
->u
.s
);
926 error (0, errno
, _("string comparison failed"));
927 error (0, 0, _("set LC_ALL='C' to work around the problem"));
928 die (EXPR_INVALID
, 0,
929 _("the strings compared were %s and %s"),
930 quotearg_n_style (0, locale_quoting_style
, l
->u
.s
),
931 quotearg_n_style (1, locale_quoting_style
, r
->u
.s
));
937 case less_than
: val
= (cmp
< 0); break;
938 case less_equal
: val
= (cmp
<= 0); break;
939 case equal
: val
= (cmp
== 0); break;
940 case not_equal
: val
= (cmp
!= 0); break;
941 case greater_equal
: val
= (cmp
>= 0); break;
942 case greater_than
: val
= (cmp
> 0); break;
956 eval1 (bool evaluate
)
964 l
= eval2 (evaluate
);
969 r
= eval2 (evaluate
&& !null (l
));
970 if (null (l
) || null (r
))
995 l
= eval1 (evaluate
);
1000 r
= eval1 (evaluate
&& null (l
));