1 /* expr -- evaluate expressions.
2 Copyright (C) 1986-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a separate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
33 #include <sys/types.h>
38 #include "long-options.h"
40 #include "strnumcmp.h"
43 /* Various parts of this code assume size_t fits into unsigned long
44 int, the widest unsigned type that GMP supports. */
45 static_assert (SIZE_MAX
<= ULONG_MAX
);
47 /* The official name of this program (e.g., no 'g' prefix). */
48 #define PROGRAM_NAME "expr"
51 proper_name ("Mike Parker"), \
52 proper_name ("James Youngman"), \
53 proper_name ("Paul Eggert")
58 /* Invalid expression: e.g., its form does not conform to the
59 grammar for expressions. Our grammar is an extension of the
63 /* An internal error occurred, e.g., arithmetic overflow, storage
68 /* The kinds of value we can have. */
74 typedef enum valtype TYPE
;
79 TYPE type
; /* Which kind. */
81 { /* The value itself. */
86 typedef struct valinfo VALUE
;
88 /* The arguments given to the program, minus the program name. */
91 static VALUE
*eval (bool);
92 static bool nomoreargs (void);
93 static bool null (VALUE
*v
);
94 static void printv (VALUE
*v
);
98 Find the first occurrence in the character string STRING of any character
99 in the character string ACCEPT.
101 Copied from gnulib's mbscspn, with two differences:
102 1. Returns 1-based position of first found character, or zero if not found.
103 2. Returned value is the logical character index, NOT byte offset.
106 mbs_logical_cspn ('hello','a') => 0
107 mbs_logical_cspn ('hello','h') => 1
108 mbs_logical_cspn ('hello','oe') => 1
109 mbs_logical_cspn ('hello','lo') => 3
111 In UTF-8 \xCE\xB1 is a single character (greek alpha):
112 mbs_logical_cspn ('\xCE\xB1bc','\xCE\xB1') => 1
113 mbs_logical_cspn ('\xCE\xB1bc','c') => 3 */
115 mbs_logical_cspn (char const *s
, char const *accept
)
119 if (accept
[0] == '\0')
125 mbui_iterator_t iter
;
127 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
130 if (mb_len (mbui_cur (iter
)) == 1)
132 if (mbschr (accept
, *mbui_cur_ptr (iter
)))
137 mbui_iterator_t aiter
;
139 for (mbui_init (aiter
, accept
);
141 mbui_advance (aiter
))
142 if (mb_equal (mbui_cur (aiter
), mbui_cur (iter
)))
152 /* single-byte locale,
153 convert returned byte offset to 1-based index or zero if not found. */
154 size_t i
= strcspn (s
, accept
);
155 return (s
[i
] ? i
+ 1 : 0);
159 /* Extract the substring of S that starts at logical character
160 position POS and spans at most LEN characters.
161 The first character position is 1.
162 POS and LEN refer to logical characters, not octets.
164 Returns the new string.
165 The new string might be empty if POS/LEN are invalid. */
167 mbs_logical_substr (char const *s
, size_t pos
, size_t len
)
171 size_t blen
= strlen (s
); /* byte length */
172 size_t llen
= (MB_CUR_MAX
> 1) ? mbslen (s
) : blen
; /* logical length */
174 if (llen
< pos
|| pos
== 0 || len
== 0 || len
== SIZE_MAX
)
177 /* characters to copy */
178 size_t vlen
= MIN (len
, llen
- pos
+ 1);
182 /* Single-byte case */
183 v
= xmalloc (vlen
+ 1);
184 vlim
= mempcpy (v
, s
+ pos
- 1, vlen
);
190 /* FIXME: this is wasteful. Some memory can be saved by counting
191 how many bytes the matching characters occupy. */
192 vlim
= v
= xmalloc (blen
+ 1);
194 mbui_iterator_t iter
;
196 for (mbui_init (iter
, s
);
197 mbui_avail (iter
) && vlen
> 0;
198 mbui_advance (iter
), ++idx
)
200 /* Skip until we reach the starting position */
204 /* Copy one character */
206 vlim
= mempcpy (vlim
, mbui_cur_ptr (iter
), mb_len (mbui_cur (iter
)));
213 /* Return the number of logical characters (possibly multibyte)
214 that are in string S in the first OFS octets.
217 "\xE2\x9D\xA7" is "U+2767 ROTATED FLORAL HEART BULLET".
218 In the string below, there are only two characters
219 up to the first 4 bytes (The U+2767 which occupies 3 bytes and 'x'):
220 mbs_offset_to_chars ("\xE2\x9D\xA7xyz", 4) => 2 */
222 mbs_offset_to_chars (char const *s
, size_t ofs
)
224 mbui_iterator_t iter
;
226 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
228 ptrdiff_t d
= mbui_cur_ptr (iter
) - s
;
241 if (status
!= EXIT_SUCCESS
)
246 Usage: %s EXPRESSION\n\
249 program_name
, program_name
);
251 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
252 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
255 Print the value of EXPRESSION to standard output. A blank line below\n\
256 separates increasing precedence groups. EXPRESSION may be:\n\
258 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
260 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
264 ARG1 < ARG2 ARG1 is less than ARG2\n\
265 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
266 ARG1 = ARG2 ARG1 is equal to ARG2\n\
267 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
268 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
269 ARG1 > ARG2 ARG1 is greater than ARG2\n\
273 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
274 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
276 /* Tell xgettext that the "% A" below is not a printf-style
277 format string: xgettext:no-c-format */
280 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
281 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
282 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
286 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
288 match STRING REGEXP same as STRING : REGEXP\n\
289 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
290 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
291 length STRING length of STRING\n\
294 + TOKEN interpret TOKEN as a string, even if it is a\n\
295 keyword like 'match' or an operator like '/'\n\
297 ( EXPRESSION ) value of EXPRESSION\n\
301 Beware that many operators need to be escaped or quoted for shells.\n\
302 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
303 Pattern matches return the string matched between \\( and \\) or null; if\n\
304 \\( and \\) are not used, they return the number of characters matched or 0.\n\
308 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
309 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
311 emit_ancillary_info (PROGRAM_NAME
);
318 main (int argc
, char **argv
)
322 initialize_main (&argc
, &argv
);
323 set_program_name (argv
[0]);
324 setlocale (LC_ALL
, "");
325 bindtextdomain (PACKAGE
, LOCALEDIR
);
326 textdomain (PACKAGE
);
328 initialize_exit_failure (EXPR_FAILURE
);
329 atexit (close_stdout
);
331 parse_long_options (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
, VERSION
,
332 usage
, AUTHORS
, (char const *) nullptr);
334 /* The above handles --help and --version.
335 Since there is no other invocation of getopt, handle '--' here. */
336 if (1 < argc
&& STREQ (argv
[1], "--"))
344 error (0, 0, _("missing operand"));
345 usage (EXPR_INVALID
);
352 error (EXPR_INVALID
, 0, _("syntax error: unexpected argument %s"),
353 quotearg_n_style (0, locale_quoting_style
, *args
));
357 main_exit (null (v
));
360 /* Return a VALUE for I. */
363 int_value (unsigned long int i
)
365 VALUE
*v
= xmalloc (sizeof *v
);
367 mpz_init_set_ui (v
->u
.i
, i
);
371 /* Return a VALUE for S. */
374 str_value (char const *s
)
376 VALUE
*v
= xmalloc (sizeof *v
);
378 v
->u
.s
= xstrdup (s
);
382 /* Free VALUE V, including structure components. */
387 if (v
->type
== string
)
402 mpz_out_str (stdout
, 10, v
->u
.i
);
413 /* Return true if V is a null-string or zero-number. */
422 return mpz_sgn (v
->u
.i
) == 0;
425 char const *cp
= v
->u
.s
;
445 /* Return true if CP takes the form of an integer. */
449 looks_like_integer (char const *cp
)
461 /* Coerce V to a string value (can't fail). */
470 char *s
= mpz_get_str (nullptr, 10, v
->u
.i
);
483 /* Coerce V to an integer value. Return true on success, false on failure. */
496 if (! looks_like_integer (s
))
498 if (mpz_init_set_str (v
->u
.i
, s
, 10) != 0)
499 error (EXPR_FAILURE
, ERANGE
, "%s", (s
));
509 /* Extract a size_t value from an integer value I.
510 If the value is negative, return SIZE_MAX.
511 If the value is too large, return SIZE_MAX - 1. */
517 if (mpz_fits_ulong_p (i
))
519 unsigned long int ul
= mpz_get_ui (i
);
526 /* Return true and advance if the next token matches STR exactly.
527 STR must not be null. */
530 nextarg (char const *str
)
532 if (*args
== nullptr)
536 bool r
= STREQ (*args
, str
);
542 /* Return true if there are no more tokens. */
550 /* Report missing operand.
551 There is an implicit assumption that there was a previous argument,
552 and (args-1) is valid. */
554 require_more_args (void)
557 error (EXPR_INVALID
, 0, _("syntax error: missing argument after %s"),
558 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
563 /* Print evaluation trace and args remaining. */
572 for (a
= args
; *a
; a
++)
578 /* Do the : operator.
579 SV is the VALUE for the lhs (the string),
580 PV is the VALUE for the rhs (the pattern). */
583 docolon (VALUE
*sv
, VALUE
*pv
)
587 struct re_pattern_buffer re_buffer
;
588 char fastmap
[UCHAR_MAX
+ 1];
589 struct re_registers re_regs
;
595 re_regs
.num_regs
= 0;
596 re_regs
.start
= nullptr;
597 re_regs
.end
= nullptr;
599 re_buffer
.buffer
= nullptr;
600 re_buffer
.allocated
= 0;
601 re_buffer
.fastmap
= fastmap
;
602 re_buffer
.translate
= nullptr;
604 RE_SYNTAX_POSIX_BASIC
& ~RE_CONTEXT_INVALID_DUP
& ~RE_NO_EMPTY_RANGES
;
605 errmsg
= re_compile_pattern (pv
->u
.s
, strlen (pv
->u
.s
), &re_buffer
);
607 error (EXPR_INVALID
, 0, "%s", (errmsg
));
608 re_buffer
.newline_anchor
= 0;
610 matchlen
= re_match (&re_buffer
, sv
->u
.s
, strlen (sv
->u
.s
), 0, &re_regs
);
613 /* Were \(...\) used? */
614 if (re_buffer
.re_nsub
> 0)
616 if (re_regs
.end
[1] < 0)
620 sv
->u
.s
[re_regs
.end
[1]] = '\0';
621 v
= str_value (sv
->u
.s
+ re_regs
.start
[1]);
626 /* In multibyte locales, convert the matched offset (=number of bytes)
627 to the number of matched characters. */
628 size_t i
= (MB_CUR_MAX
== 1
630 : mbs_offset_to_chars (sv
->u
.s
, matchlen
));
634 else if (matchlen
== -1)
636 /* Match failed -- return the right kind of null. */
637 if (re_buffer
.re_nsub
> 0)
644 matchlen
== -2 ? errno
: EOVERFLOW
,
645 _("error in regular expression matcher"));
647 if (0 < re_regs
.num_regs
)
649 free (re_regs
.start
);
652 re_buffer
.fastmap
= nullptr;
653 regfree (&re_buffer
);
657 /* Handle bare operands and ( expr ) syntax. */
660 eval7 (bool evaluate
)
667 require_more_args ();
673 error (EXPR_INVALID
, 0, _("syntax error: expecting ')' after %s"),
674 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
676 error (EXPR_INVALID
, 0, _("syntax error: expecting ')' instead of %s"),
677 quotearg_n_style (0, locale_quoting_style
, *args
));
682 error (EXPR_INVALID
, 0, _("syntax error: unexpected ')'"));
684 return str_value (*args
++);
687 /* Handle match, substr, index, and length keywords, and quoting "+". */
690 eval6 (bool evaluate
)
703 require_more_args ();
704 return str_value (*args
++);
706 else if (nextarg ("length"))
708 r
= eval6 (evaluate
);
710 v
= int_value (mbslen (r
->u
.s
));
714 else if (nextarg ("match"))
716 l
= eval6 (evaluate
);
717 r
= eval6 (evaluate
);
728 else if (nextarg ("index"))
732 l
= eval6 (evaluate
);
733 r
= eval6 (evaluate
);
736 pos
= mbs_logical_cspn (l
->u
.s
, r
->u
.s
);
742 else if (nextarg ("substr"))
744 l
= eval6 (evaluate
);
745 i1
= eval6 (evaluate
);
746 i2
= eval6 (evaluate
);
749 if (!toarith (i1
) || !toarith (i2
))
753 size_t pos
= getsize (i1
->u
.i
);
754 size_t len
= getsize (i2
->u
.i
);
756 char *s
= mbs_logical_substr (l
->u
.s
, pos
, len
);
766 return eval7 (evaluate
);
769 /* Handle : operator (pattern matching).
770 Calls docolon to do the real work. */
773 eval5 (bool evaluate
)
782 l
= eval6 (evaluate
);
787 r
= eval6 (evaluate
);
801 /* Handle *, /, % operators. */
804 eval4 (bool evaluate
)
808 enum { multiply
, divide
, mod
} fxn
;
813 l
= eval5 (evaluate
);
818 else if (nextarg ("/"))
820 else if (nextarg ("%"))
824 r
= eval5 (evaluate
);
827 if (!toarith (l
) || !toarith (r
))
828 error (EXPR_INVALID
, 0, _("non-integer argument"));
829 if (fxn
!= multiply
&& mpz_sgn (r
->u
.i
) == 0)
830 error (EXPR_INVALID
, 0, _("division by zero"));
831 ((fxn
== multiply
? mpz_mul
832 : fxn
== divide
? mpz_tdiv_q
834 (l
->u
.i
, l
->u
.i
, r
->u
.i
));
840 /* Handle +, - operators. */
843 eval3 (bool evaluate
)
847 enum { plus
, minus
} fxn
;
852 l
= eval4 (evaluate
);
857 else if (nextarg ("-"))
861 r
= eval4 (evaluate
);
864 if (!toarith (l
) || !toarith (r
))
865 error (EXPR_INVALID
, 0, _("non-integer argument"));
866 (fxn
== plus
? mpz_add
: mpz_sub
) (l
->u
.i
, l
->u
.i
, r
->u
.i
);
872 /* Handle comparisons. */
875 eval2 (bool evaluate
)
882 l
= eval3 (evaluate
);
888 less_than
, less_equal
, equal
, not_equal
, greater_equal
, greater_than
894 else if (nextarg ("<="))
896 else if (nextarg ("=") || nextarg ("=="))
898 else if (nextarg ("!="))
900 else if (nextarg (">="))
902 else if (nextarg (">"))
906 r
= eval3 (evaluate
);
914 if (looks_like_integer (l
->u
.s
) && looks_like_integer (r
->u
.s
))
915 cmp
= strintcmp (l
->u
.s
, r
->u
.s
);
919 cmp
= strcoll (l
->u
.s
, r
->u
.s
);
923 error (0, errno
, _("string comparison failed"));
924 error (0, 0, _("set LC_ALL='C' to work around the problem"));
925 error (EXPR_INVALID
, 0,
926 _("the strings compared were %s and %s"),
927 quotearg_n_style (0, locale_quoting_style
, l
->u
.s
),
928 quotearg_n_style (1, locale_quoting_style
, r
->u
.s
));
934 case less_than
: val
= (cmp
< 0); break;
935 case less_equal
: val
= (cmp
<= 0); break;
936 case equal
: val
= (cmp
== 0); break;
937 case not_equal
: val
= (cmp
!= 0); break;
938 case greater_equal
: val
= (cmp
>= 0); break;
939 case greater_than
: val
= (cmp
> 0); break;
940 default: unreachable ();
953 eval1 (bool evaluate
)
961 l
= eval2 (evaluate
);
966 r
= eval2 (evaluate
&& !null (l
));
967 if (null (l
) || null (r
))
992 l
= eval1 (evaluate
);
997 r
= eval1 (evaluate
&& null (l
));