1 /* expr -- evaluate expressions.
2 Copyright (C) 1986-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a separate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
33 #include <sys/types.h>
38 #include "long-options.h"
40 #include "strnumcmp.h"
43 /* Various parts of this code assume size_t fits into unsigned long
44 int, the widest unsigned type that GMP supports. */
45 static_assert (SIZE_MAX
<= ULONG_MAX
);
47 /* The official name of this program (e.g., no 'g' prefix). */
48 #define PROGRAM_NAME "expr"
51 proper_name ("Mike Parker"), \
52 proper_name ("James Youngman"), \
53 proper_name ("Paul Eggert")
58 /* Invalid expression: e.g., its form does not conform to the
59 grammar for expressions. Our grammar is an extension of the
63 /* An internal error occurred, e.g., arithmetic overflow, storage
68 /* The kinds of value we can have. */
74 typedef enum valtype TYPE
;
79 TYPE type
; /* Which kind. */
81 { /* The value itself. */
86 typedef struct valinfo VALUE
;
88 /* The arguments given to the program, minus the program name. */
91 static VALUE
*eval (bool);
92 static bool nomoreargs (void);
93 static bool null (VALUE
*v
);
94 static void printv (VALUE
*v
);
98 Find the first occurrence in the character string S of any character
99 in the character string ACCEPT.
101 Copied from gnulib's mbscspn, with two differences:
102 1. Returns 1-based position of first found character, or zero if not found.
103 2. Returned value is the logical character index, NOT byte offset.
106 mbs_logical_cspn ('hello','a') => 0
107 mbs_logical_cspn ('hello','h') => 1
108 mbs_logical_cspn ('hello','oe') => 1
109 mbs_logical_cspn ('hello','lo') => 3
111 In UTF-8 \xCE\xB1 is a single character (greek alpha):
112 mbs_logical_cspn ('\xCE\xB1bc','\xCE\xB1') => 1
113 mbs_logical_cspn ('\xCE\xB1bc','c') => 3 */
115 mbs_logical_cspn (char const *s
, char const *accept
)
119 if (accept
[0] == '\0')
125 mbui_iterator_t iter
;
127 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
130 if (mb_len (mbui_cur (iter
)) == 1)
132 if (mbschr (accept
, *mbui_cur_ptr (iter
)))
137 mbui_iterator_t aiter
;
139 for (mbui_init (aiter
, accept
);
141 mbui_advance (aiter
))
142 if (mb_equal (mbui_cur (aiter
), mbui_cur (iter
)))
152 /* single-byte locale,
153 convert returned byte offset to 1-based index or zero if not found. */
154 size_t i
= strcspn (s
, accept
);
155 return (s
[i
] ? i
+ 1 : 0);
159 /* Extract the substring of S that starts at logical character
160 position POS and is LEN characters long.
161 The first character position is 1.
162 POS and LEN refer to logical characters, not octets.
164 Return the substring as a newly allocated string.
165 The new string might be empty if POS/LEN are invalid. */
167 mbs_logical_substr (char const *s
, size_t pos
, size_t len
)
171 size_t blen
= strlen (s
); /* byte length */
172 size_t llen
= (MB_CUR_MAX
> 1) ? mbslen (s
) : blen
; /* logical length */
174 if (llen
< pos
|| pos
== 0 || len
== 0 || len
== SIZE_MAX
)
177 /* characters to copy */
178 size_t vlen
= MIN (len
, llen
- pos
+ 1);
182 /* Single-byte case */
183 v
= xmalloc (vlen
+ 1);
184 vlim
= mempcpy (v
, s
+ pos
- 1, vlen
);
190 /* FIXME: this is wasteful. Some memory can be saved by counting
191 how many bytes the matching characters occupy. */
192 vlim
= v
= xmalloc (blen
+ 1);
194 mbui_iterator_t iter
;
196 for (mbui_init (iter
, s
);
197 mbui_avail (iter
) && vlen
> 0;
198 mbui_advance (iter
), ++idx
)
200 /* Skip until we reach the starting position */
204 /* Copy one character */
206 vlim
= mempcpy (vlim
, mbui_cur_ptr (iter
), mb_len (mbui_cur (iter
)));
213 /* Return the number of logical characters (possibly multibyte)
214 that are in string S in the first OFS octets.
217 "\xE2\x9D\xA7" is "U+2767 ROTATED FLORAL HEART BULLET".
218 In the string below, there are only two characters
219 up to the first 4 bytes (The U+2767 which occupies 3 bytes and 'x'):
220 mbs_offset_to_chars ("\xE2\x9D\xA7xyz", 4) => 2 */
222 mbs_offset_to_chars (char const *s
, size_t ofs
)
224 mbui_iterator_t iter
;
226 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
228 ptrdiff_t d
= mbui_cur_ptr (iter
) - s
;
241 if (status
!= EXIT_SUCCESS
)
246 Usage: %s EXPRESSION\n\
249 program_name
, program_name
);
251 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
252 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
255 Print the value of EXPRESSION to standard output. A blank line below\n\
256 separates increasing precedence groups. EXPRESSION may be:\n\
258 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
260 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
264 ARG1 < ARG2 ARG1 is less than ARG2\n\
265 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
266 ARG1 = ARG2 ARG1 is equal to ARG2\n\
267 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
268 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
269 ARG1 > ARG2 ARG1 is greater than ARG2\n\
273 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
274 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
276 /* Tell xgettext that the "% A" below is not a printf-style
277 format string: xgettext:no-c-format */
280 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
281 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
282 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
286 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
288 match STRING REGEXP same as STRING : REGEXP\n\
289 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
290 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
291 length STRING length of STRING\n\
294 + TOKEN interpret TOKEN as a string, even if it is a\n\
295 keyword like 'match' or an operator like '/'\n\
297 ( EXPRESSION ) value of EXPRESSION\n\
301 Beware that many operators need to be escaped or quoted for shells.\n\
302 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
303 Pattern matches return the string matched between \\( and \\) or null; if\n\
304 \\( and \\) are not used, they return the number of characters matched or 0.\n\
308 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
309 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
311 emit_ancillary_info (PROGRAM_NAME
);
318 main (int argc
, char **argv
)
322 initialize_main (&argc
, &argv
);
323 set_program_name (argv
[0]);
324 setlocale (LC_ALL
, "");
325 bindtextdomain (PACKAGE
, LOCALEDIR
);
326 textdomain (PACKAGE
);
328 initialize_exit_failure (EXPR_FAILURE
);
329 atexit (close_stdout
);
331 parse_long_options (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
, VERSION
,
332 usage
, AUTHORS
, (char const *) nullptr);
334 /* The above handles --help and --version.
335 Since there is no other invocation of getopt, handle '--' here. */
336 unsigned int u_argc
= argc
;
337 if (1 < u_argc
&& STREQ (argv
[1], "--"))
345 error (0, 0, _("missing operand"));
346 usage (EXPR_INVALID
);
353 error (EXPR_INVALID
, 0, _("syntax error: unexpected argument %s"),
354 quotearg_n_style (0, locale_quoting_style
, *args
));
358 main_exit (null (v
));
361 /* Return a VALUE for I. */
364 int_value (unsigned long int i
)
366 VALUE
*v
= xmalloc (sizeof *v
);
368 mpz_init_set_ui (v
->u
.i
, i
);
372 /* Return a VALUE for S. */
375 str_value (char const *s
)
377 VALUE
*v
= xmalloc (sizeof *v
);
379 v
->u
.s
= xstrdup (s
);
383 /* Free VALUE V, including structure components. */
388 if (v
->type
== string
)
403 mpz_out_str (stdout
, 10, v
->u
.i
);
414 /* Return true if V is a null-string or zero-number. */
423 return mpz_sgn (v
->u
.i
) == 0;
426 char const *cp
= v
->u
.s
;
446 /* Return true if CP takes the form of an integer. */
450 looks_like_integer (char const *cp
)
462 /* Coerce V to a string value (can't fail). */
471 char *s
= mpz_get_str (nullptr, 10, v
->u
.i
);
484 /* Coerce V to an integer value. Return true on success, false on failure. */
497 if (! looks_like_integer (s
))
499 if (mpz_init_set_str (v
->u
.i
, s
, 10) != 0)
500 error (EXPR_FAILURE
, ERANGE
, "%s", (s
));
510 /* Extract a size_t value from an integer value I.
511 If the value is negative, return SIZE_MAX.
512 If the value is too large, return SIZE_MAX - 1. */
518 if (mpz_fits_ulong_p (i
))
520 unsigned long int ul
= mpz_get_ui (i
);
527 /* Return true and advance if the next token matches STR exactly.
528 STR must not be null. */
531 nextarg (char const *str
)
533 if (*args
== nullptr)
537 bool r
= STREQ (*args
, str
);
543 /* Return true if there are no more tokens. */
551 /* Report missing operand.
552 There is an implicit assumption that there was a previous argument,
553 and (args-1) is valid. */
555 require_more_args (void)
558 error (EXPR_INVALID
, 0, _("syntax error: missing argument after %s"),
559 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
564 /* Print evaluation trace and args remaining. */
573 for (a
= args
; *a
; a
++)
579 /* Do the : operator.
580 SV is the VALUE for the lhs (the string),
581 PV is the VALUE for the rhs (the pattern). */
584 docolon (VALUE
*sv
, VALUE
*pv
)
588 struct re_pattern_buffer re_buffer
;
589 char fastmap
[UCHAR_MAX
+ 1];
590 struct re_registers re_regs
;
596 re_regs
.num_regs
= 0;
597 re_regs
.start
= nullptr;
598 re_regs
.end
= nullptr;
600 re_buffer
.buffer
= nullptr;
601 re_buffer
.allocated
= 0;
602 re_buffer
.fastmap
= fastmap
;
603 re_buffer
.translate
= nullptr;
605 RE_SYNTAX_POSIX_BASIC
& ~RE_CONTEXT_INVALID_DUP
& ~RE_NO_EMPTY_RANGES
;
606 errmsg
= re_compile_pattern (pv
->u
.s
, strlen (pv
->u
.s
), &re_buffer
);
608 error (EXPR_INVALID
, 0, "%s", (errmsg
));
609 re_buffer
.newline_anchor
= 0;
611 matchlen
= re_match (&re_buffer
, sv
->u
.s
, strlen (sv
->u
.s
), 0, &re_regs
);
614 /* Were \(...\) used? */
615 if (re_buffer
.re_nsub
> 0)
617 if (re_regs
.end
[1] < 0)
621 sv
->u
.s
[re_regs
.end
[1]] = '\0';
622 v
= str_value (sv
->u
.s
+ re_regs
.start
[1]);
627 /* In multibyte locales, convert the matched offset (=number of bytes)
628 to the number of matched characters. */
629 size_t i
= (MB_CUR_MAX
== 1
631 : mbs_offset_to_chars (sv
->u
.s
, matchlen
));
635 else if (matchlen
== -1)
637 /* Match failed -- return the right kind of null. */
638 if (re_buffer
.re_nsub
> 0)
645 matchlen
== -2 ? errno
: EOVERFLOW
,
646 _("error in regular expression matcher"));
648 if (0 < re_regs
.num_regs
)
650 free (re_regs
.start
);
653 re_buffer
.fastmap
= nullptr;
654 regfree (&re_buffer
);
658 /* Handle bare operands and ( expr ) syntax. */
661 eval7 (bool evaluate
)
668 require_more_args ();
674 error (EXPR_INVALID
, 0, _("syntax error: expecting ')' after %s"),
675 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
677 error (EXPR_INVALID
, 0, _("syntax error: expecting ')' instead of %s"),
678 quotearg_n_style (0, locale_quoting_style
, *args
));
683 error (EXPR_INVALID
, 0, _("syntax error: unexpected ')'"));
685 return str_value (*args
++);
688 /* Handle match, substr, index, and length keywords, and quoting "+". */
691 eval6 (bool evaluate
)
704 require_more_args ();
705 return str_value (*args
++);
707 else if (nextarg ("length"))
709 r
= eval6 (evaluate
);
711 v
= int_value (mbslen (r
->u
.s
));
715 else if (nextarg ("match"))
717 l
= eval6 (evaluate
);
718 r
= eval6 (evaluate
);
729 else if (nextarg ("index"))
733 l
= eval6 (evaluate
);
734 r
= eval6 (evaluate
);
737 pos
= mbs_logical_cspn (l
->u
.s
, r
->u
.s
);
743 else if (nextarg ("substr"))
745 l
= eval6 (evaluate
);
746 i1
= eval6 (evaluate
);
747 i2
= eval6 (evaluate
);
750 if (!toarith (i1
) || !toarith (i2
))
754 size_t pos
= getsize (i1
->u
.i
);
755 size_t len
= getsize (i2
->u
.i
);
757 char *s
= mbs_logical_substr (l
->u
.s
, pos
, len
);
767 return eval7 (evaluate
);
770 /* Handle : operator (pattern matching).
771 Calls docolon to do the real work. */
774 eval5 (bool evaluate
)
783 l
= eval6 (evaluate
);
788 r
= eval6 (evaluate
);
802 /* Handle *, /, % operators. */
805 eval4 (bool evaluate
)
809 enum { multiply
, divide
, mod
} fxn
;
814 l
= eval5 (evaluate
);
819 else if (nextarg ("/"))
821 else if (nextarg ("%"))
825 r
= eval5 (evaluate
);
828 if (!toarith (l
) || !toarith (r
))
829 error (EXPR_INVALID
, 0, _("non-integer argument"));
830 if (fxn
!= multiply
&& mpz_sgn (r
->u
.i
) == 0)
831 error (EXPR_INVALID
, 0, _("division by zero"));
832 ((fxn
== multiply
? mpz_mul
833 : fxn
== divide
? mpz_tdiv_q
835 (l
->u
.i
, l
->u
.i
, r
->u
.i
));
841 /* Handle +, - operators. */
844 eval3 (bool evaluate
)
848 enum { plus
, minus
} fxn
;
853 l
= eval4 (evaluate
);
858 else if (nextarg ("-"))
862 r
= eval4 (evaluate
);
865 if (!toarith (l
) || !toarith (r
))
866 error (EXPR_INVALID
, 0, _("non-integer argument"));
867 (fxn
== plus
? mpz_add
: mpz_sub
) (l
->u
.i
, l
->u
.i
, r
->u
.i
);
873 /* Handle comparisons. */
876 eval2 (bool evaluate
)
883 l
= eval3 (evaluate
);
889 less_than
, less_equal
, equal
, not_equal
, greater_equal
, greater_than
895 else if (nextarg ("<="))
897 else if (nextarg ("=") || nextarg ("=="))
899 else if (nextarg ("!="))
901 else if (nextarg (">="))
903 else if (nextarg (">"))
907 r
= eval3 (evaluate
);
915 if (looks_like_integer (l
->u
.s
) && looks_like_integer (r
->u
.s
))
916 cmp
= strintcmp (l
->u
.s
, r
->u
.s
);
920 cmp
= strcoll (l
->u
.s
, r
->u
.s
);
924 error (0, errno
, _("string comparison failed"));
925 error (0, 0, _("set LC_ALL='C' to work around the problem"));
926 error (EXPR_INVALID
, 0,
927 _("the strings compared were %s and %s"),
928 quotearg_n_style (0, locale_quoting_style
, l
->u
.s
),
929 quotearg_n_style (1, locale_quoting_style
, r
->u
.s
));
935 case less_than
: val
= (cmp
< 0); break;
936 case less_equal
: val
= (cmp
<= 0); break;
937 case equal
: val
= (cmp
== 0); break;
938 case not_equal
: val
= (cmp
!= 0); break;
939 case greater_equal
: val
= (cmp
>= 0); break;
940 case greater_than
: val
= (cmp
> 0); break;
941 default: unreachable ();
954 eval1 (bool evaluate
)
962 l
= eval2 (evaluate
);
967 r
= eval2 (evaluate
&& !null (l
));
968 if (null (l
) || null (r
))
993 l
= eval1 (evaluate
);
998 r
= eval1 (evaluate
&& null (l
));