docproc: avoid segfault during file closing
[busybox-git.git] / shell / math.c
blobe90a38f05d393b6e4a566694cc88c050b02d526c
1 /*
2 * Arithmetic code ripped out of ash shell for code sharing.
4 * This code is derived from software contributed to Berkeley by
5 * Kenneth Almquist.
7 * Original BSD copyright notice is retained at the end of this file.
9 * Copyright (c) 1989, 1991, 1993, 1994
10 * The Regents of the University of California. All rights reserved.
12 * Copyright (c) 1997-2005 Herbert Xu <herbert@gondor.apana.org.au>
13 * was re-ported from NetBSD and debianized.
15 * rewrite arith.y to micro stack based cryptic algorithm by
16 * Copyright (c) 2001 Aaron Lehmann <aaronl@vitelus.com>
18 * Modified by Paul Mundt <lethal@linux-sh.org> (c) 2004 to support
19 * dynamic variables.
21 * Modified by Vladimir Oleynik <dzo@simtreas.ru> (c) 2001-2005 to be
22 * used in busybox and size optimizations,
23 * rewrote arith (see notes to this), added locale support,
24 * rewrote dynamic variables.
26 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
28 /* Copyright (c) 2001 Aaron Lehmann <aaronl@vitelus.com>
30 * Permission is hereby granted, free of charge, to any person obtaining
31 * a copy of this software and associated documentation files (the
32 * "Software"), to deal in the Software without restriction, including
33 * without limitation the rights to use, copy, modify, merge, publish,
34 * distribute, sublicense, and/or sell copies of the Software, and to
35 * permit persons to whom the Software is furnished to do so, subject to
36 * the following conditions:
38 * The above copyright notice and this permission notice shall be
39 * included in all copies or substantial portions of the Software.
41 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
44 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
45 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
46 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
47 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
49 /* This is my infix parser/evaluator. It is optimized for size, intended
50 * as a replacement for yacc-based parsers. However, it may well be faster
51 * than a comparable parser written in yacc. The supported operators are
52 * listed in #defines below. Parens, order of operations, and error handling
53 * are supported. This code is thread safe. The exact expression format should
54 * be that which POSIX specifies for shells.
56 * The code uses a simple two-stack algorithm. See
57 * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
58 * for a detailed explanation of the infix-to-postfix algorithm on which
59 * this is based (this code differs in that it applies operators immediately
60 * to the stack instead of adding them to a queue to end up with an
61 * expression).
64 * Aug 24, 2001 Manuel Novoa III
66 * Reduced the generated code size by about 30% (i386) and fixed several bugs.
68 * 1) In arith_apply():
69 * a) Cached values of *numptr and &(numptr[-1]).
70 * b) Removed redundant test for zero denominator.
72 * 2) In arith():
73 * a) Eliminated redundant code for processing operator tokens by moving
74 * to a table-based implementation. Also folded handling of parens
75 * into the table.
76 * b) Combined all 3 loops which called arith_apply to reduce generated
77 * code size at the cost of speed.
79 * 3) The following expressions were treated as valid by the original code:
80 * 1() , 0! , 1 ( *3 ) .
81 * These bugs have been fixed by internally enclosing the expression in
82 * parens and then checking that all binary ops and right parens are
83 * preceded by a valid expression (NUM_TOKEN).
85 * Note: It may be desirable to replace Aaron's test for whitespace with
86 * ctype's isspace() if it is used by another busybox applet or if additional
87 * whitespace chars should be considered. Look below the "#include"s for a
88 * precompiler test.
91 * Aug 26, 2001 Manuel Novoa III
93 * Return 0 for null expressions. Pointed out by Vladimir Oleynik.
95 * Merge in Aaron's comments previously posted to the busybox list,
96 * modified slightly to take account of my changes to the code.
99 * (C) 2003 Vladimir Oleynik <dzo@simtreas.ru>
101 * - allow access to variable,
102 * use recursive value indirection: c="2*2"; a="c"; echo $((a+=2)) produce 6
103 * - implement assign syntax (VAR=expr, +=, *= etc)
104 * - implement exponentiation (** operator)
105 * - implement comma separated - expr, expr
106 * - implement ++expr --expr expr++ expr--
107 * - implement expr ? expr : expr (but second expr is always calculated)
108 * - allow hexadecimal and octal numbers
109 * - restore lost XOR operator
110 * - protect $((num num)) as true zero expr (Manuel's error)
111 * - always use special isspace(), see comment from bash ;-)
113 #include "libbb.h"
114 #include "math.h"
116 #if 1
117 # define dbg(...) ((void)0)
118 #else
119 # define dbg(...) bb_error_msg(__VA_ARGS__)
120 #endif
122 typedef unsigned char operator;
124 /* An operator's token id is a bit of a bitfield. The lower 5 bits are the
125 * precedence, and 3 high bits are an ID unique across operators of that
126 * precedence. The ID portion is so that multiple operators can have the
127 * same precedence, ensuring that the leftmost one is evaluated first.
128 * Consider * and /
130 #define tok_decl(prec,id) (((id)<<5) | (prec))
131 #define ID_SHIFT 5
132 #define PREC(op) ((op) & 0x1f)
134 #define PREC_LPAREN 0
135 #define TOK_LPAREN tok_decl(0,0)
136 /* Precedence value of RPAREN is used only to distinguish it from LPAREN */
137 #define TOK_RPAREN tok_decl(1,1)
139 #define TOK_COMMA tok_decl(1,0)
141 /* All assignments are right associative and have the same precedence,
142 * but there are 11 of them, which doesn't fit into 3 bits for unique id.
143 * Abusing another precedence level:
145 #define PREC_ASSIGN1 2
146 #define TOK_ASSIGN tok_decl(2,0)
147 #define TOK_AND_ASSIGN tok_decl(2,1)
148 #define TOK_OR_ASSIGN tok_decl(2,2)
149 #define TOK_XOR_ASSIGN tok_decl(2,3)
150 #define TOK_ADD_ASSIGN tok_decl(2,4)
151 #define TOK_SUB_ASSIGN tok_decl(2,5)
152 #define TOK_LSHIFT_ASSIGN tok_decl(2,6)
153 #define TOK_RSHIFT_ASSIGN tok_decl(2,7)
155 #define PREC_ASSIGN2 3
156 #define TOK_MUL_ASSIGN tok_decl(3,0)
157 /* "/" and "/=" ops have the same id bits */
158 #define DIV_ID1 1
159 #define TOK_DIV_ASSIGN tok_decl(3,DIV_ID1)
160 #define TOK_REM_ASSIGN tok_decl(3,2)
162 #define fix_assignment_prec(prec) do { prec -= (prec == 3); } while (0)
164 /* Ternary conditional operator is right associative too */
166 * bash documentation says that precedence order is:
167 * ...
168 * expr ? expr1 : expr2
169 * = *= /= %= += -= <<= >>= &= ^= |=
170 * exprA , exprB
171 * What it omits is that expr1 is parsed as if parenthesized
172 * (this matches the rules of ?: in C language):
173 * "v ? 1,2 : 3,4" is parsed as "(v ? (1,2) : 3),4"
174 * "v ? a=2 : b=4" is parsed as "(v ? (a=2) : b)=4" (thus, this is a syntax error)
176 #define TOK_CONDITIONAL tok_decl(4,0)
177 #define TOK_CONDITIONAL_SEP tok_decl(4,1)
179 #define TOK_OR tok_decl(5,0)
181 #define TOK_AND tok_decl(6,0)
183 #define TOK_BOR tok_decl(7,0)
185 #define TOK_BXOR tok_decl(8,0)
187 #define TOK_BAND tok_decl(9,0)
189 #define TOK_EQ tok_decl(10,0)
190 #define TOK_NE tok_decl(10,1)
192 #define TOK_LT tok_decl(11,0)
193 #define TOK_GT tok_decl(11,1)
194 #define TOK_GE tok_decl(11,2)
195 #define TOK_LE tok_decl(11,3)
197 #define TOK_LSHIFT tok_decl(12,0)
198 #define TOK_RSHIFT tok_decl(12,1)
200 #define TOK_ADD tok_decl(13,0)
201 #define TOK_SUB tok_decl(13,1)
203 #define TOK_MUL tok_decl(14,0)
204 #define TOK_DIV tok_decl(14,DIV_ID1)
205 #define TOK_REM tok_decl(14,2)
207 /* Exponent is right associative */
208 #define TOK_EXPONENT tok_decl(15,1)
210 /* Unary operators */
211 #define UNARYPREC 16
212 #define TOK_BNOT tok_decl(UNARYPREC,0)
213 #define TOK_NOT tok_decl(UNARYPREC,1)
215 #define TOK_UMINUS tok_decl(UNARYPREC+1,0)
216 #define TOK_UPLUS tok_decl(UNARYPREC+1,1)
218 #define PREC_PRE (UNARYPREC+2)
219 #define TOK_PRE_INC tok_decl(PREC_PRE,0)
220 #define TOK_PRE_DEC tok_decl(PREC_PRE,1)
222 #define PREC_POST (UNARYPREC+3)
223 #define TOK_POST_INC tok_decl(PREC_POST,0)
224 #define TOK_POST_DEC tok_decl(PREC_POST,1)
226 /* TOK_VALUE marks a number, name, name++/name--, or (EXPR):
227 * IOW: something which can be used as the left side of a binary op.
228 * Since it's never pushed to opstack, its precedence does not matter.
230 #define TOK_VALUE tok_decl(PREC_POST,2)
232 static int
233 is_assign_op(operator op)
235 operator prec = PREC(op);
236 return prec == PREC_ASSIGN1
237 || prec == PREC_ASSIGN2
238 || prec == PREC_PRE
239 || prec == PREC_POST;
242 static int
243 is_right_associative(operator prec)
245 return prec == PREC(TOK_ASSIGN)
246 || prec == PREC(TOK_EXPONENT)
247 || prec == PREC(TOK_CONDITIONAL);
// One slot of the number stack used by evaluate_string() and arith_apply().
250 typedef struct {
// Numeric value of the slot.
251 arith_t val;
// Start of the variable name inside the expression text, or NULL when the slot is a plain number.
// The name is not NUL-terminated there; users locate its end with endofname().
252 const char *var_name;
253 } var_or_num_t;
// Helpers to test whether a slot still refers to a variable (non-NULL var_name).
255 #define VALID_NAME(name) (name)
256 #define NOT_NAME(name) (!(name))
// Node of the list of variable names currently being expanded.
// arith_lookup_val() links a local node into math_state->list_of_recursed_names
// before evaluating a variable value and unlinks it afterwards, which lets it
// report a recursion loop when the same name shows up again.
258 typedef struct remembered_name {
// Next (older) name on the list, NULL at the end.
259 struct remembered_name *next;
// Start of the remembered name; compared by length, not as a NUL-terminated string.
260 const char *var_name;
261 } remembered_name;
263 static ALWAYS_INLINE int isalnum_(int c)
265 return (isalnum(c) || c == '_');
268 static arith_t
269 evaluate_string(arith_state_t *math_state, const char *expr);
271 static arith_t
272 arith_lookup_val(arith_state_t *math_state, const char *name, char *endname)
274 char c;
275 const char *p;
277 c = *endname;
278 *endname = '\0';
279 p = math_state->lookupvar(name);
280 *endname = c;
281 if (p) {
282 arith_t val;
283 size_t len = endname - name;
284 remembered_name *cur;
285 remembered_name remember;
287 /* did we already see this name?
288 * testcase: a=b; b=a; echo $((a))
290 for (cur = math_state->list_of_recursed_names; cur; cur = cur->next) {
291 if (strncmp(cur->var_name, name, len) == 0
292 && !isalnum_(cur->var_name[len])
294 /* yes */
295 math_state->errmsg = "expression recursion loop detected";
296 return -1;
300 /* push current var name */
301 remember.var_name = name;
302 remember.next = math_state->list_of_recursed_names;
303 math_state->list_of_recursed_names = &remember;
305 /* recursively evaluate p as expression */
306 /* this sets math_state->errmsg on error */
307 val = evaluate_string(math_state, p);
309 /* pop current var name */
310 math_state->list_of_recursed_names = remember.next;
312 return val;
314 /* treat undefined var as 0 */
315 return 0;
318 /* "Applying" a token means performing it on the top elements on the integer
319 * stack. For an unary operator it will only change the top element,
320 * a binary operator will pop two arguments and push the result,
321 * the ternary ?: op will pop three arguments and push the result.
// Apply operator op to the operands on top of the number stack.
// math_state  - evaluation context; evaluation_disabled and setvar() are used here
// op          - token value of the operator to apply
// numstack    - bottom of the number stack
// numstackptr - in/out pointer to the first free slot; lowered by one for a binary operator and by two for the ternary operator
// Returns NULL on success, otherwise a constant error message string.
323 static NOINLINE const char*
324 arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_or_num_t **numstackptr)
326 #define NUMSTACKPTR (*numstackptr)
328 var_or_num_t *top_of_stack;
329 arith_t rez;
331 /* There is no operator that can work without arguments */
332 if (NUMSTACKPTR == numstack)
333 goto syntax_err;
335 top_of_stack = NUMSTACKPTR - 1;
// Ternary: the stack holds expr1, expr2, expr3 from bottom to top.
// The stack pointer is lowered so that only one slot remains, and the chosen value is stored into the expr1 slot at ret_rez.
337 if (op == TOK_CONDITIONAL_SEP) {
338 /* "expr1 ? expr2 : expr3" operation */
339 var_or_num_t *expr1 = &top_of_stack[-2];
340 NUMSTACKPTR = expr1 + 1;
341 if (expr1 < numstack) /* Example: $((2:3)) */
342 return "malformed ?: operator";
343 if (expr1->val != 0) /* select expr2 or expr3 */
344 top_of_stack--;
345 rez = top_of_stack->val;
346 top_of_stack = expr1;
347 goto ret_rez;
349 if (op == TOK_CONDITIONAL) /* Example: $((a ? b)) */
350 return "malformed ?: operator";
// Unary operators work on the top slot only; rez starts as its value.
352 rez = top_of_stack->val;
353 if (op == TOK_UMINUS)
354 rez = -rez;
355 else if (op == TOK_NOT)
356 rez = !rez;
357 else if (op == TOK_BNOT)
358 rez = ~rez;
359 else if (op == TOK_POST_INC || op == TOK_PRE_INC)
360 rez++;
361 else if (op == TOK_POST_DEC || op == TOK_PRE_DEC)
362 rez--;
363 else /*if (op != TOK_UPLUS) - always true, we drop TOK_UPLUS earlier */ {
364 /* Binary operators */
365 arith_t right_side_val;
367 if (top_of_stack == numstack) /* have two arguments? */
368 goto syntax_err; /* no */
370 /* Pop numstack */
371 NUMSTACKPTR = top_of_stack; /* this decrements NUMSTACKPTR */
// The right operand was already popped above, so the stack depth stays in step even though nothing is computed here.
373 if (math_state->evaluation_disabled) {
374 dbg("binary op %02x skipped", op);
375 return NULL;
376 /* bash 5.2.12 does not execute "2/0" in disabled
377 * branches of ?: (and thus does not complain),
378 * but complains about negative exp: "2**-1".
379 * I don't think we need to emulate that.
383 top_of_stack--; /* now points to left side */
384 right_side_val = rez;
385 rez = top_of_stack->val;
386 if (op == TOK_BOR || op == TOK_OR_ASSIGN)
387 rez |= right_side_val;
388 else if (op == TOK_OR)
389 rez = right_side_val || rez;
390 else if (op == TOK_BAND || op == TOK_AND_ASSIGN)
391 rez &= right_side_val;
392 else if (op == TOK_BXOR || op == TOK_XOR_ASSIGN)
393 rez ^= right_side_val;
394 else if (op == TOK_AND)
395 rez = rez && right_side_val;
396 else if (op == TOK_EQ)
397 rez = (rez == right_side_val);
398 else if (op == TOK_NE)
399 rez = (rez != right_side_val);
400 else if (op == TOK_GE)
401 rez = (rez >= right_side_val);
402 else if (op == TOK_RSHIFT || op == TOK_RSHIFT_ASSIGN)
403 rez >>= right_side_val;
404 else if (op == TOK_LSHIFT || op == TOK_LSHIFT_ASSIGN)
405 rez <<= right_side_val;
406 else if (op == TOK_GT)
407 rez = (rez > right_side_val);
408 else if (op == TOK_LT)
409 rez = (rez < right_side_val);
410 else if (op == TOK_LE)
411 rez = (rez <= right_side_val);
412 else if (op == TOK_MUL || op == TOK_MUL_ASSIGN)
413 rez *= right_side_val;
414 else if (op == TOK_ADD || op == TOK_ADD_ASSIGN)
415 rez += right_side_val;
416 else if (op == TOK_SUB || op == TOK_SUB_ASSIGN)
417 rez -= right_side_val;
418 else if (op == TOK_ASSIGN || op == TOK_COMMA)
419 rez = right_side_val;
420 else if (op == TOK_EXPONENT) {
421 arith_t c;
422 if (right_side_val < 0)
423 return "exponent less than 0";
424 c = 1;
// When the exponent is even, the base is squared and the exponent halved first;
// every pass then multiplies c by the base once and decrements the exponent.
425 while (right_side_val != 0) {
426 if ((right_side_val & 1) == 0) {
427 /* this if() block is not necessary for correctness,
428 * but otherwise echo $((3**999999999999999999))
429 * takes a VERY LONG time
430 * (and it's not interruptible by ^C)
432 rez *= rez;
433 right_side_val >>= 1;
435 c *= rez;
436 right_side_val--;
438 rez = c;
440 else /*if (op == TOK_DIV || op == TOK_DIV_ASSIGN
441 || op == TOK_REM || op == TOK_REM_ASSIGN) - always true */
443 if (right_side_val == 0)
444 return "divide by zero";
446 * bash 4.2.45 x86 64bit: SEGV on 'echo $((2**63 / -1))'
448 * MAX_NEGATIVE_INT / -1 = MAX_POSITIVE_INT+1
449 * and thus is not representable.
450 * Some CPUs segfault trying such op.
451 * Others overflow MAX_POSITIVE_INT+1 to
452 * MAX_NEGATIVE_INT (0x7fff+1 = 0x8000).
453 * Make sure to at least not SEGV here:
455 if (right_side_val == -1
456 && (rez << 1) == 0 /* MAX_NEGATIVE_INT or 0 */
458 right_side_val = 1;
// DIV and DIV_ASSIGN carry the DIV_ID1 id bit; REM and REM_ASSIGN use id 2 and lack it.
460 if (op & (DIV_ID1 << ID_SHIFT)) /* DIV or DIV_ASSIGN? */
461 rez /= right_side_val;
462 else
463 rez %= right_side_val;
// Reached by unary operators and by binary operators that were actually computed.
467 if (math_state->evaluation_disabled) {
468 dbg("unary op %02x skipped", op);
469 return NULL;
// Operators of the assignment and increment or decrement groups write the result back through setvar().
472 if (is_assign_op(op)) {
473 char buf[sizeof(arith_t)*3 + 2];
475 if (NOT_NAME(top_of_stack->var_name)) {
476 /* Hmm, 1=2 ? */
477 goto syntax_err;
479 /* Save to shell variable */
480 sprintf(buf, ARITH_FMT, rez);
// The name is not NUL-terminated inside the expression text, so the byte after it is zeroed for the call and then restored.
482 char *e = (char*)endofname(top_of_stack->var_name);
483 char c = *e;
484 *e = '\0';
485 math_state->setvar(top_of_stack->var_name, buf);
486 *e = c;
488 /* VAR++ or VAR--? */
489 if (PREC(op) == PREC_POST) {
490 /* Do not store new value to stack (keep old value) */
491 goto ret_NULL;
494 ret_rez:
495 top_of_stack->val = rez;
496 ret_NULL:
497 /* Erase var name, it is just a number now */
498 top_of_stack->var_name = NULL;
499 return NULL;
500 syntax_err:
501 return "arithmetic syntax error";
502 #undef NUMSTACKPTR
// Operator lookup table, searched front to back by evaluate_string().
// Each entry is the operator spelling, a NUL byte, then the one-byte TOK_ value.
// An operator that is a prefix of another one must come later, so that the longer spelling matches first.
// The spellings ++ and -- map to the POST variants here; evaluate_string() turns them into PRE variants when they do not follow a value.
505 /* longest must be first */
506 static const char op_tokens[] ALIGN1 = {
507 '<','<','=',0, TOK_LSHIFT_ASSIGN,
508 '>','>','=',0, TOK_RSHIFT_ASSIGN,
509 '<','<', 0, TOK_LSHIFT,
510 '>','>', 0, TOK_RSHIFT,
511 '|','|', 0, TOK_OR,
512 '&','&', 0, TOK_AND,
513 '!','=', 0, TOK_NE,
514 '<','=', 0, TOK_LE,
515 '>','=', 0, TOK_GE,
516 '=','=', 0, TOK_EQ,
517 '|','=', 0, TOK_OR_ASSIGN,
518 '&','=', 0, TOK_AND_ASSIGN,
519 '*','=', 0, TOK_MUL_ASSIGN,
520 '/','=', 0, TOK_DIV_ASSIGN,
521 '%','=', 0, TOK_REM_ASSIGN,
522 '+','=', 0, TOK_ADD_ASSIGN,
523 '-','=', 0, TOK_SUB_ASSIGN,
524 '-','-', 0, TOK_POST_DEC,
525 '^','=', 0, TOK_XOR_ASSIGN,
526 '+','+', 0, TOK_POST_INC,
527 '*','*', 0, TOK_EXPONENT,
528 '!', 0, TOK_NOT,
529 '<', 0, TOK_LT,
530 '>', 0, TOK_GT,
531 '=', 0, TOK_ASSIGN,
532 '|', 0, TOK_BOR,
533 '&', 0, TOK_BAND,
534 '*', 0, TOK_MUL,
535 '/', 0, TOK_DIV,
536 '%', 0, TOK_REM,
537 '+', 0, TOK_ADD,
538 '-', 0, TOK_SUB,
539 '^', 0, TOK_BXOR,
540 '~', 0, TOK_BNOT,
541 ',', 0, TOK_COMMA,
542 '?', 0, TOK_CONDITIONAL,
543 ':', 0, TOK_CONDITIONAL_SEP,
544 ')', 0, TOK_RPAREN,
545 '(', 0, TOK_LPAREN,
// Address of the last byte of op_tokens. evaluate_string() points expr here once the input is exhausted.
// NOTE(review): the search loop and the end-of-input check both expect a NUL byte at the end of the table;
// the terminating entry is not visible in this excerpt - confirm against the full file.
548 #define END_POINTER (&op_tokens[sizeof(op_tokens)-1])
550 #if ENABLE_FEATURE_SH_MATH_BASE
551 static arith_t parse_with_base(const char *nptr, char **endptr, unsigned base)
553 arith_t n = 0;
554 const char *start = nptr;
556 for (;;) {
557 unsigned digit = (unsigned)*nptr - '0';
558 if (digit >= 10 /* not 0..9 */
559 && digit <= 'z' - '0' /* reject e.g. $((64#~)) */
561 /* current char is one of :;<=>?@A..Z[\]^_`a..z */
563 /* in bases up to 36, case does not matter for a-z,
564 * map @A..Z and `a..z to 9..35: */
565 digit = (unsigned)(*nptr | 0x20) - ('a' - 10);
566 if (base > 36 && *nptr <= '_') {
567 /* base > 36: A-Z,@,_ are 36-61,62,63 */
568 if (*nptr == '_')
569 digit = 63;
570 else if (*nptr == '@')
571 digit = 62;
572 else if (digit < 36) /* A-Z */
573 digit += 36 - 10;
574 else
575 break; /* error: one of [\]^ */
577 //bb_error_msg("ch:'%c'%d digit:%u", *nptr, *nptr, digit);
578 if (digit < 10) /* reject e.g. $((36#@)) */
579 break;
581 if (digit >= base)
582 break;
583 /* bash does not check for overflows */
584 n = n * base + digit;
585 nptr++;
587 *endptr = (char*)nptr;
588 /* "64#" and "64#+1" used to be valid expressions, but bash 5.2.15
589 * no longer allow such, detect this:
591 // NB: bash allows $((0x)), this is probably a bug...
592 if (nptr == start)
593 *endptr = NULL; /* there weren't any digits, bad */
594 return n;
597 static arith_t strto_arith_t(const char *nptr, char **endptr)
599 /* NB: we do not use strtoull here to be bash-compatible:
600 * $((99999999999999999999)) is 7766279631452241919
601 * (the 64-bit truncated value).
603 unsigned base;
605 /* nptr[0] is '0'..'9' here */
607 base = nptr[0] - '0';
608 if (base == 0) { /* nptr[0] is '0' */
609 base = 8;
610 if ((nptr[1] | 0x20) == 'x') {
611 base = 16;
612 nptr += 2;
614 // NB: bash allows $((0x)), this is probably a bug...
615 return parse_with_base(nptr, endptr, base);
618 /* base is 1..9 here */
620 if (nptr[1] == '#') {
621 if (base > 1)
622 return parse_with_base(nptr + 2, endptr, base);
623 /* else: "1#NN", bash says "invalid arithmetic base" */
626 if (isdigit(nptr[1]) && nptr[2] == '#') {
627 base = 10 * base + (nptr[1] - '0');
628 /* base is at least 10 here */
629 if (base <= 64)
630 return parse_with_base(nptr + 3, endptr, base);
631 /* else: bash says "invalid arithmetic base" */
634 return parse_with_base(nptr, endptr, 10);
636 #else /* !ENABLE_FEATURE_SH_MATH_BASE */
637 # if ENABLE_FEATURE_SH_MATH_64
638 # define strto_arith_t(nptr, endptr) strtoull(nptr, endptr, 0)
639 # else
640 # define strto_arith_t(nptr, endptr) strtoul(nptr, endptr, 0)
641 # endif
642 #endif
// Evaluate one arithmetic expression string with a number stack and an operator stack.
// math_state - evaluation context; errmsg, evaluation_disabled and the variable callbacks are used here
// expr       - NUL-terminated expression text
// Returns the single value left on the number stack, or 0 when expr holds nothing but whitespace.
// On a syntax or operator error, stores a message in math_state->errmsg and returns -1;
// an error raised while expanding a variable is passed through as returned by arith_lookup_val().
// Both stacks are obtained with alloca() and sized from the expression length.
// The function is re-entered through arith_lookup_val() when a variable value is itself an expression.
644 static arith_t
645 evaluate_string(arith_state_t *math_state, const char *expr)
647 /* Stack of integers/names */
648 var_or_num_t *numstack, *numstackptr;
649 /* Stack of operator tokens */
650 operator *opstack, *opstackptr;
651 /* To detect whether we are after a "value": */
652 operator lasttok;
653 /* To insert implicit () in ?: ternary op: */
// 0xff means that no extra operator is queued for insertion.
654 operator insert_op = 0xff;
655 unsigned ternary_level = 0;
656 const char *errmsg;
657 const char *start_expr = expr = skip_whitespace(expr);
// Estimate the stack sizes: expr_len counts characters, with each run of whitespace counted once.
660 unsigned expr_len = strlen(expr);
661 /* If LOTS of whitespace, do not blow up the estimation */
662 const char *p = expr;
663 while (*p) {
664 /* in a run of whitespace, count only 1st char */
665 if (isspace(*p)) {
666 while (p++, isspace(*p))
667 expr_len--;
668 } else {
669 p++;
672 dbg("expr:'%s' expr_len:%u", expr, expr_len);
673 /* expr_len deep opstack is needed. Think "------------7".
674 * Only "?" operator temporarily needs two opstack slots
675 * (IOW: more than one slot), but its second slot (LPAREN)
676 * is popped off when ":" is reached.
678 expr_len++; /* +1 for 1st LPAREN. See what $((1?)) pushes to opstack */
679 opstackptr = opstack = alloca(expr_len * sizeof(opstack[0]));
680 /* There can be no more than (expr_len/2 + 1)
681 * integers/names in any given correct or incorrect expression.
682 * (modulo "09", "0v" cases where 2 chars are 2 ints/names,
683 * but we have code to detect that early)
685 expr_len = (expr_len / 2)
686 + 1 /* "1+2" has two nums, 2 = len/2+1, NOT len/2 */;
687 numstackptr = numstack = alloca(expr_len * sizeof(numstack[0]));
690 /* Start with a left paren */
691 dbg("(%d) op:TOK_LPAREN", (int)(opstackptr - opstack));
692 *opstackptr++ = lasttok = TOK_LPAREN;
694 while (1) {
695 const char *p;
696 operator op;
697 operator prec;
699 expr = skip_whitespace(expr);
700 if (*expr == '\0') {
701 if (expr == start_expr) {
702 /* Null expression */
703 return 0;
706 /* This is only reached after all tokens have been extracted from the
707 * input stream. If there are still tokens on the operator stack, they
708 * are to be applied in order. At the end, there should be a final
709 * result on the integer stack */
// First time here expr still points into the input: it is switched to END_POINTER and an
// implicit closing paren is fed through the normal operator path. The second time expr
// equals END_POINTER and the result is checked.
// NOTE(review): this relies on the byte at END_POINTER being NUL - confirm against the op_tokens definition.
711 if (expr != END_POINTER) {
712 /* If we haven't done so already,
713 * append a closing right paren
714 * and let the loop process it */
715 expr = END_POINTER;
716 op = TOK_RPAREN;
717 goto tok_found1;
719 /* At this point, we're done with the expression */
720 if (numstackptr != numstack + 1) {
721 /* if there is not exactly one result, it's bad */
722 /* Example: $((1 2)) */
723 goto syntax_err;
725 return numstack->val;
728 p = endofname(expr);
729 if (p != expr) {
730 /* Name */
731 if (!math_state->evaluation_disabled) {
732 numstackptr->var_name = expr;
733 dbg("[%d] var:'%.*s'", (int)(numstackptr - numstack), (int)(p - expr), expr);
734 expr = skip_whitespace(p);
735 /* If it is not followed by "=" operator... */
736 if (expr[0] != '=' /* not "=..." */
737 || expr[1] == '=' /* or "==..." */
739 /* Evaluate variable to value */
740 arith_t val = arith_lookup_val(math_state, numstackptr->var_name, (char*)p);
741 if (math_state->errmsg)
742 return val; /* -1 */
743 numstackptr->val = val;
745 } else {
746 dbg("[%d] var:IGNORED", (int)(numstackptr - numstack));
747 expr = p;
748 numstackptr->var_name = NULL; /* not needed, paranoia */
749 numstackptr->val = 0; /* not needed, paranoia */
751 push_value:
752 numstackptr++;
753 lasttok = TOK_VALUE;
754 continue;
757 if (isdigit(*expr)) {
758 /* Number */
759 char *end;
760 numstackptr->var_name = NULL;
761 /* code is smaller compared to using &expr here: */
762 numstackptr->val = strto_arith_t(expr, &end);
763 expr = end;
764 dbg("[%d] val:%lld", (int)(numstackptr - numstack), numstackptr->val);
765 if (!expr) /* example: $((10#)) */
766 goto syntax_err;
767 /* A number can't be followed by another number, or a variable name.
768 * We'd catch this later anyway, but this would require numstack[]
769 * to be ~twice as deep to handle strings where _every_ char is
770 * a new number or name.
771 * Examples: "09" is two numbers, "0v" is number and name.
773 if (isalnum(*expr) || *expr == '_')
774 goto syntax_err;
775 goto push_value;
778 /* Should be an operator */
780 /* Special case: XYZ--, XYZ++, --XYZ, ++XYZ are recognized
781 * only if XYZ is a variable name, not a number or EXPR. IOW:
782 * "a+++v" is a++ + v.
783 * "(a)+++7" is ( a ) + + + 7.
784 * "7+++v" is 7 + ++v, not 7++ + v.
785 * "--7" is - - 7, not --7.
786 * "++++a" is + + ++a, not ++ ++a.
788 if ((expr[0] == '+' || expr[0] == '-')
789 && (expr[1] == expr[0])
791 if (numstackptr == numstack || NOT_NAME(numstackptr[-1].var_name)) {
792 /* not a VAR++ */
793 char next = skip_whitespace(expr + 2)[0];
794 if (!(isalpha(next) || next == '_')) {
795 /* not a ++VAR */
796 op = (expr[0] == '+' ? TOK_ADD : TOK_SUB);
797 expr++;
798 goto tok_found1;
// Linear search of op_tokens for the operator spelled at expr.
// On a match p is left on the NUL byte that precedes the token value.
803 p = op_tokens;
804 while (1) {
805 /* Compare expr to current op_tokens[] element */
806 const char *e = expr;
807 while (1) {
808 if (*p == '\0') {
809 /* Match: operator is found */
810 expr = e;
811 goto tok_found;
813 if (*p != *e)
814 break;
815 p++;
816 e++;
818 /* No match, go to next element of op_tokens[] */
819 while (*p)
820 p++;
821 p += 2; /* skip NUL and TOK_foo bytes */
822 if (*p == '\0') {
823 /* No next element, operator not found */
824 //math_state->syntax_error_at = expr;
825 goto syntax_err;
828 /* NB: expr now points past the operator */
829 tok_found:
830 op = p[1]; /* fetch TOK_foo value */
832 /* Special rule for "? EXPR :"
833 * "EXPR in the middle of ? : is parsed as if parenthesized"
834 * (this quirk originates in C grammar, I think).
836 if (op == TOK_CONDITIONAL) {
837 insert_op = TOK_LPAREN;
838 dbg("insert_op=%02x", insert_op);
840 if (op == TOK_CONDITIONAL_SEP) {
841 insert_op = op;
842 op = TOK_RPAREN;
843 dbg("insert_op=%02x op=%02x", insert_op, op);
845 tok_found1:
846 /* NAME++ is a "value" (something suitable for a binop) */
847 if (PREC(lasttok) == PREC_POST)
848 lasttok = TOK_VALUE;
850 /* Plus and minus are binary (not unary) _only_ if the last
851 * token was a "value". Think about it. It makes sense.
853 if (lasttok != TOK_VALUE) {
854 switch (op) {
855 case TOK_ADD:
856 //op = TOK_UPLUS;
857 //break;
858 /* Unary plus does nothing, do not even push it to opstack */
859 continue;
860 case TOK_SUB:
861 op = TOK_UMINUS;
862 break;
863 case TOK_POST_INC:
864 op = TOK_PRE_INC;
865 break;
866 case TOK_POST_DEC:
867 op = TOK_PRE_DEC;
868 break;
871 /* We don't want an unary operator to cause recursive descent on the
872 * stack, because there can be many in a row and it could cause an
873 * operator to be evaluated before its argument is pushed onto the
874 * integer stack.
875 * But for binary operators, "apply" everything on the operator
876 * stack until we find an operator with a lesser priority than the
877 * one we have just extracted. If op is right-associative,
878 * then stop "applying" on the equal priority too.
879 * Left paren will never be "applied" in this way.
881 prec = PREC(op);
882 if (prec != PREC_LPAREN && prec < UNARYPREC) {
883 /* Binary, ternary or RPAREN */
884 if (lasttok != TOK_VALUE) {
885 /* Must be preceded by a value.
886 * $((2 2 + * 3)) would be accepted without this.
888 goto syntax_err;
890 /* if op is RPAREN:
891 * while opstack is not empty:
892 * pop prev_op
893 * if prev_op is LPAREN (finished evaluating (EXPR)):
894 * goto N
895 * evaluate prev_op on top of numstack
896 * BUG (unpaired RPAREN)
897 * else (op is not RPAREN):
898 * while opstack is not empty:
899 * pop prev_op
900 * if can't evaluate prev_op (it is lower precedence than op):
901 * push prev_op back
902 * goto C
903 * evaluate prev_op on top of numstack
904 * C:if op is "?": check result, set disable flag if needed
905 * push op
906 * N:loop to parse the rest of string
908 while (opstackptr != opstack) {
909 operator prev_op = *--opstackptr;
910 if (op == TOK_RPAREN) {
911 if (prev_op == TOK_LPAREN) {
912 /* Erase var name: for example, (VAR) = 1 is not valid */
913 numstackptr[-1].var_name = NULL;
914 /* (EXPR) is a "value": next operator directly after
915 * close paren should be considered binary
917 lasttok = TOK_VALUE;
918 goto next;
920 /* Not (y), but ...x~y). Fall through to evaluate x~y */
921 } else {
922 operator prev_prec = PREC(prev_op);
923 fix_assignment_prec(prec);
924 fix_assignment_prec(prev_prec);
925 if (prev_prec < prec
926 || (prev_prec == prec && is_right_associative(prec))
928 /* ...x~y@. push @ on opstack */
929 opstackptr++; /* undo removal of ~ op */
930 goto check_cond;
932 /* else: ...x~y@. Evaluate x~y, replace it on stack with result. Then repeat */
934 dbg("arith_apply(prev_op:%02x, numstack:%d)", prev_op, (int)(numstackptr - numstack));
935 errmsg = arith_apply(math_state, prev_op, numstack, &numstackptr);
936 if (errmsg)
937 goto err_with_custom_msg;
938 dbg(" numstack:%d val:%lld '%s'", (int)(numstackptr - numstack), numstackptr[-1].val, numstackptr[-1].var_name);
939 if (prev_op == TOK_CONDITIONAL_SEP) {
940 /* We just executed ":" */
941 /* Remove "?" from opstack too, not just ":" */
942 opstackptr--;
943 if (*opstackptr != TOK_CONDITIONAL) {
944 /* Example: $((1,2:3)) */
945 errmsg = "malformed ?: operator";
946 goto err_with_custom_msg;
948 /* Example: a=1?2:3,a. We just executed ":".
949 * Prevent assignment from being still disabled.
951 if (ternary_level == math_state->evaluation_disabled) {
952 math_state->evaluation_disabled = 0;
953 dbg("':' executed: evaluation_disabled=CLEAR");
955 ternary_level--;
957 } /* while (opstack not empty) */
959 if (op == TOK_RPAREN) /* unpaired RPAREN? */
960 goto syntax_err;
961 check_cond:
962 if (op == TOK_CONDITIONAL) {
963 /* We just now evaluated EXPR before "?".
964 * Should we disable evaluation now?
966 ternary_level++;
967 if (numstackptr[-1].val == 0 && !math_state->evaluation_disabled) {
968 math_state->evaluation_disabled = ternary_level;
969 dbg("'?' entered: evaluation_disabled=%u", math_state->evaluation_disabled);
972 } /* if */
973 /* else: LPAREN or UNARY: push it on opstack */
975 /* Push this operator to opstack */
976 dbg("(%d) op:%02x insert_op:%02x", (int)(opstackptr - opstack), op, insert_op);
977 *opstackptr++ = lasttok = op;
978 next:
// A queued insert_op (the implicit left paren after ?, or the real : token after its implicit right paren)
// is processed right after the current operator, through the same tok_found1 path.
979 if (insert_op != 0xff) {
980 op = insert_op;
981 insert_op = 0xff;
982 dbg("inserting %02x", op);
983 if (op == TOK_CONDITIONAL_SEP) {
984 /* The next token is ":". Toggle "do not evaluate" state */
985 if (!math_state->evaluation_disabled) {
986 math_state->evaluation_disabled = ternary_level;
987 dbg("':' entered: evaluation_disabled=%u", math_state->evaluation_disabled);
988 } else if (ternary_level == math_state->evaluation_disabled) {
989 math_state->evaluation_disabled = 0;
990 dbg("':' entered: evaluation_disabled=CLEAR");
991 } /* else: ternary_level > evaluation_disabled && evaluation_disabled != 0 */
992 /* We are in nested "?:" while in outer "?:" disabled branch */
993 /* do_nothing */
995 goto tok_found1;
997 } /* while (1) */
// Common error exits: both store the message for the caller and return -1.
999 syntax_err:
1000 errmsg = "arithmetic syntax error";
1001 err_with_custom_msg:
1002 math_state->errmsg = errmsg;
1003 return -1;
1006 arith_t FAST_FUNC
1007 arith(arith_state_t *math_state, const char *expr)
1009 math_state->evaluation_disabled = 0;
1010 math_state->errmsg = NULL;
1011 math_state->list_of_recursed_names = NULL;
1012 return evaluate_string(math_state, expr);
1016 * Copyright (c) 1989, 1991, 1993, 1994
1017 * The Regents of the University of California. All rights reserved.
1019 * This code is derived from software contributed to Berkeley by
1020 * Kenneth Almquist.
1022 * Redistribution and use in source and binary forms, with or without
1023 * modification, are permitted provided that the following conditions
1024 * are met:
1025 * 1. Redistributions of source code must retain the above copyright
1026 * notice, this list of conditions and the following disclaimer.
1027 * 2. Redistributions in binary form must reproduce the above copyright
1028 * notice, this list of conditions and the following disclaimer in the
1029 * documentation and/or other materials provided with the distribution.
1030 * 3. Neither the name of the University nor the names of its contributors
1031 * may be used to endorse or promote products derived from this software
1032 * without specific prior written permission.
1034 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND
1035 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1036 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1037 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
1038 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1039 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1040 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1041 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
1042 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
1043 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
1044 * SUCH DAMAGE.