2 * Arithmetic code ripped out of ash shell for code sharing.
4 * This code is derived from software contributed to Berkeley by
7 * Original BSD copyright notice is retained at the end of this file.
9 * Copyright (c) 1989, 1991, 1993, 1994
10 * The Regents of the University of California. All rights reserved.
12 * Copyright (c) 1997-2005 Herbert Xu <herbert@gondor.apana.org.au>
13 * was re-ported from NetBSD and debianized.
15 * rewrite arith.y to micro stack based cryptic algorithm by
16 * Copyright (c) 2001 Aaron Lehmann <aaronl@vitelus.com>
18 * Modified by Paul Mundt <lethal@linux-sh.org> (c) 2004 to support
21 * Modified by Vladimir Oleynik <dzo@simtreas.ru> (c) 2001-2005 to be
22 * used in busybox and size optimizations,
23 * rewrote arith (see notes to this), added locale support,
24 * rewrote dynamic variables.
26 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
28 /* Copyright (c) 2001 Aaron Lehmann <aaronl@vitelus.com>
30 * Permission is hereby granted, free of charge, to any person obtaining
31 * a copy of this software and associated documentation files (the
32 * "Software"), to deal in the Software without restriction, including
33 * without limitation the rights to use, copy, modify, merge, publish,
34 * distribute, sublicense, and/or sell copies of the Software, and to
35 * permit persons to whom the Software is furnished to do so, subject to
36 * the following conditions:
38 * The above copyright notice and this permission notice shall be
39 * included in all copies or substantial portions of the Software.
41 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
44 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
45 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
46 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
47 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
50 /* This is my infix parser/evaluator. It is optimized for size, intended
51 * as a replacement for yacc-based parsers. However, it may well be faster
52 * than a comparable parser written in yacc. The supported operators are
53 * listed in #defines below. Parens, order of operations, and error handling
54 * are supported. This code is thread safe. The exact expression format should
55 * be that which POSIX specifies for shells.
 * The code uses a simple two-stack algorithm. See
 * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
 * for a detailed explanation of the infix-to-postfix algorithm on which
 * this is based (this code differs in that it applies operators immediately
 * to the stack instead of adding them to a queue to end up with an
 * expression).
66 * Aug 24, 2001 Manuel Novoa III
68 * Reduced the generated code size by about 30% (i386) and fixed several bugs.
70 * 1) In arith_apply():
71 * a) Cached values of *numptr and &(numptr[-1]).
72 * b) Removed redundant test for zero denominator.
75 * a) Eliminated redundant code for processing operator tokens by moving
76 * to a table-based implementation. Also folded handling of parens
78 * b) Combined all 3 loops which called arith_apply to reduce generated
79 * code size at the cost of speed.
81 * 3) The following expressions were treated as valid by the original code:
82 * 1() , 0! , 1 ( *3 ) .
83 * These bugs have been fixed by internally enclosing the expression in
84 * parens and then checking that all binary ops and right parens are
85 * preceded by a valid expression (NUM_TOKEN).
87 * Note: It may be desirable to replace Aaron's test for whitespace with
88 * ctype's isspace() if it is used by another busybox applet or if additional
89 * whitespace chars should be considered. Look below the "#include"s for a
93 * Aug 26, 2001 Manuel Novoa III
95 * Return 0 for null expressions. Pointed out by Vladimir Oleynik.
97 * Merge in Aaron's comments previously posted to the busybox list,
98 * modified slightly to take account of my changes to the code.
102 * (C) 2003 Vladimir Oleynik <dzo@simtreas.ru>
104 * - allow access to variable,
105 * use recursive value indirection: c="2*2"; a="c"; echo $((a+=2)) produce 6
106 * - implement assign syntax (VAR=expr, +=, *= etc)
107 * - implement exponentiation (** operator)
108 * - implement comma separated - expr, expr
109 * - implement ++expr --expr expr++ expr--
110 * - implement expr ? expr : expr (but second expr is always calculated)
111 * - allow hexadecimal and octal numbers
112 * - restore lost XOR operator
113 * - protect $((num num)) as true zero expr (Manuel's error)
114 * - always use special isspace(), see comment from bash ;-)
/*
 * Shorthands for the variable-access callbacks stored in the arithmetic
 * state. Both expand to a member access through a pointer that must be
 * named `math_state`, so they are only usable inside functions that have
 * such a variable in scope.
 */
#define lookupvar (math_state->lookupvar)
#define setvar    (math_state->setvar)
//#define endofname (math_state->endofname)
/* Operator token ids are small enough to fit in one byte. */
typedef unsigned char operator;

/* An operator's token id is a bit of a bitfield. The lower 5 bits are the
 * precedence, and 3 high bits are an ID unique across operators of that
 * precedence. The ID portion is so that multiple operators can have the
 * same precedence, ensuring that the leftmost one is evaluated first.
 */

/* Build a token id from a precedence (0..31) and a per-precedence id (0..7). */
#define tok_decl(prec,id) (((id)<<5) | (prec))
/* Extract the precedence (low 5 bits) from a token id. */
#define PREC(op) ((op) & 0x1F)
/*
 * Token ids, ordered from the lowest precedence to the highest.
 * Each id is built with tok_decl(precedence, id-within-precedence).
 */

/* Left paren has the lowest precedence so it is never "applied" */
#define TOK_LPAREN              tok_decl(0,0)

#define TOK_COMMA               tok_decl(1,0)

/* All assignments are right associative and have the same precedence,
 * but there are 11 of them, which doesn't fit into 3 bits for unique id.
 * Abusing another precedence level:
 */
#define TOK_ASSIGN              tok_decl(2,0)
#define TOK_AND_ASSIGN          tok_decl(2,1)
#define TOK_OR_ASSIGN           tok_decl(2,2)
#define TOK_XOR_ASSIGN          tok_decl(2,3)
#define TOK_PLUS_ASSIGN         tok_decl(2,4)
#define TOK_MINUS_ASSIGN        tok_decl(2,5)
#define TOK_LSHIFT_ASSIGN       tok_decl(2,6)
#define TOK_RSHIFT_ASSIGN       tok_decl(2,7)

#define TOK_MUL_ASSIGN          tok_decl(3,0)
#define TOK_DIV_ASSIGN          tok_decl(3,1)
#define TOK_REM_ASSIGN          tok_decl(3,2)

/* Fold the "overflow" assignment level 3 back into level 2, so that all
 * assignment operators compare as having one and the same precedence.
 * The argument must be a modifiable lvalue; it is updated in place. */
#define fix_assignment_prec(prec) do { if ((prec) == 3) (prec) = 2; } while (0)

/* Ternary conditional operator is right associative too */
#define TOK_CONDITIONAL         tok_decl(4,0)
#define TOK_CONDITIONAL_SEP     tok_decl(4,1)

#define TOK_OR                  tok_decl(5,0)

#define TOK_AND                 tok_decl(6,0)

#define TOK_BOR                 tok_decl(7,0)

#define TOK_BXOR                tok_decl(8,0)

#define TOK_BAND                tok_decl(9,0)

#define TOK_EQ                  tok_decl(10,0)
#define TOK_NE                  tok_decl(10,1)

#define TOK_LT                  tok_decl(11,0)
#define TOK_GT                  tok_decl(11,1)
#define TOK_GE                  tok_decl(11,2)
#define TOK_LE                  tok_decl(11,3)

#define TOK_LSHIFT              tok_decl(12,0)
#define TOK_RSHIFT              tok_decl(12,1)

#define TOK_ADD                 tok_decl(13,0)
#define TOK_SUB                 tok_decl(13,1)

#define TOK_MUL                 tok_decl(14,0)
#define TOK_DIV                 tok_decl(14,1)
#define TOK_REM                 tok_decl(14,2)

/* Exponent is right associative */
#define TOK_EXPONENT            tok_decl(15,1)

/* Unary operators */
/* NOTE(review): no definition of UNARYPREC is visible in this part of the
 * file. It has to be greater than PREC(TOK_EXPONENT) (every binary operator
 * must sort below it) and UNARYPREC+4 has to fit into the 5-bit precedence
 * field. The fallback below picks the first level that satisfies both -
 * confirm it against the project's own value. */
#ifndef UNARYPREC
# define UNARYPREC 16
#endif
#define TOK_BNOT                tok_decl(UNARYPREC,0)
#define TOK_NOT                 tok_decl(UNARYPREC,1)

#define TOK_UMINUS              tok_decl(UNARYPREC+1,0)
#define TOK_UPLUS               tok_decl(UNARYPREC+1,1)

#define PREC_PRE                (UNARYPREC+2)

#define TOK_PRE_INC             tok_decl(PREC_PRE, 0)
#define TOK_PRE_DEC             tok_decl(PREC_PRE, 1)

#define PREC_POST               (UNARYPREC+3)

#define TOK_POST_INC            tok_decl(PREC_POST, 0)
#define TOK_POST_DEC            tok_decl(PREC_POST, 1)

/* Numbers and the right paren share a special, highest precedence level */
#define SPEC_PREC               (UNARYPREC+4)

#define TOK_NUM                 tok_decl(SPEC_PREC, 0)
#define TOK_RPAREN              tok_decl(SPEC_PREC, 1)
216 is_assign_op(operator op
)
218 operator prec
= PREC(op
);
219 fix_assignment_prec(prec
);
220 return prec
== PREC(TOK_ASSIGN
)
222 || prec
== PREC_POST
;
226 is_right_associative(operator prec
)
228 return prec
== PREC(TOK_ASSIGN
)
229 || prec
== PREC(TOK_EXPONENT
)
230 || prec
== PREC(TOK_CONDITIONAL
);
236 /* We acquire second_val only when "expr1 : expr2" part
237 * of ternary ?: op is evaluated.
238 * We treat ?: as two binary ops: (expr ? (expr1 : expr2)).
239 * ':' produces a new value which has two parts, val and second_val;
240 * then '?' selects one of them based on its left side.
243 char second_val_present
;
244 /* If NULL then it's just a number, else it's a named variable */
248 typedef struct remembered_name
{
249 struct remembered_name
*next
;
254 static arith_t FAST_FUNC
255 evaluate_string(arith_state_t
*math_state
, const char *expr
);
258 arith_lookup_val(arith_state_t
*math_state
, var_or_num_t
*t
)
261 const char *p
= lookupvar(t
->var
);
263 remembered_name
*cur
;
264 remembered_name cur_save
;
266 /* did we already see this name?
267 * testcase: a=b; b=a; echo $((a))
269 for (cur
= math_state
->list_of_recursed_names
; cur
; cur
= cur
->next
) {
270 if (strcmp(cur
->var
, t
->var
) == 0) {
272 return "expression recursion loop detected";
276 /* push current var name */
277 cur
= math_state
->list_of_recursed_names
;
278 cur_save
.var
= t
->var
;
280 math_state
->list_of_recursed_names
= &cur_save
;
282 /* recursively evaluate p as expression */
283 t
->val
= evaluate_string(math_state
, p
);
285 /* pop current var name */
286 math_state
->list_of_recursed_names
= cur
;
288 return math_state
->errmsg
;
290 /* treat undefined var as 0 */
/* "Applying" a token means performing it on the top elements of the integer
 * stack. For a unary operator it will only change the top element, but a
 * binary operator will pop two arguments and push the result. */
299 static NOINLINE
const char*
300 arith_apply(arith_state_t
*math_state
, operator op
, var_or_num_t
*numstack
, var_or_num_t
**numstackptr
)
302 #define NUMPTR (*numstackptr)
304 var_or_num_t
*top_of_stack
;
308 /* There is no operator that can work without arguments */
309 if (NUMPTR
== numstack
)
312 top_of_stack
= NUMPTR
- 1;
314 /* Resolve name to value, if needed */
315 err
= arith_lookup_val(math_state
, top_of_stack
);
319 rez
= top_of_stack
->val
;
320 if (op
== TOK_UMINUS
)
322 else if (op
== TOK_NOT
)
324 else if (op
== TOK_BNOT
)
326 else if (op
== TOK_POST_INC
|| op
== TOK_PRE_INC
)
328 else if (op
== TOK_POST_DEC
|| op
== TOK_PRE_DEC
)
330 else if (op
!= TOK_UPLUS
) {
331 /* Binary operators */
332 arith_t right_side_val
;
335 /* Binary operators need two arguments */
336 if (top_of_stack
== numstack
)
338 /* ...and they pop one */
339 NUMPTR
= top_of_stack
; /* this decrements NUMPTR */
341 bad_second_val
= top_of_stack
->second_val_present
;
342 if (op
== TOK_CONDITIONAL
) { /* ? operation */
343 /* Make next if (...) protect against
344 * $((expr1 ? expr2)) - that is, missing ": expr" */
345 bad_second_val
= !bad_second_val
;
347 if (bad_second_val
) {
348 /* Protect against $((expr <not_?_op> expr1 : expr2)) */
349 return "malformed ?: operator";
352 top_of_stack
--; /* now points to left side */
354 if (op
!= TOK_ASSIGN
) {
355 /* Resolve left side value (unless the op is '=') */
356 err
= arith_lookup_val(math_state
, top_of_stack
);
361 right_side_val
= rez
;
362 rez
= top_of_stack
->val
;
363 if (op
== TOK_CONDITIONAL
) /* ? operation */
364 rez
= (rez
? right_side_val
: top_of_stack
[1].second_val
);
365 else if (op
== TOK_CONDITIONAL_SEP
) { /* : operation */
366 if (top_of_stack
== numstack
) {
367 /* Protect against $((expr : expr)) */
368 return "malformed ?: operator";
370 top_of_stack
->second_val_present
= op
;
371 top_of_stack
->second_val
= right_side_val
;
373 else if (op
== TOK_BOR
|| op
== TOK_OR_ASSIGN
)
374 rez
|= right_side_val
;
375 else if (op
== TOK_OR
)
376 rez
= right_side_val
|| rez
;
377 else if (op
== TOK_BAND
|| op
== TOK_AND_ASSIGN
)
378 rez
&= right_side_val
;
379 else if (op
== TOK_BXOR
|| op
== TOK_XOR_ASSIGN
)
380 rez
^= right_side_val
;
381 else if (op
== TOK_AND
)
382 rez
= rez
&& right_side_val
;
383 else if (op
== TOK_EQ
)
384 rez
= (rez
== right_side_val
);
385 else if (op
== TOK_NE
)
386 rez
= (rez
!= right_side_val
);
387 else if (op
== TOK_GE
)
388 rez
= (rez
>= right_side_val
);
389 else if (op
== TOK_RSHIFT
|| op
== TOK_RSHIFT_ASSIGN
)
390 rez
>>= right_side_val
;
391 else if (op
== TOK_LSHIFT
|| op
== TOK_LSHIFT_ASSIGN
)
392 rez
<<= right_side_val
;
393 else if (op
== TOK_GT
)
394 rez
= (rez
> right_side_val
);
395 else if (op
== TOK_LT
)
396 rez
= (rez
< right_side_val
);
397 else if (op
== TOK_LE
)
398 rez
= (rez
<= right_side_val
);
399 else if (op
== TOK_MUL
|| op
== TOK_MUL_ASSIGN
)
400 rez
*= right_side_val
;
401 else if (op
== TOK_ADD
|| op
== TOK_PLUS_ASSIGN
)
402 rez
+= right_side_val
;
403 else if (op
== TOK_SUB
|| op
== TOK_MINUS_ASSIGN
)
404 rez
-= right_side_val
;
405 else if (op
== TOK_ASSIGN
|| op
== TOK_COMMA
)
406 rez
= right_side_val
;
407 else if (op
== TOK_EXPONENT
) {
409 if (right_side_val
< 0)
410 return "exponent less than 0";
412 while (--right_side_val
>= 0)
416 else if (right_side_val
== 0)
417 return "divide by zero";
418 else if (op
== TOK_DIV
|| op
== TOK_DIV_ASSIGN
)
419 rez
/= right_side_val
;
420 else if (op
== TOK_REM
|| op
== TOK_REM_ASSIGN
)
421 rez
%= right_side_val
;
424 if (is_assign_op(op
)) {
425 char buf
[sizeof(arith_t
)*3 + 2];
427 if (top_of_stack
->var
== NULL
) {
429 //TODO: actually, bash allows ++7 but for some reason it evals to 7, not 8
432 /* Save to shell variable */
433 sprintf(buf
, ARITH_FMT
, rez
);
434 setvar(top_of_stack
->var
, buf
);
435 /* After saving, make previous value for v++ or v-- */
436 if (op
== TOK_POST_INC
)
438 else if (op
== TOK_POST_DEC
)
442 top_of_stack
->val
= rez
;
443 /* Erase var name, it is just a number now */
444 top_of_stack
->var
= NULL
;
447 return "arithmetic syntax error";
451 /* longest must be first */
452 static const char op_tokens
[] ALIGN1
= {
453 '<','<','=',0, TOK_LSHIFT_ASSIGN
,
454 '>','>','=',0, TOK_RSHIFT_ASSIGN
,
455 '<','<', 0, TOK_LSHIFT
,
456 '>','>', 0, TOK_RSHIFT
,
463 '|','=', 0, TOK_OR_ASSIGN
,
464 '&','=', 0, TOK_AND_ASSIGN
,
465 '*','=', 0, TOK_MUL_ASSIGN
,
466 '/','=', 0, TOK_DIV_ASSIGN
,
467 '%','=', 0, TOK_REM_ASSIGN
,
468 '+','=', 0, TOK_PLUS_ASSIGN
,
469 '-','=', 0, TOK_MINUS_ASSIGN
,
470 '-','-', 0, TOK_POST_DEC
,
471 '^','=', 0, TOK_XOR_ASSIGN
,
472 '+','+', 0, TOK_POST_INC
,
473 '*','*', 0, TOK_EXPONENT
,
489 '?', 0, TOK_CONDITIONAL
,
490 ':', 0, TOK_CONDITIONAL_SEP
,
495 #define ptr_to_rparen (&op_tokens[sizeof(op_tokens)-7])
497 const char* FAST_FUNC
498 endofname(const char *name
)
503 if (!is_in_name(*name
))
509 static arith_t FAST_FUNC
510 evaluate_string(arith_state_t
*math_state
, const char *expr
)
514 const char *start_expr
= expr
= skip_whitespace(expr
);
515 unsigned expr_len
= strlen(expr
) + 2;
516 /* Stack of integers */
517 /* The proof that there can be no more than strlen(startbuf)/2+1
518 * integers in any given correct or incorrect expression
519 * is left as an exercise to the reader. */
520 var_or_num_t
*const numstack
= alloca((expr_len
/ 2) * sizeof(numstack
[0]));
521 var_or_num_t
*numstackptr
= numstack
;
522 /* Stack of operator tokens */
523 operator *const stack
= alloca(expr_len
* sizeof(stack
[0]));
524 operator *stackptr
= stack
;
526 /* Start with a left paren */
527 *stackptr
++ = lasttok
= TOK_LPAREN
;
536 expr
= skip_whitespace(expr
);
538 if (arithval
== '\0') {
539 if (expr
== start_expr
) {
540 /* Null expression */
545 /* This is only reached after all tokens have been extracted from the
546 * input stream. If there are still tokens on the operator stack, they
547 * are to be applied in order. At the end, there should be a final
548 * result on the integer stack */
550 if (expr
!= ptr_to_rparen
+ 1) {
551 /* If we haven't done so already,
552 * append a closing right paren
553 * and let the loop process it */
554 expr
= ptr_to_rparen
;
557 /* At this point, we're done with the expression */
558 if (numstackptr
!= numstack
+ 1) {
559 /* ...but if there isn't, it's bad */
563 /* expression is $((var)) only, lookup now */
564 errmsg
= arith_lookup_val(math_state
, numstack
);
572 size_t var_name_size
= (p
-expr
) + 1; /* +1 for NUL */
573 numstackptr
->var
= alloca(var_name_size
);
574 safe_strncpy(numstackptr
->var
, expr
, var_name_size
);
577 numstackptr
->second_val_present
= 0;
583 if (isdigit(arithval
)) {
585 numstackptr
->var
= NULL
;
587 numstackptr
->val
= strto_arith_t(expr
, (char**) &expr
, 0);
589 numstackptr
->val
= 0; /* bash compat */
593 /* Should be an operator */
596 // TODO: bash allows 7+++v, treats it as 7 + ++v
597 // we treat it as 7++ + v and reject
598 /* Compare expr to current op_tokens[] element */
599 const char *e
= expr
;
602 /* Match: operator is found */
611 /* No match, go to next element of op_tokens[] */
614 p
+= 2; /* skip NUL and TOK_foo bytes */
616 /* No next element, operator not found */
617 //math_state->syntax_error_at = expr;
622 op
= p
[1]; /* fetch TOK_foo value */
623 /* NB: expr now points past the operator */
625 /* post grammar: a++ reduce to num */
626 if (lasttok
== TOK_POST_INC
|| lasttok
== TOK_POST_DEC
)
629 /* Plus and minus are binary (not unary) _only_ if the last
630 * token was a number, or a right paren (which pretends to be
631 * a number, since it evaluates to one). Think about it.
633 if (lasttok
!= TOK_NUM
) {
/* We don't want a unary operator to cause recursive descent on the
 * stack, because there can be many in a row and it could cause an
 * operator to be evaluated before its argument is pushed onto the
 * integer stack.
 * But for binary operators, "apply" everything on the operator
 * stack until we find an operator with a lesser priority than the
 * one we have just extracted. If op is right-associative,
 * then stop "applying" on the equal priority too.
 * Left paren is given the lowest priority so it will never be
 * "applied" in this way.
 */
661 if ((prec
> 0 && prec
< UNARYPREC
) || prec
== SPEC_PREC
) {
662 /* not left paren or unary */
663 if (lasttok
!= TOK_NUM
) {
664 /* binary op must be preceded by a num */
667 while (stackptr
!= stack
) {
668 operator prev_op
= *--stackptr
;
669 if (op
== TOK_RPAREN
) {
670 /* The algorithm employed here is simple: while we don't
671 * hit an open paren nor the bottom of the stack, pop
672 * tokens and apply them */
673 if (prev_op
== TOK_LPAREN
) {
674 /* Any operator directly after a
675 * close paren should consider itself binary */
680 operator prev_prec
= PREC(prev_op
);
681 fix_assignment_prec(prec
);
682 fix_assignment_prec(prev_prec
);
684 || (prev_prec
== prec
&& is_right_associative(prec
))
690 errmsg
= arith_apply(math_state
, prev_op
, numstack
, &numstackptr
);
692 goto err_with_custom_msg
;
694 if (op
== TOK_RPAREN
)
698 /* Push this operator to the stack and remember it */
699 *stackptr
++ = lasttok
= op
;
704 errmsg
= "arithmetic syntax error";
708 math_state
->errmsg
= errmsg
;
709 return numstack
->val
;
713 arith(arith_state_t
*math_state
, const char *expr
)
715 math_state
->errmsg
= NULL
;
716 math_state
->list_of_recursed_names
= NULL
;
717 return evaluate_string(math_state
, expr
);
721 * Copyright (c) 1989, 1991, 1993, 1994
722 * The Regents of the University of California. All rights reserved.
724 * This code is derived from software contributed to Berkeley by
727 * Redistribution and use in source and binary forms, with or without
728 * modification, are permitted provided that the following conditions
730 * 1. Redistributions of source code must retain the above copyright
731 * notice, this list of conditions and the following disclaimer.
732 * 2. Redistributions in binary form must reproduce the above copyright
733 * notice, this list of conditions and the following disclaimer in the
734 * documentation and/or other materials provided with the distribution.
735 * 3. Neither the name of the University nor the names of its contributors
736 * may be used to endorse or promote products derived from this software
737 * without specific prior written permission.
739 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
740 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
741 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
742 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
743 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
744 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
745 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
746 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
747 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
748 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF