Add support for __builtin_strpbrk()
[smatch.git] / expression.c
blobb1ffa6c49dda5440b1f281173b522323dc40be4d
/*
 * sparse/expression.c
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003-2004 Linus Torvalds
 *
 *  Licensed under the Open Software License version 1.1
 *
 * This is the expression parsing part of parsing C.
 */
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <errno.h>
19 #include <limits.h>
21 #include "lib.h"
22 #include "allocate.h"
23 #include "allocate.h"
24 #include "token.h"
25 #include "parse.h"
26 #include "symbol.h"
27 #include "scope.h"
28 #include "expression.h"
29 #include "target.h"
/*
 * Return 1 if "op" equals one of the operators in the variadic list
 * that follows it, 0 otherwise.
 *
 * Every variadic argument is read as an int and the list must be
 * terminated by a 0 sentinel; scanning stops at the first match or at
 * the sentinel, whichever comes first.
 */
static int match_oplist(int op, ...)
{
	va_list args;
	int nextop;

	va_start(args, op);
	do {
		nextop = va_arg(args, int);
	} while (nextop != 0 && nextop != op);
	/* va_start() has to be paired with va_end() before returning */
	va_end(args);

	/* a non-zero stop value means we stopped on a match, not the sentinel */
	return nextop != 0;
}
45 static struct token *comma_expression(struct token *, struct expression **);
47 struct token *parens_expression(struct token *token, struct expression **expr, const char *where)
49 token = expect(token, '(', where);
50 if (match_op(token, '{')) {
51 struct expression *e = alloc_expression(token->pos, EXPR_STATEMENT);
52 struct statement *stmt = alloc_statement(token->pos, STMT_COMPOUND);
53 *expr = e;
54 e->statement = stmt;
55 start_symbol_scope();
56 token = compound_statement(token->next, stmt);
57 end_symbol_scope();
58 token = expect(token, '}', "at end of statement expression");
59 } else
60 token = parse_expression(token, expr);
61 return expect(token, ')', where);
65 * Handle __func__, __FUNCTION__ and __PRETTY_FUNCTION__ token
66 * conversion
68 static int convert_one_fn_token(struct token *token)
70 struct symbol *sym = current_fn;
72 if (sym) {
73 struct ident *ident = sym->ident;
74 if (ident) {
75 int len = ident->len;
76 struct string *string;
78 string = __alloc_string(len+1);
79 memcpy(string->data, ident->name, len);
80 string->data[len] = 0;
81 string->length = len+1;
82 token_type(token) = TOKEN_STRING;
83 token->string = string;
84 return 1;
87 return 0;
90 static int convert_function(struct token *next)
92 int retval = 0;
93 for (;;) {
94 struct token *token = next;
95 next = next->next;
96 switch (token_type(token)) {
97 case TOKEN_STRING:
98 continue;
99 case TOKEN_IDENT:
100 if (token->ident == &__func___ident ||
101 token->ident == &__FUNCTION___ident ||
102 token->ident == &__PRETTY_FUNCTION___ident) {
103 if (!convert_one_fn_token(token))
104 break;
105 retval = 1;
106 continue;
108 /* Fall through */
109 default:
110 break;
112 break;
114 return retval;
117 static struct token *parse_type(struct token *token, struct expression **tree)
119 struct symbol *sym;
120 *tree = alloc_expression(token->pos, EXPR_TYPE);
121 token = typename(token, &sym);
122 if (sym->ident)
123 sparse_error(token->pos,
124 "type expression should not include identifier "
125 "\"%s\"", sym->ident->name);
126 (*tree)->symbol = sym;
127 return token;
130 static struct token *builtin_types_compatible_p_expr(struct token *token,
131 struct expression **tree)
133 struct expression *expr = alloc_expression(
134 token->pos, EXPR_COMPARE);
135 expr->op = SPECIAL_EQUAL;
136 token = token->next;
137 if (!match_op(token, '('))
138 return expect(token, '(',
139 "after __builtin_types_compatible_p");
140 token = token->next;
141 token = parse_type(token, &expr->left);
142 if (!match_op(token, ','))
143 return expect(token, ',',
144 "in __builtin_types_compatible_p");
145 token = token->next;
146 token = parse_type(token, &expr->right);
147 if (!match_op(token, ')'))
148 return expect(token, ')',
149 "at end of __builtin_types_compatible_p");
150 token = token->next;
152 *tree = expr;
153 return token;
156 static struct token *string_expression(struct token *token, struct expression *expr)
158 struct string *string = token->string;
159 struct token *next = token->next;
161 convert_function(token);
163 if (token_type(next) == TOKEN_STRING) {
164 int totlen = string->length-1;
165 char *data;
167 do {
168 totlen += next->string->length-1;
169 next = next->next;
170 } while (token_type(next) == TOKEN_STRING);
172 if (totlen > MAX_STRING) {
173 warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
174 totlen = MAX_STRING;
177 string = __alloc_string(totlen+1);
178 string->length = totlen+1;
179 data = string->data;
180 next = token;
181 do {
182 struct string *s = next->string;
183 int len = s->length-1;
185 if (len > totlen)
186 len = totlen;
187 totlen -= len;
189 next = next->next;
190 memcpy(data, s->data, len);
191 data += len;
192 } while (token_type(next) == TOKEN_STRING);
193 *data = '\0';
195 expr->string = string;
196 return next;
199 #ifndef ULLONG_MAX
200 #define ULLONG_MAX (~0ULL)
201 #endif
203 static void get_number_value(struct expression *expr, struct token *token)
205 const char *str = token->number;
206 unsigned long long value;
207 char *end;
208 unsigned long modifiers = 0;
209 int overflow = 0, do_warn = 0;
210 int try_unsigned = 1;
211 int bits;
213 errno = 0;
214 value = strtoull(str, &end, 0);
215 if (end == str)
216 goto Float;
217 if (value == ULLONG_MAX && errno == ERANGE)
218 overflow = 1;
219 while (1) {
220 unsigned long added;
221 char c = *end++;
222 if (!c) {
223 break;
224 } else if (c == 'u' || c == 'U') {
225 added = MOD_UNSIGNED;
226 } else if (c == 'l' || c == 'L') {
227 added = MOD_LONG;
228 if (*end == c) {
229 added |= MOD_LONGLONG;
230 end++;
232 } else
233 goto Float;
234 if (modifiers & added)
235 goto Enoint;
236 modifiers |= added;
238 if (overflow)
239 goto Eoverflow;
240 /* OK, it's a valid integer */
241 /* decimals can be unsigned only if directly specified as such */
242 if (str[0] != '0' && !(modifiers & MOD_UNSIGNED))
243 try_unsigned = 0;
244 if (!(modifiers & MOD_LONG)) {
245 bits = bits_in_int - 1;
246 if (!(value & (~1ULL << bits))) {
247 if (!(value & (1ULL << bits))) {
248 goto got_it;
249 } else if (try_unsigned) {
250 modifiers |= MOD_UNSIGNED;
251 goto got_it;
254 modifiers |= MOD_LONG;
255 do_warn = 1;
257 if (!(modifiers & MOD_LONGLONG)) {
258 bits = bits_in_long - 1;
259 if (!(value & (~1ULL << bits))) {
260 if (!(value & (1ULL << bits))) {
261 goto got_it;
262 } else if (try_unsigned) {
263 modifiers |= MOD_UNSIGNED;
264 goto got_it;
266 do_warn |= 2;
268 modifiers |= MOD_LONGLONG;
269 do_warn |= 1;
271 bits = bits_in_longlong - 1;
272 if (value & (~1ULL << bits))
273 goto Eoverflow;
274 if (!(value & (1ULL << bits)))
275 goto got_it;
276 if (!try_unsigned)
277 warning(expr->pos, "decimal constant %s is too big for long long",
278 show_token(token));
279 modifiers |= MOD_UNSIGNED;
280 got_it:
281 if (do_warn)
282 warning(expr->pos, "constant %s is so big it is%s%s%s",
283 show_token(token),
284 (modifiers & MOD_UNSIGNED) ? " unsigned":"",
285 (modifiers & MOD_LONG) ? " long":"",
286 (modifiers & MOD_LONGLONG) ? " long":"");
287 if (do_warn & 2)
288 warning(expr->pos,
289 "decimal constant %s is between LONG_MAX and ULONG_MAX."
290 " For C99 that means long long, C90 compilers are very "
291 "likely to produce unsigned long (and a warning) here",
292 show_token(token));
293 expr->type = EXPR_VALUE;
294 expr->ctype = ctype_integer(modifiers);
295 expr->value = value;
296 return;
297 Eoverflow:
298 error_die(expr->pos, "constant %s is too big even for unsigned long long",
299 show_token(token));
300 return;
301 Float:
302 expr->fvalue = string_to_ld(str, &end);
303 if (str == end)
304 goto Enoint;
306 if (*end && end[1])
307 goto Enoint;
309 if (*end == 'f' || *end == 'F')
310 expr->ctype = &float_ctype;
311 else if (*end == 'l' || *end == 'L')
312 expr->ctype = &ldouble_ctype;
313 else if (!*end)
314 expr->ctype = &double_ctype;
315 else
316 goto Enoint;
318 expr->type = EXPR_FVALUE;
319 return;
321 Enoint:
322 error_die(expr->pos, "constant %s is not a valid number", show_token(token));
325 struct token *primary_expression(struct token *token, struct expression **tree)
327 struct expression *expr = NULL;
329 switch (token_type(token)) {
330 case TOKEN_CHAR:
331 expr = alloc_expression(token->pos, EXPR_VALUE);
332 expr->ctype = &int_ctype;
333 expr->value = (unsigned char) token->character;
334 token = token->next;
335 break;
337 case TOKEN_NUMBER:
338 expr = alloc_expression(token->pos, EXPR_VALUE);
339 get_number_value(expr, token);
340 token = token->next;
341 break;
343 case TOKEN_ZERO_IDENT: {
344 expr = alloc_expression(token->pos, EXPR_SYMBOL);
345 expr->ctype = &int_ctype;
346 expr->symbol = &zero_int;
347 expr->symbol_name = token->ident;
348 token = token->next;
349 break;
352 case TOKEN_IDENT: {
353 struct symbol *sym = lookup_symbol(token->ident, NS_SYMBOL | NS_TYPEDEF);
354 struct token *next = token->next;
356 if (!sym) {
357 if (convert_function(token))
358 goto handle_string;
359 if (token->ident == &__builtin_types_compatible_p_ident) {
360 token = builtin_types_compatible_p_expr(token, &expr);
361 break;
365 expr = alloc_expression(token->pos, EXPR_SYMBOL);
368 * We support types as real first-class citizens, with type
369 * comparisons etc:
371 * if (typeof(a) == int) ..
373 if (sym && sym->namespace == NS_TYPEDEF) {
374 sparse_error(token->pos, "typename in expression");
375 sym = NULL;
377 expr->symbol_name = token->ident;
378 expr->symbol = sym;
379 token = next;
380 break;
383 case TOKEN_STRING: {
384 handle_string:
385 expr = alloc_expression(token->pos, EXPR_STRING);
386 token = string_expression(token, expr);
387 break;
390 case TOKEN_SPECIAL:
391 if (token->special == '(') {
392 expr = alloc_expression(token->pos, EXPR_PREOP);
393 expr->op = '(';
394 token = parens_expression(token, &expr->unop, "in expression");
395 break;
397 if (token->special == '[' && lookup_type(token->next)) {
398 expr = alloc_expression(token->pos, EXPR_TYPE);
399 token = typename(token->next, &expr->symbol);
400 token = expect(token, ']', "in type expression");
401 break;
404 default:
407 *tree = expr;
408 return token;
411 static struct token *expression_list(struct token *token, struct expression_list **list)
413 while (!match_op(token, ')')) {
414 struct expression *expr = NULL;
415 token = assignment_expression(token, &expr);
416 if (!expr)
417 break;
418 add_expression(list, expr);
419 if (!match_op(token, ','))
420 break;
421 token = token->next;
423 return token;
427 * extend to deal with the ambiguous C grammar for parsing
428 * a cast expressions followed by an initializer.
430 static struct token *postfix_expression(struct token *token, struct expression **tree, struct expression *cast_init_expr)
432 struct expression *expr = cast_init_expr;
434 if (!expr)
435 token = primary_expression(token, &expr);
437 while (expr && token_type(token) == TOKEN_SPECIAL) {
438 switch (token->special) {
439 case '[': { /* Array dereference */
440 struct expression *deref = alloc_expression(token->pos, EXPR_PREOP);
441 struct expression *add = alloc_expression(token->pos, EXPR_BINOP);
443 deref->op = '*';
444 deref->unop = add;
446 add->op = '+';
447 add->left = expr;
448 token = parse_expression(token->next, &add->right);
449 token = expect(token, ']', "at end of array dereference");
450 expr = deref;
451 continue;
453 case SPECIAL_INCREMENT: /* Post-increment */
454 case SPECIAL_DECREMENT: { /* Post-decrement */
455 struct expression *post = alloc_expression(token->pos, EXPR_POSTOP);
456 post->op = token->special;
457 post->unop = expr;
458 expr = post;
459 token = token->next;
460 continue;
462 case SPECIAL_DEREFERENCE: { /* Structure pointer member dereference */
463 /* "x->y" is just shorthand for "(*x).y" */
464 struct expression *inner = alloc_expression(token->pos, EXPR_PREOP);
465 inner->op = '*';
466 inner->unop = expr;
467 expr = inner;
469 /* Fallthrough!! */
470 case '.': { /* Structure member dereference */
471 struct expression *deref = alloc_expression(token->pos, EXPR_DEREF);
472 deref->op = '.';
473 deref->deref = expr;
474 token = token->next;
475 if (token_type(token) != TOKEN_IDENT) {
476 sparse_error(token->pos, "Expected member name");
477 break;
479 deref->member = token->ident;
480 token = token->next;
481 expr = deref;
482 continue;
485 case '(': { /* Function call */
486 struct expression *call = alloc_expression(token->pos, EXPR_CALL);
487 call->op = '(';
488 call->fn = expr;
489 token = expression_list(token->next, &call->args);
490 token = expect(token, ')', "in function call");
491 expr = call;
492 continue;
495 default:
496 break;
498 break;
500 *tree = expr;
501 return token;
504 static struct token *cast_expression(struct token *token, struct expression **tree);
505 static struct token *unary_expression(struct token *token, struct expression **tree);
507 static struct token *type_info_expression(struct token *token,
508 struct expression **tree, int type)
510 struct expression *expr = alloc_expression(token->pos, type);
512 *tree = expr;
513 token = token->next;
514 if (!match_op(token, '(') || !lookup_type(token->next))
515 return unary_expression(token, &expr->cast_expression);
516 token = typename(token->next, &expr->cast_type);
518 if (!match_op(token, ')')) {
519 static const char * error[] = {
520 [EXPR_SIZEOF] = "at end of sizeof",
521 [EXPR_ALIGNOF] = "at end of __alignof__",
522 [EXPR_PTRSIZEOF] = "at end of __sizeof_ptr__"
524 return expect(token, ')', error[type]);
527 token = token->next;
529 * C99 ambiguity: the typename might have been the beginning
530 * of a typed initializer expression..
532 if (match_op(token, '{'))
533 token = initializer(&expr->cast_expression, token);
534 return token;
537 static struct token *unary_expression(struct token *token, struct expression **tree)
539 if (token_type(token) == TOKEN_IDENT) {
540 struct ident *ident = token->ident;
541 if (ident->reserved) {
542 static const struct {
543 struct ident *id;
544 int type;
545 } type_information[] = {
546 { &sizeof_ident, EXPR_SIZEOF },
547 { &__alignof___ident, EXPR_ALIGNOF },
548 { &__sizeof_ptr___ident, EXPR_PTRSIZEOF },
550 int i;
551 for (i = 0; i < 3; i++) {
552 if (ident == type_information[i].id)
553 return type_info_expression(token, tree, type_information[i].type);
558 if (token_type(token) == TOKEN_SPECIAL) {
559 if (match_oplist(token->special,
560 SPECIAL_INCREMENT, SPECIAL_DECREMENT,
561 '&', '*', '+', '-', '~', '!', 0)) {
562 struct expression *unop;
563 struct expression *unary;
564 struct token *next;
566 next = cast_expression(token->next, &unop);
567 if (!unop) {
568 sparse_error(token->pos, "Syntax error in unary expression");
569 return next;
571 unary = alloc_expression(token->pos, EXPR_PREOP);
572 unary->op = token->special;
573 unary->unop = unop;
574 *tree = unary;
575 return next;
578 /* Gcc extension: &&label gives the address of a label */
579 if (match_op(token, SPECIAL_LOGICAL_AND) &&
580 token_type(token->next) == TOKEN_IDENT) {
581 struct expression *label = alloc_expression(token->pos, EXPR_LABEL);
582 struct symbol *sym = label_symbol(token->next);
583 if (!(sym->ctype.modifiers & MOD_ADDRESSABLE)) {
584 sym->ctype.modifiers |= MOD_ADDRESSABLE;
585 add_symbol(&function_computed_target_list, sym);
587 label->label_symbol = sym;
588 *tree = label;
589 return token->next->next;
594 return postfix_expression(token, tree, NULL);
598 * Ambiguity: a '(' can be either a cast-expression or
599 * a primary-expression depending on whether it is followed
600 * by a type or not.
602 * additional ambiguity: a "cast expression" followed by
603 * an initializer is really a postfix-expression.
605 static struct token *cast_expression(struct token *token, struct expression **tree)
607 if (match_op(token, '(')) {
608 struct token *next = token->next;
609 if (lookup_type(next)) {
610 struct expression *cast = alloc_expression(next->pos, EXPR_CAST);
611 struct symbol *sym;
613 token = typename(next, &sym);
614 cast->cast_type = sym;
615 token = expect(token, ')', "at end of cast operator");
616 if (match_op(token, '{')) {
617 token = initializer(&cast->cast_expression, token);
618 return postfix_expression(token, tree, cast);
620 *tree = cast;
621 token = cast_expression(token, &cast->cast_expression);
622 return token;
625 return unary_expression(token, tree);
/*
 * Generic left-to-right binop parsing
 *
 * This _really_ needs to be inlined, because that makes the inner
 * function call statically deterministic rather than a totally
 * unpredictable indirect call. But gcc-3 is so "clever" that it
 * doesn't do so by default even when you tell it to inline it.
 *
 * Making it a macro avoids the inlining problem, and also means
 * that we can pass in the op-comparison as an expression rather
 * than create a data structure for it.
 *
 * The macro expands to a complete function body: it declares locals,
 * defines the label "out" and returns, so it can be used only once
 * per function.  "compare" is evaluated with the candidate operator
 * in the local variable "op".
 */

#define LR_BINOP_EXPRESSION(token, tree, type, inner, compare)	\
	struct expression *left = NULL;				\
	struct token * next = inner(token, &left);		\
								\
	if (left) {						\
		while (token_type(next) == TOKEN_SPECIAL) {	\
			struct expression *top, *right = NULL;	\
			int op = next->special;			\
								\
			if (!(compare))				\
				goto out;			\
			top = alloc_expression(next->pos, type);	\
			next = inner(next->next, &right);	\
			if (!right) {				\
				sparse_error(next->pos, "No right hand side of '%s'-expression", show_special(op));	\
				break;				\
			}					\
			top->op = op;				\
			top->left = left;			\
			top->right = right;			\
			left = top;				\
		}						\
	}							\
out:								\
	*tree = left;						\
	return next
669 static struct token *multiplicative_expression(struct token *token, struct expression **tree)
671 LR_BINOP_EXPRESSION(
672 token, tree, EXPR_BINOP, cast_expression,
673 (op == '*') || (op == '/') || (op == '%')
677 static struct token *additive_expression(struct token *token, struct expression **tree)
679 LR_BINOP_EXPRESSION(
680 token, tree, EXPR_BINOP, multiplicative_expression,
681 (op == '+') || (op == '-')
685 static struct token *shift_expression(struct token *token, struct expression **tree)
687 LR_BINOP_EXPRESSION(
688 token, tree, EXPR_BINOP, additive_expression,
689 (op == SPECIAL_LEFTSHIFT) || (op == SPECIAL_RIGHTSHIFT)
693 static struct token *relational_expression(struct token *token, struct expression **tree)
695 LR_BINOP_EXPRESSION(
696 token, tree, EXPR_COMPARE, shift_expression,
697 (op == '<') || (op == '>') ||
698 (op == SPECIAL_LTE) || (op == SPECIAL_GTE)
702 static struct token *equality_expression(struct token *token, struct expression **tree)
704 LR_BINOP_EXPRESSION(
705 token, tree, EXPR_COMPARE, relational_expression,
706 (op == SPECIAL_EQUAL) || (op == SPECIAL_NOTEQUAL)
710 static struct token *bitwise_and_expression(struct token *token, struct expression **tree)
712 LR_BINOP_EXPRESSION(
713 token, tree, EXPR_BINOP, equality_expression,
714 (op == '&')
718 static struct token *bitwise_xor_expression(struct token *token, struct expression **tree)
720 LR_BINOP_EXPRESSION(
721 token, tree, EXPR_BINOP, bitwise_and_expression,
722 (op == '^')
726 static struct token *bitwise_or_expression(struct token *token, struct expression **tree)
728 LR_BINOP_EXPRESSION(
729 token, tree, EXPR_BINOP, bitwise_xor_expression,
730 (op == '|')
734 static struct token *logical_and_expression(struct token *token, struct expression **tree)
736 LR_BINOP_EXPRESSION(
737 token, tree, EXPR_LOGICAL, bitwise_or_expression,
738 (op == SPECIAL_LOGICAL_AND)
742 static struct token *logical_or_expression(struct token *token, struct expression **tree)
744 LR_BINOP_EXPRESSION(
745 token, tree, EXPR_LOGICAL, logical_and_expression,
746 (op == SPECIAL_LOGICAL_OR)
750 struct token *conditional_expression(struct token *token, struct expression **tree)
752 token = logical_or_expression(token, tree);
753 if (*tree && match_op(token, '?')) {
754 struct expression *expr = alloc_expression(token->pos, EXPR_CONDITIONAL);
755 expr->op = token->special;
756 expr->left = *tree;
757 *tree = expr;
758 token = parse_expression(token->next, &expr->cond_true);
759 token = expect(token, ':', "in conditional expression");
760 token = conditional_expression(token, &expr->cond_false);
762 return token;
765 struct token *assignment_expression(struct token *token, struct expression **tree)
767 token = conditional_expression(token, tree);
768 if (*tree && token_type(token) == TOKEN_SPECIAL) {
769 static const int assignments[] = {
770 '=',
771 SPECIAL_ADD_ASSIGN, SPECIAL_SUB_ASSIGN,
772 SPECIAL_MUL_ASSIGN, SPECIAL_DIV_ASSIGN,
773 SPECIAL_MOD_ASSIGN, SPECIAL_SHL_ASSIGN,
774 SPECIAL_SHR_ASSIGN, SPECIAL_AND_ASSIGN,
775 SPECIAL_OR_ASSIGN, SPECIAL_XOR_ASSIGN };
776 int i, op = token->special;
777 for (i = 0; i < sizeof(assignments)/sizeof(int); i++)
778 if (assignments[i] == op) {
779 struct expression * expr = alloc_expression(token->pos, EXPR_ASSIGNMENT);
780 expr->left = *tree;
781 expr->op = op;
782 *tree = expr;
783 return assignment_expression(token->next, &expr->right);
786 return token;
789 static struct token *comma_expression(struct token *token, struct expression **tree)
791 LR_BINOP_EXPRESSION(
792 token, tree, EXPR_COMMA, assignment_expression,
793 (op == ',')
/*
 * Parse a full expression (comma operators included) starting at
 * "token".  The result is stored through "tree"; returns the first
 * token that was not consumed.
 */
struct token *parse_expression(struct token *token, struct expression **tree)
{
	return comma_expression(token,tree);
}