[PATCH] parser.c cleanup
[smatch.git] / expression.c
blobccb1ee000a7c23c4551d996251e7ac43924e5b33
/*
 * sparse/expression.c
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 *  Licensed under the Open Software License version 1.1
 *
 * This is the expression parsing part of parsing C.
 */
11 #define _ISOC99_SOURCE
12 #include <stdarg.h>
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <ctype.h>
17 #include <unistd.h>
18 #include <fcntl.h>
19 #include <errno.h>
20 #include <limits.h>
22 #include "lib.h"
23 #include "token.h"
24 #include "parse.h"
25 #include "symbol.h"
26 #include "scope.h"
27 #include "expression.h"
28 #include "target.h"
/*
 * Check whether "op" occurs in a variadic list of operator codes.
 *
 * Every argument after "op" must be an int, and the list MUST be
 * terminated by a 0 sentinel. Returns 1 if "op" equals one of the
 * listed codes and 0 otherwise. Because 0 is the terminator, an
 * "op" of 0 never matches.
 */
static int match_oplist(int op, ...)
{
	va_list args;
	int found = 0;

	va_start(args, op);
	for (;;) {
		int nextop = va_arg(args, int);

		if (!nextop)
			break;
		if (op == nextop) {
			found = 1;
			break;
		}
	}
	/* va_start() must be paired with va_end() before the function returns */
	va_end(args);
	return found;
}
44 static struct token *comma_expression(struct token *, struct expression **);
46 struct token *parens_expression(struct token *token, struct expression **expr, const char *where)
48 token = expect(token, '(', where);
49 if (match_op(token, '{')) {
50 struct expression *e = alloc_expression(token->pos, EXPR_STATEMENT);
51 struct statement *stmt = alloc_statement(token->pos, STMT_COMPOUND);
52 *expr = e;
53 e->statement = stmt;
54 start_symbol_scope();
55 token = compound_statement(token->next, stmt);
56 end_symbol_scope();
57 token = expect(token, '}', "at end of statement expression");
58 } else
59 token = parse_expression(token, expr);
60 return expect(token, ')', where);
63 static struct token *string_expression(struct token *token, struct expression *expr)
65 struct string *string = token->string;
66 struct token *next = token->next;
68 if (token_type(next) == TOKEN_STRING) {
69 int totlen = string->length-1;
70 char *data;
72 do {
73 totlen += next->string->length-1;
74 next = next->next;
75 } while (token_type(next) == TOKEN_STRING);
77 if (totlen > MAX_STRING) {
78 warn(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
79 totlen = MAX_STRING;
82 string = __alloc_string(totlen+1);
83 string->length = totlen+1;
84 data = string->data;
85 next = token;
86 do {
87 struct string *s = next->string;
88 int len = s->length-1;
90 if (len > totlen)
91 len = totlen;
92 totlen -= len;
94 next = next->next;
95 memcpy(data, s->data, len);
96 data += len;
97 } while (token_type(next) == TOKEN_STRING);
98 *data = '\0';
100 expr->string = string;
101 return next;
104 #ifndef ULLONG_MAX
105 #define ULLONG_MAX (~0ULL)
106 #endif
108 static void get_number_value(struct expression *expr, struct token *token)
110 const char *str = token->number;
111 unsigned long long value;
112 char *end;
113 unsigned long modifiers = 0;
114 int overflow = 0, do_warn = 0;
115 int try_unsigned = 1;
116 int bits;
118 errno = 0;
119 value = strtoull(str, &end, 0);
120 if (end == str)
121 goto Float;
122 if (value == ULLONG_MAX && errno == ERANGE)
123 overflow = 1;
124 while (1) {
125 unsigned long added;
126 char c = *end++;
127 if (!c) {
128 break;
129 } else if (c == 'u' || c == 'U') {
130 added = MOD_UNSIGNED;
131 } else if (c == 'l' || c == 'L') {
132 added = MOD_LONG;
133 if (*end == c) {
134 added |= MOD_LONGLONG;
135 end++;
137 } else
138 goto Float;
139 if (modifiers & added)
140 goto Enoint;
141 modifiers |= added;
143 if (overflow)
144 goto Eoverflow;
145 /* OK, it's a valid integer */
146 /* decimals can be unsigned only if directly specified as such */
147 if (str[0] != '0' && !(modifiers & MOD_UNSIGNED))
148 try_unsigned = 0;
149 if (!(modifiers & MOD_LONG)) {
150 bits = bits_in_int - 1;
151 if (!(value & (~1ULL << bits))) {
152 if (!(value & (1ULL << bits))) {
153 goto got_it;
154 } else if (try_unsigned) {
155 modifiers |= MOD_UNSIGNED;
156 goto got_it;
159 modifiers |= MOD_LONG;
160 do_warn = 1;
162 if (!(modifiers & MOD_LONGLONG)) {
163 bits = bits_in_long - 1;
164 if (!(value & (~1ULL << bits))) {
165 if (!(value & (1ULL << bits))) {
166 goto got_it;
167 } else if (try_unsigned) {
168 modifiers |= MOD_UNSIGNED;
169 goto got_it;
171 do_warn |= 2;
173 modifiers |= MOD_LONGLONG;
174 do_warn |= 1;
176 bits = bits_in_longlong - 1;
177 if (value & (~1ULL << bits))
178 goto Eoverflow;
179 if (!(value & (1ULL << bits)))
180 goto got_it;
181 if (!try_unsigned)
182 warn(expr->pos, "decimal constant %s is too big for long long",
183 show_token(token));
184 modifiers |= MOD_UNSIGNED;
185 got_it:
186 if (do_warn)
187 warn(expr->pos, "constant %s is so big it is%s%s%s",
188 show_token(token),
189 (modifiers & MOD_UNSIGNED) ? " unsigned":"",
190 (modifiers & MOD_LONG) ? " long":"",
191 (modifiers & MOD_LONGLONG) ? " long":"");
192 if (do_warn & 2)
193 warn(expr->pos,
194 "decimal constant %s is between LONG_MAX and ULONG_MAX."
195 " For C99 that means long long, C90 compilers are very "
196 "likely to produce unsigned long (and a warning) here",
197 show_token(token));
198 expr->type = EXPR_VALUE;
199 expr->ctype = ctype_integer(modifiers);
200 expr->value = value;
201 return;
202 Eoverflow:
203 error(expr->pos, "constant %s is too big even for unsigned long long",
204 show_token(token));
205 return;
206 Float:
207 expr->fvalue = strtold(str, &end);
208 if (str == end)
209 goto Enoint;
211 if (*end && end[1])
212 goto Enoint;
214 if (*end == 'f' || *end == 'F')
215 expr->ctype = &float_ctype;
216 else if (*end == 'l' || *end == 'L')
217 expr->ctype = &ldouble_ctype;
218 else if (!*end)
219 expr->ctype = &double_ctype;
220 else
221 goto Enoint;
223 expr->type = EXPR_FVALUE;
224 return;
226 Enoint:
227 error(expr->pos, "constant %s is not a valid number", show_token(token));
230 struct token *primary_expression(struct token *token, struct expression **tree)
232 struct expression *expr = NULL;
234 switch (token_type(token)) {
235 case TOKEN_CHAR:
236 expr = alloc_expression(token->pos, EXPR_VALUE);
237 expr->ctype = &int_ctype;
238 expr->value = (unsigned char) token->character;
239 token = token->next;
240 break;
242 case TOKEN_NUMBER:
243 expr = alloc_expression(token->pos, EXPR_VALUE);
244 get_number_value(expr, token);
245 token = token->next;
246 break;
248 case TOKEN_IDENT: {
249 struct symbol *sym = lookup_symbol(token->ident, NS_SYMBOL | NS_TYPEDEF);
250 struct token *next = token->next;
252 expr = alloc_expression(token->pos, EXPR_SYMBOL);
255 * We support types as real first-class citizens, with type
256 * comparisons etc:
258 * if (typeof(a) == int) ..
260 if (sym && sym->namespace == NS_TYPEDEF) {
261 warn(token->pos, "typename in expression");
262 sym = NULL;
264 expr->symbol_name = token->ident;
265 expr->symbol = sym;
266 token = next;
267 break;
270 case TOKEN_STRING: {
271 expr = alloc_expression(token->pos, EXPR_STRING);
272 token = string_expression(token, expr);
273 break;
276 case TOKEN_SPECIAL:
277 if (token->special == '(') {
278 expr = alloc_expression(token->pos, EXPR_PREOP);
279 expr->op = '(';
280 token = parens_expression(token, &expr->unop, "in expression");
281 break;
283 if (token->special == '[' && lookup_type(token->next)) {
284 expr = alloc_expression(token->pos, EXPR_TYPE);
285 token = typename(token->next, &expr->symbol);
286 token = expect(token, ']', "in type expression");
287 break;
290 default:
293 *tree = expr;
294 return token;
297 static struct token *expression_list(struct token *token, struct expression_list **list)
299 while (!match_op(token, ')')) {
300 struct expression *expr = NULL;
301 token = assignment_expression(token, &expr);
302 if (!expr)
303 break;
304 add_expression(list, expr);
305 if (!match_op(token, ','))
306 break;
307 token = token->next;
309 return token;
313 * extend to deal with the ambiguous C grammar for parsing
314 * a cast expressions followed by an initializer.
316 static struct token *postfix_expression(struct token *token, struct expression **tree, struct expression *cast_init_expr)
318 struct expression *expr = cast_init_expr;
320 if (!expr)
321 token = primary_expression(token, &expr);
323 while (expr && token_type(token) == TOKEN_SPECIAL) {
324 switch (token->special) {
325 case '[': { /* Array dereference */
326 struct expression *deref = alloc_expression(token->pos, EXPR_PREOP);
327 struct expression *add = alloc_expression(token->pos, EXPR_BINOP);
329 deref->op = '*';
330 deref->unop = add;
332 add->op = '+';
333 add->left = expr;
334 token = parse_expression(token->next, &add->right);
335 token = expect(token, ']', "at end of array dereference");
336 expr = deref;
337 continue;
339 case SPECIAL_INCREMENT: /* Post-increment */
340 case SPECIAL_DECREMENT: { /* Post-decrement */
341 struct expression *post = alloc_expression(token->pos, EXPR_POSTOP);
342 post->op = token->special;
343 post->unop = expr;
344 expr = post;
345 token = token->next;
346 continue;
348 case SPECIAL_DEREFERENCE: { /* Structure pointer member dereference */
349 /* "x->y" is just shorthand for "(*x).y" */
350 struct expression *inner = alloc_expression(token->pos, EXPR_PREOP);
351 inner->op = '*';
352 inner->unop = expr;
353 expr = inner;
355 /* Fallthrough!! */
356 case '.': { /* Structure member dereference */
357 struct expression *deref = alloc_expression(token->pos, EXPR_DEREF);
358 deref->op = '.';
359 deref->deref = expr;
360 token = token->next;
361 if (token_type(token) != TOKEN_IDENT) {
362 warn(token->pos, "Expected member name");
363 break;
365 deref->member = token->ident;
366 token = token->next;
367 expr = deref;
368 continue;
371 case '(': { /* Function call */
372 struct expression *call = alloc_expression(token->pos, EXPR_CALL);
373 call->op = '(';
374 call->fn = expr;
375 token = expression_list(token->next, &call->args);
376 token = expect(token, ')', "in function call");
377 expr = call;
378 continue;
381 default:
382 break;
384 break;
386 *tree = expr;
387 return token;
390 static struct token *cast_expression(struct token *token, struct expression **tree);
391 static struct token *unary_expression(struct token *token, struct expression **tree)
393 if (token_type(token) == TOKEN_IDENT) {
394 if (token->ident == &sizeof_ident) {
395 struct expression *sizeof_ex
396 = alloc_expression(token->pos, EXPR_SIZEOF);
397 *tree = sizeof_ex;
398 tree = &sizeof_ex->unop;
399 token = token->next;
400 if (!match_op(token, '(') || !lookup_type(token->next))
401 return unary_expression(token, &sizeof_ex->cast_expression);
402 token = typename(token->next, &sizeof_ex->cast_type);
404 if (!match_op(token, ')'))
405 return expect(token, ')', "at end of sizeof type-name");
407 token = token->next;
409 * C99 ambiguity: the typename might have been the beginning
410 * of a typed initializer expression..
412 if (match_op(token, '{'))
413 token = initializer(&sizeof_ex->cast_expression, token);
414 return token;
415 } else if (token->ident == &__alignof___ident) {
416 struct expression *alignof_ex
417 = alloc_expression(token->pos, EXPR_ALIGNOF);
418 *tree = alignof_ex;
419 tree = &alignof_ex->unop;
420 token = token->next;
421 if (!match_op(token, '(') || !lookup_type(token->next))
422 return unary_expression(token, &alignof_ex->cast_expression);
423 token = typename(token->next, &alignof_ex->cast_type);
424 return expect(token, ')', "at end of alignof type-name");
428 if (token_type(token) == TOKEN_SPECIAL) {
429 if (match_oplist(token->special,
430 SPECIAL_INCREMENT, SPECIAL_DECREMENT,
431 '&', '*', '+', '-', '~', '!', 0)) {
432 struct expression *unop;
433 struct expression *unary;
434 struct token *next;
436 next = cast_expression(token->next, &unop);
437 if (!unop) {
438 warn(token->pos, "Syntax error in unary expression");
439 return next;
441 unary = alloc_expression(token->pos, EXPR_PREOP);
442 unary->op = token->special;
443 unary->unop = unop;
444 *tree = unary;
445 return next;
448 /* Gcc extension: &&label gives the address of a label */
449 if (match_op(token, SPECIAL_LOGICAL_AND) &&
450 token_type(token->next) == TOKEN_IDENT) {
451 struct expression *label = alloc_expression(token->pos, EXPR_LABEL);
452 struct symbol *sym = label_symbol(token->next);
453 if (!(sym->ctype.modifiers & MOD_ADDRESSABLE)) {
454 sym->ctype.modifiers |= MOD_ADDRESSABLE;
455 add_symbol(&function_computed_target_list, sym);
457 label->label_symbol = sym;
458 *tree = label;
459 return token->next->next;
464 return postfix_expression(token, tree, NULL);
468 * Ambiguity: a '(' can be either a cast-expression or
469 * a primary-expression depending on whether it is followed
470 * by a type or not.
472 * additional ambiguity: a "cast expression" followed by
473 * an initializer is really a postfix-expression.
475 static struct token *cast_expression(struct token *token, struct expression **tree)
477 if (match_op(token, '(')) {
478 struct token *next = token->next;
479 if (lookup_type(next)) {
480 struct expression *cast = alloc_expression(next->pos, EXPR_CAST);
481 struct symbol *sym;
483 token = typename(next, &sym);
484 cast->cast_type = sym;
485 token = expect(token, ')', "at end of cast operator");
486 if (match_op(token, '{')) {
487 token = initializer(&cast->cast_expression, token);
488 return postfix_expression(token, tree, cast);
490 *tree = cast;
491 token = cast_expression(token, &cast->cast_expression);
492 return token;
495 return unary_expression(token, tree);
/*
 * Generic left-to-right binop parsing
 *
 * This _really_ needs to be inlined, because that makes the inner
 * function call statically deterministic rather than a totally
 * unpredictable indirect call. But gcc-3 is so "clever" that it
 * doesn't do so by default even when you tell it to inline it.
 *
 * Making it a macro avoids the inlining problem, and also means
 * that we can pass in the op-comparison as an expression rather
 * than create a data structure for it.
 *
 * The macro expands to a complete function body. It parses one
 * operand with "inner", then, for as long as the next special token
 * satisfies "compare" (written in terms of the local "op"), parses
 * another operand and folds it into a left-leaning tree of nodes of
 * kind "type". If an operand is missing after an operator, a warning
 * is issued and the tree built so far is kept. The body finishes by
 * storing the result in *tree and returning the next token.
 */
#define LR_BINOP_EXPRESSION(token, tree, type, inner, compare)	\
	struct expression *left = NULL;				\
	struct token *next = inner(token, &left);		\
								\
	while (left && token_type(next) == TOKEN_SPECIAL) {	\
		struct expression *node, *right = NULL;		\
		int op = next->special;				\
								\
		if (!(compare))					\
			break;					\
		node = alloc_expression(next->pos, type);	\
		next = inner(next->next, &right);		\
		if (!right) {					\
			warn(next->pos, "No right hand side of '%s'-expression", show_special(op));	\
			break;					\
		}						\
		node->op = op;					\
		node->left = left;				\
		node->right = right;				\
		left = node;					\
	}							\
	*tree = left;						\
	return next;
539 static struct token *multiplicative_expression(struct token *token, struct expression **tree)
541 LR_BINOP_EXPRESSION(
542 token, tree, EXPR_BINOP, cast_expression,
543 (op == '*') || (op == '/') || (op == '%')
547 static struct token *additive_expression(struct token *token, struct expression **tree)
549 LR_BINOP_EXPRESSION(
550 token, tree, EXPR_BINOP, multiplicative_expression,
551 (op == '+') || (op == '-')
555 static struct token *shift_expression(struct token *token, struct expression **tree)
557 LR_BINOP_EXPRESSION(
558 token, tree, EXPR_BINOP, additive_expression,
559 (op == SPECIAL_LEFTSHIFT) || (op == SPECIAL_RIGHTSHIFT)
563 static struct token *relational_expression(struct token *token, struct expression **tree)
565 LR_BINOP_EXPRESSION(
566 token, tree, EXPR_COMPARE, shift_expression,
567 (op == '<') || (op == '>') ||
568 (op == SPECIAL_LTE) || (op == SPECIAL_GTE)
572 static struct token *equality_expression(struct token *token, struct expression **tree)
574 LR_BINOP_EXPRESSION(
575 token, tree, EXPR_COMPARE, relational_expression,
576 (op == SPECIAL_EQUAL) || (op == SPECIAL_NOTEQUAL)
580 static struct token *bitwise_and_expression(struct token *token, struct expression **tree)
582 LR_BINOP_EXPRESSION(
583 token, tree, EXPR_BINOP, equality_expression,
584 (op == '&')
588 static struct token *bitwise_xor_expression(struct token *token, struct expression **tree)
590 LR_BINOP_EXPRESSION(
591 token, tree, EXPR_BINOP, bitwise_and_expression,
592 (op == '^')
596 static struct token *bitwise_or_expression(struct token *token, struct expression **tree)
598 LR_BINOP_EXPRESSION(
599 token, tree, EXPR_BINOP, bitwise_xor_expression,
600 (op == '|')
604 static struct token *logical_and_expression(struct token *token, struct expression **tree)
606 LR_BINOP_EXPRESSION(
607 token, tree, EXPR_LOGICAL, bitwise_or_expression,
608 (op == SPECIAL_LOGICAL_AND)
612 static struct token *logical_or_expression(struct token *token, struct expression **tree)
614 LR_BINOP_EXPRESSION(
615 token, tree, EXPR_LOGICAL, logical_and_expression,
616 (op == SPECIAL_LOGICAL_OR)
620 struct token *conditional_expression(struct token *token, struct expression **tree)
622 token = logical_or_expression(token, tree);
623 if (match_op(token, '?')) {
624 struct expression *expr = alloc_expression(token->pos, EXPR_CONDITIONAL);
625 expr->op = token->special;
626 expr->left = *tree;
627 *tree = expr;
628 token = parse_expression(token->next, &expr->cond_true);
629 token = expect(token, ':', "in conditional expression");
630 token = conditional_expression(token, &expr->cond_false);
632 return token;
635 struct token *assignment_expression(struct token *token, struct expression **tree)
637 token = conditional_expression(token, tree);
638 if (token_type(token) == TOKEN_SPECIAL) {
639 static const int assignments[] = {
640 '=',
641 SPECIAL_ADD_ASSIGN, SPECIAL_SUB_ASSIGN,
642 SPECIAL_MUL_ASSIGN, SPECIAL_DIV_ASSIGN,
643 SPECIAL_MOD_ASSIGN, SPECIAL_SHL_ASSIGN,
644 SPECIAL_SHR_ASSIGN, SPECIAL_AND_ASSIGN,
645 SPECIAL_OR_ASSIGN, SPECIAL_XOR_ASSIGN };
646 int i, op = token->special;
647 for (i = 0; i < sizeof(assignments)/sizeof(int); i++)
648 if (assignments[i] == op) {
649 struct expression * expr = alloc_expression(token->pos, EXPR_ASSIGNMENT);
650 expr->left = *tree;
651 expr->op = op;
652 *tree = expr;
653 return assignment_expression(token->next, &expr->right);
656 return token;
659 static struct token *comma_expression(struct token *token, struct expression **tree)
661 LR_BINOP_EXPRESSION(
662 token, tree, EXPR_COMMA, assignment_expression,
663 (op == ',')
/*
 * Entry point for parsing a full expression: delegates to
 * comma_expression(), storing the result in *tree and returning
 * the token it returns.
 */
struct token *parse_expression(struct token *token, struct expression **tree)
{
	struct token *rest = comma_expression(token, tree);

	return rest;
}