[PATCH] speed up (and fix corner case in) tokenizer
[smatch.git] / expression.c
blob6a705f951f643a9e4b879074295304a7f7b98c34
1 /*
2 * sparse/expression.c
4 * Copyright (C) 2003 Transmeta Corp.
5 * 2003 Linus Torvalds
7 * Licensed under the Open Software License version 1.1
9 * This is the expression parsing part of parsing C.
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <errno.h>
19 #include <limits.h>
21 #include "lib.h"
22 #include "token.h"
23 #include "parse.h"
24 #include "symbol.h"
25 #include "scope.h"
26 #include "expression.h"
27 #include "target.h"
29 static int match_oplist(int op, ...)
31 va_list args;
33 va_start(args, op);
34 for (;;) {
35 int nextop = va_arg(args, int);
36 if (!nextop)
37 return 0;
38 if (op == nextop)
39 return 1;
43 static struct token *comma_expression(struct token *, struct expression **);
45 struct token *parens_expression(struct token *token, struct expression **expr, const char *where)
47 token = expect(token, '(', where);
48 if (match_op(token, '{')) {
49 struct expression *e = alloc_expression(token->pos, EXPR_STATEMENT);
50 struct statement *stmt = alloc_statement(token->pos, STMT_COMPOUND);
51 *expr = e;
52 e->statement = stmt;
53 start_symbol_scope();
54 token = compound_statement(token->next, stmt);
55 end_symbol_scope();
56 token = expect(token, '}', "at end of statement expression");
57 } else
58 token = parse_expression(token, expr);
59 return expect(token, ')', where);
62 static struct token *string_expression(struct token *token, struct expression *expr)
64 struct string *string = token->string;
65 struct token *next = token->next;
67 if (token_type(next) == TOKEN_STRING) {
68 int totlen = string->length;
69 char *data;
71 do {
72 totlen += next->string->length-1;
73 next = next->next;
74 } while (token_type(next) == TOKEN_STRING);
76 string = __alloc_string(totlen);
77 string->length = totlen;
78 data = string->data;
79 next = token;
80 do {
81 struct string *s = next->string;
82 int len = s->length;
84 next = next->next;
85 memcpy(data, s->data, len);
86 data += len-1;
87 } while (token_type(next) == TOKEN_STRING);
89 expr->string = string;
90 return next;
93 static void get_fp_value(struct expression *expr, struct token *token)
95 static int fp_warned;
97 expr->ctype = &double_ctype;
98 expr->value = 0;
99 if (!fp_warned) {
100 warn(token->pos, "FP values not yet implemented");
101 fp_warned = 1;
105 #ifndef ULLONG_MAX
106 #define ULLONG_MAX (~0ULL)
107 #endif
109 static void get_number_value(struct expression *expr, struct token *token)
111 const char *str = token->number;
112 unsigned long long value;
113 char *end;
114 unsigned long modifiers = 0;
115 int overflow = 0, do_warn = 0;
116 int try_unsigned = 1;
117 int bits;
119 errno = 0;
120 value = strtoull(str, &end, 0);
121 if (end == str)
122 goto Enoint;
123 if (value == ULLONG_MAX && errno == ERANGE)
124 overflow = 1;
125 while (1) {
126 unsigned long added;
127 char c = *end++;
128 if (!c) {
129 break;
130 } else if (c == 'u' || c == 'U') {
131 added = MOD_UNSIGNED;
132 } else if (c == 'l' || c == 'L') {
133 added = MOD_LONG;
134 if (*end == c) {
135 added |= MOD_LONGLONG;
136 end++;
138 } else
139 goto Enoint;
140 if (modifiers & added)
141 goto Enoint;
142 modifiers |= added;
144 if (overflow)
145 goto Eoverflow;
146 /* OK, it's a valid integer */
147 /* decimals can be unsigned only if directly specified as such */
148 if (str[0] != '0' && !(modifiers & MOD_UNSIGNED))
149 try_unsigned = 0;
150 if (!(modifiers & MOD_LONG)) {
151 bits = bits_in_int - 1;
152 if (!(value & (~1ULL << bits))) {
153 if (!(value & (1ULL << bits))) {
154 goto got_it;
155 } else if (try_unsigned) {
156 modifiers |= MOD_UNSIGNED;
157 goto got_it;
160 modifiers |= MOD_LONG;
161 do_warn = 1;
163 if (!(modifiers & MOD_LONGLONG)) {
164 bits = bits_in_long - 1;
165 if (!(value & (~1ULL << bits))) {
166 if (!(value & (1ULL << bits))) {
167 goto got_it;
168 } else if (try_unsigned) {
169 modifiers |= MOD_UNSIGNED;
170 goto got_it;
172 do_warn |= 2;
174 modifiers |= MOD_LONGLONG;
175 do_warn |= 1;
177 bits = bits_in_longlong - 1;
178 if (value & (~1ULL << bits))
179 goto Eoverflow;
180 if (!(value & (1ULL << bits)))
181 goto got_it;
182 if (!try_unsigned)
183 warn(expr->pos, "decimal constant %s is too big for long long",
184 show_token(token));
185 modifiers |= MOD_UNSIGNED;
186 got_it:
187 if (do_warn)
188 warn(expr->pos, "constant %s is so big it is%s%s%s",
189 show_token(token),
190 (modifiers & MOD_UNSIGNED) ? " unsigned":"",
191 (modifiers & MOD_LONG) ? " long":"",
192 (modifiers & MOD_LONGLONG) ? " long":"");
193 if (do_warn & 2)
194 warn(expr->pos,
195 "decimal constant %s is between LONG_MAX and ULONG_MAX."
196 " For C99 that means long long, C90 compilers are very "
197 "likely to produce unsigned long (and a warning) here",
198 show_token(token));
199 expr->type = EXPR_VALUE;
200 expr->ctype = ctype_integer(modifiers);
201 expr->value = value;
202 return;
203 Eoverflow:
204 error(expr->pos, "constant %s is too big even for unsigned long long",
205 show_token(token));
206 Enoint:
207 get_fp_value(expr, token);
210 struct token *primary_expression(struct token *token, struct expression **tree)
212 struct expression *expr = NULL;
214 switch (token_type(token)) {
215 case TOKEN_CHAR:
216 expr = alloc_expression(token->pos, EXPR_VALUE);
217 expr->ctype = &int_ctype;
218 expr->value = (unsigned char) token->character;
219 token = token->next;
220 break;
222 case TOKEN_NUMBER:
223 expr = alloc_expression(token->pos, EXPR_VALUE);
224 get_number_value(expr, token);
225 token = token->next;
226 break;
228 case TOKEN_IDENT: {
229 struct symbol *sym = lookup_symbol(token->ident, NS_SYMBOL | NS_TYPEDEF);
230 struct token *next = token->next;
232 expr = alloc_expression(token->pos, EXPR_SYMBOL);
235 * We support types as real first-class citizens, with type
236 * comparisons etc:
238 * if (typeof(a) == int) ..
240 if (sym && sym->namespace == NS_TYPEDEF) {
241 warn(token->pos, "typename in expression");
242 sym = NULL;
244 expr->symbol_name = token->ident;
245 expr->symbol = sym;
246 token = next;
247 break;
250 case TOKEN_STRING: {
251 expr = alloc_expression(token->pos, EXPR_STRING);
252 token = string_expression(token, expr);
253 break;
256 case TOKEN_SPECIAL:
257 if (token->special == '(') {
258 expr = alloc_expression(token->pos, EXPR_PREOP);
259 expr->op = '(';
260 token = parens_expression(token, &expr->unop, "in expression");
261 break;
263 if (token->special == '[' && lookup_type(token->next)) {
264 expr = alloc_expression(token->pos, EXPR_TYPE);
265 token = typename(token->next, &expr->symbol);
266 token = expect(token, ']', "in type expression");
267 break;
270 default:
273 *tree = expr;
274 return token;
277 static struct token *expression_list(struct token *token, struct expression_list **list)
279 while (!match_op(token, ')')) {
280 struct expression *expr = NULL;
281 token = assignment_expression(token, &expr);
282 if (!expr)
283 break;
284 add_expression(list, expr);
285 if (!match_op(token, ','))
286 break;
287 token = token->next;
289 return token;
293 * extend to deal with the ambiguous C grammar for parsing
294 * a cast expressions followed by an initializer.
296 static struct token *postfix_expression(struct token *token, struct expression **tree, struct expression *cast_init_expr)
298 struct expression *expr = cast_init_expr;
300 if (!expr)
301 token = primary_expression(token, &expr);
303 while (expr && token_type(token) == TOKEN_SPECIAL) {
304 switch (token->special) {
305 case '[': { /* Array dereference */
306 struct expression *deref = alloc_expression(token->pos, EXPR_PREOP);
307 struct expression *add = alloc_expression(token->pos, EXPR_BINOP);
309 deref->op = '*';
310 deref->unop = add;
312 add->op = '+';
313 add->left = expr;
314 token = parse_expression(token->next, &add->right);
315 token = expect(token, ']', "at end of array dereference");
316 expr = deref;
317 continue;
319 case SPECIAL_INCREMENT: /* Post-increment */
320 case SPECIAL_DECREMENT: { /* Post-decrement */
321 struct expression *post = alloc_expression(token->pos, EXPR_POSTOP);
322 post->op = token->special;
323 post->unop = expr;
324 expr = post;
325 token = token->next;
326 continue;
328 case SPECIAL_DEREFERENCE: { /* Structure pointer member dereference */
329 /* "x->y" is just shorthand for "(*x).y" */
330 struct expression *inner = alloc_expression(token->pos, EXPR_PREOP);
331 inner->op = '*';
332 inner->unop = expr;
333 expr = inner;
335 /* Fallthrough!! */
336 case '.': { /* Structure member dereference */
337 struct expression *deref = alloc_expression(token->pos, EXPR_DEREF);
338 deref->op = '.';
339 deref->deref = expr;
340 token = token->next;
341 if (token_type(token) != TOKEN_IDENT) {
342 warn(token->pos, "Expected member name");
343 break;
345 deref->member = token->ident;
346 token = token->next;
347 expr = deref;
348 continue;
351 case '(': { /* Function call */
352 struct expression *call = alloc_expression(token->pos, EXPR_CALL);
353 call->op = '(';
354 call->fn = expr;
355 token = expression_list(token->next, &call->args);
356 token = expect(token, ')', "in function call");
357 expr = call;
358 continue;
361 default:
362 break;
364 break;
366 *tree = expr;
367 return token;
370 static struct token *cast_expression(struct token *token, struct expression **tree);
371 static struct token *unary_expression(struct token *token, struct expression **tree)
373 if (token_type(token) == TOKEN_IDENT) {
374 if (token->ident == &sizeof_ident) {
375 struct expression *sizeof_ex
376 = alloc_expression(token->pos, EXPR_SIZEOF);
377 *tree = sizeof_ex;
378 tree = &sizeof_ex->unop;
379 token = token->next;
380 if (!match_op(token, '(') || !lookup_type(token->next))
381 return unary_expression(token, &sizeof_ex->cast_expression);
382 token = typename(token->next, &sizeof_ex->cast_type);
383 return expect(token, ')', "at end of sizeof type-name");
384 } else if (token->ident == &__alignof___ident) {
385 struct expression *alignof_ex
386 = alloc_expression(token->pos, EXPR_ALIGNOF);
387 *tree = alignof_ex;
388 tree = &alignof_ex->unop;
389 token = token->next;
390 if (!match_op(token, '(') || !lookup_type(token->next))
391 return unary_expression(token, &alignof_ex->cast_expression);
392 token = typename(token->next, &alignof_ex->cast_type);
393 return expect(token, ')', "at end of alignof type-name");
397 if (token_type(token) == TOKEN_SPECIAL) {
398 if (match_oplist(token->special,
399 SPECIAL_INCREMENT, SPECIAL_DECREMENT,
400 '&', '*', '+', '-', '~', '!', 0)) {
401 struct expression *unop;
402 struct expression *unary;
403 struct token *next;
405 next = cast_expression(token->next, &unop);
406 if (!unop) {
407 warn(token->pos, "Syntax error in unary expression");
408 return next;
410 unary = alloc_expression(token->pos, EXPR_PREOP);
411 unary->op = token->special;
412 unary->unop = unop;
413 *tree = unary;
414 return next;
417 /* Gcc extension: &&label gives the address of a label */
418 if (match_op(token, SPECIAL_LOGICAL_AND) &&
419 token_type(token->next) == TOKEN_IDENT) {
420 struct expression *label = alloc_expression(token->pos, EXPR_LABEL);
421 label->label_symbol = label_symbol(token->next);
422 *tree = label;
423 return token->next->next;
428 return postfix_expression(token, tree, NULL);
432 * Ambiguity: a '(' can be either a cast-expression or
433 * a primary-expression depending on whether it is followed
434 * by a type or not.
436 * additional ambiguity: a "cast expression" followed by
437 * an initializer is really a postfix-expression.
439 static struct token *cast_expression(struct token *token, struct expression **tree)
441 if (match_op(token, '(')) {
442 struct token *next = token->next;
443 if (lookup_type(next)) {
444 struct expression *cast = alloc_expression(next->pos, EXPR_CAST);
445 struct symbol *sym;
447 token = typename(next, &sym);
448 cast->cast_type = sym;
449 token = expect(token, ')', "at end of cast operator");
450 if (match_op(token, '{')) {
451 token = initializer(&cast->cast_expression, token);
452 return postfix_expression(token, tree, cast);
454 *tree = cast;
455 token = cast_expression(token, &cast->cast_expression);
456 return token;
459 return unary_expression(token, tree);
463 * Generic left-to-right binop parsing
465 * This _really_ needs to be inlined, because that makes the inner
466 * function call statically deterministic rather than a totally
467 * unpredictable indirect call. But gcc-3 is so "clever" that it
468 * doesn't do so by default even when you tell it to inline it.
470 * Making it a macro avoids the inlining problem, and also means
471 * that we can pass in the op-comparison as an expression rather
472 * than create a data structure for it.
475 #define LR_BINOP_EXPRESSION(token, tree, type, inner, compare) \
476 struct expression *left = NULL; \
477 struct token * next = inner(token, &left); \
479 if (left) { \
480 while (token_type(next) == TOKEN_SPECIAL) { \
481 struct expression *top, *right = NULL; \
482 int op = next->special; \
484 if (!(compare)) \
485 goto out; \
486 top = alloc_expression(next->pos, type); \
487 next = inner(next->next, &right); \
488 if (!right) { \
489 warn(next->pos, "No right hand side of '%s'-expression", show_special(op)); \
490 break; \
492 top->op = op; \
493 top->left = left; \
494 top->right = right; \
495 left = top; \
498 out: \
499 *tree = left; \
500 return next; \
503 static struct token *multiplicative_expression(struct token *token, struct expression **tree)
505 LR_BINOP_EXPRESSION(
506 token, tree, EXPR_BINOP, cast_expression,
507 (op == '*') || (op == '/') || (op == '%')
511 static struct token *additive_expression(struct token *token, struct expression **tree)
513 LR_BINOP_EXPRESSION(
514 token, tree, EXPR_BINOP, multiplicative_expression,
515 (op == '+') || (op == '-')
519 static struct token *shift_expression(struct token *token, struct expression **tree)
521 LR_BINOP_EXPRESSION(
522 token, tree, EXPR_BINOP, additive_expression,
523 (op == SPECIAL_LEFTSHIFT) || (op == SPECIAL_RIGHTSHIFT)
527 static struct token *relational_expression(struct token *token, struct expression **tree)
529 LR_BINOP_EXPRESSION(
530 token, tree, EXPR_COMPARE, shift_expression,
531 (op == '<') || (op == '>') ||
532 (op == SPECIAL_LTE) || (op == SPECIAL_GTE)
536 static struct token *equality_expression(struct token *token, struct expression **tree)
538 LR_BINOP_EXPRESSION(
539 token, tree, EXPR_COMPARE, relational_expression,
540 (op == SPECIAL_EQUAL) || (op == SPECIAL_NOTEQUAL)
544 static struct token *bitwise_and_expression(struct token *token, struct expression **tree)
546 LR_BINOP_EXPRESSION(
547 token, tree, EXPR_BINOP, equality_expression,
548 (op == '&')
552 static struct token *bitwise_xor_expression(struct token *token, struct expression **tree)
554 LR_BINOP_EXPRESSION(
555 token, tree, EXPR_BINOP, bitwise_and_expression,
556 (op == '^')
560 static struct token *bitwise_or_expression(struct token *token, struct expression **tree)
562 LR_BINOP_EXPRESSION(
563 token, tree, EXPR_BINOP, bitwise_xor_expression,
564 (op == '|')
568 static struct token *logical_and_expression(struct token *token, struct expression **tree)
570 LR_BINOP_EXPRESSION(
571 token, tree, EXPR_LOGICAL, bitwise_or_expression,
572 (op == SPECIAL_LOGICAL_AND)
576 static struct token *logical_or_expression(struct token *token, struct expression **tree)
578 LR_BINOP_EXPRESSION(
579 token, tree, EXPR_LOGICAL, logical_and_expression,
580 (op == SPECIAL_LOGICAL_OR)
584 struct token *conditional_expression(struct token *token, struct expression **tree)
586 token = logical_or_expression(token, tree);
587 if (match_op(token, '?')) {
588 struct expression *expr = alloc_expression(token->pos, EXPR_CONDITIONAL);
589 expr->op = token->special;
590 expr->left = *tree;
591 *tree = expr;
592 token = parse_expression(token->next, &expr->cond_true);
593 token = expect(token, ':', "in conditional expression");
594 token = conditional_expression(token, &expr->cond_false);
596 return token;
599 struct token *assignment_expression(struct token *token, struct expression **tree)
601 token = conditional_expression(token, tree);
602 if (token_type(token) == TOKEN_SPECIAL) {
603 static const int assignments[] = {
604 '=',
605 SPECIAL_ADD_ASSIGN, SPECIAL_SUB_ASSIGN,
606 SPECIAL_MUL_ASSIGN, SPECIAL_DIV_ASSIGN,
607 SPECIAL_MOD_ASSIGN, SPECIAL_SHL_ASSIGN,
608 SPECIAL_SHR_ASSIGN, SPECIAL_AND_ASSIGN,
609 SPECIAL_OR_ASSIGN, SPECIAL_XOR_ASSIGN };
610 int i, op = token->special;
611 for (i = 0; i < sizeof(assignments)/sizeof(int); i++)
612 if (assignments[i] == op) {
613 struct expression * expr = alloc_expression(token->pos, EXPR_ASSIGNMENT);
614 expr->left = *tree;
615 expr->op = op;
616 *tree = expr;
617 return assignment_expression(token->next, &expr->right);
620 return token;
623 static struct token *comma_expression(struct token *token, struct expression **tree)
625 LR_BINOP_EXPRESSION(
626 token, tree, EXPR_COMMA, assignment_expression,
627 (op == ',')
631 struct token *parse_expression(struct token *token, struct expression **tree)
633 return comma_expression(token,tree);