Oops. Bad scoping for iterators and switch() statements. We didn't
[smatch.git] / pre-process.c
blob9e89cc6a776efaefc31e0c9648ffdb42d75015e0
1 /*
2 * Do C preprocessing, based on a token list gathered by
3 * the tokenizer.
5 * This may not be the smartest preprocessor on the planet.
7 * Copyright (C) 2003 Transmeta Corp, all rights reserved.
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
12 #include <stddef.h>
13 #include <string.h>
14 #include <ctype.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 #include <limits.h>
19 #include "lib.h"
20 #include "parse.h"
21 #include "token.h"
22 #include "symbol.h"
23 #include "expression.h"
/* Set to 1 by preprocess() while it runs and reset to 0 when it is done. */
int preprocessing;

/* Number of slots in elif_ignore[], one per conditional nesting level. */
#define MAXNEST (16)

/* Count of open conditionals whose body is being kept / being skipped. */
static int true_nesting;
static int false_nesting;

/* Token of the outermost open conditional, used for the final diagnostic. */
static struct token *unmatched_if;

/* Per nesting level: non-zero when later #elif/#else branches must be skipped. */
static int elif_ignore[MAXNEST];

#define if_nesting (true_nesting + false_nesting)

/* Capacity of the include search path (plus one slot for the NULL terminator). */
#define INCLUDEPATHS 32

const char *includepath[INCLUDEPATHS+1] = {
	"/usr/lib/gcc-lib/i386-redhat-linux/3.2.1/include/",
	"/usr/include/",
	"/usr/local/include/",
	"",
	NULL
};
46 * This is stupid - the tokenizer already guarantees unique
47 * identifiers, so we should just compare identifier pointers
49 int match_string_ident(struct ident *ident, const char *str)
51 return !str[ident->len] && !memcmp(str, ident->name, ident->len);
54 static struct token *alloc_token(struct position *pos)
56 struct token *token = __alloc_token(0);
58 token->pos.stream = pos->stream;
59 token->pos.line = pos->line;
60 token->pos.pos = pos->pos;
61 token->pos.whitespace = 1;
62 return token;
65 static const char *show_token_sequence(struct token *token);
67 /* Head is one-before-list, and last is one-past-list */
68 static struct token *for_each_ident(struct token *head, struct token *(*action)(struct token *head, struct token *))
70 for (;;) {
71 struct token *next = head->next;
73 /* Did we hit the end of the current expansion? */
74 if (eof_token(next))
75 break;
77 if (token_type(next) == TOKEN_IDENT)
78 next = action(head, next);
80 head = next;
82 return head;
85 static struct token *is_defined(struct token *head, struct token *token, struct token *next)
87 char *string[] = { "0", "1" };
88 char *defined = string[lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL];
89 struct token *newtoken = alloc_token(&token->pos);
91 token_type(newtoken) = TOKEN_INTEGER;
92 newtoken->integer = defined;
93 newtoken->next = next;
94 head->next = newtoken;
95 return next;
99 struct token *defined_one_symbol(struct token *head, struct token *next)
101 if (match_string_ident(next->ident, "defined")) {
102 struct token *token = next->next;
103 struct token *past = token->next;
105 if (match_op(token, '(')) {
106 token = past;
107 past = token->next;
108 if (!match_op(past, ')'))
109 return next;
110 past = past->next;
112 if (token_type(token) == TOKEN_IDENT)
113 return is_defined(head, token, past);
115 return next;
118 static struct token *expand_defined(struct token *head)
120 return for_each_ident(head, defined_one_symbol);
123 /* Expand symbol 'sym' between 'head->next' and 'head->next->next' */
124 static struct token *expand(struct token *, struct symbol *);
126 static void replace_with_string(struct token *token, const char *str)
128 int size = strlen(str) + 1;
129 struct string *s = __alloc_string(size);
131 s->length = size;
132 memcpy(s->data, str, size);
133 token_type(token) = TOKEN_STRING;
134 token->string = s;
137 static void replace_with_integer(struct token *token, unsigned int val)
139 char *buf = __alloc_bytes(10);
140 sprintf(buf, "%d", val);
141 token_type(token) = TOKEN_INTEGER;
142 token->integer = buf;
145 struct token *expand_one_symbol(struct token *head, struct token *token)
147 struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
148 if (sym && !sym->busy) {
149 if (sym->arglist && !match_op(token->next, '('))
150 return token;
151 return expand(head, sym);
153 if (!memcmp(token->ident->name, "__LINE__", 9)) {
154 replace_with_integer(token, token->pos.line);
155 } else if (!memcmp(token->ident->name, "__FILE__", 9)) {
156 replace_with_string(token, (input_streams + token->pos.stream)->name);
158 return token;
161 static struct token *expand_list(struct token *head)
163 return for_each_ident(head, expand_one_symbol);
166 static struct token *find_argument_end(struct token *start)
168 int nesting = 0;
170 while (!eof_token(start)) {
171 struct token *next = start->next;
172 if (match_op(next, '('))
173 nesting++;
174 else if (match_op(next, ')')) {
175 if (--nesting < 0) {
176 start->next = &eof_token_entry;
177 return next->next;
179 } else if (!nesting && match_op(next, ','))
180 next->special = SPECIAL_ARG_SEPARATOR;
181 start = next;
183 return start;
186 static struct token *dup_token(struct token *token, struct position *pos, int newline)
188 struct token *alloc = alloc_token(pos);
189 token_type(alloc) = token_type(token);
190 alloc->pos.line = pos->line;
191 alloc->pos.newline = newline;
192 alloc->integer = token->integer;
193 return alloc;
196 static void insert(struct token *token, struct token *prev)
198 token->next = prev->next;
199 prev->next = token;
202 static struct token * replace(struct token *token, struct token *prev, struct token *list)
204 int newline = token->pos.newline;
206 prev->next = token->next;
207 while (!eof_token(list) && !match_op(list, SPECIAL_ARG_SEPARATOR)) {
208 struct token *newtok = dup_token(list, &token->pos, newline);
209 newline = 0;
210 insert(newtok, prev);
211 prev = newtok;
212 list = list->next;
214 return prev;
217 static struct token *get_argument(int nr, struct token *args)
219 if (!nr)
220 return args;
221 while (!eof_token(args)) {
222 if (match_op(args, SPECIAL_ARG_SEPARATOR))
223 if (!--nr)
224 return args->next;
225 args = args->next;
228 return args;
231 static struct token *stringify(struct token *token, struct token *arg)
233 const char *s = show_token_sequence(arg);
234 int size = strlen(s)+1;
235 struct token *newtoken = alloc_token(&token->pos);
236 struct string *string = __alloc_string(size);
238 newtoken->pos.newline = token->pos.newline;
239 memcpy(string->data, s, size);
240 string->length = size;
241 token_type(newtoken) = TOKEN_STRING;
242 newtoken->string = string;
243 newtoken->next = &eof_token_entry;
244 return newtoken;
247 static int arg_number(struct token *arglist, struct ident *ident)
249 int nr = 0;
251 while (!eof_token(arglist)) {
252 if (arglist->ident == ident)
253 return nr;
254 nr++;
255 arglist = arglist->next;
257 return -1;
260 static struct token empty_arg_token = { .pos = { .type = TOKEN_EOF } };
262 static struct token *expand_one_arg(struct token *head, struct token *token,
263 struct token *arglist, struct token *arguments)
265 int nr = arg_number(arglist, token->ident);
266 struct token *orig_head = head;
268 if (nr >= 0) {
269 struct token *arg = get_argument(nr, arguments);
270 struct token *last = token->next;
271 token->next = &eof_token_entry;
274 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
275 * then the 'x' goes away too.
277 if (match_op(head, SPECIAL_HASHHASH) && eof_token(arg)) {
278 arg = &empty_arg_token;
279 empty_arg_token.next = &eof_token_entry;
282 head = replace(token, head, arg);
283 if (!match_op(orig_head, SPECIAL_HASHHASH) && !match_op(last, SPECIAL_HASHHASH) && !match_op(orig_head, '#'))
284 head = expand_list(orig_head);
285 head->next = last;
286 return head;
288 return token;
291 static void expand_arguments(struct token *token, struct token *head,
292 struct token *arguments, struct token *arglist)
294 for (;;) {
295 struct token *next = head->next;
297 /* Did we hit the end of the current expansion? */
298 if (eof_token(next))
299 break;
301 if (match_op(next, '#')) {
302 struct token *nextnext = next->next;
303 int nr = arg_number(arglist, nextnext->ident);
304 if (nextnext != head && nr >= 0 && token_type(nextnext) == TOKEN_IDENT) {
305 struct token *newtoken = stringify(nextnext, get_argument(nr, arguments));
306 replace(nextnext, head, newtoken);
307 continue;
309 warn(next->pos, "'#' operation is not followed by argument name");
312 if (token_type(next) == TOKEN_IDENT)
313 next = expand_one_arg(head, next, arglist, arguments);
315 head = next;
320 * Possibly valid combinations:
321 * - anything + 'empty_arg_token' is empty.
322 * - ident + ident - combine (==ident)
323 * - ident + number - combine (==ident)
324 * - number + number - combine (==number)
325 * - number + ident - combine (==number)
326 * - string + string - leave as is, C will combine them anyway
327 * others cause an error and leave the two tokens as separate tokens.
329 static struct token *hashhash(struct token *head, struct token *first, struct token *second)
331 static char buffer[512], *p;
332 struct token *newtoken;
333 static const char *src;
334 int len;
336 first->next = second;
339 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
340 * then the 'x' goes away too.
342 * See expand_one_arg.
344 if (token_type(second) == TOKEN_EOF) {
345 head->next = second->next;
346 return head;
349 p = buffer;
350 switch (token_type(first)) {
351 case TOKEN_INTEGER:
352 len = strlen(first->integer);
353 src = first->integer;
354 break;
355 case TOKEN_IDENT:
356 len = first->ident->len;
357 src = first->ident->name;
358 break;
359 default:
360 return second;
362 memcpy(p, src, len);
363 p += len;
365 switch (token_type(second)) {
366 case TOKEN_INTEGER:
367 len = strlen(second->integer);
368 src = second->integer;
369 break;
370 case TOKEN_IDENT:
371 len = second->ident->len;
372 src = second->ident->name;
373 break;
374 default:
375 return second;
377 memcpy(p, src, len);
378 p += len;
379 *p++ = 0;
381 newtoken = alloc_token(&first->pos);
382 head->next = newtoken;
383 token_type(newtoken) = token_type(first);
384 switch (token_type(newtoken)) {
385 case TOKEN_IDENT:
386 newtoken->ident = built_in_ident(buffer);
387 break;
388 case TOKEN_INTEGER:
389 newtoken->integer = __alloc_bytes(p - buffer);
390 memcpy(newtoken->integer, buffer, p - buffer);
391 break;
393 return newtoken;
396 static void retokenize(struct token *head)
398 struct token * next = head->next;
399 struct token * nextnext = next->next;
400 struct token * nextnextnext = nextnext->next;
402 if (eof_token(next) || eof_token(nextnext))
403 return;
405 for (;;) {
406 if (eof_token(nextnextnext))
407 break;
409 if (match_op(nextnext, SPECIAL_HASHHASH)) {
410 struct token *newtoken = hashhash(head, next, nextnextnext);
412 next = newtoken;
413 nextnext = nextnextnext->next;
414 nextnextnext = nextnext->next;
416 newtoken->next = nextnext;
417 if (!eof_token(nextnext))
418 continue;
419 break;
422 head = next;
423 next = nextnext;
424 nextnext = nextnext->next;
425 nextnextnext = nextnextnext->next;
429 static struct token *expand(struct token *head, struct symbol *sym)
431 struct token *arguments, *token, *last;
433 token = head->next;
434 last = token->next;
436 arguments = NULL;
437 if (sym->arglist) {
438 arguments = last->next;
439 last = find_argument_end(last);
441 token->next = &eof_token_entry;
443 /* Replace the token with the token expansion */
444 replace(token, head, sym->expansion);
446 /* Then, replace all the arguments with their expansions */
447 if (arguments)
448 expand_arguments(token, head, arguments, sym->arglist);
450 /* Re-tokenize the sequence if any ## token exists.. */
451 retokenize(head);
453 /* Finally, expand the expansion itself .. */
454 sym->busy++;
455 head = expand_list(head);
456 sym->busy--;
458 /* Put the rest of the stuff in place again */
459 head->next = last;
460 return head;
463 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
465 struct token *last;
466 static char buffer[256];
467 char *ptr = buffer;
469 last = token;
470 while (!eof_token(token) && !match_op(token, endop)) {
471 int len;
472 const char *val = token->string->data;
473 if (token_type(token) != TOKEN_STRING)
474 val = show_token(token);
475 len = strlen(val);
476 memcpy(ptr, val, len);
477 ptr += len;
478 token = token->next;
480 *ptr = 0;
481 if (endop && !match_op(token, endop))
482 warn(start->pos, "expected '>' at end of filename");
483 return buffer;
486 static void do_include(struct token *head, struct token *token, const char *filename)
488 int endlen = strlen(filename) + 1;
489 const char **pptr = includepath, *path;
491 while ((path = *pptr++) != NULL) {
492 int fd, len = strlen(path);
493 static char fullname[PATH_MAX];
495 memcpy(fullname, path, len);
496 memcpy(fullname+len, filename, endlen);
497 fd = open(fullname, O_RDONLY);
498 if (fd >= 0) {
499 char * streamname = __alloc_bytes(len + endlen);
500 memcpy(streamname, fullname, len + endlen);
501 head->next = tokenize(streamname, fd, head->next);
502 close(fd);
503 return;
506 warn(token->pos, "unable to open '%s'", filename);
509 static int handle_include(struct stream *stream, struct token *head, struct token *token)
511 const char *filename;
512 struct token *next;
513 int expect;
515 if (stream->constant == -1)
516 stream->constant = 0;
517 if (false_nesting)
518 return 1;
519 next = token->next;
520 expect = '>';
521 if (!match_op(next, '<')) {
522 expand_list(token);
523 expect = 0;
524 next = token;
526 token = next->next;
527 filename = token_name_sequence(token, expect, token);
528 do_include(head, token, filename);
529 return 1;
532 static int token_list_different(struct token *list1, struct token *list2)
534 for (;;) {
535 if (list1 == list2)
536 return 0;
537 if (!list1 || !list2)
538 return 1;
539 if (token_type(list1) != token_type(list2))
540 return 1;
541 list1 = list1->next;
542 list2 = list2->next;
547 static int handle_define(struct stream *stream, struct token *head, struct token *token)
549 struct token *arglist, *expansion;
550 struct token *left = token->next;
551 struct symbol *sym;
552 struct ident *name;
554 if (token_type(left) != TOKEN_IDENT) {
555 warn(head->pos, "expected identifier to 'define'");
556 return 0;
558 if (false_nesting)
559 return 1;
560 name = left->ident;
562 arglist = NULL;
563 expansion = left->next;
564 if (!expansion->pos.whitespace && match_op(expansion, '(')) {
565 arglist = expansion;
566 while (!eof_token(expansion)) {
567 struct token *next = expansion->next;
568 if (match_op(next, ')')) {
569 // Terminate the arglist
570 expansion->next = &eof_token_entry;
571 expansion = next->next;
572 break;
574 if (match_op(next, ','))
575 expansion->next = next->next;
576 expansion = next;
578 arglist = arglist->next;
581 sym = lookup_symbol(name, NS_PREPROCESSOR);
582 if (sym) {
583 if (token_list_different(sym->expansion, expansion) ||
584 token_list_different(sym->arglist, arglist)) {
585 warn(left->pos, "preprocessor token redefined");
586 warn(sym->pos, "this was the original definition");
588 return 1;
590 sym = alloc_symbol(left->pos, SYM_NODE);
591 bind_symbol(sym, name, NS_PREPROCESSOR);
593 sym->expansion = expansion;
594 sym->arglist = arglist;
595 return 1;
598 static int handle_undef(struct stream *stream, struct token *head, struct token *token)
600 struct token *left = token->next;
601 struct symbol **sym;
603 if (token_type(left) != TOKEN_IDENT) {
604 warn(head->pos, "expected identifier to 'undef'");
605 return 0;
607 if (false_nesting)
608 return 1;
609 sym = &left->ident->symbols;
610 while (*sym) {
611 struct symbol *t = *sym;
612 if (t->namespace == NS_PREPROCESSOR) {
613 *sym = t->next_id;
614 return 1;
616 sym = &t->next_id;
618 return 1;
621 static int preprocessor_if(struct token *token, int true)
623 if (if_nesting == 0)
624 unmatched_if = token;
625 elif_ignore[if_nesting] = false_nesting || true;
626 if (false_nesting || !true) {
627 false_nesting++;
628 return 1;
630 true_nesting++;
631 return 1;
634 static int token_defined(struct token *token)
636 if (token_type(token) == TOKEN_IDENT)
637 return lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL;
639 warn(token->pos, "expected identifier for #if[n]def");
640 return 0;
643 static int handle_ifdef(struct stream *stream, struct token *head, struct token *token)
645 return preprocessor_if(token, token_defined(token->next));
648 static int handle_ifndef(struct stream *stream, struct token *head, struct token *token)
650 struct token *next = token->next;
651 if (stream->constant == -1) {
652 int newconstant = 0;
653 if (token_type(next) == TOKEN_IDENT) {
654 if (!stream->protect || stream->protect == next->ident) {
655 newconstant = -2;
656 stream->protect = next->ident;
657 stream->nesting = if_nesting+1;
660 stream->constant = newconstant;
662 return preprocessor_if(token, !token_defined(next));
665 static int expression_value(struct token *head)
667 struct expression *expr;
668 struct token *token;
669 long long value;
671 expand_defined(head);
672 expand_list(head);
673 token = constant_expression(head->next, &expr);
674 if (!eof_token(token))
675 warn(token->pos, "garbage at end: %s", show_token_sequence(token));
676 value = get_expression_value(expr);
677 return value != 0;
680 static int handle_if(struct stream *stream, struct token *head, struct token *token)
682 int value = 0;
683 if (!false_nesting)
684 value = expression_value(token);
685 return preprocessor_if(token, value);
688 static int handle_elif(struct stream * stream, struct token *head, struct token *token)
690 if (stream->nesting == if_nesting)
691 stream->constant = 0;
692 if (false_nesting) {
693 /* If this whole if-thing is if'ed out, an elif cannot help */
694 if (elif_ignore[if_nesting-1])
695 return 1;
696 if (expression_value(token)) {
697 false_nesting--;
698 true_nesting++;
699 elif_ignore[if_nesting-1] = 1;
701 return 1;
703 if (true_nesting) {
704 false_nesting = 1;
705 true_nesting--;
706 return 1;
708 warn(token->pos, "unmatched '#elif'");
709 return 1;
712 static int handle_else(struct stream *stream, struct token *head, struct token *token)
714 if (stream->nesting == if_nesting)
715 stream->constant = 0;
716 if (false_nesting) {
717 /* If this whole if-thing is if'ed out, an else cannot help */
718 if (elif_ignore[if_nesting-1])
719 return 1;
720 false_nesting--;
721 true_nesting++;
722 elif_ignore[if_nesting-1] = 1;
723 return 1;
725 if (true_nesting) {
726 true_nesting--;
727 false_nesting = 1;
728 return 1;
730 warn(token->pos, "unmatched #else");
731 return 1;
734 static int handle_endif(struct stream *stream, struct token *head, struct token *token)
736 if (stream->constant == -2 && stream->nesting == if_nesting)
737 stream->constant = -1;
739 if (false_nesting) {
740 false_nesting--;
741 return 1;
743 if (true_nesting) {
744 true_nesting--;
745 return 1;
747 warn(token->pos, "unmatched #endif");
748 return 1;
751 static const char *show_token_sequence(struct token *token)
753 static char buffer[256];
754 char *ptr = buffer;
755 int whitespace = 0;
757 if (!token)
758 return "<none>";
759 while (!eof_token(token) && !match_op(token, SPECIAL_ARG_SEPARATOR)) {
760 const char *val = show_token(token);
761 int len = strlen(val);
762 if (whitespace)
763 *ptr++ = ' ';
764 memcpy(ptr, val, len);
765 ptr += len;
766 token = token->next;
767 whitespace = token->pos.whitespace;
769 *ptr = 0;
770 return buffer;
773 static int handle_warning(struct stream *stream, struct token *head, struct token *token)
775 if (false_nesting)
776 return 1;
777 warn(token->pos, "%s", show_token_sequence(token->next));
778 return 1;
781 static int handle_error(struct stream *stream, struct token *head, struct token *token)
783 if (false_nesting)
784 return 1;
785 error(token->pos, "%s", show_token_sequence(token->next));
786 return 1;
789 static int handle_nostdinc(struct stream *stream, struct token *head, struct token *token)
791 if (false_nesting)
792 return 1;
793 includepath[1] = NULL;
794 return 1;
797 static void add_path_entry(struct token *token, const char *path)
799 int i;
801 for (i = 0; i < INCLUDEPATHS; i++) {
802 if (!includepath[i]) {
803 includepath[i] = path;
804 includepath[i+1] = NULL;
805 return;
808 warn(token->pos, "too many include path entries");
811 static int handle_add_include(struct stream *stream, struct token *head, struct token *token)
813 for (;;) {
814 token = token->next;
815 if (eof_token(token))
816 return 1;
817 if (token_type(token) != TOKEN_STRING) {
818 warn(token->pos, "expected path string");
819 return 1;
821 add_path_entry(token, token->string->data);
825 static int handle_preprocessor_command(struct stream *stream, struct token *head, struct ident *ident, struct token *token)
827 int i;
828 static struct {
829 const char *name;
830 int (*handler)(struct stream *, struct token *, struct token *);
831 } handlers[] = {
832 { "define", handle_define },
833 { "undef", handle_undef },
834 { "ifdef", handle_ifdef },
835 { "ifndef", handle_ifndef },
836 { "else", handle_else },
837 { "endif", handle_endif },
838 { "if", handle_if },
839 { "elif", handle_elif },
840 { "warning", handle_warning },
841 { "error", handle_error },
842 { "include", handle_include },
844 // our internal preprocessor tokens
845 { "nostdinc", handle_nostdinc },
846 { "add_include", handle_add_include },
849 for (i = 0; i < (sizeof (handlers) / sizeof (handlers[0])); i++) {
850 if (match_string_ident(ident, handlers[i].name))
851 return handlers[i].handler(stream, head, token);
853 return 0;
856 static void handle_preprocessor_line(struct stream *stream, struct token * head, struct token *token)
858 if (!token)
859 return;
861 if (token_type(token) == TOKEN_IDENT)
862 if (handle_preprocessor_command(stream, head, token->ident, token))
863 return;
864 warn(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token));
867 static void preprocessor_line(struct stream *stream, struct token * head)
869 struct token *start = head->next, *next;
870 struct token **tp = &start->next;
872 for (;;) {
873 next = *tp;
874 if (next->pos.newline)
875 break;
876 tp = &next->next;
878 head->next = next;
879 *tp = &eof_token_entry;
880 handle_preprocessor_line(stream, head, start->next);
883 static void do_preprocess(struct token *head)
885 do {
886 struct token *next = head->next;
887 struct stream *stream = input_streams + next->pos.stream;
889 if (next->pos.newline && match_op(next, '#')) {
890 preprocessor_line(stream, head);
891 continue;
894 if (false_nesting) {
895 head->next = next->next;
896 continue;
899 switch (token_type(next)) {
900 case TOKEN_STREAMEND:
901 if (stream->constant == -1 && stream->protect) {
902 stream->constant = 1;
904 /* fallthrough */
905 case TOKEN_STREAMBEGIN:
906 head->next = next->next;
907 continue;
909 case TOKEN_IDENT:
910 next = expand_one_symbol(head, next);
911 /* fallthrough */
912 default:
914 * Any token expansion (even if it ended up being an
915 * empty expansion) in this stream implies it can't
916 * be constant.
918 stream->constant = 0;
921 head = next;
922 } while (!eof_token(head));
925 struct token * preprocess(struct token *token)
927 struct token header = { };
929 preprocessing = 1;
930 header.next = token;
931 do_preprocess(&header);
932 if (if_nesting)
933 warn(unmatched_if->pos, "unmatched preprocessor conditional");
935 // Drop all expressions from pre-processing, they're not used any more.
936 clear_expression_alloc();
937 preprocessing = 0;
939 return header.next;