Oops. Move type definition to top of function, instead of
[smatch.git] / pre-process.c
blobcaa4921ec3a91a6b497309c5164784c4a674249b
1 /*
2 * Do C preprocessing, based on a token list gathered by
3 * the tokenizer.
5 * This may not be the smartest preprocessor on the planet.
7 * Copyright (C) 2003 Transmeta Corp, all rights reserved.
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
12 #include <stddef.h>
13 #include <string.h>
14 #include <ctype.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 #include <limits.h>
19 #include "lib.h"
20 #include "parse.h"
21 #include "token.h"
22 #include "symbol.h"
23 #include "expression.h"
/* Number of slots in elif_ignore[], i.e. how many nested conditionals it can describe. */
#define MAXNEST (16)

/*
 * State of the conditional-compilation stack.  Objects with static
 * storage duration start out zeroed, so no explicit initializers
 * are needed.
 */
static int true_nesting;		/* open conditionals currently being compiled */
static int false_nesting;		/* open conditionals currently being skipped */
static struct token *unmatched_if;	/* outermost conditional that is still open */
static int elif_ignore[MAXNEST];	/* per level: non-zero once #elif/#else must be ignored */

/* Total depth of open conditionals. */
#define if_nesting (true_nesting + false_nesting)
/* Maximum number of include directories (the array has one extra slot for the NULL terminator). */
#define INCLUDEPATHS 32

/*
 * NULL-terminated list of directory prefixes that do_include() tries in
 * order.  Each prefix is concatenated directly with the file name, so the
 * empty string stands for "relative to the current directory".
 */
const char *includepath[INCLUDEPATHS + 1] = {
	[0] = "/usr/lib/gcc-lib/i386-redhat-linux/3.2.1/include/",
	[1] = "/usr/include/",
	[2] = "/usr/local/include/",
	[3] = "",
	[4] = NULL,
};
44 * This is stupid - the tokenizer already guarantees unique
45 * identifiers, so we should just compare identifier pointers
47 static int match_string_ident(struct ident *ident, const char *str)
49 return !str[ident->len] && !memcmp(str, ident->name, ident->len);
52 static struct token *alloc_token(struct position *pos)
54 struct token *token = __alloc_token(0);
56 token->pos.stream = pos->stream;
57 token->pos.line = pos->line;
58 token->pos.pos = pos->pos;
59 token->pos.whitespace = 1;
60 return token;
63 static const char *show_token_sequence(struct token *token);
65 /* Head is one-before-list, and last is one-past-list */
66 static struct token *for_each_ident(struct token *head, struct token *(*action)(struct token *head, struct token *))
68 for (;;) {
69 struct token *next = head->next;
71 /* Did we hit the end of the current expansion? */
72 if (eof_token(next))
73 break;
75 if (token_type(next) == TOKEN_IDENT)
76 next = action(head, next);
78 head = next;
80 return head;
83 static struct token *is_defined(struct token *head, struct token *token, struct token *next)
85 char *string[] = { "0", "1" };
86 char *defined = string[lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL];
87 struct token *newtoken = alloc_token(&token->pos);
89 token_type(newtoken) = TOKEN_INTEGER;
90 newtoken->integer = defined;
91 newtoken->next = next;
92 head->next = newtoken;
93 return next;
97 struct token *defined_one_symbol(struct token *head, struct token *next)
99 if (match_string_ident(next->ident, "defined")) {
100 struct token *token = next->next;
101 struct token *past = token->next;
103 if (match_op(token, '(')) {
104 token = past;
105 past = token->next;
106 if (!match_op(past, ')'))
107 return next;
108 past = past->next;
110 if (token_type(token) == TOKEN_IDENT)
111 return is_defined(head, token, past);
113 return next;
116 static struct token *expand_defined(struct token *head)
118 return for_each_ident(head, defined_one_symbol);
121 /* Expand symbol 'sym' between 'head->next' and 'head->next->next' */
122 static struct token *expand(struct token *, struct symbol *);
124 static void replace_with_string(struct token *token, const char *str)
126 int size = strlen(str) + 1;
127 struct string *s = __alloc_string(size);
129 s->length = size;
130 memcpy(s->data, str, size);
131 token_type(token) = TOKEN_STRING;
132 token->string = s;
135 static void replace_with_integer(struct token *token, unsigned int val)
137 char *buf = __alloc_bytes(10);
138 sprintf(buf, "%d", val);
139 token_type(token) = TOKEN_INTEGER;
140 token->integer = buf;
143 struct token *expand_one_symbol(struct token *head, struct token *token)
145 struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
146 if (sym && !sym->busy) {
147 if (sym->arglist && !match_op(token->next, '('))
148 return token;
149 return expand(head, sym);
151 if (!memcmp(token->ident->name, "__LINE__", 9)) {
152 replace_with_integer(token, token->pos.line);
153 } else if (!memcmp(token->ident->name, "__FILE__", 9)) {
154 replace_with_string(token, (input_streams + token->pos.stream)->name);
156 return token;
159 static struct token *expand_list(struct token *head)
161 return for_each_ident(head, expand_one_symbol);
164 static struct token *find_argument_end(struct token *start)
166 int nesting = 0;
168 while (!eof_token(start)) {
169 struct token *next = start->next;
170 if (match_op(next, '('))
171 nesting++;
172 else if (match_op(next, ')')) {
173 if (--nesting < 0) {
174 start->next = &eof_token_entry;
175 return next->next;
177 } else if (!nesting && match_op(next, ','))
178 next->special = SPECIAL_ARG_SEPARATOR;
179 start = next;
181 return start;
184 static struct token *dup_token(struct token *token, struct position *pos, int newline)
186 struct token *alloc = alloc_token(pos);
187 token_type(alloc) = token_type(token);
188 alloc->pos.line = pos->line;
189 alloc->pos.newline = newline;
190 alloc->integer = token->integer;
191 return alloc;
194 static void insert(struct token *token, struct token *prev)
196 token->next = prev->next;
197 prev->next = token;
200 static struct token * replace(struct token *token, struct token *prev, struct token *list)
202 int newline = token->pos.newline;
204 prev->next = token->next;
205 while (!eof_token(list) && !match_op(list, SPECIAL_ARG_SEPARATOR)) {
206 struct token *newtok = dup_token(list, &token->pos, newline);
207 newline = 0;
208 insert(newtok, prev);
209 prev = newtok;
210 list = list->next;
212 return prev;
215 static struct token *get_argument(int nr, struct token *args)
217 if (!nr)
218 return args;
219 while (!eof_token(args)) {
220 if (match_op(args, SPECIAL_ARG_SEPARATOR))
221 if (!--nr)
222 return args->next;
223 args = args->next;
226 return args;
229 static struct token *stringify(struct token *token, struct token *arg)
231 const char *s = show_token_sequence(arg);
232 int size = strlen(s)+1;
233 struct token *newtoken = alloc_token(&token->pos);
234 struct string *string = __alloc_string(size);
236 newtoken->pos.newline = token->pos.newline;
237 memcpy(string->data, s, size);
238 string->length = size;
239 token_type(newtoken) = TOKEN_STRING;
240 newtoken->string = string;
241 newtoken->next = &eof_token_entry;
242 return newtoken;
245 static int arg_number(struct token *arglist, struct ident *ident)
247 int nr = 0;
249 while (!eof_token(arglist)) {
250 if (arglist->ident == ident)
251 return nr;
252 nr++;
253 arglist = arglist->next;
255 return -1;
/*
 * Placeholder token of type TOKEN_EOF.  expand_one_arg() substitutes it
 * for an empty argument that follows '##'.
 */
258 static struct token empty_arg_token = { .pos = { .type = TOKEN_EOF } };
/*
 * If the identifier 'token' (which follows 'head') is one of the macro
 * parameters in 'arglist', replace it with a copy of the matching
 * argument from 'arguments'.
 *
 * The copied argument is itself macro-expanded (from the original head)
 * unless it sits next to a '##' or directly after a '#'.
 *
 * Returns the last token of the substitution, re-linked to what followed
 * 'token'; returns 'token' unchanged when it is not a parameter.
 */
260 static struct token *expand_one_arg(struct token *head, struct token *token,
261 struct token *arglist, struct token *arguments)
263 int nr = arg_number(arglist, token->ident);
264 struct token *orig_head = head;
266 if (nr >= 0) {
267 struct token *arg = get_argument(nr, arguments);
268 struct token *last = token->next;
/* Temporarily cut the list after 'token'; 'last' is re-attached below. */
269 token->next = &eof_token_entry;
272 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
273 * then the 'x' goes away too.
275 if (match_op(head, SPECIAL_HASHHASH) && eof_token(arg)) {
276 arg = &empty_arg_token;
277 empty_arg_token.next = &eof_token_entry;
280 head = replace(token, head, arg);
281 if (!match_op(orig_head, SPECIAL_HASHHASH) && !match_op(last, SPECIAL_HASHHASH) && !match_op(orig_head, '#'))
282 head = expand_list(orig_head);
283 head->next = last;
284 return head;
286 return token;
/*
 * Walk the macro body that follows 'head' and substitute parameters:
 *  - '#' followed by a parameter name is replaced by the stringified
 *    argument;
 *  - any other identifier is handed to expand_one_arg().
 *
 * 'arglist' is the macro's parameter list and 'arguments' the tokens of
 * the invocation.  The 'token' parameter is not referenced in the body.
 */
289 static void expand_arguments(struct token *token, struct token *head,
290 struct token *arguments, struct token *arglist)
292 for (;;) {
293 struct token *next = head->next;
295 /* Did we hit the end of the current expansion? */
296 if (eof_token(next))
297 break;
299 if (match_op(next, '#')) {
300 struct token *nextnext = next->next;
301 int nr = arg_number(arglist, nextnext->ident);
302 if (nextnext != head && nr >= 0 && token_type(nextnext) == TOKEN_IDENT) {
303 struct token *newtoken = stringify(nextnext, get_argument(nr, arguments));
/* replace() relinks head->next to nextnext->next, so both '#' and the name drop out. */
304 replace(nextnext, head, newtoken);
/* 'head' is not advanced here: the next pass looks at head->next again. */
305 continue;
307 warn(next->pos, "'#' operation is not followed by argument name");
310 if (token_type(next) == TOKEN_IDENT)
311 next = expand_one_arg(head, next, arglist, arguments);
313 head = next;
318 * Possibly valid combinations:
319 * - anything + 'empty_arg_token' is empty.
320 * - ident + ident - combine (==ident)
321 * - ident + number - combine (==ident)
322 * - number + number - combine (==number)
323 * - number + ident - combine (==number)
324 * - string + string - leave as is, C will combine them anyway
325 * others cause an error and leave the two tokens as separate tokens.
327 static struct token *hashhash(struct token *head, struct token *first, struct token *second)
329 static char buffer[512], *p;
330 struct token *newtoken;
331 static const char *src;
332 int len;
334 first->next = second;
337 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
338 * then the 'x' goes away too.
340 * See expand_one_arg.
342 if (token_type(second) == TOKEN_EOF) {
343 head->next = second->next;
344 return head;
347 p = buffer;
348 switch (token_type(first)) {
349 case TOKEN_INTEGER:
350 len = strlen(first->integer);
351 src = first->integer;
352 break;
353 case TOKEN_IDENT:
354 len = first->ident->len;
355 src = first->ident->name;
356 break;
357 default:
358 return second;
360 memcpy(p, src, len);
361 p += len;
363 switch (token_type(second)) {
364 case TOKEN_INTEGER:
365 len = strlen(second->integer);
366 src = second->integer;
367 break;
368 case TOKEN_IDENT:
369 len = second->ident->len;
370 src = second->ident->name;
371 break;
372 default:
373 return second;
375 memcpy(p, src, len);
376 p += len;
377 *p++ = 0;
379 newtoken = alloc_token(&first->pos);
380 head->next = newtoken;
381 token_type(newtoken) = token_type(first);
382 switch (token_type(newtoken)) {
383 case TOKEN_IDENT:
384 newtoken->ident = built_in_ident(buffer);
385 break;
386 case TOKEN_INTEGER:
387 newtoken->integer = __alloc_bytes(p - buffer);
388 memcpy(newtoken->integer, buffer, p - buffer);
389 break;
391 return newtoken;
/*
 * Scan the list after 'head' for 'a ## b' triples and paste each one
 * with hashhash().  A four-token window (head, next, nextnext,
 * nextnextnext) slides over the list; after a paste the window is
 * re-anchored on the token hashhash() returned so that chained
 * pastes ('a ## b ## c') are handled.
 *
 * NOTE(review): next->next and nextnext->next are read before the
 * eof checks below; this presumably relies on the eof token having a
 * usable 'next' - confirm.
 */
394 static void retokenize(struct token *head)
396 struct token * next = head->next;
397 struct token * nextnext = next->next;
398 struct token * nextnextnext = nextnext->next;
400 if (eof_token(next) || eof_token(nextnext))
401 return;
403 for (;;) {
404 if (eof_token(nextnextnext))
405 break;
407 if (match_op(nextnext, SPECIAL_HASHHASH)) {
408 struct token *newtoken = hashhash(head, next, nextnextnext);
/* Continue from the result, keeping the same 'head'. */
410 next = newtoken;
411 nextnext = nextnextnext->next;
412 nextnextnext = nextnext->next;
414 newtoken->next = nextnext;
415 if (!eof_token(nextnext))
416 continue;
417 break;
/* No '##' in the middle: slide the window forward by one token. */
420 head = next;
421 next = nextnext;
422 nextnext = nextnext->next;
423 nextnextnext = nextnextnext->next;
/*
 * Expand the macro 'sym' whose name token is head->next.
 *
 * The symbol is marked busy for the duration so that
 * expand_one_symbol() will not expand it recursively.  For a
 * function-like macro the argument list is located with
 * find_argument_end() and substituted into the copied body.
 *
 * Returns the last token of the expansion, with the remainder of the
 * original list re-attached after it.
 */
427 static struct token *expand(struct token *head, struct symbol *sym)
429 struct token *arguments, *token, *last;
431 sym->busy++;
432 token = head->next;
433 last = token->next;
435 arguments = NULL;
436 if (sym->arglist) {
/* 'last' is the token after the name; the arguments start one past it. */
437 arguments = last->next;
438 last = find_argument_end(last);
/* Isolate the name token so that replace() unlinks only it. */
440 token->next = &eof_token_entry;
442 /* Replace the token with the token expansion */
443 replace(token, head, sym->expansion);
445 /* Then, replace all the arguments with their expansions */
446 if (arguments)
447 expand_arguments(token, head, arguments, sym->arglist);
449 /* Re-tokenize the sequence if any ## token exists.. */
450 retokenize(head);
452 /* Finally, expand the expansion itself .. */
453 head = expand_list(head);
455 /* Put the rest of the stuff in place again */
456 head->next = last;
457 sym->busy--;
458 return head;
461 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
463 struct token *last;
464 static char buffer[256];
465 char *ptr = buffer;
467 last = token;
468 while (!eof_token(token) && !match_op(token, endop)) {
469 int len;
470 const char *val = token->string->data;
471 if (token_type(token) != TOKEN_STRING)
472 val = show_token(token);
473 len = strlen(val);
474 memcpy(ptr, val, len);
475 ptr += len;
476 token = token->next;
478 *ptr = 0;
479 if (endop && !match_op(token, endop))
480 warn(start->pos, "expected '>' at end of filename");
481 return buffer;
484 static void do_include(struct token *head, struct token *token, const char *filename)
486 int endlen = strlen(filename) + 1;
487 const char **pptr = includepath, *path;
489 while ((path = *pptr++) != NULL) {
490 int fd, len = strlen(path);
491 static char fullname[PATH_MAX];
493 memcpy(fullname, path, len);
494 memcpy(fullname+len, filename, endlen);
495 fd = open(fullname, O_RDONLY);
496 if (fd >= 0) {
497 char * streamname = __alloc_bytes(len + endlen);
498 memcpy(streamname, fullname, len + endlen);
499 head->next = tokenize(streamname, fd, head->next);
500 close(fd);
501 return;
504 warn(token->pos, "unable to open '%s'", filename);
507 static int handle_include(struct stream *stream, struct token *head, struct token *token)
509 const char *filename;
510 struct token *next;
511 int expect;
513 if (stream->constant == -1)
514 stream->constant = 0;
515 if (false_nesting)
516 return 1;
517 next = token->next;
518 expect = '>';
519 if (!match_op(next, '<')) {
520 expand_list(token);
521 expect = 0;
522 next = token;
524 token = next->next;
525 filename = token_name_sequence(token, expect, token);
526 do_include(head, token, filename);
527 return 1;
530 static int token_list_different(struct token *list1, struct token *list2)
532 for (;;) {
533 if (list1 == list2)
534 return 0;
535 if (!list1 || !list2)
536 return 1;
537 if (token_type(list1) != token_type(list2))
538 return 1;
539 list1 = list1->next;
540 list2 = list2->next;
/*
 * #define handler.
 *
 * Returns 0 if the directive has no identifier (after warning), 1
 * otherwise.  Inside a skipped conditional nothing is defined.
 *
 * A '(' immediately after the name (no whitespace) starts a parameter
 * list: the list is rewritten in place so that the commas are unlinked
 * and it ends at the eof token, and the expansion starts after the ')'.
 *
 * If the symbol already exists it is NOT replaced; a pair of warnings
 * is issued when the new definition differs according to
 * token_list_different().
 */
545 static int handle_define(struct stream *stream, struct token *head, struct token *token)
547 struct token *arglist, *expansion;
548 struct token *left = token->next;
549 struct symbol *sym;
550 struct ident *name;
552 if (token_type(left) != TOKEN_IDENT) {
553 warn(head->pos, "expected identifier to 'define'");
554 return 0;
556 if (false_nesting)
557 return 1;
558 name = left->ident;
560 arglist = NULL;
561 expansion = left->next;
562 if (!expansion->pos.whitespace && match_op(expansion, '(')) {
563 arglist = expansion;
564 while (!eof_token(expansion)) {
565 struct token *next = expansion->next;
566 if (match_op(next, ')')) {
567 // Terminate the arglist
568 expansion->next = &eof_token_entry;
569 expansion = next->next;
570 break;
/* Unlink the comma so that parameters follow each other directly. */
572 if (match_op(next, ','))
573 expansion->next = next->next;
574 expansion = next;
/* Skip the opening '(' itself. */
576 arglist = arglist->next;
579 sym = lookup_symbol(name, NS_PREPROCESSOR);
580 if (sym) {
581 if (token_list_different(sym->expansion, expansion) ||
582 token_list_different(sym->arglist, arglist)) {
583 warn(left->pos, "preprocessor token redefined");
584 warn(sym->pos, "this was the original definition");
586 return 1;
588 sym = alloc_symbol(left->pos, SYM_NODE);
589 bind_symbol(sym, name, NS_PREPROCESSOR);
591 sym->expansion = expansion;
592 sym->arglist = arglist;
593 return 1;
596 static int handle_undef(struct stream *stream, struct token *head, struct token *token)
598 struct token *left = token->next;
599 struct symbol **sym;
601 if (token_type(left) != TOKEN_IDENT) {
602 warn(head->pos, "expected identifier to 'undef'");
603 return 0;
605 if (false_nesting)
606 return 1;
607 sym = &left->ident->symbols;
608 while (*sym) {
609 struct symbol *t = *sym;
610 if (t->namespace == NS_PREPROCESSOR) {
611 *sym = t->next_id;
612 return 1;
614 sym = &t->next_id;
616 return 1;
619 static int preprocessor_if(struct token *token, int true)
621 if (if_nesting == 0)
622 unmatched_if = token;
623 elif_ignore[if_nesting] = false_nesting || true;
624 if (false_nesting || !true) {
625 false_nesting++;
626 return 1;
628 true_nesting++;
629 return 1;
632 static int token_defined(struct token *token)
634 if (token_type(token) == TOKEN_IDENT)
635 return lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL;
637 warn(token->pos, "expected identifier for #if[n]def");
638 return 0;
641 static int handle_ifdef(struct stream *stream, struct token *head, struct token *token)
643 return preprocessor_if(token, token_defined(token->next));
646 static int handle_ifndef(struct stream *stream, struct token *head, struct token *token)
648 struct token *next = token->next;
649 if (stream->constant == -1) {
650 int newconstant = 0;
651 if (token_type(next) == TOKEN_IDENT) {
652 if (!stream->protect || stream->protect == next->ident) {
653 newconstant = -2;
654 stream->protect = next->ident;
655 stream->nesting = if_nesting+1;
658 stream->constant = newconstant;
660 return preprocessor_if(token, !token_defined(next));
663 static int expression_value(struct token *head)
665 struct expression *expr;
666 struct token *token;
667 long long value;
669 expand_defined(head);
670 expand_list(head);
671 token = constant_expression(head->next, &expr);
672 if (!eof_token(token))
673 warn(token->pos, "garbage at end: %s", show_token_sequence(token));
674 value = get_expression_value(expr);
675 return value != 0;
678 static int handle_if(struct stream *stream, struct token *head, struct token *token)
680 int value = 0;
681 if (!false_nesting)
682 value = expression_value(token);
683 return preprocessor_if(token, value);
686 static int handle_elif(struct stream * stream, struct token *head, struct token *token)
688 if (stream->nesting == if_nesting)
689 stream->constant = 0;
690 if (false_nesting) {
691 /* If this whole if-thing is if'ed out, an elif cannot help */
692 if (elif_ignore[if_nesting-1])
693 return 1;
694 if (expression_value(token)) {
695 false_nesting--;
696 true_nesting++;
697 elif_ignore[if_nesting-1] = 1;
699 return 1;
701 if (true_nesting) {
702 false_nesting = 1;
703 true_nesting--;
704 return 1;
706 warn(token->pos, "unmatched '#elif'");
707 return 1;
710 static int handle_else(struct stream *stream, struct token *head, struct token *token)
712 if (stream->nesting == if_nesting)
713 stream->constant = 0;
714 if (false_nesting) {
715 /* If this whole if-thing is if'ed out, an else cannot help */
716 if (elif_ignore[if_nesting-1])
717 return 1;
718 false_nesting--;
719 true_nesting++;
720 elif_ignore[if_nesting-1] = 1;
721 return 1;
723 if (true_nesting) {
724 true_nesting--;
725 false_nesting = 1;
726 return 1;
728 warn(token->pos, "unmatched #else");
729 return 1;
732 static int handle_endif(struct stream *stream, struct token *head, struct token *token)
734 if (stream->constant == -2 && stream->nesting == if_nesting)
735 stream->constant = -1;
737 if (false_nesting) {
738 false_nesting--;
739 return 1;
741 if (true_nesting) {
742 true_nesting--;
743 return 1;
745 warn(token->pos, "unmatched #endif");
746 return 1;
749 static const char *show_token_sequence(struct token *token)
751 static char buffer[256];
752 char *ptr = buffer;
753 int whitespace = 0;
755 if (!token)
756 return "<none>";
757 while (!eof_token(token) && !match_op(token, SPECIAL_ARG_SEPARATOR)) {
758 const char *val = show_token(token);
759 int len = strlen(val);
760 if (whitespace)
761 *ptr++ = ' ';
762 memcpy(ptr, val, len);
763 ptr += len;
764 token = token->next;
765 whitespace = token->pos.whitespace;
767 *ptr = 0;
768 return buffer;
771 static int handle_warning(struct stream *stream, struct token *head, struct token *token)
773 if (false_nesting)
774 return 1;
775 warn(token->pos, "%s", show_token_sequence(token->next));
776 return 1;
779 static int handle_error(struct stream *stream, struct token *head, struct token *token)
781 if (false_nesting)
782 return 1;
783 error(token->pos, "%s", show_token_sequence(token->next));
784 return 1;
787 static int handle_nostdinc(struct stream *stream, struct token *head, struct token *token)
789 if (false_nesting)
790 return 1;
791 includepath[1] = NULL;
792 return 1;
795 static void add_path_entry(struct token *token, const char *path)
797 int i;
799 for (i = 0; i < INCLUDEPATHS; i++) {
800 if (!includepath[i]) {
801 includepath[i] = path;
802 includepath[i+1] = NULL;
803 return;
806 warn(token->pos, "too many include path entries");
809 static int handle_add_include(struct stream *stream, struct token *head, struct token *token)
811 for (;;) {
812 token = token->next;
813 if (eof_token(token))
814 return 1;
815 if (token_type(token) != TOKEN_STRING) {
816 warn(token->pos, "expected path string");
817 return 1;
819 add_path_entry(token, token->string->data);
823 static int handle_preprocessor_command(struct stream *stream, struct token *head, struct ident *ident, struct token *token)
825 int i;
826 static struct {
827 const char *name;
828 int (*handler)(struct stream *, struct token *, struct token *);
829 } handlers[] = {
830 { "define", handle_define },
831 { "undef", handle_undef },
832 { "ifdef", handle_ifdef },
833 { "ifndef", handle_ifndef },
834 { "else", handle_else },
835 { "endif", handle_endif },
836 { "if", handle_if },
837 { "elif", handle_elif },
838 { "warning", handle_warning },
839 { "error", handle_error },
840 { "include", handle_include },
842 // our internal preprocessor tokens
843 { "nostdinc", handle_nostdinc },
844 { "add_include", handle_add_include },
847 for (i = 0; i < (sizeof (handlers) / sizeof (handlers[0])); i++) {
848 if (match_string_ident(ident, handlers[i].name))
849 return handlers[i].handler(stream, head, token);
851 return 0;
854 static void handle_preprocessor_line(struct stream *stream, struct token * head, struct token *token)
856 if (!token)
857 return;
859 if (token_type(token) == TOKEN_IDENT)
860 if (handle_preprocessor_command(stream, head, token->ident, token))
861 return;
862 warn(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token));
865 static void preprocessor_line(struct stream *stream, struct token * head)
867 struct token *start = head->next, *next;
868 struct token **tp = &start->next;
870 for (;;) {
871 next = *tp;
872 if (next->pos.newline)
873 break;
874 tp = &next->next;
876 head->next = next;
877 *tp = &eof_token_entry;
878 handle_preprocessor_line(stream, head, start->next);
/*
 * Main preprocessing loop over the list that follows 'head'.
 *
 * For each token after 'head':
 *  - a '#' at the start of a line is processed as a directive line;
 *  - while inside a skipped conditional the token is unlinked;
 *  - stream begin/end markers are unlinked (a stream end may mark the
 *    stream's 'constant' field as 1 when a protect identifier is set);
 *  - identifiers go through macro expansion;
 *  - any other surviving token clears the stream's 'constant' field.
 *
 * Note that 'continue' inside the do/while jumps to the loop condition,
 * so 'head' is tested there without having been advanced.
 */
881 static void do_preprocess(struct token *head)
883 do {
884 struct token *next = head->next;
885 struct stream *stream = input_streams + next->pos.stream;
887 if (next->pos.newline && match_op(next, '#')) {
888 preprocessor_line(stream, head);
889 continue;
892 if (false_nesting) {
/* Drop tokens that belong to a skipped conditional. */
893 head->next = next->next;
894 continue;
897 switch (token_type(next)) {
898 case TOKEN_STREAMEND:
899 if (stream->constant == -1 && stream->protect) {
900 stream->constant = 1;
902 /* fallthrough */
903 case TOKEN_STREAMBEGIN:
904 head->next = next->next;
905 continue;
907 case TOKEN_IDENT:
908 next = expand_one_symbol(head, next);
909 /* fallthrough */
910 default:
912 * Any token expansion (even if it ended up being an
913 * empty expansion) in this stream implies it can't
914 * be constant.
916 stream->constant = 0;
919 head = next;
920 } while (!eof_token(head));
923 struct token * preprocess(struct token *token)
925 struct token header = { };
927 header.next = token;
928 do_preprocess(&header);
929 if (if_nesting)
930 warn(unmatched_if->pos, "unmatched preprocessor conditional");
932 // Drop all expressions from pre-processing, they're not used any more.
933 clear_expression_alloc();
935 return header.next;