Fix stringify that got broken by other changes
[smatch.git] / pre-process.c
blob3b7cfb4783bb2300ca67c84ffb99786e9cb536e2
1 /*
2 * Do C preprocessing, based on a token list gathered by
3 * the tokenizer.
5 * This may not be the smartest preprocessor on the planet.
7 * Copyright (C) 2003 Linus Torvalds, all rights reserved.
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
12 #include <stddef.h>
13 #include <string.h>
14 #include <ctype.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 #include <limits.h>
19 #include "lib.h"
20 #include "parse.h"
21 #include "token.h"
22 #include "symbol.h"
/* Number of conditional levels that elif_ignore[] has room for. */
#define MAXNEST (16)

/* Open conditional levels whose branch is being processed / skipped. */
static int true_nesting;
static int false_nesting;

/* Token of the conditional that was opened while no other was open. */
static struct token *unmatched_if;

/* Per-level flag consulted by #elif and #else before taking a branch. */
static int elif_ignore[MAXNEST];

/* Total number of conditional levels currently open. */
#define if_nesting (true_nesting + false_nesting)
32 * This is stupid - the tokenizer already guarantees unique
33 * identifiers, so we should just compare identifier pointers
35 static int match_string_ident(struct ident *ident, const char *str)
37 return !str[ident->len] && !memcmp(str, ident->name, ident->len);
40 static struct token *alloc_token(struct token *dup)
42 struct token *token = __alloc_token(0);
44 token->stream = dup->stream;
45 token->line = dup->line;
46 token->pos = dup->pos;
47 token->whitespace = 1;
48 return token;
51 static const char *show_token_sequence(struct token *token);
53 /* Head is one-before-list, and last is one-past-list */
54 static struct token *for_each_ident(struct token *head, struct token *(*action)(struct token *head, struct token *))
56 for (;;) {
57 struct token *next = head->next;
59 /* Did we hit the end of the current expansion? */
60 if (eof_token(next))
61 break;
63 if (next->type == TOKEN_IDENT)
64 next = action(head, next);
66 head = next;
68 return head;
71 static struct token *is_defined(struct token *head, struct token *token, struct token *next)
73 char *string[] = { "0", "1" };
74 char *defined = string[lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL];
75 struct token *newtoken = alloc_token(token);
77 newtoken->type = TOKEN_INTEGER;
78 newtoken->integer = defined;
79 newtoken->next = next;
80 head->next = newtoken;
81 return next;
85 struct token *defined_one_symbol(struct token *head, struct token *next)
87 if (match_string_ident(next->ident, "defined")) {
88 struct token *token = next->next;
89 struct token *past = token->next;
91 if (match_op(token, '(')) {
92 token = past;
93 past = token->next;
94 if (!match_op(past, ')'))
95 return next;
96 past = past->next;
98 if (token->type == TOKEN_IDENT)
99 return is_defined(head, token, past);
101 return next;
104 static struct token *expand_defined(struct token *head)
106 return for_each_ident(head, defined_one_symbol);
109 /* Expand symbol 'sym' between 'head->next' and 'head->next->next' */
110 static struct token *expand(struct token *, struct symbol *);
112 struct token *expand_one_symbol(struct token *head, struct token *token)
114 struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
115 if (sym && !sym->busy) {
116 if (sym->arglist && !match_op(token->next, '('))
117 return token;
118 return expand(head, sym);
120 return token;
123 static struct token *expand_list(struct token *head)
125 return for_each_ident(head, expand_one_symbol);
128 static struct token *find_argument_end(struct token *start)
130 int nesting = 0;
132 while (!eof_token(start)) {
133 struct token *next = start->next;
134 if (match_op(next, '('))
135 nesting++;
136 else if (match_op(next, ')')) {
137 if (--nesting < 0) {
138 start->next = &eof_token_entry;
139 return next->next;
141 } else if (!nesting && match_op(next, ','))
142 next->special = SPECIAL_ARG_SEPARATOR;
143 start = next;
145 return start;
148 static struct token *dup_token(struct token *token, struct token *pos, int newline)
150 struct token *alloc = alloc_token(pos);
151 alloc->type = token->type;
152 alloc->line = pos->line;
153 alloc->newline = newline;
154 alloc->integer = token->integer;
155 return alloc;
158 static void insert(struct token *token, struct token *prev)
160 token->next = &eof_token_entry;
161 prev->next = token;
164 static void replace(struct token *token, struct token *prev, struct token *list)
166 int newline = token->newline;
168 prev->next = &eof_token_entry;
169 while (!eof_token(list) && !match_op(list, SPECIAL_ARG_SEPARATOR)) {
170 struct token *newtok = dup_token(list, token, newline);
171 newline = 0;
172 insert(newtok, prev);
173 prev = newtok;
174 list = list->next;
178 static struct token *get_argument(int nr, struct token *args)
180 if (!nr)
181 return args;
182 while (!eof_token(args)) {
183 if (match_op(args, SPECIAL_ARG_SEPARATOR))
184 if (!--nr)
185 return args->next;
186 args = args->next;
189 return args;
192 static struct token *stringify(struct token *token, struct token *arg)
194 const char *s = show_token_sequence(arg);
195 int size = strlen(s)+1;
196 struct token *newtoken = alloc_token(token);
197 struct string *string = __alloc_string(size);
199 newtoken->newline = token->newline;
200 memcpy(string->data, s, size);
201 string->length = size;
202 newtoken->type = TOKEN_STRING;
203 newtoken->string = string;
204 newtoken->next = &eof_token_entry;
205 return newtoken;
208 static int arg_number(struct token *arglist, struct ident *ident)
210 int nr = 0;
212 while (!eof_token(arglist)) {
213 if (arglist->ident == ident)
214 return nr;
215 nr++;
216 arglist = arglist->next;
218 return -1;
221 static struct token empty_arg_token = { .type = TOKEN_EOF };
223 static struct token *expand_one_arg(struct token *head, struct token *token,
224 struct token *arglist, struct token *arguments)
226 int nr = arg_number(arglist, token->ident);
228 if (nr >= 0) {
229 struct token *arg = get_argument(nr, arguments);
230 struct token *last = token->next;
231 token->next = &eof_token_entry;
234 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
235 * then the 'x' goes away too.
237 if (match_op(head, SPECIAL_HASHHASH) && eof_token(arg)) {
238 arg = &empty_arg_token;
239 empty_arg_token.next = &eof_token_entry;
242 replace(token, head, arg);
243 head = expand_list(head);
244 head->next = last;
245 return head;
247 return token;
250 static void expand_arguments(struct token *token, struct token *head,
251 struct token *arguments, struct token *arglist)
253 for (;;) {
254 struct token *next = head->next;
256 /* Did we hit the end of the current expansion? */
257 if (eof_token(next))
258 break;
260 if (match_op(next, '#')) {
261 struct token *nextnext = next->next;
262 int nr = arg_number(arglist, nextnext->ident);
263 if (nextnext != head && nr >= 0 && nextnext->type == TOKEN_IDENT) {
264 struct token * last = nextnext->next;
265 struct token *newtoken = stringify(nextnext, get_argument(nr, arguments));
266 replace(nextnext, head, newtoken);
267 continue;
269 warn(next, "'#' operation is not followed by argument name");
272 if (next->type == TOKEN_IDENT)
273 next = expand_one_arg(head, next, arglist, arguments);
275 head = next;
280 * Possibly valid combinations:
281 * - anything + 'empty_arg_token' is empty.
282 * - ident + ident - combine (==ident)
283 * - ident + number - combine (==ident)
284 * - number + number - combine (==number)
285 * - number + ident - combine (==number)
286 * - string + string - leave as is, C will combine them anyway
287 * others cause an error and leave the two tokens as separate tokens.
289 static struct token *hashhash(struct token *head, struct token *first, struct token *second)
291 static char buffer[512], *p;
292 struct token *newtoken;
293 static const char *src;
294 int len;
296 first->next = second;
299 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
300 * then the 'x' goes away too.
302 * See expand_one_arg.
304 if (second->type == TOKEN_EOF) {
305 head->next = second->next;
306 return head;
309 p = buffer;
310 switch (first->type) {
311 case TOKEN_INTEGER:
312 len = strlen(first->integer);
313 src = first->integer;
314 break;
315 case TOKEN_IDENT:
316 len = first->ident->len;
317 src = first->ident->name;
318 break;
319 default:
320 return second;
322 memcpy(p, src, len);
323 p += len;
325 switch (second->type) {
326 case TOKEN_INTEGER:
327 len = strlen(second->integer);
328 src = second->integer;
329 break;
330 case TOKEN_IDENT:
331 len = second->ident->len;
332 src = second->ident->name;
333 break;
334 default:
335 return second;
337 memcpy(p, src, len);
338 p += len;
339 *p++ = 0;
341 newtoken = alloc_token(first);
342 head->next = newtoken;
343 newtoken->type = first->type;
344 switch (newtoken->type) {
345 case TOKEN_IDENT:
346 newtoken->ident = built_in_ident(buffer);
347 break;
348 case TOKEN_INTEGER:
349 newtoken->integer = __alloc_bytes(p - buffer);
350 memcpy(newtoken->integer, buffer, p - buffer);
351 break;
353 return newtoken;
356 static void retokenize(struct token *head)
358 struct token * next = head->next;
359 struct token * nextnext = next->next;
360 struct token * nextnextnext = nextnext->next;
362 if (eof_token(next) || eof_token(nextnext))
363 return;
365 for (;;) {
366 if (eof_token(nextnextnext))
367 break;
369 if (match_op(nextnext, SPECIAL_HASHHASH)) {
370 struct token *newtoken = hashhash(head, next, nextnextnext);
372 next = newtoken;
373 nextnext = nextnextnext->next;
374 nextnextnext = nextnext->next;
376 newtoken->next = nextnext;
377 if (!eof_token(nextnext))
378 continue;
379 break;
382 head = next;
383 next = nextnext;
384 nextnext = nextnext->next;
385 nextnextnext = nextnextnext->next;
389 static struct token *expand(struct token *head, struct symbol *sym)
391 struct token *arguments, *token, *last;
393 sym->busy++;
394 token = head->next;
395 last = token->next;
397 arguments = NULL;
398 if (sym->arglist) {
399 arguments = last->next;
400 last = find_argument_end(last);
402 token->next = &eof_token_entry;
404 /* Replace the token with the token expansion */
405 replace(token, head, sym->expansion);
407 /* Then, replace all the arguments with their expansions */
408 if (arguments)
409 expand_arguments(token, head, arguments, sym->arglist);
411 /* Re-tokenize the sequence if any ## token exists.. */
412 retokenize(head);
414 /* Finally, expand the expansion itself .. */
415 head = expand_list(head);
417 /* Put the rest of the stuff in place again */
418 head->next = last;
419 sym->busy--;
420 return head;
423 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
425 struct token *last;
426 static char buffer[256];
427 char *ptr = buffer;
429 last = token;
430 while (!eof_token(token) && !match_op(token, endop)) {
431 int len;
432 const char *val = token->string->data;
433 if (token->type != TOKEN_STRING)
434 val = show_token(token);
435 len = strlen(val);
436 memcpy(ptr, val, len);
437 ptr += len;
438 token = token->next;
440 *ptr = 0;
441 if (endop && !match_op(token, endop))
442 warn(start, "expected '>' at end of filename");
443 return buffer;
446 static void do_include(struct token *head, struct token *token, const char *filename)
448 int endlen = strlen(filename) + 1;
449 char **pptr = includepath, *path;
451 while ((path = *pptr++) != NULL) {
452 int fd, len = strlen(path);
453 static char fullname[PATH_MAX];
455 memcpy(fullname, path, len);
456 memcpy(fullname+len, filename, endlen);
457 fd = open(fullname, O_RDONLY);
458 if (fd >= 0) {
459 char * streamname = __alloc_bytes(len + endlen);
460 memcpy(streamname, fullname, len + endlen);
461 head->next = tokenize(streamname, fd, head->next);
462 close(fd);
463 return;
466 warn(token, "unable to open '%s'", filename);
469 static int handle_include(struct stream *stream, struct token *head, struct token *token)
471 const char *filename;
472 struct token *next;
473 int expect;
475 if (stream->constant == -1)
476 stream->constant = 0;
477 if (false_nesting)
478 return 1;
479 next = token->next;
480 expect = '>';
481 if (!match_op(next, '<')) {
482 expand_list(token);
483 expect = 0;
484 next = token;
486 token = next->next;
487 filename = token_name_sequence(token, expect, token);
488 do_include(head, token, filename);
489 return 1;
492 static int token_list_different(struct token *list1, struct token *list2)
494 for (;;) {
495 if (list1 == list2)
496 return 0;
497 if (!list1 || !list2)
498 return 1;
499 if (list1->type != list2->type)
500 return 1;
501 list1 = list1->next;
502 list2 = list2->next;
507 static int handle_define(struct stream *stream, struct token *head, struct token *token)
509 struct token *arglist, *expansion;
510 struct token *left = token->next;
511 struct symbol *sym;
512 struct ident *name;
514 if (left->type != TOKEN_IDENT) {
515 warn(head, "expected identifier to 'define'");
516 return 0;
518 if (false_nesting)
519 return 1;
520 name = left->ident;
522 arglist = NULL;
523 expansion = left->next;
524 if (!expansion->whitespace && match_op(expansion, '(')) {
525 arglist = expansion;
526 while (!eof_token(expansion)) {
527 struct token *next = expansion->next;
528 if (match_op(next, ')')) {
529 // Terminate the arglist
530 expansion->next = &eof_token_entry;
531 expansion = next->next;
532 break;
534 if (match_op(next, ','))
535 expansion->next = next->next;
536 expansion = next;
538 arglist = arglist->next;
541 sym = lookup_symbol(name, NS_PREPROCESSOR);
542 if (sym) {
543 if (token_list_different(sym->expansion, expansion) ||
544 token_list_different(sym->arglist, arglist)) {
545 warn(left, "preprocessor token redefined");
546 warn(sym->token, "this was the original definition");
548 return 1;
550 sym = alloc_symbol(left, SYM_NONE);
551 bind_symbol(sym, name, NS_PREPROCESSOR);
553 sym->expansion = expansion;
554 sym->arglist = arglist;
555 return 1;
558 static int handle_undef(struct stream *stream, struct token *head, struct token *token)
560 struct token *left = token->next;
561 struct symbol **sym;
563 if (left->type != TOKEN_IDENT) {
564 warn(head, "expected identifier to 'undef'");
565 return 0;
567 if (false_nesting)
568 return 1;
569 sym = &left->ident->symbols;
570 while (*sym) {
571 struct symbol *t = *sym;
572 if (t->namespace == NS_PREPROCESSOR) {
573 *sym = t->next_id;
574 return 1;
576 sym = &t->next_id;
578 return 1;
581 static int preprocessor_if(struct token *token, int true)
583 if (if_nesting == 0)
584 unmatched_if = token;
585 elif_ignore[if_nesting] = false_nesting || true;
586 if (false_nesting || !true) {
587 false_nesting++;
588 return 1;
590 true_nesting++;
591 return 1;
594 static int token_defined(struct token *token)
596 if (token->type == TOKEN_IDENT)
597 return lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL;
599 warn(token, "expected identifier for #if[n]def");
600 return 0;
603 static int handle_ifdef(struct stream *stream, struct token *head, struct token *token)
605 return preprocessor_if(token, token_defined(token->next));
608 static int handle_ifndef(struct stream *stream, struct token *head, struct token *token)
610 struct token *next = token->next;
611 if (stream->constant == -1) {
612 int newconstant = 0;
613 if (next->type == TOKEN_IDENT) {
614 if (!stream->protect || stream->protect == next->ident) {
615 newconstant = -2;
616 stream->protect = next->ident;
617 stream->nesting = if_nesting+1;
620 stream->constant = newconstant;
622 return preprocessor_if(token, !token_defined(next));
/*
 * Convert the stored text of an integer token to its value.
 *
 * A leading 'x' selects base 16 and skips two characters, a leading 'o'
 * selects base 8 and skips one; anything else is read as decimal.
 * Conversion stops at the first character that hexval() does not map to
 * a digit below the base.
 */
static unsigned long long get_int_value(const char *str)
{
	unsigned long long value = 0;
	unsigned int base = 10, digit;

	if (str[0] == 'x') {
		base = 16;
		str += 2;
	} else if (str[0] == 'o') {
		base = 8;
		str += 1;
	}

	for (; (digit = hexval(*str)) < base; str++)
		value = value * base + digit;
	return value;
}
646 static long long primary_value(struct token *token)
648 switch (token->type) {
649 case TOKEN_INTEGER:
650 return get_int_value(token->integer);
652 error(token, "bad constant expression");
653 return 0;
656 long long get_expression_value(struct expression *expr)
658 long long left, middle, right;
660 switch (expr->type) {
661 case EXPR_CONSTANT:
662 return primary_value(expr->token);
663 case EXPR_SYMBOL: {
664 struct symbol *sym = expr->symbol;
665 if (!sym || !sym->ctype.base_type || sym->ctype.base_type->type != SYM_ENUM) {
666 warn(expr->token, "undefined identifier in constant expression");
667 return 0;
669 return sym->value;
672 #define OP(x,y) case x: return left y right;
673 case EXPR_BINOP:
674 left = get_expression_value(expr->left);
675 if (!left && expr->op == SPECIAL_LOGICAL_AND)
676 return 0;
677 if (left && expr->op == SPECIAL_LOGICAL_OR)
678 return 1;
679 right = get_expression_value(expr->right);
680 switch (expr->op) {
681 OP('+',+); OP('-',-); OP('*',*); OP('/',/);
682 OP('%',%); OP('<',<); OP('>',>);
683 OP('&',&);OP('|',|);OP('^',^);
684 OP(SPECIAL_EQUAL,==); OP(SPECIAL_NOTEQUAL,!=);
685 OP(SPECIAL_LTE,<=); OP(SPECIAL_LEFTSHIFT,<<);
686 OP(SPECIAL_RIGHTSHIFT,>>); OP(SPECIAL_GTE,>=);
687 OP(SPECIAL_LOGICAL_AND,&&);OP(SPECIAL_LOGICAL_OR,||);
689 break;
691 #undef OP
692 #define OP(x,y) case x: return y left;
693 case EXPR_PREOP:
694 left = get_expression_value(expr->unop);
695 switch (expr->op) {
696 OP('+', +); OP('-', -); OP('!', !); OP('~', ~); OP('(', );
698 break;
700 case EXPR_CONDITIONAL:
701 left = get_expression_value(expr->conditional);
702 if (!expr->cond_true)
703 middle = left;
704 else
705 middle = get_expression_value(expr->cond_true);
706 right = get_expression_value(expr->cond_false);
707 return left ? middle : right;
709 error(expr->token, "bad constant expression");
710 return 0;
713 extern struct token *conditional_expression(struct token *token, struct expression **tree);
715 static int expression_value(struct token *head)
717 struct expression *expr;
718 struct token *token;
719 long long value;
721 expand_defined(head);
722 expand_list(head);
723 token = conditional_expression(head->next, &expr);
724 if (!eof_token(token))
725 warn(token, "garbage at end: %s", show_token_sequence(token));
726 value = get_expression_value(expr);
727 return value != 0;
730 static int handle_if(struct stream *stream, struct token *head, struct token *token)
732 int value = 0;
733 if (!false_nesting)
734 value = expression_value(token);
735 return preprocessor_if(token, value);
738 static int handle_elif(struct stream * stream, struct token *head, struct token *token)
740 if (stream->nesting == if_nesting)
741 stream->constant = 0;
742 if (false_nesting) {
743 /* If this whole if-thing is if'ed out, an elif cannot help */
744 if (elif_ignore[if_nesting-1])
745 return 1;
746 if (expression_value(token)) {
747 false_nesting--;
748 true_nesting++;
749 elif_ignore[if_nesting-1] = 1;
751 return 1;
753 if (true_nesting) {
754 false_nesting = 1;
755 true_nesting--;
756 return 1;
758 warn(token, "unmatched '#elif'");
759 return 1;
762 static int handle_else(struct stream *stream, struct token *head, struct token *token)
764 if (stream->nesting == if_nesting)
765 stream->constant = 0;
766 if (false_nesting) {
767 /* If this whole if-thing is if'ed out, an else cannot help */
768 if (elif_ignore[if_nesting-1])
769 return 1;
770 false_nesting--;
771 true_nesting++;
772 elif_ignore[if_nesting-1] = 1;
773 return 1;
775 if (true_nesting) {
776 true_nesting--;
777 false_nesting = 1;
778 return 1;
780 warn(token, "unmatched #else");
781 return 1;
784 static int handle_endif(struct stream *stream, struct token *head, struct token *token)
786 if (stream->constant == -2 && stream->nesting == if_nesting)
787 stream->constant = -1;
789 if (false_nesting) {
790 false_nesting--;
791 return 1;
793 if (true_nesting) {
794 true_nesting--;
795 return 1;
797 warn(token, "unmatched #endif");
798 return 1;
801 static const char *show_token_sequence(struct token *token)
803 static char buffer[256];
804 char *ptr = buffer;
805 int whitespace = 0;
807 if (!token)
808 return "<none>";
809 while (!eof_token(token)) {
810 const char *val = show_token(token);
811 int len = strlen(val);
812 if (whitespace)
813 *ptr++ = ' ';
814 memcpy(ptr, val, len);
815 ptr += len;
816 token = token->next;
817 whitespace = token->whitespace;
819 *ptr++ = 0;
820 *ptr = 0;
821 return buffer;
824 static int handle_warning(struct stream *stream, struct token *head, struct token *token)
826 if (false_nesting)
827 return 1;
828 warn(token, "%s", show_token_sequence(token->next));
829 return 1;
832 static int handle_error(struct stream *stream, struct token *head, struct token *token)
834 if (false_nesting)
835 return 1;
836 error(token, "%s", show_token_sequence(token->next));
837 return 1;
840 static int handle_preprocessor_command(struct stream *stream, struct token *head, struct ident *ident, struct token *token)
842 int i;
843 static struct {
844 const char *name;
845 int (*handler)(struct stream *, struct token *, struct token *);
846 } handlers[] = {
847 { "define", handle_define },
848 { "undef", handle_undef },
849 { "ifdef", handle_ifdef },
850 { "ifndef", handle_ifndef },
851 { "else", handle_else },
852 { "endif", handle_endif },
853 { "if", handle_if },
854 { "elif", handle_elif },
855 { "warning", handle_warning },
856 { "error", handle_error },
857 { "include", handle_include },
860 for (i = 0; i < (sizeof (handlers) / sizeof (handlers[0])); i++) {
861 if (match_string_ident(ident, handlers[i].name))
862 return handlers[i].handler(stream, head, token);
864 return 0;
867 static void handle_preprocessor_line(struct stream *stream, struct token * head, struct token *token)
869 if (!token)
870 return;
872 if (token->type == TOKEN_IDENT)
873 if (handle_preprocessor_command(stream, head, token->ident, token))
874 return;
875 warn(token, "unrecognized preprocessor line '%s'", show_token_sequence(token));
878 static void preprocessor_line(struct stream *stream, struct token * head)
880 struct token *start = head->next, *next;
881 struct token **tp = &start->next;
883 for (;;) {
884 next = *tp;
885 if (next->newline)
886 break;
887 tp = &next->next;
889 head->next = next;
890 *tp = &eof_token_entry;
891 handle_preprocessor_line(stream, head, start->next);
894 static void do_preprocess(struct token *head)
896 do {
897 struct token *next = head->next;
898 struct stream *stream = input_streams + next->stream;
900 if (next->newline && match_op(next, '#')) {
901 preprocessor_line(stream, head);
902 continue;
905 if (false_nesting) {
906 head->next = next->next;
907 continue;
910 switch (next->type) {
911 case TOKEN_STREAMEND:
912 if (stream->constant == -1 && stream->protect) {
913 stream->constant = 1;
915 /* fallthrough */
916 case TOKEN_STREAMBEGIN:
917 head->next = next->next;
918 continue;
920 case TOKEN_IDENT:
921 next = expand_one_symbol(head, next);
922 /* fallthrough */
923 default:
925 * Any token expansion (even if it ended up being an
926 * empty expansion) in this stream implies it can't
927 * be constant.
929 stream->constant = 0;
932 head = next;
933 } while (!eof_token(head));
936 struct token * preprocess(struct token *token)
938 struct token header = { 0, };
940 header.next = token;
941 do_preprocess(&header);
942 if (if_nesting)
943 warn(unmatched_if, "unmatched preprocessor conditional");
945 // Drop all expressions from pre-processing, they're not used any more.
946 clear_expression_alloc();
948 return header.next;