Start doing "sizeof" evaluation.
[smatch.git] / pre-process.c
blob8715a77ffe79c5d81cf14e893c34ff28a175bbdc
1 /*
2 * Do C preprocessing, based on a token list gathered by
3 * the tokenizer.
5 * This may not be the smartest preprocessor on the planet.
7 * Copyright (C) 2003 Linus Torvalds, all rights reserved.
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
12 #include <stddef.h>
13 #include <string.h>
14 #include <ctype.h>
15 #include <unistd.h>
16 #include <fcntl.h>
17 #include <limits.h>
19 #include "lib.h"
20 #include "parse.h"
21 #include "token.h"
22 #include "symbol.h"
23 #include "expression.h"
/* Number of slots in elif_ignore[], which is indexed by conditional depth. */
#define MAXNEST (16)

/* Bumped by preprocessor_if() for every open conditional whose branch is taken. */
static int true_nesting = 0;

/* Bumped by preprocessor_if() for every open conditional that is being skipped. */
static int false_nesting = 0;

/* Conditional token recorded while no other conditional was open; reported by preprocess(). */
static struct token *unmatched_if = NULL;

/* Per-depth flag: non-zero means a later #elif/#else at that depth must not be taken. */
static int elif_ignore[MAXNEST];

/* Total number of currently open conditionals. */
#define if_nesting (true_nesting + false_nesting)
33 * This is stupid - the tokenizer already guarantees unique
34 * identifiers, so we should just compare identifier pointers
36 static int match_string_ident(struct ident *ident, const char *str)
38 return !str[ident->len] && !memcmp(str, ident->name, ident->len);
41 static struct token *alloc_token(struct token *dup)
43 struct token *token = __alloc_token(0);
45 token->stream = dup->stream;
46 token->line = dup->line;
47 token->pos = dup->pos;
48 token->whitespace = 1;
49 return token;
/* Defined further down; needed earlier by stringify() and the directive handlers. */
static const char *show_token_sequence(struct token *token);
54 /* Head is one-before-list, and last is one-past-list */
55 static struct token *for_each_ident(struct token *head, struct token *(*action)(struct token *head, struct token *))
57 for (;;) {
58 struct token *next = head->next;
60 /* Did we hit the end of the current expansion? */
61 if (eof_token(next))
62 break;
64 if (next->type == TOKEN_IDENT)
65 next = action(head, next);
67 head = next;
69 return head;
72 static struct token *is_defined(struct token *head, struct token *token, struct token *next)
74 char *string[] = { "0", "1" };
75 char *defined = string[lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL];
76 struct token *newtoken = alloc_token(token);
78 newtoken->type = TOKEN_INTEGER;
79 newtoken->integer = defined;
80 newtoken->next = next;
81 head->next = newtoken;
82 return next;
86 struct token *defined_one_symbol(struct token *head, struct token *next)
88 if (match_string_ident(next->ident, "defined")) {
89 struct token *token = next->next;
90 struct token *past = token->next;
92 if (match_op(token, '(')) {
93 token = past;
94 past = token->next;
95 if (!match_op(past, ')'))
96 return next;
97 past = past->next;
99 if (token->type == TOKEN_IDENT)
100 return is_defined(head, token, past);
102 return next;
105 static struct token *expand_defined(struct token *head)
107 return for_each_ident(head, defined_one_symbol);
/*
 * Forward declaration: expand symbol 'sym' between 'head->next' and
 * 'head->next->next'.
 */
static struct token *expand(struct token *head, struct symbol *sym);
113 struct token *expand_one_symbol(struct token *head, struct token *token)
115 struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
116 if (sym && !sym->busy) {
117 if (sym->arglist && !match_op(token->next, '('))
118 return token;
119 return expand(head, sym);
121 return token;
124 static struct token *expand_list(struct token *head)
126 return for_each_ident(head, expand_one_symbol);
129 static struct token *find_argument_end(struct token *start)
131 int nesting = 0;
133 while (!eof_token(start)) {
134 struct token *next = start->next;
135 if (match_op(next, '('))
136 nesting++;
137 else if (match_op(next, ')')) {
138 if (--nesting < 0) {
139 start->next = &eof_token_entry;
140 return next->next;
142 } else if (!nesting && match_op(next, ','))
143 next->special = SPECIAL_ARG_SEPARATOR;
144 start = next;
146 return start;
149 static struct token *dup_token(struct token *token, struct token *pos, int newline)
151 struct token *alloc = alloc_token(pos);
152 alloc->type = token->type;
153 alloc->line = pos->line;
154 alloc->newline = newline;
155 alloc->integer = token->integer;
156 return alloc;
159 static void insert(struct token *token, struct token *prev)
161 token->next = prev->next;
162 prev->next = token;
165 static struct token * replace(struct token *token, struct token *prev, struct token *list)
167 int newline = token->newline;
169 prev->next = token->next;
170 while (!eof_token(list) && !match_op(list, SPECIAL_ARG_SEPARATOR)) {
171 struct token *newtok = dup_token(list, token, newline);
172 newline = 0;
173 insert(newtok, prev);
174 prev = newtok;
175 list = list->next;
177 return prev;
180 static struct token *get_argument(int nr, struct token *args)
182 if (!nr)
183 return args;
184 while (!eof_token(args)) {
185 if (match_op(args, SPECIAL_ARG_SEPARATOR))
186 if (!--nr)
187 return args->next;
188 args = args->next;
191 return args;
194 static struct token *stringify(struct token *token, struct token *arg)
196 const char *s = show_token_sequence(arg);
197 int size = strlen(s)+1;
198 struct token *newtoken = alloc_token(token);
199 struct string *string = __alloc_string(size);
201 newtoken->newline = token->newline;
202 memcpy(string->data, s, size);
203 string->length = size;
204 newtoken->type = TOKEN_STRING;
205 newtoken->string = string;
206 newtoken->next = &eof_token_entry;
207 return newtoken;
210 static int arg_number(struct token *arglist, struct ident *ident)
212 int nr = 0;
214 while (!eof_token(arglist)) {
215 if (arglist->ident == ident)
216 return nr;
217 nr++;
218 arglist = arglist->next;
220 return -1;
223 static struct token empty_arg_token = { .type = TOKEN_EOF };
225 static struct token *expand_one_arg(struct token *head, struct token *token,
226 struct token *arglist, struct token *arguments)
228 int nr = arg_number(arglist, token->ident);
229 struct token *orig_head = head;
231 if (nr >= 0) {
232 struct token *arg = get_argument(nr, arguments);
233 struct token *last = token->next;
234 token->next = &eof_token_entry;
237 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
238 * then the 'x' goes away too.
240 if (match_op(head, SPECIAL_HASHHASH) && eof_token(arg)) {
241 arg = &empty_arg_token;
242 empty_arg_token.next = &eof_token_entry;
245 head = replace(token, head, arg);
246 if (!match_op(orig_head, SPECIAL_HASHHASH) && !match_op(last, SPECIAL_HASHHASH) && !match_op(orig_head, '#'))
247 head = expand_list(orig_head);
248 head->next = last;
249 return head;
251 return token;
254 static void expand_arguments(struct token *token, struct token *head,
255 struct token *arguments, struct token *arglist)
257 for (;;) {
258 struct token *next = head->next;
260 /* Did we hit the end of the current expansion? */
261 if (eof_token(next))
262 break;
264 if (match_op(next, '#')) {
265 struct token *nextnext = next->next;
266 int nr = arg_number(arglist, nextnext->ident);
267 if (nextnext != head && nr >= 0 && nextnext->type == TOKEN_IDENT) {
268 struct token *newtoken = stringify(nextnext, get_argument(nr, arguments));
269 replace(nextnext, head, newtoken);
270 continue;
272 warn(next, "'#' operation is not followed by argument name");
275 if (next->type == TOKEN_IDENT)
276 next = expand_one_arg(head, next, arglist, arguments);
278 head = next;
283 * Possibly valid combinations:
284 * - anything + 'empty_arg_token' is empty.
285 * - ident + ident - combine (==ident)
286 * - ident + number - combine (==ident)
287 * - number + number - combine (==number)
288 * - number + ident - combine (==number)
289 * - string + string - leave as is, C will combine them anyway
290 * others cause an error and leave the two tokens as separate tokens.
292 static struct token *hashhash(struct token *head, struct token *first, struct token *second)
294 static char buffer[512], *p;
295 struct token *newtoken;
296 static const char *src;
297 int len;
299 first->next = second;
302 * Special case for gcc 'x ## arg' semantics: if 'arg' is empty
303 * then the 'x' goes away too.
305 * See expand_one_arg.
307 if (second->type == TOKEN_EOF) {
308 head->next = second->next;
309 return head;
312 p = buffer;
313 switch (first->type) {
314 case TOKEN_INTEGER:
315 len = strlen(first->integer);
316 src = first->integer;
317 break;
318 case TOKEN_IDENT:
319 len = first->ident->len;
320 src = first->ident->name;
321 break;
322 default:
323 return second;
325 memcpy(p, src, len);
326 p += len;
328 switch (second->type) {
329 case TOKEN_INTEGER:
330 len = strlen(second->integer);
331 src = second->integer;
332 break;
333 case TOKEN_IDENT:
334 len = second->ident->len;
335 src = second->ident->name;
336 break;
337 default:
338 return second;
340 memcpy(p, src, len);
341 p += len;
342 *p++ = 0;
344 newtoken = alloc_token(first);
345 head->next = newtoken;
346 newtoken->type = first->type;
347 switch (newtoken->type) {
348 case TOKEN_IDENT:
349 newtoken->ident = built_in_ident(buffer);
350 break;
351 case TOKEN_INTEGER:
352 newtoken->integer = __alloc_bytes(p - buffer);
353 memcpy(newtoken->integer, buffer, p - buffer);
354 break;
356 return newtoken;
359 static void retokenize(struct token *head)
361 struct token * next = head->next;
362 struct token * nextnext = next->next;
363 struct token * nextnextnext = nextnext->next;
365 if (eof_token(next) || eof_token(nextnext))
366 return;
368 for (;;) {
369 if (eof_token(nextnextnext))
370 break;
372 if (match_op(nextnext, SPECIAL_HASHHASH)) {
373 struct token *newtoken = hashhash(head, next, nextnextnext);
375 next = newtoken;
376 nextnext = nextnextnext->next;
377 nextnextnext = nextnext->next;
379 newtoken->next = nextnext;
380 if (!eof_token(nextnext))
381 continue;
382 break;
385 head = next;
386 next = nextnext;
387 nextnext = nextnext->next;
388 nextnextnext = nextnextnext->next;
392 static struct token *expand(struct token *head, struct symbol *sym)
394 struct token *arguments, *token, *last;
396 sym->busy++;
397 token = head->next;
398 last = token->next;
400 arguments = NULL;
401 if (sym->arglist) {
402 arguments = last->next;
403 last = find_argument_end(last);
405 token->next = &eof_token_entry;
407 /* Replace the token with the token expansion */
408 replace(token, head, sym->expansion);
410 /* Then, replace all the arguments with their expansions */
411 if (arguments)
412 expand_arguments(token, head, arguments, sym->arglist);
414 /* Re-tokenize the sequence if any ## token exists.. */
415 retokenize(head);
417 /* Finally, expand the expansion itself .. */
418 head = expand_list(head);
420 /* Put the rest of the stuff in place again */
421 head->next = last;
422 sym->busy--;
423 return head;
426 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
428 struct token *last;
429 static char buffer[256];
430 char *ptr = buffer;
432 last = token;
433 while (!eof_token(token) && !match_op(token, endop)) {
434 int len;
435 const char *val = token->string->data;
436 if (token->type != TOKEN_STRING)
437 val = show_token(token);
438 len = strlen(val);
439 memcpy(ptr, val, len);
440 ptr += len;
441 token = token->next;
443 *ptr = 0;
444 if (endop && !match_op(token, endop))
445 warn(start, "expected '>' at end of filename");
446 return buffer;
449 static void do_include(struct token *head, struct token *token, const char *filename)
451 int endlen = strlen(filename) + 1;
452 char **pptr = includepath, *path;
454 while ((path = *pptr++) != NULL) {
455 int fd, len = strlen(path);
456 static char fullname[PATH_MAX];
458 memcpy(fullname, path, len);
459 memcpy(fullname+len, filename, endlen);
460 fd = open(fullname, O_RDONLY);
461 if (fd >= 0) {
462 char * streamname = __alloc_bytes(len + endlen);
463 memcpy(streamname, fullname, len + endlen);
464 head->next = tokenize(streamname, fd, head->next);
465 close(fd);
466 return;
469 warn(token, "unable to open '%s'", filename);
472 static int handle_include(struct stream *stream, struct token *head, struct token *token)
474 const char *filename;
475 struct token *next;
476 int expect;
478 if (stream->constant == -1)
479 stream->constant = 0;
480 if (false_nesting)
481 return 1;
482 next = token->next;
483 expect = '>';
484 if (!match_op(next, '<')) {
485 expand_list(token);
486 expect = 0;
487 next = token;
489 token = next->next;
490 filename = token_name_sequence(token, expect, token);
491 do_include(head, token, filename);
492 return 1;
495 static int token_list_different(struct token *list1, struct token *list2)
497 for (;;) {
498 if (list1 == list2)
499 return 0;
500 if (!list1 || !list2)
501 return 1;
502 if (list1->type != list2->type)
503 return 1;
504 list1 = list1->next;
505 list2 = list2->next;
510 static int handle_define(struct stream *stream, struct token *head, struct token *token)
512 struct token *arglist, *expansion;
513 struct token *left = token->next;
514 struct symbol *sym;
515 struct ident *name;
517 if (left->type != TOKEN_IDENT) {
518 warn(head, "expected identifier to 'define'");
519 return 0;
521 if (false_nesting)
522 return 1;
523 name = left->ident;
525 arglist = NULL;
526 expansion = left->next;
527 if (!expansion->whitespace && match_op(expansion, '(')) {
528 arglist = expansion;
529 while (!eof_token(expansion)) {
530 struct token *next = expansion->next;
531 if (match_op(next, ')')) {
532 // Terminate the arglist
533 expansion->next = &eof_token_entry;
534 expansion = next->next;
535 break;
537 if (match_op(next, ','))
538 expansion->next = next->next;
539 expansion = next;
541 arglist = arglist->next;
544 sym = lookup_symbol(name, NS_PREPROCESSOR);
545 if (sym) {
546 if (token_list_different(sym->expansion, expansion) ||
547 token_list_different(sym->arglist, arglist)) {
548 warn(left, "preprocessor token redefined");
549 warn(sym->token, "this was the original definition");
551 return 1;
553 sym = alloc_symbol(left, SYM_NONE);
554 bind_symbol(sym, name, NS_PREPROCESSOR);
556 sym->expansion = expansion;
557 sym->arglist = arglist;
558 return 1;
561 static int handle_undef(struct stream *stream, struct token *head, struct token *token)
563 struct token *left = token->next;
564 struct symbol **sym;
566 if (left->type != TOKEN_IDENT) {
567 warn(head, "expected identifier to 'undef'");
568 return 0;
570 if (false_nesting)
571 return 1;
572 sym = &left->ident->symbols;
573 while (*sym) {
574 struct symbol *t = *sym;
575 if (t->namespace == NS_PREPROCESSOR) {
576 *sym = t->next_id;
577 return 1;
579 sym = &t->next_id;
581 return 1;
584 static int preprocessor_if(struct token *token, int true)
586 if (if_nesting == 0)
587 unmatched_if = token;
588 elif_ignore[if_nesting] = false_nesting || true;
589 if (false_nesting || !true) {
590 false_nesting++;
591 return 1;
593 true_nesting++;
594 return 1;
597 static int token_defined(struct token *token)
599 if (token->type == TOKEN_IDENT)
600 return lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL;
602 warn(token, "expected identifier for #if[n]def");
603 return 0;
606 static int handle_ifdef(struct stream *stream, struct token *head, struct token *token)
608 return preprocessor_if(token, token_defined(token->next));
611 static int handle_ifndef(struct stream *stream, struct token *head, struct token *token)
613 struct token *next = token->next;
614 if (stream->constant == -1) {
615 int newconstant = 0;
616 if (next->type == TOKEN_IDENT) {
617 if (!stream->protect || stream->protect == next->ident) {
618 newconstant = -2;
619 stream->protect = next->ident;
620 stream->nesting = if_nesting+1;
623 stream->constant = newconstant;
625 return preprocessor_if(token, !token_defined(next));
628 static int expression_value(struct token *head)
630 struct expression *expr;
631 struct token *token;
632 long long value;
634 expand_defined(head);
635 expand_list(head);
636 token = constant_expression(head->next, &expr);
637 if (!eof_token(token))
638 warn(token, "garbage at end: %s", show_token_sequence(token));
639 value = get_expression_value(expr);
640 return value != 0;
643 static int handle_if(struct stream *stream, struct token *head, struct token *token)
645 int value = 0;
646 if (!false_nesting)
647 value = expression_value(token);
648 return preprocessor_if(token, value);
651 static int handle_elif(struct stream * stream, struct token *head, struct token *token)
653 if (stream->nesting == if_nesting)
654 stream->constant = 0;
655 if (false_nesting) {
656 /* If this whole if-thing is if'ed out, an elif cannot help */
657 if (elif_ignore[if_nesting-1])
658 return 1;
659 if (expression_value(token)) {
660 false_nesting--;
661 true_nesting++;
662 elif_ignore[if_nesting-1] = 1;
664 return 1;
666 if (true_nesting) {
667 false_nesting = 1;
668 true_nesting--;
669 return 1;
671 warn(token, "unmatched '#elif'");
672 return 1;
675 static int handle_else(struct stream *stream, struct token *head, struct token *token)
677 if (stream->nesting == if_nesting)
678 stream->constant = 0;
679 if (false_nesting) {
680 /* If this whole if-thing is if'ed out, an else cannot help */
681 if (elif_ignore[if_nesting-1])
682 return 1;
683 false_nesting--;
684 true_nesting++;
685 elif_ignore[if_nesting-1] = 1;
686 return 1;
688 if (true_nesting) {
689 true_nesting--;
690 false_nesting = 1;
691 return 1;
693 warn(token, "unmatched #else");
694 return 1;
697 static int handle_endif(struct stream *stream, struct token *head, struct token *token)
699 if (stream->constant == -2 && stream->nesting == if_nesting)
700 stream->constant = -1;
702 if (false_nesting) {
703 false_nesting--;
704 return 1;
706 if (true_nesting) {
707 true_nesting--;
708 return 1;
710 warn(token, "unmatched #endif");
711 return 1;
714 static const char *show_token_sequence(struct token *token)
716 static char buffer[256];
717 char *ptr = buffer;
718 int whitespace = 0;
720 if (!token)
721 return "<none>";
722 while (!eof_token(token) && !match_op(token, SPECIAL_ARG_SEPARATOR)) {
723 const char *val = show_token(token);
724 int len = strlen(val);
725 if (whitespace)
726 *ptr++ = ' ';
727 memcpy(ptr, val, len);
728 ptr += len;
729 token = token->next;
730 whitespace = token->whitespace;
732 *ptr = 0;
733 return buffer;
736 static int handle_warning(struct stream *stream, struct token *head, struct token *token)
738 if (false_nesting)
739 return 1;
740 warn(token, "%s", show_token_sequence(token->next));
741 return 1;
744 static int handle_error(struct stream *stream, struct token *head, struct token *token)
746 if (false_nesting)
747 return 1;
748 error(token, "%s", show_token_sequence(token->next));
749 return 1;
752 static int handle_preprocessor_command(struct stream *stream, struct token *head, struct ident *ident, struct token *token)
754 int i;
755 static struct {
756 const char *name;
757 int (*handler)(struct stream *, struct token *, struct token *);
758 } handlers[] = {
759 { "define", handle_define },
760 { "undef", handle_undef },
761 { "ifdef", handle_ifdef },
762 { "ifndef", handle_ifndef },
763 { "else", handle_else },
764 { "endif", handle_endif },
765 { "if", handle_if },
766 { "elif", handle_elif },
767 { "warning", handle_warning },
768 { "error", handle_error },
769 { "include", handle_include },
772 for (i = 0; i < (sizeof (handlers) / sizeof (handlers[0])); i++) {
773 if (match_string_ident(ident, handlers[i].name))
774 return handlers[i].handler(stream, head, token);
776 return 0;
779 static void handle_preprocessor_line(struct stream *stream, struct token * head, struct token *token)
781 if (!token)
782 return;
784 if (token->type == TOKEN_IDENT)
785 if (handle_preprocessor_command(stream, head, token->ident, token))
786 return;
787 warn(token, "unrecognized preprocessor line '%s'", show_token_sequence(token));
790 static void preprocessor_line(struct stream *stream, struct token * head)
792 struct token *start = head->next, *next;
793 struct token **tp = &start->next;
795 for (;;) {
796 next = *tp;
797 if (next->newline)
798 break;
799 tp = &next->next;
801 head->next = next;
802 *tp = &eof_token_entry;
803 handle_preprocessor_line(stream, head, start->next);
806 static void do_preprocess(struct token *head)
808 do {
809 struct token *next = head->next;
810 struct stream *stream = input_streams + next->stream;
812 if (next->newline && match_op(next, '#')) {
813 preprocessor_line(stream, head);
814 continue;
817 if (false_nesting) {
818 head->next = next->next;
819 continue;
822 switch (next->type) {
823 case TOKEN_STREAMEND:
824 if (stream->constant == -1 && stream->protect) {
825 stream->constant = 1;
827 /* fallthrough */
828 case TOKEN_STREAMBEGIN:
829 head->next = next->next;
830 continue;
832 case TOKEN_IDENT:
833 next = expand_one_symbol(head, next);
834 /* fallthrough */
835 default:
837 * Any token expansion (even if it ended up being an
838 * empty expansion) in this stream implies it can't
839 * be constant.
841 stream->constant = 0;
844 head = next;
845 } while (!eof_token(head));
848 struct token * preprocess(struct token *token)
850 struct token header = { 0, };
852 header.next = token;
853 do_preprocess(&header);
854 if (if_nesting)
855 warn(unmatched_if, "unmatched preprocessor conditional");
857 // Drop all expressions from pre-processing, they're not used any more.
858 clear_expression_alloc();
860 return header.next;