pre-process.c

   1 /*
   2  * Do C preprocessing, based on a token list gathered by
   3  * the tokenizer.
   4  *
   5  * This may not be the smartest preprocessor on the planet.
   6  *
   7  * Copyright (C) 2003 Transmeta Corp.
   8  *               2003 Linus Torvalds
   9  *
  10  *  Licensed under the Open Software License version 1.1
  11  */
  12 #include <stdio.h>
  13 #include <stdlib.h>
  14 #include <stdarg.h>
  15 #include <stddef.h>
  16 #include <string.h>
  17 #include <ctype.h>
  18 #include <unistd.h>
  19 #include <fcntl.h>
  20 #include <limits.h>
  21
  22 #include "pre-process.h"
  23 #include "lib.h"
  24 #include "parse.h"
  25 #include "token.h"
  26 #include "symbol.h"
  27 #include "expression.h"
  28
  29 int verbose = 0;
  30 int preprocessing = 0;
  31
  32 #define MAX_NEST (256)
  33 static int true_nesting = 0;
  34 static int false_nesting = 0;
  35 static struct token *unmatched_if = NULL;
  36 static char elif_ignore[MAX_NEST];
  37 #define if_nesting (true_nesting + false_nesting)
  38
  39 #define INCLUDEPATHS 32
  40 const char *includepath[INCLUDEPATHS+1] = {
  41         NULL
  42 };
  43
  44 const char *sys_includepath[] = {
  45         "/usr/include",
  46         "/usr/local/include",
  47         NULL,
  48 };
  49
  50 const char *gcc_includepath[] = {
  51         GCC_INTERNAL_INCLUDE,
  52         NULL
  53 };
  54
  55
  56 /*
  57  * This is stupid - the tokenizer already guarantees unique
  58  * identifiers, so we should just compare identifier pointers
  59  */
  60 int match_string_ident(struct ident *ident, const char *str)
  61 {
  62         return !str[ident->len] && !memcmp(str, ident->name, ident->len);
  63 }
  64
  65 static struct token *alloc_token(struct position *pos)
  66 {
  67         struct token *token = __alloc_token(0);
  68
  69         token->pos.stream = pos->stream;
  70         token->pos.line = pos->line;
  71         token->pos.pos = pos->pos;
  72         token->pos.whitespace = 1;
  73         return token;
  74 }
  75
  76 static const char *show_token_sequence(struct token *token);
  77
  78 /* Expand symbol 'sym' at '*list' */
  79 static struct token **expand(struct token **, struct symbol *);
  80
  81 static void replace_with_string(struct token *token, const char *str)
  82 {
  83         int size = strlen(str) + 1;
  84         struct string *s = __alloc_string(size);
  85
  86         s->length = size;
  87         memcpy(s->data, str, size);
  88         token_type(token) = TOKEN_STRING;
  89         token->string = s;
  90 }
  91
  92 static void replace_with_integer(struct token *token, unsigned int val)
  93 {
  94         char *buf = __alloc_bytes(10);
  95         sprintf(buf, "%d", val);
  96         token_type(token) = TOKEN_NUMBER;
  97         token->number = buf;
  98 }
  99
 100 static void replace_with_defined(struct token *token)
 101 {
 102         char *string[] = { "0", "1" };
 103         int defined = 0;
 104         if (token_type(token) != TOKEN_IDENT)
 105                 warn(token->pos, "operator \"defined\" requires an identifier");
 106         else if (lookup_symbol(token->ident, NS_PREPROCESSOR))
 107                 defined = 1;
 108         token_type(token) = TOKEN_NUMBER;
 109         token->number = string[defined];
 110 }
 111
 112 struct token **expand_one_symbol(struct token **list)
 113 {
 114         struct token *token = *list;
 115         struct symbol *sym;
 116
 117         if (token->pos.noexpand)
 118                 return &token->next;
 119
 120         sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
 121         if (sym)
 122                 return expand(list, sym);
 123         if (token->ident == &__LINE___ident) {
 124                 replace_with_integer(token, token->pos.line);
 125         } else if (token->ident == &__FILE___ident) {
 126                 replace_with_string(token, (input_streams + token->pos.stream)->name);
 127         }
 128         return &token->next;
 129 }
 130
 131 static inline struct token *scan_next(struct token **where)
 132 {
 133         struct token *token = *where;
 134         if (token_type(token) != TOKEN_UNTAINT)
 135                 return token;
 136         do {
 137                 token->ident->tainted = 0;
 138                 token = token->next;
 139         } while (token_type(token) == TOKEN_UNTAINT);
 140         *where = token;
 141         return token;
 142 }
 143
 144 static struct token **expand_list(struct token **list)
 145 {
 146         struct token *next;
 147         while (!eof_token(next = scan_next(list))) {
 148                 if (token_type(next) == TOKEN_IDENT)
 149                         list = expand_one_symbol(list);
 150                 else
 151                         list = &next->next;
 152         }
 153         return list;
 154 }
 155
 156 static struct token *collect_arg(struct token *prev, int vararg, struct position *pos)
 157 {
 158         struct token **p = &prev->next;
 159         struct token *next;
 160         int nesting = 0;
 161
 162         while (!eof_token(next = scan_next(p))) {
 163                 if (match_op(next, '(')) {
 164                         nesting++;
 165                 } else if (match_op(next, ')')) {
 166                         if (!nesting--)
 167                                 break;
 168                 } else if (match_op(next, ',') && !nesting && !vararg) {
 169                         break;
 170                 }
 171                 next->pos.stream = pos->stream;
 172                 next->pos.line = pos->line;
 173                 next->pos.pos = pos->pos;
 174                 p = &next->next;
 175         }
 176         *p = &eof_token_entry;
 177         return next;
 178 }
 179
 180 /*
 181  * We store arglist as <counter> [arg1] <number of uses for arg1> ... eof
 182  */
 183
 184 struct arg {
 185         struct token *arg;
 186         struct token *expanded;
 187         struct token *str;
 188         int n_normal;
 189         int n_quoted;
 190         int n_str;
 191 };
 192
 193 static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
 194 {
 195         int wanted = arglist->count.normal;
 196         struct token *next = NULL;
 197         int count = 0;
 198
 199         arglist = arglist->next;        /* skip counter */
 200
 201         if (!wanted) {
 202                 next = collect_arg(start, 0, &what->pos);
 203                 if (eof_token(next))
 204                         goto Eclosing;
 205                 if (!eof_token(start->next) || !match_op(next, ')')) {
 206                         count++;
 207                         goto Emany;
 208                 }
 209         } else {
 210                 for (count = 0; count < wanted; count++) {
 211                         struct argcount *p = &arglist->next->count;
 212                         next = collect_arg(start, p->vararg, &what->pos);
 213                         arglist = arglist->next->next;
 214                         if (eof_token(next))
 215                                 goto Eclosing;
 216                         args[count].arg = start->next;
 217                         args[count].n_normal = p->normal;
 218                         args[count].n_quoted = p->quoted;
 219                         args[count].n_str = p->str;
 220                         if (match_op(next, ')')) {
 221                                 count++;
 222                                 break;
 223                         }
 224                         start = next;
 225                 }
 226                 if (count == wanted && !match_op(next, ')'))
 227                         goto Emany;
 228                 if (count == wanted - 1) {
 229                         struct argcount *p = &arglist->next->count;
 230                         if (!p->vararg)
 231                                 goto Efew;
 232                         args[count].arg = NULL;
 233                         args[count].n_normal = p->normal;
 234                         args[count].n_quoted = p->quoted;
 235                         args[count].n_str = p->str;
 236                 }
 237                 if (count < wanted - 1)
 238                         goto Efew;
 239         }
 240         what->next = next->next;
 241         return 1;
 242
 243 Efew:
 244         warn(what->pos, "macro \"%s\" requires %d arguments, but only %d given",
 245                 show_token(what), wanted, count);
 246         goto out;
 247 Emany:
 248         while (match_op(next, ',')) {
 249                 next = collect_arg(next, 0, &what->pos);
 250                 count++;
 251         }
 252         if (eof_token(next))
 253                 goto Eclosing;
 254         warn(what->pos, "macro \"%s\" passed %d arguments, but takes just %d",
 255                 show_token(what), count, wanted);
 256         goto out;
 257 Eclosing:
 258         warn(what->pos, "unterminated argument list invoking macro \"%s\"",
 259                 show_token(what));
 260 out:
 261         what->next = next->next;
 262         return 0;
 263 }
 264
 265 static struct token *dup_list(struct token *list)
 266 {
 267         struct token *res;
 268         struct token **p = &res;
 269
 270         while (!eof_token(list)) {
 271                 struct token *newtok = __alloc_token(0);
 272                 *newtok = *list;
 273                 *p = newtok;
 274                 p = &newtok->next;
 275                 list = list->next;
 276         }
 277         return res;
 278 }
 279
 280 static struct token *stringify(struct token *arg)
 281 {
 282         const char *s = show_token_sequence(arg);
 283         int size = strlen(s)+1;
 284         struct token *token = __alloc_token(0);
 285         struct string *string = __alloc_string(size);
 286
 287         memcpy(string->data, s, size);
 288         string->length = size;
 289         token->pos = arg->pos;
 290         token_type(token) = TOKEN_STRING;
 291         token->string = string;
 292         token->next = &eof_token_entry;
 293         return token;
 294 }
 295
 296 static void expand_arguments(int count, struct arg *args)
 297 {
 298         int i;
 299         for (i = 0; i < count; i++) {
 300                 struct token *arg = args[i].arg;
 301                 if (!arg)
 302                         arg = &eof_token_entry;
 303                 if (args[i].n_str)
 304                         args[i].str = stringify(arg);
 305                 if (args[i].n_normal) {
 306                         if (!args[i].n_quoted) {
 307                                 args[i].expanded = arg;
 308                                 args[i].arg = NULL;
 309                         } else if (eof_token(arg)) {
 310                                 args[i].expanded = arg;
 311                         } else {
 312                                 args[i].expanded = dup_list(arg);
 313                         }
 314                         expand_list(&args[i].expanded);
 315                 }
 316         }
 317 }
 318
 319 /*
 320  * Possibly valid combinations:
 321  *  - ident + ident -> ident
 322  *  - ident + number -> ident unless number contains '.', '+' or '-'.
 323  *  - number + number -> number
 324  *  - number + ident -> number
 325  *  - number + '.' -> number
 326  *  - number + '+' or '-' -> number, if number used to end on [eEpP].
 327  *  - '.' + number -> number, if number used to start with a digit.
 328  *  - special + special -> either special or an error.
 329  */
 330 static enum token_type combine(struct token *left, struct token *right, char *p)
 331 {
 332         int len;
 333         enum token_type t1 = token_type(left), t2 = token_type(right);
 334
 335         if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
 336                 return TOKEN_ERROR;
 337
 338         if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
 339                 return TOKEN_ERROR;
 340
 341         strcpy(p, show_token(left));
 342         strcat(p, show_token(right));
 343         len = strlen(p);
 344
 345         if (len >= 256)
 346                 return TOKEN_ERROR;
 347
 348         if (t1 == TOKEN_IDENT) {
 349                 if (t2 == TOKEN_SPECIAL)
 350                         return TOKEN_ERROR;
 351                 if (t2 == TOKEN_NUMBER && strpbrk(p, "+-."))
 352                         return TOKEN_ERROR;
 353                 return TOKEN_IDENT;
 354         }
 355
 356         if (t1 == TOKEN_NUMBER) {
 357                 if (t2 == TOKEN_SPECIAL) {
 358                         switch (right->special) {
 359                         case '.':
 360                                 break;
 361                         case '+': case '-':
 362                                 if (strchr("eEpP", p[len - 2]))
 363                                         break;
 364                         default:
 365                                 return TOKEN_ERROR;
 366                         }
 367                 }
 368                 return TOKEN_NUMBER;
 369         }
 370
 371         if (p[0] == '.' && isdigit(p[1]))
 372                 return TOKEN_NUMBER;
 373
 374         return TOKEN_SPECIAL;
 375 }
 376
 377 static int merge(struct token *left, struct token *right)
 378 {
 379         extern unsigned char combinations[][3];
 380         static char buffer[512];
 381         int n;
 382
 383         switch (combine(left, right, buffer)) {
 384         case TOKEN_IDENT:
 385                 left->ident = built_in_ident(buffer);
 386                 left->pos.noexpand = 0;
 387                 return 1;
 388
 389         case TOKEN_NUMBER:
 390                 token_type(left) = TOKEN_NUMBER;        /* could be . + num */
 391                 left->number = __alloc_bytes(strlen(buffer) + 1);
 392                 memcpy(left->number, buffer, strlen(buffer) + 1);
 393                 return 1;
 394
 395         case TOKEN_SPECIAL:
 396                 if (buffer[2] && buffer[3])
 397                         break;
 398                 for (n = SPECIAL_BASE; n < SPECIAL_ARG_SEPARATOR; n++) {
 399                         if (!memcmp(buffer, combinations[n-SPECIAL_BASE], 3)) {
 400                                 left->special = n;
 401                                 return 1;
 402                         }
 403                 }
 404         default:
 405                 ;
 406         }
 407         warn(left->pos, "'##' failed: concatenation is not a valid token");
 408         return 0;
 409 }
 410
 411 static struct token *dup_token(struct token *token, struct position *streampos, struct position *pos)
 412 {
 413         struct token *alloc = alloc_token(streampos);
 414         token_type(alloc) = token_type(token);
 415         alloc->pos.newline = pos->newline;
 416         alloc->pos.whitespace = pos->whitespace;
 417         alloc->number = token->number;
 418         alloc->pos.noexpand = token->pos.noexpand;
 419         return alloc;
 420 }
 421
 422 static struct token **copy(struct token **where, struct token *list, int *count)
 423 {
 424         int need_copy = --*count;
 425         while (!eof_token(list)) {
 426                 struct token *token;
 427                 if (need_copy)
 428                         token = dup_token(list, &list->pos, &list->pos);
 429                 else
 430                         token = list;
 431                 if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
 432                         token->pos.noexpand = 1;
 433                 *where = token;
 434                 where = &token->next;
 435                 list = list->next;
 436         }
 437         *where = &eof_token_entry;
 438         return where;
 439 }
 440
 441 static struct token **substitute(struct token **list, struct token *body, struct arg *args)
 442 {
 443         struct token *token = *list;
 444         struct position *base_pos = &token->pos;
 445         struct position *pos = base_pos;
 446         int *count;
 447         enum {Normal, Placeholder, Concat} state = Normal;
 448
 449         for (; !eof_token(body); body = body->next, pos = &body->pos) {
 450                 struct token *added, *arg;
 451                 struct token **tail;
 452
 453                 switch (token_type(body)) {
 454                 case TOKEN_GNU_KLUDGE:
 455                         /*
 456                          * GNU kludge: if we had <comma>##<vararg>, behaviour
 457                          * depends on whether we had enough arguments to have
 458                          * a vararg.  If we did, ## is just ignored.  Otherwise
 459                          * both , and ## are ignored.  Comma should come from
 460                          * the body of macro and not be an argument of earlier
 461                          * concatenation.
 462                          */
 463                         if (!args[body->next->argnum].arg)
 464                                 continue;
 465                         added = dup_token(body, base_pos, pos);
 466                         token_type(added) = TOKEN_SPECIAL;
 467                         tail = &added->next;
 468                         break;
 469
 470                 case TOKEN_STR_ARGUMENT:
 471                         arg = args[body->argnum].str;
 472                         count = &args[body->argnum].n_str;
 473                         goto copy_arg;
 474
 475                 case TOKEN_QUOTED_ARGUMENT:
 476                         arg = args[body->argnum].arg;
 477                         count = &args[body->argnum].n_quoted;
 478                         if (!arg || eof_token(arg)) {
 479                                 if (state == Concat)
 480                                         state = Normal;
 481                                 else
 482                                         state = Placeholder;
 483                                 continue;
 484                         }
 485                         goto copy_arg;
 486
 487                 case TOKEN_MACRO_ARGUMENT:
 488                         arg = args[body->argnum].expanded;
 489                         count = &args[body->argnum].n_normal;
 490                         if (eof_token(arg)) {
 491                                 state = Normal;
 492                                 continue;
 493                         }
 494                 copy_arg:
 495                         tail = copy(&added, arg, count);
 496                         added->pos.newline = pos->newline;
 497                         added->pos.whitespace = pos->whitespace;
 498                         break;
 499
 500                 case TOKEN_CONCAT:
 501                         if (state == Placeholder)
 502                                 state = Normal;
 503                         else
 504                                 state = Concat;
 505                         continue;
 506
 507                 case TOKEN_IDENT:
 508                         added = dup_token(body, base_pos, pos);
 509                         if (added->ident->tainted)
 510                                 added->pos.noexpand = 1;
 511                         tail = &added->next;
 512                         break;
 513
 514                 default:
 515                         added = dup_token(body, base_pos, pos);
 516                         tail = &added->next;
 517                         break;
 518                 }
 519
 520                 /*
 521                  * if we got to doing real concatenation, we already have
 522                  * added something into the list, so containing_token() is OK.
 523                  */
 524                 if (state == Concat && merge(containing_token(list), added)) {
 525                         *list = added->next;
 526                         if (tail != &added->next)
 527                                 list = tail;
 528                 } else {
 529                         *list = added;
 530                         list = tail;
 531                 }
 532                 state = Normal;
 533         }
 534         *list = &eof_token_entry;
 535         return list;
 536 }
 537
 538 static struct token **expand(struct token **list, struct symbol *sym)
 539 {
 540         struct token *last;
 541         struct token *token = *list;
 542         struct ident *expanding = token->ident;
 543         struct token **tail;
 544         int nargs = sym->arglist ? sym->arglist->count.normal : 0;
 545         struct arg args[nargs];
 546
 547         if (expanding->tainted) {
 548                 token->pos.noexpand = 1;
 549                 return &token->next;
 550         }
 551
 552         if (sym->arglist) {
 553                 if (!match_op(scan_next(&token->next), '('))
 554                         return &token->next;
 555                 if (!collect_arguments(token->next, sym->arglist, args, token))
 556                         return &token->next;
 557                 expand_arguments(nargs, args);
 558         }
 559
 560         expanding->tainted = 1;
 561
 562         last = token->next;
 563         tail = substitute(list, sym->expansion, args);
 564         *tail = last;
 565
 566         return list;
 567 }
 568
 569 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
 570 {
 571         struct token *last;
 572         static char buffer[256];
 573         char *ptr = buffer;
 574
 575         last = token;
 576         while (!eof_token(token) && !match_op(token, endop)) {
 577                 int len;
 578                 const char *val = token->string->data;
 579                 if (token_type(token) != TOKEN_STRING)
 580                         val = show_token(token);
 581                 len = strlen(val);
 582                 memcpy(ptr, val, len);
 583                 ptr += len;
 584                 token = token->next;
 585         }
 586         *ptr = 0;
 587         if (endop && !match_op(token, endop))
 588                 warn(start->pos, "expected '>' at end of filename");
 589         return buffer;
 590 }
 591
 592 static int try_include(const char *path, int plen, const char *filename, int flen, struct token **where)
 593 {
 594         int fd;
 595         static char fullname[PATH_MAX];
 596
 597         memcpy(fullname, path, plen);
 598         if (plen && path[plen-1] != '/') {
 599                 fullname[plen] = '/';
 600                 plen++;
 601         }
 602         memcpy(fullname+plen, filename, flen);
 603         fd = open(fullname, O_RDONLY);
 604         if (fd >= 0) {
 605                 char * streamname = __alloc_bytes(plen + flen);
 606                 memcpy(streamname, fullname, plen + flen);
 607                 *where = tokenize(streamname, fd, *where);
 608                 close(fd);
 609                 return 1;
 610         }
 611         return 0;
 612 }
 613
 614 static int do_include_path(const char **pptr, struct token **list, struct token *token, const char *filename, int flen)
 615 {
 616         const char *path;
 617
 618         while ((path = *pptr++) != NULL) {
 619                 if (!try_include(path, strlen(path), filename, flen, list))
 620                         continue;
 621                 return 1;
 622         }
 623         return 0;
 624 }
 625
 626
 627 static void do_include(int local, struct stream *stream, struct token **list, struct token *token, const char *filename)
 628 {
 629         int flen = strlen(filename) + 1;
 630
 631         /* Absolute path? */
 632         if (filename[0] == '/') {
 633                 if (try_include("", 0, filename, flen, list))
 634                         return;
 635                 goto out;
 636         }
 637
 638         /* Same directory as current stream? */
 639         if (local) {
 640                 const char *path;
 641                 char *slash;
 642                 int plen;
 643
 644                 path = stream->name;
 645                 slash = strrchr(path, '/');
 646                 plen = slash ? slash - path : 0;
 647
 648                 if (try_include(path, plen, filename, flen, list))
 649                         return;
 650         }
 651
 652         /* Check the standard include paths.. */
 653         if (do_include_path(includepath, list, token, filename, flen))
 654                 return;
 655         if (do_include_path(sys_includepath, list, token, filename, flen))
 656                 return;
 657         if (do_include_path(gcc_includepath, list, token, filename, flen))
 658                 return;
 659
 660 out:
 661         error(token->pos, "unable to open '%s'", filename);
 662 }
 663
 664 static int handle_include(struct stream *stream, struct token **list, struct token *token)
 665 {
 666         const char *filename;
 667         struct token *next;
 668         int expect;
 669
 670         if (stream->constant == -1)
 671                 stream->constant = 0;
 672         if (false_nesting)
 673                 return 1;
 674         next = token->next;
 675         expect = '>';
 676         if (!match_op(next, '<')) {
 677                 expand_list(&token->next);
 678                 expect = 0;
 679                 next = token;
 680                 if (match_op(token->next, '<')) {
 681                         next = token->next;
 682                         expect = '>';
 683                 }
 684         }
 685         token = next->next;
 686         filename = token_name_sequence(token, expect, token);
 687         do_include(!expect, stream, list, token, filename);
 688         return 1;
 689 }
 690
 691 static int token_different(struct token *t1, struct token *t2)
 692 {
 693         int different;
 694
 695         if (token_type(t1) != token_type(t2))
 696                 return 1;
 697
 698         switch (token_type(t1)) {
 699         case TOKEN_IDENT:
 700                 different = t1->ident != t2->ident;
 701                 break;
 702         case TOKEN_ARG_COUNT:
 703         case TOKEN_UNTAINT:
 704         case TOKEN_CONCAT:
 705         case TOKEN_GNU_KLUDGE:
 706                 different = 0;
 707                 break;
 708         case TOKEN_NUMBER:
 709                 different = strcmp(t1->number, t2->number);
 710                 break;
 711         case TOKEN_SPECIAL:
 712                 different = t1->special != t2->special;
 713                 break;
 714         case TOKEN_MACRO_ARGUMENT:
 715         case TOKEN_QUOTED_ARGUMENT:
 716         case TOKEN_STR_ARGUMENT:
 717                 different = t1->argnum != t2->argnum;
 718                 break;
 719         case TOKEN_CHAR:
 720                 different = t1->character != t2->character;
 721                 break;
 722         case TOKEN_STRING: {
 723                 struct string *s1, *s2;
 724
 725                 s1 = t1->string;
 726                 s2 = t2->string;
 727                 different = 1;
 728                 if (s1->length != s2->length)
 729                         break;
 730                 different = memcmp(s1->data, s2->data, s1->length);
 731                 break;
 732         }
 733         default:
 734                 different = 1;
 735                 break;
 736         }
 737         return different;
 738 }
 739
 740 static int token_list_different(struct token *list1, struct token *list2)
 741 {
 742         for (;;) {
 743                 if (list1 == list2)
 744                         return 0;
 745                 if (!list1 || !list2)
 746                         return 1;
 747                 if (token_different(list1, list2))
 748                         return 1;
 749                 list1 = list1->next;
 750                 list2 = list2->next;
 751         }
 752 }
 753
 754 static inline void set_arg_count(struct token *token)
 755 {
 756         token_type(token) = TOKEN_ARG_COUNT;
 757         token->count.normal = token->count.quoted =
 758         token->count.str = token->count.vararg = 0;
 759 }
 760
 761 static struct token *parse_arguments(struct token *list)
 762 {
 763         struct token *arg = list->next, *next = list;
 764         struct argcount *count = &list->count;
 765
 766         set_arg_count(list);
 767
 768         if (match_op(arg, ')')) {
 769                 next = arg->next;
 770                 list->next = &eof_token_entry;
 771                 return next;
 772         }
 773
 774         while (token_type(arg) == TOKEN_IDENT) {
 775                 if (arg->ident == &__VA_ARGS___ident)
 776                         goto Eva_args;
 777                 if (!++count->normal)
 778                         goto Eargs;
 779                 next = arg->next;
 780
 781                 if (match_op(next, ',')) {
 782                         set_arg_count(next);
 783                         arg = next->next;
 784                         continue;
 785                 }
 786
 787                 if (match_op(next, ')')) {
 788                         set_arg_count(next);
 789                         next = next->next;
 790                         arg->next->next = &eof_token_entry;
 791                         return next;
 792                 }
 793
 794                 /* normal cases are finished here */
 795
 796                 if (match_op(next, SPECIAL_ELLIPSIS)) {
 797                         if (match_op(next->next, ')')) {
 798                                 set_arg_count(next);
 799                                 next->count.vararg = 1;
 800                                 next = next->next;
 801                                 arg->next->next = &eof_token_entry;
 802                                 return next->next;
 803                         }
 804
 805                         arg = next;
 806                         goto Enotclosed;
 807                 }
 808
 809                 if (eof_token(next)) {
 810                         goto Enotclosed;
 811                 } else {
 812                         arg = next;
 813                         goto Ebadstuff;
 814                 }
 815         }
 816
 817         if (match_op(arg, SPECIAL_ELLIPSIS)) {
 818                 next = arg->next;
 819                 token_type(arg) = TOKEN_IDENT;
 820                 arg->ident = &__VA_ARGS___ident;
 821                 if (!match_op(next, ')'))
 822                         goto Enotclosed;
 823                 if (!++count->normal)
 824                         goto Eargs;
 825                 set_arg_count(next);
 826                 next->count.vararg = 1;
 827                 next = next->next;
 828                 arg->next->next = &eof_token_entry;
 829                 return next;
 830         }
 831
 832         if (eof_token(arg)) {
 833                 arg = next;
 834                 goto Enotclosed;
 835         }
 836         if (match_op(arg, ','))
 837                 goto Emissing;
 838         else
 839                 goto Ebadstuff;
 840
 841
 842 Emissing:
 843         warn(arg->pos, "parameter name missing");
 844         return NULL;
 845 Ebadstuff:
 846         warn(arg->pos, "\"%s\" may not appear in macro parameter list",
 847                 show_token(arg));
 848         return NULL;
 849 Enotclosed:
 850         warn(arg->pos, "missing ')' in macro parameter list");
 851         return NULL;
 852 Eva_args:
 853         warn(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 854         return NULL;
 855 Eargs:
 856         warn(arg->pos, "too many arguments in macro definition");
 857         return NULL;
 858 }
 859
 860 static int try_arg(struct token *token, enum token_type type, struct token *arglist)
 861 {
 862         struct ident *ident = token->ident;
 863         int nr;
 864
 865         if (!arglist || token_type(token) != TOKEN_IDENT)
 866                 return 0;
 867
 868         arglist = arglist->next;
 869
 870         for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) {
 871                 if (arglist->ident == ident) {
 872                         struct argcount *count = &arglist->next->count;
 873                         int n;
 874
 875                         token->argnum = nr;
 876                         token_type(token) = type;
 877                         switch (type) {
 878                         case TOKEN_MACRO_ARGUMENT:
 879                                 n = ++count->normal;
 880                                 break;
 881                         case TOKEN_QUOTED_ARGUMENT:
 882                                 n = ++count->quoted;
 883                                 break;
 884                         default:
 885                                 n = ++count->str;
 886                         }
 887                         if (n)
 888                                 return count->vararg ? 2 : 1;
 889                         token_type(token) = TOKEN_ERROR;
 890                         return -1;
 891                 }
 892         }
 893         return 0;
 894 }
 895
 896 static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
 897 {
 898         struct token *token = expansion;
 899         struct token **p;
 900         struct token *last = NULL;
 901
 902         if (match_op(token, SPECIAL_HASHHASH))
 903                 goto Econcat;
 904
 905         for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
 906                 if (match_op(token, '#')) {
 907                         if (arglist) {
 908                                 struct token *next = token->next;
 909                                 if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
 910                                         goto Equote;
 911                                 next->pos.whitespace = token->pos.whitespace;
 912                                 token = *p = next;
 913                         } else {
 914                                 token->pos.noexpand = 1;
 915                         }
 916                 } else if (match_op(token, SPECIAL_HASHHASH)) {
 917                         struct token *next = token->next;
 918                         int arg = try_arg(next, TOKEN_QUOTED_ARGUMENT, arglist);
 919                         token_type(token) = TOKEN_CONCAT;
 920                         if (arg) {
 921                                 token = next;
 922                                 /* GNU kludge */
 923                                 if (arg == 2 && last && match_op(last, ',')) {
 924                                         token_type(last) = TOKEN_GNU_KLUDGE;
 925                                         last->next = token;
 926                                 }
 927                         } else if (match_op(next, SPECIAL_HASHHASH))
 928                                 token = next;
 929                         else if (match_op(next, ','))
 930                                 token = next;
 931                         else if (eof_token(next))
 932                                 goto Econcat;
 933                 } else if (match_op(token->next, SPECIAL_HASHHASH)) {
 934                         try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
 935                 } else {
 936                         try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
 937                 }
 938                 if (token_type(token) == TOKEN_ERROR)
 939                         goto Earg;
 940                 last = token;
 941         }
 942         token = alloc_token(&expansion->pos);
 943         token_type(token) = TOKEN_UNTAINT;
 944         token->ident = name;
 945         token->next = *p;
 946         *p = token;
 947         return expansion;
 948
 949 Equote:
 950         warn(token->pos, "'#' is not followed by a macro parameter");
 951         return NULL;
 952
 953 Econcat:
 954         warn(token->pos, "'##' cannot appear at the ends of macro expansion");
 955         return NULL;
 956 Earg:
 957         warn(token->pos, "too many instances of argument in body");
 958         return NULL;
 959 }
 960
 961 static int handle_define(struct stream *stream, struct token **line, struct token *token)
 962 {
 963         struct token *arglist, *expansion;
 964         struct token *left = token->next;
 965         struct symbol *sym;
 966         struct ident *name;
 967
 968         if (token_type(left) != TOKEN_IDENT) {
 969                 warn(token->pos, "expected identifier to 'define'");
 970                 return 0;
 971         }
 972         if (false_nesting)
 973                 return 1;
 974         name = left->ident;
 975
 976         arglist = NULL;
 977         expansion = left->next;
 978         if (!expansion->pos.whitespace && match_op(expansion, '(')) {
 979                 arglist = expansion;
 980                 expansion = parse_arguments(expansion);
 981                 if (!expansion)
 982                         return 1;
 983         }
 984
 985         expansion = parse_expansion(expansion, arglist, name);
 986         if (!expansion)
 987                 return 1;
 988
 989         sym = lookup_symbol(name, NS_PREPROCESSOR);
 990         if (sym) {
 991                 if (token_list_different(sym->expansion, expansion) ||
 992                     token_list_different(sym->arglist, arglist)) {
 993                         warn(left->pos, "preprocessor token %.*s redefined",
 994                                         name->len, name->name);
 995                         info(sym->pos, "this was the original definition");
 996                 }
 997                 return 1;
 998         }
 999         sym = alloc_symbol(left->pos, SYM_NODE);
1000         bind_symbol(sym, name, NS_PREPROCESSOR);
1001
1002         sym->expansion = expansion;
1003         sym->arglist = arglist;
1004         return 1;
1005 }
1006
1007 static int handle_undef(struct stream *stream, struct token **line, struct token *token)
1008 {
1009         struct token *left = token->next;
1010         struct symbol **sym;
1011
1012         if (token_type(left) != TOKEN_IDENT) {
1013                 warn(token->pos, "expected identifier to 'undef'");
1014                 return 0;
1015         }
1016         if (false_nesting)
1017                 return 1;
1018         sym = &left->ident->symbols;
1019         while (*sym) {
1020                 struct symbol *t = *sym;
1021                 if (t->namespace == NS_PREPROCESSOR) {
1022                         *sym = t->next_id;
1023                         return 1;
1024                 }
1025                 sym = &t->next_id;
1026         }
1027         return 1;
1028 }
1029
1030 static int preprocessor_if(struct token *token, int true)
1031 {
1032         if (if_nesting == 0)
1033                 unmatched_if = token;
1034         if (if_nesting >= MAX_NEST)
1035                 error(token->pos, "Maximum preprocessor conditional level exhausted");
1036         elif_ignore[if_nesting] = false_nesting || true;
1037         if (false_nesting || !true) {
1038                 false_nesting++;
1039                 return 1;
1040         }
1041         true_nesting++;
1042         return 1;
1043 }
1044
1045 static int token_defined(struct token *token)
1046 {
1047         if (token_type(token) == TOKEN_IDENT)
1048                 return lookup_symbol(token->ident, NS_PREPROCESSOR) != NULL;
1049
1050         warn(token->pos, "expected identifier for #if[n]def");
1051         return 0;
1052 }
1053
1054 static int handle_ifdef(struct stream *stream, struct token **line, struct token *token)
1055 {
1056         return preprocessor_if(token, token_defined(token->next));
1057 }
1058
1059 static int handle_ifndef(struct stream *stream, struct token **line, struct token *token)
1060 {
1061         struct token *next = token->next;
1062         if (stream->constant == -1) {
1063                 int newconstant = 0;
1064                 if (token_type(next) == TOKEN_IDENT) {
1065                         if (!stream->protect || stream->protect == next->ident) {
1066                                 newconstant = -2;
1067                                 stream->protect = next->ident;
1068                                 stream->nesting = if_nesting+1;
1069                         }
1070                 }
1071                 stream->constant = newconstant;
1072         }
1073         return preprocessor_if(token, !token_defined(next));
1074 }
1075
1076 /*
1077  * Expression handling for #if and #elif; it differs from normal expansion
1078  * due to special treatment of "defined".
1079  */
1080 static int expression_value(struct token **where)
1081 {
1082         struct expression *expr;
1083         struct token *p;
1084         struct token **list = where, **beginning = NULL;
1085         long long value;
1086         int state = 0;
1087
1088         while (!eof_token(p = scan_next(list))) {
1089                 switch (state) {
1090                 case 0:
1091                         if (token_type(p) == TOKEN_IDENT) {
1092                                 if (p->ident != &defined_ident) {
1093                                         list = expand_one_symbol(list);
1094                                         continue;
1095                                 }
1096                                 state = 1;
1097                                 beginning = list;
1098                         }
1099                         break;
1100                 case 1:
1101                         if (match_op(p, '(')) {
1102                                 state = 2;
1103                         } else {
1104                                 state = 0;
1105                                 replace_with_defined(p);
1106                                 *beginning = p;
1107                         }
1108                         break;
1109                 case 2:
1110                         if (token_type(p) == TOKEN_IDENT)
1111                                 state = 3;
1112                         else
1113                                 state = 0;
1114                         replace_with_defined(p);
1115                         *beginning = p;
1116                         break;
1117                 case 3:
1118                         state = 0;
1119                         if (!match_op(p, ')'))
1120                                 warn(p->pos, "missing ')' after \"defined\"");
1121                         *list = p->next;
1122                         continue;
1123                 }
1124                 list = &p->next;
1125         }
1126
1127         p = constant_expression(*where, &expr);
1128         if (!eof_token(p))
1129                 warn(p->pos, "garbage at end: %s", show_token_sequence(p));
1130         value = get_expression_value(expr);
1131         return value != 0;
1132 }
1133
1134 static int handle_if(struct stream *stream, struct token **line, struct token *token)
1135 {
1136         int value = 0;
1137         if (!false_nesting)
1138                 value = expression_value(&token->next);
1139         return preprocessor_if(token, value);
1140 }
1141
1142 static int handle_elif(struct stream * stream, struct token **line, struct token *token)
1143 {
1144         if (stream->nesting == if_nesting)
1145                 stream->constant = 0;
1146         if (false_nesting) {
1147                 /* If this whole if-thing is if'ed out, an elif cannot help */
1148                 if (elif_ignore[if_nesting-1])
1149                         return 1;
1150                 if (expression_value(&token->next)) {
1151                         false_nesting--;
1152                         true_nesting++;
1153                         elif_ignore[if_nesting-1] = 1;
1154                 }
1155                 return 1;
1156         }
1157         if (true_nesting) {
1158                 false_nesting = 1;
1159                 true_nesting--;
1160                 return 1;
1161         }
1162         warn(token->pos, "unmatched '#elif'");
1163         return 1;
1164 }
1165
1166 static int handle_else(struct stream *stream, struct token **line, struct token *token)
1167 {
1168         if (stream->nesting == if_nesting)
1169                 stream->constant = 0;
1170         if (false_nesting) {
1171                 /* If this whole if-thing is if'ed out, an else cannot help */
1172                 if (elif_ignore[if_nesting-1])
1173                         return 1;
1174                 false_nesting--;
1175                 true_nesting++;
1176                 elif_ignore[if_nesting-1] = 1;
1177                 return 1;
1178         }
1179         if (true_nesting) {
1180                 true_nesting--;
1181                 false_nesting = 1;
1182                 return 1;
1183         }
1184         warn(token->pos, "unmatched #else");
1185         return 1;
1186 }
1187
1188 static int handle_endif(struct stream *stream, struct token **line, struct token *token)
1189 {
1190         if (stream->constant == -2 && stream->nesting == if_nesting)
1191                 stream->constant = -1;
1192
1193         if (false_nesting) {
1194                 false_nesting--;
1195                 return 1;
1196         }
1197         if (true_nesting) {
1198                 true_nesting--;
1199                 return 1;
1200         }
1201         warn(token->pos, "unmatched #endif");
1202         return 1;
1203 }
1204
1205 static const char *show_token_sequence(struct token *token)
1206 {
1207         static char buffer[1024];
1208         char *ptr = buffer;
1209         int whitespace = 0;
1210
1211         if (!token)
1212                 return "<none>";
1213         while (!eof_token(token)) {
1214                 const char *val = show_token(token);
1215                 int len = strlen(val);
1216
1217                 if (ptr + whitespace + len > buffer + sizeof(buffer)) {
1218                         warn(token->pos, "too long token expansion");
1219                         break;
1220                 }
1221
1222                 if (whitespace)
1223                         *ptr++ = ' ';
1224                 memcpy(ptr, val, len);
1225                 ptr += len;
1226                 token = token->next;
1227                 whitespace = token->pos.whitespace;
1228         }
1229         *ptr = 0;
1230         return buffer;
1231 }
1232
1233 static int handle_warning(struct stream *stream, struct token **line, struct token *token)
1234 {
1235         if (false_nesting)
1236                 return 1;
1237         warn(token->pos, "%s", show_token_sequence(token->next));
1238         return 1;
1239 }
1240
1241 static int handle_error(struct stream *stream, struct token **line, struct token *token)
1242 {
1243         if (false_nesting)
1244                 return 1;
1245         warn(token->pos, "%s", show_token_sequence(token->next));
1246         return 1;
1247 }
1248
1249 static int handle_nostdinc(struct stream *stream, struct token **line, struct token *token)
1250 {
1251         if (false_nesting)
1252                 return 1;
1253         includepath[0] = NULL;
1254         return 1;
1255 }
1256
1257 static void add_path_entry(struct token *token, const char *path)
1258 {
1259         int i;
1260
1261         for (i = 0; i < INCLUDEPATHS; i++) {
1262                 if (!includepath[i]) {
1263                         includepath[i] = path;
1264                         includepath[i+1] = NULL;
1265                         return;
1266                 }
1267         }
1268         warn(token->pos, "too many include path entries");
1269 }
1270
1271 static int handle_add_include(struct stream *stream, struct token **line, struct token *token)
1272 {
1273         for (;;) {
1274                 token = token->next;
1275                 if (eof_token(token))
1276                         return 1;
1277                 if (token_type(token) != TOKEN_STRING) {
1278                         warn(token->pos, "expected path string");
1279                         return 1;
1280                 }
1281                 add_path_entry(token, token->string->data);
1282         }
1283 }
1284
1285 /*
1286  * We replace "#pragma xxx" with "__pragma__" in the token
1287  * stream. Just as an example.
1288  *
1289  * We'll just #define that away for now, but the theory here
1290  * is that we can use this to insert arbitrary token sequences
1291  * to turn the pragma's into internal front-end sequences for
1292  * when we actually start caring about them.
1293  *
1294  * So eventually this will turn into some kind of extended
1295  * __attribute__() like thing, except called __pragma__(xxx).
1296  */
1297 static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
1298 {
1299         struct token *next = *line;
1300
1301         token->ident = &pragma_ident;
1302         token->pos.newline = 1;
1303         token->pos.whitespace = 1;
1304         token->pos.pos = 1;
1305         *line = token;
1306         token->next = next;
1307         return 1;
1308 }
1309
1310 static int handle_preprocessor_command(struct stream *stream, struct token **line, struct ident *ident, struct token *token)
1311 {
1312         int i;
1313         static struct {
1314                 const char *name;
1315                 int (*handler)(struct stream *, struct token **, struct token *);
1316         } handlers[] = {
1317                 { "define",     handle_define },
1318                 { "undef",      handle_undef },
1319                 { "ifdef",      handle_ifdef },
1320                 { "ifndef",     handle_ifndef },
1321                 { "else",       handle_else },
1322                 { "endif",      handle_endif },
1323                 { "if",         handle_if },
1324                 { "elif",       handle_elif },
1325                 { "warning",    handle_warning },
1326                 { "error",      handle_error },
1327                 { "include",    handle_include },
1328                 { "pragma",     handle_pragma },
1329
1330                 // our internal preprocessor tokens
1331                 { "nostdinc",   handle_nostdinc },
1332                 { "add_include", handle_add_include },
1333         };
1334
1335         for (i = 0; i < (sizeof (handlers) / sizeof (handlers[0])); i++) {
1336                 if (match_string_ident(ident, handlers[i].name))
1337                         return handlers[i].handler(stream, line, token);
1338         }
1339         return 0;
1340 }
1341
1342 static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *token)
1343 {
1344         if (!token)
1345                 return;
1346
1347         if (token_type(token) == TOKEN_IDENT)
1348                 if (handle_preprocessor_command(stream, line, token->ident, token))
1349                         return;
1350         warn(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token));
1351 }
1352
1353 static void preprocessor_line(struct stream *stream, struct token **line)
1354 {
1355         struct token *start = *line, *next;
1356         struct token **tp = &start->next;
1357
1358         for (;;) {
1359                 next = *tp;
1360                 if (next->pos.newline)
1361                         break;
1362                 tp = &next->next;
1363         }
1364         *line = next;
1365         *tp = &eof_token_entry;
1366         handle_preprocessor_line(stream, line, start->next);
1367 }
1368
1369 static void do_preprocess(struct token **list)
1370 {
1371         struct token *next;
1372
1373         while (!eof_token(next = scan_next(list))) {
1374                 struct stream *stream = input_streams + next->pos.stream;
1375
1376                 if (next->pos.newline && match_op(next, '#')) {
1377                         if (!next->pos.noexpand) {
1378                                 preprocessor_line(stream, list);
1379                                 continue;
1380                         }
1381                 }
1382
1383                 if (false_nesting) {
1384                         *list = next->next;
1385                         continue;
1386                 }
1387
1388                 switch (token_type(next)) {
1389                 case TOKEN_STREAMEND:
1390                         if (stream->constant == -1 && stream->protect) {
1391                                 stream->constant = 1;
1392                         }
1393                         /* fallthrough */
1394                 case TOKEN_STREAMBEGIN:
1395                         *list = next->next;
1396                         continue;
1397
1398                 case TOKEN_IDENT:
1399                         list = expand_one_symbol(list);
1400                         break;
1401                 default:
1402                         list = &next->next;
1403                 }
1404                 /*
1405                  * Any token expansion (even if it ended up being an
1406                  * empty expansion) in this stream implies it can't
1407                  * be constant.
1408                  */
1409                 stream->constant = 0;
1410         }
1411 }
1412
1413 struct token * preprocess(struct token *token)
1414 {
1415         preprocessing = 1;
1416         do_preprocess(&token);
1417         if (if_nesting)
1418                 warn(unmatched_if->pos, "unmatched preprocessor conditional");
1419
1420         // Drop all expressions from pre-processing, they're not used any more.
1421         clear_expression_alloc();
1422         preprocessing = 0;
1423
1424         return token;
1425 }