pre-process.c

   1 /*
   2  * Do C preprocessing, based on a token list gathered by
   3  * the tokenizer.
   4  *
   5  * This may not be the smartest preprocessor on the planet.
   6  *
   7  * Copyright (C) 2003 Transmeta Corp.
   8  *               2003-2004 Linus Torvalds
   9  *
  10  * Permission is hereby granted, free of charge, to any person obtaining a copy
  11  * of this software and associated documentation files (the "Software"), to deal
  12  * in the Software without restriction, including without limitation the rights
  13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14  * copies of the Software, and to permit persons to whom the Software is
  15  * furnished to do so, subject to the following conditions:
  16  *
  17  * The above copyright notice and this permission notice shall be included in
  18  * all copies or substantial portions of the Software.
  19  *
  20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26  * THE SOFTWARE.
  27  */
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <stdarg.h>
  31 #include <stddef.h>
  32 #include <string.h>
  33 #include <ctype.h>
  34 #include <unistd.h>
  35 #include <fcntl.h>
  36 #include <limits.h>
  37 #include <time.h>
  38 #include <dirent.h>
  39 #include <sys/stat.h>
  40
  41 #include "lib.h"
  42 #include "allocate.h"
  43 #include "parse.h"
  44 #include "token.h"
  45 #include "symbol.h"
  46 #include "expression.h"
  47 #include "scope.h"
  48
  49 static struct ident_list *macros;       // only needed for -dD
  50 static int false_nesting = 0;
  51 static int counter_macro = 0;           // __COUNTER__ expansion
  52
  53 #define INCLUDEPATHS 300
  54 const char *includepath[INCLUDEPATHS+1] = {
  55         "",
  56         "/usr/include",
  57         "/usr/local/include",
  58         NULL
  59 };
  60
  61 static const char **quote_includepath = includepath;
  62 static const char **angle_includepath = includepath + 1;
  63 static const char **isys_includepath   = includepath + 1;
  64 static const char **sys_includepath   = includepath + 1;
  65 static const char **dirafter_includepath = includepath + 3;
  66
  67 #define dirty_stream(stream)                            \
  68         do {                                            \
  69                 if (!stream->dirty) {                   \
  70                         stream->dirty = 1;              \
  71                         if (!stream->ifndef)            \
  72                                 stream->protect = NULL; \
  73                 }                                       \
  74         } while(0)
  75
  76 #define end_group(stream)                                       \
  77         do {                                                    \
  78                 if (stream->ifndef == stream->top_if) {         \
  79                         stream->ifndef = NULL;                  \
  80                         if (!stream->dirty)                     \
  81                                 stream->protect = NULL;         \
  82                         else if (stream->protect)               \
  83                                 stream->dirty = 0;              \
  84                 }                                               \
  85         } while(0)
  86
  87 #define nesting_error(stream)           \
  88         do {                            \
  89                 stream->dirty = 1;      \
  90                 stream->ifndef = NULL;  \
  91                 stream->protect = NULL; \
  92         } while(0)
  93
  94 static struct token *alloc_token(struct position *pos)
  95 {
  96         struct token *token = __alloc_token(0);
  97
  98         token->pos.stream = pos->stream;
  99         token->pos.line = pos->line;
 100         token->pos.pos = pos->pos;
 101         token->pos.whitespace = 1;
 102         return token;
 103 }
 104
 105 /* Expand symbol 'sym' at '*list' */
 106 static int expand(struct token **, struct symbol *);
 107
 108 static void replace_with_string(struct token *token, const char *str)
 109 {
 110         int size = strlen(str) + 1;
 111         struct string *s = __alloc_string(size);
 112
 113         s->length = size;
 114         memcpy(s->data, str, size);
 115         token_type(token) = TOKEN_STRING;
 116         token->string = s;
 117 }
 118
 119 static void replace_with_integer(struct token *token, unsigned int val)
 120 {
 121         char *buf = __alloc_bytes(11);
 122         sprintf(buf, "%u", val);
 123         token_type(token) = TOKEN_NUMBER;
 124         token->number = buf;
 125 }
 126
 127 static struct symbol *lookup_macro(struct ident *ident)
 128 {
 129         struct symbol *sym = lookup_symbol(ident, NS_MACRO | NS_UNDEF);
 130         if (sym && sym->namespace != NS_MACRO)
 131                 sym = NULL;
 132         return sym;
 133 }
 134
 135 static int token_defined(struct token *token)
 136 {
 137         if (token_type(token) == TOKEN_IDENT) {
 138                 struct symbol *sym = lookup_macro(token->ident);
 139                 if (sym) {
 140                         sym->used_in = file_scope;
 141                         return 1;
 142                 }
 143                 return 0;
 144         }
 145
 146         sparse_error(token->pos, "expected preprocessor identifier");
 147         return 0;
 148 }
 149
 150 static void replace_with_defined(struct token *token)
 151 {
 152         static const char *string[] = { "0", "1" };
 153         int defined = token_defined(token);
 154
 155         token_type(token) = TOKEN_NUMBER;
 156         token->number = string[defined];
 157 }
 158
 159 static int expand_one_symbol(struct token **list)
 160 {
 161         struct token *token = *list;
 162         struct symbol *sym;
 163         static char buffer[12]; /* __DATE__: 3 + ' ' + 2 + ' ' + 4 + '\0' */
 164         static time_t t = 0;
 165
 166         if (token->pos.noexpand)
 167                 return 1;
 168
 169         sym = lookup_macro(token->ident);
 170         if (sym) {
 171                 store_macro_pos(token);
 172                 sym->used_in = file_scope;
 173                 return expand(list, sym);
 174         }
 175         if (token->ident == &__LINE___ident) {
 176                 replace_with_integer(token, token->pos.line);
 177         } else if (token->ident == &__FILE___ident) {
 178                 replace_with_string(token, stream_name(token->pos.stream));
 179         } else if (token->ident == &__DATE___ident) {
 180                 if (!t)
 181                         time(&t);
 182                 strftime(buffer, 12, "%b %e %Y", localtime(&t));
 183                 replace_with_string(token, buffer);
 184         } else if (token->ident == &__TIME___ident) {
 185                 if (!t)
 186                         time(&t);
 187                 strftime(buffer, 9, "%T", localtime(&t));
 188                 replace_with_string(token, buffer);
 189         } else if (token->ident == &__COUNTER___ident) {
 190                 replace_with_integer(token, counter_macro++);
 191         }
 192         return 1;
 193 }
 194
 195 static inline struct token *scan_next(struct token **where)
 196 {
 197         struct token *token = *where;
 198         if (token_type(token) != TOKEN_UNTAINT)
 199                 return token;
 200         do {
 201                 token->ident->tainted = 0;
 202                 token = token->next;
 203         } while (token_type(token) == TOKEN_UNTAINT);
 204         *where = token;
 205         return token;
 206 }
 207
 208 static void expand_list(struct token **list)
 209 {
 210         struct token *next;
 211         while (!eof_token(next = scan_next(list))) {
 212                 if (token_type(next) != TOKEN_IDENT || expand_one_symbol(list))
 213                         list = &next->next;
 214         }
 215 }
 216
 217 static void preprocessor_line(struct stream *stream, struct token **line);
 218
 219 static struct token *collect_arg(struct token *prev, int vararg, struct position *pos, int count)
 220 {
 221         struct stream *stream = input_streams + prev->pos.stream;
 222         struct token **p = &prev->next;
 223         struct token *next;
 224         int nesting = 0;
 225
 226         while (!eof_token(next = scan_next(p))) {
 227                 if (next->pos.newline && match_op(next, '#')) {
 228                         if (!next->pos.noexpand) {
 229                                 sparse_error(next->pos,
 230                                              "directive in argument list");
 231                                 preprocessor_line(stream, p);
 232                                 __free_token(next);     /* Free the '#' token */
 233                                 continue;
 234                         }
 235                 }
 236                 switch (token_type(next)) {
 237                 case TOKEN_STREAMEND:
 238                 case TOKEN_STREAMBEGIN:
 239                         *p = &eof_token_entry;
 240                         return next;
 241                 case TOKEN_STRING:
 242                 case TOKEN_WIDE_STRING:
 243                         if (count > 1)
 244                                 next->string->immutable = 1;
 245                         break;
 246                 }
 247                 if (false_nesting) {
 248                         *p = next->next;
 249                         __free_token(next);
 250                         continue;
 251                 }
 252                 if (match_op(next, '(')) {
 253                         nesting++;
 254                 } else if (match_op(next, ')')) {
 255                         if (!nesting--)
 256                                 break;
 257                 } else if (match_op(next, ',') && !nesting && !vararg) {
 258                         break;
 259                 }
 260                 next->pos.stream = pos->stream;
 261                 next->pos.line = pos->line;
 262                 next->pos.pos = pos->pos;
 263                 p = &next->next;
 264         }
 265         *p = &eof_token_entry;
 266         return next;
 267 }
 268
 269 /*
 270  * We store arglist as <counter> [arg1] <number of uses for arg1> ... eof
 271  */
 272
 273 struct arg {
 274         struct token *arg;
 275         struct token *expanded;
 276         struct token *str;
 277         int n_normal;
 278         int n_quoted;
 279         int n_str;
 280 };
 281
 282 static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
 283 {
 284         int wanted = arglist->count.normal;
 285         struct token *next = NULL;
 286         int count = 0;
 287
 288         arglist = arglist->next;        /* skip counter */
 289
 290         if (!wanted) {
 291                 next = collect_arg(start, 0, &what->pos, 0);
 292                 if (eof_token(next))
 293                         goto Eclosing;
 294                 if (!eof_token(start->next) || !match_op(next, ')')) {
 295                         count++;
 296                         goto Emany;
 297                 }
 298         } else {
 299                 for (count = 0; count < wanted; count++) {
 300                         struct argcount *p = &arglist->next->count;
 301                         next = collect_arg(start, p->vararg, &what->pos, p->normal);
 302                         if (eof_token(next))
 303                                 goto Eclosing;
 304                         if (p->vararg && wanted == 1 && eof_token(start->next))
 305                                 break;
 306                         arglist = arglist->next->next;
 307                         args[count].arg = start->next;
 308                         args[count].n_normal = p->normal;
 309                         args[count].n_quoted = p->quoted;
 310                         args[count].n_str = p->str;
 311                         if (match_op(next, ')')) {
 312                                 count++;
 313                                 break;
 314                         }
 315                         start = next;
 316                 }
 317                 if (count == wanted && !match_op(next, ')'))
 318                         goto Emany;
 319                 if (count == wanted - 1) {
 320                         struct argcount *p = &arglist->next->count;
 321                         if (!p->vararg)
 322                                 goto Efew;
 323                         args[count].arg = NULL;
 324                         args[count].n_normal = p->normal;
 325                         args[count].n_quoted = p->quoted;
 326                         args[count].n_str = p->str;
 327                 }
 328                 if (count < wanted - 1)
 329                         goto Efew;
 330         }
 331         what->next = next->next;
 332         return 1;
 333
 334 Efew:
 335         sparse_error(what->pos, "macro \"%s\" requires %d arguments, but only %d given",
 336                 show_token(what), wanted, count);
 337         goto out;
 338 Emany:
 339         while (match_op(next, ',')) {
 340                 next = collect_arg(next, 0, &what->pos, 0);
 341                 count++;
 342         }
 343         if (eof_token(next))
 344                 goto Eclosing;
 345         sparse_error(what->pos, "macro \"%s\" passed %d arguments, but takes just %d",
 346                 show_token(what), count, wanted);
 347         goto out;
 348 Eclosing:
 349         sparse_error(what->pos, "unterminated argument list invoking macro \"%s\"",
 350                 show_token(what));
 351 out:
 352         what->next = next->next;
 353         return 0;
 354 }
 355
 356 static struct token *dup_list(struct token *list)
 357 {
 358         struct token *res = NULL;
 359         struct token **p = &res;
 360
 361         while (!eof_token(list)) {
 362                 struct token *newtok = __alloc_token(0);
 363                 *newtok = *list;
 364                 *p = newtok;
 365                 p = &newtok->next;
 366                 list = list->next;
 367         }
 368         return res;
 369 }
 370
 371 static const char *show_token_sequence(struct token *token, int quote)
 372 {
 373         static char buffer[MAX_STRING];
 374         char *ptr = buffer;
 375         int whitespace = 0;
 376
 377         if (!token && !quote)
 378                 return "<none>";
 379         while (!eof_token(token)) {
 380                 const char *val = quote ? quote_token(token) : show_token(token);
 381                 int len = strlen(val);
 382
 383                 if (ptr + whitespace + len >= buffer + sizeof(buffer)) {
 384                         sparse_error(token->pos, "too long token expansion");
 385                         break;
 386                 }
 387
 388                 if (whitespace)
 389                         *ptr++ = ' ';
 390                 memcpy(ptr, val, len);
 391                 ptr += len;
 392                 token = token->next;
 393                 whitespace = token->pos.whitespace;
 394         }
 395         *ptr = 0;
 396         return buffer;
 397 }
 398
 399 static struct token *stringify(struct token *arg)
 400 {
 401         const char *s = show_token_sequence(arg, 1);
 402         int size = strlen(s)+1;
 403         struct token *token = __alloc_token(0);
 404         struct string *string = __alloc_string(size);
 405
 406         memcpy(string->data, s, size);
 407         string->length = size;
 408         token->pos = arg->pos;
 409         token_type(token) = TOKEN_STRING;
 410         token->string = string;
 411         token->next = &eof_token_entry;
 412         return token;
 413 }
 414
 415 static void expand_arguments(int count, struct arg *args)
 416 {
 417         int i;
 418         for (i = 0; i < count; i++) {
 419                 struct token *arg = args[i].arg;
 420                 if (!arg)
 421                         arg = &eof_token_entry;
 422                 if (args[i].n_str)
 423                         args[i].str = stringify(arg);
 424                 if (args[i].n_normal) {
 425                         if (!args[i].n_quoted) {
 426                                 args[i].expanded = arg;
 427                                 args[i].arg = NULL;
 428                         } else if (eof_token(arg)) {
 429                                 args[i].expanded = arg;
 430                         } else {
 431                                 args[i].expanded = dup_list(arg);
 432                         }
 433                         expand_list(&args[i].expanded);
 434                 }
 435         }
 436 }
 437
 438 /*
 439  * Possibly valid combinations:
 440  *  - ident + ident -> ident
 441  *  - ident + number -> ident unless number contains '.', '+' or '-'.
 442  *  - 'L' + char constant -> wide char constant
 443  *  - 'L' + string literal -> wide string literal
 444  *  - number + number -> number
 445  *  - number + ident -> number
 446  *  - number + '.' -> number
 447  *  - number + '+' or '-' -> number, if number used to end on [eEpP].
 448  *  - '.' + number -> number, if number used to start with a digit.
 449  *  - special + special -> either special or an error.
 450  */
 451 static enum token_type combine(struct token *left, struct token *right, char *p)
 452 {
 453         int len;
 454         enum token_type t1 = token_type(left), t2 = token_type(right);
 455
 456         if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
 457                 return TOKEN_ERROR;
 458
 459         if (t1 == TOKEN_IDENT && left->ident == &L_ident) {
 460                 if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR)
 461                         return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR;
 462                 if (t2 == TOKEN_STRING)
 463                         return TOKEN_WIDE_STRING;
 464         }
 465
 466         if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
 467                 return TOKEN_ERROR;
 468
 469         strcpy(p, show_token(left));
 470         strcat(p, show_token(right));
 471         len = strlen(p);
 472
 473         if (len >= 256)
 474                 return TOKEN_ERROR;
 475
 476         if (t1 == TOKEN_IDENT) {
 477                 if (t2 == TOKEN_SPECIAL)
 478                         return TOKEN_ERROR;
 479                 if (t2 == TOKEN_NUMBER && strpbrk(p, "+-."))
 480                         return TOKEN_ERROR;
 481                 return TOKEN_IDENT;
 482         }
 483
 484         if (t1 == TOKEN_NUMBER) {
 485                 if (t2 == TOKEN_SPECIAL) {
 486                         switch (right->special) {
 487                         case '.':
 488                                 break;
 489                         case '+': case '-':
 490                                 if (strchr("eEpP", p[len - 2]))
 491                                         break;
 492                         default:
 493                                 return TOKEN_ERROR;
 494                         }
 495                 }
 496                 return TOKEN_NUMBER;
 497         }
 498
 499         if (p[0] == '.' && isdigit((unsigned char)p[1]))
 500                 return TOKEN_NUMBER;
 501
 502         return TOKEN_SPECIAL;
 503 }
 504
 505 static int merge(struct token *left, struct token *right)
 506 {
 507         static char buffer[512];
 508         enum token_type res = combine(left, right, buffer);
 509         int n;
 510
 511         switch (res) {
 512         case TOKEN_IDENT:
 513                 left->ident = built_in_ident(buffer);
 514                 left->pos.noexpand = 0;
 515                 return 1;
 516
 517         case TOKEN_NUMBER: {
 518                 char *number = __alloc_bytes(strlen(buffer) + 1);
 519                 memcpy(number, buffer, strlen(buffer) + 1);
 520                 token_type(left) = TOKEN_NUMBER;        /* could be . + num */
 521                 left->number = number;
 522                 return 1;
 523         }
 524
 525         case TOKEN_SPECIAL:
 526                 if (buffer[2] && buffer[3])
 527                         break;
 528                 for (n = SPECIAL_BASE; n < SPECIAL_ARG_SEPARATOR; n++) {
 529                         if (!memcmp(buffer, combinations[n-SPECIAL_BASE], 3)) {
 530                                 left->special = n;
 531                                 return 1;
 532                         }
 533                 }
 534                 break;
 535
 536         case TOKEN_WIDE_CHAR:
 537         case TOKEN_WIDE_STRING:
 538                 token_type(left) = res;
 539                 left->pos.noexpand = 0;
 540                 left->string = right->string;
 541                 return 1;
 542
 543         case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
 544                 token_type(left) = res;
 545                 left->pos.noexpand = 0;
 546                 memcpy(left->embedded, right->embedded, 4);
 547                 return 1;
 548
 549         default:
 550                 ;
 551         }
 552         sparse_error(left->pos, "'##' failed: concatenation is not a valid token");
 553         return 0;
 554 }
 555
 556 static struct token *dup_token(struct token *token, struct position *streampos)
 557 {
 558         struct token *alloc = alloc_token(streampos);
 559         token_type(alloc) = token_type(token);
 560         alloc->pos.newline = token->pos.newline;
 561         alloc->pos.whitespace = token->pos.whitespace;
 562         alloc->number = token->number;
 563         alloc->pos.noexpand = token->pos.noexpand;
 564         return alloc;
 565 }
 566
 567 static struct token **copy(struct token **where, struct token *list, int *count)
 568 {
 569         int need_copy = --*count;
 570         while (!eof_token(list)) {
 571                 struct token *token;
 572                 if (need_copy)
 573                         token = dup_token(list, &list->pos);
 574                 else
 575                         token = list;
 576                 if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
 577                         token->pos.noexpand = 1;
 578                 *where = token;
 579                 where = &token->next;
 580                 list = list->next;
 581         }
 582         *where = &eof_token_entry;
 583         return where;
 584 }
 585
 586 static int handle_kludge(struct token **p, struct arg *args)
 587 {
 588         struct token *t = (*p)->next->next;
 589         while (1) {
 590                 struct arg *v = &args[t->argnum];
 591                 if (token_type(t->next) != TOKEN_CONCAT) {
 592                         if (v->arg) {
 593                                 /* ignore the first ## */
 594                                 *p = (*p)->next;
 595                                 return 0;
 596                         }
 597                         /* skip the entire thing */
 598                         *p = t;
 599                         return 1;
 600                 }
 601                 if (v->arg && !eof_token(v->arg))
 602                         return 0; /* no magic */
 603                 t = t->next->next;
 604         }
 605 }
 606
 607 static struct token **substitute(struct token **list, struct token *body, struct arg *args)
 608 {
 609         struct position *base_pos = &(*list)->pos;
 610         int *count;
 611         enum {Normal, Placeholder, Concat} state = Normal;
 612
 613         for (; !eof_token(body); body = body->next) {
 614                 struct token *added, *arg;
 615                 struct token **tail;
 616                 struct token *t;
 617
 618                 switch (token_type(body)) {
 619                 case TOKEN_GNU_KLUDGE:
 620                         /*
 621                          * GNU kludge: if we had <comma>##<vararg>, behaviour
 622                          * depends on whether we had enough arguments to have
 623                          * a vararg.  If we did, ## is just ignored.  Otherwise
 624                          * both , and ## are ignored.  Worse, there can be
 625                          * an arbitrary number of ##<arg> in between; if all of
 626                          * those are empty, we act as if they hadn't been there,
 627                          * otherwise we act as if the kludge didn't exist.
 628                          */
 629                         t = body;
 630                         if (handle_kludge(&body, args)) {
 631                                 if (state == Concat)
 632                                         state = Normal;
 633                                 else
 634                                         state = Placeholder;
 635                                 continue;
 636                         }
 637                         added = dup_token(t, base_pos);
 638                         token_type(added) = TOKEN_SPECIAL;
 639                         tail = &added->next;
 640                         break;
 641
 642                 case TOKEN_STR_ARGUMENT:
 643                         arg = args[body->argnum].str;
 644                         count = &args[body->argnum].n_str;
 645                         goto copy_arg;
 646
 647                 case TOKEN_QUOTED_ARGUMENT:
 648                         arg = args[body->argnum].arg;
 649                         count = &args[body->argnum].n_quoted;
 650                         if (!arg || eof_token(arg)) {
 651                                 if (state == Concat)
 652                                         state = Normal;
 653                                 else
 654                                         state = Placeholder;
 655                                 continue;
 656                         }
 657                         goto copy_arg;
 658
 659                 case TOKEN_MACRO_ARGUMENT:
 660                         arg = args[body->argnum].expanded;
 661                         count = &args[body->argnum].n_normal;
 662                         if (eof_token(arg)) {
 663                                 state = Normal;
 664                                 continue;
 665                         }
 666                 copy_arg:
 667                         tail = copy(&added, arg, count);
 668                         added->pos.newline = body->pos.newline;
 669                         added->pos.whitespace = body->pos.whitespace;
 670                         break;
 671
 672                 case TOKEN_CONCAT:
 673                         if (state == Placeholder)
 674                                 state = Normal;
 675                         else
 676                                 state = Concat;
 677                         continue;
 678
 679                 case TOKEN_IDENT:
 680                         added = dup_token(body, base_pos);
 681                         if (added->ident->tainted)
 682                                 added->pos.noexpand = 1;
 683                         tail = &added->next;
 684                         break;
 685
 686                 default:
 687                         added = dup_token(body, base_pos);
 688                         tail = &added->next;
 689                         break;
 690                 }
 691
 692                 /*
 693                  * if we got to doing real concatenation, we already have
 694                  * added something into the list, so containing_token() is OK.
 695                  */
 696                 if (state == Concat && merge(containing_token(list), added)) {
 697                         *list = added->next;
 698                         if (tail != &added->next)
 699                                 list = tail;
 700                 } else {
 701                         *list = added;
 702                         list = tail;
 703                 }
 704                 state = Normal;
 705         }
 706         *list = &eof_token_entry;
 707         return list;
 708 }
 709
 710 static int expand(struct token **list, struct symbol *sym)
 711 {
 712         struct token *last;
 713         struct token *token = *list;
 714         struct ident *expanding = token->ident;
 715         struct token **tail;
 716         int nargs = sym->arglist ? sym->arglist->count.normal : 0;
 717         struct arg args[nargs];
 718
 719         if (expanding->tainted) {
 720                 token->pos.noexpand = 1;
 721                 return 1;
 722         }
 723
 724         if (sym->arglist) {
 725                 if (!match_op(scan_next(&token->next), '('))
 726                         return 1;
 727                 if (!collect_arguments(token->next, sym->arglist, args, token))
 728                         return 1;
 729                 expand_arguments(nargs, args);
 730         }
 731
 732         expanding->tainted = 1;
 733
 734         last = token->next;
 735         tail = substitute(list, sym->expansion, args);
 736         /*
 737          * Note that it won't be eof - at least TOKEN_UNTAINT will be there.
 738          * We still can lose the newline flag if the sucker expands to nothing,
 739          * but the price of dealing with that is probably too high (we'd need
 740          * to collect the flags during scan_next())
 741          */
 742         (*list)->pos.newline = token->pos.newline;
 743         (*list)->pos.whitespace = token->pos.whitespace;
 744         *tail = last;
 745
 746         return 0;
 747 }
 748
 749 static const char *token_name_sequence(struct token *token, int endop, struct token *start)
 750 {
 751         static char buffer[256];
 752         char *ptr = buffer;
 753
 754         while (!eof_token(token) && !match_op(token, endop)) {
 755                 int len;
 756                 const char *val = token->string->data;
 757                 if (token_type(token) != TOKEN_STRING)
 758                         val = show_token(token);
 759                 len = strlen(val);
 760                 memcpy(ptr, val, len);
 761                 ptr += len;
 762                 token = token->next;
 763         }
 764         *ptr = 0;
 765         if (endop && !match_op(token, endop))
 766                 sparse_error(start->pos, "expected '>' at end of filename");
 767         return buffer;
 768 }
 769
 770 static int already_tokenized(const char *path)
 771 {
 772         int stream, next;
 773
 774         for (stream = *hash_stream(path); stream >= 0 ; stream = next) {
 775                 struct stream *s = input_streams + stream;
 776
 777                 next = s->next_stream;
 778                 if (s->once) {
 779                         if (strcmp(path, s->name))
 780                                 continue;
 781                         return 1;
 782                 }
 783                 if (s->constant != CONSTANT_FILE_YES)
 784                         continue;
 785                 if (strcmp(path, s->name))
 786                         continue;
 787                 if (s->protect && !lookup_macro(s->protect))
 788                         continue;
 789                 return 1;
 790         }
 791         return 0;
 792 }
 793
 794 /* Handle include of header files.
  795  * The relevant options are made compatible with gcc. The only options that
  796  * are not supported are -iwithprefix and friends.
 797  *
  798  * Five sets of include paths are known:
 799  * quote_includepath:   Path to search when using #include "file.h"
 800  * angle_includepath:   Paths to search when using #include <file.h>
 801  * isys_includepath:    Paths specified with -isystem, come before the
 802  *                      built-in system include paths. Gcc would suppress
 803  *                      warnings from system headers. Here we separate
 804  *                      them from the angle_ ones to keep search ordering.
 805  *
 806  * sys_includepath:     Built-in include paths.
 * dirafter_includepath Paths added with -idirafter.
 808  *
 809  * The above is implemented as one array with pointers
 810  *                         +--------------+
 811  * quote_includepath --->  |              |
 812  *                         +--------------+
 813  *                         |              |
 814  *                         +--------------+
 815  * angle_includepath --->  |              |
 816  *                         +--------------+
 817  * isys_includepath  --->  |              |
 818  *                         +--------------+
 819  * sys_includepath   --->  |              |
 820  *                         +--------------+
 821  * dirafter_includepath -> |              |
 822  *                         +--------------+
 823  *
 824  * -I dir insert dir just before isys_includepath and move the rest
 825  * -I- makes all dirs specified with -I before to quote dirs only and
 826  *   angle_includepath is set equal to isys_includepath.
 827  * -nostdinc removes all sys dirs by storing NULL in entry pointed
 828  *   to by * sys_includepath. Note that this will reset all dirs built-in
 829  *   and added before -nostdinc by -isystem and -idirafter.
 830  * -isystem dir adds dir where isys_includepath points adding this dir as
 831  *   first systemdir
 832  * -idirafter dir adds dir to the end of the list
 833  */
 834
 835 static void set_stream_include_path(struct stream *stream)
 836 {
 837         const char *path = stream->path;
 838         if (!path) {
 839                 const char *p = strrchr(stream->name, '/');
 840                 path = "";
 841                 if (p) {
 842                         int len = p - stream->name + 1;
 843                         char *m = malloc(len+1);
 844                         /* This includes the final "/" */
 845                         memcpy(m, stream->name, len);
 846                         m[len] = 0;
 847                         path = m;
 848                 }
 849                 stream->path = path;
 850         }
 851         includepath[0] = path;
 852 }
 853
 854 static int try_include(const char *path, const char *filename, int flen, struct token **where, const char **next_path)
 855 {
 856         int fd;
 857         int plen = strlen(path);
 858         static char fullname[PATH_MAX];
 859
 860         memcpy(fullname, path, plen);
 861         if (plen && path[plen-1] != '/') {
 862                 fullname[plen] = '/';
 863                 plen++;
 864         }
 865         memcpy(fullname+plen, filename, flen);
 866         if (already_tokenized(fullname))
 867                 return 1;
 868         fd = open(fullname, O_RDONLY);
 869         if (fd >= 0) {
 870                 char * streamname = __alloc_bytes(plen + flen);
 871                 memcpy(streamname, fullname, plen + flen);
 872                 *where = tokenize(streamname, fd, *where, next_path);
 873                 close(fd);
 874                 return 1;
 875         }
 876         return 0;
 877 }
 878
 879 static int do_include_path(const char **pptr, struct token **list, struct token *token, const char *filename, int flen)
 880 {
 881         const char *path;
 882
 883         while ((path = *pptr++) != NULL) {
 884                 if (!try_include(path, filename, flen, list, pptr))
 885                         continue;
 886                 return 1;
 887         }
 888         return 0;
 889 }
 890
 891 static int free_preprocessor_line(struct token *token)
 892 {
 893         while (token_type(token) != TOKEN_EOF) {
 894                 struct token *free = token;
 895                 token = token->next;
 896                 __free_token(free);
 897         };
 898         return 1;
 899 }
 900
 901 const char *find_include(const char *skip, const char *look_for)
 902 {
 903         DIR *dp;
 904         struct dirent *entry;
 905         struct stat statbuf;
 906         const char *ret;
 907         char cwd[PATH_MAX];
 908         static char buf[PATH_MAX + 1];
 909
 910         dp = opendir(".");
 911         if (!dp)
 912                 return NULL;
 913
 914         if (!getcwd(cwd, sizeof(cwd)))
 915                 return NULL;
 916
 917         while ((entry = readdir(dp))) {
 918                 lstat(entry->d_name, &statbuf);
 919
 920                 if (strcmp(entry->d_name, look_for) == 0) {
 921                         snprintf(buf, sizeof(buf), "%s/%s", cwd, entry->d_name);
 922                         return buf;
 923                 }
 924
 925                 if (S_ISDIR(statbuf.st_mode)) {
 926                         /* Found a directory, but ignore . and .. */
 927                         if (strcmp(".", entry->d_name) == 0 ||
 928                             strcmp("..", entry->d_name) == 0 ||
 929                             strcmp(skip, entry->d_name) == 0)
 930                                 continue;
 931
 932                         chdir(entry->d_name);
 933                         ret = find_include("", look_for);
 934                         chdir("..");
 935                         if (ret)
 936                                 return ret;
 937                 }
 938         }
 939         closedir(dp);
 940
 941         return NULL;
 942 }
 943
 944 const char *search_dir(const char *stop, const char *look_for)
 945 {
 946         char cwd[PATH_MAX];
 947         int len;
 948         const char *ret;
 949         int cnt = 0;
 950
 951         if (!getcwd(cwd, sizeof(cwd)))
 952                 return NULL;
 953
 954         len = strlen(cwd);
 955         while (len >= 0) {
 956                 ret = find_include(cnt++ ? cwd + len + 1 : "", look_for);
 957                 if (ret)
 958                         return ret;
 959
 960                 if (strcmp(cwd, stop) == 0 ||
 961                     strcmp(cwd, "/usr/include") == 0 ||
 962                     strcmp(cwd, "/usr/local/include") == 0 ||
 963                     strlen(cwd) <= 10 ||  /* heck...  don't search /usr/lib/ */
 964                     strcmp(cwd, "/") == 0)
 965                         return NULL;
 966
 967                 while (--len >= 0) {
 968                         if (cwd[len] == '/') {
 969                                 cwd[len] = '\0';
 970                                 break;
 971                         }
 972                 }
 973
 974                 chdir("..");
 975         }
 976         return NULL;
 977 }
 978
 979 static void use_best_guess_header_file(struct token *token, const char *filename, struct token **list)
 980 {
 981         char cwd[PATH_MAX];
 982         char dir_part[PATH_MAX];
 983         const char *file_part;
 984         const char *include_name;
 985         int len;
 986
 987         if (!filename || filename[0] == '\0')
 988                 return;
 989
 990         file_part = filename;
 991         while ((filename = strchr(filename, '/'))) {
 992                 ++filename;
 993                 if (filename[0])
 994                         file_part = filename;
 995         }
 996
 997         snprintf(dir_part, sizeof(dir_part), "%s", stream_name(token->pos.stream));
 998         len = strlen(dir_part);
 999         while (--len >= 0) {
1000                 if (dir_part[len] == '/') {
1001                         dir_part[len] = '\0';
1002                         break;
1003                 }
1004         }
1005         if (len < 0)
1006                 sprintf(dir_part, ".");
1007
1008         if (!getcwd(cwd, sizeof(cwd)))
1009                 return;
1010
1011         chdir(dir_part);
1012         include_name = search_dir(cwd, file_part);
1013         chdir(cwd);
1014         if (!include_name)
1015                 return;
1016         sparse_error(token->pos, "using '%s'", include_name);
1017
1018         try_include("", include_name, strlen(include_name), list, includepath);
1019 }
1020
1021 static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how)
1022 {
1023         const char *filename;
1024         struct token *next;
1025         const char **path;
1026         int expect;
1027         int flen;
1028
1029         next = token->next;
1030         expect = '>';
1031         if (!match_op(next, '<')) {
1032                 expand_list(&token->next);
1033                 expect = 0;
1034                 next = token;
1035                 if (match_op(token->next, '<')) {
1036                         next = token->next;
1037                         expect = '>';
1038                 }
1039         }
1040
1041         token = next->next;
1042         filename = token_name_sequence(token, expect, token);
1043         flen = strlen(filename) + 1;
1044
1045         /* Absolute path? */
1046         if (filename[0] == '/') {
1047                 if (try_include("", filename, flen, list, includepath))
1048                         return 0;
1049                 goto out;
1050         }
1051
1052         switch (how) {
1053         case 1:
1054                 path = stream->next_path;
1055                 break;
1056         case 2:
1057                 includepath[0] = "";
1058                 path = includepath;
1059                 break;
1060         default:
1061                 /* Dir of input file is first dir to search for quoted includes */
1062                 set_stream_include_path(stream);
1063                 path = expect ? angle_includepath : quote_includepath;
1064                 break;
1065         }
1066         /* Check the standard include paths.. */
1067         if (do_include_path(path, list, token, filename, flen))
1068                 return 0;
1069 out:
1070         sparse_error(token->pos, "unable to open '%s'", filename);
1071         use_best_guess_header_file(token, filename, list);
1072         return 0;
1073 }
1074
/*
 * Include handler using handle_include_path() with how == 0: the search
 * starts with the including file's directory for quoted names, or at
 * angle_includepath for <...> names.
 */
static int handle_include(struct stream *stream, struct token **list, struct token *token)
{
        return handle_include_path(stream, list, token, 0);
}
1079
/*
 * Include handler using handle_include_path() with how == 1: the search
 * starts at stream->next_path instead of at the beginning of the path.
 */
static int handle_include_next(struct stream *stream, struct token **list, struct token *token)
{
        return handle_include_path(stream, list, token, 1);
}
1084
/*
 * Include handler using handle_include_path() with how == 2: the first
 * include path entry is reset to "" and the whole includepath array is
 * searched.  NOTE(review): presumably used for files included from the
 * command line - confirm against the caller.
 */
static int handle_argv_include(struct stream *stream, struct token **list, struct token *token)
{
        return handle_include_path(stream, list, token, 2);
}
1089
1090 static int token_different(struct token *t1, struct token *t2)
1091 {
1092         int different;
1093
1094         if (token_type(t1) != token_type(t2))
1095                 return 1;
1096
1097         switch (token_type(t1)) {
1098         case TOKEN_IDENT:
1099                 different = t1->ident != t2->ident;
1100                 break;
1101         case TOKEN_ARG_COUNT:
1102         case TOKEN_UNTAINT:
1103         case TOKEN_CONCAT:
1104         case TOKEN_GNU_KLUDGE:
1105                 different = 0;
1106                 break;
1107         case TOKEN_NUMBER:
1108                 different = strcmp(t1->number, t2->number);
1109                 break;
1110         case TOKEN_SPECIAL:
1111                 different = t1->special != t2->special;
1112                 break;
1113         case TOKEN_MACRO_ARGUMENT:
1114         case TOKEN_QUOTED_ARGUMENT:
1115         case TOKEN_STR_ARGUMENT:
1116                 different = t1->argnum != t2->argnum;
1117                 break;
1118         case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
1119         case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
1120                 different = memcmp(t1->embedded, t2->embedded, 4);
1121                 break;
1122         case TOKEN_CHAR:
1123         case TOKEN_WIDE_CHAR:
1124         case TOKEN_STRING:
1125         case TOKEN_WIDE_STRING: {
1126                 struct string *s1, *s2;
1127
1128                 s1 = t1->string;
1129                 s2 = t2->string;
1130                 different = 1;
1131                 if (s1->length != s2->length)
1132                         break;
1133                 different = memcmp(s1->data, s2->data, s1->length);
1134                 break;
1135         }
1136         default:
1137                 different = 1;
1138                 break;
1139         }
1140         return different;
1141 }
1142
1143 static int token_list_different(struct token *list1, struct token *list2)
1144 {
1145         for (;;) {
1146                 if (list1 == list2)
1147                         return 0;
1148                 if (!list1 || !list2)
1149                         return 1;
1150                 if (token_different(list1, list2))
1151                         return 1;
1152                 list1 = list1->next;
1153                 list2 = list2->next;
1154         }
1155 }
1156
1157 static inline void set_arg_count(struct token *token)
1158 {
1159         token_type(token) = TOKEN_ARG_COUNT;
1160         token->count.normal = token->count.quoted =
1161         token->count.str = token->count.vararg = 0;
1162 }
1163
/*
 * Parse the parameter list of a function-like macro definition.
 *
 * list: the token that opens the parameter list.
 *
 * On success the opening token and every ',' / ')' (or trailing "...")
 * separator that follows a parameter are converted into TOKEN_ARG_COUNT
 * tokens with zeroed counters, the parameter list is terminated with
 * eof_token_entry, and the first token after the closing ')' is returned.
 * The number of parameters is accumulated in the opening token's
 * count.normal, and the separator following a variadic parameter gets
 * count.vararg set.
 *
 * On a malformed list an error is reported and NULL is returned.
 */
static struct token *parse_arguments(struct token *list)
{
        struct token *arg = list->next, *next = list;
        struct argcount *count = &list->count;

        set_arg_count(list);

        /* empty parameter list: "()" */
        if (match_op(arg, ')')) {
                next = arg->next;
                list->next = &eof_token_entry;
                return next;
        }

        while (token_type(arg) == TOKEN_IDENT) {
                if (arg->ident == &__VA_ARGS___ident)
                        goto Eva_args;
                /* the counter wrapping to zero means too many parameters */
                if (!++count->normal)
                        goto Eargs;
                next = arg->next;

                /* "name," - more parameters follow */
                if (match_op(next, ',')) {
                        set_arg_count(next);
                        arg = next->next;
                        continue;
                }

                /* "name)" - last parameter; cut the list after the ')' */
                if (match_op(next, ')')) {
                        set_arg_count(next);
                        next = next->next;
                        arg->next->next = &eof_token_entry;
                        return next;
                }

                /* normal cases are finished here */

                /* "name...)" - named variadic parameter */
                if (match_op(next, SPECIAL_ELLIPSIS)) {
                        if (match_op(next->next, ')')) {
                                /* the "..." token becomes this parameter's counter */
                                set_arg_count(next);
                                next->count.vararg = 1;
                                next = next->next;
                                arg->next->next = &eof_token_entry;
                                /* next is the ')' here; return what follows it */
                                return next->next;
                        }

                        arg = next;
                        goto Enotclosed;
                }

                if (eof_token(next)) {
                        goto Enotclosed;
                } else {
                        arg = next;
                        goto Ebadstuff;
                }
        }

        /* bare "...": treated as a parameter named __VA_ARGS__ */
        if (match_op(arg, SPECIAL_ELLIPSIS)) {
                next = arg->next;
                token_type(arg) = TOKEN_IDENT;
                arg->ident = &__VA_ARGS___ident;
                if (!match_op(next, ')'))
                        goto Enotclosed;
                if (!++count->normal)
                        goto Eargs;
                set_arg_count(next);
                next->count.vararg = 1;
                next = next->next;
                arg->next->next = &eof_token_entry;
                return next;
        }

        if (eof_token(arg)) {
                /* report at the last token seen before the end of line */
                arg = next;
                goto Enotclosed;
        }
        if (match_op(arg, ','))
                goto Emissing;
        else
                goto Ebadstuff;


Emissing:
        sparse_error(arg->pos, "parameter name missing");
        return NULL;
Ebadstuff:
        sparse_error(arg->pos, "\"%s\" may not appear in macro parameter list",
                show_token(arg));
        return NULL;
Enotclosed:
        sparse_error(arg->pos, "missing ')' in macro parameter list");
        return NULL;
Eva_args:
        sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
        return NULL;
Eargs:
        sparse_error(arg->pos, "too many arguments in macro definition");
        return NULL;
}
1262
1263 static int try_arg(struct token *token, enum token_type type, struct token *arglist)
1264 {
1265         struct ident *ident = token->ident;
1266         int nr;
1267
1268         if (!arglist || token_type(token) != TOKEN_IDENT)
1269                 return 0;
1270
1271         arglist = arglist->next;
1272
1273         for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) {
1274                 if (arglist->ident == ident) {
1275                         struct argcount *count = &arglist->next->count;
1276                         int n;
1277
1278                         token->argnum = nr;
1279                         token_type(token) = type;
1280                         switch (type) {
1281                         case TOKEN_MACRO_ARGUMENT:
1282                                 n = ++count->normal;
1283                                 break;
1284                         case TOKEN_QUOTED_ARGUMENT:
1285                                 n = ++count->quoted;
1286                                 break;
1287                         default:
1288                                 n = ++count->str;
1289                         }
1290                         if (n)
1291                                 return count->vararg ? 2 : 1;
1292                         /*
1293                          * XXX - need saner handling of that
1294                          * (>= 1024 instances of argument)
1295                          */
1296                         token_type(token) = TOKEN_ERROR;
1297                         return -1;
1298                 }
1299         }
1300         return 0;
1301 }
1302
1303 static struct token *handle_hash(struct token **p, struct token *arglist)
1304 {
1305         struct token *token = *p;
1306         if (arglist) {
1307                 struct token *next = token->next;
1308                 if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
1309                         goto Equote;
1310                 next->pos.whitespace = token->pos.whitespace;
1311                 __free_token(token);
1312                 token = *p = next;
1313         } else {
1314                 token->pos.noexpand = 1;
1315         }
1316         return token;
1317
1318 Equote:
1319         sparse_error(token->pos, "'#' is not followed by a macro parameter");
1320         return NULL;
1321 }
1322
/*
 * Handle a chain of '##' concatenations in a macro body.
 *
 * token:   the left operand of the first '##', i.e. token->next is ##.
 * arglist: parameter list of the macro, or NULL.
 *
 * Each operand that names a macro parameter becomes a
 * TOKEN_QUOTED_ARGUMENT, runs of consecutive '##' are collapsed into one,
 * and each remaining '##' becomes a TOKEN_CONCAT.  A '#' operand is
 * processed by handle_hash().
 *
 * 'state' tracks the GNU ",##__VA_ARGS__" form: it is 1 right after a ','
 * operand, and becomes try_arg()'s result when the next operand is a
 * parameter, so it ends up as 2 only for ", ## <variadic parameter>".
 * In that case 'last' (the ',') is turned into TOKEN_GNU_KLUDGE.
 *
 * Returns the last operand of the chain, or NULL after reporting an error.
 */
static struct token *handle_hashhash(struct token *token, struct token *arglist)
{
        struct token *last = token;
        struct token *concat;
        int state = match_op(token, ',');

        try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);

        while (1) {
                struct token *t;
                int is_arg;

                /* eat duplicate ## */
                concat = token->next;
                while (match_op(t = concat->next, SPECIAL_HASHHASH)) {
                        token->next = t;
                        __free_token(concat);
                        concat = t;
                }
                token_type(concat) = TOKEN_CONCAT;

                /* '##' with nothing after it */
                if (eof_token(t))
                        goto Econcat;

                if (match_op(t, '#')) {
                        t = handle_hash(&concat->next, arglist);
                        if (!t)
                                return NULL;
                }

                is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);

                if (state == 1 && is_arg) {
                        /* ", ## parameter": remember what kind of parameter */
                        state = is_arg;
                } else {
                        last = t;
                        state = match_op(t, ',');
                }

                token = t;
                if (!match_op(token->next, SPECIAL_HASHHASH))
                        break;
        }
        /* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */
        if (state == 2)
                token_type(last) = TOKEN_GNU_KLUDGE;
        return token;

Econcat:
        sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion");
        return NULL;
}
1376
/*
 * Prepare the body of a macro definition.
 *
 * expansion: first token of the body.
 * arglist:   parameter list of the macro, or NULL.
 * name:      identifier of the macro being defined.
 *
 * Walks the body, processing '#' with handle_hash() and '##' chains with
 * handle_hashhash(), and converting the remaining parameter names into
 * TOKEN_MACRO_ARGUMENT tokens.  String tokens in the body are marked
 * immutable.  A TOKEN_UNTAINT token carrying 'name' is inserted just
 * before the end-of-list token.
 *
 * Returns the (possibly changed) first token of the body, or NULL after
 * reporting an error.
 */
static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
{
        struct token *token = expansion;
        struct token **p;

        if (match_op(token, SPECIAL_HASHHASH))
                goto Econcat;

        /* p always addresses the link that points at 'token' */
        for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
                if (match_op(token, '#')) {
                        /* may replace *p, which is why the link is tracked */
                        token = handle_hash(p, arglist);
                        if (!token)
                                return NULL;
                }
                if (match_op(token->next, SPECIAL_HASHHASH)) {
                        /* continues from the last operand of the ## chain */
                        token = handle_hashhash(token, arglist);
                        if (!token)
                                return NULL;
                } else {
                        try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
                }
                switch (token_type(token)) {
                case TOKEN_ERROR:
                        /* try_arg() sets this when a use counter wraps */
                        goto Earg;

                case TOKEN_STRING:
                case TOKEN_WIDE_STRING:
                        token->string->immutable = 1;
                        break;
                }
        }
        /* append the untaint marker in front of the end-of-list token */
        token = alloc_token(&expansion->pos);
        token_type(token) = TOKEN_UNTAINT;
        token->ident = name;
        token->next = *p;
        *p = token;
        return expansion;

Econcat:
        sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
        return NULL;
Earg:
        sparse_error(token->pos, "too many instances of argument in body");
        return NULL;
}
1422
/*
 * Common implementation of the macro-defining directives.
 *
 * token: the directive token; token->next must be the macro name.
 * attr:  SYM_ATTR_* value of this definition.  A definition whose attr is
 *        lower than that of the existing symbol is ignored.
 *
 * A '(' directly after the name (no whitespace) starts a parameter list.
 * A warning is issued when the definition differs from an existing one
 * and either both have SYM_ATTR_NORMAL and the old one is a live macro,
 * or the existing symbol's used_in is file_scope.
 *
 * Returns 0 when the symbol's expansion and arglist were (re)assigned; in
 * that case the directive token has been freed and the rest of the line
 * is kept.  Returns 1 on error or when nothing needed to be stored.
 */
static int do_handle_define(struct stream *stream, struct token **line, struct token *token, int attr)
{
        struct token *arglist, *expansion;
        struct token *left = token->next;
        struct symbol *sym;
        struct ident *name;
        int ret;

        if (token_type(left) != TOKEN_IDENT) {
                sparse_error(token->pos, "expected identifier to 'define'");
                return 1;
        }

        name = left->ident;

        arglist = NULL;
        expansion = left->next;
        if (!expansion->pos.whitespace) {
                /* only a '(' glued to the name starts a parameter list */
                if (match_op(expansion, '(')) {
                        arglist = expansion;
                        expansion = parse_arguments(expansion);
                        if (!expansion)
                                return 1;
                } else if (!eof_token(expansion)) {
                        warning(expansion->pos,
                                "no whitespace before object-like macro body");
                }
        }

        expansion = parse_expansion(expansion, arglist, name);
        if (!expansion)
                return 1;

        ret = 1;
        sym = lookup_symbol(name, NS_MACRO | NS_UNDEF);
        if (sym) {
                int clean;

                /* the existing symbol has a higher attr: leave it alone */
                if (attr < sym->attr)
                        goto out;

                /* same attr and currently defined (not undefined) */
                clean = (attr == sym->attr && sym->namespace == NS_MACRO);

                if (token_list_different(sym->expansion, expansion) ||
                    token_list_different(sym->arglist, arglist)) {
                        ret = 0;
                        if ((clean && attr == SYM_ATTR_NORMAL)
                                        || sym->used_in == file_scope) {
                                warning(left->pos, "preprocessor token %.*s redefined",
                                                name->len, name->name);
                                info(sym->pos, "this was the original definition");
                        }
                } else if (clean)
                        /* identical redefinition: nothing to do */
                        goto out;
        }

        /* no symbol yet, or the one found is not in file_scope: bind a new one */
        if (!sym || sym->scope != file_scope) {
                sym = alloc_symbol(left->pos, SYM_NODE);
                bind_symbol(sym, name, NS_MACRO);
                add_ident(&macros, name);
                ret = 0;
        }

        if (!ret) {
                sym->expansion = expansion;
                sym->arglist = arglist;
                __free_token(token);    /* Free the "define" token, but not the rest of the line */
        }

        sym->namespace = NS_MACRO;
        sym->used_in = NULL;
        sym->attr = attr;
out:
        return ret;
}
1498
/* Define a macro with attribute SYM_ATTR_NORMAL; see do_handle_define(). */
static int handle_define(struct stream *stream, struct token **line, struct token *token)
{
        return do_handle_define(stream, line, token, SYM_ATTR_NORMAL);
}
1503
/* Define a macro with attribute SYM_ATTR_WEAK; see do_handle_define(). */
static int handle_weak_define(struct stream *stream, struct token **line, struct token *token)
{
        return do_handle_define(stream, line, token, SYM_ATTR_WEAK);
}
1508
/* Define a macro with attribute SYM_ATTR_STRONG; see do_handle_define(). */
static int handle_strong_define(struct stream *stream, struct token **line, struct token *token)
{
        return do_handle_define(stream, line, token, SYM_ATTR_STRONG);
}
1513
1514 static int do_handle_undef(struct stream *stream, struct token **line, struct token *token, int attr)
1515 {
1516         struct token *left = token->next;
1517         struct symbol *sym;
1518
1519         if (token_type(left) != TOKEN_IDENT) {
1520                 sparse_error(token->pos, "expected identifier to 'undef'");
1521                 return 1;
1522         }
1523
1524         sym = lookup_symbol(left->ident, NS_MACRO | NS_UNDEF);
1525         if (sym) {
1526                 if (attr < sym->attr)
1527                         return 1;
1528                 if (attr == sym->attr && sym->namespace == NS_UNDEF)
1529                         return 1;
1530         } else if (attr <= SYM_ATTR_NORMAL)
1531                 return 1;
1532
1533         if (!sym || sym->scope != file_scope) {
1534                 sym = alloc_symbol(left->pos, SYM_NODE);
1535                 bind_symbol(sym, left->ident, NS_MACRO);
1536         }
1537
1538         sym->namespace = NS_UNDEF;
1539         sym->used_in = NULL;
1540         sym->attr = attr;
1541
1542         return 1;
1543 }
1544
/* Undefine a macro with attribute SYM_ATTR_NORMAL; see do_handle_undef(). */
static int handle_undef(struct stream *stream, struct token **line, struct token *token)
{
        return do_handle_undef(stream, line, token, SYM_ATTR_NORMAL);
}
1549
/* Undefine a macro with attribute SYM_ATTR_STRONG; see do_handle_undef(). */
static int handle_strong_undef(struct stream *stream, struct token **line, struct token *token)
{
        return do_handle_undef(stream, line, token, SYM_ATTR_STRONG);
}
1554
1555 static int preprocessor_if(struct stream *stream, struct token *token, int true)
1556 {
1557         token_type(token) = false_nesting ? TOKEN_SKIP_GROUPS : TOKEN_IF;
1558         free_preprocessor_line(token->next);
1559         token->next = stream->top_if;
1560         stream->top_if = token;
1561         if (false_nesting || true != 1)
1562                 false_nesting++;
1563         return 0;
1564 }
1565
1566 static int handle_ifdef(struct stream *stream, struct token **line, struct token *token)
1567 {
1568         struct token *next = token->next;
1569         int arg;
1570         if (token_type(next) == TOKEN_IDENT) {
1571                 arg = token_defined(next);
1572         } else {
1573                 dirty_stream(stream);
1574                 if (!false_nesting)
1575                         sparse_error(token->pos, "expected preprocessor identifier");
1576                 arg = -1;
1577         }
1578         return preprocessor_if(stream, token, arg);
1579 }
1580
1581 static int handle_ifndef(struct stream *stream, struct token **line, struct token *token)
1582 {
1583         struct token *next = token->next;
1584         int arg;
1585         if (token_type(next) == TOKEN_IDENT) {
1586                 if (!stream->dirty && !stream->ifndef) {
1587                         if (!stream->protect) {
1588                                 stream->ifndef = token;
1589                                 stream->protect = next->ident;
1590                         } else if (stream->protect == next->ident) {
1591                                 stream->ifndef = token;
1592                                 stream->dirty = 1;
1593                         }
1594                 }
1595                 arg = !token_defined(next);
1596         } else {
1597                 dirty_stream(stream);
1598                 if (!false_nesting)
1599                         sparse_error(token->pos, "expected preprocessor identifier");
1600                 arg = -1;
1601         }
1602
1603         return preprocessor_if(stream, token, arg);
1604 }
1605
1606 static const char *show_token_sequence(struct token *token, int quote);
1607
/*
 * Expression handling for #if and #elif; it differs from normal expansion
 * due to special treatment of "defined".
 *
 * where: address of the link to the first token of the expression.
 *
 * The first pass walks the tokens with a small state machine:
 *   state 0 - ordinary tokens: identifiers are handed to
 *             expand_one_symbol(); those that are still identifiers
 *             afterwards become TOKEN_ZERO_IDENT
 *   state 1 - just saw "defined"
 *   state 2 - just saw "defined ("
 *   state 3 - just saw "defined ( token", a ')' is expected
 * In states 1 and 2 the operand is passed to replace_with_defined() and
 * linked in at the place of the "defined" keyword.
 *
 * The resulting list is then parsed with constant_expression() and
 * evaluated.  Returns 1 when the value is non-zero, 0 otherwise.
 */
static int expression_value(struct token **where)
{
        struct expression *expr;
        struct token *p;
        struct token **list = where, **beginning = NULL;
        long long value;
        int state = 0;

        while (!eof_token(p = scan_next(list))) {
                switch (state) {
                case 0:
                        if (token_type(p) != TOKEN_IDENT)
                                break;
                        if (p->ident == &defined_ident) {
                                state = 1;
                                /* remember the link that points at "defined" */
                                beginning = list;
                                break;
                        }
                        /* NOTE(review): a zero return presumably means the
                         * list was changed in place, so rescan from the same
                         * link - confirm against expand_one_symbol() */
                        if (!expand_one_symbol(list))
                                continue;
                        if (token_type(p) != TOKEN_IDENT)
                                break;
                        token_type(p) = TOKEN_ZERO_IDENT;
                        break;
                case 1:
                        if (match_op(p, '(')) {
                                state = 2;
                        } else {
                                /* "defined X": X replaces the keyword */
                                state = 0;
                                replace_with_defined(p);
                                *beginning = p;
                        }
                        break;
                case 2:
                        /* only an identifier operand makes us expect ')' */
                        if (token_type(p) == TOKEN_IDENT)
                                state = 3;
                        else
                                state = 0;
                        replace_with_defined(p);
                        *beginning = p;
                        break;
                case 3:
                        state = 0;
                        if (!match_op(p, ')'))
                                sparse_error(p->pos, "missing ')' after \"defined\"");
                        /* unlink this token and rescan from the same link */
                        *list = p->next;
                        continue;
                }
                list = &p->next;
        }

        p = constant_expression(*where, &expr);
        if (!eof_token(p))
                sparse_error(p->pos, "garbage at end: %s", show_token_sequence(p, 0));
        value = get_expression_value(expr);
        return value != 0;
}
1669
1670 static int handle_if(struct stream *stream, struct token **line, struct token *token)
1671 {
1672         int value = 0;
1673         if (!false_nesting)
1674                 value = expression_value(&token->next);
1675
1676         dirty_stream(stream);
1677         return preprocessor_if(stream, token, value);
1678 }
1679
1680 static int handle_elif(struct stream * stream, struct token **line, struct token *token)
1681 {
1682         struct token *top_if = stream->top_if;
1683         end_group(stream);
1684
1685         if (!top_if) {
1686                 nesting_error(stream);
1687                 sparse_error(token->pos, "unmatched #elif within stream");
1688                 return 1;
1689         }
1690
1691         if (token_type(top_if) == TOKEN_ELSE) {
1692                 nesting_error(stream);
1693                 sparse_error(token->pos, "#elif after #else");
1694                 if (!false_nesting)
1695                         false_nesting = 1;
1696                 return 1;
1697         }
1698
1699         dirty_stream(stream);
1700         if (token_type(top_if) != TOKEN_IF)
1701                 return 1;
1702         if (false_nesting) {
1703                 false_nesting = 0;
1704                 if (!expression_value(&token->next))
1705                         false_nesting = 1;
1706         } else {
1707                 false_nesting = 1;
1708                 token_type(top_if) = TOKEN_SKIP_GROUPS;
1709         }
1710         return 1;
1711 }
1712
1713 static int handle_else(struct stream *stream, struct token **line, struct token *token)
1714 {
1715         struct token *top_if = stream->top_if;
1716         end_group(stream);
1717
1718         if (!top_if) {
1719                 nesting_error(stream);
1720                 sparse_error(token->pos, "unmatched #else within stream");
1721                 return 1;
1722         }
1723
1724         if (token_type(top_if) == TOKEN_ELSE) {
1725                 nesting_error(stream);
1726                 sparse_error(token->pos, "#else after #else");
1727         }
1728         if (false_nesting) {
1729                 if (token_type(top_if) == TOKEN_IF)
1730                         false_nesting = 0;
1731         } else {
1732                 false_nesting = 1;
1733         }
1734         token_type(top_if) = TOKEN_ELSE;
1735         return 1;
1736 }
1737
1738 static int handle_endif(struct stream *stream, struct token **line, struct token *token)
1739 {
1740         struct token *top_if = stream->top_if;
1741         end_group(stream);
1742         if (!top_if) {
1743                 nesting_error(stream);
1744                 sparse_error(token->pos, "unmatched #endif in stream");
1745                 return 1;
1746         }
1747         if (false_nesting)
1748                 false_nesting--;
1749         stream->top_if = top_if->next;
1750         __free_token(top_if);
1751         return 1;
1752 }
1753
1754 static int handle_warning(struct stream *stream, struct token **line, struct token *token)
1755 {
1756         warning(token->pos, "%s", show_token_sequence(token->next, 0));
1757         return 1;
1758 }
1759
1760 static int handle_error(struct stream *stream, struct token **line, struct token *token)
1761 {
1762         sparse_error(token->pos, "%s", show_token_sequence(token->next, 0));
1763         return 1;
1764 }
1765
1766 static int handle_nostdinc(struct stream *stream, struct token **line, struct token *token)
1767 {
1768         /*
1769          * Do we have any non-system includes?
1770          * Clear them out if so..
1771          */
1772         *sys_includepath = NULL;
1773         return 1;
1774 }
1775
1776 static inline void update_inc_ptrs(const char ***where)
1777 {
1778
1779         if (*where <= dirafter_includepath) {
1780                 dirafter_includepath++;
1781                 /* If this was the entry that we prepend, don't
1782                  * rise the lower entries, even if they are at
1783                  * the same level. */
1784                 if (where == &dirafter_includepath)
1785                         return;
1786         }
1787         if (*where <= sys_includepath) {
1788                 sys_includepath++;
1789                 if (where == &sys_includepath)
1790                         return;
1791         }
1792         if (*where <= isys_includepath) {
1793                 isys_includepath++;
1794                 if (where == &isys_includepath)
1795                         return;
1796         }
1797
1798         /* angle_includepath is actually never updated, since we
1799          * don't suppport -iquote rught now. May change some day. */
1800         if (*where <= angle_includepath) {
1801                 angle_includepath++;
1802                 if (where == &angle_includepath)
1803                         return;
1804         }
1805 }
1806
1807 /* Add a path before 'where' and update the pointers associated with the
1808  * includepath array */
1809 static void add_path_entry(struct token *token, const char *path,
1810         const char ***where)
1811 {
1812         const char **dst;
1813         const char *next;
1814
1815         /* Need one free entry.. */
1816         if (includepath[INCLUDEPATHS-2])
1817                 error_die(token->pos, "too many include path entries");
1818
1819         /* check that this is not a duplicate */
1820         dst = includepath;
1821         while (*dst) {
1822                 if (strcmp(*dst, path) == 0)
1823                         return;
1824                 dst++;
1825         }
1826         next = path;
1827         dst = *where;
1828
1829         update_inc_ptrs(where);
1830
1831         /*
1832          * Move them all up starting at dst,
1833          * insert the new entry..
1834          */
1835         do {
1836                 const char *tmp = *dst;
1837                 *dst = next;
1838                 next = tmp;
1839                 dst++;
1840         } while (next);
1841 }
1842
1843 static int handle_add_include(struct stream *stream, struct token **line, struct token *token)
1844 {
1845         for (;;) {
1846                 token = token->next;
1847                 if (eof_token(token))
1848                         return 1;
1849                 if (token_type(token) != TOKEN_STRING) {
1850                         warning(token->pos, "expected path string");
1851                         return 1;
1852                 }
1853                 add_path_entry(token, token->string->data, &isys_includepath);
1854         }
1855 }
1856
1857 static int handle_add_isystem(struct stream *stream, struct token **line, struct token *token)
1858 {
1859         for (;;) {
1860                 token = token->next;
1861                 if (eof_token(token))
1862                         return 1;
1863                 if (token_type(token) != TOKEN_STRING) {
1864                         sparse_error(token->pos, "expected path string");
1865                         return 1;
1866                 }
1867                 add_path_entry(token, token->string->data, &sys_includepath);
1868         }
1869 }
1870
1871 static int handle_add_system(struct stream *stream, struct token **line, struct token *token)
1872 {
1873         for (;;) {
1874                 token = token->next;
1875                 if (eof_token(token))
1876                         return 1;
1877                 if (token_type(token) != TOKEN_STRING) {
1878                         sparse_error(token->pos, "expected path string");
1879                         return 1;
1880                 }
1881                 add_path_entry(token, token->string->data, &dirafter_includepath);
1882         }
1883 }
1884
1885 /* Add to end on includepath list - no pointer updates */
1886 static void add_dirafter_entry(struct token *token, const char *path)
1887 {
1888         const char **dst = includepath;
1889
1890         /* Need one free entry.. */
1891         if (includepath[INCLUDEPATHS-2])
1892                 error_die(token->pos, "too many include path entries");
1893
1894         /* Add to the end */
1895         while (*dst)
1896                 dst++;
1897         *dst = path;
1898         dst++;
1899         *dst = NULL;
1900 }
1901
1902 static int handle_add_dirafter(struct stream *stream, struct token **line, struct token *token)
1903 {
1904         for (;;) {
1905                 token = token->next;
1906                 if (eof_token(token))
1907                         return 1;
1908                 if (token_type(token) != TOKEN_STRING) {
1909                         sparse_error(token->pos, "expected path string");
1910                         return 1;
1911                 }
1912                 add_dirafter_entry(token, token->string->data);
1913         }
1914 }
1915
1916 static int handle_split_include(struct stream *stream, struct token **line, struct token *token)
1917 {
1918         /*
1919          * -I-
1920          *  From info gcc:
1921          *  Split the include path.  Any directories specified with `-I'
1922          *  options before `-I-' are searched only for headers requested with
1923          *  `#include "FILE"'; they are not searched for `#include <FILE>'.
1924          *  If additional directories are specified with `-I' options after
1925          *  the `-I-', those directories are searched for all `#include'
1926          *  directives.
1927          *  In addition, `-I-' inhibits the use of the directory of the current
1928          *  file directory as the first search directory for `#include "FILE"'.
1929          */
1930         quote_includepath = includepath+1;
1931         angle_includepath = sys_includepath;
1932         return 1;
1933 }
1934
1935 /*
1936  * We replace "#pragma xxx" with "__pragma__" in the token
1937  * stream. Just as an example.
1938  *
1939  * We'll just #define that away for now, but the theory here
1940  * is that we can use this to insert arbitrary token sequences
1941  * to turn the pragmas into internal front-end sequences for
1942  * when we actually start caring about them.
1943  *
1944  * So eventually this will turn into some kind of extended
1945  * __attribute__() like thing, except called __pragma__(xxx).
1946  */
1947 static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
1948 {
1949         struct token *next = *line;
1950
1951         if (match_ident(token->next, &once_ident) && eof_token(token->next->next)) {
1952                 stream->once = 1;
1953                 return 1;
1954         }
1955         token->ident = &pragma_ident;
1956         token->pos.newline = 1;
1957         token->pos.whitespace = 1;
1958         token->pos.pos = 1;
1959         *line = token;
1960         token->next = next;
1961         return 0;
1962 }
1963
/*
 * #line (and "# <number>" lines, which are dispatched here as well) is
 * accepted but has no effect for now.  Always returns 1.
 */
static int handle_line(struct stream *stream, struct token **line, struct token *token)
{
        (void)stream;
        (void)line;
        (void)token;
        return 1;
}
1971
/*
 * "#ident" is accepted and ignored.  Always returns 1.
 */
static int handle_ident(struct stream *stream, struct token **line, struct token *token)
{
        (void)stream;
        (void)line;
        (void)token;
        return 1;
}
1979
1980 static int handle_nondirective(struct stream *stream, struct token **line, struct token *token)
1981 {
1982         sparse_error(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token, 0));
1983         return 1;
1984 }
1985
1986
1987 static void init_preprocessor(void)
1988 {
1989         int i;
1990         int stream = init_stream("preprocessor", -1, includepath);
1991         static struct {
1992                 const char *name;
1993                 int (*handler)(struct stream *, struct token **, struct token *);
1994         } normal[] = {
1995                 { "define",             handle_define },
1996                 { "weak_define",        handle_weak_define },
1997                 { "strong_define",      handle_strong_define },
1998                 { "undef",              handle_undef },
1999                 { "strong_undef",       handle_strong_undef },
2000                 { "warning",            handle_warning },
2001                 { "error",              handle_error },
2002                 { "include",            handle_include },
2003                 { "include_next",       handle_include_next },
2004                 { "pragma",             handle_pragma },
2005                 { "line",               handle_line },
2006                 { "ident",              handle_ident },
2007
2008                 // our internal preprocessor tokens
2009                 { "nostdinc",      handle_nostdinc },
2010                 { "add_include",   handle_add_include },
2011                 { "add_isystem",   handle_add_isystem },
2012                 { "add_system",    handle_add_system },
2013                 { "add_dirafter",  handle_add_dirafter },
2014                 { "split_include", handle_split_include },
2015                 { "argv_include",  handle_argv_include },
2016         }, special[] = {
2017                 { "ifdef",      handle_ifdef },
2018                 { "ifndef",     handle_ifndef },
2019                 { "else",       handle_else },
2020                 { "endif",      handle_endif },
2021                 { "if",         handle_if },
2022                 { "elif",       handle_elif },
2023         };
2024
2025         for (i = 0; i < ARRAY_SIZE(normal); i++) {
2026                 struct symbol *sym;
2027                 sym = create_symbol(stream, normal[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR);
2028                 sym->handler = normal[i].handler;
2029                 sym->normal = 1;
2030         }
2031         for (i = 0; i < ARRAY_SIZE(special); i++) {
2032                 struct symbol *sym;
2033                 sym = create_symbol(stream, special[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR);
2034                 sym->handler = special[i].handler;
2035                 sym->normal = 0;
2036         }
2037
2038         counter_macro = 0;
2039 }
2040
2041 static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *start)
2042 {
2043         int (*handler)(struct stream *, struct token **, struct token *);
2044         struct token *token = start->next;
2045         int is_normal = 1;
2046
2047         if (eof_token(token))
2048                 return;
2049
2050         if (token_type(token) == TOKEN_IDENT) {
2051                 struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
2052                 if (sym) {
2053                         handler = sym->handler;
2054                         is_normal = sym->normal;
2055                 } else {
2056                         handler = handle_nondirective;
2057                 }
2058         } else if (token_type(token) == TOKEN_NUMBER) {
2059                 handler = handle_line;
2060         } else {
2061                 handler = handle_nondirective;
2062         }
2063
2064         if (is_normal) {
2065                 dirty_stream(stream);
2066                 if (false_nesting)
2067                         goto out;
2068         }
2069         if (!handler(stream, line, token))      /* all set */
2070                 return;
2071
2072 out:
2073         free_preprocessor_line(token);
2074 }
2075
2076 static void preprocessor_line(struct stream *stream, struct token **line)
2077 {
2078         struct token *start = *line, *next;
2079         struct token **tp = &start->next;
2080
2081         for (;;) {
2082                 next = *tp;
2083                 if (next->pos.newline)
2084                         break;
2085                 tp = &next->next;
2086         }
2087         *line = next;
2088         *tp = &eof_token_entry;
2089         handle_preprocessor_line(stream, line, start);
2090 }
2091
2092 static void do_preprocess(struct token **list)
2093 {
2094         struct token *next;
2095
2096         while (!eof_token(next = scan_next(list))) {
2097                 struct stream *stream = input_streams + next->pos.stream;
2098
2099                 if (next->pos.newline && match_op(next, '#')) {
2100                         if (!next->pos.noexpand) {
2101                                 preprocessor_line(stream, list);
2102                                 __free_token(next);     /* Free the '#' token */
2103                                 continue;
2104                         }
2105                 }
2106
2107                 switch (token_type(next)) {
2108                 case TOKEN_STREAMEND:
2109                         if (stream->top_if) {
2110                                 nesting_error(stream);
2111                                 sparse_error(stream->top_if->pos, "unterminated preprocessor conditional");
2112                                 stream->top_if = NULL;
2113                                 false_nesting = 0;
2114                         }
2115                         if (!stream->dirty)
2116                                 stream->constant = CONSTANT_FILE_YES;
2117                         *list = next->next;
2118                         continue;
2119                 case TOKEN_STREAMBEGIN:
2120                         *list = next->next;
2121                         continue;
2122
2123                 default:
2124                         dirty_stream(stream);
2125                         if (false_nesting) {
2126                                 *list = next->next;
2127                                 __free_token(next);
2128                                 continue;
2129                         }
2130
2131                         if (token_type(next) != TOKEN_IDENT ||
2132                             expand_one_symbol(list))
2133                                 list = &next->next;
2134                 }
2135         }
2136 }
2137
/*
 * Add the multiarch system include directory for the host,
 * "/usr/include/<arch>-linux-gnu/", to the include path.
 *
 * <arch> is whatever "/bin/uname -m" prints.  The directory is only
 * added when "/bin/uname -o" prints exactly "GNU/Linux".  It is queued
 * as an "#add_system" line via add_pre_buffer().
 *
 * If either command cannot be run or produces no output, the function
 * returns without adding anything.
 */
void init_include_path(void)
{
        FILE *fp;
        char path[256];
        char arch[32];
        char os[32];
        char *got;
        size_t len;

        fp = popen("/bin/uname -m", "r");
        if (!fp)
                return;
        got = fgets(arch, sizeof(arch), fp);
        /* Close the pipe on every path, including a failed read. */
        pclose(fp);
        if (!got)
                return;
        len = strlen(arch);
        if (len > 0 && arch[len - 1] == '\n')
                arch[len - 1] = '\0';

        fp = popen("/bin/uname -o", "r");
        if (!fp)
                return;
        got = fgets(os, sizeof(os), fp);
        pclose(fp);
        /* Without output 'os' is uninitialised and must not be compared. */
        if (!got)
                return;

        if (strcmp(os, "GNU/Linux\n") != 0)
                return;

        snprintf(path, sizeof(path), "/usr/include/%s-linux-gnu/", arch);
        add_pre_buffer("#add_system \"%s/\"\n", path);
}
2167
2168 struct token * preprocess(struct token *token)
2169 {
2170         preprocessing = 1;
2171         init_preprocessor();
2172         do_preprocess(&token);
2173
2174         // Drop all expressions from preprocessing, they're not used any more.
2175         // This is not true when we have multiple files, though ;/
2176         // clear_expression_alloc();
2177         preprocessing = 0;
2178
2179         return token;
2180 }
2181
2182 static void dump_macro(struct symbol *sym)
2183 {
2184         int nargs = sym->arglist ? sym->arglist->count.normal : 0;
2185         struct token *args[nargs];
2186         struct token *token;
2187
2188         printf("#define %s", show_ident(sym->ident));
2189         token = sym->arglist;
2190         if (token) {
2191                 const char *sep = "";
2192                 int narg = 0;
2193                 putchar('(');
2194                 for (; !eof_token(token); token = token->next) {
2195                         if (token_type(token) == TOKEN_ARG_COUNT)
2196                                 continue;
2197                         printf("%s%s", sep, show_token(token));
2198                         args[narg++] = token;
2199                         sep = ", ";
2200                 }
2201                 putchar(')');
2202         }
2203         putchar(' ');
2204
2205         token = sym->expansion;
2206         while (!eof_token(token)) {
2207                 struct token *next = token->next;
2208                 switch (token_type(token)) {
2209                 case TOKEN_UNTAINT:
2210                         break;
2211                 case TOKEN_MACRO_ARGUMENT:
2212                         token = args[token->argnum];
2213                         /* fall-through */
2214                 default:
2215                         printf("%s", show_token(token));
2216                         if (next->pos.whitespace)
2217                                 putchar(' ');
2218                 }
2219                 token = next;
2220         }
2221         putchar('\n');
2222 }
2223
2224 void dump_macro_definitions(void)
2225 {
2226         struct ident *name;
2227
2228         FOR_EACH_PTR(macros, name) {
2229                 struct symbol *sym = lookup_macro(name);
2230                 if (sym)
2231                         dump_macro(sym);
2232         } END_FOR_EACH_PTR(name);
2233 }