libcpp/traditional.c

   1 /* CPP Library - traditional lexical analysis and macro expansion.
   2    Copyright (C) 2002-2019 Free Software Foundation, Inc.
   3    Contributed by Neil Booth, May 2002
   4
   5 This program is free software; you can redistribute it and/or modify it
   6 under the terms of the GNU General Public License as published by the
   7 Free Software Foundation; either version 3, or (at your option) any
   8 later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU General Public License for more details.
  14
  15 You should have received a copy of the GNU General Public License
  16 along with this program; see the file COPYING3.  If not see
  17 <http://www.gnu.org/licenses/>.  */
  18
  19 #include "config.h"
  20 #include "system.h"
  21 #include "cpplib.h"
  22 #include "internal.h"
  23
/* The replacement text of a function-like macro is stored as a
   contiguous sequence of aligned blocks, each representing the text
   between subsequent parameters.

   Each block comprises the text between its surrounding parameters,
   the length of that text, and the one-based index of the following
   parameter.  The final block in the replacement text is easily
   recognizable as it has an argument index of zero.  */

struct block
{
  /* Number of bytes of replacement text held in TEXT.  */
  unsigned int text_len;
  /* One-based index of the parameter that follows TEXT, or zero in
     the final block of a replacement list.  */
  unsigned short arg_index;
  /* The text itself.  Only one element is declared; BLOCK_LEN
     computes a block's size from the header length plus the real
     text length.  */
  uchar text[1];
};

/* Number of bytes in a block that precede its text.  */
#define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Size of a block carrying TEXT_LEN bytes of text: the header plus
   the text, passed through CPP_ALIGN.  */
#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
  42
/* Structure holding information about a function-like macro
   invocation.  It is filled in by maybe_start_funlike() when the
   macro name is seen and updated by save_argument() as each argument
   boundary is met.  */
struct fun_macro
{
  /* Memory buffer backing the ARGS array below.  */
  _cpp_buff *buff;

  /* An array of size the number of macro parameters + 1, containing
     the offsets of the start of each macro argument in the output
     buffer.  The argument continues until the character before the
     start of the next one.  */
  size_t *args;

  /* The hashnode of the macro.  */
  cpp_hashnode *node;

  /* The offset of the macro name in the output buffer.  The output
     is rewound to this offset once the opening parenthesis is
     found.  */
  size_t offset;

  /* The line the macro name appeared on.  Used when reporting an
     unterminated argument list.  */
  location_t line;

  /* Number of parameters.  */
  unsigned int paramc;

  /* Zero-based index of argument being currently lexed.  It is
     incremented by save_argument() at each argument boundary.  */
  unsigned int argc;
};
  71
  72 /* Lexing state.  It is mostly used to prevent macro expansion.  */
  73 enum ls {ls_none = 0,           /* Normal state.  */
  74          ls_fun_open,           /* When looking for '('.  */
  75          ls_fun_close,          /* When looking for ')'.  */
  76          ls_defined,            /* After defined.  */
  77          ls_defined_close,      /* Looking for ')' of defined().  */
  78          ls_hash,               /* After # in preprocessor conditional.  */
  79          ls_predicate,          /* After the predicate, maybe paren?  */
  80          ls_answer,             /* In answer to predicate.  */
  81          ls_has_include,        /* After __has_include__.  */
  82          ls_has_include_close}; /* Looking for ')' of __has_include__.  */
  83
  84 /* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
  85    from recognizing comments and directives during its lexing pass.  */
  86
/* Forward declarations of file-local helpers, so that they may be
   used before their definitions appear.  */
static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static const uchar *copy_comment (cpp_reader *, const uchar *, int);
static void check_output_buffer (cpp_reader *, size_t);
static void push_replacement_text (cpp_reader *, cpp_hashnode *);
static bool scan_parameters (cpp_reader *, unsigned *);
static bool recursive_macro (cpp_reader *, cpp_hashnode *);
static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
                                 struct fun_macro *);
static void save_argument (struct fun_macro *, size_t);
static void replace_args_and_push (cpp_reader *, struct fun_macro *);
static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);

 100
 101 /* Ensures we have N bytes' space in the output buffer, and
 102    reallocates it if not.  */
 103 static void
 104 check_output_buffer (cpp_reader *pfile, size_t n)
 105 {
 106   /* We might need two bytes to terminate an unterminated comment, and
 107      one more to terminate the line with a NUL.  */
 108   n += 2 + 1;
 109
 110   if (n > (size_t) (pfile->out.limit - pfile->out.cur))
 111     {
 112       size_t size = pfile->out.cur - pfile->out.base;
 113       size_t new_size = (size + n) * 3 / 2;
 114
 115       pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
 116       pfile->out.limit = pfile->out.base + new_size;
 117       pfile->out.cur = pfile->out.base + size;
 118     }
 119 }
 120
 121 /* Skip a C-style block comment in a macro as a result of -CC.
 122    PFILE->buffer->cur points to the initial asterisk of the comment,
 123    change it to point to after the '*' and '/' characters that terminate it.
 124    Return true if the macro has not been termined, in that case set
 125    PFILE->buffer->cur to the end of the buffer.  */
 126 static bool
 127 skip_macro_block_comment (cpp_reader *pfile)
 128 {
 129   const uchar *cur = pfile->buffer->cur;
 130
 131   cur++;
 132   if (*cur == '/')
 133     cur++;
 134
 135   /* People like decorating comments with '*', so check for '/'
 136      instead for efficiency.  */
 137   while (! (*cur++ == '/' && cur[-2] == '*'))
 138     if (cur[-1] == '\n')
 139       {
 140         pfile->buffer->cur = cur - 1;
 141         return true;
 142       }
 143
 144   pfile->buffer->cur = cur;
 145   return false;
 146 }
 147
 148 /* CUR points to the asterisk introducing a comment in the current
 149    context.  IN_DEFINE is true if we are in the replacement text of a
 150    macro.
 151
 152    The asterisk and following comment is copied to the buffer pointed
 153    to by pfile->out.cur, which must be of sufficient size.
 154    Unterminated comments are diagnosed, and correctly terminated in
 155    the output.  pfile->out.cur is updated depending upon IN_DEFINE,
 156    -C, -CC and pfile->state.in_directive.
 157
 158    Returns a pointer to the first character after the comment in the
 159    input buffer.  */
 160 static const uchar *
 161 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
 162 {
 163   bool unterminated, copy = false;
 164   location_t src_loc = pfile->line_table->highest_line;
 165   cpp_buffer *buffer = pfile->buffer;
 166
 167   buffer->cur = cur;
 168   if (pfile->context->prev)
 169     unterminated = skip_macro_block_comment (pfile);
 170   else
 171     unterminated = _cpp_skip_block_comment (pfile);
 172
 173   if (unterminated)
 174     cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
 175                          "unterminated comment");
 176
 177   /* Comments in directives become spaces so that tokens are properly
 178      separated when the ISO preprocessor re-lexes the line.  The
 179      exception is #define.  */
 180   if (pfile->state.in_directive)
 181     {
 182       if (in_define)
 183         {
 184           if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
 185             pfile->out.cur--;
 186           else
 187             copy = true;
 188         }
 189       else
 190         pfile->out.cur[-1] = ' ';
 191     }
 192   else if (CPP_OPTION (pfile, discard_comments))
 193     pfile->out.cur--;
 194   else
 195     copy = true;
 196
 197   if (copy)
 198     {
 199       size_t len = (size_t) (buffer->cur - cur);
 200       memcpy (pfile->out.cur, cur, len);
 201       pfile->out.cur += len;
 202       if (unterminated)
 203         {
 204           *pfile->out.cur++ = '*';
 205           *pfile->out.cur++ = '/';
 206         }
 207     }
 208
 209   return buffer->cur;
 210 }
 211
 212 /* CUR points to any character in the input buffer.  Skips over all
 213    contiguous horizontal white space and NULs, including comments if
 214    SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
 215    character or the end of the current context.  Escaped newlines are
 216    removed.
 217
 218    The whitespace is copied verbatim to the output buffer, except that
 219    comments are handled as described in copy_comment().
 220    pfile->out.cur is updated.
 221
 222    Returns a pointer to the first character after the whitespace in
 223    the input buffer.  */
 224 static const uchar *
 225 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
 226 {
 227   uchar *out = pfile->out.cur;
 228
 229   for (;;)
 230     {
 231       unsigned int c = *cur++;
 232       *out++ = c;
 233
 234       if (is_nvspace (c))
 235         continue;
 236
 237       if (c == '/' && *cur == '*' && skip_comments)
 238         {
 239           pfile->out.cur = out;
 240           cur = copy_comment (pfile, cur, false /* in_define */);
 241           out = pfile->out.cur;
 242           continue;
 243         }
 244
 245       out--;
 246       break;
 247     }
 248
 249   pfile->out.cur = out;
 250   return cur - 1;
 251 }
 252
 253 /* Lexes and outputs an identifier starting at CUR, which is assumed
 254    to point to a valid first character of an identifier.  Returns
 255    the hashnode, and updates out.cur.  */
 256 static cpp_hashnode *
 257 lex_identifier (cpp_reader *pfile, const uchar *cur)
 258 {
 259   size_t len;
 260   uchar *out = pfile->out.cur;
 261   cpp_hashnode *result;
 262
 263   do
 264     *out++ = *cur++;
 265   while (is_numchar (*cur));
 266
 267   CUR (pfile->context) = cur;
 268   len = out - pfile->out.cur;
 269   result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
 270                                     len, HT_ALLOC));
 271   pfile->out.cur = out;
 272   return result;
 273 }
 274
 275 /* Overlays the true file buffer temporarily with text of length LEN
 276    starting at START.  The true buffer is restored upon calling
 277    restore_buff().  */
 278 void
 279 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
 280 {
 281   cpp_buffer *buffer = pfile->buffer;
 282
 283   pfile->overlaid_buffer = buffer;
 284   pfile->saved_cur = buffer->cur;
 285   pfile->saved_rlimit = buffer->rlimit;
 286   pfile->saved_line_base = buffer->next_line;
 287   buffer->need_line = false;
 288
 289   buffer->cur = start;
 290   buffer->line_base = start;
 291   buffer->rlimit = start + len;
 292 }
 293
 294 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
 295 void
 296 _cpp_remove_overlay (cpp_reader *pfile)
 297 {
 298   cpp_buffer *buffer = pfile->overlaid_buffer;
 299
 300   buffer->cur = pfile->saved_cur;
 301   buffer->rlimit = pfile->saved_rlimit;
 302   buffer->line_base = pfile->saved_line_base;
 303   buffer->need_line = true;
 304
 305   pfile->overlaid_buffer = NULL;
 306 }
 307
 308 /* Reads a logical line into the output buffer.  Returns TRUE if there
 309    is more text left in the buffer.  */
 310 bool
 311 _cpp_read_logical_line_trad (cpp_reader *pfile)
 312 {
 313   do
 314     {
 315       if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
 316         return false;
 317     }
 318   while (!_cpp_scan_out_logical_line (pfile, NULL, false)
 319          || pfile->state.skipping);
 320
 321   return pfile->buffer != NULL;
 322 }
 323
 324 /* Return true if NODE is a fun_like macro.  */
 325 static inline bool
 326 fun_like_macro (cpp_hashnode *node)
 327 {
 328   if (cpp_builtin_macro_p (node))
 329     return (node->value.builtin == BT_HAS_ATTRIBUTE
 330             || node->value.builtin == BT_HAS_BUILTIN);
 331   return node->value.macro->fun_like;
 332 }
 333
 334 /* Set up state for finding the opening '(' of a function-like
 335    macro.  */
 336 static void
 337 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
 338                      struct fun_macro *macro)
 339 {
 340   unsigned int n;
 341   if (cpp_builtin_macro_p (node))
 342     n = 1;
 343   else
 344     n = node->value.macro->paramc;
 345
 346   if (macro->buff)
 347     _cpp_release_buff (pfile, macro->buff);
 348   macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
 349   macro->args = (size_t *) BUFF_FRONT (macro->buff);
 350   macro->node = node;
 351   macro->offset = start - pfile->out.base;
 352   macro->paramc = n;
 353   macro->argc = 0;
 354 }
 355
 356 /* Save the OFFSET of the start of the next argument to MACRO.  */
 357 static void
 358 save_argument (struct fun_macro *macro, size_t offset)
 359 {
 360   macro->argc++;
 361   if (macro->argc <= macro->paramc)
 362     macro->args[macro->argc] = offset;
 363 }
 364
/* Copies the next logical line in the current buffer (starting at
   buffer->cur) to the output buffer.  The output is guaranteed to
   terminate with a NUL character.  buffer->cur is updated.

   If MACRO is non-NULL, then we are scanning the replacement list of
   MACRO, and we call save_replacement_text() every time we meet an
   argument.

   If BUILTIN_MACRO_ARG is true, this is called to macro expand
   arguments of builtin function-like macros.  In that mode the
   output buffer is appended to rather than reset, line notes are not
   processed, and scanning stops at the first newline met outside a
   macro expansion.

   Returns false if the line was a directive that was handed to
   _cpp_handle_directive() or was a null directive, and true
   otherwise.  */
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
                            bool builtin_macro_arg)
{
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  /* QUOTE is the character that will close the literal we are in, or
     zero when not inside a literal.  */
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  /* True while a '<' may still open an angled header name.  */
  bool header_ok;
  const uchar *start_of_input_line;

  /* No function-like macro invocation is pending yet.  */
  fmacro.buff = NULL;
  fmacro.args = NULL;
  fmacro.node = NULL;
  fmacro.offset = 0;
  fmacro.line = 0;
  fmacro.paramc = 0;
  fmacro.argc = 0;

  quote = 0;
  header_ok = pfile->state.angled_headers;
  /* Make the current context read from the file buffer's current
     position up to its limit.  */
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  if (!builtin_macro_arg)
    {
      /* Start the output afresh for a new logical line.  */
      pfile->out.cur = pfile->out.base;
      pfile->out.first_line = pfile->line_table->highest_line;
    }
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;
  /* We come back here whenever a context has been pushed or popped:
     reload the input position from the now-current context and make
     sure the rest of its text fits in the output buffer.  */
 new_context:
  context = pfile->context;
  cur = CUR (context);
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      /* Line notes are only processed for text read directly from
         the file buffer.  */
      if (!context->prev
          && !builtin_macro_arg
          && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
        {
          pfile->buffer->cur = cur;
          _cpp_process_line_notes (pfile, false);
        }
      /* Every character is copied to the output first; the cases
         below undo or overwrite that where necessary.  */
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
         non-whitespace should "break" out of it.  */
      switch (c)
        {
        case ' ':
        case '\t':
        case '\f':
        case '\v':
        case '\0':
          continue;

        case '\n':
          /* If this is a macro's expansion, pop it.  */
          if (context->prev)
            {
              pfile->out.cur = out - 1;
              _cpp_pop_context (pfile);
              goto new_context;
            }

          /* Omit the newline from the output buffer.  */
          pfile->out.cur = out - 1;
          pfile->buffer->cur = cur;
          if (builtin_macro_arg)
            goto done;
          pfile->buffer->need_line = true;
          CPP_INCREMENT_LINE (pfile, 0);

          /* Outside directives, a function-like macro invocation may
             continue on the next line, if there is one.  */
          if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
              && !pfile->state.in_directive
              && _cpp_get_fresh_line (pfile))
            {
              /* Newlines in arguments become a space, but we don't
                 clear any in-progress quote.  */
              if (lex_state == ls_fun_close)
                out[-1] = ' ';
              cur = pfile->buffer->cur;
              continue;
            }
          goto done;

        case '<':
          if (header_ok)
            quote = '>';
          break;
        case '>':
          if (c == quote)
            quote = 0;
          break;

        case '"':
        case '\'':
          /* Either closes the literal we are in, or opens one if we
             are not in any.  */
          if (c == quote)
            quote = 0;
          else if (!quote)
            quote = c;
          break;

        case '\\':
          /* Skip escaped quotes here, it's easier than above.  */
          if (*cur == '\\' || *cur == '"' || *cur == '\'')
            *out++ = *cur++;
          break;

        case '/':
          /* Traditional CPP does not recognize comments within
             literals.  */
          if (!quote && *cur == '*')
            {
              pfile->out.cur = out;
              cur = copy_comment (pfile, cur, macro != 0);
              out = pfile->out.cur;
              continue;
            }
          break;

        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
          /* Identifiers inside literals are only looked at when
             scanning a macro definition.  */
          if (!pfile->state.skipping && (quote == 0 || macro))
            {
              cpp_hashnode *node;
              /* Where the identifier begins in the output; its first
                 character has already been copied.  */
              uchar *out_start = out - 1;

              pfile->out.cur = out_start;
              node = lex_identifier (pfile, cur - 1);
              out = pfile->out.cur;
              cur = CUR (context);

              if (cpp_macro_p (node)
                  /* Should we expand for ls_answer?  */
                  && (lex_state == ls_none || lex_state == ls_fun_open)
                  && !pfile->state.prevent_expansion)
                {
                  /* Macros invalidate MI optimization.  */
                  pfile->mi_valid = false;
                  if (fun_like_macro (node))
                    {
                      /* Remember the macro and go looking for its
                         opening parenthesis.  */
                      maybe_start_funlike (pfile, node, out_start, &fmacro);
                      lex_state = ls_fun_open;
                      fmacro.line = pfile->line_table->highest_line;
                      continue;
                    }
                  else if (!recursive_macro (pfile, node))
                    {
                      /* Remove the object-like macro's name from the
                         output, and push its replacement text.  */
                      pfile->out.cur = out_start;
                      push_replacement_text (pfile, node);
                      lex_state = ls_none;
                      goto new_context;
                    }
                }
              else if (macro && node->type == NT_MACRO_ARG)
                {
                  /* Found a parameter in the replacement text of a
                     #define.  Remove its name from the output.  */
                  pfile->out.cur = out_start;
                  save_replacement_text (pfile, macro, node->value.arg_index);
                  /* Continue writing from the start of the output
                     buffer.  */
                  out = pfile->out.base;
                }
              else if (lex_state == ls_hash)
                {
                  lex_state = ls_predicate;
                  continue;
                }
              else if (pfile->state.in_expression
                       && node == pfile->spec_nodes.n_defined)
                {
                  lex_state = ls_defined;
                  continue;
                }
              else if (pfile->state.in_expression
                       && (node == pfile->spec_nodes.n__has_include__
                        || node == pfile->spec_nodes.n__has_include_next__))
                {
                  lex_state = ls_has_include;
                  continue;
                }
            }
          break;

        case '(':
          if (quote == 0)
            {
              paren_depth++;
              if (lex_state == ls_fun_open)
                {
                  if (recursive_macro (pfile, fmacro.node))
                    lex_state = ls_none;
                  else
                    {
                      /* Start collecting arguments.  The output is
                         rewound to where the macro name began, and
                         the first argument is recorded as starting
                         there.  */
                      lex_state = ls_fun_close;
                      paren_depth = 1;
                      out = pfile->out.base + fmacro.offset;
                      fmacro.args[0] = fmacro.offset;
                    }
                }
              else if (lex_state == ls_predicate)
                lex_state = ls_answer;
              else if (lex_state == ls_defined)
                lex_state = ls_defined_close;
              else if (lex_state == ls_has_include)
                lex_state = ls_has_include_close;
            }
          break;

        case ',':
          /* A comma at the top level of an argument list ends one
             argument and starts the next.  */
          if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
            save_argument (&fmacro, out - pfile->out.base);
          break;

        case ')':
          if (quote == 0)
            {
              paren_depth--;
              if (lex_state == ls_fun_close && paren_depth == 0)
                {
                  if (cpp_builtin_macro_p (fmacro.node))
                    {
                      /* Handle builtin function-like macros like
                         __has_attribute.  The already parsed arguments
                         are put into a buffer, which is then preprocessed
                         and the result is fed to _cpp_push_text_context
                         with disabled expansion, where the ISO preprocessor
                         parses it.  While in traditional preprocessing
                         macro arguments aren't immediately expanded, they in
                         the end are because the macro with replaced arguments
                         is preprocessed again.  For the builtin function-like
                         macros we need the argument immediately though,
                         if we don't preprocess them, they would behave
                         very differently from ISO preprocessor handling
                         of those builtin macros.  So, this handling is
                         more similar to traditional preprocessing of
                         #if directives, where we also keep preprocessing
                         until everything is expanded, and then feed the
                         result with disabled expansion to ISO preprocessor
                         for handling the directives.  */
                      lex_state = ls_none;
                      save_argument (&fmacro, out - pfile->out.base);
                      /* A dummy macro carrying only the parameter
                         count, for the argument count check.  */
                      cpp_macro m;
                      memset (&m, '\0', sizeof (m));
                      m.paramc = fmacro.paramc;
                      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
                                             fmacro.argc))
                        {
                          size_t len = fmacro.args[1] - fmacro.args[0];
                          uchar *buf;

                          /* Remove the macro's invocation from the
                             output, and push its replacement text.  */
                          pfile->out.cur = pfile->out.base + fmacro.offset;
                          CUR (context) = cur;
                          /* Copy the argument text into a scratch
                             buffer, with '(' in front and a newline
                             at the end.  */
                          buf = _cpp_unaligned_alloc (pfile, len + 2);
                          buf[0] = '(';
                          memcpy (buf + 1, pfile->out.base + fmacro.args[0],
                                  len);
                          buf[len + 1] = '\n';

                          /* Save the state that is about to be
                             changed, then point the file buffer at
                             the scratch text.  */
                          const unsigned char *ctx_rlimit = RLIMIT (context);
                          const unsigned char *saved_cur = pfile->buffer->cur;
                          const unsigned char *saved_rlimit
                            = pfile->buffer->rlimit;
                          const unsigned char *saved_line_base
                            = pfile->buffer->line_base;
                          bool saved_need_line = pfile->buffer->need_line;
                          cpp_buffer *saved_overlaid_buffer
                            = pfile->overlaid_buffer;
                          pfile->buffer->cur = buf;
                          pfile->buffer->line_base = buf;
                          pfile->buffer->rlimit = buf + len + 1;
                          pfile->buffer->need_line = false;
                          pfile->overlaid_buffer = pfile->buffer;
                          bool saved_in_directive = pfile->state.in_directive;
                          pfile->state.in_directive = true;
                          cpp_context *saved_prev_context = context->prev;
                          context->prev = NULL;

                          /* Expand the argument text; the result is
                             appended to the output buffer.  */
                          _cpp_scan_out_logical_line (pfile, NULL, true);

                          pfile->state.in_directive = saved_in_directive;
                          /* Terminate the expanded text with a
                             newline and make the file buffer and the
                             context read it back from the output
                             buffer.  */
                          check_output_buffer (pfile, 1);
                          *pfile->out.cur = '\n';
                          pfile->buffer->cur = pfile->out.base + fmacro.offset;
                          pfile->buffer->line_base = pfile->buffer->cur;
                          pfile->buffer->rlimit = pfile->out.cur;
                          CUR (context) = pfile->buffer->cur;
                          RLIMIT (context) = pfile->buffer->rlimit;

                          /* Obtain the builtin's text with expansion
                             disabled.  */
                          pfile->state.prevent_expansion++;
                          const uchar *text
                            = _cpp_builtin_macro_text (pfile, fmacro.node);
                          pfile->state.prevent_expansion--;

                          /* Put back everything saved above.  */
                          context->prev = saved_prev_context;
                          pfile->buffer->cur = saved_cur;
                          pfile->buffer->rlimit = saved_rlimit;
                          pfile->buffer->line_base = saved_line_base;
                          pfile->buffer->need_line = saved_need_line;
                          pfile->overlaid_buffer = saved_overlaid_buffer;
                          pfile->out.cur = pfile->out.base + fmacro.offset;
                          CUR (context) = cur;
                          RLIMIT (context) = ctx_rlimit;
                          /* Push a newline-terminated copy of the
                             resulting text as a new context.  */
                          len = ustrlen (text);
                          buf = _cpp_unaligned_alloc (pfile, len + 1);
                          memcpy (buf, text, len);
                          buf[len] = '\n';
                          text = buf;
                          _cpp_push_text_context (pfile, fmacro.node,
                                                  text, len);
                          goto new_context;
                        }
                      break;
                    }

                  cpp_macro *m = fmacro.node->value.macro;

                  m->used = 1;
                  lex_state = ls_none;
                  save_argument (&fmacro, out - pfile->out.base);

                  /* A single zero-length argument is no argument.  */
                  if (fmacro.argc == 1
                      && m->paramc == 0
                      && out == pfile->out.base + fmacro.offset + 1)
                    fmacro.argc = 0;

                  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
                    {
                      /* Remove the macro's invocation from the
                         output, and push its replacement text.  */
                      pfile->out.cur = pfile->out.base + fmacro.offset;
                      CUR (context) = cur;
                      replace_args_and_push (pfile, &fmacro);
                      goto new_context;
                    }
                }
              else if (lex_state == ls_answer || lex_state == ls_defined_close
                        || lex_state == ls_has_include_close)
                lex_state = ls_none;
            }
          break;

        case '#':
          if (cur - 1 == start_of_input_line
              /* A '#' from a macro doesn't start a directive.  */
              && !pfile->context->prev
              && !pfile->state.in_directive)
            {
              /* A directive.  With the way _cpp_handle_directive
                 currently works, we only want to call it if either we
                 know the directive is OK, or we want it to fail and
                 be removed from the output.  If we want it to be
                 passed through (the assembler case) then we must not
                 call _cpp_handle_directive.  */
              pfile->out.cur = out;
              cur = skip_whitespace (pfile, cur, true /* skip_comments */);
              out = pfile->out.cur;

              if (*cur == '\n')
                {
                  /* Null directive.  Ignore it and don't invalidate
                     the MI optimization.  */
                  pfile->buffer->need_line = true;
                  CPP_INCREMENT_LINE (pfile, 0);
                  result = false;
                  goto done;
                }
              else
                {
                  bool do_it = false;

                  if (is_numstart (*cur)
                      && CPP_OPTION (pfile, lang) != CLK_ASM)
                    do_it = true;
                  else if (is_idstart (*cur))
                    /* Check whether we know this directive, but don't
                       advance.  */
                    do_it = lex_identifier (pfile, cur)->is_directive;

                  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
                    {
                      /* This is a kludge.  We want to have the ISO
                         preprocessor lex the next token.  */
                      pfile->buffer->cur = cur;
                      _cpp_handle_directive (pfile, false /* indented */);
                      result = false;
                      goto done;
                    }
                }
            }

          if (pfile->state.in_expression)
            {
              lex_state = ls_hash;
              continue;
            }
          break;

        default:
          break;
        }

      /* Non-whitespace disables MI optimization and stops treating
         '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
        pfile->mi_valid = false;

      if (lex_state == ls_none)
        continue;

      /* Some of these transitions of state are syntax errors.  The
         ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
        /* Missing '('.  */
        lex_state = ls_none;
      else if (lex_state == ls_hash
               || lex_state == ls_predicate
               || lex_state == ls_defined
               || lex_state == ls_has_include)
        lex_state = ls_none;

      /* ls_fun_close, ls_answer, ls_defined_close and
         ls_has_include_close keep going until ')'.  */
    }

 done:
  /* Give back the buffer that held the argument offsets, if any.  */
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  /* The line ended while macro arguments were still being
     collected.  */
  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
                         "unterminated argument list invoking macro \"%s\"",
                         NODE_NAME (fmacro.node));
  return result;
}
 831
 832 /* Push a context holding the replacement text of the macro NODE on
 833    the context stack.  NODE is either object-like, or a function-like
 834    macro with no arguments.  */
 835 static void
 836 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
 837 {
 838   size_t len;
 839   const uchar *text;
 840   uchar *buf;
 841
 842   if (cpp_builtin_macro_p (node))
 843     {
 844       text = _cpp_builtin_macro_text (pfile, node);
 845       len = ustrlen (text);
 846       buf = _cpp_unaligned_alloc (pfile, len + 1);
 847       memcpy (buf, text, len);
 848       buf[len] = '\n';
 849       text = buf;
 850     }
 851   else
 852     {
 853       cpp_macro *macro = node->value.macro;
 854       macro->used = 1;
 855       text = macro->exp.text;
 856       len = macro->count;
 857     }
 858
 859   _cpp_push_text_context (pfile, node, text, len);
 860 }
 861
 862 /* Returns TRUE if traditional macro recursion is detected.  */
 863 static bool
 864 recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
 865 {
 866   bool recursing = !!(node->flags & NODE_DISABLED);
 867
 868   /* Object-like macros that are already expanding are necessarily
 869      recursive.
 870
 871      However, it is possible to have traditional function-like macros
 872      that are not infinitely recursive but recurse to any given depth.
 873      Further, it is easy to construct examples that get ever longer
 874      until the point they stop recursing.  So there is no easy way to
 875      detect true recursion; instead we assume any expansion more than
 876      20 deep since the first invocation of this macro must be
 877      recursing.  */
 878   if (recursing && fun_like_macro (node))
 879     {
 880       size_t depth = 0;
 881       cpp_context *context = pfile->context;
 882
 883       do
 884         {
 885           depth++;
 886           if (context->c.macro == node && depth > 20)
 887             break;
 888           context = context->prev;
 889         }
 890       while (context);
 891       recursing = context != NULL;
 892     }
 893
 894   if (recursing)
 895     cpp_error (pfile, CPP_DL_ERROR,
 896                "detected recursion whilst expanding macro \"%s\"",
 897                NODE_NAME (node));
 898
 899   return recursing;
 900 }
 901
 902 /* Return the length of the replacement text of a function-like or
 903    object-like non-builtin macro.  */
 904 size_t
 905 _cpp_replacement_text_len (const cpp_macro *macro)
 906 {
 907   size_t len;
 908
 909   if (macro->fun_like && (macro->paramc != 0))
 910     {
 911       const uchar *exp;
 912
 913       len = 0;
 914       for (exp = macro->exp.text;;)
 915         {
 916           struct block *b = (struct block *) exp;
 917
 918           len += b->text_len;
 919           if (b->arg_index == 0)
 920             break;
 921           len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
 922           exp += BLOCK_LEN (b->text_len);
 923         }
 924     }
 925   else
 926     len = macro->count;
 927
 928   return len;
 929 }
 930
 931 /* Copy the replacement text of MACRO to DEST, which must be of
 932    sufficient size.  It is not NUL-terminated.  The next character is
 933    returned.  */
 934 uchar *
 935 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
 936 {
 937   if (macro->fun_like && (macro->paramc != 0))
 938     {
 939       const uchar *exp;
 940
 941       for (exp = macro->exp.text;;)
 942         {
 943           struct block *b = (struct block *) exp;
 944           cpp_hashnode *param;
 945
 946           memcpy (dest, b->text, b->text_len);
 947           dest += b->text_len;
 948           if (b->arg_index == 0)
 949             break;
 950           param = macro->parm.params[b->arg_index - 1];
 951           memcpy (dest, NODE_NAME (param), NODE_LEN (param));
 952           dest += NODE_LEN (param);
 953           exp += BLOCK_LEN (b->text_len);
 954         }
 955     }
 956   else
 957     {
 958       memcpy (dest, macro->exp.text, macro->count);
 959       dest += macro->count;
 960     }
 961
 962   return dest;
 963 }
 964
 965 /* Push a context holding the replacement text of the macro NODE on
 966    the context stack.  NODE is either object-like, or a function-like
 967    macro with no arguments.  */
 968 static void
 969 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
 970 {
 971   cpp_macro *macro = fmacro->node->value.macro;
 972
 973   if (macro->paramc == 0)
 974     push_replacement_text (pfile, fmacro->node);
 975   else
 976     {
 977       const uchar *exp;
 978       uchar *p;
 979       _cpp_buff *buff;
 980       size_t len = 0;
 981       int cxtquote = 0;
 982
 983       /* Get an estimate of the length of the argument-replaced text.
 984          This is a worst case estimate, assuming that every replacement
 985          text character needs quoting.  */
 986       for (exp = macro->exp.text;;)
 987         {
 988           struct block *b = (struct block *) exp;
 989
 990           len += b->text_len;
 991           if (b->arg_index == 0)
 992             break;
 993           len += 2 * (fmacro->args[b->arg_index]
 994                       - fmacro->args[b->arg_index - 1] - 1);
 995           exp += BLOCK_LEN (b->text_len);
 996         }
 997
 998       /* Allocate room for the expansion plus \n.  */
 999       buff = _cpp_get_buff (pfile, len + 1);
1000
1001       /* Copy the expansion and replace arguments.  */
1002       /* Accumulate actual length, including quoting as necessary */
1003       p = BUFF_FRONT (buff);
1004       len = 0;
1005       for (exp = macro->exp.text;;)
1006         {
1007           struct block *b = (struct block *) exp;
1008           size_t arglen;
1009           int argquote;
1010           uchar *base;
1011           uchar *in;
1012
1013           len += b->text_len;
1014           /* Copy the non-argument text literally, keeping
1015              track of whether matching quotes have been seen. */
1016           for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1017             {
1018               if (*in == '"')
1019                 cxtquote = ! cxtquote;
1020               *p++ = *in++;
1021             }
1022           /* Done if no more arguments */
1023           if (b->arg_index == 0)
1024             break;
1025           arglen = (fmacro->args[b->arg_index]
1026                     - fmacro->args[b->arg_index - 1] - 1);
1027           base = pfile->out.base + fmacro->args[b->arg_index - 1];
1028           in = base;
1029 #if 0
1030           /* Skip leading whitespace in the text for the argument to
1031              be substituted. To be compatible with gcc 2.95, we would
1032              also need to trim trailing whitespace. Gcc 2.95 trims
1033              leading and trailing whitespace, which may be a bug.  The
1034              current gcc testsuite explicitly checks that this leading
1035              and trailing whitespace in actual arguments is
1036              preserved. */
1037           while (arglen > 0 && is_space (*in))
1038             {
1039               in++;
1040               arglen--;
1041             }
1042 #endif
1043           for (argquote = 0; arglen > 0; arglen--)
1044             {
1045               if (cxtquote && *in == '"')
1046                 {
1047                   if (in > base && *(in-1) != '\\')
1048                     argquote = ! argquote;
1049                   /* Always add backslash before double quote if argument
1050                      is expanded in a quoted context */
1051                   *p++ = '\\';
1052                   len++;
1053                 }
1054               else if (cxtquote && argquote && *in == '\\')
1055                 {
1056                   /* Always add backslash before a backslash in an argument
1057                      that is expanded in a quoted context and also in the
1058                      range of a quoted context in the argument itself. */
1059                   *p++ = '\\';
1060                   len++;
1061                 }
1062               *p++ = *in++;
1063               len++;
1064             }
1065           exp += BLOCK_LEN (b->text_len);
1066         }
1067
1068       /* \n-terminate.  */
1069       *p = '\n';
1070       _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1071
1072       /* So we free buffer allocation when macro is left.  */
1073       pfile->context->buff = buff;
1074     }
1075 }
1076
1077 /* Read and record the parameters, if any, of a function-like macro
1078    definition.  Destroys pfile->out.cur.
1079
1080    Returns true on success, false on failure (syntax error or a
1081    duplicate parameter).  On success, CUR (pfile->context) is just
1082    past the closing parenthesis.  */
1083 static bool
1084 scan_parameters (cpp_reader *pfile, unsigned *n_ptr)
1085 {
1086   const uchar *cur = CUR (pfile->context) + 1;
1087   bool ok;
1088
1089   unsigned nparms = 0;
1090   for (;;)
1091     {
1092       cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1093
1094       if (is_idstart (*cur))
1095         {
1096           struct cpp_hashnode *id = lex_identifier (pfile, cur);
1097           ok = false;
1098           if (!_cpp_save_parameter (pfile, nparms, id, id))
1099             break;
1100           nparms++;
1101           cur = skip_whitespace (pfile, CUR (pfile->context),
1102                                  true /* skip_comments */);
1103           if (*cur == ',')
1104             {
1105               cur++;
1106               continue;
1107             }
1108           ok = (*cur == ')');
1109           break;
1110         }
1111
1112       ok = (*cur == ')' && !nparms);
1113       break;
1114     }
1115
1116   *n_ptr = nparms;
1117
1118   if (!ok)
1119     cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1120
1121   CUR (pfile->context) = cur + (*cur == ')');
1122
1123   return ok;
1124 }
1125
1126 /* Save the text from pfile->out.base to pfile->out.cur as
1127    the replacement text for the current macro, followed by argument
1128    ARG_INDEX, with zero indicating the end of the replacement
1129    text.  */
1130 static void
1131 save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1132                        unsigned int arg_index)
1133 {
1134   size_t len = pfile->out.cur - pfile->out.base;
1135   uchar *exp;
1136
1137   if (macro->paramc == 0)
1138     {
1139       /* Object-like and function-like macros without parameters
1140          simply store their \n-terminated replacement text.  */
1141       exp = _cpp_unaligned_alloc (pfile, len + 1);
1142       memcpy (exp, pfile->out.base, len);
1143       exp[len] = '\n';
1144       macro->exp.text = exp;
1145       macro->count = len;
1146     }
1147   else
1148     {
1149       /* Store the text's length (unsigned int), the argument index
1150          (unsigned short, base 1) and then the text.  */
1151       size_t blen = BLOCK_LEN (len);
1152       struct block *block;
1153
1154       if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1155         _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1156
1157       exp = BUFF_FRONT (pfile->a_buff);
1158       block = (struct block *) (exp + macro->count);
1159       macro->exp.text = exp;
1160
1161       /* Write out the block information.  */
1162       block->text_len = len;
1163       block->arg_index = arg_index;
1164       memcpy (block->text, pfile->out.base, len);
1165
1166       /* Lex the rest into the start of the output buffer.  */
1167       pfile->out.cur = pfile->out.base;
1168
1169       macro->count += blen;
1170
1171       /* If we've finished, commit the memory.  */
1172       if (arg_index == 0)
1173         BUFF_FRONT (pfile->a_buff) += macro->count;
1174     }
1175 }
1176
1177 /* Analyze and save the replacement text of a macro.  Returns true on
1178    success.  */
1179 cpp_macro *
1180 _cpp_create_trad_definition (cpp_reader *pfile)
1181 {
1182   const uchar *cur;
1183   uchar *limit;
1184   cpp_context *context = pfile->context;
1185   unsigned nparms = 0;
1186   int fun_like = 0;
1187   cpp_hashnode **params = NULL;
1188
1189   /* The context has not been set up for command line defines, and CUR
1190      has not been updated for the macro name for in-file defines.  */
1191   pfile->out.cur = pfile->out.base;
1192   CUR (context) = pfile->buffer->cur;
1193   RLIMIT (context) = pfile->buffer->rlimit;
1194   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1195
1196   /* Is this a function-like macro?  */
1197   if (* CUR (context) == '(')
1198     {
1199       fun_like = +1;
1200       if (scan_parameters (pfile, &nparms))
1201         params = (cpp_hashnode **)_cpp_commit_buff
1202           (pfile, sizeof (cpp_hashnode *) * nparms);
1203       else
1204         fun_like = -1;
1205     }
1206
1207   cpp_macro *macro = NULL;
1208
1209   if (fun_like >= 0)
1210     {
1211       macro = _cpp_new_macro (pfile, cmk_traditional,
1212                               _cpp_aligned_alloc (pfile, sizeof (cpp_macro)));
1213       macro->parm.params = params;
1214       macro->paramc = nparms;
1215       macro->fun_like = fun_like != 0;
1216     }
1217
1218   /* Skip leading whitespace in the replacement text.  */
1219   pfile->buffer->cur
1220     = skip_whitespace (pfile, CUR (context),
1221                        CPP_OPTION (pfile, discard_comments_in_macro_exp));
1222
1223   pfile->state.prevent_expansion++;
1224   _cpp_scan_out_logical_line (pfile, macro, false);
1225   pfile->state.prevent_expansion--;
1226
1227   _cpp_unsave_parameters (pfile, nparms);
1228
1229   if (macro)
1230     {
1231       /* Skip trailing white space.  */
1232       cur = pfile->out.base;
1233       limit = pfile->out.cur;
1234       while (limit > cur && is_space (limit[-1]))
1235         limit--;
1236       pfile->out.cur = limit;
1237       save_replacement_text (pfile, macro, 0);
1238     }
1239
1240   return macro;
1241 }
1242
1243 /* Copy SRC of length LEN to DEST, but convert all contiguous
1244    whitespace to a single space, provided it is not in quotes.  The
1245    quote currently in effect is pointed to by PQUOTE, and is updated
1246    by the function.  Returns the number of bytes copied.  */
1247 static size_t
1248 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1249 {
1250   uchar *orig_dest = dest;
1251   uchar quote = *pquote;
1252
1253   while (len)
1254     {
1255       if (is_space (*src) && !quote)
1256         {
1257           do
1258             src++, len--;
1259           while (len && is_space (*src));
1260           *dest++ = ' ';
1261         }
1262       else
1263         {
1264           if (*src == '\'' || *src == '"')
1265             {
1266               if (!quote)
1267                 quote = *src;
1268               else if (quote == *src)
1269                 quote = 0;
1270             }
1271           *dest++ = *src++, len--;
1272         }
1273     }
1274
1275   *pquote = quote;
1276   return dest - orig_dest;
1277 }
1278
1279 /* Returns true if MACRO1 and MACRO2 have expansions different other
1280    than in the form of their whitespace.  */
1281 bool
1282 _cpp_expansions_different_trad (const cpp_macro *macro1,
1283                                 const cpp_macro *macro2)
1284 {
1285   uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1286   uchar *p2 = p1 + macro1->count;
1287   uchar quote1 = 0, quote2 = 0;
1288   bool mismatch;
1289   size_t len1, len2;
1290
1291   if (macro1->paramc > 0)
1292     {
1293       const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1294
1295       mismatch = true;
1296       for (;;)
1297         {
1298           struct block *b1 = (struct block *) exp1;
1299           struct block *b2 = (struct block *) exp2;
1300
1301           if (b1->arg_index != b2->arg_index)
1302             break;
1303
1304           len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1305           len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1306           if (len1 != len2 || memcmp (p1, p2, len1))
1307             break;
1308           if (b1->arg_index == 0)
1309             {
1310               mismatch = false;
1311               break;
1312             }
1313           exp1 += BLOCK_LEN (b1->text_len);
1314           exp2 += BLOCK_LEN (b2->text_len);
1315         }
1316     }
1317   else
1318     {
1319       len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1320       len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1321       mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1322     }
1323
1324   free (p1);
1325   return mismatch;
1326 }