Avoid unnecessary dependencies on COND_EXEC insns.
[official-gcc.git] / gcc / cpplex.c
blob1dc401e8f08204dedbcef8169f4ac22d2728367b
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
37 #include "config.h"
38 #include "system.h"
39 #include "cpplib.h"
40 #include "cpphash.h"
41 #include "symcat.h"
/* How the spelling of a token is obtained.  Every token type is
   given one of these categories by the token_spellings table.
   NOTE(review): the earlier comment said SPELL_STRING tokens keep
   their length in token->val.name.len; the lexer routines in this
   file store string lengths in val.str.len -- confirm which field
   is meant.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR = 1,
  SPELL_IDENT = 2,
  SPELL_STRING = 3,
  SPELL_NONE = 4
};

/* One entry of the token_spellings table: the spelling category of a
   token type together with the text recorded for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
61 U":>", U"<%", U"%>"};
63 #define OP(e, s) { SPELL_OPERATOR, U s },
64 #define TK(e, s) { s, U STRINGX (e) },
65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66 #undef OP
67 #undef TK
69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
76 static int skip_block_comment PARAMS ((cpp_reader *));
77 static int skip_line_comment PARAMS ((cpp_reader *));
78 static void adjust_column PARAMS ((cpp_reader *));
79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
84 static void unterminated PARAMS ((cpp_reader *, int));
85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
94 /* Utility routine:
96 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
99 int
100 cpp_ideq (token, string)
101 const cpp_token *token;
102 const char *string;
104 if (token->type != CPP_NAME)
105 return 0;
107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
110 /* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112 static cppchar_t
113 handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
117 cppchar_t next = EOF;
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
137 buffer->read_ahead = next;
138 return next;
141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
146 static int
147 trigraph_ok (pfile, from_char)
148 cpp_reader *pfile;
149 cppchar_t from_char;
151 int accept = CPP_OPTION (pfile, trigraphs);
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
156 cpp_buffer *buffer = pfile->buffer;
157 if (accept)
158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
159 "trigraph ??%c converted to %c",
160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
162 else if (buffer->cur != buffer->last_Wtrigraphs)
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, buffer->lineno,
166 CPP_BUF_COL (buffer) - 2,
167 "trigraph ??%c ignored", (int) from_char);
171 return accept;
/* Give the token being built type T and consume the look-ahead
   character.  Relies on the enclosing function having variables
   named `buffer' and `result'.  */
#define ACCEPT_CHAR(t) \
  do \
    { \
      result->type = t; \
      buffer->read_ahead = EOF; \
    } \
  while (0)

/* Remember / rewind the read position of `buffer' using a local
   named `saved_cur'.  Per the original note: once multibyte
   character sets are supported these should also save and restore
   the state of the multibyte conversion library (probably a small
   "cookie"); they are outside the fast path in any case.  */
#define SAVE_STATE() \
  do \
    { \
      saved_cur = buffer->cur; \
    } \
  while (0)
#define RESTORE_STATE() \
  do \
    { \
      buffer->cur = saved_cur; \
    } \
  while (0)
186 /* Skips any escaped newlines introduced by NEXT, which is either a
187 '?' or a '\\'. Returns the next character, which will also have
188 been placed in buffer->read_ahead. This routine performs
189 preprocessing stages 1 and 2 of the ISO C standard. */
190 static cppchar_t
191 skip_escaped_newlines (buffer, next)
192 cpp_buffer *buffer;
193 cppchar_t next;
195 /* Only do this if we apply stages 1 and 2. */
196 if (!buffer->from_stage3)
198 cppchar_t next1;
199 const unsigned char *saved_cur;
200 int space;
204 if (buffer->cur == buffer->rlimit)
205 break;
207 SAVE_STATE ();
208 if (next == '?')
210 next1 = *buffer->cur++;
211 if (next1 != '?' || buffer->cur == buffer->rlimit)
213 RESTORE_STATE ();
214 break;
217 next1 = *buffer->cur++;
218 if (!_cpp_trigraph_map[next1]
219 || !trigraph_ok (buffer->pfile, next1))
221 RESTORE_STATE ();
222 break;
225 /* We have a full trigraph here. */
226 next = _cpp_trigraph_map[next1];
227 if (next != '\\' || buffer->cur == buffer->rlimit)
228 break;
229 SAVE_STATE ();
232 /* We have a backslash, and room for at least one more character. */
233 space = 0;
236 next1 = *buffer->cur++;
237 if (!is_nvspace (next1))
238 break;
239 space = 1;
241 while (buffer->cur < buffer->rlimit);
243 if (!is_vspace (next1))
245 RESTORE_STATE ();
246 break;
249 if (space && !buffer->pfile->state.lexing_comment)
250 cpp_warning (buffer->pfile,
251 "backslash and newline separated by space");
253 next = handle_newline (buffer, next1);
254 if (next == EOF)
255 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
257 while (next == '\\' || next == '?');
260 buffer->read_ahead = next;
261 return next;
264 /* Obtain the next character, after trigraph conversion and skipping
265 an arbitrary string of escaped newlines. The common case of no
266 trigraphs or escaped newlines falls through quickly. */
267 static cppchar_t
268 get_effective_char (buffer)
269 cpp_buffer *buffer;
271 cppchar_t next = EOF;
273 if (buffer->cur < buffer->rlimit)
275 next = *buffer->cur++;
277 /* '?' can introduce trigraphs (and therefore backslash); '\\'
278 can introduce escaped newlines, which we want to skip, or
279 UCNs, which, depending upon lexer state, we will handle in
280 the future. */
281 if (next == '?' || next == '\\')
282 next = skip_escaped_newlines (buffer, next);
285 buffer->read_ahead = next;
286 return next;
289 /* Skip a C-style block comment. We find the end of the comment by
290 seeing if an asterisk is before every '/' we encounter. Returns
291 non-zero if comment terminated by EOF, zero otherwise. */
292 static int
293 skip_block_comment (pfile)
294 cpp_reader *pfile;
296 cpp_buffer *buffer = pfile->buffer;
297 cppchar_t c = EOF, prevc = EOF;
299 pfile->state.lexing_comment = 1;
300 while (buffer->cur != buffer->rlimit)
302 prevc = c, c = *buffer->cur++;
304 next_char:
305 /* FIXME: For speed, create a new character class of characters
306 of interest inside block comments. */
307 if (c == '?' || c == '\\')
308 c = skip_escaped_newlines (buffer, c);
310 /* People like decorating comments with '*', so check for '/'
311 instead for efficiency. */
312 if (c == '/')
314 if (prevc == '*')
315 break;
317 /* Warn about potential nested comments, but not if the '/'
318 comes immediately before the true comment delimeter.
319 Don't bother to get it right across escaped newlines. */
320 if (CPP_OPTION (pfile, warn_comments)
321 && buffer->cur != buffer->rlimit)
323 prevc = c, c = *buffer->cur++;
324 if (c == '*' && buffer->cur != buffer->rlimit)
326 prevc = c, c = *buffer->cur++;
327 if (c != '/')
328 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
329 CPP_BUF_COL (buffer),
330 "\"/*\" within comment");
332 goto next_char;
335 else if (is_vspace (c))
337 prevc = c, c = handle_newline (buffer, c);
338 goto next_char;
340 else if (c == '\t')
341 adjust_column (pfile);
344 pfile->state.lexing_comment = 0;
345 buffer->read_ahead = EOF;
346 return c != '/' || prevc != '*';
349 /* Skip a C++ line comment. Handles escaped newlines. Returns
350 non-zero if a multiline comment. The following new line, if any,
351 is left in buffer->read_ahead. */
352 static int
353 skip_line_comment (pfile)
354 cpp_reader *pfile;
356 cpp_buffer *buffer = pfile->buffer;
357 unsigned int orig_lineno = buffer->lineno;
358 cppchar_t c;
360 pfile->state.lexing_comment = 1;
363 c = EOF;
364 if (buffer->cur == buffer->rlimit)
365 break;
367 c = *buffer->cur++;
368 if (c == '?' || c == '\\')
369 c = skip_escaped_newlines (buffer, c);
371 while (!is_vspace (c));
373 pfile->state.lexing_comment = 0;
374 buffer->read_ahead = c; /* Leave any newline for caller. */
375 return orig_lineno != buffer->lineno;
378 /* pfile->buffer->cur is one beyond the \t character. Update
379 col_adjust so we track the column correctly. */
380 static void
381 adjust_column (pfile)
382 cpp_reader *pfile;
384 cpp_buffer *buffer = pfile->buffer;
385 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
387 /* Round it up to multiple of the tabstop, but subtract 1 since the
388 tab itself occupies a character position. */
389 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
390 - col % CPP_OPTION (pfile, tabstop)) - 1;
393 /* Skips whitespace, saving the next non-whitespace character.
394 Adjusts pfile->col_adjust to account for tabs. Without this,
395 tokens might be assigned an incorrect column. */
396 static void
397 skip_whitespace (pfile, c)
398 cpp_reader *pfile;
399 cppchar_t c;
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int warned = 0;
406 /* Horizontal space always OK. */
407 if (c == ' ')
409 else if (c == '\t')
410 adjust_column (pfile);
411 /* Just \f \v or \0 left. */
412 else if (c == '\0')
414 if (!warned)
416 cpp_warning (pfile, "null character(s) ignored");
417 warned = 1;
420 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
421 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
422 CPP_BUF_COL (buffer),
423 "%s in preprocessing directive",
424 c == '\f' ? "form feed" : "vertical tab");
426 c = EOF;
427 if (buffer->cur == buffer->rlimit)
428 break;
429 c = *buffer->cur++;
431 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
432 while (is_nvspace (c));
434 /* Remember the next character. */
435 buffer->read_ahead = c;
438 /* See if the characters of a number token are valid in a name (no
439 '.', '+' or '-'). */
440 static int
441 name_p (pfile, string)
442 cpp_reader *pfile;
443 const cpp_string *string;
445 unsigned int i;
447 for (i = 0; i < string->len; i++)
448 if (!is_idchar (string->text[i]))
449 return 0;
451 return 1;
454 /* Parse an identifier, skipping embedded backslash-newlines.
455 Calculate the hash value of the token while parsing, for improved
456 performance. The hashing algorithm *must* match cpp_lookup(). */
458 static cpp_hashnode *
459 parse_identifier (pfile, c)
460 cpp_reader *pfile;
461 cppchar_t c;
463 cpp_hashnode *result;
464 cpp_buffer *buffer = pfile->buffer;
465 unsigned char *dest, *limit;
466 unsigned int r = 0, saw_dollar = 0;
468 dest = POOL_FRONT (&pfile->ident_pool);
469 limit = POOL_LIMIT (&pfile->ident_pool);
475 /* Need room for terminating null. */
476 if (dest + 1 >= limit)
477 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
479 *dest++ = c;
480 r = HASHSTEP (r, c);
482 if (c == '$')
483 saw_dollar++;
485 c = EOF;
486 if (buffer->cur == buffer->rlimit)
487 break;
489 c = *buffer->cur++;
491 while (is_idchar (c));
493 /* Potential escaped newline? */
494 if (c != '?' && c != '\\')
495 break;
496 c = skip_escaped_newlines (buffer, c);
498 while (is_idchar (c));
500 /* Remember the next character. */
501 buffer->read_ahead = c;
503 /* $ is not a identifier character in the standard, but is commonly
504 accepted as an extension. Don't warn about it in skipped
505 conditional blocks. */
506 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
507 cpp_pedwarn (pfile, "'$' character(s) in identifier");
509 /* Identifiers are null-terminated. */
510 *dest = '\0';
512 /* This routine commits the memory if necessary. */
513 result = _cpp_lookup_with_hash (pfile,
514 dest - POOL_FRONT (&pfile->ident_pool), r);
516 /* Some identifiers require diagnostics when lexed. */
517 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
519 /* It is allowed to poison the same identifier twice. */
520 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
521 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
523 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
524 replacement list of a variadic macro. */
525 if (result == pfile->spec_nodes.n__VA_ARGS__
526 && !pfile->state.va_args_ok)
527 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
530 return result;
533 /* Parse a number, skipping embedded backslash-newlines. */
534 static void
535 parse_number (pfile, number, c, leading_period)
536 cpp_reader *pfile;
537 cpp_string *number;
538 cppchar_t c;
539 int leading_period;
541 cpp_buffer *buffer = pfile->buffer;
542 cpp_pool *pool = &pfile->ident_pool;
543 unsigned char *dest, *limit;
545 dest = POOL_FRONT (pool);
546 limit = POOL_LIMIT (pool);
548 /* Place a leading period. */
549 if (leading_period)
551 if (dest >= limit)
552 limit = _cpp_next_chunk (pool, 0, &dest);
553 *dest++ = '.';
560 /* Need room for terminating null. */
561 if (dest + 1 >= limit)
562 limit = _cpp_next_chunk (pool, 0, &dest);
563 *dest++ = c;
565 c = EOF;
566 if (buffer->cur == buffer->rlimit)
567 break;
569 c = *buffer->cur++;
571 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
573 /* Potential escaped newline? */
574 if (c != '?' && c != '\\')
575 break;
576 c = skip_escaped_newlines (buffer, c);
578 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
580 /* Remember the next character. */
581 buffer->read_ahead = c;
583 /* Null-terminate the number. */
584 *dest = '\0';
586 number->text = POOL_FRONT (pool);
587 number->len = dest - number->text;
588 POOL_COMMIT (pool, number->len + 1);
591 /* Subroutine of parse_string. Emits error for unterminated strings. */
592 static void
593 unterminated (pfile, term)
594 cpp_reader *pfile;
595 int term;
597 cpp_error (pfile, "missing terminating %c character", term);
599 if (term == '\"' && pfile->mlstring_pos.line
600 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
602 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
603 pfile->mlstring_pos.col,
604 "possible start of unterminated string literal");
605 pfile->mlstring_pos.line = 0;
609 /* Subroutine of parse_string. */
610 static int
611 unescaped_terminator_p (pfile, dest)
612 cpp_reader *pfile;
613 const unsigned char *dest;
615 const unsigned char *start, *temp;
617 /* In #include-style directives, terminators are not escapeable. */
618 if (pfile->state.angled_headers)
619 return 1;
621 start = POOL_FRONT (&pfile->ident_pool);
623 /* An odd number of consecutive backslashes represents an escaped
624 terminator. */
625 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
628 return ((dest - temp) & 1) == 0;
631 /* Parses a string, character constant, or angle-bracketed header file
632 name. Handles embedded trigraphs and escaped newlines.
634 Multi-line strings are allowed, but they are deprecated within
635 directives. */
636 static void
637 parse_string (pfile, token, terminator)
638 cpp_reader *pfile;
639 cpp_token *token;
640 cppchar_t terminator;
642 cpp_buffer *buffer = pfile->buffer;
643 cpp_pool *pool = &pfile->ident_pool;
644 unsigned char *dest, *limit;
645 cppchar_t c;
646 unsigned int nulls = 0;
648 dest = POOL_FRONT (pool);
649 limit = POOL_LIMIT (pool);
651 for (;;)
653 if (buffer->cur == buffer->rlimit)
655 c = EOF;
656 unterminated (pfile, terminator);
657 break;
659 c = *buffer->cur++;
661 have_char:
662 /* Handle trigraphs, escaped newlines etc. */
663 if (c == '?' || c == '\\')
664 c = skip_escaped_newlines (buffer, c);
666 if (c == terminator && unescaped_terminator_p (pfile, dest))
668 c = EOF;
669 break;
671 else if (is_vspace (c))
673 /* In assembly language, silently terminate string and
674 character literals at end of line. This is a kludge
675 around not knowing where comments are. */
676 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
677 break;
679 /* Character constants and header names may not extend over
680 multiple lines. In Standard C, neither may strings.
681 Unfortunately, we accept multiline strings as an
682 extension, except in #include family directives. */
683 if (terminator != '"' || pfile->state.angled_headers)
685 unterminated (pfile, terminator);
686 break;
689 if (pfile->mlstring_pos.line == 0)
691 pfile->mlstring_pos = pfile->lexer_pos;
692 if (CPP_PEDANTIC (pfile))
693 cpp_pedwarn (pfile, "multi-line string constant");
696 handle_newline (buffer, c); /* Stores to read_ahead. */
697 c = '\n';
699 else if (c == '\0')
701 if (nulls++ == 0)
702 cpp_warning (pfile, "null character(s) preserved in literal");
705 /* No terminating null for strings - they could contain nulls. */
706 if (dest >= limit)
707 limit = _cpp_next_chunk (pool, 0, &dest);
708 *dest++ = c;
710 /* If we had a new line, the next character is in read_ahead. */
711 if (c != '\n')
712 continue;
713 c = buffer->read_ahead;
714 if (c != EOF)
715 goto have_char;
718 /* Remember the next character. */
719 buffer->read_ahead = c;
721 token->val.str.text = POOL_FRONT (pool);
722 token->val.str.len = dest - token->val.str.text;
723 POOL_COMMIT (pool, token->val.str.len);
726 /* The stored comment includes the comment start and any terminator. */
727 static void
728 save_comment (pfile, token, from)
729 cpp_reader *pfile;
730 cpp_token *token;
731 const unsigned char *from;
733 unsigned char *buffer;
734 unsigned int len;
736 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
737 /* C++ comments probably (not definitely) have moved past a new
738 line, which we don't want to save in the comment. */
739 if (pfile->buffer->read_ahead != EOF)
740 len--;
741 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
743 token->type = CPP_COMMENT;
744 token->val.str.len = len;
745 token->val.str.text = buffer;
747 buffer[0] = '/';
748 memcpy (buffer + 1, from, len - 1);
751 /* Subroutine of lex_token to handle '%'. A little tricky, since we
752 want to avoid stepping back when lexing %:%X. */
753 static void
754 lex_percent (buffer, result)
755 cpp_buffer *buffer;
756 cpp_token *result;
758 cppchar_t c;
760 result->type = CPP_MOD;
761 /* Parsing %:%X could leave an extra character. */
762 if (buffer->extra_char == EOF)
763 c = get_effective_char (buffer);
764 else
766 c = buffer->read_ahead = buffer->extra_char;
767 buffer->extra_char = EOF;
770 if (c == '=')
771 ACCEPT_CHAR (CPP_MOD_EQ);
772 else if (CPP_OPTION (buffer->pfile, digraphs))
774 if (c == ':')
776 result->flags |= DIGRAPH;
777 ACCEPT_CHAR (CPP_HASH);
778 if (get_effective_char (buffer) == '%')
780 buffer->extra_char = get_effective_char (buffer);
781 if (buffer->extra_char == ':')
783 buffer->extra_char = EOF;
784 ACCEPT_CHAR (CPP_PASTE);
786 else
787 /* We'll catch the extra_char when we're called back. */
788 buffer->read_ahead = '%';
791 else if (c == '>')
793 result->flags |= DIGRAPH;
794 ACCEPT_CHAR (CPP_CLOSE_BRACE);
799 /* Subroutine of lex_token to handle '.'. This is tricky, since we
800 want to avoid stepping back when lexing '...' or '.123'. In the
801 latter case we should also set a flag for parse_number. */
802 static void
803 lex_dot (pfile, result)
804 cpp_reader *pfile;
805 cpp_token *result;
807 cpp_buffer *buffer = pfile->buffer;
808 cppchar_t c;
810 /* Parsing ..X could leave an extra character. */
811 if (buffer->extra_char == EOF)
812 c = get_effective_char (buffer);
813 else
815 c = buffer->read_ahead = buffer->extra_char;
816 buffer->extra_char = EOF;
819 /* All known character sets have 0...9 contiguous. */
820 if (c >= '0' && c <= '9')
822 result->type = CPP_NUMBER;
823 parse_number (pfile, &result->val.str, c, 1);
825 else
827 result->type = CPP_DOT;
828 if (c == '.')
830 buffer->extra_char = get_effective_char (buffer);
831 if (buffer->extra_char == '.')
833 buffer->extra_char = EOF;
834 ACCEPT_CHAR (CPP_ELLIPSIS);
836 else
837 /* We'll catch the extra_char when we're called back. */
838 buffer->read_ahead = '.';
840 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
841 ACCEPT_CHAR (CPP_DOT_STAR);
845 void
846 _cpp_lex_token (pfile, result)
847 cpp_reader *pfile;
848 cpp_token *result;
850 cppchar_t c;
851 cpp_buffer *buffer;
852 const unsigned char *comment_start;
853 unsigned char bol;
855 skip:
856 bol = pfile->state.next_bol;
857 done_directive:
858 buffer = pfile->buffer;
859 pfile->state.next_bol = 0;
860 result->flags = 0;
861 next_char:
862 pfile->lexer_pos.line = buffer->lineno;
863 next_char2:
864 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
866 c = buffer->read_ahead;
867 if (c == EOF && buffer->cur < buffer->rlimit)
869 c = *buffer->cur++;
870 pfile->lexer_pos.col++;
873 do_switch:
874 buffer->read_ahead = EOF;
875 switch (c)
877 case EOF:
878 /* Non-empty files should end in a newline. Ignore for command
879 line and _Pragma buffers. */
880 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
881 cpp_pedwarn (pfile, "no newline at end of file");
882 pfile->state.next_bol = 1;
883 pfile->skipping = 0; /* In case missing #endif. */
884 result->type = CPP_EOF;
885 /* Don't do MI optimisation. */
886 return;
888 case ' ': case '\t': case '\f': case '\v': case '\0':
889 skip_whitespace (pfile, c);
890 result->flags |= PREV_WHITE;
891 goto next_char2;
893 case '\n': case '\r':
894 if (!pfile->state.in_directive)
896 handle_newline (buffer, c);
897 bol = 1;
898 pfile->lexer_pos.output_line = buffer->lineno;
899 /* This is a new line, so clear any white space flag.
900 Newlines in arguments are white space (6.10.3.10);
901 parse_arg takes care of that. */
902 result->flags &= ~PREV_WHITE;
903 goto next_char;
906 /* Don't let directives spill over to the next line. */
907 buffer->read_ahead = c;
908 pfile->state.next_bol = 1;
909 result->type = CPP_EOF;
910 /* Don't break; pfile->skipping might be true. */
911 return;
913 case '?':
914 case '\\':
915 /* These could start an escaped newline, or '?' a trigraph. Let
916 skip_escaped_newlines do all the work. */
918 unsigned int lineno = buffer->lineno;
920 c = skip_escaped_newlines (buffer, c);
921 if (lineno != buffer->lineno)
922 /* We had at least one escaped newline of some sort, and the
923 next character is in buffer->read_ahead. Update the
924 token's line and column. */
925 goto next_char;
927 /* We are either the original '?' or '\\', or a trigraph. */
928 result->type = CPP_QUERY;
929 buffer->read_ahead = EOF;
930 if (c == '\\')
931 goto random_char;
932 else if (c != '?')
933 goto do_switch;
935 break;
937 case '0': case '1': case '2': case '3': case '4':
938 case '5': case '6': case '7': case '8': case '9':
939 result->type = CPP_NUMBER;
940 parse_number (pfile, &result->val.str, c, 0);
941 break;
943 case '$':
944 if (!CPP_OPTION (pfile, dollars_in_ident))
945 goto random_char;
946 /* Fall through... */
948 case '_':
949 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
950 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
951 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
952 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
953 case 'y': case 'z':
954 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
955 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
956 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
957 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
958 case 'Y': case 'Z':
959 result->type = CPP_NAME;
960 result->val.node = parse_identifier (pfile, c);
962 /* 'L' may introduce wide characters or strings. */
963 if (result->val.node == pfile->spec_nodes.n_L)
965 c = buffer->read_ahead; /* For make_string. */
966 if (c == '\'' || c == '"')
968 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
969 goto make_string;
972 /* Convert named operators to their proper types. */
973 else if (result->val.node->flags & NODE_OPERATOR)
975 result->flags |= NAMED_OP;
976 result->type = result->val.node->value.operator;
978 break;
980 case '\'':
981 case '"':
982 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
983 make_string:
984 parse_string (pfile, result, c);
985 break;
987 case '/':
988 /* A potential block or line comment. */
989 comment_start = buffer->cur;
990 result->type = CPP_DIV;
991 c = get_effective_char (buffer);
992 if (c == '=')
993 ACCEPT_CHAR (CPP_DIV_EQ);
994 if (c != '/' && c != '*')
995 break;
997 if (c == '*')
999 if (skip_block_comment (pfile))
1000 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1001 pfile->lexer_pos.col,
1002 "unterminated comment");
1004 else
1006 if (!CPP_OPTION (pfile, cplusplus_comments)
1007 && !CPP_IN_SYSTEM_HEADER (pfile))
1008 break;
1010 /* Warn about comments only if pedantically GNUC89, and not
1011 in system headers. */
1012 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1013 && ! buffer->warned_cplusplus_comments)
1015 cpp_pedwarn (pfile,
1016 "C++ style comments are not allowed in ISO C89");
1017 cpp_pedwarn (pfile,
1018 "(this will be reported only once per input file)");
1019 buffer->warned_cplusplus_comments = 1;
1022 /* Skip_line_comment updates buffer->read_ahead. */
1023 if (skip_line_comment (pfile))
1024 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1025 pfile->lexer_pos.col,
1026 "multi-line comment");
1029 /* Skipping the comment has updated buffer->read_ahead. */
1030 if (!pfile->state.save_comments)
1032 result->flags |= PREV_WHITE;
1033 goto next_char;
1036 /* Save the comment as a token in its own right. */
1037 save_comment (pfile, result, comment_start);
1038 /* Don't do MI optimisation. */
1039 return;
1041 case '<':
1042 if (pfile->state.angled_headers)
1044 result->type = CPP_HEADER_NAME;
1045 c = '>'; /* terminator. */
1046 goto make_string;
1049 result->type = CPP_LESS;
1050 c = get_effective_char (buffer);
1051 if (c == '=')
1052 ACCEPT_CHAR (CPP_LESS_EQ);
1053 else if (c == '<')
1055 ACCEPT_CHAR (CPP_LSHIFT);
1056 if (get_effective_char (buffer) == '=')
1057 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1059 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1061 ACCEPT_CHAR (CPP_MIN);
1062 if (get_effective_char (buffer) == '=')
1063 ACCEPT_CHAR (CPP_MIN_EQ);
1065 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1067 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1068 result->flags |= DIGRAPH;
1070 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1072 ACCEPT_CHAR (CPP_OPEN_BRACE);
1073 result->flags |= DIGRAPH;
1075 break;
1077 case '>':
1078 result->type = CPP_GREATER;
1079 c = get_effective_char (buffer);
1080 if (c == '=')
1081 ACCEPT_CHAR (CPP_GREATER_EQ);
1082 else if (c == '>')
1084 ACCEPT_CHAR (CPP_RSHIFT);
1085 if (get_effective_char (buffer) == '=')
1086 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1088 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1090 ACCEPT_CHAR (CPP_MAX);
1091 if (get_effective_char (buffer) == '=')
1092 ACCEPT_CHAR (CPP_MAX_EQ);
1094 break;
1096 case '%':
1097 lex_percent (buffer, result);
1098 if (result->type == CPP_HASH)
1099 goto do_hash;
1100 break;
1102 case '.':
1103 lex_dot (pfile, result);
1104 break;
1106 case '+':
1107 result->type = CPP_PLUS;
1108 c = get_effective_char (buffer);
1109 if (c == '=')
1110 ACCEPT_CHAR (CPP_PLUS_EQ);
1111 else if (c == '+')
1112 ACCEPT_CHAR (CPP_PLUS_PLUS);
1113 break;
1115 case '-':
1116 result->type = CPP_MINUS;
1117 c = get_effective_char (buffer);
1118 if (c == '>')
1120 ACCEPT_CHAR (CPP_DEREF);
1121 if (CPP_OPTION (pfile, cplusplus)
1122 && get_effective_char (buffer) == '*')
1123 ACCEPT_CHAR (CPP_DEREF_STAR);
1125 else if (c == '=')
1126 ACCEPT_CHAR (CPP_MINUS_EQ);
1127 else if (c == '-')
1128 ACCEPT_CHAR (CPP_MINUS_MINUS);
1129 break;
1131 case '*':
1132 result->type = CPP_MULT;
1133 if (get_effective_char (buffer) == '=')
1134 ACCEPT_CHAR (CPP_MULT_EQ);
1135 break;
1137 case '=':
1138 result->type = CPP_EQ;
1139 if (get_effective_char (buffer) == '=')
1140 ACCEPT_CHAR (CPP_EQ_EQ);
1141 break;
1143 case '!':
1144 result->type = CPP_NOT;
1145 if (get_effective_char (buffer) == '=')
1146 ACCEPT_CHAR (CPP_NOT_EQ);
1147 break;
1149 case '&':
1150 result->type = CPP_AND;
1151 c = get_effective_char (buffer);
1152 if (c == '=')
1153 ACCEPT_CHAR (CPP_AND_EQ);
1154 else if (c == '&')
1155 ACCEPT_CHAR (CPP_AND_AND);
1156 break;
1158 case '#':
1159 c = buffer->extra_char; /* Can be set by error condition below. */
1160 if (c != EOF)
1162 buffer->read_ahead = c;
1163 buffer->extra_char = EOF;
1165 else
1166 c = get_effective_char (buffer);
1168 if (c == '#')
1170 ACCEPT_CHAR (CPP_PASTE);
1171 break;
1174 result->type = CPP_HASH;
1175 do_hash:
1176 if (bol)
1178 if (pfile->state.parsing_args)
1180 /* 6.10.3 paragraph 11: If there are sequences of
1181 preprocessing tokens within the list of arguments that
1182 would otherwise act as preprocessing directives, the
1183 behavior is undefined.
1185 This implementation will report a hard error, terminate
1186 the macro invocation, and proceed to process the
1187 directive. */
1188 cpp_error (pfile,
1189 "directives may not be used inside a macro argument");
1191 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1192 buffer->extra_char = buffer->read_ahead;
1193 buffer->read_ahead = '#';
1194 pfile->state.next_bol = 1;
1195 result->type = CPP_EOF;
1197 /* Get whitespace right - newline_in_args sets it. */
1198 if (pfile->lexer_pos.col == 1)
1199 result->flags &= ~PREV_WHITE;
1201 else
1203 /* This is the hash introducing a directive. */
1204 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1205 goto done_directive; /* bol still 1. */
1206 /* This is in fact an assembler #. */
1209 break;
1211 case '|':
1212 result->type = CPP_OR;
1213 c = get_effective_char (buffer);
1214 if (c == '=')
1215 ACCEPT_CHAR (CPP_OR_EQ);
1216 else if (c == '|')
1217 ACCEPT_CHAR (CPP_OR_OR);
1218 break;
1220 case '^':
1221 result->type = CPP_XOR;
1222 if (get_effective_char (buffer) == '=')
1223 ACCEPT_CHAR (CPP_XOR_EQ);
1224 break;
1226 case ':':
1227 result->type = CPP_COLON;
1228 c = get_effective_char (buffer);
1229 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1230 ACCEPT_CHAR (CPP_SCOPE);
1231 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1233 result->flags |= DIGRAPH;
1234 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1236 break;
1238 case '~': result->type = CPP_COMPL; break;
1239 case ',': result->type = CPP_COMMA; break;
1240 case '(': result->type = CPP_OPEN_PAREN; break;
1241 case ')': result->type = CPP_CLOSE_PAREN; break;
1242 case '[': result->type = CPP_OPEN_SQUARE; break;
1243 case ']': result->type = CPP_CLOSE_SQUARE; break;
1244 case '{': result->type = CPP_OPEN_BRACE; break;
1245 case '}': result->type = CPP_CLOSE_BRACE; break;
1246 case ';': result->type = CPP_SEMICOLON; break;
1248 case '@':
1249 if (CPP_OPTION (pfile, objc))
1251 /* In Objective C, '@' may begin keywords or strings, like
1252 @keyword or @"string". It would be nice to call
1253 get_effective_char here and test the result. However, we
1254 would then need to pass 2 characters to parse_identifier,
1255 making it ugly and slowing down its main loop. Instead,
1256 we assume we have an identifier, and recover if not. */
1257 result->type = CPP_NAME;
1258 result->val.node = parse_identifier (pfile, c);
1259 if (result->val.node->length != 1)
1260 break;
1262 /* OK, so it wasn't an identifier. Maybe a string? */
1263 if (buffer->read_ahead == '"')
1265 c = '"';
1266 ACCEPT_CHAR (CPP_OSTRING);
1267 goto make_string;
1270 goto random_char;
1272 random_char:
1273 default:
1274 result->type = CPP_OTHER;
1275 result->val.c = c;
1276 break;
1279 if (pfile->skipping)
1280 goto skip;
1282 /* If not in a directive, this token invalidates controlling macros. */
1283 if (!pfile->state.in_directive)
1284 pfile->mi_state = MI_FAILED;
1287 /* An upper bound on the number of bytes needed to spell a token,
1288 including preceding whitespace. */
1289 unsigned int
1290 cpp_token_len (token)
1291 const cpp_token *token;
1293 unsigned int len;
1295 switch (TOKEN_SPELL (token))
1297 default: len = 0; break;
1298 case SPELL_STRING: len = token->val.str.len; break;
1299 case SPELL_IDENT: len = token->val.node->length; break;
1301 /* 1 for whitespace, 4 for comment delimeters. */
1302 return len + 5;
1305 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1306 already contain the enough space to hold the token's spelling.
1307 Returns a pointer to the character after the last character
1308 written. */
1309 unsigned char *
1310 cpp_spell_token (pfile, token, buffer)
1311 cpp_reader *pfile; /* Would be nice to be rid of this... */
1312 const cpp_token *token;
1313 unsigned char *buffer;
1315 switch (TOKEN_SPELL (token))
1317 case SPELL_OPERATOR:
1319 const unsigned char *spelling;
1320 unsigned char c;
1322 if (token->flags & DIGRAPH)
1323 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1324 else if (token->flags & NAMED_OP)
1325 goto spell_ident;
1326 else
1327 spelling = TOKEN_NAME (token);
1329 while ((c = *spelling++) != '\0')
1330 *buffer++ = c;
1332 break;
1334 case SPELL_IDENT:
1335 spell_ident:
1336 memcpy (buffer, token->val.node->name, token->val.node->length);
1337 buffer += token->val.node->length;
1338 break;
1340 case SPELL_STRING:
1342 int left, right, tag;
1343 switch (token->type)
1345 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1346 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1347 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1348 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1349 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1350 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1351 default: left = '\0'; right = '\0'; tag = '\0'; break;
1353 if (tag) *buffer++ = tag;
1354 if (left) *buffer++ = left;
1355 memcpy (buffer, token->val.str.text, token->val.str.len);
1356 buffer += token->val.str.len;
1357 if (right) *buffer++ = right;
1359 break;
1361 case SPELL_CHAR:
1362 *buffer++ = token->val.c;
1363 break;
1365 case SPELL_NONE:
1366 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1367 break;
1370 return buffer;
1373 /* Returns a token as a null-terminated string. The string is
1374 temporary, and automatically freed later. Useful for diagnostics. */
1375 unsigned char *
1376 cpp_token_as_text (pfile, token)
1377 cpp_reader *pfile;
1378 const cpp_token *token;
1380 unsigned int len = cpp_token_len (token);
1381 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1383 end = cpp_spell_token (pfile, token, start);
1384 end[0] = '\0';
1386 return start;
1389 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1390 const char *
1391 cpp_type2name (type)
1392 enum cpp_ttype type;
1394 return (const char *) token_spellings[type].name;
1397 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1398 for efficiency - to avoid double-buffering. Also, outputs a space
1399 if PREV_WHITE is flagged. */
1400 void
1401 cpp_output_token (token, fp)
1402 const cpp_token *token;
1403 FILE *fp;
1405 if (token->flags & PREV_WHITE)
1406 putc (' ', fp);
1408 switch (TOKEN_SPELL (token))
1410 case SPELL_OPERATOR:
1412 const unsigned char *spelling;
1414 if (token->flags & DIGRAPH)
1415 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1416 else if (token->flags & NAMED_OP)
1417 goto spell_ident;
1418 else
1419 spelling = TOKEN_NAME (token);
1421 ufputs (spelling, fp);
1423 break;
1425 spell_ident:
1426 case SPELL_IDENT:
1427 ufputs (token->val.node->name, fp);
1428 break;
1430 case SPELL_STRING:
1432 int left, right, tag;
1433 switch (token->type)
1435 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1436 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1437 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1438 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1439 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1440 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1441 default: left = '\0'; right = '\0'; tag = '\0'; break;
1443 if (tag) putc (tag, fp);
1444 if (left) putc (left, fp);
1445 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1446 if (right) putc (right, fp);
1448 break;
1450 case SPELL_CHAR:
1451 putc (token->val.c, fp);
1452 break;
1454 case SPELL_NONE:
1455 /* An error, most probably. */
1456 break;
1460 /* Compare two tokens. */
1462 _cpp_equiv_tokens (a, b)
1463 const cpp_token *a, *b;
1465 if (a->type == b->type && a->flags == b->flags)
1466 switch (TOKEN_SPELL (a))
1468 default: /* Keep compiler happy. */
1469 case SPELL_OPERATOR:
1470 return 1;
1471 case SPELL_CHAR:
1472 return a->val.c == b->val.c; /* Character. */
1473 case SPELL_NONE:
1474 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1475 case SPELL_IDENT:
1476 return a->val.node == b->val.node;
1477 case SPELL_STRING:
1478 return (a->val.str.len == b->val.str.len
1479 && !memcmp (a->val.str.text, b->val.str.text,
1480 a->val.str.len));
1483 return 0;
#if 0
/* Compare two token lists.  Returns nonzero if A and B hold the same
   number of tokens and every corresponding pair satisfies
   _cpp_equiv_tokens.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int idx, count;

  count = a->limit - a->first;
  if (count != (b->limit - b->first))
    return 0;

  idx = 0;
  while (idx < count)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
	return 0;
      idx++;
    }

  return 1;
}
#endif
1506 /* Determine whether two tokens can be pasted together, and if so,
1507 what the resulting token is. Returns CPP_EOF if the tokens cannot
1508 be pasted, or the appropriate type for the merged token if they
1509 can. */
1510 enum cpp_ttype
1511 cpp_can_paste (pfile, token1, token2, digraph)
1512 cpp_reader * pfile;
1513 const cpp_token *token1, *token2;
1514 int* digraph;
1516 enum cpp_ttype a = token1->type, b = token2->type;
1517 int cxx = CPP_OPTION (pfile, cplusplus);
1519 /* Treat named operators as if they were ordinary NAMEs. */
1520 if (token1->flags & NAMED_OP)
1521 a = CPP_NAME;
1522 if (token2->flags & NAMED_OP)
1523 b = CPP_NAME;
1525 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1526 return a + (CPP_EQ_EQ - CPP_EQ);
1528 switch (a)
1530 case CPP_GREATER:
1531 if (b == a) return CPP_RSHIFT;
1532 if (b == CPP_QUERY && cxx) return CPP_MAX;
1533 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1534 break;
1535 case CPP_LESS:
1536 if (b == a) return CPP_LSHIFT;
1537 if (b == CPP_QUERY && cxx) return CPP_MIN;
1538 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1539 if (CPP_OPTION (pfile, digraphs))
1541 if (b == CPP_COLON)
1542 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1543 if (b == CPP_MOD)
1544 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1546 break;
1548 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1549 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1550 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1552 case CPP_MINUS:
1553 if (b == a) return CPP_MINUS_MINUS;
1554 if (b == CPP_GREATER) return CPP_DEREF;
1555 break;
1556 case CPP_COLON:
1557 if (b == a && cxx) return CPP_SCOPE;
1558 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1559 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1560 break;
1562 case CPP_MOD:
1563 if (CPP_OPTION (pfile, digraphs))
1565 if (b == CPP_GREATER)
1566 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1567 if (b == CPP_COLON)
1568 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1570 break;
1571 case CPP_DEREF:
1572 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1573 break;
1574 case CPP_DOT:
1575 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1576 if (b == CPP_NUMBER) return CPP_NUMBER;
1577 break;
1579 case CPP_HASH:
1580 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1581 /* %:%: digraph */
1582 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1583 break;
1585 case CPP_NAME:
1586 if (b == CPP_NAME) return CPP_NAME;
1587 if (b == CPP_NUMBER
1588 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1589 if (b == CPP_CHAR
1590 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1591 if (b == CPP_STRING
1592 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1593 break;
1595 case CPP_NUMBER:
1596 if (b == CPP_NUMBER) return CPP_NUMBER;
1597 if (b == CPP_NAME) return CPP_NUMBER;
1598 if (b == CPP_DOT) return CPP_NUMBER;
1599 /* Numbers cannot have length zero, so this is safe. */
1600 if ((b == CPP_PLUS || b == CPP_MINUS)
1601 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1602 return CPP_NUMBER;
1603 break;
1605 case CPP_OTHER:
1606 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1608 if (b == CPP_NAME) return CPP_NAME;
1609 if (b == CPP_STRING) return CPP_OSTRING;
1612 default:
1613 break;
1616 return CPP_EOF;
1619 /* Returns nonzero if a space should be inserted to avoid an
1620 accidental token paste for output. For simplicity, it is
1621 conservative, and occasionally advises a space where one is not
1622 needed, e.g. "." and ".2". */
1625 cpp_avoid_paste (pfile, token1, token2)
1626 cpp_reader *pfile;
1627 const cpp_token *token1, *token2;
1629 enum cpp_ttype a = token1->type, b = token2->type;
1630 cppchar_t c;
1632 if (token1->flags & NAMED_OP)
1633 a = CPP_NAME;
1634 if (token2->flags & NAMED_OP)
1635 b = CPP_NAME;
1637 c = EOF;
1638 if (token2->flags & DIGRAPH)
1639 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1640 else if (token_spellings[b].category == SPELL_OPERATOR)
1641 c = token_spellings[b].name[0];
1643 /* Quickly get everything that can paste with an '='. */
1644 if (a <= CPP_LAST_EQ && c == '=')
1645 return 1;
1647 switch (a)
1649 case CPP_GREATER: return c == '>' || c == '?';
1650 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1651 case CPP_PLUS: return c == '+';
1652 case CPP_MINUS: return c == '-' || c == '>';
1653 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1654 case CPP_MOD: return c == ':' || c == '>';
1655 case CPP_AND: return c == '&';
1656 case CPP_OR: return c == '|';
1657 case CPP_COLON: return c == ':' || c == '>';
1658 case CPP_DEREF: return c == '*';
1659 case CPP_DOT: return c == '.' || c == '%';
1660 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1661 case CPP_NAME: return ((b == CPP_NUMBER
1662 && name_p (pfile, &token2->val.str))
1663 || b == CPP_NAME
1664 || b == CPP_CHAR || b == CPP_STRING); /* L */
1665 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1666 || c == '.' || c == '+' || c == '-');
1667 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1668 && token1->val.c == '@'
1669 && (b == CPP_NAME || b == CPP_STRING));
1670 default: break;
1673 return 0;
1676 /* Output all the remaining tokens on the current line, and a newline
1677 character, to FP. Leading whitespace is removed. */
1678 void
1679 cpp_output_line (pfile, fp)
1680 cpp_reader *pfile;
1681 FILE *fp;
1683 cpp_token token;
1685 cpp_get_token (pfile, &token);
1686 token.flags &= ~PREV_WHITE;
1687 while (token.type != CPP_EOF)
1689 cpp_output_token (&token, fp);
1690 cpp_get_token (pfile, &token);
1693 putc ('\n', fp);
/* Memory pools.  */

/* Never instantiated; exists only so that DEFAULT_ALIGNMENT can be
   computed.  The union follows a single char, so its offset within
   the structure is the padding the compiler inserts to align a double
   or a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used for a pool when the caller does not request one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1710 static int
1711 chunk_suitable (pool, chunk, size)
1712 cpp_pool *pool;
1713 cpp_chunk *chunk;
1714 unsigned int size;
1716 /* Being at least twice SIZE means we can use memcpy in
1717 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1718 anyway. */
1719 return (chunk && pool->locked != chunk
1720 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1723 /* Returns the end of the new pool. PTR points to a char in the old
1724 pool, and is updated to point to the same char in the new pool. */
1725 unsigned char *
1726 _cpp_next_chunk (pool, len, ptr)
1727 cpp_pool *pool;
1728 unsigned int len;
1729 unsigned char **ptr;
1731 cpp_chunk *chunk = pool->cur->next;
1733 /* LEN is the minimum size we want in the new pool. */
1734 len += POOL_ROOM (pool);
1735 if (! chunk_suitable (pool, chunk, len))
1737 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1739 chunk->next = pool->cur->next;
1740 pool->cur->next = chunk;
1743 /* Update the pointer before changing chunk's front. */
1744 if (ptr)
1745 *ptr += chunk->base - POOL_FRONT (pool);
1747 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1748 chunk->front = chunk->base;
1750 pool->cur = chunk;
1751 return POOL_LIMIT (pool);
1754 static cpp_chunk *
1755 new_chunk (size)
1756 unsigned int size;
1758 unsigned char *base;
1759 cpp_chunk *result;
1761 size = ALIGN (size, DEFAULT_ALIGNMENT);
1762 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1763 /* Put the chunk descriptor at the end. Then chunk overruns will
1764 cause obvious chaos. */
1765 result = (cpp_chunk *) (base + size);
1766 result->base = base;
1767 result->front = base;
1768 result->limit = base + size;
1769 result->next = 0;
1771 return result;
1774 void
1775 _cpp_init_pool (pool, size, align, temp)
1776 cpp_pool *pool;
1777 unsigned int size, align, temp;
1779 if (align == 0)
1780 align = DEFAULT_ALIGNMENT;
1781 if (align & (align - 1))
1782 abort ();
1783 pool->align = align;
1784 pool->cur = new_chunk (size);
1785 pool->locked = 0;
1786 pool->locks = 0;
1787 if (temp)
1788 pool->cur->next = pool->cur;
1791 void
1792 _cpp_lock_pool (pool)
1793 cpp_pool *pool;
1795 if (pool->locks++ == 0)
1796 pool->locked = pool->cur;
1799 void
1800 _cpp_unlock_pool (pool)
1801 cpp_pool *pool;
1803 if (--pool->locks == 0)
1804 pool->locked = 0;
1807 void
1808 _cpp_free_pool (pool)
1809 cpp_pool *pool;
1811 cpp_chunk *chunk = pool->cur, *next;
1815 next = chunk->next;
1816 free (chunk->base);
1817 chunk = next;
1819 while (chunk && chunk != pool->cur);
1822 /* Reserve LEN bytes from a memory pool. */
1823 unsigned char *
1824 _cpp_pool_reserve (pool, len)
1825 cpp_pool *pool;
1826 unsigned int len;
1828 len = ALIGN (len, pool->align);
1829 if (len > (unsigned int) POOL_ROOM (pool))
1830 _cpp_next_chunk (pool, len, 0);
1832 return POOL_FRONT (pool);
1835 /* Allocate LEN bytes from a memory pool. */
1836 unsigned char *
1837 _cpp_pool_alloc (pool, len)
1838 cpp_pool *pool;
1839 unsigned int len;
1841 unsigned char *result = _cpp_pool_reserve (pool, len);
1843 POOL_COMMIT (pool, len);
1844 return result;