2001-04-09 Andrew MacLeod <amacleod@redhat.com>
[official-gcc.git] / gcc / cpplex.c
blobefc0aa570c76b1eb7e7a021721d23c62d7de3d3f
/* CPP Library - lexical analysis.
   Copyright (C) 2000 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000
   Single-pass line tokenization by Neil Booth, April 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
/* This lexer works with a single pass of the file.  Recently I
   re-wrote it to minimize the places where we step backwards in the
   input stream, to make future changes to support multi-byte
   character sets fairly straightforward.

   There is now only one routine where we do step backwards:
   skip_escaped_newlines.  This routine could probably also be changed
   so that it doesn't need to step back.  One possibility is to use a
   trick similar to that used in lex_dot and lex_percent.  Two
   extra characters might be needed, but skip_escaped_newlines itself
   would probably be the only place that needs to be aware of that,
   and changes to the remaining routines would probably only be needed
   if they process a backslash.  */
37 #include "config.h"
38 #include "system.h"
39 #include "cpplib.h"
40 #include "cpphash.h"
/* How a token's spelling is obtained.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length alongside it (the
   lexer in this file stores string lengths in token->val.str.len).  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
53 struct token_spelling
55 enum spell_type category;
56 const unsigned char *name;
/* Spellings of the digraph tokens.  */
const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                             U":>", U"<%", U"%>"};
62 #define OP(e, s) { SPELL_OPERATOR, U s },
63 #define TK(e, s) { s, U STRINGX (e) },
64 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
65 #undef OP
66 #undef TK
/* Look up the spelling category and the name of TOKEN, indexing the
   spelling table by the token's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
71 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
75 static int skip_block_comment PARAMS ((cpp_reader *));
76 static int skip_line_comment PARAMS ((cpp_reader *));
77 static void adjust_column PARAMS ((cpp_reader *));
78 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
80 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
81 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
82 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
83 static void unterminated PARAMS ((cpp_reader *, int));
84 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
85 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
86 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
87 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
90 static cpp_chunk *new_chunk PARAMS ((unsigned int));
91 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
93 /* Utility routine:
95 Compares, the token TOKEN to the NUL-terminated string STRING.
96 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
98 int
99 cpp_ideq (token, string)
100 const cpp_token *token;
101 const char *string;
103 if (token->type != CPP_NAME)
104 return 0;
106 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
109 /* Call when meeting a newline. Returns the character after the newline
110 (or carriage-return newline combination), or EOF. */
111 static cppchar_t
112 handle_newline (buffer, newline_char)
113 cpp_buffer *buffer;
114 cppchar_t newline_char;
116 cppchar_t next = EOF;
118 buffer->col_adjust = 0;
119 buffer->lineno++;
120 buffer->line_base = buffer->cur;
122 /* Handle CR-LF and LF-CR combinations, get the next character. */
123 if (buffer->cur < buffer->rlimit)
125 next = *buffer->cur++;
126 if (next + newline_char == '\r' + '\n')
128 buffer->line_base = buffer->cur;
129 if (buffer->cur < buffer->rlimit)
130 next = *buffer->cur++;
131 else
132 next = EOF;
136 buffer->read_ahead = next;
137 return next;
140 /* Subroutine of skip_escaped_newlines; called when a trigraph is
141 encountered. It warns if necessary, and returns true if the
142 trigraph should be honoured. FROM_CHAR is the third character of a
143 trigraph, and presumed to be the previous character for position
144 reporting. */
145 static int
146 trigraph_ok (pfile, from_char)
147 cpp_reader *pfile;
148 cppchar_t from_char;
150 int accept = CPP_OPTION (pfile, trigraphs);
152 /* Don't warn about trigraphs in comments. */
153 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
155 cpp_buffer *buffer = pfile->buffer;
156 if (accept)
157 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
158 "trigraph ??%c converted to %c",
159 (int) from_char,
160 (int) _cpp_trigraph_map[from_char]);
161 else if (buffer->cur != buffer->last_Wtrigraphs)
163 buffer->last_Wtrigraphs = buffer->cur;
164 cpp_warning_with_line (pfile, buffer->lineno,
165 CPP_BUF_COL (buffer) - 2,
166 "trigraph ??%c ignored", (int) from_char);
170 return accept;
/* Give the token being built the type T and discard the look-ahead
   character, which is thereby accepted as part of the token.  Assumes
   local variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)
/* Save and restore the read position of the buffer.  Both assume
   local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
185 /* Skips any escaped newlines introduced by NEXT, which is either a
186 '?' or a '\\'. Returns the next character, which will also have
187 been placed in buffer->read_ahead. This routine performs
188 preprocessing stages 1 and 2 of the ISO C standard. */
189 static cppchar_t
190 skip_escaped_newlines (buffer, next)
191 cpp_buffer *buffer;
192 cppchar_t next;
194 /* Only do this if we apply stages 1 and 2. */
195 if (!buffer->from_stage3)
197 cppchar_t next1;
198 const unsigned char *saved_cur;
199 int space;
203 if (buffer->cur == buffer->rlimit)
204 break;
206 SAVE_STATE ();
207 if (next == '?')
209 next1 = *buffer->cur++;
210 if (next1 != '?' || buffer->cur == buffer->rlimit)
212 RESTORE_STATE ();
213 break;
216 next1 = *buffer->cur++;
217 if (!_cpp_trigraph_map[next1]
218 || !trigraph_ok (buffer->pfile, next1))
220 RESTORE_STATE ();
221 break;
224 /* We have a full trigraph here. */
225 next = _cpp_trigraph_map[next1];
226 if (next != '\\' || buffer->cur == buffer->rlimit)
227 break;
228 SAVE_STATE ();
231 /* We have a backslash, and room for at least one more character. */
232 space = 0;
235 next1 = *buffer->cur++;
236 if (!is_nvspace (next1))
237 break;
238 space = 1;
240 while (buffer->cur < buffer->rlimit);
242 if (!is_vspace (next1))
244 RESTORE_STATE ();
245 break;
248 if (space && !buffer->pfile->state.lexing_comment)
249 cpp_warning (buffer->pfile,
250 "backslash and newline separated by space");
252 next = handle_newline (buffer, next1);
253 if (next == EOF)
254 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
256 while (next == '\\' || next == '?');
259 buffer->read_ahead = next;
260 return next;
263 /* Obtain the next character, after trigraph conversion and skipping
264 an arbitrary string of escaped newlines. The common case of no
265 trigraphs or escaped newlines falls through quickly. */
266 static cppchar_t
267 get_effective_char (buffer)
268 cpp_buffer *buffer;
270 cppchar_t next = EOF;
272 if (buffer->cur < buffer->rlimit)
274 next = *buffer->cur++;
276 /* '?' can introduce trigraphs (and therefore backslash); '\\'
277 can introduce escaped newlines, which we want to skip, or
278 UCNs, which, depending upon lexer state, we will handle in
279 the future. */
280 if (next == '?' || next == '\\')
281 next = skip_escaped_newlines (buffer, next);
284 buffer->read_ahead = next;
285 return next;
288 /* Skip a C-style block comment. We find the end of the comment by
289 seeing if an asterisk is before every '/' we encounter. Returns
290 non-zero if comment terminated by EOF, zero otherwise. */
291 static int
292 skip_block_comment (pfile)
293 cpp_reader *pfile;
295 cpp_buffer *buffer = pfile->buffer;
296 cppchar_t c = EOF, prevc = EOF;
298 pfile->state.lexing_comment = 1;
299 while (buffer->cur != buffer->rlimit)
301 prevc = c, c = *buffer->cur++;
303 next_char:
304 /* FIXME: For speed, create a new character class of characters
305 of interest inside block comments. */
306 if (c == '?' || c == '\\')
307 c = skip_escaped_newlines (buffer, c);
309 /* People like decorating comments with '*', so check for '/'
310 instead for efficiency. */
311 if (c == '/')
313 if (prevc == '*')
314 break;
316 /* Warn about potential nested comments, but not if the '/'
317 comes immediately before the true comment delimeter.
318 Don't bother to get it right across escaped newlines. */
319 if (CPP_OPTION (pfile, warn_comments)
320 && buffer->cur != buffer->rlimit)
322 prevc = c, c = *buffer->cur++;
323 if (c == '*' && buffer->cur != buffer->rlimit)
325 prevc = c, c = *buffer->cur++;
326 if (c != '/')
327 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
328 CPP_BUF_COL (buffer),
329 "\"/*\" within comment");
331 goto next_char;
334 else if (is_vspace (c))
336 prevc = c, c = handle_newline (buffer, c);
337 goto next_char;
339 else if (c == '\t')
340 adjust_column (pfile);
343 pfile->state.lexing_comment = 0;
344 buffer->read_ahead = EOF;
345 return c != '/' || prevc != '*';
348 /* Skip a C++ line comment. Handles escaped newlines. Returns
349 non-zero if a multiline comment. The following new line, if any,
350 is left in buffer->read_ahead. */
351 static int
352 skip_line_comment (pfile)
353 cpp_reader *pfile;
355 cpp_buffer *buffer = pfile->buffer;
356 unsigned int orig_lineno = buffer->lineno;
357 cppchar_t c;
359 pfile->state.lexing_comment = 1;
362 c = EOF;
363 if (buffer->cur == buffer->rlimit)
364 break;
366 c = *buffer->cur++;
367 if (c == '?' || c == '\\')
368 c = skip_escaped_newlines (buffer, c);
370 while (!is_vspace (c));
372 pfile->state.lexing_comment = 0;
373 buffer->read_ahead = c; /* Leave any newline for caller. */
374 return orig_lineno != buffer->lineno;
377 /* pfile->buffer->cur is one beyond the \t character. Update
378 col_adjust so we track the column correctly. */
379 static void
380 adjust_column (pfile)
381 cpp_reader *pfile;
383 cpp_buffer *buffer = pfile->buffer;
384 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
386 /* Round it up to multiple of the tabstop, but subtract 1 since the
387 tab itself occupies a character position. */
388 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
389 - col % CPP_OPTION (pfile, tabstop)) - 1;
392 /* Skips whitespace, saving the next non-whitespace character.
393 Adjusts pfile->col_adjust to account for tabs. Without this,
394 tokens might be assigned an incorrect column. */
395 static void
396 skip_whitespace (pfile, c)
397 cpp_reader *pfile;
398 cppchar_t c;
400 cpp_buffer *buffer = pfile->buffer;
401 unsigned int warned = 0;
405 /* Horizontal space always OK. */
406 if (c == ' ')
408 else if (c == '\t')
409 adjust_column (pfile);
410 /* Just \f \v or \0 left. */
411 else if (c == '\0')
413 if (!warned)
415 cpp_warning (pfile, "null character(s) ignored");
416 warned = 1;
419 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
420 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
421 CPP_BUF_COL (buffer),
422 "%s in preprocessing directive",
423 c == '\f' ? "form feed" : "vertical tab");
425 c = EOF;
426 if (buffer->cur == buffer->rlimit)
427 break;
428 c = *buffer->cur++;
430 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
431 while (is_nvspace (c));
433 /* Remember the next character. */
434 buffer->read_ahead = c;
437 /* See if the characters of a number token are valid in a name (no
438 '.', '+' or '-'). */
439 static int
440 name_p (pfile, string)
441 cpp_reader *pfile;
442 const cpp_string *string;
444 unsigned int i;
446 for (i = 0; i < string->len; i++)
447 if (!is_idchar (string->text[i]))
448 return 0;
450 return 1;
453 /* Parse an identifier, skipping embedded backslash-newlines.
454 Calculate the hash value of the token while parsing, for improved
455 performance. The hashing algorithm *must* match cpp_lookup(). */
457 static cpp_hashnode *
458 parse_identifier (pfile, c)
459 cpp_reader *pfile;
460 cppchar_t c;
462 cpp_hashnode *result;
463 cpp_buffer *buffer = pfile->buffer;
464 unsigned char *dest, *limit;
465 unsigned int r = 0, saw_dollar = 0;
467 dest = POOL_FRONT (&pfile->ident_pool);
468 limit = POOL_LIMIT (&pfile->ident_pool);
474 /* Need room for terminating null. */
475 if (dest + 1 >= limit)
476 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
478 *dest++ = c;
479 r = HASHSTEP (r, c);
481 if (c == '$')
482 saw_dollar++;
484 c = EOF;
485 if (buffer->cur == buffer->rlimit)
486 break;
488 c = *buffer->cur++;
490 while (is_idchar (c));
492 /* Potential escaped newline? */
493 if (c != '?' && c != '\\')
494 break;
495 c = skip_escaped_newlines (buffer, c);
497 while (is_idchar (c));
499 /* Remember the next character. */
500 buffer->read_ahead = c;
502 /* $ is not a identifier character in the standard, but is commonly
503 accepted as an extension. Don't warn about it in skipped
504 conditional blocks. */
505 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
506 cpp_pedwarn (pfile, "'$' character(s) in identifier");
508 /* Identifiers are null-terminated. */
509 *dest = '\0';
511 /* This routine commits the memory if necessary. */
512 result = _cpp_lookup_with_hash (pfile,
513 dest - POOL_FRONT (&pfile->ident_pool), r);
515 /* Some identifiers require diagnostics when lexed. */
516 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
518 /* It is allowed to poison the same identifier twice. */
519 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
520 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result == pfile->spec_nodes.n__VA_ARGS__
525 && !pfile->state.va_args_ok)
526 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
529 return result;
532 /* Parse a number, skipping embedded backslash-newlines. */
533 static void
534 parse_number (pfile, number, c, leading_period)
535 cpp_reader *pfile;
536 cpp_string *number;
537 cppchar_t c;
538 int leading_period;
540 cpp_buffer *buffer = pfile->buffer;
541 cpp_pool *pool = &pfile->ident_pool;
542 unsigned char *dest, *limit;
544 dest = POOL_FRONT (pool);
545 limit = POOL_LIMIT (pool);
547 /* Place a leading period. */
548 if (leading_period)
550 if (dest >= limit)
551 limit = _cpp_next_chunk (pool, 0, &dest);
552 *dest++ = '.';
559 /* Need room for terminating null. */
560 if (dest + 1 >= limit)
561 limit = _cpp_next_chunk (pool, 0, &dest);
562 *dest++ = c;
564 c = EOF;
565 if (buffer->cur == buffer->rlimit)
566 break;
568 c = *buffer->cur++;
570 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
572 /* Potential escaped newline? */
573 if (c != '?' && c != '\\')
574 break;
575 c = skip_escaped_newlines (buffer, c);
577 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
579 /* Remember the next character. */
580 buffer->read_ahead = c;
582 /* Null-terminate the number. */
583 *dest = '\0';
585 number->text = POOL_FRONT (pool);
586 number->len = dest - number->text;
587 POOL_COMMIT (pool, number->len + 1);
590 /* Subroutine of parse_string. Emits error for unterminated strings. */
591 static void
592 unterminated (pfile, term)
593 cpp_reader *pfile;
594 int term;
596 cpp_error (pfile, "missing terminating %c character", term);
598 if (term == '\"' && pfile->mlstring_pos.line
599 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
601 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
602 pfile->mlstring_pos.col,
603 "possible start of unterminated string literal");
604 pfile->mlstring_pos.line = 0;
608 /* Subroutine of parse_string. */
609 static int
610 unescaped_terminator_p (pfile, dest)
611 cpp_reader *pfile;
612 const unsigned char *dest;
614 const unsigned char *start, *temp;
616 /* In #include-style directives, terminators are not escapeable. */
617 if (pfile->state.angled_headers)
618 return 1;
620 start = POOL_FRONT (&pfile->ident_pool);
622 /* An odd number of consecutive backslashes represents an escaped
623 terminator. */
624 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
627 return ((dest - temp) & 1) == 0;
630 /* Parses a string, character constant, or angle-bracketed header file
631 name. Handles embedded trigraphs and escaped newlines. The stored
632 string is guaranteed NUL-terminated, but it is not guaranteed that
633 this is the first NUL since embedded NULs are preserved.
635 Multi-line strings are allowed, but they are deprecated. */
636 static void
637 parse_string (pfile, token, terminator)
638 cpp_reader *pfile;
639 cpp_token *token;
640 cppchar_t terminator;
642 cpp_buffer *buffer = pfile->buffer;
643 cpp_pool *pool = &pfile->ident_pool;
644 unsigned char *dest, *limit;
645 cppchar_t c;
646 unsigned int nulls = 0;
648 dest = POOL_FRONT (pool);
649 limit = POOL_LIMIT (pool);
651 for (;;)
653 if (buffer->cur == buffer->rlimit)
654 c = EOF;
655 else
656 c = *buffer->cur++;
658 have_char:
659 /* We need space for the terminating NUL. */
660 if (dest >= limit)
661 limit = _cpp_next_chunk (pool, 0, &dest);
663 if (c == EOF)
665 unterminated (pfile, terminator);
666 break;
669 /* Handle trigraphs, escaped newlines etc. */
670 if (c == '?' || c == '\\')
671 c = skip_escaped_newlines (buffer, c);
673 if (c == terminator && unescaped_terminator_p (pfile, dest))
675 c = EOF;
676 break;
678 else if (is_vspace (c))
680 /* In assembly language, silently terminate string and
681 character literals at end of line. This is a kludge
682 around not knowing where comments are. */
683 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
684 break;
686 /* Character constants and header names may not extend over
687 multiple lines. In Standard C, neither may strings.
688 Unfortunately, we accept multiline strings as an
689 extension, except in #include family directives. */
690 if (terminator != '"' || pfile->state.angled_headers)
692 unterminated (pfile, terminator);
693 break;
696 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
697 if (pfile->mlstring_pos.line == 0)
698 pfile->mlstring_pos = pfile->lexer_pos;
700 c = handle_newline (buffer, c);
701 *dest++ = '\n';
702 goto have_char;
704 else if (c == '\0')
706 if (nulls++ == 0)
707 cpp_warning (pfile, "null character(s) preserved in literal");
710 *dest++ = c;
713 /* Remember the next character. */
714 buffer->read_ahead = c;
715 *dest = '\0';
717 token->val.str.text = POOL_FRONT (pool);
718 token->val.str.len = dest - token->val.str.text;
719 POOL_COMMIT (pool, token->val.str.len + 1);
722 /* The stored comment includes the comment start and any terminator. */
723 static void
724 save_comment (pfile, token, from)
725 cpp_reader *pfile;
726 cpp_token *token;
727 const unsigned char *from;
729 unsigned char *buffer;
730 unsigned int len;
732 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
733 /* C++ comments probably (not definitely) have moved past a new
734 line, which we don't want to save in the comment. */
735 if (pfile->buffer->read_ahead != EOF)
736 len--;
737 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
739 token->type = CPP_COMMENT;
740 token->val.str.len = len;
741 token->val.str.text = buffer;
743 buffer[0] = '/';
744 memcpy (buffer + 1, from, len - 1);
747 /* Subroutine of lex_token to handle '%'. A little tricky, since we
748 want to avoid stepping back when lexing %:%X. */
749 static void
750 lex_percent (buffer, result)
751 cpp_buffer *buffer;
752 cpp_token *result;
754 cppchar_t c;
756 result->type = CPP_MOD;
757 /* Parsing %:%X could leave an extra character. */
758 if (buffer->extra_char == EOF)
759 c = get_effective_char (buffer);
760 else
762 c = buffer->read_ahead = buffer->extra_char;
763 buffer->extra_char = EOF;
766 if (c == '=')
767 ACCEPT_CHAR (CPP_MOD_EQ);
768 else if (CPP_OPTION (buffer->pfile, digraphs))
770 if (c == ':')
772 result->flags |= DIGRAPH;
773 ACCEPT_CHAR (CPP_HASH);
774 if (get_effective_char (buffer) == '%')
776 buffer->extra_char = get_effective_char (buffer);
777 if (buffer->extra_char == ':')
779 buffer->extra_char = EOF;
780 ACCEPT_CHAR (CPP_PASTE);
782 else
783 /* We'll catch the extra_char when we're called back. */
784 buffer->read_ahead = '%';
787 else if (c == '>')
789 result->flags |= DIGRAPH;
790 ACCEPT_CHAR (CPP_CLOSE_BRACE);
795 /* Subroutine of lex_token to handle '.'. This is tricky, since we
796 want to avoid stepping back when lexing '...' or '.123'. In the
797 latter case we should also set a flag for parse_number. */
798 static void
799 lex_dot (pfile, result)
800 cpp_reader *pfile;
801 cpp_token *result;
803 cpp_buffer *buffer = pfile->buffer;
804 cppchar_t c;
806 /* Parsing ..X could leave an extra character. */
807 if (buffer->extra_char == EOF)
808 c = get_effective_char (buffer);
809 else
811 c = buffer->read_ahead = buffer->extra_char;
812 buffer->extra_char = EOF;
815 /* All known character sets have 0...9 contiguous. */
816 if (c >= '0' && c <= '9')
818 result->type = CPP_NUMBER;
819 parse_number (pfile, &result->val.str, c, 1);
821 else
823 result->type = CPP_DOT;
824 if (c == '.')
826 buffer->extra_char = get_effective_char (buffer);
827 if (buffer->extra_char == '.')
829 buffer->extra_char = EOF;
830 ACCEPT_CHAR (CPP_ELLIPSIS);
832 else
833 /* We'll catch the extra_char when we're called back. */
834 buffer->read_ahead = '.';
836 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
837 ACCEPT_CHAR (CPP_DOT_STAR);
841 void
842 _cpp_lex_token (pfile, result)
843 cpp_reader *pfile;
844 cpp_token *result;
846 cppchar_t c;
847 cpp_buffer *buffer;
848 const unsigned char *comment_start;
849 unsigned char bol;
851 skip:
852 bol = pfile->state.next_bol;
853 done_directive:
854 buffer = pfile->buffer;
855 pfile->state.next_bol = 0;
856 result->flags = buffer->saved_flags;
857 buffer->saved_flags = 0;
858 next_char:
859 pfile->lexer_pos.line = buffer->lineno;
860 next_char2:
861 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
863 c = buffer->read_ahead;
864 if (c == EOF && buffer->cur < buffer->rlimit)
866 c = *buffer->cur++;
867 pfile->lexer_pos.col++;
870 do_switch:
871 buffer->read_ahead = EOF;
872 switch (c)
874 case EOF:
875 /* Non-empty files should end in a newline. Ignore for command
876 line and _Pragma buffers. */
877 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
878 cpp_pedwarn (pfile, "no newline at end of file");
879 pfile->state.next_bol = 1;
880 pfile->skipping = 0; /* In case missing #endif. */
881 result->type = CPP_EOF;
882 /* Don't do MI optimisation. */
883 return;
885 case ' ': case '\t': case '\f': case '\v': case '\0':
886 skip_whitespace (pfile, c);
887 result->flags |= PREV_WHITE;
888 goto next_char2;
890 case '\n': case '\r':
891 if (!pfile->state.in_directive)
893 handle_newline (buffer, c);
894 bol = 1;
895 pfile->lexer_pos.output_line = buffer->lineno;
896 /* This is a new line, so clear any white space flag.
897 Newlines in arguments are white space (6.10.3.10);
898 parse_arg takes care of that. */
899 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
900 goto next_char;
903 /* Don't let directives spill over to the next line. */
904 buffer->read_ahead = c;
905 pfile->state.next_bol = 1;
906 result->type = CPP_EOF;
907 /* Don't break; pfile->skipping might be true. */
908 return;
910 case '?':
911 case '\\':
912 /* These could start an escaped newline, or '?' a trigraph. Let
913 skip_escaped_newlines do all the work. */
915 unsigned int lineno = buffer->lineno;
917 c = skip_escaped_newlines (buffer, c);
918 if (lineno != buffer->lineno)
919 /* We had at least one escaped newline of some sort, and the
920 next character is in buffer->read_ahead. Update the
921 token's line and column. */
922 goto next_char;
924 /* We are either the original '?' or '\\', or a trigraph. */
925 result->type = CPP_QUERY;
926 buffer->read_ahead = EOF;
927 if (c == '\\')
928 goto random_char;
929 else if (c != '?')
930 goto do_switch;
932 break;
934 case '0': case '1': case '2': case '3': case '4':
935 case '5': case '6': case '7': case '8': case '9':
936 result->type = CPP_NUMBER;
937 parse_number (pfile, &result->val.str, c, 0);
938 break;
940 case '$':
941 if (!CPP_OPTION (pfile, dollars_in_ident))
942 goto random_char;
943 /* Fall through... */
945 case '_':
946 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
947 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
948 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
949 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
950 case 'y': case 'z':
951 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
952 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
953 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
954 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
955 case 'Y': case 'Z':
956 result->type = CPP_NAME;
957 result->val.node = parse_identifier (pfile, c);
959 /* 'L' may introduce wide characters or strings. */
960 if (result->val.node == pfile->spec_nodes.n_L)
962 c = buffer->read_ahead; /* For make_string. */
963 if (c == '\'' || c == '"')
965 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
966 goto make_string;
969 /* Convert named operators to their proper types. */
970 else if (result->val.node->flags & NODE_OPERATOR)
972 result->flags |= NAMED_OP;
973 result->type = result->val.node->value.operator;
975 break;
977 case '\'':
978 case '"':
979 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
980 make_string:
981 parse_string (pfile, result, c);
982 break;
984 case '/':
985 /* A potential block or line comment. */
986 comment_start = buffer->cur;
987 result->type = CPP_DIV;
988 c = get_effective_char (buffer);
989 if (c == '=')
990 ACCEPT_CHAR (CPP_DIV_EQ);
991 if (c != '/' && c != '*')
992 break;
994 if (c == '*')
996 if (skip_block_comment (pfile))
997 cpp_error_with_line (pfile, pfile->lexer_pos.line,
998 pfile->lexer_pos.col,
999 "unterminated comment");
1001 else
1003 if (!CPP_OPTION (pfile, cplusplus_comments)
1004 && !CPP_IN_SYSTEM_HEADER (pfile))
1005 break;
1007 /* Warn about comments only if pedantically GNUC89, and not
1008 in system headers. */
1009 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1010 && ! buffer->warned_cplusplus_comments)
1012 cpp_pedwarn (pfile,
1013 "C++ style comments are not allowed in ISO C89");
1014 cpp_pedwarn (pfile,
1015 "(this will be reported only once per input file)");
1016 buffer->warned_cplusplus_comments = 1;
1019 /* Skip_line_comment updates buffer->read_ahead. */
1020 if (skip_line_comment (pfile))
1021 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1022 pfile->lexer_pos.col,
1023 "multi-line comment");
1026 /* Skipping the comment has updated buffer->read_ahead. */
1027 if (!pfile->state.save_comments)
1029 result->flags |= PREV_WHITE;
1030 goto next_char;
1033 /* Save the comment as a token in its own right. */
1034 save_comment (pfile, result, comment_start);
1035 /* Don't do MI optimisation. */
1036 return;
1038 case '<':
1039 if (pfile->state.angled_headers)
1041 result->type = CPP_HEADER_NAME;
1042 c = '>'; /* terminator. */
1043 goto make_string;
1046 result->type = CPP_LESS;
1047 c = get_effective_char (buffer);
1048 if (c == '=')
1049 ACCEPT_CHAR (CPP_LESS_EQ);
1050 else if (c == '<')
1052 ACCEPT_CHAR (CPP_LSHIFT);
1053 if (get_effective_char (buffer) == '=')
1054 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1056 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1058 ACCEPT_CHAR (CPP_MIN);
1059 if (get_effective_char (buffer) == '=')
1060 ACCEPT_CHAR (CPP_MIN_EQ);
1062 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1064 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1065 result->flags |= DIGRAPH;
1067 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1069 ACCEPT_CHAR (CPP_OPEN_BRACE);
1070 result->flags |= DIGRAPH;
1072 break;
1074 case '>':
1075 result->type = CPP_GREATER;
1076 c = get_effective_char (buffer);
1077 if (c == '=')
1078 ACCEPT_CHAR (CPP_GREATER_EQ);
1079 else if (c == '>')
1081 ACCEPT_CHAR (CPP_RSHIFT);
1082 if (get_effective_char (buffer) == '=')
1083 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1085 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1087 ACCEPT_CHAR (CPP_MAX);
1088 if (get_effective_char (buffer) == '=')
1089 ACCEPT_CHAR (CPP_MAX_EQ);
1091 break;
1093 case '%':
1094 lex_percent (buffer, result);
1095 if (result->type == CPP_HASH)
1096 goto do_hash;
1097 break;
1099 case '.':
1100 lex_dot (pfile, result);
1101 break;
1103 case '+':
1104 result->type = CPP_PLUS;
1105 c = get_effective_char (buffer);
1106 if (c == '=')
1107 ACCEPT_CHAR (CPP_PLUS_EQ);
1108 else if (c == '+')
1109 ACCEPT_CHAR (CPP_PLUS_PLUS);
1110 break;
1112 case '-':
1113 result->type = CPP_MINUS;
1114 c = get_effective_char (buffer);
1115 if (c == '>')
1117 ACCEPT_CHAR (CPP_DEREF);
1118 if (CPP_OPTION (pfile, cplusplus)
1119 && get_effective_char (buffer) == '*')
1120 ACCEPT_CHAR (CPP_DEREF_STAR);
1122 else if (c == '=')
1123 ACCEPT_CHAR (CPP_MINUS_EQ);
1124 else if (c == '-')
1125 ACCEPT_CHAR (CPP_MINUS_MINUS);
1126 break;
1128 case '*':
1129 result->type = CPP_MULT;
1130 if (get_effective_char (buffer) == '=')
1131 ACCEPT_CHAR (CPP_MULT_EQ);
1132 break;
1134 case '=':
1135 result->type = CPP_EQ;
1136 if (get_effective_char (buffer) == '=')
1137 ACCEPT_CHAR (CPP_EQ_EQ);
1138 break;
1140 case '!':
1141 result->type = CPP_NOT;
1142 if (get_effective_char (buffer) == '=')
1143 ACCEPT_CHAR (CPP_NOT_EQ);
1144 break;
1146 case '&':
1147 result->type = CPP_AND;
1148 c = get_effective_char (buffer);
1149 if (c == '=')
1150 ACCEPT_CHAR (CPP_AND_EQ);
1151 else if (c == '&')
1152 ACCEPT_CHAR (CPP_AND_AND);
1153 break;
1155 case '#':
1156 c = buffer->extra_char; /* Can be set by error condition below. */
1157 if (c != EOF)
1159 buffer->read_ahead = c;
1160 buffer->extra_char = EOF;
1162 else
1163 c = get_effective_char (buffer);
1165 if (c == '#')
1167 ACCEPT_CHAR (CPP_PASTE);
1168 break;
1171 result->type = CPP_HASH;
1172 do_hash:
1173 if (!bol)
1174 break;
1175 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1176 tokens within the list of arguments that would otherwise act
1177 as preprocessing directives, the behavior is undefined.
1179 This implementation will report a hard error, terminate the
1180 macro invocation, and proceed to process the directive. */
1181 if (pfile->state.parsing_args)
1183 if (pfile->state.parsing_args == 2)
1184 cpp_error (pfile,
1185 "directives may not be used inside a macro argument");
1187 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1188 buffer->extra_char = buffer->read_ahead;
1189 buffer->read_ahead = '#';
1190 pfile->state.next_bol = 1;
1191 result->type = CPP_EOF;
1193 /* Get whitespace right - newline_in_args sets it. */
1194 if (pfile->lexer_pos.col == 1)
1195 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1197 else
1199 /* This is the hash introducing a directive. */
1200 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1201 goto done_directive; /* bol still 1. */
1202 /* This is in fact an assembler #. */
1204 break;
1206 case '|':
1207 result->type = CPP_OR;
1208 c = get_effective_char (buffer);
1209 if (c == '=')
1210 ACCEPT_CHAR (CPP_OR_EQ);
1211 else if (c == '|')
1212 ACCEPT_CHAR (CPP_OR_OR);
1213 break;
1215 case '^':
1216 result->type = CPP_XOR;
1217 if (get_effective_char (buffer) == '=')
1218 ACCEPT_CHAR (CPP_XOR_EQ);
1219 break;
1221 case ':':
1222 result->type = CPP_COLON;
1223 c = get_effective_char (buffer);
1224 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1225 ACCEPT_CHAR (CPP_SCOPE);
1226 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1228 result->flags |= DIGRAPH;
1229 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1231 break;
1233 case '~': result->type = CPP_COMPL; break;
1234 case ',': result->type = CPP_COMMA; break;
1235 case '(': result->type = CPP_OPEN_PAREN; break;
1236 case ')': result->type = CPP_CLOSE_PAREN; break;
1237 case '[': result->type = CPP_OPEN_SQUARE; break;
1238 case ']': result->type = CPP_CLOSE_SQUARE; break;
1239 case '{': result->type = CPP_OPEN_BRACE; break;
1240 case '}': result->type = CPP_CLOSE_BRACE; break;
1241 case ';': result->type = CPP_SEMICOLON; break;
1243 /* @ is a punctuator in Objective C. */
1244 case '@': result->type = CPP_ATSIGN; break;
1246 random_char:
1247 default:
1248 result->type = CPP_OTHER;
1249 result->val.c = c;
1250 break;
1253 if (pfile->skipping)
1254 goto skip;
1256 /* If not in a directive, this token invalidates controlling macros. */
1257 if (!pfile->state.in_directive)
1258 pfile->mi_state = MI_FAILED;
1261 /* An upper bound on the number of bytes needed to spell a token,
1262 including preceding whitespace. */
1263 unsigned int
1264 cpp_token_len (token)
1265 const cpp_token *token;
1267 unsigned int len;
1269 switch (TOKEN_SPELL (token))
1271 default: len = 0; break;
1272 case SPELL_STRING: len = token->val.str.len; break;
1273 case SPELL_IDENT: len = token->val.node->length; break;
1275 /* 1 for whitespace, 4 for comment delimeters. */
1276 return len + 5;
1279 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1280 already contain the enough space to hold the token's spelling.
1281 Returns a pointer to the character after the last character
1282 written. */
1283 unsigned char *
1284 cpp_spell_token (pfile, token, buffer)
1285 cpp_reader *pfile; /* Would be nice to be rid of this... */
1286 const cpp_token *token;
1287 unsigned char *buffer;
1289 switch (TOKEN_SPELL (token))
1291 case SPELL_OPERATOR:
1293 const unsigned char *spelling;
1294 unsigned char c;
1296 if (token->flags & DIGRAPH)
1297 spelling
1298 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1299 else if (token->flags & NAMED_OP)
1300 goto spell_ident;
1301 else
1302 spelling = TOKEN_NAME (token);
1304 while ((c = *spelling++) != '\0')
1305 *buffer++ = c;
1307 break;
1309 case SPELL_IDENT:
1310 spell_ident:
1311 memcpy (buffer, token->val.node->name, token->val.node->length);
1312 buffer += token->val.node->length;
1313 break;
1315 case SPELL_STRING:
1317 int left, right, tag;
1318 switch (token->type)
1320 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1321 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1322 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1323 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1324 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1325 default: left = '\0'; right = '\0'; tag = '\0'; break;
1327 if (tag) *buffer++ = tag;
1328 if (left) *buffer++ = left;
1329 memcpy (buffer, token->val.str.text, token->val.str.len);
1330 buffer += token->val.str.len;
1331 if (right) *buffer++ = right;
1333 break;
1335 case SPELL_CHAR:
1336 *buffer++ = token->val.c;
1337 break;
1339 case SPELL_NONE:
1340 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1341 break;
1344 return buffer;
1347 /* Returns a token as a null-terminated string. The string is
1348 temporary, and automatically freed later. Useful for diagnostics. */
1349 unsigned char *
1350 cpp_token_as_text (pfile, token)
1351 cpp_reader *pfile;
1352 const cpp_token *token;
1354 unsigned int len = cpp_token_len (token);
1355 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1357 end = cpp_spell_token (pfile, token, start);
1358 end[0] = '\0';
1360 return start;
1363 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1364 const char *
1365 cpp_type2name (type)
1366 enum cpp_ttype type;
1368 return (const char *) token_spellings[type].name;
1371 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1372 for efficiency - to avoid double-buffering. Also, outputs a space
1373 if PREV_WHITE is flagged. */
1374 void
1375 cpp_output_token (token, fp)
1376 const cpp_token *token;
1377 FILE *fp;
1379 if (token->flags & PREV_WHITE)
1380 putc (' ', fp);
1382 switch (TOKEN_SPELL (token))
1384 case SPELL_OPERATOR:
1386 const unsigned char *spelling;
1388 if (token->flags & DIGRAPH)
1389 spelling
1390 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1391 else if (token->flags & NAMED_OP)
1392 goto spell_ident;
1393 else
1394 spelling = TOKEN_NAME (token);
1396 ufputs (spelling, fp);
1398 break;
1400 spell_ident:
1401 case SPELL_IDENT:
1402 ufputs (token->val.node->name, fp);
1403 break;
1405 case SPELL_STRING:
1407 int left, right, tag;
1408 switch (token->type)
1410 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1411 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1412 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1413 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1414 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1415 default: left = '\0'; right = '\0'; tag = '\0'; break;
1417 if (tag) putc (tag, fp);
1418 if (left) putc (left, fp);
1419 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1420 if (right) putc (right, fp);
1422 break;
1424 case SPELL_CHAR:
1425 putc (token->val.c, fp);
1426 break;
1428 case SPELL_NONE:
1429 /* An error, most probably. */
1430 break;
1434 /* Compare two tokens. */
1436 _cpp_equiv_tokens (a, b)
1437 const cpp_token *a, *b;
1439 if (a->type == b->type && a->flags == b->flags)
1440 switch (TOKEN_SPELL (a))
1442 default: /* Keep compiler happy. */
1443 case SPELL_OPERATOR:
1444 return 1;
1445 case SPELL_CHAR:
1446 return a->val.c == b->val.c; /* Character. */
1447 case SPELL_NONE:
1448 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1449 case SPELL_IDENT:
1450 return a->val.node == b->val.node;
1451 case SPELL_STRING:
1452 return (a->val.str.len == b->val.str.len
1453 && !memcmp (a->val.str.text, b->val.str.text,
1454 a->val.str.len));
1457 return 0;
1460 /* Determine whether two tokens can be pasted together, and if so,
1461 what the resulting token is. Returns CPP_EOF if the tokens cannot
1462 be pasted, or the appropriate type for the merged token if they
1463 can. */
1464 enum cpp_ttype
1465 cpp_can_paste (pfile, token1, token2, digraph)
1466 cpp_reader * pfile;
1467 const cpp_token *token1, *token2;
1468 int* digraph;
1470 enum cpp_ttype a = token1->type, b = token2->type;
1471 int cxx = CPP_OPTION (pfile, cplusplus);
1473 /* Treat named operators as if they were ordinary NAMEs. */
1474 if (token1->flags & NAMED_OP)
1475 a = CPP_NAME;
1476 if (token2->flags & NAMED_OP)
1477 b = CPP_NAME;
1479 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1480 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1482 switch (a)
1484 case CPP_GREATER:
1485 if (b == a) return CPP_RSHIFT;
1486 if (b == CPP_QUERY && cxx) return CPP_MAX;
1487 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1488 break;
1489 case CPP_LESS:
1490 if (b == a) return CPP_LSHIFT;
1491 if (b == CPP_QUERY && cxx) return CPP_MIN;
1492 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1493 if (CPP_OPTION (pfile, digraphs))
1495 if (b == CPP_COLON)
1496 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1497 if (b == CPP_MOD)
1498 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1500 break;
1502 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1503 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1504 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1506 case CPP_MINUS:
1507 if (b == a) return CPP_MINUS_MINUS;
1508 if (b == CPP_GREATER) return CPP_DEREF;
1509 break;
1510 case CPP_COLON:
1511 if (b == a && cxx) return CPP_SCOPE;
1512 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1513 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1514 break;
1516 case CPP_MOD:
1517 if (CPP_OPTION (pfile, digraphs))
1519 if (b == CPP_GREATER)
1520 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1521 if (b == CPP_COLON)
1522 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1524 break;
1525 case CPP_DEREF:
1526 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1527 break;
1528 case CPP_DOT:
1529 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1530 if (b == CPP_NUMBER) return CPP_NUMBER;
1531 break;
1533 case CPP_HASH:
1534 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1535 /* %:%: digraph */
1536 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1537 break;
1539 case CPP_NAME:
1540 if (b == CPP_NAME) return CPP_NAME;
1541 if (b == CPP_NUMBER
1542 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1543 if (b == CPP_CHAR
1544 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1545 if (b == CPP_STRING
1546 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1547 break;
1549 case CPP_NUMBER:
1550 if (b == CPP_NUMBER) return CPP_NUMBER;
1551 if (b == CPP_NAME) return CPP_NUMBER;
1552 if (b == CPP_DOT) return CPP_NUMBER;
1553 /* Numbers cannot have length zero, so this is safe. */
1554 if ((b == CPP_PLUS || b == CPP_MINUS)
1555 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1556 return CPP_NUMBER;
1557 break;
1559 default:
1560 break;
1563 return CPP_EOF;
1566 /* Returns nonzero if a space should be inserted to avoid an
1567 accidental token paste for output. For simplicity, it is
1568 conservative, and occasionally advises a space where one is not
1569 needed, e.g. "." and ".2". */
1572 cpp_avoid_paste (pfile, token1, token2)
1573 cpp_reader *pfile;
1574 const cpp_token *token1, *token2;
1576 enum cpp_ttype a = token1->type, b = token2->type;
1577 cppchar_t c;
1579 if (token1->flags & NAMED_OP)
1580 a = CPP_NAME;
1581 if (token2->flags & NAMED_OP)
1582 b = CPP_NAME;
1584 c = EOF;
1585 if (token2->flags & DIGRAPH)
1586 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1587 else if (token_spellings[b].category == SPELL_OPERATOR)
1588 c = token_spellings[b].name[0];
1590 /* Quickly get everything that can paste with an '='. */
1591 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1592 return 1;
1594 switch (a)
1596 case CPP_GREATER: return c == '>' || c == '?';
1597 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1598 case CPP_PLUS: return c == '+';
1599 case CPP_MINUS: return c == '-' || c == '>';
1600 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1601 case CPP_MOD: return c == ':' || c == '>';
1602 case CPP_AND: return c == '&';
1603 case CPP_OR: return c == '|';
1604 case CPP_COLON: return c == ':' || c == '>';
1605 case CPP_DEREF: return c == '*';
1606 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1607 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1608 case CPP_NAME: return ((b == CPP_NUMBER
1609 && name_p (pfile, &token2->val.str))
1610 || b == CPP_NAME
1611 || b == CPP_CHAR || b == CPP_STRING); /* L */
1612 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1613 || c == '.' || c == '+' || c == '-');
1614 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1615 && token1->val.c == '@'
1616 && (b == CPP_NAME || b == CPP_STRING));
1617 default: break;
1620 return 0;
1623 /* Output all the remaining tokens on the current line, and a newline
1624 character, to FP. Leading whitespace is removed. */
1625 void
1626 cpp_output_line (pfile, fp)
1627 cpp_reader *pfile;
1628 FILE *fp;
1630 cpp_token token;
1632 cpp_get_token (pfile, &token);
1633 token.flags &= ~PREV_WHITE;
1634 while (token.type != CPP_EOF)
1636 cpp_output_token (&token, fp);
1637 cpp_get_token (pfile, &token);
1640 putc ('\n', fp);
/* Memory pools.  */

/* Never instantiated.  It exists only so that the offset of U, which
   follows a single char, reveals the alignment the compiler gives to
   a union of a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for chunk sizes and for pools that do not request a
   specific alignment.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1657 static int
1658 chunk_suitable (pool, chunk, size)
1659 cpp_pool *pool;
1660 cpp_chunk *chunk;
1661 unsigned int size;
1663 /* Being at least twice SIZE means we can use memcpy in
1664 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1665 anyway. */
1666 return (chunk && pool->locked != chunk
1667 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1670 /* Returns the end of the new pool. PTR points to a char in the old
1671 pool, and is updated to point to the same char in the new pool. */
1672 unsigned char *
1673 _cpp_next_chunk (pool, len, ptr)
1674 cpp_pool *pool;
1675 unsigned int len;
1676 unsigned char **ptr;
1678 cpp_chunk *chunk = pool->cur->next;
1680 /* LEN is the minimum size we want in the new pool. */
1681 len += POOL_ROOM (pool);
1682 if (! chunk_suitable (pool, chunk, len))
1684 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1686 chunk->next = pool->cur->next;
1687 pool->cur->next = chunk;
1690 /* Update the pointer before changing chunk's front. */
1691 if (ptr)
1692 *ptr += chunk->base - POOL_FRONT (pool);
1694 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1695 chunk->front = chunk->base;
1697 pool->cur = chunk;
1698 return POOL_LIMIT (pool);
1701 static cpp_chunk *
1702 new_chunk (size)
1703 unsigned int size;
1705 unsigned char *base;
1706 cpp_chunk *result;
1708 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
1709 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1710 /* Put the chunk descriptor at the end. Then chunk overruns will
1711 cause obvious chaos. */
1712 result = (cpp_chunk *) (base + size);
1713 result->base = base;
1714 result->front = base;
1715 result->limit = base + size;
1716 result->next = 0;
1718 return result;
1721 void
1722 _cpp_init_pool (pool, size, align, temp)
1723 cpp_pool *pool;
1724 unsigned int size, align, temp;
1726 if (align == 0)
1727 align = DEFAULT_ALIGNMENT;
1728 if (align & (align - 1))
1729 abort ();
1730 pool->align = align;
1731 pool->cur = new_chunk (size);
1732 pool->locked = 0;
1733 pool->locks = 0;
1734 if (temp)
1735 pool->cur->next = pool->cur;
1738 void
1739 _cpp_lock_pool (pool)
1740 cpp_pool *pool;
1742 if (pool->locks++ == 0)
1743 pool->locked = pool->cur;
1746 void
1747 _cpp_unlock_pool (pool)
1748 cpp_pool *pool;
1750 if (--pool->locks == 0)
1751 pool->locked = 0;
1754 void
1755 _cpp_free_pool (pool)
1756 cpp_pool *pool;
1758 cpp_chunk *chunk = pool->cur, *next;
1762 next = chunk->next;
1763 free (chunk->base);
1764 chunk = next;
1766 while (chunk && chunk != pool->cur);
1769 /* Reserve LEN bytes from a memory pool. */
1770 unsigned char *
1771 _cpp_pool_reserve (pool, len)
1772 cpp_pool *pool;
1773 unsigned int len;
1775 len = POOL_ALIGN (len, pool->align);
1776 if (len > (unsigned int) POOL_ROOM (pool))
1777 _cpp_next_chunk (pool, len, 0);
1779 return POOL_FRONT (pool);
1782 /* Allocate LEN bytes from a memory pool. */
1783 unsigned char *
1784 _cpp_pool_alloc (pool, len)
1785 cpp_pool *pool;
1786 unsigned int len;
1788 unsigned char *result = _cpp_pool_reserve (pool, len);
1790 POOL_COMMIT (pool, len);
1791 return result;