* config/i386/i386.md (mmx_pinsrw): Output operands in correct
[official-gcc.git] / gcc / cpplex.c
blobfb5eec55929e568b70569cfbb28101b518a450e4
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
37 #include "config.h"
38 #include "system.h"
39 #include "cpplib.h"
40 #include "cpphash.h"
41 #include "symcat.h"
/* Spelling categories for token types.  Tokens with SPELL_STRING
   store their spelling in the token list, and its length in
   token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
54 struct token_spelling
56 enum spell_type category;
57 const unsigned char *name;
60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
61 U":>", U"<%", U"%>"};
63 #define OP(e, s) { SPELL_OPERATOR, U s },
64 #define TK(e, s) { s, U STRINGX (e) },
65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66 #undef OP
67 #undef TK
69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
76 static int skip_block_comment PARAMS ((cpp_reader *));
77 static int skip_line_comment PARAMS ((cpp_reader *));
78 static void adjust_column PARAMS ((cpp_reader *));
79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
84 static void unterminated PARAMS ((cpp_reader *, int));
85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
94 /* Utility routine:
96 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
99 int
100 cpp_ideq (token, string)
101 const cpp_token *token;
102 const char *string;
104 if (token->type != CPP_NAME)
105 return 0;
107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
110 /* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112 static cppchar_t
113 handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
117 cppchar_t next = EOF;
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
137 buffer->read_ahead = next;
138 return next;
141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
146 static int
147 trigraph_ok (pfile, from_char)
148 cpp_reader *pfile;
149 cppchar_t from_char;
151 int accept = CPP_OPTION (pfile, trigraphs);
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
156 cpp_buffer *buffer = pfile->buffer;
157 if (accept)
158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
159 "trigraph ??%c converted to %c",
160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
162 else
163 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
164 "trigraph ??%c ignored", (int) from_char);
167 return accept;
/* Set the token type to T and consume the look-ahead character.
   Assumes local variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Save / restore the buffer read position; both assume local
   variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
182 /* Skips any escaped newlines introduced by NEXT, which is either a
183 '?' or a '\\'. Returns the next character, which will also have
184 been placed in buffer->read_ahead. This routine performs
185 preprocessing stages 1 and 2 of the ISO C standard. */
186 static cppchar_t
187 skip_escaped_newlines (buffer, next)
188 cpp_buffer *buffer;
189 cppchar_t next;
191 /* Only do this if we apply stages 1 and 2. */
192 if (!buffer->from_stage3)
194 cppchar_t next1;
195 const unsigned char *saved_cur;
196 int space;
200 if (buffer->cur == buffer->rlimit)
201 break;
203 SAVE_STATE ();
204 if (next == '?')
206 next1 = *buffer->cur++;
207 if (next1 != '?' || buffer->cur == buffer->rlimit)
209 RESTORE_STATE ();
210 break;
213 next1 = *buffer->cur++;
214 if (!_cpp_trigraph_map[next1]
215 || !trigraph_ok (buffer->pfile, next1))
217 RESTORE_STATE ();
218 break;
221 /* We have a full trigraph here. */
222 next = _cpp_trigraph_map[next1];
223 if (next != '\\' || buffer->cur == buffer->rlimit)
224 break;
225 SAVE_STATE ();
228 /* We have a backslash, and room for at least one more character. */
229 space = 0;
232 next1 = *buffer->cur++;
233 if (!is_nvspace (next1))
234 break;
235 space = 1;
237 while (buffer->cur < buffer->rlimit);
239 if (!is_vspace (next1))
241 RESTORE_STATE ();
242 break;
245 if (space)
246 cpp_warning (buffer->pfile,
247 "backslash and newline separated by space");
249 next = handle_newline (buffer, next1);
250 if (next == EOF)
251 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
253 while (next == '\\' || next == '?');
256 buffer->read_ahead = next;
257 return next;
260 /* Obtain the next character, after trigraph conversion and skipping
261 an arbitrary string of escaped newlines. The common case of no
262 trigraphs or escaped newlines falls through quickly. */
263 static cppchar_t
264 get_effective_char (buffer)
265 cpp_buffer *buffer;
267 cppchar_t next = EOF;
269 if (buffer->cur < buffer->rlimit)
271 next = *buffer->cur++;
273 /* '?' can introduce trigraphs (and therefore backslash); '\\'
274 can introduce escaped newlines, which we want to skip, or
275 UCNs, which, depending upon lexer state, we will handle in
276 the future. */
277 if (next == '?' || next == '\\')
278 next = skip_escaped_newlines (buffer, next);
281 buffer->read_ahead = next;
282 return next;
285 /* Skip a C-style block comment. We find the end of the comment by
286 seeing if an asterisk is before every '/' we encounter. Returns
287 non-zero if comment terminated by EOF, zero otherwise. */
288 static int
289 skip_block_comment (pfile)
290 cpp_reader *pfile;
292 cpp_buffer *buffer = pfile->buffer;
293 cppchar_t c = EOF, prevc = EOF;
295 pfile->state.lexing_comment = 1;
296 while (buffer->cur != buffer->rlimit)
298 prevc = c, c = *buffer->cur++;
300 next_char:
301 /* FIXME: For speed, create a new character class of characters
302 of interest inside block comments. */
303 if (c == '?' || c == '\\')
304 c = skip_escaped_newlines (buffer, c);
306 /* People like decorating comments with '*', so check for '/'
307 instead for efficiency. */
308 if (c == '/')
310 if (prevc == '*')
311 break;
313 /* Warn about potential nested comments, but not if the '/'
314 comes immediately before the true comment delimeter.
315 Don't bother to get it right across escaped newlines. */
316 if (CPP_OPTION (pfile, warn_comments)
317 && buffer->cur != buffer->rlimit)
319 prevc = c, c = *buffer->cur++;
320 if (c == '*' && buffer->cur != buffer->rlimit)
322 prevc = c, c = *buffer->cur++;
323 if (c != '/')
324 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
325 CPP_BUF_COL (buffer),
326 "\"/*\" within comment");
328 goto next_char;
331 else if (is_vspace (c))
333 prevc = c, c = handle_newline (buffer, c);
334 goto next_char;
336 else if (c == '\t')
337 adjust_column (pfile);
340 pfile->state.lexing_comment = 0;
341 buffer->read_ahead = EOF;
342 return c != '/' || prevc != '*';
345 /* Skip a C++ line comment. Handles escaped newlines. Returns
346 non-zero if a multiline comment. The following new line, if any,
347 is left in buffer->read_ahead. */
348 static int
349 skip_line_comment (pfile)
350 cpp_reader *pfile;
352 cpp_buffer *buffer = pfile->buffer;
353 unsigned int orig_lineno = buffer->lineno;
354 cppchar_t c;
356 pfile->state.lexing_comment = 1;
359 c = EOF;
360 if (buffer->cur == buffer->rlimit)
361 break;
363 c = *buffer->cur++;
364 if (c == '?' || c == '\\')
365 c = skip_escaped_newlines (buffer, c);
367 while (!is_vspace (c));
369 pfile->state.lexing_comment = 0;
370 buffer->read_ahead = c; /* Leave any newline for caller. */
371 return orig_lineno != buffer->lineno;
374 /* pfile->buffer->cur is one beyond the \t character. Update
375 col_adjust so we track the column correctly. */
376 static void
377 adjust_column (pfile)
378 cpp_reader *pfile;
380 cpp_buffer *buffer = pfile->buffer;
381 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
383 /* Round it up to multiple of the tabstop, but subtract 1 since the
384 tab itself occupies a character position. */
385 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
386 - col % CPP_OPTION (pfile, tabstop)) - 1;
389 /* Skips whitespace, saving the next non-whitespace character.
390 Adjusts pfile->col_adjust to account for tabs. Without this,
391 tokens might be assigned an incorrect column. */
392 static void
393 skip_whitespace (pfile, c)
394 cpp_reader *pfile;
395 cppchar_t c;
397 cpp_buffer *buffer = pfile->buffer;
398 unsigned int warned = 0;
402 /* Horizontal space always OK. */
403 if (c == ' ')
405 else if (c == '\t')
406 adjust_column (pfile);
407 /* Just \f \v or \0 left. */
408 else if (c == '\0')
410 if (!warned)
412 cpp_warning (pfile, "null character(s) ignored");
413 warned = 1;
416 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
417 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
418 CPP_BUF_COL (buffer),
419 "%s in preprocessing directive",
420 c == '\f' ? "form feed" : "vertical tab");
422 c = EOF;
423 if (buffer->cur == buffer->rlimit)
424 break;
425 c = *buffer->cur++;
427 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
428 while (is_nvspace (c));
430 /* Remember the next character. */
431 buffer->read_ahead = c;
434 /* See if the characters of a number token are valid in a name (no
435 '.', '+' or '-'). */
436 static int
437 name_p (pfile, string)
438 cpp_reader *pfile;
439 const cpp_string *string;
441 unsigned int i;
443 for (i = 0; i < string->len; i++)
444 if (!is_idchar (string->text[i]))
445 return 0;
447 return 1;
450 /* Parse an identifier, skipping embedded backslash-newlines.
451 Calculate the hash value of the token while parsing, for improved
452 performance. The hashing algorithm *must* match cpp_lookup(). */
454 static cpp_hashnode *
455 parse_identifier (pfile, c)
456 cpp_reader *pfile;
457 cppchar_t c;
459 cpp_hashnode *result;
460 cpp_buffer *buffer = pfile->buffer;
461 unsigned char *dest, *limit;
462 unsigned int r = 0, saw_dollar = 0;
464 dest = POOL_FRONT (&pfile->ident_pool);
465 limit = POOL_LIMIT (&pfile->ident_pool);
471 /* Need room for terminating null. */
472 if (dest + 1 >= limit)
473 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
475 *dest++ = c;
476 r = HASHSTEP (r, c);
478 if (c == '$')
479 saw_dollar++;
481 c = EOF;
482 if (buffer->cur == buffer->rlimit)
483 break;
485 c = *buffer->cur++;
487 while (is_idchar (c));
489 /* Potential escaped newline? */
490 if (c != '?' && c != '\\')
491 break;
492 c = skip_escaped_newlines (buffer, c);
494 while (is_idchar (c));
496 /* Remember the next character. */
497 buffer->read_ahead = c;
499 /* $ is not a identifier character in the standard, but is commonly
500 accepted as an extension. Don't warn about it in skipped
501 conditional blocks. */
502 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
503 cpp_pedwarn (pfile, "'$' character(s) in identifier");
505 /* Identifiers are null-terminated. */
506 *dest = '\0';
508 /* This routine commits the memory if necessary. */
509 result = _cpp_lookup_with_hash (pfile,
510 dest - POOL_FRONT (&pfile->ident_pool), r);
512 /* Some identifiers require diagnostics when lexed. */
513 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
515 /* It is allowed to poison the same identifier twice. */
516 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
517 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
519 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
520 replacement list of a variable-arguments macro. */
521 if (result == pfile->spec_nodes.n__VA_ARGS__
522 && !pfile->state.va_args_ok)
523 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
526 return result;
529 /* Parse a number, skipping embedded backslash-newlines. */
530 static void
531 parse_number (pfile, number, c, leading_period)
532 cpp_reader *pfile;
533 cpp_string *number;
534 cppchar_t c;
535 int leading_period;
537 cpp_buffer *buffer = pfile->buffer;
538 cpp_pool *pool = pfile->string_pool;
539 unsigned char *dest, *limit;
541 dest = POOL_FRONT (pool);
542 limit = POOL_LIMIT (pool);
544 /* Place a leading period. */
545 if (leading_period)
547 if (dest >= limit)
548 limit = _cpp_next_chunk (pool, 0, &dest);
549 *dest++ = '.';
556 /* Need room for terminating null. */
557 if (dest + 1 >= limit)
558 limit = _cpp_next_chunk (pool, 0, &dest);
559 *dest++ = c;
561 c = EOF;
562 if (buffer->cur == buffer->rlimit)
563 break;
565 c = *buffer->cur++;
567 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
571 break;
572 c = skip_escaped_newlines (buffer, c);
574 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
576 /* Remember the next character. */
577 buffer->read_ahead = c;
579 /* Null-terminate the number. */
580 *dest = '\0';
582 number->text = POOL_FRONT (pool);
583 number->len = dest - number->text;
584 POOL_COMMIT (pool, number->len + 1);
587 /* Subroutine of parse_string. Emits error for unterminated strings. */
588 static void
589 unterminated (pfile, term)
590 cpp_reader *pfile;
591 int term;
593 cpp_error (pfile, "missing terminating %c character", term);
595 if (term == '\"' && pfile->mlstring_pos.line
596 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
598 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
599 pfile->mlstring_pos.col,
600 "possible start of unterminated string literal");
601 pfile->mlstring_pos.line = 0;
605 /* Subroutine of parse_string. */
606 static int
607 unescaped_terminator_p (pfile, dest)
608 cpp_reader *pfile;
609 const unsigned char *dest;
611 const unsigned char *start, *temp;
613 /* In #include-style directives, terminators are not escapeable. */
614 if (pfile->state.angled_headers)
615 return 1;
617 start = POOL_FRONT (pfile->string_pool);
619 /* An odd number of consecutive backslashes represents an escaped
620 terminator. */
621 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
624 return ((dest - temp) & 1) == 0;
627 /* Parses a string, character constant, or angle-bracketed header file
628 name. Handles embedded trigraphs and escaped newlines.
630 Multi-line strings are allowed, but they are deprecated within
631 directives. */
632 static void
633 parse_string (pfile, token, terminator)
634 cpp_reader *pfile;
635 cpp_token *token;
636 cppchar_t terminator;
638 cpp_buffer *buffer = pfile->buffer;
639 cpp_pool *pool = pfile->string_pool;
640 unsigned char *dest, *limit;
641 cppchar_t c;
642 unsigned int nulls = 0;
644 dest = POOL_FRONT (pool);
645 limit = POOL_LIMIT (pool);
647 for (;;)
649 if (buffer->cur == buffer->rlimit)
651 c = EOF;
652 unterminated (pfile, terminator);
653 break;
655 c = *buffer->cur++;
657 have_char:
658 /* Handle trigraphs, escaped newlines etc. */
659 if (c == '?' || c == '\\')
660 c = skip_escaped_newlines (buffer, c);
662 if (c == terminator && unescaped_terminator_p (pfile, dest))
664 c = EOF;
665 break;
667 else if (is_vspace (c))
669 /* In assembly language, silently terminate string and
670 character literals at end of line. This is a kludge
671 around not knowing where comments are. */
672 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
673 break;
675 /* Character constants and header names may not extend over
676 multiple lines. In Standard C, neither may strings.
677 Unfortunately, we accept multiline strings as an
678 extension, except in #include family directives. */
679 if (terminator != '"' || pfile->state.angled_headers)
681 unterminated (pfile, terminator);
682 break;
685 if (pfile->mlstring_pos.line == 0)
687 pfile->mlstring_pos = pfile->lexer_pos;
688 if (CPP_PEDANTIC (pfile))
689 cpp_pedwarn (pfile, "multi-line string constant");
692 handle_newline (buffer, c); /* Stores to read_ahead. */
693 c = '\n';
695 else if (c == '\0')
697 if (nulls++ == 0)
698 cpp_warning (pfile, "null character(s) preserved in literal");
701 /* No terminating null for strings - they could contain nulls. */
702 if (dest >= limit)
703 limit = _cpp_next_chunk (pool, 0, &dest);
704 *dest++ = c;
706 /* If we had a new line, the next character is in read_ahead. */
707 if (c != '\n')
708 continue;
709 c = buffer->read_ahead;
710 if (c != EOF)
711 goto have_char;
714 /* Remember the next character. */
715 buffer->read_ahead = c;
717 token->val.str.text = POOL_FRONT (pool);
718 token->val.str.len = dest - token->val.str.text;
719 POOL_COMMIT (pool, token->val.str.len);
722 /* The stored comment includes the comment start and any terminator. */
723 static void
724 save_comment (pfile, token, from)
725 cpp_reader *pfile;
726 cpp_token *token;
727 const unsigned char *from;
729 unsigned char *buffer;
730 unsigned int len;
732 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
733 /* C++ comments probably (not definitely) have moved past a new
734 line, which we don't want to save in the comment. */
735 if (pfile->buffer->read_ahead != EOF)
736 len--;
737 buffer = _cpp_pool_alloc (pfile->string_pool, len);
739 token->type = CPP_COMMENT;
740 token->val.str.len = len;
741 token->val.str.text = buffer;
743 buffer[0] = '/';
744 memcpy (buffer + 1, from, len - 1);
747 /* Subroutine of lex_token to handle '%'. A little tricky, since we
748 want to avoid stepping back when lexing %:%X. */
749 static void
750 lex_percent (buffer, result)
751 cpp_buffer *buffer;
752 cpp_token *result;
754 cppchar_t c;
756 result->type = CPP_MOD;
757 /* Parsing %:%X could leave an extra character. */
758 if (buffer->extra_char == EOF)
759 c = get_effective_char (buffer);
760 else
762 c = buffer->read_ahead = buffer->extra_char;
763 buffer->extra_char = EOF;
766 if (c == '=')
767 ACCEPT_CHAR (CPP_MOD_EQ);
768 else if (CPP_OPTION (buffer->pfile, digraphs))
770 if (c == ':')
772 result->flags |= DIGRAPH;
773 ACCEPT_CHAR (CPP_HASH);
774 if (get_effective_char (buffer) == '%')
776 buffer->extra_char = get_effective_char (buffer);
777 if (buffer->extra_char == ':')
779 buffer->extra_char = EOF;
780 ACCEPT_CHAR (CPP_PASTE);
782 else
783 /* We'll catch the extra_char when we're called back. */
784 buffer->read_ahead = '%';
787 else if (c == '>')
789 result->flags |= DIGRAPH;
790 ACCEPT_CHAR (CPP_CLOSE_BRACE);
795 /* Subroutine of lex_token to handle '.'. This is tricky, since we
796 want to avoid stepping back when lexing '...' or '.123'. In the
797 latter case we should also set a flag for parse_number. */
798 static void
799 lex_dot (pfile, result)
800 cpp_reader *pfile;
801 cpp_token *result;
803 cpp_buffer *buffer = pfile->buffer;
804 cppchar_t c;
806 /* Parsing ..X could leave an extra character. */
807 if (buffer->extra_char == EOF)
808 c = get_effective_char (buffer);
809 else
811 c = buffer->read_ahead = buffer->extra_char;
812 buffer->extra_char = EOF;
815 /* All known character sets have 0...9 contiguous. */
816 if (c >= '0' && c <= '9')
818 result->type = CPP_NUMBER;
819 parse_number (pfile, &result->val.str, c, 1);
821 else
823 result->type = CPP_DOT;
824 if (c == '.')
826 buffer->extra_char = get_effective_char (buffer);
827 if (buffer->extra_char == '.')
829 buffer->extra_char = EOF;
830 ACCEPT_CHAR (CPP_ELLIPSIS);
832 else
833 /* We'll catch the extra_char when we're called back. */
834 buffer->read_ahead = '.';
836 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
837 ACCEPT_CHAR (CPP_DOT_STAR);
841 void
842 _cpp_lex_token (pfile, result)
843 cpp_reader *pfile;
844 cpp_token *result;
846 cppchar_t c;
847 cpp_buffer *buffer;
848 const unsigned char *comment_start;
849 unsigned char was_skip_newlines = pfile->state.skip_newlines;
850 unsigned char newline_in_args = 0;
852 done_directive:
853 buffer = pfile->buffer;
854 pfile->state.skip_newlines = 0;
855 result->flags = 0;
856 next_char:
857 pfile->lexer_pos.line = buffer->lineno;
858 next_char2:
859 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
861 c = buffer->read_ahead;
862 if (c == EOF && buffer->cur < buffer->rlimit)
864 c = *buffer->cur++;
865 pfile->lexer_pos.col++;
868 do_switch:
869 buffer->read_ahead = EOF;
870 switch (c)
872 case EOF:
873 /* Non-empty files should end in a newline. Ignore for command
874 line and _Pragma buffers. */
875 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
876 cpp_pedwarn (pfile, "no newline at end of file");
877 pfile->state.skip_newlines = 1;
878 result->type = CPP_EOF;
879 break;
881 case ' ': case '\t': case '\f': case '\v': case '\0':
882 skip_whitespace (pfile, c);
883 result->flags |= PREV_WHITE;
884 goto next_char2;
886 case '\n': case '\r':
887 /* Don't let directives spill over to the next line. */
888 if (pfile->state.in_directive)
889 buffer->read_ahead = c;
890 else
892 handle_newline (buffer, c);
894 pfile->lexer_pos.output_line = buffer->lineno;
896 /* Skip newlines in macro arguments (except in directives). */
897 if (pfile->state.parsing_args)
899 /* Set the whitespace flag. */
900 newline_in_args = 1;
901 result->flags |= PREV_WHITE;
902 goto next_char;
905 if (was_skip_newlines)
907 /* Clear any whitespace flag. */
908 result->flags &= ~PREV_WHITE;
909 goto next_char;
913 /* Next we're at BOL, so skip new lines. */
914 pfile->state.skip_newlines = 1;
915 result->type = CPP_EOF;
916 break;
918 case '?':
919 case '\\':
920 /* These could start an escaped newline, or '?' a trigraph. Let
921 skip_escaped_newlines do all the work. */
923 unsigned int lineno = buffer->lineno;
925 c = skip_escaped_newlines (buffer, c);
926 if (lineno != buffer->lineno)
927 /* We had at least one escaped newline of some sort, and the
928 next character is in buffer->read_ahead. Update the
929 token's line and column. */
930 goto next_char;
932 /* We are either the original '?' or '\\', or a trigraph. */
933 result->type = CPP_QUERY;
934 buffer->read_ahead = EOF;
935 if (c == '\\')
936 goto random_char;
937 else if (c != '?')
938 goto do_switch;
940 break;
942 case '0': case '1': case '2': case '3': case '4':
943 case '5': case '6': case '7': case '8': case '9':
944 result->type = CPP_NUMBER;
945 parse_number (pfile, &result->val.str, c, 0);
946 break;
948 case '$':
949 if (!CPP_OPTION (pfile, dollars_in_ident))
950 goto random_char;
951 /* Fall through... */
953 case '_':
954 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
955 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
956 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
957 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
958 case 'y': case 'z':
959 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
960 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
961 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
962 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
963 case 'Y': case 'Z':
964 result->type = CPP_NAME;
965 result->val.node = parse_identifier (pfile, c);
967 /* 'L' may introduce wide characters or strings. */
968 if (result->val.node == pfile->spec_nodes.n_L)
970 c = buffer->read_ahead; /* For make_string. */
971 if (c == '\'' || c == '"')
973 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
974 goto make_string;
977 /* Convert named operators to their proper types. */
978 else if (result->val.node->flags & NODE_OPERATOR)
980 result->flags |= NAMED_OP;
981 result->type = result->val.node->value.operator;
983 break;
985 case '\'':
986 case '"':
987 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
988 make_string:
989 parse_string (pfile, result, c);
990 break;
992 case '/':
993 /* A potential block or line comment. */
994 comment_start = buffer->cur;
995 result->type = CPP_DIV;
996 c = get_effective_char (buffer);
997 if (c == '=')
998 ACCEPT_CHAR (CPP_DIV_EQ);
999 if (c != '/' && c != '*')
1000 break;
1002 if (c == '*')
1004 if (skip_block_comment (pfile))
1005 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1006 pfile->lexer_pos.col,
1007 "unterminated comment");
1009 else
1011 if (!CPP_OPTION (pfile, cplusplus_comments)
1012 && !CPP_IN_SYSTEM_HEADER (pfile))
1013 break;
1015 /* We silently allow C++ comments in system headers,
1016 irrespective of conformance mode, because lots of
1017 broken systems do that and trying to clean it up in
1018 fixincludes is a nightmare. */
1019 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1020 && ! buffer->warned_cplusplus_comments)
1022 cpp_pedwarn (pfile,
1023 "C++ style comments are not allowed in ISO C89");
1024 cpp_pedwarn (pfile,
1025 "(this will be reported only once per input file)");
1026 buffer->warned_cplusplus_comments = 1;
1029 /* Skip_line_comment updates buffer->read_ahead. */
1030 if (skip_line_comment (pfile))
1031 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1032 pfile->lexer_pos.col,
1033 "multi-line comment");
1036 /* Skipping the comment has updated buffer->read_ahead. */
1037 if (!pfile->state.save_comments)
1039 result->flags |= PREV_WHITE;
1040 goto next_char;
1043 /* Save the comment as a token in its own right. */
1044 save_comment (pfile, result, comment_start);
1045 break;
1047 case '<':
1048 if (pfile->state.angled_headers)
1050 result->type = CPP_HEADER_NAME;
1051 c = '>'; /* terminator. */
1052 goto make_string;
1055 result->type = CPP_LESS;
1056 c = get_effective_char (buffer);
1057 if (c == '=')
1058 ACCEPT_CHAR (CPP_LESS_EQ);
1059 else if (c == '<')
1061 ACCEPT_CHAR (CPP_LSHIFT);
1062 if (get_effective_char (buffer) == '=')
1063 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1065 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1067 ACCEPT_CHAR (CPP_MIN);
1068 if (get_effective_char (buffer) == '=')
1069 ACCEPT_CHAR (CPP_MIN_EQ);
1071 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1073 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1074 result->flags |= DIGRAPH;
1076 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1078 ACCEPT_CHAR (CPP_OPEN_BRACE);
1079 result->flags |= DIGRAPH;
1081 break;
1083 case '>':
1084 result->type = CPP_GREATER;
1085 c = get_effective_char (buffer);
1086 if (c == '=')
1087 ACCEPT_CHAR (CPP_GREATER_EQ);
1088 else if (c == '>')
1090 ACCEPT_CHAR (CPP_RSHIFT);
1091 if (get_effective_char (buffer) == '=')
1092 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1094 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1096 ACCEPT_CHAR (CPP_MAX);
1097 if (get_effective_char (buffer) == '=')
1098 ACCEPT_CHAR (CPP_MAX_EQ);
1100 break;
1102 case '%':
1103 lex_percent (buffer, result);
1104 if (result->type == CPP_HASH)
1105 goto do_hash;
1106 break;
1108 case '.':
1109 lex_dot (pfile, result);
1110 break;
1112 case '+':
1113 result->type = CPP_PLUS;
1114 c = get_effective_char (buffer);
1115 if (c == '=')
1116 ACCEPT_CHAR (CPP_PLUS_EQ);
1117 else if (c == '+')
1118 ACCEPT_CHAR (CPP_PLUS_PLUS);
1119 break;
1121 case '-':
1122 result->type = CPP_MINUS;
1123 c = get_effective_char (buffer);
1124 if (c == '>')
1126 ACCEPT_CHAR (CPP_DEREF);
1127 if (CPP_OPTION (pfile, cplusplus)
1128 && get_effective_char (buffer) == '*')
1129 ACCEPT_CHAR (CPP_DEREF_STAR);
1131 else if (c == '=')
1132 ACCEPT_CHAR (CPP_MINUS_EQ);
1133 else if (c == '-')
1134 ACCEPT_CHAR (CPP_MINUS_MINUS);
1135 break;
1137 case '*':
1138 result->type = CPP_MULT;
1139 if (get_effective_char (buffer) == '=')
1140 ACCEPT_CHAR (CPP_MULT_EQ);
1141 break;
1143 case '=':
1144 result->type = CPP_EQ;
1145 if (get_effective_char (buffer) == '=')
1146 ACCEPT_CHAR (CPP_EQ_EQ);
1147 break;
1149 case '!':
1150 result->type = CPP_NOT;
1151 if (get_effective_char (buffer) == '=')
1152 ACCEPT_CHAR (CPP_NOT_EQ);
1153 break;
1155 case '&':
1156 result->type = CPP_AND;
1157 c = get_effective_char (buffer);
1158 if (c == '=')
1159 ACCEPT_CHAR (CPP_AND_EQ);
1160 else if (c == '&')
1161 ACCEPT_CHAR (CPP_AND_AND);
1162 break;
1164 case '#':
1165 c = buffer->extra_char; /* Can be set by error condition below. */
1166 if (c != EOF)
1168 buffer->read_ahead = c;
1169 buffer->extra_char = EOF;
1171 else
1172 c = get_effective_char (buffer);
1174 if (c == '#')
1175 ACCEPT_CHAR (CPP_PASTE);
1176 else
1178 result->type = CPP_HASH;
1179 do_hash:
1180 if (newline_in_args)
1182 /* 6.10.3 paragraph 11: If there are sequences of
1183 preprocessing tokens within the list of arguments that
1184 would otherwise act as preprocessing directives, the
1185 behavior is undefined.
1187 This implementation will report a hard error, terminate
1188 the macro invocation, and proceed to process the
1189 directive. */
1190 cpp_error (pfile,
1191 "directives may not be used inside a macro argument");
1193 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1194 buffer->extra_char = buffer->read_ahead;
1195 buffer->read_ahead = '#';
1196 pfile->state.skip_newlines = 1;
1197 result->type = CPP_EOF;
1199 /* Get whitespace right - newline_in_args sets it. */
1200 if (pfile->lexer_pos.col == 1)
1201 result->flags &= ~PREV_WHITE;
1203 else if (was_skip_newlines)
1205 /* This is the hash introducing a directive. */
1206 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1207 goto done_directive; /* was_skip_newlines still 1. */
1208 /* This is in fact an assembler #. */
1211 break;
1213 case '|':
1214 result->type = CPP_OR;
1215 c = get_effective_char (buffer);
1216 if (c == '=')
1217 ACCEPT_CHAR (CPP_OR_EQ);
1218 else if (c == '|')
1219 ACCEPT_CHAR (CPP_OR_OR);
1220 break;
1222 case '^':
1223 result->type = CPP_XOR;
1224 if (get_effective_char (buffer) == '=')
1225 ACCEPT_CHAR (CPP_XOR_EQ);
1226 break;
1228 case ':':
1229 result->type = CPP_COLON;
1230 c = get_effective_char (buffer);
1231 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1232 ACCEPT_CHAR (CPP_SCOPE);
1233 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1235 result->flags |= DIGRAPH;
1236 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1238 break;
1240 case '~': result->type = CPP_COMPL; break;
1241 case ',': result->type = CPP_COMMA; break;
1242 case '(': result->type = CPP_OPEN_PAREN; break;
1243 case ')': result->type = CPP_CLOSE_PAREN; break;
1244 case '[': result->type = CPP_OPEN_SQUARE; break;
1245 case ']': result->type = CPP_CLOSE_SQUARE; break;
1246 case '{': result->type = CPP_OPEN_BRACE; break;
1247 case '}': result->type = CPP_CLOSE_BRACE; break;
1248 case ';': result->type = CPP_SEMICOLON; break;
1250 case '@':
1251 if (CPP_OPTION (pfile, objc))
1253 /* In Objective C, '@' may begin keywords or strings, like
1254 @keyword or @"string". It would be nice to call
1255 get_effective_char here and test the result. However, we
1256 would then need to pass 2 characters to parse_identifier,
1257 making it ugly and slowing down its main loop. Instead,
1258 we assume we have an identifier, and recover if not. */
1259 result->type = CPP_NAME;
1260 result->val.node = parse_identifier (pfile, c);
1261 if (result->val.node->length != 1)
1262 break;
1264 /* OK, so it wasn't an identifier. Maybe a string? */
1265 if (buffer->read_ahead == '"')
1267 c = '"';
1268 ACCEPT_CHAR (CPP_OSTRING);
1269 goto make_string;
1272 goto random_char;
1274 random_char:
1275 default:
1276 result->type = CPP_OTHER;
1277 result->val.c = c;
1278 break;
1282 /* An upper bound on the number of bytes needed to spell a token,
1283 including preceding whitespace. */
1284 unsigned int
1285 cpp_token_len (token)
1286 const cpp_token *token;
1288 unsigned int len;
1290 switch (TOKEN_SPELL (token))
1292 default: len = 0; break;
1293 case SPELL_STRING: len = token->val.str.len; break;
1294 case SPELL_IDENT: len = token->val.node->length; break;
1296 /* 1 for whitespace, 4 for comment delimeters. */
1297 return len + 5;
1300 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1301 already contain the enough space to hold the token's spelling.
1302 Returns a pointer to the character after the last character
1303 written. */
1304 unsigned char *
1305 cpp_spell_token (pfile, token, buffer)
1306 cpp_reader *pfile; /* Would be nice to be rid of this... */
1307 const cpp_token *token;
1308 unsigned char *buffer;
1310 switch (TOKEN_SPELL (token))
1312 case SPELL_OPERATOR:
1314 const unsigned char *spelling;
1315 unsigned char c;
1317 if (token->flags & DIGRAPH)
1318 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1319 else if (token->flags & NAMED_OP)
1320 goto spell_ident;
1321 else
1322 spelling = TOKEN_NAME (token);
1324 while ((c = *spelling++) != '\0')
1325 *buffer++ = c;
1327 break;
1329 case SPELL_IDENT:
1330 spell_ident:
1331 memcpy (buffer, token->val.node->name, token->val.node->length);
1332 buffer += token->val.node->length;
1333 break;
1335 case SPELL_STRING:
1337 int left, right, tag;
1338 switch (token->type)
1340 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1341 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1342 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1343 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1344 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1345 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1346 default: left = '\0'; right = '\0'; tag = '\0'; break;
1348 if (tag) *buffer++ = tag;
1349 if (left) *buffer++ = left;
1350 memcpy (buffer, token->val.str.text, token->val.str.len);
1351 buffer += token->val.str.len;
1352 if (right) *buffer++ = right;
1354 break;
1356 case SPELL_CHAR:
1357 *buffer++ = token->val.c;
1358 break;
1360 case SPELL_NONE:
1361 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1362 break;
1365 return buffer;
1368 /* Returns a token as a null-terminated string. The string is
1369 temporary, and automatically freed later. Useful for diagnostics. */
1370 unsigned char *
1371 cpp_token_as_text (pfile, token)
1372 cpp_reader *pfile;
1373 const cpp_token *token;
1375 unsigned int len = cpp_token_len (token);
1376 unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
1378 end = cpp_spell_token (pfile, token, start);
1379 end[0] = '\0';
1381 return start;
1384 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1385 const char *
1386 cpp_type2name (type)
1387 enum cpp_ttype type;
1389 return (const char *) token_spellings[type].name;
1392 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1393 for efficiency - to avoid double-buffering. Also, outputs a space
1394 if PREV_WHITE is flagged. */
1395 void
1396 cpp_output_token (token, fp)
1397 const cpp_token *token;
1398 FILE *fp;
1400 if (token->flags & PREV_WHITE)
1401 putc (' ', fp);
1403 switch (TOKEN_SPELL (token))
1405 case SPELL_OPERATOR:
1407 const unsigned char *spelling;
1409 if (token->flags & DIGRAPH)
1410 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1411 else if (token->flags & NAMED_OP)
1412 goto spell_ident;
1413 else
1414 spelling = TOKEN_NAME (token);
1416 ufputs (spelling, fp);
1418 break;
1420 spell_ident:
1421 case SPELL_IDENT:
1422 ufputs (token->val.node->name, fp);
1423 break;
1425 case SPELL_STRING:
1427 int left, right, tag;
1428 switch (token->type)
1430 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1431 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1432 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1433 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1434 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1435 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1436 default: left = '\0'; right = '\0'; tag = '\0'; break;
1438 if (tag) putc (tag, fp);
1439 if (left) putc (left, fp);
1440 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1441 if (right) putc (right, fp);
1443 break;
1445 case SPELL_CHAR:
1446 putc (token->val.c, fp);
1447 break;
1449 case SPELL_NONE:
1450 /* An error, most probably. */
1451 break;
1455 /* Compare two tokens. */
1457 _cpp_equiv_tokens (a, b)
1458 const cpp_token *a, *b;
1460 if (a->type == b->type && a->flags == b->flags)
1461 switch (TOKEN_SPELL (a))
1463 default: /* Keep compiler happy. */
1464 case SPELL_OPERATOR:
1465 return 1;
1466 case SPELL_CHAR:
1467 return a->val.c == b->val.c; /* Character. */
1468 case SPELL_NONE:
1469 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1470 case SPELL_IDENT:
1471 return a->val.node == b->val.node;
1472 case SPELL_STRING:
1473 return (a->val.str.len == b->val.str.len
1474 && !memcmp (a->val.str.text, b->val.str.text,
1475 a->val.str.len));
1478 return 0;
#if 0
/* Compare token lists A and B.  Return nonzero when both hold the
   same number of tokens and each pair is equivalent according to
   _cpp_equiv_tokens.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int idx = 0;
  unsigned int total = a->limit - a->first;

  if (total != (b->limit - b->first))
    return 0;

  while (idx < total)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
        return 0;
      idx++;
    }

  return 1;
}
#endif
1501 /* Determine whether two tokens can be pasted together, and if so,
1502 what the resulting token is. Returns CPP_EOF if the tokens cannot
1503 be pasted, or the appropriate type for the merged token if they
1504 can. */
1505 enum cpp_ttype
1506 cpp_can_paste (pfile, token1, token2, digraph)
1507 cpp_reader * pfile;
1508 const cpp_token *token1, *token2;
1509 int* digraph;
1511 enum cpp_ttype a = token1->type, b = token2->type;
1512 int cxx = CPP_OPTION (pfile, cplusplus);
1514 /* Treat named operators as if they were ordinary NAMEs. */
1515 if (token1->flags & NAMED_OP)
1516 a = CPP_NAME;
1517 if (token2->flags & NAMED_OP)
1518 b = CPP_NAME;
1520 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1521 return a + (CPP_EQ_EQ - CPP_EQ);
1523 switch (a)
1525 case CPP_GREATER:
1526 if (b == a) return CPP_RSHIFT;
1527 if (b == CPP_QUERY && cxx) return CPP_MAX;
1528 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1529 break;
1530 case CPP_LESS:
1531 if (b == a) return CPP_LSHIFT;
1532 if (b == CPP_QUERY && cxx) return CPP_MIN;
1533 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1534 if (CPP_OPTION (pfile, digraphs))
1536 if (b == CPP_COLON)
1537 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1538 if (b == CPP_MOD)
1539 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1541 break;
1543 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1544 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1545 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1547 case CPP_MINUS:
1548 if (b == a) return CPP_MINUS_MINUS;
1549 if (b == CPP_GREATER) return CPP_DEREF;
1550 break;
1551 case CPP_COLON:
1552 if (b == a && cxx) return CPP_SCOPE;
1553 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1554 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1555 break;
1557 case CPP_MOD:
1558 if (CPP_OPTION (pfile, digraphs))
1560 if (b == CPP_GREATER)
1561 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1562 if (b == CPP_COLON)
1563 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1565 break;
1566 case CPP_DEREF:
1567 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1568 break;
1569 case CPP_DOT:
1570 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1571 if (b == CPP_NUMBER) return CPP_NUMBER;
1572 break;
1574 case CPP_HASH:
1575 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1576 /* %:%: digraph */
1577 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1578 break;
1580 case CPP_NAME:
1581 if (b == CPP_NAME) return CPP_NAME;
1582 if (b == CPP_NUMBER
1583 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1584 if (b == CPP_CHAR
1585 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1586 if (b == CPP_STRING
1587 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1588 break;
1590 case CPP_NUMBER:
1591 if (b == CPP_NUMBER) return CPP_NUMBER;
1592 if (b == CPP_NAME) return CPP_NUMBER;
1593 if (b == CPP_DOT) return CPP_NUMBER;
1594 /* Numbers cannot have length zero, so this is safe. */
1595 if ((b == CPP_PLUS || b == CPP_MINUS)
1596 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1597 return CPP_NUMBER;
1598 break;
1600 case CPP_OTHER:
1601 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1603 if (b == CPP_NAME) return CPP_NAME;
1604 if (b == CPP_STRING) return CPP_OSTRING;
1607 default:
1608 break;
1611 return CPP_EOF;
1614 /* Returns nonzero if a space should be inserted to avoid an
1615 accidental token paste for output. For simplicity, it is
1616 conservative, and occasionally advises a space where one is not
1617 needed, e.g. "." and ".2". */
1620 cpp_avoid_paste (pfile, token1, token2)
1621 cpp_reader *pfile;
1622 const cpp_token *token1, *token2;
1624 enum cpp_ttype a = token1->type, b = token2->type;
1625 cppchar_t c;
1627 if (token1->flags & NAMED_OP)
1628 a = CPP_NAME;
1629 if (token2->flags & NAMED_OP)
1630 b = CPP_NAME;
1632 c = EOF;
1633 if (token2->flags & DIGRAPH)
1634 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1635 else if (token_spellings[b].category == SPELL_OPERATOR)
1636 c = token_spellings[b].name[0];
1638 /* Quickly get everything that can paste with an '='. */
1639 if (a <= CPP_LAST_EQ && c == '=')
1640 return 1;
1642 switch (a)
1644 case CPP_GREATER: return c == '>' || c == '?';
1645 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1646 case CPP_PLUS: return c == '+';
1647 case CPP_MINUS: return c == '-' || c == '>';
1648 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1649 case CPP_MOD: return c == ':' || c == '>';
1650 case CPP_AND: return c == '&';
1651 case CPP_OR: return c == '|';
1652 case CPP_COLON: return c == ':' || c == '>';
1653 case CPP_DEREF: return c == '*';
1654 case CPP_DOT: return c == '.' || c == '%';
1655 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1656 case CPP_NAME: return ((b == CPP_NUMBER
1657 && name_p (pfile, &token2->val.str))
1658 || b == CPP_NAME
1659 || b == CPP_CHAR || b == CPP_STRING); /* L */
1660 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1661 || c == '.' || c == '+' || c == '-');
1662 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1663 && token1->val.c == '@'
1664 && (b == CPP_NAME || b == CPP_STRING));
1665 default: break;
1668 return 0;
1671 /* Output all the remaining tokens on the current line, and a newline
1672 character, to FP. Leading whitespace is removed. */
1673 void
1674 cpp_output_line (pfile, fp)
1675 cpp_reader *pfile;
1676 FILE *fp;
1678 cpp_token token;
1680 _cpp_get_token (pfile, &token);
1681 token.flags &= ~PREV_WHITE;
1682 while (token.type != CPP_EOF)
1684 cpp_output_token (&token, fp);
1685 _cpp_get_token (pfile, &token);
1688 putc ('\n', fp);
/* Memory pools.  */

/* Never instantiated.  It exists only so that the offset of U, which
   follows a single char, can serve as the default alignment: that
   offset is the padding the compiler inserts to align a union of a
   double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used by a pool when the caller does not request one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1705 static int
1706 chunk_suitable (pool, chunk, size)
1707 cpp_pool *pool;
1708 cpp_chunk *chunk;
1709 unsigned int size;
1711 /* Being at least twice SIZE means we can use memcpy in
1712 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1713 anyway. */
1714 return (chunk && pool->locked != chunk
1715 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1718 /* Returns the end of the new pool. PTR points to a char in the old
1719 pool, and is updated to point to the same char in the new pool. */
1720 unsigned char *
1721 _cpp_next_chunk (pool, len, ptr)
1722 cpp_pool *pool;
1723 unsigned int len;
1724 unsigned char **ptr;
1726 cpp_chunk *chunk = pool->cur->next;
1728 /* LEN is the minimum size we want in the new pool. */
1729 len += POOL_ROOM (pool);
1730 if (! chunk_suitable (pool, chunk, len))
1732 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1734 chunk->next = pool->cur->next;
1735 pool->cur->next = chunk;
1738 /* Update the pointer before changing chunk's front. */
1739 if (ptr)
1740 *ptr += chunk->base - POOL_FRONT (pool);
1742 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1743 chunk->front = chunk->base;
1745 pool->cur = chunk;
1746 return POOL_LIMIT (pool);
1749 static cpp_chunk *
1750 new_chunk (size)
1751 unsigned int size;
1753 unsigned char *base;
1754 cpp_chunk *result;
1756 size = ALIGN (size, DEFAULT_ALIGNMENT);
1757 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1758 /* Put the chunk descriptor at the end. Then chunk overruns will
1759 cause obvious chaos. */
1760 result = (cpp_chunk *) (base + size);
1761 result->base = base;
1762 result->front = base;
1763 result->limit = base + size;
1764 result->next = 0;
1766 return result;
1769 void
1770 _cpp_init_pool (pool, size, align, temp)
1771 cpp_pool *pool;
1772 unsigned int size, align, temp;
1774 if (align == 0)
1775 align = DEFAULT_ALIGNMENT;
1776 if (align & (align - 1))
1777 abort ();
1778 pool->align = align;
1779 pool->cur = new_chunk (size);
1780 pool->locked = 0;
1781 pool->locks = 0;
1782 if (temp)
1783 pool->cur->next = pool->cur;
1786 void
1787 _cpp_lock_pool (pool)
1788 cpp_pool *pool;
1790 if (pool->locks++ == 0)
1791 pool->locked = pool->cur;
1794 void
1795 _cpp_unlock_pool (pool)
1796 cpp_pool *pool;
1798 if (--pool->locks == 0)
1799 pool->locked = 0;
1802 void
1803 _cpp_free_pool (pool)
1804 cpp_pool *pool;
1806 cpp_chunk *chunk = pool->cur, *next;
1810 next = chunk->next;
1811 free (chunk->base);
1812 chunk = next;
1814 while (chunk && chunk != pool->cur);
1817 /* Reserve LEN bytes from a memory pool. */
1818 unsigned char *
1819 _cpp_pool_reserve (pool, len)
1820 cpp_pool *pool;
1821 unsigned int len;
1823 len = ALIGN (len, pool->align);
1824 if (len > (unsigned int) POOL_ROOM (pool))
1825 _cpp_next_chunk (pool, len, 0);
1827 return POOL_FRONT (pool);
1830 /* Allocate LEN bytes from a memory pool. */
1831 unsigned char *
1832 _cpp_pool_alloc (pool, len)
1833 cpp_pool *pool;
1834 unsigned int len;
1836 unsigned char *result = _cpp_pool_reserve (pool, len);
1838 POOL_COMMIT (pool, len);
1839 return result;