* config/i370/xm-mvs.h, config/i370/xm-oe.h,
[official-gcc.git] / gcc / cpplex.c
blobd1b90764a438fb371716340908a8325002043770
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
37 #include "config.h"
38 #include "system.h"
39 #include "cpplib.h"
40 #include "cpphash.h"
41 #include "symcat.h"
/* How the spelling of a token type is obtained.

   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in token->val.str.len (that is the field parse_string
   and save_comment fill in).  SPELL_OPERATOR is pinned to 0; the other
   enumerators follow consecutively.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
54 struct token_spelling
56 enum spell_type category;
57 const unsigned char *name;
/* Spellings of the digraph forms, in this fixed order:
   %:  %:%:  <:  :>  <%  %>
   NOTE(review): the table is indexed from outside this part of the
   file; confirm the expected order before reordering entries.  */
const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                             U":>", U"<%", U"%>"};
63 #define OP(e, s) { SPELL_OPERATOR, U s },
64 #define TK(e, s) { s, U STRINGX (e) },
65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66 #undef OP
67 #undef TK
69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
76 static int skip_block_comment PARAMS ((cpp_reader *));
77 static int skip_line_comment PARAMS ((cpp_reader *));
78 static void adjust_column PARAMS ((cpp_reader *));
79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
84 static void unterminated PARAMS ((cpp_reader *, int));
85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
94 /* Utility routine:
96 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
99 int
100 cpp_ideq (token, string)
101 const cpp_token *token;
102 const char *string;
104 if (token->type != CPP_NAME)
105 return 0;
107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
110 /* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112 static cppchar_t
113 handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
117 cppchar_t next = EOF;
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
137 buffer->read_ahead = next;
138 return next;
141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
146 static int
147 trigraph_ok (pfile, from_char)
148 cpp_reader *pfile;
149 cppchar_t from_char;
151 int accept = CPP_OPTION (pfile, trigraphs);
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
156 cpp_buffer *buffer = pfile->buffer;
157 if (accept)
158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
159 "trigraph ??%c converted to %c",
160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
162 else if (buffer->cur != buffer->last_Wtrigraphs)
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, buffer->lineno,
166 CPP_BUF_COL (buffer) - 2,
167 "trigraph ??%c ignored", (int) from_char);
171 return accept;
/* Set the token type to T and consume the look-ahead character.
   Assumes local variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = (t); buffer->read_ahead = EOF; } while (0)

/* Save and restore the read position of the buffer.  Both assume
   local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
186 /* Skips any escaped newlines introduced by NEXT, which is either a
187 '?' or a '\\'. Returns the next character, which will also have
188 been placed in buffer->read_ahead. This routine performs
189 preprocessing stages 1 and 2 of the ISO C standard. */
190 static cppchar_t
191 skip_escaped_newlines (buffer, next)
192 cpp_buffer *buffer;
193 cppchar_t next;
195 /* Only do this if we apply stages 1 and 2. */
196 if (!buffer->from_stage3)
198 cppchar_t next1;
199 const unsigned char *saved_cur;
200 int space;
204 if (buffer->cur == buffer->rlimit)
205 break;
207 SAVE_STATE ();
208 if (next == '?')
210 next1 = *buffer->cur++;
211 if (next1 != '?' || buffer->cur == buffer->rlimit)
213 RESTORE_STATE ();
214 break;
217 next1 = *buffer->cur++;
218 if (!_cpp_trigraph_map[next1]
219 || !trigraph_ok (buffer->pfile, next1))
221 RESTORE_STATE ();
222 break;
225 /* We have a full trigraph here. */
226 next = _cpp_trigraph_map[next1];
227 if (next != '\\' || buffer->cur == buffer->rlimit)
228 break;
229 SAVE_STATE ();
232 /* We have a backslash, and room for at least one more character. */
233 space = 0;
236 next1 = *buffer->cur++;
237 if (!is_nvspace (next1))
238 break;
239 space = 1;
241 while (buffer->cur < buffer->rlimit);
243 if (!is_vspace (next1))
245 RESTORE_STATE ();
246 break;
249 if (space && !buffer->pfile->state.lexing_comment)
250 cpp_warning (buffer->pfile,
251 "backslash and newline separated by space");
253 next = handle_newline (buffer, next1);
254 if (next == EOF)
255 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
257 while (next == '\\' || next == '?');
260 buffer->read_ahead = next;
261 return next;
264 /* Obtain the next character, after trigraph conversion and skipping
265 an arbitrary string of escaped newlines. The common case of no
266 trigraphs or escaped newlines falls through quickly. */
267 static cppchar_t
268 get_effective_char (buffer)
269 cpp_buffer *buffer;
271 cppchar_t next = EOF;
273 if (buffer->cur < buffer->rlimit)
275 next = *buffer->cur++;
277 /* '?' can introduce trigraphs (and therefore backslash); '\\'
278 can introduce escaped newlines, which we want to skip, or
279 UCNs, which, depending upon lexer state, we will handle in
280 the future. */
281 if (next == '?' || next == '\\')
282 next = skip_escaped_newlines (buffer, next);
285 buffer->read_ahead = next;
286 return next;
289 /* Skip a C-style block comment. We find the end of the comment by
290 seeing if an asterisk is before every '/' we encounter. Returns
291 non-zero if comment terminated by EOF, zero otherwise. */
292 static int
293 skip_block_comment (pfile)
294 cpp_reader *pfile;
296 cpp_buffer *buffer = pfile->buffer;
297 cppchar_t c = EOF, prevc = EOF;
299 pfile->state.lexing_comment = 1;
300 while (buffer->cur != buffer->rlimit)
302 prevc = c, c = *buffer->cur++;
304 next_char:
305 /* FIXME: For speed, create a new character class of characters
306 of interest inside block comments. */
307 if (c == '?' || c == '\\')
308 c = skip_escaped_newlines (buffer, c);
310 /* People like decorating comments with '*', so check for '/'
311 instead for efficiency. */
312 if (c == '/')
314 if (prevc == '*')
315 break;
317 /* Warn about potential nested comments, but not if the '/'
318 comes immediately before the true comment delimeter.
319 Don't bother to get it right across escaped newlines. */
320 if (CPP_OPTION (pfile, warn_comments)
321 && buffer->cur != buffer->rlimit)
323 prevc = c, c = *buffer->cur++;
324 if (c == '*' && buffer->cur != buffer->rlimit)
326 prevc = c, c = *buffer->cur++;
327 if (c != '/')
328 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
329 CPP_BUF_COL (buffer),
330 "\"/*\" within comment");
332 goto next_char;
335 else if (is_vspace (c))
337 prevc = c, c = handle_newline (buffer, c);
338 goto next_char;
340 else if (c == '\t')
341 adjust_column (pfile);
344 pfile->state.lexing_comment = 0;
345 buffer->read_ahead = EOF;
346 return c != '/' || prevc != '*';
349 /* Skip a C++ line comment. Handles escaped newlines. Returns
350 non-zero if a multiline comment. The following new line, if any,
351 is left in buffer->read_ahead. */
352 static int
353 skip_line_comment (pfile)
354 cpp_reader *pfile;
356 cpp_buffer *buffer = pfile->buffer;
357 unsigned int orig_lineno = buffer->lineno;
358 cppchar_t c;
360 pfile->state.lexing_comment = 1;
363 c = EOF;
364 if (buffer->cur == buffer->rlimit)
365 break;
367 c = *buffer->cur++;
368 if (c == '?' || c == '\\')
369 c = skip_escaped_newlines (buffer, c);
371 while (!is_vspace (c));
373 pfile->state.lexing_comment = 0;
374 buffer->read_ahead = c; /* Leave any newline for caller. */
375 return orig_lineno != buffer->lineno;
378 /* pfile->buffer->cur is one beyond the \t character. Update
379 col_adjust so we track the column correctly. */
380 static void
381 adjust_column (pfile)
382 cpp_reader *pfile;
384 cpp_buffer *buffer = pfile->buffer;
385 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
387 /* Round it up to multiple of the tabstop, but subtract 1 since the
388 tab itself occupies a character position. */
389 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
390 - col % CPP_OPTION (pfile, tabstop)) - 1;
393 /* Skips whitespace, saving the next non-whitespace character.
394 Adjusts pfile->col_adjust to account for tabs. Without this,
395 tokens might be assigned an incorrect column. */
396 static void
397 skip_whitespace (pfile, c)
398 cpp_reader *pfile;
399 cppchar_t c;
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int warned = 0;
406 /* Horizontal space always OK. */
407 if (c == ' ')
409 else if (c == '\t')
410 adjust_column (pfile);
411 /* Just \f \v or \0 left. */
412 else if (c == '\0')
414 if (!warned)
416 cpp_warning (pfile, "null character(s) ignored");
417 warned = 1;
420 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
421 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
422 CPP_BUF_COL (buffer),
423 "%s in preprocessing directive",
424 c == '\f' ? "form feed" : "vertical tab");
426 c = EOF;
427 if (buffer->cur == buffer->rlimit)
428 break;
429 c = *buffer->cur++;
431 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
432 while (is_nvspace (c));
434 /* Remember the next character. */
435 buffer->read_ahead = c;
438 /* See if the characters of a number token are valid in a name (no
439 '.', '+' or '-'). */
440 static int
441 name_p (pfile, string)
442 cpp_reader *pfile;
443 const cpp_string *string;
445 unsigned int i;
447 for (i = 0; i < string->len; i++)
448 if (!is_idchar (string->text[i]))
449 return 0;
451 return 1;
454 /* Parse an identifier, skipping embedded backslash-newlines.
455 Calculate the hash value of the token while parsing, for improved
456 performance. The hashing algorithm *must* match cpp_lookup(). */
458 static cpp_hashnode *
459 parse_identifier (pfile, c)
460 cpp_reader *pfile;
461 cppchar_t c;
463 cpp_hashnode *result;
464 cpp_buffer *buffer = pfile->buffer;
465 unsigned char *dest, *limit;
466 unsigned int r = 0, saw_dollar = 0;
468 dest = POOL_FRONT (&pfile->ident_pool);
469 limit = POOL_LIMIT (&pfile->ident_pool);
475 /* Need room for terminating null. */
476 if (dest + 1 >= limit)
477 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
479 *dest++ = c;
480 r = HASHSTEP (r, c);
482 if (c == '$')
483 saw_dollar++;
485 c = EOF;
486 if (buffer->cur == buffer->rlimit)
487 break;
489 c = *buffer->cur++;
491 while (is_idchar (c));
493 /* Potential escaped newline? */
494 if (c != '?' && c != '\\')
495 break;
496 c = skip_escaped_newlines (buffer, c);
498 while (is_idchar (c));
500 /* Remember the next character. */
501 buffer->read_ahead = c;
503 /* $ is not a identifier character in the standard, but is commonly
504 accepted as an extension. Don't warn about it in skipped
505 conditional blocks. */
506 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
507 cpp_pedwarn (pfile, "'$' character(s) in identifier");
509 /* Identifiers are null-terminated. */
510 *dest = '\0';
512 /* This routine commits the memory if necessary. */
513 result = _cpp_lookup_with_hash (pfile,
514 dest - POOL_FRONT (&pfile->ident_pool), r);
516 /* Some identifiers require diagnostics when lexed. */
517 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
519 /* It is allowed to poison the same identifier twice. */
520 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
521 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
523 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
524 replacement list of a variadic macro. */
525 if (result == pfile->spec_nodes.n__VA_ARGS__
526 && !pfile->state.va_args_ok)
527 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
530 return result;
533 /* Parse a number, skipping embedded backslash-newlines. */
534 static void
535 parse_number (pfile, number, c, leading_period)
536 cpp_reader *pfile;
537 cpp_string *number;
538 cppchar_t c;
539 int leading_period;
541 cpp_buffer *buffer = pfile->buffer;
542 cpp_pool *pool = &pfile->ident_pool;
543 unsigned char *dest, *limit;
545 dest = POOL_FRONT (pool);
546 limit = POOL_LIMIT (pool);
548 /* Place a leading period. */
549 if (leading_period)
551 if (dest >= limit)
552 limit = _cpp_next_chunk (pool, 0, &dest);
553 *dest++ = '.';
560 /* Need room for terminating null. */
561 if (dest + 1 >= limit)
562 limit = _cpp_next_chunk (pool, 0, &dest);
563 *dest++ = c;
565 c = EOF;
566 if (buffer->cur == buffer->rlimit)
567 break;
569 c = *buffer->cur++;
571 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
573 /* Potential escaped newline? */
574 if (c != '?' && c != '\\')
575 break;
576 c = skip_escaped_newlines (buffer, c);
578 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
580 /* Remember the next character. */
581 buffer->read_ahead = c;
583 /* Null-terminate the number. */
584 *dest = '\0';
586 number->text = POOL_FRONT (pool);
587 number->len = dest - number->text;
588 POOL_COMMIT (pool, number->len + 1);
591 /* Subroutine of parse_string. Emits error for unterminated strings. */
592 static void
593 unterminated (pfile, term)
594 cpp_reader *pfile;
595 int term;
597 cpp_error (pfile, "missing terminating %c character", term);
599 if (term == '\"' && pfile->mlstring_pos.line
600 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
602 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
603 pfile->mlstring_pos.col,
604 "possible start of unterminated string literal");
605 pfile->mlstring_pos.line = 0;
609 /* Subroutine of parse_string. */
610 static int
611 unescaped_terminator_p (pfile, dest)
612 cpp_reader *pfile;
613 const unsigned char *dest;
615 const unsigned char *start, *temp;
617 /* In #include-style directives, terminators are not escapeable. */
618 if (pfile->state.angled_headers)
619 return 1;
621 start = POOL_FRONT (&pfile->ident_pool);
623 /* An odd number of consecutive backslashes represents an escaped
624 terminator. */
625 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
628 return ((dest - temp) & 1) == 0;
631 /* Parses a string, character constant, or angle-bracketed header file
632 name. Handles embedded trigraphs and escaped newlines. The stored
633 string is guaranteed NUL-terminated, but it is not guaranteed that
634 this is the first NUL since embedded NULs are preserved.
636 Multi-line strings are allowed, but they are deprecated. */
637 static void
638 parse_string (pfile, token, terminator)
639 cpp_reader *pfile;
640 cpp_token *token;
641 cppchar_t terminator;
643 cpp_buffer *buffer = pfile->buffer;
644 cpp_pool *pool = &pfile->ident_pool;
645 unsigned char *dest, *limit;
646 cppchar_t c;
647 unsigned int nulls = 0;
649 dest = POOL_FRONT (pool);
650 limit = POOL_LIMIT (pool);
652 for (;;)
654 if (buffer->cur == buffer->rlimit)
655 c = EOF;
656 else
657 c = *buffer->cur++;
659 have_char:
660 /* We need space for the terminating NUL. */
661 if (dest >= limit)
662 limit = _cpp_next_chunk (pool, 0, &dest);
664 if (c == EOF)
666 unterminated (pfile, terminator);
667 break;
670 /* Handle trigraphs, escaped newlines etc. */
671 if (c == '?' || c == '\\')
672 c = skip_escaped_newlines (buffer, c);
674 if (c == terminator && unescaped_terminator_p (pfile, dest))
676 c = EOF;
677 break;
679 else if (is_vspace (c))
681 /* In assembly language, silently terminate string and
682 character literals at end of line. This is a kludge
683 around not knowing where comments are. */
684 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
685 break;
687 /* Character constants and header names may not extend over
688 multiple lines. In Standard C, neither may strings.
689 Unfortunately, we accept multiline strings as an
690 extension, except in #include family directives. */
691 if (terminator != '"' || pfile->state.angled_headers)
693 unterminated (pfile, terminator);
694 break;
697 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
698 if (pfile->mlstring_pos.line == 0)
699 pfile->mlstring_pos = pfile->lexer_pos;
701 c = handle_newline (buffer, c);
702 *dest++ = '\n';
703 goto have_char;
705 else if (c == '\0')
707 if (nulls++ == 0)
708 cpp_warning (pfile, "null character(s) preserved in literal");
711 *dest++ = c;
714 /* Remember the next character. */
715 buffer->read_ahead = c;
716 *dest = '\0';
718 token->val.str.text = POOL_FRONT (pool);
719 token->val.str.len = dest - token->val.str.text;
720 POOL_COMMIT (pool, token->val.str.len + 1);
723 /* The stored comment includes the comment start and any terminator. */
724 static void
725 save_comment (pfile, token, from)
726 cpp_reader *pfile;
727 cpp_token *token;
728 const unsigned char *from;
730 unsigned char *buffer;
731 unsigned int len;
733 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
734 /* C++ comments probably (not definitely) have moved past a new
735 line, which we don't want to save in the comment. */
736 if (pfile->buffer->read_ahead != EOF)
737 len--;
738 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
740 token->type = CPP_COMMENT;
741 token->val.str.len = len;
742 token->val.str.text = buffer;
744 buffer[0] = '/';
745 memcpy (buffer + 1, from, len - 1);
748 /* Subroutine of lex_token to handle '%'. A little tricky, since we
749 want to avoid stepping back when lexing %:%X. */
750 static void
751 lex_percent (buffer, result)
752 cpp_buffer *buffer;
753 cpp_token *result;
755 cppchar_t c;
757 result->type = CPP_MOD;
758 /* Parsing %:%X could leave an extra character. */
759 if (buffer->extra_char == EOF)
760 c = get_effective_char (buffer);
761 else
763 c = buffer->read_ahead = buffer->extra_char;
764 buffer->extra_char = EOF;
767 if (c == '=')
768 ACCEPT_CHAR (CPP_MOD_EQ);
769 else if (CPP_OPTION (buffer->pfile, digraphs))
771 if (c == ':')
773 result->flags |= DIGRAPH;
774 ACCEPT_CHAR (CPP_HASH);
775 if (get_effective_char (buffer) == '%')
777 buffer->extra_char = get_effective_char (buffer);
778 if (buffer->extra_char == ':')
780 buffer->extra_char = EOF;
781 ACCEPT_CHAR (CPP_PASTE);
783 else
784 /* We'll catch the extra_char when we're called back. */
785 buffer->read_ahead = '%';
788 else if (c == '>')
790 result->flags |= DIGRAPH;
791 ACCEPT_CHAR (CPP_CLOSE_BRACE);
796 /* Subroutine of lex_token to handle '.'. This is tricky, since we
797 want to avoid stepping back when lexing '...' or '.123'. In the
798 latter case we should also set a flag for parse_number. */
799 static void
800 lex_dot (pfile, result)
801 cpp_reader *pfile;
802 cpp_token *result;
804 cpp_buffer *buffer = pfile->buffer;
805 cppchar_t c;
807 /* Parsing ..X could leave an extra character. */
808 if (buffer->extra_char == EOF)
809 c = get_effective_char (buffer);
810 else
812 c = buffer->read_ahead = buffer->extra_char;
813 buffer->extra_char = EOF;
816 /* All known character sets have 0...9 contiguous. */
817 if (c >= '0' && c <= '9')
819 result->type = CPP_NUMBER;
820 parse_number (pfile, &result->val.str, c, 1);
822 else
824 result->type = CPP_DOT;
825 if (c == '.')
827 buffer->extra_char = get_effective_char (buffer);
828 if (buffer->extra_char == '.')
830 buffer->extra_char = EOF;
831 ACCEPT_CHAR (CPP_ELLIPSIS);
833 else
834 /* We'll catch the extra_char when we're called back. */
835 buffer->read_ahead = '.';
837 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
838 ACCEPT_CHAR (CPP_DOT_STAR);
842 void
843 _cpp_lex_token (pfile, result)
844 cpp_reader *pfile;
845 cpp_token *result;
847 cppchar_t c;
848 cpp_buffer *buffer;
849 const unsigned char *comment_start;
850 unsigned char bol;
852 skip:
853 bol = pfile->state.next_bol;
854 done_directive:
855 buffer = pfile->buffer;
856 pfile->state.next_bol = 0;
857 result->flags = buffer->saved_flags;
858 buffer->saved_flags = 0;
859 next_char:
860 pfile->lexer_pos.line = buffer->lineno;
861 next_char2:
862 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
864 c = buffer->read_ahead;
865 if (c == EOF && buffer->cur < buffer->rlimit)
867 c = *buffer->cur++;
868 pfile->lexer_pos.col++;
871 do_switch:
872 buffer->read_ahead = EOF;
873 switch (c)
875 case EOF:
876 /* Non-empty files should end in a newline. Ignore for command
877 line and _Pragma buffers. */
878 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
879 cpp_pedwarn (pfile, "no newline at end of file");
880 pfile->state.next_bol = 1;
881 pfile->skipping = 0; /* In case missing #endif. */
882 result->type = CPP_EOF;
883 /* Don't do MI optimisation. */
884 return;
886 case ' ': case '\t': case '\f': case '\v': case '\0':
887 skip_whitespace (pfile, c);
888 result->flags |= PREV_WHITE;
889 goto next_char2;
891 case '\n': case '\r':
892 if (!pfile->state.in_directive)
894 handle_newline (buffer, c);
895 bol = 1;
896 pfile->lexer_pos.output_line = buffer->lineno;
897 /* This is a new line, so clear any white space flag.
898 Newlines in arguments are white space (6.10.3.10);
899 parse_arg takes care of that. */
900 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
901 goto next_char;
904 /* Don't let directives spill over to the next line. */
905 buffer->read_ahead = c;
906 pfile->state.next_bol = 1;
907 result->type = CPP_EOF;
908 /* Don't break; pfile->skipping might be true. */
909 return;
911 case '?':
912 case '\\':
913 /* These could start an escaped newline, or '?' a trigraph. Let
914 skip_escaped_newlines do all the work. */
916 unsigned int lineno = buffer->lineno;
918 c = skip_escaped_newlines (buffer, c);
919 if (lineno != buffer->lineno)
920 /* We had at least one escaped newline of some sort, and the
921 next character is in buffer->read_ahead. Update the
922 token's line and column. */
923 goto next_char;
925 /* We are either the original '?' or '\\', or a trigraph. */
926 result->type = CPP_QUERY;
927 buffer->read_ahead = EOF;
928 if (c == '\\')
929 goto random_char;
930 else if (c != '?')
931 goto do_switch;
933 break;
935 case '0': case '1': case '2': case '3': case '4':
936 case '5': case '6': case '7': case '8': case '9':
937 result->type = CPP_NUMBER;
938 parse_number (pfile, &result->val.str, c, 0);
939 break;
941 case '$':
942 if (!CPP_OPTION (pfile, dollars_in_ident))
943 goto random_char;
944 /* Fall through... */
946 case '_':
947 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
948 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
949 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
950 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
951 case 'y': case 'z':
952 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
953 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
954 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
955 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
956 case 'Y': case 'Z':
957 result->type = CPP_NAME;
958 result->val.node = parse_identifier (pfile, c);
960 /* 'L' may introduce wide characters or strings. */
961 if (result->val.node == pfile->spec_nodes.n_L)
963 c = buffer->read_ahead; /* For make_string. */
964 if (c == '\'' || c == '"')
966 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
967 goto make_string;
970 /* Convert named operators to their proper types. */
971 else if (result->val.node->flags & NODE_OPERATOR)
973 result->flags |= NAMED_OP;
974 result->type = result->val.node->value.operator;
976 break;
978 case '\'':
979 case '"':
980 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
981 make_string:
982 parse_string (pfile, result, c);
983 break;
985 case '/':
986 /* A potential block or line comment. */
987 comment_start = buffer->cur;
988 result->type = CPP_DIV;
989 c = get_effective_char (buffer);
990 if (c == '=')
991 ACCEPT_CHAR (CPP_DIV_EQ);
992 if (c != '/' && c != '*')
993 break;
995 if (c == '*')
997 if (skip_block_comment (pfile))
998 cpp_error_with_line (pfile, pfile->lexer_pos.line,
999 pfile->lexer_pos.col,
1000 "unterminated comment");
1002 else
1004 if (!CPP_OPTION (pfile, cplusplus_comments)
1005 && !CPP_IN_SYSTEM_HEADER (pfile))
1006 break;
1008 /* Warn about comments only if pedantically GNUC89, and not
1009 in system headers. */
1010 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1011 && ! buffer->warned_cplusplus_comments)
1013 cpp_pedwarn (pfile,
1014 "C++ style comments are not allowed in ISO C89");
1015 cpp_pedwarn (pfile,
1016 "(this will be reported only once per input file)");
1017 buffer->warned_cplusplus_comments = 1;
1020 /* Skip_line_comment updates buffer->read_ahead. */
1021 if (skip_line_comment (pfile))
1022 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1023 pfile->lexer_pos.col,
1024 "multi-line comment");
1027 /* Skipping the comment has updated buffer->read_ahead. */
1028 if (!pfile->state.save_comments)
1030 result->flags |= PREV_WHITE;
1031 goto next_char;
1034 /* Save the comment as a token in its own right. */
1035 save_comment (pfile, result, comment_start);
1036 /* Don't do MI optimisation. */
1037 return;
1039 case '<':
1040 if (pfile->state.angled_headers)
1042 result->type = CPP_HEADER_NAME;
1043 c = '>'; /* terminator. */
1044 goto make_string;
1047 result->type = CPP_LESS;
1048 c = get_effective_char (buffer);
1049 if (c == '=')
1050 ACCEPT_CHAR (CPP_LESS_EQ);
1051 else if (c == '<')
1053 ACCEPT_CHAR (CPP_LSHIFT);
1054 if (get_effective_char (buffer) == '=')
1055 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1057 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1059 ACCEPT_CHAR (CPP_MIN);
1060 if (get_effective_char (buffer) == '=')
1061 ACCEPT_CHAR (CPP_MIN_EQ);
1063 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1065 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1066 result->flags |= DIGRAPH;
1068 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1070 ACCEPT_CHAR (CPP_OPEN_BRACE);
1071 result->flags |= DIGRAPH;
1073 break;
1075 case '>':
1076 result->type = CPP_GREATER;
1077 c = get_effective_char (buffer);
1078 if (c == '=')
1079 ACCEPT_CHAR (CPP_GREATER_EQ);
1080 else if (c == '>')
1082 ACCEPT_CHAR (CPP_RSHIFT);
1083 if (get_effective_char (buffer) == '=')
1084 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1086 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1088 ACCEPT_CHAR (CPP_MAX);
1089 if (get_effective_char (buffer) == '=')
1090 ACCEPT_CHAR (CPP_MAX_EQ);
1092 break;
1094 case '%':
1095 lex_percent (buffer, result);
1096 if (result->type == CPP_HASH)
1097 goto do_hash;
1098 break;
1100 case '.':
1101 lex_dot (pfile, result);
1102 break;
1104 case '+':
1105 result->type = CPP_PLUS;
1106 c = get_effective_char (buffer);
1107 if (c == '=')
1108 ACCEPT_CHAR (CPP_PLUS_EQ);
1109 else if (c == '+')
1110 ACCEPT_CHAR (CPP_PLUS_PLUS);
1111 break;
1113 case '-':
1114 result->type = CPP_MINUS;
1115 c = get_effective_char (buffer);
1116 if (c == '>')
1118 ACCEPT_CHAR (CPP_DEREF);
1119 if (CPP_OPTION (pfile, cplusplus)
1120 && get_effective_char (buffer) == '*')
1121 ACCEPT_CHAR (CPP_DEREF_STAR);
1123 else if (c == '=')
1124 ACCEPT_CHAR (CPP_MINUS_EQ);
1125 else if (c == '-')
1126 ACCEPT_CHAR (CPP_MINUS_MINUS);
1127 break;
1129 case '*':
1130 result->type = CPP_MULT;
1131 if (get_effective_char (buffer) == '=')
1132 ACCEPT_CHAR (CPP_MULT_EQ);
1133 break;
1135 case '=':
1136 result->type = CPP_EQ;
1137 if (get_effective_char (buffer) == '=')
1138 ACCEPT_CHAR (CPP_EQ_EQ);
1139 break;
1141 case '!':
1142 result->type = CPP_NOT;
1143 if (get_effective_char (buffer) == '=')
1144 ACCEPT_CHAR (CPP_NOT_EQ);
1145 break;
1147 case '&':
1148 result->type = CPP_AND;
1149 c = get_effective_char (buffer);
1150 if (c == '=')
1151 ACCEPT_CHAR (CPP_AND_EQ);
1152 else if (c == '&')
1153 ACCEPT_CHAR (CPP_AND_AND);
1154 break;
1156 case '#':
1157 c = buffer->extra_char; /* Can be set by error condition below. */
1158 if (c != EOF)
1160 buffer->read_ahead = c;
1161 buffer->extra_char = EOF;
1163 else
1164 c = get_effective_char (buffer);
1166 if (c == '#')
1168 ACCEPT_CHAR (CPP_PASTE);
1169 break;
1172 result->type = CPP_HASH;
1173 do_hash:
1174 if (!bol)
1175 break;
1176 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1177 tokens within the list of arguments that would otherwise act
1178 as preprocessing directives, the behavior is undefined.
1180 This implementation will report a hard error, terminate the
1181 macro invocation, and proceed to process the directive. */
1182 if (pfile->state.parsing_args)
1184 if (pfile->state.parsing_args == 2)
1185 cpp_error (pfile,
1186 "directives may not be used inside a macro argument");
1188 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1189 buffer->extra_char = buffer->read_ahead;
1190 buffer->read_ahead = '#';
1191 pfile->state.next_bol = 1;
1192 result->type = CPP_EOF;
1194 /* Get whitespace right - newline_in_args sets it. */
1195 if (pfile->lexer_pos.col == 1)
1196 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1198 else
1200 /* This is the hash introducing a directive. */
1201 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1202 goto done_directive; /* bol still 1. */
1203 /* This is in fact an assembler #. */
1205 break;
1207 case '|':
1208 result->type = CPP_OR;
1209 c = get_effective_char (buffer);
1210 if (c == '=')
1211 ACCEPT_CHAR (CPP_OR_EQ);
1212 else if (c == '|')
1213 ACCEPT_CHAR (CPP_OR_OR);
1214 break;
1216 case '^':
1217 result->type = CPP_XOR;
1218 if (get_effective_char (buffer) == '=')
1219 ACCEPT_CHAR (CPP_XOR_EQ);
1220 break;
1222 case ':':
1223 result->type = CPP_COLON;
1224 c = get_effective_char (buffer);
1225 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1226 ACCEPT_CHAR (CPP_SCOPE);
1227 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1229 result->flags |= DIGRAPH;
1230 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1232 break;
1234 case '~': result->type = CPP_COMPL; break;
1235 case ',': result->type = CPP_COMMA; break;
1236 case '(': result->type = CPP_OPEN_PAREN; break;
1237 case ')': result->type = CPP_CLOSE_PAREN; break;
1238 case '[': result->type = CPP_OPEN_SQUARE; break;
1239 case ']': result->type = CPP_CLOSE_SQUARE; break;
1240 case '{': result->type = CPP_OPEN_BRACE; break;
1241 case '}': result->type = CPP_CLOSE_BRACE; break;
1242 case ';': result->type = CPP_SEMICOLON; break;
1244 /* @ is a punctuator in Objective C. */
1245 case '@': result->type = CPP_ATSIGN; break;
1247 random_char:
1248 default:
1249 result->type = CPP_OTHER;
1250 result->val.c = c;
1251 break;
1254 if (pfile->skipping)
1255 goto skip;
1257 /* If not in a directive, this token invalidates controlling macros. */
1258 if (!pfile->state.in_directive)
1259 pfile->mi_state = MI_FAILED;
1262 /* An upper bound on the number of bytes needed to spell a token,
1263 including preceding whitespace. */
1264 unsigned int
1265 cpp_token_len (token)
1266 const cpp_token *token;
1268 unsigned int len;
1270 switch (TOKEN_SPELL (token))
1272 default: len = 0; break;
1273 case SPELL_STRING: len = token->val.str.len; break;
1274 case SPELL_IDENT: len = token->val.node->length; break;
1276 /* 1 for whitespace, 4 for comment delimeters. */
1277 return len + 5;
1280 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1281 already contain the enough space to hold the token's spelling.
1282 Returns a pointer to the character after the last character
1283 written. */
1284 unsigned char *
1285 cpp_spell_token (pfile, token, buffer)
1286 cpp_reader *pfile; /* Would be nice to be rid of this... */
1287 const cpp_token *token;
1288 unsigned char *buffer;
1290 switch (TOKEN_SPELL (token))
1292 case SPELL_OPERATOR:
1294 const unsigned char *spelling;
1295 unsigned char c;
1297 if (token->flags & DIGRAPH)
1298 spelling
1299 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1300 else if (token->flags & NAMED_OP)
1301 goto spell_ident;
1302 else
1303 spelling = TOKEN_NAME (token);
1305 while ((c = *spelling++) != '\0')
1306 *buffer++ = c;
1308 break;
1310 case SPELL_IDENT:
1311 spell_ident:
1312 memcpy (buffer, token->val.node->name, token->val.node->length);
1313 buffer += token->val.node->length;
1314 break;
1316 case SPELL_STRING:
1318 int left, right, tag;
1319 switch (token->type)
1321 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1322 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1323 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1324 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1325 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1326 default: left = '\0'; right = '\0'; tag = '\0'; break;
1328 if (tag) *buffer++ = tag;
1329 if (left) *buffer++ = left;
1330 memcpy (buffer, token->val.str.text, token->val.str.len);
1331 buffer += token->val.str.len;
1332 if (right) *buffer++ = right;
1334 break;
1336 case SPELL_CHAR:
1337 *buffer++ = token->val.c;
1338 break;
1340 case SPELL_NONE:
1341 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1342 break;
1345 return buffer;
1348 /* Returns a token as a null-terminated string. The string is
1349 temporary, and automatically freed later. Useful for diagnostics. */
1350 unsigned char *
1351 cpp_token_as_text (pfile, token)
1352 cpp_reader *pfile;
1353 const cpp_token *token;
1355 unsigned int len = cpp_token_len (token);
1356 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1358 end = cpp_spell_token (pfile, token, start);
1359 end[0] = '\0';
1361 return start;
1364 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1365 const char *
1366 cpp_type2name (type)
1367 enum cpp_ttype type;
1369 return (const char *) token_spellings[type].name;
1372 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1373 for efficiency - to avoid double-buffering. Also, outputs a space
1374 if PREV_WHITE is flagged. */
1375 void
1376 cpp_output_token (token, fp)
1377 const cpp_token *token;
1378 FILE *fp;
1380 if (token->flags & PREV_WHITE)
1381 putc (' ', fp);
1383 switch (TOKEN_SPELL (token))
1385 case SPELL_OPERATOR:
1387 const unsigned char *spelling;
1389 if (token->flags & DIGRAPH)
1390 spelling
1391 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1392 else if (token->flags & NAMED_OP)
1393 goto spell_ident;
1394 else
1395 spelling = TOKEN_NAME (token);
1397 ufputs (spelling, fp);
1399 break;
1401 spell_ident:
1402 case SPELL_IDENT:
1403 ufputs (token->val.node->name, fp);
1404 break;
1406 case SPELL_STRING:
1408 int left, right, tag;
1409 switch (token->type)
1411 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1412 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1413 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1414 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1415 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1416 default: left = '\0'; right = '\0'; tag = '\0'; break;
1418 if (tag) putc (tag, fp);
1419 if (left) putc (left, fp);
1420 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1421 if (right) putc (right, fp);
1423 break;
1425 case SPELL_CHAR:
1426 putc (token->val.c, fp);
1427 break;
1429 case SPELL_NONE:
1430 /* An error, most probably. */
1431 break;
1435 /* Compare two tokens. */
1437 _cpp_equiv_tokens (a, b)
1438 const cpp_token *a, *b;
1440 if (a->type == b->type && a->flags == b->flags)
1441 switch (TOKEN_SPELL (a))
1443 default: /* Keep compiler happy. */
1444 case SPELL_OPERATOR:
1445 return 1;
1446 case SPELL_CHAR:
1447 return a->val.c == b->val.c; /* Character. */
1448 case SPELL_NONE:
1449 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1450 case SPELL_IDENT:
1451 return a->val.node == b->val.node;
1452 case SPELL_STRING:
1453 return (a->val.str.len == b->val.str.len
1454 && !memcmp (a->val.str.text, b->val.str.text,
1455 a->val.str.len));
1458 return 0;
1461 /* Determine whether two tokens can be pasted together, and if so,
1462 what the resulting token is. Returns CPP_EOF if the tokens cannot
1463 be pasted, or the appropriate type for the merged token if they
1464 can. */
1465 enum cpp_ttype
1466 cpp_can_paste (pfile, token1, token2, digraph)
1467 cpp_reader * pfile;
1468 const cpp_token *token1, *token2;
1469 int* digraph;
1471 enum cpp_ttype a = token1->type, b = token2->type;
1472 int cxx = CPP_OPTION (pfile, cplusplus);
1474 /* Treat named operators as if they were ordinary NAMEs. */
1475 if (token1->flags & NAMED_OP)
1476 a = CPP_NAME;
1477 if (token2->flags & NAMED_OP)
1478 b = CPP_NAME;
1480 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1481 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1483 switch (a)
1485 case CPP_GREATER:
1486 if (b == a) return CPP_RSHIFT;
1487 if (b == CPP_QUERY && cxx) return CPP_MAX;
1488 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1489 break;
1490 case CPP_LESS:
1491 if (b == a) return CPP_LSHIFT;
1492 if (b == CPP_QUERY && cxx) return CPP_MIN;
1493 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1494 if (CPP_OPTION (pfile, digraphs))
1496 if (b == CPP_COLON)
1497 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1498 if (b == CPP_MOD)
1499 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1501 break;
1503 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1504 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1505 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1507 case CPP_MINUS:
1508 if (b == a) return CPP_MINUS_MINUS;
1509 if (b == CPP_GREATER) return CPP_DEREF;
1510 break;
1511 case CPP_COLON:
1512 if (b == a && cxx) return CPP_SCOPE;
1513 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1514 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1515 break;
1517 case CPP_MOD:
1518 if (CPP_OPTION (pfile, digraphs))
1520 if (b == CPP_GREATER)
1521 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1522 if (b == CPP_COLON)
1523 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1525 break;
1526 case CPP_DEREF:
1527 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1528 break;
1529 case CPP_DOT:
1530 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1531 if (b == CPP_NUMBER) return CPP_NUMBER;
1532 break;
1534 case CPP_HASH:
1535 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1536 /* %:%: digraph */
1537 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1538 break;
1540 case CPP_NAME:
1541 if (b == CPP_NAME) return CPP_NAME;
1542 if (b == CPP_NUMBER
1543 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1544 if (b == CPP_CHAR
1545 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1546 if (b == CPP_STRING
1547 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1548 break;
1550 case CPP_NUMBER:
1551 if (b == CPP_NUMBER) return CPP_NUMBER;
1552 if (b == CPP_NAME) return CPP_NUMBER;
1553 if (b == CPP_DOT) return CPP_NUMBER;
1554 /* Numbers cannot have length zero, so this is safe. */
1555 if ((b == CPP_PLUS || b == CPP_MINUS)
1556 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1557 return CPP_NUMBER;
1558 break;
1560 default:
1561 break;
1564 return CPP_EOF;
1567 /* Returns nonzero if a space should be inserted to avoid an
1568 accidental token paste for output. For simplicity, it is
1569 conservative, and occasionally advises a space where one is not
1570 needed, e.g. "." and ".2". */
1573 cpp_avoid_paste (pfile, token1, token2)
1574 cpp_reader *pfile;
1575 const cpp_token *token1, *token2;
1577 enum cpp_ttype a = token1->type, b = token2->type;
1578 cppchar_t c;
1580 if (token1->flags & NAMED_OP)
1581 a = CPP_NAME;
1582 if (token2->flags & NAMED_OP)
1583 b = CPP_NAME;
1585 c = EOF;
1586 if (token2->flags & DIGRAPH)
1587 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1588 else if (token_spellings[b].category == SPELL_OPERATOR)
1589 c = token_spellings[b].name[0];
1591 /* Quickly get everything that can paste with an '='. */
1592 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1593 return 1;
1595 switch (a)
1597 case CPP_GREATER: return c == '>' || c == '?';
1598 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1599 case CPP_PLUS: return c == '+';
1600 case CPP_MINUS: return c == '-' || c == '>';
1601 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1602 case CPP_MOD: return c == ':' || c == '>';
1603 case CPP_AND: return c == '&';
1604 case CPP_OR: return c == '|';
1605 case CPP_COLON: return c == ':' || c == '>';
1606 case CPP_DEREF: return c == '*';
1607 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1608 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1609 case CPP_NAME: return ((b == CPP_NUMBER
1610 && name_p (pfile, &token2->val.str))
1611 || b == CPP_NAME
1612 || b == CPP_CHAR || b == CPP_STRING); /* L */
1613 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1614 || c == '.' || c == '+' || c == '-');
1615 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1616 && token1->val.c == '@'
1617 && (b == CPP_NAME || b == CPP_STRING));
1618 default: break;
1621 return 0;
1624 /* Output all the remaining tokens on the current line, and a newline
1625 character, to FP. Leading whitespace is removed. */
1626 void
1627 cpp_output_line (pfile, fp)
1628 cpp_reader *pfile;
1629 FILE *fp;
1631 cpp_token token;
1633 cpp_get_token (pfile, &token);
1634 token.flags &= ~PREV_WHITE;
1635 while (token.type != CPP_EOF)
1637 cpp_output_token (&token, fp);
1638 cpp_get_token (pfile, &token);
1641 putc ('\n', fp);
1644 /* Memory pools. */
/* Layout probe used only to compute DEFAULT_ALIGNMENT: a single char
   followed by a union of a double and a pointer.  The compiler pads
   after C so that U is suitably aligned for both members, so the
   offset of U gives the alignment they require.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used for pool storage when the caller does not ask for a
   specific one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1658 static int
1659 chunk_suitable (pool, chunk, size)
1660 cpp_pool *pool;
1661 cpp_chunk *chunk;
1662 unsigned int size;
1664 /* Being at least twice SIZE means we can use memcpy in
1665 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1666 anyway. */
1667 return (chunk && pool->locked != chunk
1668 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1671 /* Returns the end of the new pool. PTR points to a char in the old
1672 pool, and is updated to point to the same char in the new pool. */
1673 unsigned char *
1674 _cpp_next_chunk (pool, len, ptr)
1675 cpp_pool *pool;
1676 unsigned int len;
1677 unsigned char **ptr;
1679 cpp_chunk *chunk = pool->cur->next;
1681 /* LEN is the minimum size we want in the new pool. */
1682 len += POOL_ROOM (pool);
1683 if (! chunk_suitable (pool, chunk, len))
1685 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1687 chunk->next = pool->cur->next;
1688 pool->cur->next = chunk;
1691 /* Update the pointer before changing chunk's front. */
1692 if (ptr)
1693 *ptr += chunk->base - POOL_FRONT (pool);
1695 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1696 chunk->front = chunk->base;
1698 pool->cur = chunk;
1699 return POOL_LIMIT (pool);
1702 static cpp_chunk *
1703 new_chunk (size)
1704 unsigned int size;
1706 unsigned char *base;
1707 cpp_chunk *result;
1709 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
1710 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1711 /* Put the chunk descriptor at the end. Then chunk overruns will
1712 cause obvious chaos. */
1713 result = (cpp_chunk *) (base + size);
1714 result->base = base;
1715 result->front = base;
1716 result->limit = base + size;
1717 result->next = 0;
1719 return result;
1722 void
1723 _cpp_init_pool (pool, size, align, temp)
1724 cpp_pool *pool;
1725 unsigned int size, align, temp;
1727 if (align == 0)
1728 align = DEFAULT_ALIGNMENT;
1729 if (align & (align - 1))
1730 abort ();
1731 pool->align = align;
1732 pool->cur = new_chunk (size);
1733 pool->locked = 0;
1734 pool->locks = 0;
1735 if (temp)
1736 pool->cur->next = pool->cur;
1739 void
1740 _cpp_lock_pool (pool)
1741 cpp_pool *pool;
1743 if (pool->locks++ == 0)
1744 pool->locked = pool->cur;
1747 void
1748 _cpp_unlock_pool (pool)
1749 cpp_pool *pool;
1751 if (--pool->locks == 0)
1752 pool->locked = 0;
1755 void
1756 _cpp_free_pool (pool)
1757 cpp_pool *pool;
1759 cpp_chunk *chunk = pool->cur, *next;
1763 next = chunk->next;
1764 free (chunk->base);
1765 chunk = next;
1767 while (chunk && chunk != pool->cur);
1770 /* Reserve LEN bytes from a memory pool. */
1771 unsigned char *
1772 _cpp_pool_reserve (pool, len)
1773 cpp_pool *pool;
1774 unsigned int len;
1776 len = POOL_ALIGN (len, pool->align);
1777 if (len > (unsigned int) POOL_ROOM (pool))
1778 _cpp_next_chunk (pool, len, 0);
1780 return POOL_FRONT (pool);
1783 /* Allocate LEN bytes from a memory pool. */
1784 unsigned char *
1785 _cpp_pool_alloc (pool, len)
1786 cpp_pool *pool;
1787 unsigned int len;
1789 unsigned char *result = _cpp_pool_reserve (pool, len);
1791 POOL_COMMIT (pool, len);
1792 return result;