1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category
;
68 const unsigned char *name
;
71 const unsigned char *digraph_spellings
[] = {U
"%:", U
"%:%:", U
"<:",
/* Entry builders for the token_spellings table that follows.  Each
   expands to one brace-enclosed initializer (category first, then name)
   followed by a comma: OP (e, s) gives a SPELL_OPERATOR entry whose name
   is the spelling S; TK (e, s) gives an entry whose category is S and
   whose name is STRINGX (e).
   NOTE(review): TTYPE_TABLE and STRINGX are defined elsewhere;
   presumably TTYPE_TABLE expands to one OP or TK use per token type and
   STRINGX stringifies its argument -- confirm against the header.  */
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings
[N_TTYPES
] = {TTYPE_TABLE
};
/* Accessors into token_spellings, indexed by the type of TOKEN (a
   pointer to a token): TOKEN_SPELL yields the entry's spelling
   category, TOKEN_NAME yields the entry's name.  */
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline
PARAMS ((cpp_buffer
*, cppchar_t
));
84 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_buffer
*, cppchar_t
));
85 static cppchar_t get_effective_char
PARAMS ((cpp_buffer
*));
87 static int skip_block_comment
PARAMS ((cpp_reader
*));
88 static int skip_line_comment
PARAMS ((cpp_reader
*));
89 static void adjust_column
PARAMS ((cpp_reader
*));
90 static void skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
91 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*, cppchar_t
));
92 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
93 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
94 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
95 static void unterminated
PARAMS ((cpp_reader
*, int));
96 static int trigraph_ok
PARAMS ((cpp_reader
*, cppchar_t
));
97 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
98 static void lex_percent
PARAMS ((cpp_buffer
*, cpp_token
*));
99 static void lex_dot
PARAMS ((cpp_reader
*, cpp_token
*));
100 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
101 static unsigned int parse_escape
PARAMS ((cpp_reader
*, const unsigned char **,
102 const unsigned char *, HOST_WIDE_INT
,
104 static unsigned int read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
105 const unsigned char *, unsigned int));
107 static cpp_chunk
*new_chunk
PARAMS ((unsigned int));
108 static int chunk_suitable
PARAMS ((cpp_pool
*, cpp_chunk
*, unsigned int));
109 static unsigned int hex_digit_value
PARAMS ((unsigned int));
113 Compares the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
117 cpp_ideq (token
, string
)
118 const cpp_token
*token
;
121 if (token
->type
!= CPP_NAME
)
124 return !ustrcmp (NODE_NAME (token
->val
.node
), (const U_CHAR
*) string
);
127 /* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
130 handle_newline (buffer
, newline_char
)
132 cppchar_t newline_char
;
134 cppchar_t next
= EOF
;
136 buffer
->col_adjust
= 0;
138 buffer
->line_base
= buffer
->cur
;
140 /* Handle CR-LF and LF-CR combinations, get the next character. */
141 if (buffer
->cur
< buffer
->rlimit
)
143 next
= *buffer
->cur
++;
144 if (next
+ newline_char
== '\r' + '\n')
146 buffer
->line_base
= buffer
->cur
;
147 if (buffer
->cur
< buffer
->rlimit
)
148 next
= *buffer
->cur
++;
154 buffer
->read_ahead
= next
;
158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
159 encountered. It warns if necessary, and returns true if the
160 trigraph should be honoured. FROM_CHAR is the third character of a
161 trigraph, and presumed to be the previous character for position
164 trigraph_ok (pfile
, from_char
)
168 int accept
= CPP_OPTION (pfile
, trigraphs
);
170 /* Don't warn about trigraphs in comments. */
171 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
173 cpp_buffer
*buffer
= pfile
->buffer
;
175 cpp_warning_with_line (pfile
, buffer
->lineno
, CPP_BUF_COL (buffer
) - 2,
176 "trigraph ??%c converted to %c",
178 (int) _cpp_trigraph_map
[from_char
]);
179 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
181 buffer
->last_Wtrigraphs
= buffer
->cur
;
182 cpp_warning_with_line (pfile
, buffer
->lineno
,
183 CPP_BUF_COL (buffer
) - 2,
184 "trigraph ??%c ignored", (int) from_char
);
191 /* Sets the type of the token being built to T and resets
   buffer->read_ahead to EOF.  Assumes local variables buffer and
   result are in scope where the macro is used.  Wrapped in
   do { } while (0) so it behaves as a single statement.  */
192 #define ACCEPT_CHAR(t) \
193 do { result->type = t; buffer->read_ahead = EOF; } while (0)
195 /* When we move to multibyte character sets, add to these something
196 that saves and restores the state of the multibyte conversion
197 library. This probably involves saving and restoring a "cookie".
198 In the case of glibc it is an 8-byte structure, so is not a high
199 overhead operation. In any case, it's out of the fast path. */
/* SAVE_STATE copies buffer->cur into saved_cur; RESTORE_STATE copies
   saved_cur back into buffer->cur.  Both assume locals named buffer and
   saved_cur are in scope where they are used.  */
200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
203 /* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
208 skip_escaped_newlines (buffer
, next
)
212 /* Only do this if we apply stages 1 and 2. */
213 if (!buffer
->from_stage3
)
216 const unsigned char *saved_cur
;
221 if (buffer
->cur
== buffer
->rlimit
)
227 next1
= *buffer
->cur
++;
228 if (next1
!= '?' || buffer
->cur
== buffer
->rlimit
)
234 next1
= *buffer
->cur
++;
235 if (!_cpp_trigraph_map
[next1
]
236 || !trigraph_ok (buffer
->pfile
, next1
))
242 /* We have a full trigraph here. */
243 next
= _cpp_trigraph_map
[next1
];
244 if (next
!= '\\' || buffer
->cur
== buffer
->rlimit
)
249 /* We have a backslash, and room for at least one more character. */
253 next1
= *buffer
->cur
++;
254 if (!is_nvspace (next1
))
258 while (buffer
->cur
< buffer
->rlimit
);
260 if (!is_vspace (next1
))
266 if (space
&& !buffer
->pfile
->state
.lexing_comment
)
267 cpp_warning (buffer
->pfile
,
268 "backslash and newline separated by space");
270 next
= handle_newline (buffer
, next1
);
272 cpp_pedwarn (buffer
->pfile
, "backslash-newline at end of file");
274 while (next
== '\\' || next
== '?');
277 buffer
->read_ahead
= next
;
281 /* Obtain the next character, after trigraph conversion and skipping
282 an arbitrary string of escaped newlines. The common case of no
283 trigraphs or escaped newlines falls through quickly. */
285 get_effective_char (buffer
)
288 cppchar_t next
= EOF
;
290 if (buffer
->cur
< buffer
->rlimit
)
292 next
= *buffer
->cur
++;
294 /* '?' can introduce trigraphs (and therefore backslash); '\\'
295 can introduce escaped newlines, which we want to skip, or
296 UCNs, which, depending upon lexer state, we will handle in
298 if (next
== '?' || next
== '\\')
299 next
= skip_escaped_newlines (buffer
, next
);
302 buffer
->read_ahead
= next
;
306 /* Skip a C-style block comment. We find the end of the comment by
307 seeing if an asterisk is before every '/' we encounter. Returns
308 non-zero if comment terminated by EOF, zero otherwise. */
310 skip_block_comment (pfile
)
313 cpp_buffer
*buffer
= pfile
->buffer
;
314 cppchar_t c
= EOF
, prevc
= EOF
;
316 pfile
->state
.lexing_comment
= 1;
317 while (buffer
->cur
!= buffer
->rlimit
)
319 prevc
= c
, c
= *buffer
->cur
++;
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c
== '?' || c
== '\\')
325 c
= skip_escaped_newlines (buffer
, c
);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimiter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile
, warn_comments
)
338 && buffer
->cur
!= buffer
->rlimit
)
340 prevc
= c
, c
= *buffer
->cur
++;
341 if (c
== '*' && buffer
->cur
!= buffer
->rlimit
)
343 prevc
= c
, c
= *buffer
->cur
++;
345 cpp_warning_with_line (pfile
, CPP_BUF_LINE (buffer
),
346 CPP_BUF_COL (buffer
),
347 "\"/*\" within comment");
352 else if (is_vspace (c
))
354 prevc
= c
, c
= handle_newline (buffer
, c
);
358 adjust_column (pfile
);
361 pfile
->state
.lexing_comment
= 0;
362 buffer
->read_ahead
= EOF
;
363 return c
!= '/' || prevc
!= '*';
366 /* Skip a C++ line comment. Handles escaped newlines. Returns
367 non-zero if a multiline comment. The following new line, if any,
368 is left in buffer->read_ahead. */
370 skip_line_comment (pfile
)
373 cpp_buffer
*buffer
= pfile
->buffer
;
374 unsigned int orig_lineno
= buffer
->lineno
;
377 pfile
->state
.lexing_comment
= 1;
381 if (buffer
->cur
== buffer
->rlimit
)
385 if (c
== '?' || c
== '\\')
386 c
= skip_escaped_newlines (buffer
, c
);
388 while (!is_vspace (c
));
390 pfile
->state
.lexing_comment
= 0;
391 buffer
->read_ahead
= c
; /* Leave any newline for caller. */
392 return orig_lineno
!= buffer
->lineno
;
395 /* pfile->buffer->cur is one beyond the \t character. Update
396 col_adjust so we track the column correctly. */
398 adjust_column (pfile
)
401 cpp_buffer
*buffer
= pfile
->buffer
;
402 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
404 /* Round it up to multiple of the tabstop, but subtract 1 since the
405 tab itself occupies a character position. */
406 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
407 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
410 /* Skips whitespace, saving the next non-whitespace character.
411 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
412 tokens might be assigned an incorrect column. */
414 skip_whitespace (pfile
, c
)
418 cpp_buffer
*buffer
= pfile
->buffer
;
419 unsigned int warned
= 0;
423 /* Horizontal space always OK. */
427 adjust_column (pfile
);
428 /* Just \f \v or \0 left. */
433 cpp_warning (pfile
, "null character(s) ignored");
437 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
438 cpp_pedwarn_with_line (pfile
, CPP_BUF_LINE (buffer
),
439 CPP_BUF_COL (buffer
),
440 "%s in preprocessing directive",
441 c
== '\f' ? "form feed" : "vertical tab");
444 if (buffer
->cur
== buffer
->rlimit
)
448 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
449 while (is_nvspace (c
));
451 /* Remember the next character. */
452 buffer
->read_ahead
= c
;
455 /* See if the characters of a number token are valid in a name (no
458 name_p (pfile
, string
)
460 const cpp_string
*string
;
464 for (i
= 0; i
< string
->len
; i
++)
465 if (!is_idchar (string
->text
[i
]))
471 /* Parse an identifier, skipping embedded backslash-newlines.
472 Calculate the hash value of the token while parsing, for improved
473 performance. The hashing algorithm *must* match cpp_lookup(). */
475 static cpp_hashnode
*
476 parse_identifier (pfile
, c
)
480 cpp_hashnode
*result
;
481 cpp_buffer
*buffer
= pfile
->buffer
;
482 unsigned int saw_dollar
= 0, len
;
483 struct obstack
*stack
= &pfile
->hash_table
->stack
;
489 obstack_1grow (stack
, c
);
495 if (buffer
->cur
== buffer
->rlimit
)
500 while (is_idchar (c
));
502 /* Potential escaped newline? */
503 if (c
!= '?' && c
!= '\\')
505 c
= skip_escaped_newlines (buffer
, c
);
507 while (is_idchar (c
));
509 /* Remember the next character. */
510 buffer
->read_ahead
= c
;
512 /* $ is not an identifier character in the standard, but is commonly
513 accepted as an extension. Don't warn about it in skipped
514 conditional blocks. */
515 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->skipping
)
516 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
518 /* Identifiers are null-terminated. */
519 len
= obstack_object_size (stack
);
520 obstack_1grow (stack
, '\0');
522 /* This routine commits the memory if necessary. */
523 result
= (cpp_hashnode
*)
524 ht_lookup (pfile
->hash_table
, obstack_finish (stack
), len
, HT_ALLOCED
);
526 /* Some identifiers require diagnostics when lexed. */
527 if (result
->flags
& NODE_DIAGNOSTIC
&& !pfile
->skipping
)
529 /* It is allowed to poison the same identifier twice. */
530 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
531 cpp_error (pfile
, "attempt to use poisoned \"%s\"",
534 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
535 replacement list of a variadic macro. */
536 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
537 && !pfile
->state
.va_args_ok
)
538 cpp_pedwarn (pfile
, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
544 /* Parse a number, skipping embedded backslash-newlines. */
546 parse_number (pfile
, number
, c
, leading_period
)
552 cpp_buffer
*buffer
= pfile
->buffer
;
553 cpp_pool
*pool
= &pfile
->ident_pool
;
554 unsigned char *dest
, *limit
;
556 dest
= POOL_FRONT (pool
);
557 limit
= POOL_LIMIT (pool
);
559 /* Place a leading period. */
563 limit
= _cpp_next_chunk (pool
, 0, &dest
);
571 /* Need room for terminating null. */
572 if (dest
+ 1 >= limit
)
573 limit
= _cpp_next_chunk (pool
, 0, &dest
);
577 if (buffer
->cur
== buffer
->rlimit
)
582 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
584 /* Potential escaped newline? */
585 if (c
!= '?' && c
!= '\\')
587 c
= skip_escaped_newlines (buffer
, c
);
589 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
591 /* Remember the next character. */
592 buffer
->read_ahead
= c
;
594 /* Null-terminate the number. */
597 number
->text
= POOL_FRONT (pool
);
598 number
->len
= dest
- number
->text
;
599 POOL_COMMIT (pool
, number
->len
+ 1);
602 /* Subroutine of parse_string. Emits error for unterminated strings. */
604 unterminated (pfile
, term
)
608 cpp_error (pfile
, "missing terminating %c character", term
);
610 if (term
== '\"' && pfile
->mlstring_pos
.line
611 && pfile
->mlstring_pos
.line
!= pfile
->lexer_pos
.line
)
613 cpp_error_with_line (pfile
, pfile
->mlstring_pos
.line
,
614 pfile
->mlstring_pos
.col
,
615 "possible start of unterminated string literal");
616 pfile
->mlstring_pos
.line
= 0;
620 /* Subroutine of parse_string. */
622 unescaped_terminator_p (pfile
, dest
)
624 const unsigned char *dest
;
626 const unsigned char *start
, *temp
;
628 /* In #include-style directives, terminators are not escapable. */
629 if (pfile
->state
.angled_headers
)
632 start
= POOL_FRONT (&pfile
->ident_pool
);
634 /* An odd number of consecutive backslashes represents an escaped
636 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
639 return ((dest
- temp
) & 1) == 0;
642 /* Parses a string, character constant, or angle-bracketed header file
643 name. Handles embedded trigraphs and escaped newlines. The stored
644 string is guaranteed NUL-terminated, but it is not guaranteed that
645 this is the first NUL since embedded NULs are preserved.
647 Multi-line strings are allowed, but they are deprecated. */
649 parse_string (pfile
, token
, terminator
)
652 cppchar_t terminator
;
654 cpp_buffer
*buffer
= pfile
->buffer
;
655 cpp_pool
*pool
= &pfile
->ident_pool
;
656 unsigned char *dest
, *limit
;
658 unsigned int nulls
= 0;
660 dest
= POOL_FRONT (pool
);
661 limit
= POOL_LIMIT (pool
);
665 if (buffer
->cur
== buffer
->rlimit
)
671 /* We need space for the terminating NUL. */
673 limit
= _cpp_next_chunk (pool
, 0, &dest
);
677 unterminated (pfile
, terminator
);
681 /* Handle trigraphs, escaped newlines etc. */
682 if (c
== '?' || c
== '\\')
683 c
= skip_escaped_newlines (buffer
, c
);
685 if (c
== terminator
&& unescaped_terminator_p (pfile
, dest
))
690 else if (is_vspace (c
))
692 /* In assembly language, silently terminate string and
693 character literals at end of line. This is a kludge
694 around not knowing where comments are. */
695 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
698 /* Character constants and header names may not extend over
699 multiple lines. In Standard C, neither may strings.
700 Unfortunately, we accept multiline strings as an
701 extension, except in #include family directives. */
702 if (terminator
!= '"' || pfile
->state
.angled_headers
)
704 unterminated (pfile
, terminator
);
708 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
709 if (pfile
->mlstring_pos
.line
== 0)
710 pfile
->mlstring_pos
= pfile
->lexer_pos
;
712 c
= handle_newline (buffer
, c
);
719 cpp_warning (pfile
, "null character(s) preserved in literal");
725 /* Remember the next character. */
726 buffer
->read_ahead
= c
;
729 token
->val
.str
.text
= POOL_FRONT (pool
);
730 token
->val
.str
.len
= dest
- token
->val
.str
.text
;
731 POOL_COMMIT (pool
, token
->val
.str
.len
+ 1);
734 /* The stored comment includes the comment start and any terminator. */
736 save_comment (pfile
, token
, from
)
739 const unsigned char *from
;
741 unsigned char *buffer
;
744 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
745 /* C++ comments probably (not definitely) have moved past a new
746 line, which we don't want to save in the comment. */
747 if (pfile
->buffer
->read_ahead
!= EOF
)
749 buffer
= _cpp_pool_alloc (&pfile
->ident_pool
, len
);
751 token
->type
= CPP_COMMENT
;
752 token
->val
.str
.len
= len
;
753 token
->val
.str
.text
= buffer
;
756 memcpy (buffer
+ 1, from
, len
- 1);
759 /* Subroutine of lex_token to handle '%'. A little tricky, since we
760 want to avoid stepping back when lexing %:%X. */
762 lex_percent (buffer
, result
)
768 result
->type
= CPP_MOD
;
769 /* Parsing %:%X could leave an extra character. */
770 if (buffer
->extra_char
== EOF
)
771 c
= get_effective_char (buffer
);
774 c
= buffer
->read_ahead
= buffer
->extra_char
;
775 buffer
->extra_char
= EOF
;
779 ACCEPT_CHAR (CPP_MOD_EQ
);
780 else if (CPP_OPTION (buffer
->pfile
, digraphs
))
784 result
->flags
|= DIGRAPH
;
785 ACCEPT_CHAR (CPP_HASH
);
786 if (get_effective_char (buffer
) == '%')
788 buffer
->extra_char
= get_effective_char (buffer
);
789 if (buffer
->extra_char
== ':')
791 buffer
->extra_char
= EOF
;
792 ACCEPT_CHAR (CPP_PASTE
);
795 /* We'll catch the extra_char when we're called back. */
796 buffer
->read_ahead
= '%';
801 result
->flags
|= DIGRAPH
;
802 ACCEPT_CHAR (CPP_CLOSE_BRACE
);
807 /* Subroutine of lex_token to handle '.'. This is tricky, since we
808 want to avoid stepping back when lexing '...' or '.123'. In the
809 latter case we should also set a flag for parse_number. */
811 lex_dot (pfile
, result
)
815 cpp_buffer
*buffer
= pfile
->buffer
;
818 /* Parsing ..X could leave an extra character. */
819 if (buffer
->extra_char
== EOF
)
820 c
= get_effective_char (buffer
);
823 c
= buffer
->read_ahead
= buffer
->extra_char
;
824 buffer
->extra_char
= EOF
;
827 /* All known character sets have 0...9 contiguous. */
828 if (c
>= '0' && c
<= '9')
830 result
->type
= CPP_NUMBER
;
831 parse_number (pfile
, &result
->val
.str
, c
, 1);
835 result
->type
= CPP_DOT
;
838 buffer
->extra_char
= get_effective_char (buffer
);
839 if (buffer
->extra_char
== '.')
841 buffer
->extra_char
= EOF
;
842 ACCEPT_CHAR (CPP_ELLIPSIS
);
845 /* We'll catch the extra_char when we're called back. */
846 buffer
->read_ahead
= '.';
848 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
849 ACCEPT_CHAR (CPP_DOT_STAR
);
854 _cpp_lex_token (pfile
, result
)
860 const unsigned char *comment_start
;
864 bol
= pfile
->state
.next_bol
;
866 buffer
= pfile
->buffer
;
867 pfile
->state
.next_bol
= 0;
868 result
->flags
= buffer
->saved_flags
;
869 buffer
->saved_flags
= 0;
871 pfile
->lexer_pos
.line
= buffer
->lineno
;
873 pfile
->lexer_pos
.col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
875 c
= buffer
->read_ahead
;
876 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
879 pfile
->lexer_pos
.col
++;
883 buffer
->read_ahead
= EOF
;
887 /* Non-empty files should end in a newline. Checking "bol" too
888 prevents multiple warnings when hitting the EOF more than
889 once, like in a directive. Don't warn for command line and
891 if (pfile
->lexer_pos
.col
!= 0 && !bol
&& !buffer
->from_stage3
)
892 cpp_pedwarn (pfile
, "no newline at end of file");
893 pfile
->state
.next_bol
= 1;
894 pfile
->skipping
= 0; /* In case missing #endif. */
895 result
->type
= CPP_EOF
;
896 /* Don't do MI optimisation. */
899 case ' ': case '\t': case '\f': case '\v': case '\0':
900 skip_whitespace (pfile
, c
);
901 result
->flags
|= PREV_WHITE
;
904 case '\n': case '\r':
905 if (!pfile
->state
.in_directive
)
907 handle_newline (buffer
, c
);
909 pfile
->lexer_pos
.output_line
= buffer
->lineno
;
910 /* This is a new line, so clear any white space flag.
911 Newlines in arguments are white space (6.10.3.10);
912 parse_arg takes care of that. */
913 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
917 /* Don't let directives spill over to the next line. */
918 buffer
->read_ahead
= c
;
919 pfile
->state
.next_bol
= 1;
920 result
->type
= CPP_EOF
;
921 /* Don't break; pfile->skipping might be true. */
926 /* These could start an escaped newline, or '?' a trigraph. Let
927 skip_escaped_newlines do all the work. */
929 unsigned int lineno
= buffer
->lineno
;
931 c
= skip_escaped_newlines (buffer
, c
);
932 if (lineno
!= buffer
->lineno
)
933 /* We had at least one escaped newline of some sort, and the
934 next character is in buffer->read_ahead. Update the
935 token's line and column. */
938 /* We are either the original '?' or '\\', or a trigraph. */
939 result
->type
= CPP_QUERY
;
940 buffer
->read_ahead
= EOF
;
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7': case '8': case '9':
950 result
->type
= CPP_NUMBER
;
951 parse_number (pfile
, &result
->val
.str
, c
, 0);
955 if (!CPP_OPTION (pfile
, dollars_in_ident
))
957 /* Fall through... */
960 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
961 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
962 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
963 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
965 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
966 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
967 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
968 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
970 result
->type
= CPP_NAME
;
971 result
->val
.node
= parse_identifier (pfile
, c
);
973 /* 'L' may introduce wide characters or strings. */
974 if (result
->val
.node
== pfile
->spec_nodes
.n_L
)
976 c
= buffer
->read_ahead
; /* For make_string. */
977 if (c
== '\'' || c
== '"')
979 ACCEPT_CHAR (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
983 /* Convert named operators to their proper types. */
984 else if (result
->val
.node
->flags
& NODE_OPERATOR
)
986 result
->flags
|= NAMED_OP
;
987 result
->type
= result
->val
.node
->value
.operator;
993 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
995 parse_string (pfile
, result
, c
);
999 /* A potential block or line comment. */
1000 comment_start
= buffer
->cur
;
1001 result
->type
= CPP_DIV
;
1002 c
= get_effective_char (buffer
);
1004 ACCEPT_CHAR (CPP_DIV_EQ
);
1005 if (c
!= '/' && c
!= '*')
1007 if (buffer
->from_stage3
)
1012 if (skip_block_comment (pfile
))
1013 cpp_error_with_line (pfile
, pfile
->lexer_pos
.line
,
1014 pfile
->lexer_pos
.col
,
1015 "unterminated comment");
1019 if (!CPP_OPTION (pfile
, cplusplus_comments
)
1020 && !CPP_IN_SYSTEM_HEADER (pfile
))
1023 /* Warn about comments only if pedantically GNUC89, and not
1024 in system headers. */
1025 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1026 && ! buffer
->warned_cplusplus_comments
)
1029 "C++ style comments are not allowed in ISO C89");
1031 "(this will be reported only once per input file)");
1032 buffer
->warned_cplusplus_comments
= 1;
1035 /* Skip_line_comment updates buffer->read_ahead. */
1036 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1037 cpp_warning_with_line (pfile
, pfile
->lexer_pos
.line
,
1038 pfile
->lexer_pos
.col
,
1039 "multi-line comment");
1042 /* Skipping the comment has updated buffer->read_ahead. */
1043 if (!pfile
->state
.save_comments
)
1045 result
->flags
|= PREV_WHITE
;
1049 /* Save the comment as a token in its own right. */
1050 save_comment (pfile
, result
, comment_start
);
1051 /* Don't do MI optimisation. */
1055 if (pfile
->state
.angled_headers
)
1057 result
->type
= CPP_HEADER_NAME
;
1058 c
= '>'; /* terminator. */
1062 result
->type
= CPP_LESS
;
1063 c
= get_effective_char (buffer
);
1065 ACCEPT_CHAR (CPP_LESS_EQ
);
1068 ACCEPT_CHAR (CPP_LSHIFT
);
1069 if (get_effective_char (buffer
) == '=')
1070 ACCEPT_CHAR (CPP_LSHIFT_EQ
);
1072 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1074 ACCEPT_CHAR (CPP_MIN
);
1075 if (get_effective_char (buffer
) == '=')
1076 ACCEPT_CHAR (CPP_MIN_EQ
);
1078 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1080 ACCEPT_CHAR (CPP_OPEN_SQUARE
);
1081 result
->flags
|= DIGRAPH
;
1083 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1085 ACCEPT_CHAR (CPP_OPEN_BRACE
);
1086 result
->flags
|= DIGRAPH
;
1091 result
->type
= CPP_GREATER
;
1092 c
= get_effective_char (buffer
);
1094 ACCEPT_CHAR (CPP_GREATER_EQ
);
1097 ACCEPT_CHAR (CPP_RSHIFT
);
1098 if (get_effective_char (buffer
) == '=')
1099 ACCEPT_CHAR (CPP_RSHIFT_EQ
);
1101 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1103 ACCEPT_CHAR (CPP_MAX
);
1104 if (get_effective_char (buffer
) == '=')
1105 ACCEPT_CHAR (CPP_MAX_EQ
);
1110 lex_percent (buffer
, result
);
1111 if (result
->type
== CPP_HASH
)
1116 lex_dot (pfile
, result
);
1120 result
->type
= CPP_PLUS
;
1121 c
= get_effective_char (buffer
);
1123 ACCEPT_CHAR (CPP_PLUS_EQ
);
1125 ACCEPT_CHAR (CPP_PLUS_PLUS
);
1129 result
->type
= CPP_MINUS
;
1130 c
= get_effective_char (buffer
);
1133 ACCEPT_CHAR (CPP_DEREF
);
1134 if (CPP_OPTION (pfile
, cplusplus
)
1135 && get_effective_char (buffer
) == '*')
1136 ACCEPT_CHAR (CPP_DEREF_STAR
);
1139 ACCEPT_CHAR (CPP_MINUS_EQ
);
1141 ACCEPT_CHAR (CPP_MINUS_MINUS
);
1145 result
->type
= CPP_MULT
;
1146 if (get_effective_char (buffer
) == '=')
1147 ACCEPT_CHAR (CPP_MULT_EQ
);
1151 result
->type
= CPP_EQ
;
1152 if (get_effective_char (buffer
) == '=')
1153 ACCEPT_CHAR (CPP_EQ_EQ
);
1157 result
->type
= CPP_NOT
;
1158 if (get_effective_char (buffer
) == '=')
1159 ACCEPT_CHAR (CPP_NOT_EQ
);
1163 result
->type
= CPP_AND
;
1164 c
= get_effective_char (buffer
);
1166 ACCEPT_CHAR (CPP_AND_EQ
);
1168 ACCEPT_CHAR (CPP_AND_AND
);
1172 c
= buffer
->extra_char
; /* Can be set by error condition below. */
1175 buffer
->read_ahead
= c
;
1176 buffer
->extra_char
= EOF
;
1179 c
= get_effective_char (buffer
);
1183 ACCEPT_CHAR (CPP_PASTE
);
1187 result
->type
= CPP_HASH
;
1191 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1192 tokens within the list of arguments that would otherwise act
1193 as preprocessing directives, the behavior is undefined.
1195 This implementation will report a hard error, terminate the
1196 macro invocation, and proceed to process the directive. */
1197 if (pfile
->state
.parsing_args
)
1199 if (pfile
->state
.parsing_args
== 2)
1201 "directives may not be used inside a macro argument");
1203 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1204 buffer
->extra_char
= buffer
->read_ahead
;
1205 buffer
->read_ahead
= '#';
1206 pfile
->state
.next_bol
= 1;
1207 result
->type
= CPP_EOF
;
1209 /* Get whitespace right - newline_in_args sets it. */
1210 if (pfile
->lexer_pos
.col
== 1)
1211 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
1215 /* This is the hash introducing a directive. */
1216 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1217 goto done_directive
; /* bol still 1. */
1218 /* This is in fact an assembler #. */
1223 result
->type
= CPP_OR
;
1224 c
= get_effective_char (buffer
);
1226 ACCEPT_CHAR (CPP_OR_EQ
);
1228 ACCEPT_CHAR (CPP_OR_OR
);
1232 result
->type
= CPP_XOR
;
1233 if (get_effective_char (buffer
) == '=')
1234 ACCEPT_CHAR (CPP_XOR_EQ
);
1238 result
->type
= CPP_COLON
;
1239 c
= get_effective_char (buffer
);
1240 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1241 ACCEPT_CHAR (CPP_SCOPE
);
1242 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1244 result
->flags
|= DIGRAPH
;
1245 ACCEPT_CHAR (CPP_CLOSE_SQUARE
);
1249 case '~': result
->type
= CPP_COMPL
; break;
1250 case ',': result
->type
= CPP_COMMA
; break;
1251 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1252 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1253 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1254 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1255 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1256 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1257 case ';': result
->type
= CPP_SEMICOLON
; break;
1259 /* @ is a punctuator in Objective C. */
1260 case '@': result
->type
= CPP_ATSIGN
; break;
1264 result
->type
= CPP_OTHER
;
1269 if (pfile
->skipping
)
1272 /* If not in a directive, this token invalidates controlling macros. */
1273 if (!pfile
->state
.in_directive
)
1274 pfile
->mi_state
= MI_FAILED
;
1277 /* An upper bound on the number of bytes needed to spell a token,
1278 including preceding whitespace. */
1280 cpp_token_len (token
)
1281 const cpp_token
*token
;
1285 switch (TOKEN_SPELL (token
))
1287 default: len
= 0; break;
1288 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1289 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1291 /* 1 for whitespace, 4 for comment delimiters. */
1295 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1296 already contain enough space to hold the token's spelling.
1297 Returns a pointer to the character after the last character
1300 cpp_spell_token (pfile
, token
, buffer
)
1301 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1302 const cpp_token
*token
;
1303 unsigned char *buffer
;
1305 switch (TOKEN_SPELL (token
))
1307 case SPELL_OPERATOR
:
1309 const unsigned char *spelling
;
1312 if (token
->flags
& DIGRAPH
)
1314 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1315 else if (token
->flags
& NAMED_OP
)
1318 spelling
= TOKEN_NAME (token
);
1320 while ((c
= *spelling
++) != '\0')
1327 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1328 buffer
+= NODE_LEN (token
->val
.node
);
1333 int left
, right
, tag
;
1334 switch (token
->type
)
1336 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1337 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1338 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1339 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1340 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1341 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
1343 if (tag
) *buffer
++ = tag
;
1344 if (left
) *buffer
++ = left
;
1345 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1346 buffer
+= token
->val
.str
.len
;
1347 if (right
) *buffer
++ = right
;
1352 *buffer
++ = token
->val
.c
;
1356 cpp_ice (pfile
, "Unspellable token %s", TOKEN_NAME (token
));
1363 /* Returns a token as a null-terminated string. The string is
1364 temporary, and automatically freed later. Useful for diagnostics. */
1366 cpp_token_as_text (pfile
, token
)
1368 const cpp_token
*token
;
1370 unsigned int len
= cpp_token_len (token
);
1371 unsigned char *start
= _cpp_pool_alloc (&pfile
->ident_pool
, len
), *end
;
1373 end
= cpp_spell_token (pfile
, token
, start
);
1379 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1381 cpp_type2name (type
)
1382 enum cpp_ttype type
;
1384 return (const char *) token_spellings
[type
].name
;
1387 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1388 for efficiency - to avoid double-buffering. Also, outputs a space
1389 if PREV_WHITE is flagged. */
1391 cpp_output_token (token
, fp
)
1392 const cpp_token
*token
;
1395 if (token
->flags
& PREV_WHITE
)
1398 switch (TOKEN_SPELL (token
))
1400 case SPELL_OPERATOR
:
1402 const unsigned char *spelling
;
1404 if (token
->flags
& DIGRAPH
)
1406 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1407 else if (token
->flags
& NAMED_OP
)
1410 spelling
= TOKEN_NAME (token
);
1412 ufputs (spelling
, fp
);
1418 ufputs (NODE_NAME (token
->val
.node
), fp
);
1423 int left
, right
, tag
;
1424 switch (token
->type
)
1426 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1427 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1428 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1429 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1430 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1431 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
1433 if (tag
) putc (tag
, fp
);
1434 if (left
) putc (left
, fp
);
1435 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1436 if (right
) putc (right
, fp
);
1441 putc (token
->val
.c
, fp
);
1445 /* An error, most probably. */
1450 /* Compare two tokens. */
1452 _cpp_equiv_tokens (a
, b
)
1453 const cpp_token
*a
, *b
;
1455 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1456 switch (TOKEN_SPELL (a
))
1458 default: /* Keep compiler happy. */
1459 case SPELL_OPERATOR
:
1462 return a
->val
.c
== b
->val
.c
; /* Character. */
1464 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
1466 return a
->val
.node
== b
->val
.node
;
1468 return (a
->val
.str
.len
== b
->val
.str
.len
1469 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1476 /* Determine whether two tokens can be pasted together, and if so,
1477 what the resulting token is. Returns CPP_EOF if the tokens cannot
1478 be pasted, or the appropriate type for the merged token if they
1481 cpp_can_paste (pfile
, token1
, token2
, digraph
)
1483 const cpp_token
*token1
, *token2
;
1486 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1487 int cxx
= CPP_OPTION (pfile
, cplusplus
);
1489 /* Treat named operators as if they were ordinary NAMEs. */
1490 if (token1
->flags
& NAMED_OP
)
1492 if (token2
->flags
& NAMED_OP
)
1495 if ((int) a
<= (int) CPP_LAST_EQ
&& b
== CPP_EQ
)
1496 return (enum cpp_ttype
) ((int) a
+ ((int) CPP_EQ_EQ
- (int) CPP_EQ
));
1501 if (b
== a
) return CPP_RSHIFT
;
1502 if (b
== CPP_QUERY
&& cxx
) return CPP_MAX
;
1503 if (b
== CPP_GREATER_EQ
) return CPP_RSHIFT_EQ
;
1506 if (b
== a
) return CPP_LSHIFT
;
1507 if (b
== CPP_QUERY
&& cxx
) return CPP_MIN
;
1508 if (b
== CPP_LESS_EQ
) return CPP_LSHIFT_EQ
;
1509 if (CPP_OPTION (pfile
, digraphs
))
1512 {*digraph
= 1; return CPP_OPEN_SQUARE
;} /* <: digraph */
1514 {*digraph
= 1; return CPP_OPEN_BRACE
;} /* <% digraph */
1518 case CPP_PLUS
: if (b
== a
) return CPP_PLUS_PLUS
; break;
1519 case CPP_AND
: if (b
== a
) return CPP_AND_AND
; break;
1520 case CPP_OR
: if (b
== a
) return CPP_OR_OR
; break;
1523 if (b
== a
) return CPP_MINUS_MINUS
;
1524 if (b
== CPP_GREATER
) return CPP_DEREF
;
1527 if (b
== a
&& cxx
) return CPP_SCOPE
;
1528 if (b
== CPP_GREATER
&& CPP_OPTION (pfile
, digraphs
))
1529 {*digraph
= 1; return CPP_CLOSE_SQUARE
;} /* :> digraph */
1533 if (CPP_OPTION (pfile
, digraphs
))
1535 if (b
== CPP_GREATER
)
1536 {*digraph
= 1; return CPP_CLOSE_BRACE
;} /* %> digraph */
1538 {*digraph
= 1; return CPP_HASH
;} /* %: digraph */
1542 if (b
== CPP_MULT
&& cxx
) return CPP_DEREF_STAR
;
1545 if (b
== CPP_MULT
&& cxx
) return CPP_DOT_STAR
;
1546 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1550 if (b
== a
&& (token1
->flags
& DIGRAPH
) == (token2
->flags
& DIGRAPH
))
1552 {*digraph
= (token1
->flags
& DIGRAPH
); return CPP_PASTE
;}
1556 if (b
== CPP_NAME
) return CPP_NAME
;
1558 && name_p (pfile
, &token2
->val
.str
)) return CPP_NAME
;
1560 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WCHAR
;
1562 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WSTRING
;
1566 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1567 if (b
== CPP_NAME
) return CPP_NUMBER
;
1568 if (b
== CPP_DOT
) return CPP_NUMBER
;
1569 /* Numbers cannot have length zero, so this is safe. */
1570 if ((b
== CPP_PLUS
|| b
== CPP_MINUS
)
1571 && VALID_SIGN ('+', token1
->val
.str
.text
[token1
->val
.str
.len
- 1]))
1582 /* Returns nonzero if a space should be inserted to avoid an
1583 accidental token paste for output. For simplicity, it is
1584 conservative, and occasionally advises a space where one is not
1585 needed, e.g. "." and ".2". */
1588 cpp_avoid_paste (pfile
, token1
, token2
)
1590 const cpp_token
*token1
, *token2
;
1592 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1595 if (token1
->flags
& NAMED_OP
)
1597 if (token2
->flags
& NAMED_OP
)
1601 if (token2
->flags
& DIGRAPH
)
1602 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1603 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1604 c
= token_spellings
[b
].name
[0];
1606 /* Quickly get everything that can paste with an '='. */
1607 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
1612 case CPP_GREATER
: return c
== '>' || c
== '?';
1613 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1614 case CPP_PLUS
: return c
== '+';
1615 case CPP_MINUS
: return c
== '-' || c
== '>';
1616 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1617 case CPP_MOD
: return c
== ':' || c
== '>';
1618 case CPP_AND
: return c
== '&';
1619 case CPP_OR
: return c
== '|';
1620 case CPP_COLON
: return c
== ':' || c
== '>';
1621 case CPP_DEREF
: return c
== '*';
1622 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1623 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1624 case CPP_NAME
: return ((b
== CPP_NUMBER
1625 && name_p (pfile
, &token2
->val
.str
))
1627 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1628 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1629 || c
== '.' || c
== '+' || c
== '-');
1630 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1631 && token1
->val
.c
== '@'
1632 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1639 /* Output all the remaining tokens on the current line, and a newline
1640 character, to FP. Leading whitespace is removed. */
1642 cpp_output_line (pfile
, fp
)
1648 cpp_get_token (pfile
, &token
);
1649 token
.flags
&= ~PREV_WHITE
;
1650 while (token
.type
!= CPP_EOF
)
1652 cpp_output_token (&token
, fp
);
1653 cpp_get_token (pfile
, &token
);
/* Returns the value (0-15) of the hexadecimal digit C, accepting
   both upper and lower case letters.  C must be a hexadecimal digit;
   callers are expected to have checked this, and any other value
   aborts.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= '0' && c <= '9')
    return c - '0';
  abort ();
}
1673 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1675 [lex.charset]: The character designated by the universal character
1676 name \UNNNNNNNN is that character whose character short name in
1677 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1678 universal character name \uNNNN is that character whose character
1679 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1680 for a universal character name is less than 0x20 or in the range
1681 0x7F-0x9F (inclusive), or if the universal character name
1682 designates a character in the basic source character set, then the
1683 program is ill-formed.
1685 We assume that wchar_t is Unicode, so we don't need to do any
1686 mapping. Is this ever wrong? */
1689 read_ucs (pfile
, pstr
, limit
, length
)
1691 const unsigned char **pstr
;
1692 const unsigned char *limit
;
1693 unsigned int length
;
1695 const unsigned char *p
= *pstr
;
1696 unsigned int c
, code
= 0;
1698 for (; length
; --length
)
1702 cpp_error (pfile
, "incomplete universal-character-name");
1709 code
= (code
<< 4) + hex_digit_value (c
);
1715 "non-hex digit '%c' in universal-character-name", c
);
1721 #ifdef TARGET_EBCDIC
1722 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1723 code
= 0x3f; /* EBCDIC invalid character */
1725 if (code
> 0x9f && !(code
& 0x80000000))
1726 ; /* True extended character, OK. */
1727 else if (code
>= 0x20 && code
< 0x7f)
1729 /* ASCII printable character. The C character set consists of all of
1730 these except $, @ and `. We use hex escapes so that this also
1731 works with EBCDIC hosts. */
1732 if (code
!= 0x24 && code
!= 0x40 && code
!= 0x60)
1733 cpp_error (pfile
, "universal-character-name used for '%c'", code
);
1736 cpp_error (pfile
, "invalid universal-character-name");
1743 /* Interpret an escape sequence, and return its value. PSTR points to
1744 the input pointer, which is just after the backslash. LIMIT is how
1745 much text we have. MASK is the precision for the target type (char
1746 or wchar_t). TRADITIONAL, if true, does not interpret escapes that
1747 did not exist in traditional C. */
1750 parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1752 const unsigned char **pstr
;
1753 const unsigned char *limit
;
1758 const unsigned char *str
= *pstr
;
1759 unsigned int c
= *str
++;
1763 case '\\': case '\'': case '"': case '?': break;
1764 case 'b': c
= TARGET_BS
; break;
1765 case 'f': c
= TARGET_FF
; break;
1766 case 'n': c
= TARGET_NEWLINE
; break;
1767 case 'r': c
= TARGET_CR
; break;
1768 case 't': c
= TARGET_TAB
; break;
1769 case 'v': c
= TARGET_VT
; break;
1771 case '(': case '{': case '[': case '%':
1772 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1773 '\%' is used to prevent SCCS from getting confused. */
1774 unknown
= CPP_PEDANTIC (pfile
);
1778 if (CPP_WTRADITIONAL (pfile
))
1779 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
1785 if (CPP_PEDANTIC (pfile
))
1786 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
1790 /* Warnings and support checks handled by read_ucs(). */
1792 if (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
))
1794 if (CPP_WTRADITIONAL (pfile
))
1796 "the meaning of '\\%c' varies with -traditional", c
);
1797 c
= read_ucs (pfile
, &str
, limit
, c
== 'u' ? 4 : 8);
1804 if (CPP_WTRADITIONAL (pfile
))
1805 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
1809 unsigned int i
= 0, overflow
= 0;
1810 int digits_found
= 0;
1818 overflow
|= i
^ (i
<< 4 >> 4);
1819 i
= (i
<< 4) + hex_digit_value (c
);
1824 cpp_error (pfile
, "\\x used with no following hex digits");
1826 if (overflow
| (i
!= (i
& mask
)))
1828 cpp_pedwarn (pfile
, "hex escape sequence out of range");
1835 case '0': case '1': case '2': case '3':
1836 case '4': case '5': case '6': case '7':
1838 unsigned int i
= c
- '0';
1841 while (str
< limit
&& ++count
< 3)
1844 if (c
< '0' || c
> '7')
1847 i
= (i
<< 3) + c
- '0';
1850 if (i
!= (i
& mask
))
1852 cpp_pedwarn (pfile
, "octal escape sequence out of range");
1867 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1869 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
/* Fall back to the plain character widths when no separate maximum
   width has been defined.  */
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1884 /* Interpret a (possibly wide) character constant in TOKEN.
1885 WARN_MULTI warns about multi-character charconsts, if not
1886 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1887 that did not exist in traditional C. PCHARS_SEEN points to a
1888 variable that is filled in with the number of characters seen. */
1890 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
1892 const cpp_token
*token
;
1895 unsigned int *pchars_seen
;
1897 const unsigned char *str
= token
->val
.str
.text
;
1898 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1899 unsigned int chars_seen
= 0;
1900 unsigned int width
, max_chars
, c
;
1901 unsigned HOST_WIDE_INT mask
;
1902 HOST_WIDE_INT result
= 0;
1904 #ifdef MULTIBYTE_CHARS
1905 (void) local_mbtowc (NULL
, NULL
, 0);
1908 /* Width in bits. */
1909 if (token
->type
== CPP_CHAR
)
1910 width
= MAX_CHAR_TYPE_SIZE
;
1912 width
= MAX_WCHAR_TYPE_SIZE
;
1914 if (width
< HOST_BITS_PER_WIDE_INT
)
1915 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
1918 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
1922 #ifdef MULTIBYTE_CHARS
1926 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1929 cpp_warning (pfile
, "ignoring invalid multibyte character");
1943 c
= parse_escape (pfile
, &str
, limit
, mask
, traditional
);
1944 if (width
< HOST_BITS_PER_WIDE_INT
&& c
> mask
)
1945 cpp_pedwarn (pfile
, "escape sequence out of range for character");
1948 #ifdef MAP_CHARACTER
1950 c
= MAP_CHARACTER (c
);
1953 /* Merge character into result; ignore excess chars. */
1954 if (++chars_seen
<= max_chars
)
1956 if (width
< HOST_BITS_PER_WIDE_INT
)
1957 result
= (result
<< width
) | (c
& mask
);
1963 if (chars_seen
== 0)
1964 cpp_error (pfile
, "empty character constant");
1965 else if (chars_seen
> max_chars
)
1967 chars_seen
= max_chars
;
1968 cpp_error (pfile
, "character constant too long");
1970 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
1971 cpp_warning (pfile
, "multi-character character constant");
1973 /* If char type is signed, sign-extend the constant. The
1974 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1975 if (token
->type
== CPP_CHAR
&& chars_seen
)
1977 unsigned int nbits
= chars_seen
* width
;
1978 unsigned int mask
= (unsigned int) ~0 >> (HOST_BITS_PER_INT
- nbits
);
1980 if (pfile
->spec_nodes
.n__CHAR_UNSIGNED__
->type
== NT_MACRO
1981 || ((result
>> (nbits
- 1)) & 1) == 0)
1987 *pchars_seen
= chars_seen
;
2003 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2006 chunk_suitable (pool
, chunk
, size
)
2011 /* Being at least twice SIZE means we can use memcpy in
2012 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2014 return (chunk
&& pool
->locked
!= chunk
2015 && (unsigned int) (chunk
->limit
- chunk
->base
) >= size
* 2);
2018 /* Returns the end of the new pool. PTR points to a char in the old
2019 pool, and is updated to point to the same char in the new pool. */
2021 _cpp_next_chunk (pool
, len
, ptr
)
2024 unsigned char **ptr
;
2026 cpp_chunk
*chunk
= pool
->cur
->next
;
2028 /* LEN is the minimum size we want in the new pool. */
2029 len
+= POOL_ROOM (pool
);
2030 if (! chunk_suitable (pool
, chunk
, len
))
2032 chunk
= new_chunk (POOL_SIZE (pool
) * 2 + len
);
2034 chunk
->next
= pool
->cur
->next
;
2035 pool
->cur
->next
= chunk
;
2038 /* Update the pointer before changing chunk's front. */
2040 *ptr
+= chunk
->base
- POOL_FRONT (pool
);
2042 memcpy (chunk
->base
, POOL_FRONT (pool
), POOL_ROOM (pool
));
2043 chunk
->front
= chunk
->base
;
2046 return POOL_LIMIT (pool
);
2053 unsigned char *base
;
2056 size
= POOL_ALIGN (size
, DEFAULT_ALIGNMENT
);
2057 base
= (unsigned char *) xmalloc (size
+ sizeof (cpp_chunk
));
2058 /* Put the chunk descriptor at the end. Then chunk overruns will
2059 cause obvious chaos. */
2060 result
= (cpp_chunk
*) (base
+ size
);
2061 result
->base
= base
;
2062 result
->front
= base
;
2063 result
->limit
= base
+ size
;
2070 _cpp_init_pool (pool
, size
, align
, temp
)
2072 unsigned int size
, align
, temp
;
2075 align
= DEFAULT_ALIGNMENT
;
2076 if (align
& (align
- 1))
2078 pool
->align
= align
;
2079 pool
->cur
= new_chunk (size
);
2083 pool
->cur
->next
= pool
->cur
;
2087 _cpp_lock_pool (pool
)
2090 if (pool
->locks
++ == 0)
2091 pool
->locked
= pool
->cur
;
2095 _cpp_unlock_pool (pool
)
2098 if (--pool
->locks
== 0)
2103 _cpp_free_pool (pool
)
2106 cpp_chunk
*chunk
= pool
->cur
, *next
;
2114 while (chunk
&& chunk
!= pool
->cur
);
2117 /* Reserve LEN bytes from a memory pool. */
2119 _cpp_pool_reserve (pool
, len
)
2123 len
= POOL_ALIGN (len
, pool
->align
);
2124 if (len
> (unsigned int) POOL_ROOM (pool
))
2125 _cpp_next_chunk (pool
, len
, 0);
2127 return POOL_FRONT (pool
);
2130 /* Allocate LEN bytes from a memory pool. */
2132 _cpp_pool_alloc (pool
, len
)
2136 unsigned char *result
= _cpp_pool_reserve (pool
, len
);
2138 POOL_COMMIT (pool
, len
);