1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category
;
68 const unsigned char *name
;
71 const unsigned char *digraph_spellings
[] = {U
"%:", U
"%:%:", U
"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings
[N_TTYPES
] = {TTYPE_TABLE
};
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline
PARAMS ((cpp_buffer
*, cppchar_t
));
84 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_buffer
*, cppchar_t
));
85 static cppchar_t get_effective_char
PARAMS ((cpp_buffer
*));
87 static int skip_block_comment
PARAMS ((cpp_reader
*));
88 static int skip_line_comment
PARAMS ((cpp_reader
*));
89 static void adjust_column
PARAMS ((cpp_reader
*));
90 static void skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
91 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*, cppchar_t
));
92 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
93 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
94 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
95 static void unterminated
PARAMS ((cpp_reader
*, int));
96 static int trigraph_ok
PARAMS ((cpp_reader
*, cppchar_t
));
97 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
98 static void lex_percent
PARAMS ((cpp_buffer
*, cpp_token
*));
99 static void lex_dot
PARAMS ((cpp_reader
*, cpp_token
*));
100 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
101 static unsigned int parse_escape
PARAMS ((cpp_reader
*, const unsigned char **,
102 const unsigned char *, HOST_WIDE_INT
,
104 static unsigned int read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
105 const unsigned char *, unsigned int));
107 static cpp_chunk
*new_chunk
PARAMS ((unsigned int));
108 static int chunk_suitable
PARAMS ((cpp_pool
*, cpp_chunk
*, unsigned int));
109 static unsigned int hex_digit_value
PARAMS ((unsigned int));
113 /* Compares the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
117 cpp_ideq (token
, string
)
118 const cpp_token
*token
;
121 if (token
->type
!= CPP_NAME
)
124 return !ustrcmp (NODE_NAME (token
->val
.node
), (const U_CHAR
*) string
);
127 /* Call when meeting a newline. NEWLINE_CHAR is the newline character
128 that was met in BUFFER. Returns the character after the newline
(or carriage-return newline combination), or EOF. The same character
is also stored in buffer->read_ahead. */
130 handle_newline (buffer
, newline_char
)
132 cppchar_t newline_char
;
134 cppchar_t next
= EOF
;
/* A new line begins at the current position: drop the column
adjustment accumulated on the old line and restart column counting
from buffer->cur. */
136 buffer
->col_adjust
= 0;
138 buffer
->line_base
= buffer
->cur
;
140 /* Handle CR-LF and LF-CR combinations, get the next character. */
141 if (buffer
->cur
< buffer
->rlimit
)
143 next
= *buffer
->cur
++;
/* Assuming NEWLINE_CHAR is CR or LF, the two sums are equal exactly
when NEXT is the other member of the pair, so this one test covers
both orderings of a two-character line ending. */
144 if (next
+ newline_char
== '\r' + '\n')
/* The line really starts after the second character of the pair. */
146 buffer
->line_base
= buffer
->cur
;
147 if (buffer
->cur
< buffer
->rlimit
)
148 next
= *buffer
->cur
++;
/* Leave the character following the newline as the lookahead. */
154 buffer
->read_ahead
= next
;
158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
159 encountered. It warns if necessary, and returns true if the
160 trigraph should be honoured. FROM_CHAR is the third character of a
161 trigraph, and presumed to be the previous character for position
reporting. */
164 trigraph_ok (pfile
, from_char
)
168 int accept
= CPP_OPTION (pfile
, trigraphs
);
170 /* Don't warn about trigraphs in comments. */
171 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
173 cpp_buffer
*buffer
= pfile
->buffer
;
175 cpp_warning_with_line (pfile
, buffer
->lineno
, CPP_BUF_COL (buffer
) - 2,
176 "trigraph ??%c converted to %c",
178 (int) _cpp_trigraph_map
[from_char
]);
179 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
181 buffer
->last_Wtrigraphs
= buffer
->cur
;
182 cpp_warning_with_line (pfile
, buffer
->lineno
,
183 CPP_BUF_COL (buffer
) - 2,
184 "trigraph ??%c ignored", (int) from_char
);
191 /* Assumes local variables buffer and result. */
192 #define ACCEPT_CHAR(t) \
193 do { result->type = t; buffer->read_ahead = EOF; } while (0)
195 /* When we move to multibyte character sets, add to these something
196 that saves and restores the state of the multibyte conversion
197 library. This probably involves saving and restoring a "cookie".
198 In the case of glibc it is an 8-byte structure, so is not a high
199 overhead operation. In any case, it's out of the fast path. */
200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
203 /* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
208 skip_escaped_newlines (buffer
, next
)
212 /* Only do this if we apply stages 1 and 2. */
213 if (!buffer
->from_stage3
)
216 const unsigned char *saved_cur
;
221 if (buffer
->cur
== buffer
->rlimit
)
227 next1
= *buffer
->cur
++;
228 if (next1
!= '?' || buffer
->cur
== buffer
->rlimit
)
234 next1
= *buffer
->cur
++;
235 if (!_cpp_trigraph_map
[next1
]
236 || !trigraph_ok (buffer
->pfile
, next1
))
242 /* We have a full trigraph here. */
243 next
= _cpp_trigraph_map
[next1
];
244 if (next
!= '\\' || buffer
->cur
== buffer
->rlimit
)
249 /* We have a backslash, and room for at least one more character. */
253 next1
= *buffer
->cur
++;
254 if (!is_nvspace (next1
))
258 while (buffer
->cur
< buffer
->rlimit
);
260 if (!is_vspace (next1
))
266 if (space
&& !buffer
->pfile
->state
.lexing_comment
)
267 cpp_warning (buffer
->pfile
,
268 "backslash and newline separated by space");
270 next
= handle_newline (buffer
, next1
);
272 cpp_pedwarn (buffer
->pfile
, "backslash-newline at end of file");
274 while (next
== '\\' || next
== '?');
277 buffer
->read_ahead
= next
;
281 /* Obtain the next character, after trigraph conversion and skipping
282 an arbitrary string of escaped newlines. The common case of no
283 trigraphs or escaped newlines falls through quickly. */
285 get_effective_char (buffer
)
288 cppchar_t next
= EOF
;
290 if (buffer
->cur
< buffer
->rlimit
)
292 next
= *buffer
->cur
++;
294 /* '?' can introduce trigraphs (and therefore backslash); '\\'
295 can introduce escaped newlines, which we want to skip, or
296 UCNs, which, depending upon lexer state, we will handle in
298 if (next
== '?' || next
== '\\')
299 next
= skip_escaped_newlines (buffer
, next
);
302 buffer
->read_ahead
= next
;
306 /* Skip a C-style block comment. We find the end of the comment by
307 seeing if an asterisk is before every '/' we encounter. Returns
308 non-zero if comment terminated by EOF, zero otherwise. */
310 skip_block_comment (pfile
)
313 cpp_buffer
*buffer
= pfile
->buffer
;
314 cppchar_t c
= EOF
, prevc
= EOF
;
316 pfile
->state
.lexing_comment
= 1;
317 while (buffer
->cur
!= buffer
->rlimit
)
319 prevc
= c
, c
= *buffer
->cur
++;
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c
== '?' || c
== '\\')
325 c
= skip_escaped_newlines (buffer
, c
);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimiter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile
, warn_comments
)
338 && buffer
->cur
!= buffer
->rlimit
)
340 prevc
= c
, c
= *buffer
->cur
++;
341 if (c
== '*' && buffer
->cur
!= buffer
->rlimit
)
343 prevc
= c
, c
= *buffer
->cur
++;
345 cpp_warning_with_line (pfile
, CPP_BUF_LINE (buffer
),
346 CPP_BUF_COL (buffer
),
347 "\"/*\" within comment");
352 else if (is_vspace (c
))
354 prevc
= c
, c
= handle_newline (buffer
, c
);
358 adjust_column (pfile
);
361 pfile
->state
.lexing_comment
= 0;
362 buffer
->read_ahead
= EOF
;
363 return c
!= '/' || prevc
!= '*';
366 /* Skip a C++ line comment. Handles escaped newlines. Returns
367 non-zero if a multiline comment. The following new line, if any,
368 is left in buffer->read_ahead. */
370 skip_line_comment (pfile
)
373 cpp_buffer
*buffer
= pfile
->buffer
;
374 unsigned int orig_lineno
= buffer
->lineno
;
377 pfile
->state
.lexing_comment
= 1;
381 if (buffer
->cur
== buffer
->rlimit
)
385 if (c
== '?' || c
== '\\')
386 c
= skip_escaped_newlines (buffer
, c
);
388 while (!is_vspace (c
));
390 pfile
->state
.lexing_comment
= 0;
391 buffer
->read_ahead
= c
; /* Leave any newline for caller. */
392 return orig_lineno
!= buffer
->lineno
;
395 /* Accounts for a tab in the column bookkeeping. On entry
396 pfile->buffer->cur is one beyond the \t character. Update
col_adjust so we track the column correctly: it grows by the number
of columns the tab spans beyond the one position it occupies itself.
For example, with a tabstop of 8 and a zero-based column of 3 this
adds (8 - 3 % 8) - 1 = 4.
NOTE(review): the modulus assumes the tabstop option is non-zero -
confirm it is validated where the option is set. */
398 adjust_column (pfile
)
401 cpp_buffer
*buffer
= pfile
->buffer
;
402 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
404 /* Round it up to multiple of the tabstop, but subtract 1 since the
405 tab itself occupies a character position. */
406 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
407 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
410 /* Skips whitespace, saving the next non-whitespace character.
411 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
412 tokens might be assigned an incorrect column. */
414 skip_whitespace (pfile
, c
)
418 cpp_buffer
*buffer
= pfile
->buffer
;
419 unsigned int warned
= 0;
423 /* Horizontal space always OK. */
427 adjust_column (pfile
);
428 /* Just \f \v or \0 left. */
433 cpp_warning (pfile
, "null character(s) ignored");
437 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
438 cpp_pedwarn_with_line (pfile
, CPP_BUF_LINE (buffer
),
439 CPP_BUF_COL (buffer
),
440 "%s in preprocessing directive",
441 c
== '\f' ? "form feed" : "vertical tab");
444 if (buffer
->cur
== buffer
->rlimit
)
448 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
449 while (is_nvspace (c
));
451 /* Remember the next character. */
452 buffer
->read_ahead
= c
;
455 /* See if the characters of a number token are valid in a name (no
458 name_p (pfile
, string
)
460 const cpp_string
*string
;
464 for (i
= 0; i
< string
->len
; i
++)
465 if (!is_idchar (string
->text
[i
]))
471 /* Parse an identifier, skipping embedded backslash-newlines.
472 Calculate the hash value of the token while parsing, for improved
473 performance. The hashing algorithm *must* match cpp_lookup(). */
475 static cpp_hashnode
*
476 parse_identifier (pfile
, c
)
480 cpp_hashnode
*result
;
481 cpp_buffer
*buffer
= pfile
->buffer
;
482 unsigned char *dest
, *limit
;
483 unsigned int r
= 0, saw_dollar
= 0;
485 dest
= POOL_FRONT (&pfile
->ident_pool
);
486 limit
= POOL_LIMIT (&pfile
->ident_pool
);
492 /* Need room for terminating null. */
493 if (dest
+ 1 >= limit
)
494 limit
= _cpp_next_chunk (&pfile
->ident_pool
, 0, &dest
);
503 if (buffer
->cur
== buffer
->rlimit
)
508 while (is_idchar (c
));
510 /* Potential escaped newline? */
511 if (c
!= '?' && c
!= '\\')
513 c
= skip_escaped_newlines (buffer
, c
);
515 while (is_idchar (c
));
517 /* Remember the next character. */
518 buffer
->read_ahead
= c
;
520 /* $ is not an identifier character in the standard, but is commonly
521 accepted as an extension. Don't warn about it in skipped
522 conditional blocks. */
523 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->skipping
)
524 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
526 /* Identifiers are null-terminated. */
529 /* This routine commits the memory if necessary. */
530 result
= _cpp_lookup_with_hash (pfile
,
531 dest
- POOL_FRONT (&pfile
->ident_pool
), r
);
533 /* Some identifiers require diagnostics when lexed. */
534 if (result
->flags
& NODE_DIAGNOSTIC
&& !pfile
->skipping
)
536 /* It is allowed to poison the same identifier twice. */
537 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
538 cpp_error (pfile
, "attempt to use poisoned \"%s\"",
541 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
542 replacement list of a variadic macro. */
543 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
544 && !pfile
->state
.va_args_ok
)
545 cpp_pedwarn (pfile
, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
551 /* Parse a number, skipping embedded backslash-newlines. */
553 parse_number (pfile
, number
, c
, leading_period
)
559 cpp_buffer
*buffer
= pfile
->buffer
;
560 cpp_pool
*pool
= &pfile
->ident_pool
;
561 unsigned char *dest
, *limit
;
563 dest
= POOL_FRONT (pool
);
564 limit
= POOL_LIMIT (pool
);
566 /* Place a leading period. */
570 limit
= _cpp_next_chunk (pool
, 0, &dest
);
578 /* Need room for terminating null. */
579 if (dest
+ 1 >= limit
)
580 limit
= _cpp_next_chunk (pool
, 0, &dest
);
584 if (buffer
->cur
== buffer
->rlimit
)
589 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
591 /* Potential escaped newline? */
592 if (c
!= '?' && c
!= '\\')
594 c
= skip_escaped_newlines (buffer
, c
);
596 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
598 /* Remember the next character. */
599 buffer
->read_ahead
= c
;
601 /* Null-terminate the number. */
604 number
->text
= POOL_FRONT (pool
);
605 number
->len
= dest
- number
->text
;
606 POOL_COMMIT (pool
, number
->len
+ 1);
609 /* Subroutine of parse_string. Emits error for unterminated strings.
TERM is the terminating character that was expected; it is printed
in the message. For a double-quote terminator, when a multi-line
string start was recorded in pfile->mlstring_pos on a line other
than the current lexer line, that position is reported too as the
possible start of the unterminated literal. */
611 unterminated (pfile
, term
)
615 cpp_error (pfile
, "missing terminating %c character", term
);
/* A zero mlstring_pos.line means no multi-line string start is
currently recorded. */
617 if (term
== '\"' && pfile
->mlstring_pos
.line
618 && pfile
->mlstring_pos
.line
!= pfile
->lexer_pos
.line
)
620 cpp_error_with_line (pfile
, pfile
->mlstring_pos
.line
,
621 pfile
->mlstring_pos
.col
,
622 "possible start of unterminated string literal");
/* Forget the recorded start so it is not reported again. */
623 pfile
->mlstring_pos
.line
= 0;
627 /* Subroutine of parse_string. */
629 unescaped_terminator_p (pfile
, dest
)
631 const unsigned char *dest
;
633 const unsigned char *start
, *temp
;
635 /* In #include-style directives, terminators are not escapeable. */
636 if (pfile
->state
.angled_headers
)
639 start
= POOL_FRONT (&pfile
->ident_pool
);
641 /* An odd number of consecutive backslashes represents an escaped
terminator. */
643 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
646 return ((dest
- temp
) & 1) == 0;
649 /* Parses a string, character constant, or angle-bracketed header file
650 name. Handles embedded trigraphs and escaped newlines. The stored
651 string is guaranteed NUL-terminated, but it is not guaranteed that
652 this is the first NUL since embedded NULs are preserved.
654 Multi-line strings are allowed, but they are deprecated. */
656 parse_string (pfile
, token
, terminator
)
659 cppchar_t terminator
;
661 cpp_buffer
*buffer
= pfile
->buffer
;
662 cpp_pool
*pool
= &pfile
->ident_pool
;
663 unsigned char *dest
, *limit
;
665 unsigned int nulls
= 0;
667 dest
= POOL_FRONT (pool
);
668 limit
= POOL_LIMIT (pool
);
672 if (buffer
->cur
== buffer
->rlimit
)
678 /* We need space for the terminating NUL. */
680 limit
= _cpp_next_chunk (pool
, 0, &dest
);
684 unterminated (pfile
, terminator
);
688 /* Handle trigraphs, escaped newlines etc. */
689 if (c
== '?' || c
== '\\')
690 c
= skip_escaped_newlines (buffer
, c
);
692 if (c
== terminator
&& unescaped_terminator_p (pfile
, dest
))
697 else if (is_vspace (c
))
699 /* In assembly language, silently terminate string and
700 character literals at end of line. This is a kludge
701 around not knowing where comments are. */
702 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
705 /* Character constants and header names may not extend over
706 multiple lines. In Standard C, neither may strings.
707 Unfortunately, we accept multiline strings as an
708 extension, except in #include family directives. */
709 if (terminator
!= '"' || pfile
->state
.angled_headers
)
711 unterminated (pfile
, terminator
);
715 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
716 if (pfile
->mlstring_pos
.line
== 0)
717 pfile
->mlstring_pos
= pfile
->lexer_pos
;
719 c
= handle_newline (buffer
, c
);
726 cpp_warning (pfile
, "null character(s) preserved in literal");
732 /* Remember the next character. */
733 buffer
->read_ahead
= c
;
736 token
->val
.str
.text
= POOL_FRONT (pool
);
737 token
->val
.str
.len
= dest
- token
->val
.str
.text
;
738 POOL_COMMIT (pool
, token
->val
.str
.len
+ 1);
741 /* The stored comment includes the comment start and any terminator. */
743 save_comment (pfile
, token
, from
)
746 const unsigned char *from
;
748 unsigned char *buffer
;
751 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
752 /* C++ comments probably (not definitely) have moved past a new
753 line, which we don't want to save in the comment. */
754 if (pfile
->buffer
->read_ahead
!= EOF
)
756 buffer
= _cpp_pool_alloc (&pfile
->ident_pool
, len
);
758 token
->type
= CPP_COMMENT
;
759 token
->val
.str
.len
= len
;
760 token
->val
.str
.text
= buffer
;
763 memcpy (buffer
+ 1, from
, len
- 1);
766 /* Subroutine of lex_token to handle '%'. A little tricky, since we
767 want to avoid stepping back when lexing %:%X. */
769 lex_percent (buffer
, result
)
775 result
->type
= CPP_MOD
;
776 /* Parsing %:%X could leave an extra character. */
777 if (buffer
->extra_char
== EOF
)
778 c
= get_effective_char (buffer
);
781 c
= buffer
->read_ahead
= buffer
->extra_char
;
782 buffer
->extra_char
= EOF
;
786 ACCEPT_CHAR (CPP_MOD_EQ
);
787 else if (CPP_OPTION (buffer
->pfile
, digraphs
))
791 result
->flags
|= DIGRAPH
;
792 ACCEPT_CHAR (CPP_HASH
);
793 if (get_effective_char (buffer
) == '%')
795 buffer
->extra_char
= get_effective_char (buffer
);
796 if (buffer
->extra_char
== ':')
798 buffer
->extra_char
= EOF
;
799 ACCEPT_CHAR (CPP_PASTE
);
802 /* We'll catch the extra_char when we're called back. */
803 buffer
->read_ahead
= '%';
808 result
->flags
|= DIGRAPH
;
809 ACCEPT_CHAR (CPP_CLOSE_BRACE
);
814 /* Subroutine of lex_token to handle '.'. This is tricky, since we
815 want to avoid stepping back when lexing '...' or '.123'. In the
816 latter case we should also set a flag for parse_number. */
818 lex_dot (pfile
, result
)
822 cpp_buffer
*buffer
= pfile
->buffer
;
825 /* Parsing ..X could leave an extra character. */
826 if (buffer
->extra_char
== EOF
)
827 c
= get_effective_char (buffer
);
830 c
= buffer
->read_ahead
= buffer
->extra_char
;
831 buffer
->extra_char
= EOF
;
834 /* All known character sets have 0...9 contiguous. */
835 if (c
>= '0' && c
<= '9')
837 result
->type
= CPP_NUMBER
;
838 parse_number (pfile
, &result
->val
.str
, c
, 1);
842 result
->type
= CPP_DOT
;
845 buffer
->extra_char
= get_effective_char (buffer
);
846 if (buffer
->extra_char
== '.')
848 buffer
->extra_char
= EOF
;
849 ACCEPT_CHAR (CPP_ELLIPSIS
);
852 /* We'll catch the extra_char when we're called back. */
853 buffer
->read_ahead
= '.';
855 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
856 ACCEPT_CHAR (CPP_DOT_STAR
);
861 _cpp_lex_token (pfile
, result
)
867 const unsigned char *comment_start
;
871 bol
= pfile
->state
.next_bol
;
873 buffer
= pfile
->buffer
;
874 pfile
->state
.next_bol
= 0;
875 result
->flags
= buffer
->saved_flags
;
876 buffer
->saved_flags
= 0;
878 pfile
->lexer_pos
.line
= buffer
->lineno
;
880 pfile
->lexer_pos
.col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
882 c
= buffer
->read_ahead
;
883 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
886 pfile
->lexer_pos
.col
++;
890 buffer
->read_ahead
= EOF
;
894 /* Non-empty files should end in a newline. Checking "bol" too
895 prevents multiple warnings when hitting the EOF more than
896 once, like in a directive. Don't warn for command line and
898 if (pfile
->lexer_pos
.col
!= 0 && !bol
&& !buffer
->from_stage3
)
899 cpp_pedwarn (pfile
, "no newline at end of file");
900 pfile
->state
.next_bol
= 1;
901 pfile
->skipping
= 0; /* In case missing #endif. */
902 result
->type
= CPP_EOF
;
903 /* Don't do MI optimisation. */
906 case ' ': case '\t': case '\f': case '\v': case '\0':
907 skip_whitespace (pfile
, c
);
908 result
->flags
|= PREV_WHITE
;
911 case '\n': case '\r':
912 if (!pfile
->state
.in_directive
)
914 handle_newline (buffer
, c
);
916 pfile
->lexer_pos
.output_line
= buffer
->lineno
;
917 /* This is a new line, so clear any white space flag.
918 Newlines in arguments are white space (6.10.3.10);
919 parse_arg takes care of that. */
920 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
924 /* Don't let directives spill over to the next line. */
925 buffer
->read_ahead
= c
;
926 pfile
->state
.next_bol
= 1;
927 result
->type
= CPP_EOF
;
928 /* Don't break; pfile->skipping might be true. */
933 /* These could start an escaped newline, or '?' a trigraph. Let
934 skip_escaped_newlines do all the work. */
936 unsigned int lineno
= buffer
->lineno
;
938 c
= skip_escaped_newlines (buffer
, c
);
939 if (lineno
!= buffer
->lineno
)
940 /* We had at least one escaped newline of some sort, and the
941 next character is in buffer->read_ahead. Update the
942 token's line and column. */
945 /* We are either the original '?' or '\\', or a trigraph. */
946 result
->type
= CPP_QUERY
;
947 buffer
->read_ahead
= EOF
;
955 case '0': case '1': case '2': case '3': case '4':
956 case '5': case '6': case '7': case '8': case '9':
957 result
->type
= CPP_NUMBER
;
958 parse_number (pfile
, &result
->val
.str
, c
, 0);
962 if (!CPP_OPTION (pfile
, dollars_in_ident
))
964 /* Fall through... */
967 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
968 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
969 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
970 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
972 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
973 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
974 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
975 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
977 result
->type
= CPP_NAME
;
978 result
->val
.node
= parse_identifier (pfile
, c
);
980 /* 'L' may introduce wide characters or strings. */
981 if (result
->val
.node
== pfile
->spec_nodes
.n_L
)
983 c
= buffer
->read_ahead
; /* For make_string. */
984 if (c
== '\'' || c
== '"')
986 ACCEPT_CHAR (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
990 /* Convert named operators to their proper types. */
991 else if (result
->val
.node
->flags
& NODE_OPERATOR
)
993 result
->flags
|= NAMED_OP
;
994 result
->type
= result
->val
.node
->value
.operator;
1000 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1002 parse_string (pfile
, result
, c
);
1006 /* A potential block or line comment. */
1007 comment_start
= buffer
->cur
;
1008 result
->type
= CPP_DIV
;
1009 c
= get_effective_char (buffer
);
1011 ACCEPT_CHAR (CPP_DIV_EQ
);
1012 if (c
!= '/' && c
!= '*')
1014 if (buffer
->from_stage3
)
1019 if (skip_block_comment (pfile
))
1020 cpp_error_with_line (pfile
, pfile
->lexer_pos
.line
,
1021 pfile
->lexer_pos
.col
,
1022 "unterminated comment");
1026 if (!CPP_OPTION (pfile
, cplusplus_comments
)
1027 && !CPP_IN_SYSTEM_HEADER (pfile
))
1030 /* Warn about comments only if pedantically GNUC89, and not
1031 in system headers. */
1032 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1033 && ! buffer
->warned_cplusplus_comments
)
1036 "C++ style comments are not allowed in ISO C89");
1038 "(this will be reported only once per input file)");
1039 buffer
->warned_cplusplus_comments
= 1;
1042 /* Skip_line_comment updates buffer->read_ahead. */
1043 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1044 cpp_warning_with_line (pfile
, pfile
->lexer_pos
.line
,
1045 pfile
->lexer_pos
.col
,
1046 "multi-line comment");
1049 /* Skipping the comment has updated buffer->read_ahead. */
1050 if (!pfile
->state
.save_comments
)
1052 result
->flags
|= PREV_WHITE
;
1056 /* Save the comment as a token in its own right. */
1057 save_comment (pfile
, result
, comment_start
);
1058 /* Don't do MI optimisation. */
1062 if (pfile
->state
.angled_headers
)
1064 result
->type
= CPP_HEADER_NAME
;
1065 c
= '>'; /* terminator. */
1069 result
->type
= CPP_LESS
;
1070 c
= get_effective_char (buffer
);
1072 ACCEPT_CHAR (CPP_LESS_EQ
);
1075 ACCEPT_CHAR (CPP_LSHIFT
);
1076 if (get_effective_char (buffer
) == '=')
1077 ACCEPT_CHAR (CPP_LSHIFT_EQ
);
1079 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1081 ACCEPT_CHAR (CPP_MIN
);
1082 if (get_effective_char (buffer
) == '=')
1083 ACCEPT_CHAR (CPP_MIN_EQ
);
1085 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1087 ACCEPT_CHAR (CPP_OPEN_SQUARE
);
1088 result
->flags
|= DIGRAPH
;
1090 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1092 ACCEPT_CHAR (CPP_OPEN_BRACE
);
1093 result
->flags
|= DIGRAPH
;
1098 result
->type
= CPP_GREATER
;
1099 c
= get_effective_char (buffer
);
1101 ACCEPT_CHAR (CPP_GREATER_EQ
);
1104 ACCEPT_CHAR (CPP_RSHIFT
);
1105 if (get_effective_char (buffer
) == '=')
1106 ACCEPT_CHAR (CPP_RSHIFT_EQ
);
1108 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1110 ACCEPT_CHAR (CPP_MAX
);
1111 if (get_effective_char (buffer
) == '=')
1112 ACCEPT_CHAR (CPP_MAX_EQ
);
1117 lex_percent (buffer
, result
);
1118 if (result
->type
== CPP_HASH
)
1123 lex_dot (pfile
, result
);
1127 result
->type
= CPP_PLUS
;
1128 c
= get_effective_char (buffer
);
1130 ACCEPT_CHAR (CPP_PLUS_EQ
);
1132 ACCEPT_CHAR (CPP_PLUS_PLUS
);
1136 result
->type
= CPP_MINUS
;
1137 c
= get_effective_char (buffer
);
1140 ACCEPT_CHAR (CPP_DEREF
);
1141 if (CPP_OPTION (pfile
, cplusplus
)
1142 && get_effective_char (buffer
) == '*')
1143 ACCEPT_CHAR (CPP_DEREF_STAR
);
1146 ACCEPT_CHAR (CPP_MINUS_EQ
);
1148 ACCEPT_CHAR (CPP_MINUS_MINUS
);
1152 result
->type
= CPP_MULT
;
1153 if (get_effective_char (buffer
) == '=')
1154 ACCEPT_CHAR (CPP_MULT_EQ
);
1158 result
->type
= CPP_EQ
;
1159 if (get_effective_char (buffer
) == '=')
1160 ACCEPT_CHAR (CPP_EQ_EQ
);
1164 result
->type
= CPP_NOT
;
1165 if (get_effective_char (buffer
) == '=')
1166 ACCEPT_CHAR (CPP_NOT_EQ
);
1170 result
->type
= CPP_AND
;
1171 c
= get_effective_char (buffer
);
1173 ACCEPT_CHAR (CPP_AND_EQ
);
1175 ACCEPT_CHAR (CPP_AND_AND
);
1179 c
= buffer
->extra_char
; /* Can be set by error condition below. */
1182 buffer
->read_ahead
= c
;
1183 buffer
->extra_char
= EOF
;
1186 c
= get_effective_char (buffer
);
1190 ACCEPT_CHAR (CPP_PASTE
);
1194 result
->type
= CPP_HASH
;
1198 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1199 tokens within the list of arguments that would otherwise act
1200 as preprocessing directives, the behavior is undefined.
1202 This implementation will report a hard error, terminate the
1203 macro invocation, and proceed to process the directive. */
1204 if (pfile
->state
.parsing_args
)
1206 if (pfile
->state
.parsing_args
== 2)
1208 "directives may not be used inside a macro argument");
1210 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1211 buffer
->extra_char
= buffer
->read_ahead
;
1212 buffer
->read_ahead
= '#';
1213 pfile
->state
.next_bol
= 1;
1214 result
->type
= CPP_EOF
;
1216 /* Get whitespace right - newline_in_args sets it. */
1217 if (pfile
->lexer_pos
.col
== 1)
1218 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
1222 /* This is the hash introducing a directive. */
1223 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1224 goto done_directive
; /* bol still 1. */
1225 /* This is in fact an assembler #. */
1230 result
->type
= CPP_OR
;
1231 c
= get_effective_char (buffer
);
1233 ACCEPT_CHAR (CPP_OR_EQ
);
1235 ACCEPT_CHAR (CPP_OR_OR
);
1239 result
->type
= CPP_XOR
;
1240 if (get_effective_char (buffer
) == '=')
1241 ACCEPT_CHAR (CPP_XOR_EQ
);
1245 result
->type
= CPP_COLON
;
1246 c
= get_effective_char (buffer
);
1247 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1248 ACCEPT_CHAR (CPP_SCOPE
);
1249 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1251 result
->flags
|= DIGRAPH
;
1252 ACCEPT_CHAR (CPP_CLOSE_SQUARE
);
1256 case '~': result
->type
= CPP_COMPL
; break;
1257 case ',': result
->type
= CPP_COMMA
; break;
1258 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1259 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1260 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1261 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1262 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1263 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1264 case ';': result
->type
= CPP_SEMICOLON
; break;
1266 /* @ is a punctuator in Objective C. */
1267 case '@': result
->type
= CPP_ATSIGN
; break;
1271 result
->type
= CPP_OTHER
;
1276 if (pfile
->skipping
)
1279 /* If not in a directive, this token invalidates controlling macros. */
1280 if (!pfile
->state
.in_directive
)
1281 pfile
->mi_state
= MI_FAILED
;
1284 /* An upper bound on the number of bytes needed to spell a token,
1285 including preceding whitespace. */
1287 cpp_token_len (token
)
1288 const cpp_token
*token
;
1292 switch (TOKEN_SPELL (token
))
1294 default: len
= 0; break;
1295 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1296 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1298 /* 1 for whitespace, 4 for comment delimiters. */
1302 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1303 already contain enough space to hold the token's spelling.
1304 Returns a pointer to the character after the last character
written. */
1307 cpp_spell_token (pfile
, token
, buffer
)
1308 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1309 const cpp_token
*token
;
1310 unsigned char *buffer
;
/* How the spelling is produced depends on the token's spelling class. */
1312 switch (TOKEN_SPELL (token
))
1314 case SPELL_OPERATOR
:
1316 const unsigned char *spelling
;
/* Pick the operator's spelling: digraph tokens index digraph_spellings
   by their type's offset from CPP_FIRST_DIGRAPH, and TOKEN_NAME is the
   other source of a spelling. The handling of NAMED_OP tokens is only
   partly visible in this excerpt. */
1319 if (token
->flags
& DIGRAPH
)
1321 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1322 else if (token
->flags
& NAMED_OP
)
1325 spelling
= TOKEN_NAME (token
);
/* Walk the chosen spelling one character at a time up to its NUL. */
1327 while ((c
= *spelling
++) != '\0')
/* Identifier text: copy NODE_LEN bytes of the node's name, then advance
   BUFFER by the same amount. */
1334 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1335 buffer
+= NODE_LEN (token
->val
.node
);
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG is an
   optional prefix character, all selected by token type. A zero value
   means that piece is not emitted. */
1340 int left
, right
, tag
;
1341 switch (token
->type
)
1343 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1344 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1345 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1346 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1347 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1348 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Emit, in order: tag, left delimiter, the stored text (by its stored
   length), right delimiter. */
1350 if (tag
) *buffer
++ = tag
;
1351 if (left
) *buffer
++ = left
;
1352 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1353 buffer
+= token
->val
.str
.len
;
1354 if (right
) *buffer
++ = right
;
/* Single-character token: its spelling is the character in val.c. */
1359 *buffer
++ = token
->val
.c
;
/* A token that cannot be spelled is reported through cpp_ice, naming
   the token with TOKEN_NAME. */
1363 cpp_ice (pfile
, "Unspellable token %s", TOKEN_NAME (token
));
1370 /* Returns a token as a null-terminated string. The string is
1371 temporary, and automatically freed later. Useful for diagnostics. */
/* Sizes a buffer with cpp_token_len, takes it from PFILE's ident_pool,
   and spells TOKEN into it with cpp_spell_token. */
1373 cpp_token_as_text (pfile
, token
)
1375 const cpp_token
*token
;
/* LEN is the upper bound reported by cpp_token_len; START is the
   LEN-byte pool allocation that receives the spelling. */
1377 unsigned int len
= cpp_token_len (token
);
1378 unsigned char *start
= _cpp_pool_alloc (&pfile
->ident_pool
, len
), *end
;
/* END receives cpp_spell_token's return value, i.e. the position just
   past the spelled text. */
1380 end
= cpp_spell_token (pfile
, token
, start
);
1386 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Returns the name field of the token_spellings entry indexed by TYPE,
   cast to const char *. No range check on TYPE is visible here. */
1388 cpp_type2name (type
)
1389 enum cpp_ttype type
;
1391 return (const char *) token_spellings
[type
].name
;
1394 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1395 for efficiency - to avoid double-buffering. Also, outputs a space
1396 if PREV_WHITE is flagged. */
/* Writes TOKEN's spelling straight to FP, choosing the output call by
   the token's spelling class. */
1398 cpp_output_token (token
, fp
)
1399 const cpp_token
*token
;
/* Tokens flagged PREV_WHITE are tested first; the guarded statement is
   not visible in this excerpt. */
1402 if (token
->flags
& PREV_WHITE
)
1405 switch (TOKEN_SPELL (token
))
1407 case SPELL_OPERATOR
:
1409 const unsigned char *spelling
;
/* Operator spelling: digraph tokens index digraph_spellings by their
   type's offset from CPP_FIRST_DIGRAPH, and TOKEN_NAME is the other
   source. The NAMED_OP handling is only partly visible here. */
1411 if (token
->flags
& DIGRAPH
)
1413 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1414 else if (token
->flags
& NAMED_OP
)
1417 spelling
= TOKEN_NAME (token
);
1419 ufputs (spelling
, fp
);
/* Identifier text is written from the node's name with ufputs. */
1425 ufputs (NODE_NAME (token
->val
.node
), fp
);
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG is an
   optional prefix character, all selected by token type. A zero value
   means that piece is not written. */
1430 int left
, right
, tag
;
1431 switch (token
->type
)
1433 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1434 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1435 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1436 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1437 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1438 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
1440 if (tag
) putc (tag
, fp
);
1441 if (left
) putc (left
, fp
);
/* The text goes out through fwrite with its stored length, not as a
   NUL-terminated string. */
1442 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1443 if (right
) putc (right
, fp
);
/* Single-character token: write the character in val.c. */
1448 putc (token
->val
.c
, fp
);
1452 /* An error, most probably. */
1457 /* Compare two tokens. */
/* Compares tokens A and B. Only when both the type and the flags match
   does the comparison go on to inspect the value, according to A's
   spelling class. */
1459 _cpp_equiv_tokens (a
, b
)
1460 const cpp_token
*a
, *b
;
1462 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1463 switch (TOKEN_SPELL (a
))
1465 default: /* Keep compiler happy. */
1466 case SPELL_OPERATOR
:
/* NOTE(review): the case labels for the returns below are not visible
   in this excerpt; each return compares one member of the value union. */
1469 return a
->val
.c
== b
->val
.c
; /* Character. */
/* Argument numbers are compared only for CPP_MACRO_ARG tokens; any
   other type reaching this return compares equal. */
1471 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
/* Identifier nodes are compared by pointer. */
1473 return a
->val
.node
== b
->val
.node
;
/* Strings must have the same length and the same bytes. */
1475 return (a
->val
.str
.len
== b
->val
.str
.len
1476 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1483 /* Determine whether two tokens can be pasted together, and if so,
1484 what the resulting token is. Returns CPP_EOF if the tokens cannot
1485 be pasted, or the appropriate type for the merged token if they can. */
1488 cpp_can_paste (pfile
, token1
, token2
, digraph
)
1490 const cpp_token
*token1
, *token2
;
1493 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1494 int cxx
= CPP_OPTION (pfile
, cplusplus
);
1496 /* Treat named operators as if they were ordinary NAMEs. */
1497 if (token1
->flags
& NAMED_OP
)
1499 if (token2
->flags
& NAMED_OP
)
/* Any first-token type up to CPP_LAST_EQ followed by CPP_EQ yields the
   matching "=" form, found by adding the enum distance between
   CPP_EQ_EQ and CPP_EQ to A. */
1502 if ((int) a
<= (int) CPP_LAST_EQ
&& b
== CPP_EQ
)
1503 return (enum cpp_ttype
) ((int) a
+ ((int) CPP_EQ_EQ
- (int) CPP_EQ
));
/* NOTE(review): the case labels of the switch that follows are mostly
   missing from this excerpt. Which first-token type each group of
   tests belongs to has to be read off the result types; confirm
   against the full file. Results gated on CXX are only produced when
   the cplusplus option is set. */
1508 if (b
== a
) return CPP_RSHIFT
;
1509 if (b
== CPP_QUERY
&& cxx
) return CPP_MAX
;
1510 if (b
== CPP_GREATER_EQ
) return CPP_RSHIFT_EQ
;
1513 if (b
== a
) return CPP_LSHIFT
;
1514 if (b
== CPP_QUERY
&& cxx
) return CPP_MIN
;
1515 if (b
== CPP_LESS_EQ
) return CPP_LSHIFT_EQ
;
/* Digraph results are considered only when the digraphs option is on;
   each one sets *DIGRAPH to 1 before returning. */
1516 if (CPP_OPTION (pfile
, digraphs
))
1519 {*digraph
= 1; return CPP_OPEN_SQUARE
;} /* <: digraph */
1521 {*digraph
= 1; return CPP_OPEN_BRACE
;} /* <% digraph */
/* A doubled operator pastes to its two-character form. */
1525 case CPP_PLUS
: if (b
== a
) return CPP_PLUS_PLUS
; break;
1526 case CPP_AND
: if (b
== a
) return CPP_AND_AND
; break;
1527 case CPP_OR
: if (b
== a
) return CPP_OR_OR
; break;
1530 if (b
== a
) return CPP_MINUS_MINUS
;
1531 if (b
== CPP_GREATER
) return CPP_DEREF
;
1534 if (b
== a
&& cxx
) return CPP_SCOPE
;
1535 if (b
== CPP_GREATER
&& CPP_OPTION (pfile
, digraphs
))
1536 {*digraph
= 1; return CPP_CLOSE_SQUARE
;} /* :> digraph */
1540 if (CPP_OPTION (pfile
, digraphs
))
1542 if (b
== CPP_GREATER
)
1543 {*digraph
= 1; return CPP_CLOSE_BRACE
;} /* %> digraph */
1545 {*digraph
= 1; return CPP_HASH
;} /* %: digraph */
1549 if (b
== CPP_MULT
&& cxx
) return CPP_DEREF_STAR
;
1552 if (b
== CPP_MULT
&& cxx
) return CPP_DOT_STAR
;
1553 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
/* Two identical tokens paste to CPP_PASTE only if they agree on the
   DIGRAPH flag; the result's digraph status is copied from TOKEN1. */
1557 if (b
== a
&& (token1
->flags
& DIGRAPH
) == (token2
->flags
& DIGRAPH
))
1559 {*digraph
= (token1
->flags
& DIGRAPH
); return CPP_PASTE
;}
/* The wide-character and wide-string results below require TOKEN1's
   node to be the special node n_L from PFILE's spec_nodes. */
1563 if (b
== CPP_NAME
) return CPP_NAME
;
1565 && name_p (pfile
, &token2
->val
.str
)) return CPP_NAME
;
1567 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WCHAR
;
1569 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WSTRING
;
/* These tests return CPP_NUMBER when the second token is a number, a
   name or a dot. */
1573 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1574 if (b
== CPP_NAME
) return CPP_NUMBER
;
1575 if (b
== CPP_DOT
) return CPP_NUMBER
;
1576 /* Numbers cannot have length zero, so this is safe. */
/* For a following sign, VALID_SIGN is applied to the last character of
   TOKEN1's text. */
1577 if ((b
== CPP_PLUS
|| b
== CPP_MINUS
)
1578 && VALID_SIGN ('+', token1
->val
.str
.text
[token1
->val
.str
.len
- 1]))
1589 /* Returns nonzero if a space should be inserted to avoid an
1590 accidental token paste for output. For simplicity, it is
1591 conservative, and occasionally advises a space where one is not
1592 needed, e.g. "." and ".2". */
/* Decides from TOKEN1's type and from the first character (or type) of
   TOKEN2 whether writing the two tokens back to back could be read as
   one longer token. */
1595 cpp_avoid_paste (pfile
, token1
, token2
)
1597 const cpp_token
*token1
, *token2
;
1599 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
/* Both tokens are tested for NAMED_OP; the guarded statements are not
   visible in this excerpt. */
1602 if (token1
->flags
& NAMED_OP
)
1604 if (token2
->flags
& NAMED_OP
)
/* C becomes the first character of TOKEN2's spelling: taken from
   digraph_spellings for a digraph token, or from token_spellings when
   B's category is SPELL_OPERATOR. C's initial value is not visible in
   this excerpt. */
1608 if (token2
->flags
& DIGRAPH
)
1609 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1610 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1611 c
= token_spellings
[b
].name
[0];
1613 /* Quickly get everything that can paste with an '='. */
1614 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Per first-token type: nonzero when C (or B) could extend TOKEN1 into
   a longer token. */
1619 case CPP_GREATER
: return c
== '>' || c
== '?';
1620 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1621 case CPP_PLUS
: return c
== '+';
1622 case CPP_MINUS
: return c
== '-' || c
== '>';
1623 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1624 case CPP_MOD
: return c
== ':' || c
== '>';
1625 case CPP_AND
: return c
== '&';
1626 case CPP_OR
: return c
== '|';
1627 case CPP_COLON
: return c
== ':' || c
== '>';
1628 case CPP_DEREF
: return c
== '*';
1629 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1630 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1631 case CPP_NAME
: return ((b
== CPP_NUMBER
1632 && name_p (pfile
, &token2
->val
.str
))
1634 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1635 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1636 || c
== '.' || c
== '+' || c
== '-');
/* A CPP_OTHER token matters only under the objc option, when it is '@'
   and is followed by a name or a string. */
1637 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1638 && token1
->val
.c
== '@'
1639 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1646 /* Output all the remaining tokens on the current line, and a newline
1647 character, to FP. Leading whitespace is removed. */
/* Pulls tokens with cpp_get_token and writes each with
   cpp_output_token until a CPP_EOF token is returned. */
1649 cpp_output_line (pfile
, fp
)
/* Clearing PREV_WHITE on the first token is what drops the leading
   whitespace. */
1655 cpp_get_token (pfile
, &token
);
1656 token
.flags
&= ~PREV_WHITE
;
1657 while (token
.type
!= CPP_EOF
)
1659 cpp_output_token (&token
, fp
);
1660 cpp_get_token (pfile
, &token
);
1666 /* Returns the value of a hexadecimal digit. */
/* Letters 'a'-'f' and 'A'-'F' map to 10-15. The decimal-digit range is
   tested last; its result, and what happens for any other character,
   are not visible in this excerpt. */
1671 if (c
>= 'a' && c
<= 'f')
1672 return c
- 'a' + 10;
1673 if (c
>= 'A' && c
<= 'F')
1674 return c
- 'A' + 10;
1675 if (c
>= '0' && c
<= '9')
1680 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1682 [lex.charset]: The character designated by the universal character
1683 name \UNNNNNNNN is that character whose character short name in
1684 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1685 universal character name \uNNNN is that character whose character
1686 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1687 for a universal character name is less than 0x20 or in the range
1688 0x7F-0x9F (inclusive), or if the universal character name
1689 designates a character in the basic source character set, then the
1690 program is ill-formed.
1692 We assume that wchar_t is Unicode, so we don't need to do any
1693 mapping. Is this ever wrong? */
/* Accumulates hex digits into CODE, counting LENGTH down to zero, then
   range-checks the resulting code and reports problems with cpp_error.
   P starts as a copy of *PSTR. */
1696 read_ucs (pfile
, pstr
, limit
, length
)
1698 const unsigned char **pstr
;
1699 const unsigned char *limit
;
1700 unsigned int length
;
1702 const unsigned char *p
= *pstr
;
1703 unsigned int c
, code
= 0;
/* One iteration per expected hex digit. */
1705 for (; length
; --length
)
1709 cpp_error (pfile
, "incomplete universal-character-name");
/* Shift the next digit's value into the low four bits of CODE. */
1716 code
= (code
<< 4) + hex_digit_value (c
);
1722 "non-hex digit '%c' in universal-character-name", c
);
1728 #ifdef TARGET_EBCDIC
1729 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1730 code
= 0x3f; /* EBCDIC invalid character */
/* Codes above 0x9f with bit 31 clear are accepted unchanged. */
1732 if (code
> 0x9f && !(code
& 0x80000000))
1733 ; /* True extended character, OK. */
1734 else if (code
>= 0x20 && code
< 0x7f)
1736 /* ASCII printable character. The C character set consists of all of
1737 these except $, @ and `. We use hex escapes so that this also
1738 works with EBCDIC hosts. */
/* Every code in this range other than 0x24, 0x40 and 0x60 is
   diagnosed. */
1739 if (code
!= 0x24 && code
!= 0x40 && code
!= 0x60)
1740 cpp_error (pfile
, "universal-character-name used for '%c'", code
);
1743 cpp_error (pfile
, "invalid universal-character-name");
1750 /* Interpret an escape sequence, and return its value. PSTR points to
1751 the input pointer, which is just after the backslash. LIMIT is how
1752 much text we have. MASK is the precision for the target type (char
1753 or wchar_t). TRADITIONAL, if true, does not interpret escapes that
1754 did not exist in traditional C. */
/* Reads the character after the backslash into C through a local copy
   of *PSTR (STR), then handles it according to which escape it
   introduces. */
1757 parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1759 const unsigned char **pstr
;
1760 const unsigned char *limit
;
1765 const unsigned char *str
= *pstr
;
1766 unsigned int c
= *str
++;
/* These escapes stand for the character itself, so C is left as read. */
1770 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the corresponding TARGET_* values. */
1771 case 'b': c
= TARGET_BS
; break;
1772 case 'f': c
= TARGET_FF
; break;
1773 case 'n': c
= TARGET_NEWLINE
; break;
1774 case 'r': c
= TARGET_CR
; break;
1775 case 't': c
= TARGET_TAB
; break;
1776 case 'v': c
= TARGET_VT
; break;
1778 case '(': case '{': case '[': case '%':
1779 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1780 '\%' is used to prevent SCCS from getting confused. */
/* UNKNOWN is set for these characters only when CPP_PEDANTIC holds. */
1781 unknown
= CPP_PEDANTIC (pfile
);
1785 if (CPP_WTRADITIONAL (pfile
))
1786 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
1792 if (CPP_PEDANTIC (pfile
))
1793 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
1797 /* Warnings and support checks handled by read_ucs(). */
1799 if (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
))
1801 if (CPP_WTRADITIONAL (pfile
))
1803 "the meaning of '\\%c' varies with -traditional", c
);
/* read_ucs is asked for 4 hex digits after 'u' and 8 otherwise. */
1804 c
= read_ucs (pfile
, &str
, limit
, c
== 'u' ? 4 : 8);
1811 if (CPP_WTRADITIONAL (pfile
))
1812 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value. OVERFLOW collects the bits that
   the next 4-bit left shift would push out of I, since i << 4 >> 4
   differs from i exactly when the top four bits are set. */
1816 unsigned int i
= 0, overflow
= 0;
1817 int digits_found
= 0;
1825 overflow
|= i
^ (i
<< 4 >> 4);
1826 i
= (i
<< 4) + hex_digit_value (c
);
1831 cpp_error (pfile
, "\\x used with no following hex digits");
/* Diagnose when bits were lost or the value has bits outside MASK. */
1833 if (overflow
| (i
!= (i
& mask
)))
1835 cpp_pedwarn (pfile
, "hex escape sequence out of range");
1842 case '0': case '1': case '2': case '3':
1843 case '4': case '5': case '6': case '7':
/* Octal escape: I starts with the first digit's value. The loop takes
   further digits while input remains and COUNT stays below 3; COUNT's
   initial value is not visible in this excerpt. */
1845 unsigned int i
= c
- '0';
1848 while (str
< limit
&& ++count
< 3)
1851 if (c
< '0' || c
> '7')
1854 i
= (i
<< 3) + c
- '0';
1857 if (i
!= (i
& mask
))
1859 cpp_pedwarn (pfile
, "octal escape sequence out of range");
/* Unknown escapes are reported in one of two forms: as the character
   itself, or as a three-digit octal code. */
1874 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1876 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
/* When MAX_CHAR_TYPE_SIZE or MAX_WCHAR_TYPE_SIZE is not already
   defined, each defaults to CHAR_TYPE_SIZE or WCHAR_TYPE_SIZE
   respectively. */
1883 #ifndef MAX_CHAR_TYPE_SIZE
1884 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1887 #ifndef MAX_WCHAR_TYPE_SIZE
1888 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1891 /* Interpret a (possibly wide) character constant in TOKEN.
1892 WARN_MULTI warns about multi-character charconsts, if not
1893 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1894 that did not exist in traditional C. PCHARS_SEEN points to a
1895 variable that is filled in with the number of characters seen. */
/* Walks the constant's text between STR and LIMIT, merges up to
   MAX_CHARS characters of WIDTH bits each into RESULT, and diagnoses
   empty, over-long and multi-character constants. The number of
   characters seen is stored through PCHARS_SEEN. */
1897 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
1899 const cpp_token
*token
;
1902 unsigned int *pchars_seen
;
1904 const unsigned char *str
= token
->val
.str
.text
;
1905 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1906 unsigned int chars_seen
= 0;
1907 unsigned int width
, max_chars
, c
;
1908 HOST_WIDE_INT result
= 0, mask
;
1910 #ifdef MULTIBYTE_CHARS
1911 (void) local_mbtowc (NULL
, NULL
, 0);
1914 /* Width in bits. */
/* WIDTH is MAX_CHAR_TYPE_SIZE for a CPP_CHAR token; MAX_WCHAR_TYPE_SIZE
   is the alternative. */
1915 if (token
->type
== CPP_CHAR
)
1916 width
= MAX_CHAR_TYPE_SIZE
;
1918 width
= MAX_WCHAR_TYPE_SIZE
;
/* When WIDTH is narrower than HOST_WIDE_INT, MASK has exactly the low
   WIDTH bits set. MAX_CHARS is how many WIDTH-bit characters fit in a
   HOST_WIDE_INT. */
1920 if (width
< HOST_BITS_PER_WIDE_INT
)
1921 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
1924 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
1928 #ifdef MULTIBYTE_CHARS
1932 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1935 cpp_warning (pfile
, "ignoring invalid multibyte character");
/* Escape sequences are decoded by parse_escape, which advances STR. */
1949 c
= parse_escape (pfile
, &str
, limit
, mask
, traditional
);
1950 if (width
< HOST_BITS_PER_WIDE_INT
&& c
> mask
)
1951 cpp_pedwarn (pfile
, "escape sequence out of range for character");
1954 #ifdef MAP_CHARACTER
1956 c
= MAP_CHARACTER (c
);
1959 /* Merge character into result; ignore excess chars. */
/* CHARS_SEEN counts every character, but only the first MAX_CHARS are
   shifted into RESULT. */
1960 if (++chars_seen
<= max_chars
)
1962 if (width
< HOST_BITS_PER_WIDE_INT
)
1963 result
= (result
<< width
) | (c
& mask
);
1969 if (chars_seen
== 0)
1970 cpp_error (pfile
, "empty character constant");
1971 else if (chars_seen
> max_chars
)
/* Clamp the count to the number of characters actually merged. */
1973 chars_seen
= max_chars
;
1974 cpp_error (pfile
, "character constant too long");
1976 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
1977 cpp_warning (pfile
, "multi-character character constant");
1979 /* If char type is signed, sign-extend the constant. The
1980 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1981 if (token
->type
== CPP_CHAR
&& chars_seen
)
/* The MASK declared here is a separate unsigned int that shadows the
   HOST_WIDE_INT MASK above.
   NOTE(review): NBITS can reach MAX_CHARS * WIDTH, which is bounded by
   HOST_BITS_PER_WIDE_INT rather than HOST_BITS_PER_INT. If the former
   is larger, HOST_BITS_PER_INT - NBITS goes negative and the shift is
   undefined; confirm the supported configurations rule this out. */
1983 unsigned int nbits
= chars_seen
* width
;
1984 unsigned int mask
= (unsigned int) ~0 >> (HOST_BITS_PER_INT
- nbits
);
/* The test is true when __CHAR_UNSIGNED__ is defined as a macro or the
   top bit of the NBITS-wide value is clear. */
1986 if (pfile
->spec_nodes
.n__CHAR_UNSIGNED__
->type
== NT_MACRO
1987 || ((result
>> (nbits
- 1)) & 1) == 0)
1993 *pchars_seen
= chars_seen
;
/* Default pool alignment: the offset of member U within struct dummy,
   which is declared outside this excerpt. */
2009 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Nonzero when CHUNK is non-null, is not POOL's locked chunk, and its
   capacity (limit - base) is at least twice SIZE.
   NOTE(review): the comment inside this function appears to be cut off
   in this copy (its closing text is missing). */
2012 chunk_suitable (pool
, chunk
, size
)
2017 /* Being at least twice SIZE means we can use memcpy in
2018 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2020 return (chunk
&& pool
->locked
!= chunk
2021 && (unsigned int) (chunk
->limit
- chunk
->base
) >= size
* 2);
2024 /* Returns the end of the new pool. PTR points to a char in the old
2025 pool, and is updated to point to the same char in the new pool. */
/* Moves POOL on to a chunk with room for LEN more bytes plus whatever
   is currently pending between the pool's front and its limit, and
   carries that pending data across. */
2027 _cpp_next_chunk (pool
, len
, ptr
)
2030 unsigned char **ptr
;
/* The first candidate is the chunk after the current one. */
2032 cpp_chunk
*chunk
= pool
->cur
->next
;
2034 /* LEN is the minimum size we want in the new pool. */
2035 len
+= POOL_ROOM (pool
);
/* If the candidate is unsuitable, allocate a chunk of twice POOL_SIZE
   plus LEN and link it in directly after the current chunk. */
2036 if (! chunk_suitable (pool
, chunk
, len
))
2038 chunk
= new_chunk (POOL_SIZE (pool
) * 2 + len
);
2040 chunk
->next
= pool
->cur
->next
;
2041 pool
->cur
->next
= chunk
;
2044 /* Update the pointer before changing chunk's front. */
/* *PTR is rebased by the distance from the old front to the new chunk's
   base. _cpp_pool_reserve passes 0 for PTR; any guard on this statement
   is not visible in this excerpt. */
2046 *ptr
+= chunk
->base
- POOL_FRONT (pool
);
/* Copy the POOL_ROOM bytes at the old front to the start of the new
   chunk, then reset the new chunk's front to its base. */
2048 memcpy (chunk
->base
, POOL_FRONT (pool
), POOL_ROOM (pool
));
2049 chunk
->front
= chunk
->base
;
2052 return POOL_LIMIT (pool
);
/* NOTE(review): the function header is missing from this excerpt; the
   body matches the new_chunk (size) calls made elsewhere in this file.
   It allocates one block holding SIZE data bytes followed by the
   cpp_chunk descriptor that describes them. */
2059 unsigned char *base
;
/* Round SIZE up to DEFAULT_ALIGNMENT, then allocate the data area and
   the descriptor together. */
2062 size
= POOL_ALIGN (size
, DEFAULT_ALIGNMENT
);
2063 base
= (unsigned char *) xmalloc (size
+ sizeof (cpp_chunk
));
2064 /* Put the chunk descriptor at the end. Then chunk overruns will
2065 cause obvious chaos. */
2066 result
= (cpp_chunk
*) (base
+ size
);
/* The chunk starts empty: FRONT equals BASE, and LIMIT is SIZE bytes
   past BASE. */
2067 result
->base
= base
;
2068 result
->front
= base
;
2069 result
->limit
= base
+ size
;
/* Sets up POOL with the given alignment and a first chunk of SIZE bytes
   obtained from new_chunk. */
2076 _cpp_init_pool (pool
, size
, align
, temp
)
2078 unsigned int size
, align
, temp
;
/* ALIGN can be replaced by DEFAULT_ALIGNMENT; the condition guarding
   this assignment is not visible in this excerpt. */
2081 align
= DEFAULT_ALIGNMENT
;
/* align & (align - 1) is nonzero when more than one bit of ALIGN is
   set, i.e. when it is not a power of two. The action taken is not
   visible in this excerpt. */
2082 if (align
& (align
- 1))
2084 pool
->align
= align
;
2085 pool
->cur
= new_chunk (size
);
/* The first chunk's NEXT is pointed back at itself. */
2089 pool
->cur
->next
= pool
->cur
;
/* Increments POOL's lock count. When the count was zero beforehand, the
   current chunk is recorded as the locked chunk. */
2093 _cpp_lock_pool (pool
)
2096 if (pool
->locks
++ == 0)
2097 pool
->locked
= pool
->cur
;
/* Decrements POOL's lock count and tests whether it has reached zero.
   The statement run in that case is not visible in this excerpt. */
2101 _cpp_unlock_pool (pool
)
2104 if (--pool
->locks
== 0)
/* Walks POOL's chunks starting from the current one. The loop body is
   not visible in this excerpt; the loop stops when CHUNK becomes null
   or comes back round to the pool's current chunk. */
2109 _cpp_free_pool (pool
)
2112 cpp_chunk
*chunk
= pool
->cur
, *next
;
2120 while (chunk
&& chunk
!= pool
->cur
);
2123 /* Reserve LEN bytes from a memory pool. */
/* Rounds LEN up to the pool's alignment, moves to another chunk when
   the current one has less than LEN bytes of room, and returns the
   pool's front. */
2125 _cpp_pool_reserve (pool
, len
)
2129 len
= POOL_ALIGN (len
, pool
->align
);
/* No caller pointer needs rebasing here, so 0 is passed as PTR. */
2130 if (len
> (unsigned int) POOL_ROOM (pool
))
2131 _cpp_next_chunk (pool
, len
, 0);
2133 return POOL_FRONT (pool
);
2136 /* Allocate LEN bytes from a memory pool. */
2138 _cpp_pool_alloc (pool
, len
)
2142 unsigned char *result
= _cpp_pool_reserve (pool
, len
);
2144 POOL_COMMIT (pool
, len
);