1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category
;
68 const unsigned char *name
;
71 const unsigned char *digraph_spellings
[] = {U
"%:", U
"%:%:", U
"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings
[N_TTYPES
] = {TTYPE_TABLE
};
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline
PARAMS ((cpp_buffer
*, cppchar_t
));
84 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_buffer
*, cppchar_t
));
85 static cppchar_t get_effective_char
PARAMS ((cpp_buffer
*));
87 static int skip_block_comment
PARAMS ((cpp_reader
*));
88 static int skip_line_comment
PARAMS ((cpp_reader
*));
89 static void adjust_column
PARAMS ((cpp_reader
*));
90 static void skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
91 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*, cppchar_t
));
92 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
93 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
94 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
95 static void unterminated
PARAMS ((cpp_reader
*, int));
96 static int trigraph_ok
PARAMS ((cpp_reader
*, cppchar_t
));
97 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
98 static void lex_percent
PARAMS ((cpp_buffer
*, cpp_token
*));
99 static void lex_dot
PARAMS ((cpp_reader
*, cpp_token
*));
100 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
101 static unsigned int parse_escape
PARAMS ((cpp_reader
*, const unsigned char **,
102 const unsigned char *, HOST_WIDE_INT
,
104 static unsigned int read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
105 const unsigned char *, unsigned int));
107 static cpp_chunk
*new_chunk
PARAMS ((unsigned int));
108 static int chunk_suitable
PARAMS ((cpp_pool
*, cpp_chunk
*, unsigned int));
109 static unsigned int hex_digit_value
PARAMS ((unsigned int));
113 /* Compares the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
117 cpp_ideq (token
, string
)
118 const cpp_token
*token
;
121 if (token
->type
!= CPP_NAME
)
124 return !ustrcmp (token
->val
.node
->name
, (const U_CHAR
*) string
);
127 /* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
130 handle_newline (buffer
, newline_char
)
132 cppchar_t newline_char
;
134 cppchar_t next
= EOF
;
136 buffer
->col_adjust
= 0;
138 buffer
->line_base
= buffer
->cur
;
140 /* Handle CR-LF and LF-CR combinations, get the next character. */
141 if (buffer
->cur
< buffer
->rlimit
)
143 next
= *buffer
->cur
++;
144 if (next
+ newline_char
== '\r' + '\n')
146 buffer
->line_base
= buffer
->cur
;
147 if (buffer
->cur
< buffer
->rlimit
)
148 next
= *buffer
->cur
++;
154 buffer
->read_ahead
= next
;
158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
159 encountered. It warns if necessary, and returns true if the
160 trigraph should be honoured. FROM_CHAR is the third character of a
161 trigraph, and presumed to be the previous character for position reporting. */
164 trigraph_ok (pfile
, from_char
)
168 int accept
= CPP_OPTION (pfile
, trigraphs
);
170 /* Don't warn about trigraphs in comments. */
171 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
173 cpp_buffer
*buffer
= pfile
->buffer
;
175 cpp_warning_with_line (pfile
, buffer
->lineno
, CPP_BUF_COL (buffer
) - 2,
176 "trigraph ??%c converted to %c",
178 (int) _cpp_trigraph_map
[from_char
]);
179 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
181 buffer
->last_Wtrigraphs
= buffer
->cur
;
182 cpp_warning_with_line (pfile
, buffer
->lineno
,
183 CPP_BUF_COL (buffer
) - 2,
184 "trigraph ??%c ignored", (int) from_char
);
191 /* Assumes local variables buffer and result. */
192 #define ACCEPT_CHAR(t) \
193 do { result->type = t; buffer->read_ahead = EOF; } while (0)
195 /* When we move to multibyte character sets, add to these something
196 that saves and restores the state of the multibyte conversion
197 library. This probably involves saving and restoring a "cookie".
198 In the case of glibc it is an 8-byte structure, so is not a high
199 overhead operation. In any case, it's out of the fast path. */
200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
203 /* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
208 skip_escaped_newlines (buffer
, next
)
212 /* Only do this if we apply stages 1 and 2. */
213 if (!buffer
->from_stage3
)
216 const unsigned char *saved_cur
;
221 if (buffer
->cur
== buffer
->rlimit
)
227 next1
= *buffer
->cur
++;
228 if (next1
!= '?' || buffer
->cur
== buffer
->rlimit
)
234 next1
= *buffer
->cur
++;
235 if (!_cpp_trigraph_map
[next1
]
236 || !trigraph_ok (buffer
->pfile
, next1
))
242 /* We have a full trigraph here. */
243 next
= _cpp_trigraph_map
[next1
];
244 if (next
!= '\\' || buffer
->cur
== buffer
->rlimit
)
249 /* We have a backslash, and room for at least one more character. */
253 next1
= *buffer
->cur
++;
254 if (!is_nvspace (next1
))
258 while (buffer
->cur
< buffer
->rlimit
);
260 if (!is_vspace (next1
))
266 if (space
&& !buffer
->pfile
->state
.lexing_comment
)
267 cpp_warning (buffer
->pfile
,
268 "backslash and newline separated by space");
270 next
= handle_newline (buffer
, next1
);
272 cpp_pedwarn (buffer
->pfile
, "backslash-newline at end of file");
274 while (next
== '\\' || next
== '?');
277 buffer
->read_ahead
= next
;
281 /* Obtain the next character, after trigraph conversion and skipping
282 an arbitrary string of escaped newlines. The common case of no
283 trigraphs or escaped newlines falls through quickly. */
285 get_effective_char (buffer
)
288 cppchar_t next
= EOF
;
290 if (buffer
->cur
< buffer
->rlimit
)
292 next
= *buffer
->cur
++;
294 /* '?' can introduce trigraphs (and therefore backslash); '\\'
295 can introduce escaped newlines, which we want to skip, or
296 UCNs, which, depending upon lexer state, we will handle in the future. */
298 if (next
== '?' || next
== '\\')
299 next
= skip_escaped_newlines (buffer
, next
);
302 buffer
->read_ahead
= next
;
306 /* Skip a C-style block comment. We find the end of the comment by
307 seeing if an asterisk is before every '/' we encounter. Returns
308 non-zero if comment terminated by EOF, zero otherwise. */
310 skip_block_comment (pfile
)
313 cpp_buffer
*buffer
= pfile
->buffer
;
314 cppchar_t c
= EOF
, prevc
= EOF
;
316 pfile
->state
.lexing_comment
= 1;
317 while (buffer
->cur
!= buffer
->rlimit
)
319 prevc
= c
, c
= *buffer
->cur
++;
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c
== '?' || c
== '\\')
325 c
= skip_escaped_newlines (buffer
, c
);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimiter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile
, warn_comments
)
338 && buffer
->cur
!= buffer
->rlimit
)
340 prevc
= c
, c
= *buffer
->cur
++;
341 if (c
== '*' && buffer
->cur
!= buffer
->rlimit
)
343 prevc
= c
, c
= *buffer
->cur
++;
345 cpp_warning_with_line (pfile
, CPP_BUF_LINE (buffer
),
346 CPP_BUF_COL (buffer
),
347 "\"/*\" within comment");
352 else if (is_vspace (c
))
354 prevc
= c
, c
= handle_newline (buffer
, c
);
358 adjust_column (pfile
);
361 pfile
->state
.lexing_comment
= 0;
362 buffer
->read_ahead
= EOF
;
363 return c
!= '/' || prevc
!= '*';
366 /* Skip a C++ line comment. Handles escaped newlines. Returns
367 non-zero if a multiline comment. The following new line, if any,
368 is left in buffer->read_ahead. */
370 skip_line_comment (pfile
)
373 cpp_buffer
*buffer
= pfile
->buffer
;
374 unsigned int orig_lineno
= buffer
->lineno
;
377 pfile
->state
.lexing_comment
= 1;
381 if (buffer
->cur
== buffer
->rlimit
)
385 if (c
== '?' || c
== '\\')
386 c
= skip_escaped_newlines (buffer
, c
);
388 while (!is_vspace (c
));
390 pfile
->state
.lexing_comment
= 0;
391 buffer
->read_ahead
= c
; /* Leave any newline for caller. */
392 return orig_lineno
!= buffer
->lineno
;
395 /* pfile->buffer->cur is one beyond the \t character. Update
396 col_adjust so we track the column correctly. */
398 adjust_column (pfile
)
401 cpp_buffer
*buffer
= pfile
->buffer
;
402 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
404 /* Round it up to multiple of the tabstop, but subtract 1 since the
405 tab itself occupies a character position. */
406 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
407 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
410 /* Skips whitespace, saving the next non-whitespace character.
411 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
412 tokens might be assigned an incorrect column. */
414 skip_whitespace (pfile
, c
)
418 cpp_buffer
*buffer
= pfile
->buffer
;
419 unsigned int warned
= 0;
423 /* Horizontal space always OK. */
427 adjust_column (pfile
);
428 /* Just \f \v or \0 left. */
433 cpp_warning (pfile
, "null character(s) ignored");
437 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
438 cpp_pedwarn_with_line (pfile
, CPP_BUF_LINE (buffer
),
439 CPP_BUF_COL (buffer
),
440 "%s in preprocessing directive",
441 c
== '\f' ? "form feed" : "vertical tab");
444 if (buffer
->cur
== buffer
->rlimit
)
448 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
449 while (is_nvspace (c
));
451 /* Remember the next character. */
452 buffer
->read_ahead
= c
;
455 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
458 name_p (pfile
, string
)
460 const cpp_string
*string
;
464 for (i
= 0; i
< string
->len
; i
++)
465 if (!is_idchar (string
->text
[i
]))
471 /* Parse an identifier, skipping embedded backslash-newlines.
472 Calculate the hash value of the token while parsing, for improved
473 performance. The hashing algorithm *must* match cpp_lookup(). */
475 static cpp_hashnode
*
476 parse_identifier (pfile
, c
)
480 cpp_hashnode
*result
;
481 cpp_buffer
*buffer
= pfile
->buffer
;
482 unsigned char *dest
, *limit
;
483 unsigned int r
= 0, saw_dollar
= 0;
485 dest
= POOL_FRONT (&pfile
->ident_pool
);
486 limit
= POOL_LIMIT (&pfile
->ident_pool
);
492 /* Need room for terminating null. */
493 if (dest
+ 1 >= limit
)
494 limit
= _cpp_next_chunk (&pfile
->ident_pool
, 0, &dest
);
503 if (buffer
->cur
== buffer
->rlimit
)
508 while (is_idchar (c
));
510 /* Potential escaped newline? */
511 if (c
!= '?' && c
!= '\\')
513 c
= skip_escaped_newlines (buffer
, c
);
515 while (is_idchar (c
));
517 /* Remember the next character. */
518 buffer
->read_ahead
= c
;
520 /* $ is not an identifier character in the standard, but is commonly
521 accepted as an extension. Don't warn about it in skipped
522 conditional blocks. */
523 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->skipping
)
524 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
526 /* Identifiers are null-terminated. */
529 /* This routine commits the memory if necessary. */
530 result
= _cpp_lookup_with_hash (pfile
,
531 dest
- POOL_FRONT (&pfile
->ident_pool
), r
);
533 /* Some identifiers require diagnostics when lexed. */
534 if (result
->flags
& NODE_DIAGNOSTIC
&& !pfile
->skipping
)
536 /* It is allowed to poison the same identifier twice. */
537 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
538 cpp_error (pfile
, "attempt to use poisoned \"%s\"", result
->name
);
540 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
541 replacement list of a variadic macro. */
542 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
543 && !pfile
->state
.va_args_ok
)
544 cpp_pedwarn (pfile
, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
550 /* Parse a number, skipping embedded backslash-newlines. */
552 parse_number (pfile
, number
, c
, leading_period
)
558 cpp_buffer
*buffer
= pfile
->buffer
;
559 cpp_pool
*pool
= &pfile
->ident_pool
;
560 unsigned char *dest
, *limit
;
562 dest
= POOL_FRONT (pool
);
563 limit
= POOL_LIMIT (pool
);
565 /* Place a leading period. */
569 limit
= _cpp_next_chunk (pool
, 0, &dest
);
577 /* Need room for terminating null. */
578 if (dest
+ 1 >= limit
)
579 limit
= _cpp_next_chunk (pool
, 0, &dest
);
583 if (buffer
->cur
== buffer
->rlimit
)
588 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
590 /* Potential escaped newline? */
591 if (c
!= '?' && c
!= '\\')
593 c
= skip_escaped_newlines (buffer
, c
);
595 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
597 /* Remember the next character. */
598 buffer
->read_ahead
= c
;
600 /* Null-terminate the number. */
603 number
->text
= POOL_FRONT (pool
);
604 number
->len
= dest
- number
->text
;
605 POOL_COMMIT (pool
, number
->len
+ 1);
608 /* Subroutine of parse_string. Emits error for unterminated strings. */
610 unterminated (pfile
, term
)
614 cpp_error (pfile
, "missing terminating %c character", term
);
616 if (term
== '\"' && pfile
->mlstring_pos
.line
617 && pfile
->mlstring_pos
.line
!= pfile
->lexer_pos
.line
)
619 cpp_error_with_line (pfile
, pfile
->mlstring_pos
.line
,
620 pfile
->mlstring_pos
.col
,
621 "possible start of unterminated string literal");
622 pfile
->mlstring_pos
.line
= 0;
626 /* Subroutine of parse_string. */
628 unescaped_terminator_p (pfile
, dest
)
630 const unsigned char *dest
;
632 const unsigned char *start
, *temp
;
634 /* In #include-style directives, terminators are not escapable. */
635 if (pfile
->state
.angled_headers
)
638 start
= POOL_FRONT (&pfile
->ident_pool
);
640 /* An odd number of consecutive backslashes represents an escaped terminator. */
642 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
645 return ((dest
- temp
) & 1) == 0;
648 /* Parses a string, character constant, or angle-bracketed header file
649 name. Handles embedded trigraphs and escaped newlines. The stored
650 string is guaranteed NUL-terminated, but it is not guaranteed that
651 this is the first NUL since embedded NULs are preserved.
653 Multi-line strings are allowed, but they are deprecated. */
655 parse_string (pfile
, token
, terminator
)
658 cppchar_t terminator
;
660 cpp_buffer
*buffer
= pfile
->buffer
;
661 cpp_pool
*pool
= &pfile
->ident_pool
;
662 unsigned char *dest
, *limit
;
664 unsigned int nulls
= 0;
666 dest
= POOL_FRONT (pool
);
667 limit
= POOL_LIMIT (pool
);
671 if (buffer
->cur
== buffer
->rlimit
)
677 /* We need space for the terminating NUL. */
679 limit
= _cpp_next_chunk (pool
, 0, &dest
);
683 unterminated (pfile
, terminator
);
687 /* Handle trigraphs, escaped newlines etc. */
688 if (c
== '?' || c
== '\\')
689 c
= skip_escaped_newlines (buffer
, c
);
691 if (c
== terminator
&& unescaped_terminator_p (pfile
, dest
))
696 else if (is_vspace (c
))
698 /* In assembly language, silently terminate string and
699 character literals at end of line. This is a kludge
700 around not knowing where comments are. */
701 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
704 /* Character constants and header names may not extend over
705 multiple lines. In Standard C, neither may strings.
706 Unfortunately, we accept multiline strings as an
707 extension, except in #include family directives. */
708 if (terminator
!= '"' || pfile
->state
.angled_headers
)
710 unterminated (pfile
, terminator
);
714 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
715 if (pfile
->mlstring_pos
.line
== 0)
716 pfile
->mlstring_pos
= pfile
->lexer_pos
;
718 c
= handle_newline (buffer
, c
);
725 cpp_warning (pfile
, "null character(s) preserved in literal");
731 /* Remember the next character. */
732 buffer
->read_ahead
= c
;
735 token
->val
.str
.text
= POOL_FRONT (pool
);
736 token
->val
.str
.len
= dest
- token
->val
.str
.text
;
737 POOL_COMMIT (pool
, token
->val
.str
.len
+ 1);
740 /* The stored comment includes the comment start and any terminator. */
742 save_comment (pfile
, token
, from
)
745 const unsigned char *from
;
747 unsigned char *buffer
;
750 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
751 /* C++ comments probably (not definitely) have moved past a new
752 line, which we don't want to save in the comment. */
753 if (pfile
->buffer
->read_ahead
!= EOF
)
755 buffer
= _cpp_pool_alloc (&pfile
->ident_pool
, len
);
757 token
->type
= CPP_COMMENT
;
758 token
->val
.str
.len
= len
;
759 token
->val
.str
.text
= buffer
;
762 memcpy (buffer
+ 1, from
, len
- 1);
765 /* Subroutine of lex_token to handle '%'. A little tricky, since we
766 want to avoid stepping back when lexing %:%X. */
768 lex_percent (buffer
, result
)
774 result
->type
= CPP_MOD
;
775 /* Parsing %:%X could leave an extra character. */
776 if (buffer
->extra_char
== EOF
)
777 c
= get_effective_char (buffer
);
780 c
= buffer
->read_ahead
= buffer
->extra_char
;
781 buffer
->extra_char
= EOF
;
785 ACCEPT_CHAR (CPP_MOD_EQ
);
786 else if (CPP_OPTION (buffer
->pfile
, digraphs
))
790 result
->flags
|= DIGRAPH
;
791 ACCEPT_CHAR (CPP_HASH
);
792 if (get_effective_char (buffer
) == '%')
794 buffer
->extra_char
= get_effective_char (buffer
);
795 if (buffer
->extra_char
== ':')
797 buffer
->extra_char
= EOF
;
798 ACCEPT_CHAR (CPP_PASTE
);
801 /* We'll catch the extra_char when we're called back. */
802 buffer
->read_ahead
= '%';
807 result
->flags
|= DIGRAPH
;
808 ACCEPT_CHAR (CPP_CLOSE_BRACE
);
813 /* Subroutine of lex_token to handle '.'. This is tricky, since we
814 want to avoid stepping back when lexing '...' or '.123'. In the
815 latter case we should also set a flag for parse_number. */
817 lex_dot (pfile
, result
)
821 cpp_buffer
*buffer
= pfile
->buffer
;
824 /* Parsing ..X could leave an extra character. */
825 if (buffer
->extra_char
== EOF
)
826 c
= get_effective_char (buffer
);
829 c
= buffer
->read_ahead
= buffer
->extra_char
;
830 buffer
->extra_char
= EOF
;
833 /* All known character sets have 0...9 contiguous. */
834 if (c
>= '0' && c
<= '9')
836 result
->type
= CPP_NUMBER
;
837 parse_number (pfile
, &result
->val
.str
, c
, 1);
841 result
->type
= CPP_DOT
;
844 buffer
->extra_char
= get_effective_char (buffer
);
845 if (buffer
->extra_char
== '.')
847 buffer
->extra_char
= EOF
;
848 ACCEPT_CHAR (CPP_ELLIPSIS
);
851 /* We'll catch the extra_char when we're called back. */
852 buffer
->read_ahead
= '.';
854 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
855 ACCEPT_CHAR (CPP_DOT_STAR
);
860 _cpp_lex_token (pfile
, result
)
866 const unsigned char *comment_start
;
870 bol
= pfile
->state
.next_bol
;
872 buffer
= pfile
->buffer
;
873 pfile
->state
.next_bol
= 0;
874 result
->flags
= buffer
->saved_flags
;
875 buffer
->saved_flags
= 0;
877 pfile
->lexer_pos
.line
= buffer
->lineno
;
879 pfile
->lexer_pos
.col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
881 c
= buffer
->read_ahead
;
882 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
885 pfile
->lexer_pos
.col
++;
889 buffer
->read_ahead
= EOF
;
893 /* Non-empty files should end in a newline. Checking "bol" too
894 prevents multiple warnings when hitting the EOF more than
895 once, like in a directive. Don't warn for command line and _Pragma buffers. */
897 if (pfile
->lexer_pos
.col
!= 0 && !bol
&& !buffer
->from_stage3
)
898 cpp_pedwarn (pfile
, "no newline at end of file");
899 pfile
->state
.next_bol
= 1;
900 pfile
->skipping
= 0; /* In case missing #endif. */
901 result
->type
= CPP_EOF
;
902 /* Don't do MI optimisation. */
905 case ' ': case '\t': case '\f': case '\v': case '\0':
906 skip_whitespace (pfile
, c
);
907 result
->flags
|= PREV_WHITE
;
910 case '\n': case '\r':
911 if (!pfile
->state
.in_directive
)
913 handle_newline (buffer
, c
);
915 pfile
->lexer_pos
.output_line
= buffer
->lineno
;
916 /* This is a new line, so clear any white space flag.
917 Newlines in arguments are white space (6.10.3.10);
918 parse_arg takes care of that. */
919 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
923 /* Don't let directives spill over to the next line. */
924 buffer
->read_ahead
= c
;
925 pfile
->state
.next_bol
= 1;
926 result
->type
= CPP_EOF
;
927 /* Don't break; pfile->skipping might be true. */
932 /* These could start an escaped newline, or '?' a trigraph. Let
933 skip_escaped_newlines do all the work. */
935 unsigned int lineno
= buffer
->lineno
;
937 c
= skip_escaped_newlines (buffer
, c
);
938 if (lineno
!= buffer
->lineno
)
939 /* We had at least one escaped newline of some sort, and the
940 next character is in buffer->read_ahead. Update the
941 token's line and column. */
944 /* We are either the original '?' or '\\', or a trigraph. */
945 result
->type
= CPP_QUERY
;
946 buffer
->read_ahead
= EOF
;
954 case '0': case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7': case '8': case '9':
956 result
->type
= CPP_NUMBER
;
957 parse_number (pfile
, &result
->val
.str
, c
, 0);
961 if (!CPP_OPTION (pfile
, dollars_in_ident
))
963 /* Fall through... */
966 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
967 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
968 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
969 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
971 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
972 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
973 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
974 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
976 result
->type
= CPP_NAME
;
977 result
->val
.node
= parse_identifier (pfile
, c
);
979 /* 'L' may introduce wide characters or strings. */
980 if (result
->val
.node
== pfile
->spec_nodes
.n_L
)
982 c
= buffer
->read_ahead
; /* For make_string. */
983 if (c
== '\'' || c
== '"')
985 ACCEPT_CHAR (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
989 /* Convert named operators to their proper types. */
990 else if (result
->val
.node
->flags
& NODE_OPERATOR
)
992 result
->flags
|= NAMED_OP
;
993 result
->type
= result
->val
.node
->value
.operator;
999 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
1001 parse_string (pfile
, result
, c
);
1005 /* A potential block or line comment. */
1006 comment_start
= buffer
->cur
;
1007 result
->type
= CPP_DIV
;
1008 c
= get_effective_char (buffer
);
1010 ACCEPT_CHAR (CPP_DIV_EQ
);
1011 if (c
!= '/' && c
!= '*')
1016 if (skip_block_comment (pfile
))
1017 cpp_error_with_line (pfile
, pfile
->lexer_pos
.line
,
1018 pfile
->lexer_pos
.col
,
1019 "unterminated comment");
1023 if (!CPP_OPTION (pfile
, cplusplus_comments
)
1024 && !CPP_IN_SYSTEM_HEADER (pfile
))
1027 /* Warn about comments only if pedantically GNUC89, and not
1028 in system headers. */
1029 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1030 && ! buffer
->warned_cplusplus_comments
)
1033 "C++ style comments are not allowed in ISO C89");
1035 "(this will be reported only once per input file)");
1036 buffer
->warned_cplusplus_comments
= 1;
1039 /* Skip_line_comment updates buffer->read_ahead. */
1040 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1041 cpp_warning_with_line (pfile
, pfile
->lexer_pos
.line
,
1042 pfile
->lexer_pos
.col
,
1043 "multi-line comment");
1046 /* Skipping the comment has updated buffer->read_ahead. */
1047 if (!pfile
->state
.save_comments
)
1049 result
->flags
|= PREV_WHITE
;
1053 /* Save the comment as a token in its own right. */
1054 save_comment (pfile
, result
, comment_start
);
1055 /* Don't do MI optimisation. */
1059 if (pfile
->state
.angled_headers
)
1061 result
->type
= CPP_HEADER_NAME
;
1062 c
= '>'; /* terminator. */
1066 result
->type
= CPP_LESS
;
1067 c
= get_effective_char (buffer
);
1069 ACCEPT_CHAR (CPP_LESS_EQ
);
1072 ACCEPT_CHAR (CPP_LSHIFT
);
1073 if (get_effective_char (buffer
) == '=')
1074 ACCEPT_CHAR (CPP_LSHIFT_EQ
);
1076 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1078 ACCEPT_CHAR (CPP_MIN
);
1079 if (get_effective_char (buffer
) == '=')
1080 ACCEPT_CHAR (CPP_MIN_EQ
);
1082 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1084 ACCEPT_CHAR (CPP_OPEN_SQUARE
);
1085 result
->flags
|= DIGRAPH
;
1087 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1089 ACCEPT_CHAR (CPP_OPEN_BRACE
);
1090 result
->flags
|= DIGRAPH
;
1095 result
->type
= CPP_GREATER
;
1096 c
= get_effective_char (buffer
);
1098 ACCEPT_CHAR (CPP_GREATER_EQ
);
1101 ACCEPT_CHAR (CPP_RSHIFT
);
1102 if (get_effective_char (buffer
) == '=')
1103 ACCEPT_CHAR (CPP_RSHIFT_EQ
);
1105 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1107 ACCEPT_CHAR (CPP_MAX
);
1108 if (get_effective_char (buffer
) == '=')
1109 ACCEPT_CHAR (CPP_MAX_EQ
);
1114 lex_percent (buffer
, result
);
1115 if (result
->type
== CPP_HASH
)
1120 lex_dot (pfile
, result
);
1124 result
->type
= CPP_PLUS
;
1125 c
= get_effective_char (buffer
);
1127 ACCEPT_CHAR (CPP_PLUS_EQ
);
1129 ACCEPT_CHAR (CPP_PLUS_PLUS
);
1133 result
->type
= CPP_MINUS
;
1134 c
= get_effective_char (buffer
);
1137 ACCEPT_CHAR (CPP_DEREF
);
1138 if (CPP_OPTION (pfile
, cplusplus
)
1139 && get_effective_char (buffer
) == '*')
1140 ACCEPT_CHAR (CPP_DEREF_STAR
);
1143 ACCEPT_CHAR (CPP_MINUS_EQ
);
1145 ACCEPT_CHAR (CPP_MINUS_MINUS
);
1149 result
->type
= CPP_MULT
;
1150 if (get_effective_char (buffer
) == '=')
1151 ACCEPT_CHAR (CPP_MULT_EQ
);
1155 result
->type
= CPP_EQ
;
1156 if (get_effective_char (buffer
) == '=')
1157 ACCEPT_CHAR (CPP_EQ_EQ
);
1161 result
->type
= CPP_NOT
;
1162 if (get_effective_char (buffer
) == '=')
1163 ACCEPT_CHAR (CPP_NOT_EQ
);
1167 result
->type
= CPP_AND
;
1168 c
= get_effective_char (buffer
);
1170 ACCEPT_CHAR (CPP_AND_EQ
);
1172 ACCEPT_CHAR (CPP_AND_AND
);
1176 c
= buffer
->extra_char
; /* Can be set by error condition below. */
1179 buffer
->read_ahead
= c
;
1180 buffer
->extra_char
= EOF
;
1183 c
= get_effective_char (buffer
);
1187 ACCEPT_CHAR (CPP_PASTE
);
1191 result
->type
= CPP_HASH
;
1195 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1196 tokens within the list of arguments that would otherwise act
1197 as preprocessing directives, the behavior is undefined.
1199 This implementation will report a hard error, terminate the
1200 macro invocation, and proceed to process the directive. */
1201 if (pfile
->state
.parsing_args
)
1203 if (pfile
->state
.parsing_args
== 2)
1205 "directives may not be used inside a macro argument");
1207 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1208 buffer
->extra_char
= buffer
->read_ahead
;
1209 buffer
->read_ahead
= '#';
1210 pfile
->state
.next_bol
= 1;
1211 result
->type
= CPP_EOF
;
1213 /* Get whitespace right - newline_in_args sets it. */
1214 if (pfile
->lexer_pos
.col
== 1)
1215 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
1219 /* This is the hash introducing a directive. */
1220 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1221 goto done_directive
; /* bol still 1. */
1222 /* This is in fact an assembler #. */
1227 result
->type
= CPP_OR
;
1228 c
= get_effective_char (buffer
);
1230 ACCEPT_CHAR (CPP_OR_EQ
);
1232 ACCEPT_CHAR (CPP_OR_OR
);
1236 result
->type
= CPP_XOR
;
1237 if (get_effective_char (buffer
) == '=')
1238 ACCEPT_CHAR (CPP_XOR_EQ
);
1242 result
->type
= CPP_COLON
;
1243 c
= get_effective_char (buffer
);
1244 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1245 ACCEPT_CHAR (CPP_SCOPE
);
1246 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1248 result
->flags
|= DIGRAPH
;
1249 ACCEPT_CHAR (CPP_CLOSE_SQUARE
);
1253 case '~': result
->type
= CPP_COMPL
; break;
1254 case ',': result
->type
= CPP_COMMA
; break;
1255 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1256 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1257 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1258 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1259 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1260 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1261 case ';': result
->type
= CPP_SEMICOLON
; break;
1263 /* @ is a punctuator in Objective C. */
1264 case '@': result
->type
= CPP_ATSIGN
; break;
1268 result
->type
= CPP_OTHER
;
1273 if (pfile
->skipping
)
1276 /* If not in a directive, this token invalidates controlling macros. */
1277 if (!pfile
->state
.in_directive
)
1278 pfile
->mi_state
= MI_FAILED
;
1281 /* An upper bound on the number of bytes needed to spell a token,
1282 including preceding whitespace. */
1284 cpp_token_len (token
)
1285 const cpp_token
*token
;
1289 switch (TOKEN_SPELL (token
))
1291 default: len
= 0; break;
1292 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1293 case SPELL_IDENT
: len
= token
->val
.node
->length
; break;
1295 /* 1 for whitespace, 4 for comment delimiters. */
1299 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1300 already contain enough space to hold the token's spelling.
1301 Returns a pointer to the character after the last character written. */
1304 cpp_spell_token (pfile
, token
, buffer
)
1305 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1306 const cpp_token
*token
;
1307 unsigned char *buffer
;
/* Spell TOKEN into BUFFER according to its spelling category.  In the
   visible code PFILE is used only for the cpp_ice report further
   down.  */
1309 switch (TOKEN_SPELL (token
))
/* Operators: pick a spelling string, then copy it.  */
1311 case SPELL_OPERATOR
:
1313 const unsigned char *spelling
;
/* Tokens flagged DIGRAPH are spelled from digraph_spellings, indexed by
   the token type's offset from CPP_FIRST_DIGRAPH.  */
1316 if (token
->flags
& DIGRAPH
)
1318 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
/* NOTE(review): the statement taken for NAMED_OP tokens is not visible
   in this listing.  */
1319 else if (token
->flags
& NAMED_OP
)
/* Spelling obtained from TOKEN_NAME.  */
1322 spelling
= TOKEN_NAME (token
);
/* Walk the spelling up to its terminating NUL; the loop body is not
   visible here.  */
1324 while ((c
= *spelling
++) != '\0')
/* Identifier: copy the node's name, then advance BUFFER by its
   length.  */
1331 memcpy (buffer
, token
->val
.node
->name
, token
->val
.node
->length
);
1332 buffer
+= token
->val
.node
->length
;
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG an
   optional prefix character; a zero value means the part is omitted.  */
1337 int left
, right
, tag
;
1338 switch (token
->type
)
1340 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1341 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1342 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1343 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1344 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1345 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Emit prefix and opening delimiter, then the stored text, then the
   closing delimiter.  */
1347 if (tag
) *buffer
++ = tag
;
1348 if (left
) *buffer
++ = left
;
1349 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1350 buffer
+= token
->val
.str
.len
;
1351 if (right
) *buffer
++ = right
;
/* Single-character spelling: store the character held in val.c.  */
1356 *buffer
++ = token
->val
.c
;
/* A token that reaches this call is reported through cpp_ice; the case
   label governing it is not visible in this listing.  */
1360 cpp_ice (pfile
, "Unspellable token %s", TOKEN_NAME (token
));
1367 /* Returns a token as a null-terminated string. The string is
1368 temporary, and automatically freed later. Useful for diagnostics. */
1370 cpp_token_as_text (pfile
, token
)
1372 const cpp_token
*token
;
/* Size the buffer with cpp_token_len's upper bound.  */
1374 unsigned int len
= cpp_token_len (token
);
/* The buffer is taken from PFILE's ident_pool.  */
1375 unsigned char *start
= _cpp_pool_alloc (&pfile
->ident_pool
, len
), *end
;
/* Spell the token into the buffer; END receives the pointer returned by
   cpp_spell_token.  NOTE(review): the terminator store and the return
   statement are not visible in this listing.  */
1377 end
= cpp_spell_token (pfile
, token
, start
);
1383 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Returns the name field of TYPE's entry in token_spellings, cast to
   const char *.  TYPE indexes the table directly; no range check is
   visible here.  */
1385 cpp_type2name (type
)
1386 enum cpp_ttype type
;
1388 return (const char *) token_spellings
[type
].name
;
1391 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1392 for efficiency - to avoid double-buffering. Also, outputs a space
1393 if PREV_WHITE is flagged. */
1395 cpp_output_token (token
, fp
)
1396 const cpp_token
*token
;
/* The statement guarded by this test is not visible in this listing;
   per the header comment it outputs a space.  */
1399 if (token
->flags
& PREV_WHITE
)
/* What gets written depends on the token's spelling category.  */
1402 switch (TOKEN_SPELL (token
))
/* Operators: choose a spelling string, then write it.  */
1404 case SPELL_OPERATOR
:
1406 const unsigned char *spelling
;
/* Tokens flagged DIGRAPH are spelled from digraph_spellings, indexed by
   the token type's offset from CPP_FIRST_DIGRAPH.  */
1408 if (token
->flags
& DIGRAPH
)
1410 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
/* NOTE(review): the statement taken for NAMED_OP tokens is not visible
   in this listing.  */
1411 else if (token
->flags
& NAMED_OP
)
1414 spelling
= TOKEN_NAME (token
);
1416 ufputs (spelling
, fp
);
/* Identifier: write the node's name.  */
1422 ufputs (token
->val
.node
->name
, fp
);
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG an
   optional prefix character; a zero value means the part is omitted.  */
1427 int left
, right
, tag
;
1428 switch (token
->type
)
1430 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1431 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1432 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1433 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1434 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1435 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* The text is written with fwrite using its stored length, so it does
   not need to be NUL-terminated.  Return values of putc and fwrite are
   not checked.  */
1437 if (tag
) putc (tag
, fp
);
1438 if (left
) putc (left
, fp
);
1439 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1440 if (right
) putc (right
, fp
);
/* Single-character spelling: write the character held in val.c.  */
1445 putc (token
->val
.c
, fp
);
1449 /* An error, most probably. */
1454 /* Compare two tokens. */
/* A and B are the tokens to compare; both are only read.
   NOTE(review): most case labels, and the result returned when the
   first test fails, are not visible in this listing.  */
1456 _cpp_equiv_tokens (a
, b
)
1457 const cpp_token
*a
, *b
;
/* Values are compared only when both the type and the full flags word
   match.  */
1459 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
/* The comparison then depends on A's spelling category.  */
1460 switch (TOKEN_SPELL (a
))
1462 default: /* Keep compiler happy. */
1463 case SPELL_OPERATOR
:
1466 return a
->val
.c
== b
->val
.c
; /* Character. */
/* Equal unless the type is CPP_MACRO_ARG, in which case the argument
   numbers must also match.  */
1468 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
/* Identifier nodes are compared by pointer.  */
1470 return a
->val
.node
== b
->val
.node
;
/* Stored text: lengths must match and memcmp must report equality; the
   end of this expression is not visible in this listing.  */
1472 return (a
->val
.str
.len
== b
->val
.str
.len
1473 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1480 /* Determine whether two tokens can be pasted together, and if so,
1481 what the resulting token is. Returns CPP_EOF if the tokens cannot
1482 be pasted, or the appropriate type for the merged token if they can. */
1485 cpp_can_paste (pfile
, token1
, token2
, digraph
)
1487 const cpp_token
*token1
, *token2
;
1490 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1491 int cxx
= CPP_OPTION (pfile
, cplusplus
);
1493 /* Treat named operators as if they were ordinary NAMEs. */
/* A and B are the types of the left and right token; CXX caches the
   cplusplus option.  NOTE(review): the statements run for each NAMED_OP
   test, and most case labels of the switch on A, are not visible in
   this listing, so the groupings noted below are inferred from the
   results they return.  */
1494 if (token1
->flags
& NAMED_OP
)
1496 if (token2
->flags
& NAMED_OP
)
/* Shortcut for pasting an '=' onto A: the result is A offset by the
   distance between CPP_EQ_EQ and CPP_EQ.  This arithmetic presumes the
   '=' forms appear in cpp_ttype in the same order as their base
   operators up to CPP_LAST_EQ -- verify against the enum.  */
1499 if ((int) a
<= (int) CPP_LAST_EQ
&& b
== CPP_EQ
)
1500 return (enum cpp_ttype
) ((int) a
+ ((int) CPP_EQ_EQ
- (int) CPP_EQ
));
/* Presumably the CPP_GREATER group.  CPP_MAX is produced only when the
   cplusplus option is set.  */
1505 if (b
== a
) return CPP_RSHIFT
;
1506 if (b
== CPP_QUERY
&& cxx
) return CPP_MAX
;
1507 if (b
== CPP_GREATER_EQ
) return CPP_RSHIFT_EQ
;
/* Presumably the CPP_LESS group.  The digraph results also store 1
   through DIGRAPH, and are guarded by the digraphs option.  */
1510 if (b
== a
) return CPP_LSHIFT
;
1511 if (b
== CPP_QUERY
&& cxx
) return CPP_MIN
;
1512 if (b
== CPP_LESS_EQ
) return CPP_LSHIFT_EQ
;
1513 if (CPP_OPTION (pfile
, digraphs
))
1516 {*digraph
= 1; return CPP_OPEN_SQUARE
;} /* <: digraph */
1518 {*digraph
= 1; return CPP_OPEN_BRACE
;} /* <% digraph */
/* Operators that double when pasted onto themselves.  */
1522 case CPP_PLUS
: if (b
== a
) return CPP_PLUS_PLUS
; break;
1523 case CPP_AND
: if (b
== a
) return CPP_AND_AND
; break;
1524 case CPP_OR
: if (b
== a
) return CPP_OR_OR
; break;
1527 if (b
== a
) return CPP_MINUS_MINUS
;
1528 if (b
== CPP_GREATER
) return CPP_DEREF
;
/* CPP_SCOPE is produced only when the cplusplus option is set.  */
1531 if (b
== a
&& cxx
) return CPP_SCOPE
;
1532 if (b
== CPP_GREATER
&& CPP_OPTION (pfile
, digraphs
))
1533 {*digraph
= 1; return CPP_CLOSE_SQUARE
;} /* :> digraph */
1537 if (CPP_OPTION (pfile
, digraphs
))
1539 if (b
== CPP_GREATER
)
1540 {*digraph
= 1; return CPP_CLOSE_BRACE
;} /* %> digraph */
1542 {*digraph
= 1; return CPP_HASH
;} /* %: digraph */
/* The two '*' pastes below are produced only when the cplusplus option
   is set.  */
1546 if (b
== CPP_MULT
&& cxx
) return CPP_DEREF_STAR
;
1549 if (b
== CPP_MULT
&& cxx
) return CPP_DOT_STAR
;
1550 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
/* Two identical tokens paste to CPP_PASTE only when their DIGRAPH flags
   agree; that shared flag value is stored through DIGRAPH.  */
1554 if (b
== a
&& (token1
->flags
& DIGRAPH
) == (token2
->flags
& DIGRAPH
))
1556 {*digraph
= (token1
->flags
& DIGRAPH
); return CPP_PASTE
;}
/* Results yielding CPP_NAME, CPP_WCHAR and CPP_WSTRING.  The latter two
   require TOKEN1's node to be the n_L entry of spec_nodes; the first
   halves of these conditions are not visible in this listing.  */
1560 if (b
== CPP_NAME
) return CPP_NAME
;
1562 && name_p (pfile
, &token2
->val
.str
)) return CPP_NAME
;
1564 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WCHAR
;
1566 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WSTRING
;
/* Pastes that yield CPP_NUMBER.  */
1570 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1571 if (b
== CPP_NAME
) return CPP_NUMBER
;
1572 if (b
== CPP_DOT
) return CPP_NUMBER
;
1573 /* Numbers cannot have length zero, so this is safe. */
/* A sign is tested against the last character of TOKEN1's text using
   VALID_SIGN; the result of this test is not visible in this
   listing.  */
1574 if ((b
== CPP_PLUS
|| b
== CPP_MINUS
)
1575 && VALID_SIGN ('+', token1
->val
.str
.text
[token1
->val
.str
.len
- 1]))
1586 /* Returns nonzero if a space should be inserted to avoid an
1587 accidental token paste for output. For simplicity, it is
1588 conservative, and occasionally advises a space where one is not
1589 needed, e.g. "." and ".2". */
1592 cpp_avoid_paste (pfile
, token1
, token2
)
1594 const cpp_token
*token1
, *token2
;
/* A and B are the types of the left and right token.  */
1596 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
/* NOTE(review): the statements run for each NAMED_OP test are not
   visible in this listing.  */
1599 if (token1
->flags
& NAMED_OP
)
1601 if (token2
->flags
& NAMED_OP
)
/* C is set to the first character of TOKEN2's spelling: from
   digraph_spellings for digraph tokens, otherwise from token_spellings
   when B's category is SPELL_OPERATOR.  Its value for other tokens is
   set on a line not visible here.  */
1605 if (token2
->flags
& DIGRAPH
)
1606 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1607 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1608 c
= token_spellings
[b
].name
[0];
1610 /* Quickly get everything that can paste with an '='. */
1611 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Otherwise decide per left-hand token type, mostly from the first
   character of the right-hand token.  */
1616 case CPP_GREATER
: return c
== '>' || c
== '?';
1617 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1618 case CPP_PLUS
: return c
== '+';
1619 case CPP_MINUS
: return c
== '-' || c
== '>';
1620 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1621 case CPP_MOD
: return c
== ':' || c
== '>';
1622 case CPP_AND
: return c
== '&';
1623 case CPP_OR
: return c
== '|';
1624 case CPP_COLON
: return c
== ':' || c
== '>';
1625 case CPP_DEREF
: return c
== '*';
1626 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1627 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
/* A name followed by a number that name_p accepts, or by a character
   or string constant; one operand of this expression is not visible in
   this listing.  */
1628 case CPP_NAME
: return ((b
== CPP_NUMBER
1629 && name_p (pfile
, &token2
->val
.str
))
1631 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1632 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1633 || c
== '.' || c
== '+' || c
== '-');
/* With the objc option set, an '@' held in a CPP_OTHER token must be
   kept apart from a following name or string.  */
1634 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1635 && token1
->val
.c
== '@'
1636 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1643 /* Output all the remaining tokens on the current line, and a newline
1644 character, to FP. Leading whitespace is removed. */
1646 cpp_output_line (pfile
, fp
)
/* Fetch the first token and clear its PREV_WHITE flag, so that no
   leading space is requested for it.  */
1652 cpp_get_token (pfile
, &token
);
1653 token
.flags
&= ~PREV_WHITE
;
/* Write each token and fetch the next until cpp_get_token yields
   CPP_EOF.  The newline output mentioned above is on a line not visible
   in this listing.  */
1654 while (token
.type
!= CPP_EOF
)
1656 cpp_output_token (&token
, fp
);
1657 cpp_get_token (pfile
, &token
);
1663 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the function header is not visible in this listing;
   judging by the calls elsewhere in the file this is presumably
   hex_digit_value, taking the character C.  */
/* Lower-case and upper-case letters a-f map to 10..15.  */
1668 if (c
>= 'a' && c
<= 'f')
1669 return c
- 'a' + 10;
1670 if (c
>= 'A' && c
<= 'F')
1671 return c
- 'A' + 10;
/* Decimal digits; the value returned for them, and the handling of any
   other character, are on lines not visible here.  */
1672 if (c
>= '0' && c
<= '9')
1677 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1679 [lex.charset]: The character designated by the universal character
1680 name \UNNNNNNNN is that character whose character short name in
1681 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1682 universal character name \uNNNN is that character whose character
1683 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1684 for a universal character name is less than 0x20 or in the range
1685 0x7F-0x9F (inclusive), or if the universal character name
1686 designates a character in the basic source character set, then the
1687 program is ill-formed.
1689 We assume that wchar_t is Unicode, so we don't need to do any
1690 mapping. Is this ever wrong? */
/* PSTR points to the input pointer, LIMIT bounds the text and LENGTH is
   the number of hex digits expected.  NOTE(review): the tests that lead
   to each diagnostic inside the loop, the update of *PSTR and the
   return are on lines not visible in this listing.  */
1693 read_ucs (pfile
, pstr
, limit
, length
)
1695 const unsigned char **pstr
;
1696 const unsigned char *limit
;
1697 unsigned int length
;
1699 const unsigned char *p
= *pstr
;
1700 unsigned int c
, code
= 0;
/* One iteration per expected digit; CODE accumulates four bits per
   digit.  */
1702 for (; length
; --length
)
1706 cpp_error (pfile
, "incomplete universal-character-name");
1713 code
= (code
<< 4) + hex_digit_value (c
);
1719 "non-hex digit '%c' in universal-character-name", c
);
/* When TARGET_EBCDIC is defined, universal character names are
   rejected and CODE is replaced with 0x3f.  */
1725 #ifdef TARGET_EBCDIC
1726 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1727 code
= 0x3f; /* EBCDIC invalid character */
/* Validate the code point: values above 0x9f with bit 31 clear are
   accepted as they are.  */
1729 if (code
> 0x9f && !(code
& 0x80000000))
1730 ; /* True extended character, OK. */
1731 else if (code
>= 0x20 && code
< 0x7f)
1733 /* ASCII printable character. The C character set consists of all of
1734 these except $, @ and `. We use hex escapes so that this also
1735 works with EBCDIC hosts. */
/* 0x24, 0x40 and 0x60 are the three characters excepted in the comment
   above; any other value in this range is an error.  */
1736 if (code
!= 0x24 && code
!= 0x40 && code
!= 0x60)
1737 cpp_error (pfile
, "universal-character-name used for '%c'", code
);
/* Remaining values are rejected as invalid.  */
1740 cpp_error (pfile
, "invalid universal-character-name");
1747 /* Interpret an escape sequence, and return its value. PSTR points to
1748 the input pointer, which is just after the backslash. LIMIT is how
1749 much text we have. MASK is the precision for the target type (char
1750 or wchar_t). TRADITIONAL, if true, does not interpret escapes that
1751 did not exist in traditional C. */
/* NOTE(review): several case labels, the loop headers and the final
   update of *PSTR are on lines not visible in this listing.  */
1754 parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1756 const unsigned char **pstr
;
1757 const unsigned char *limit
;
/* C is the character after the backslash; STR is left pointing past
   it.  */
1762 const unsigned char *str
= *pstr
;
1763 unsigned int c
= *str
++;
/* These escapes stand for the character itself.  */
1767 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the target's character codes.  */
1768 case 'b': c
= TARGET_BS
; break;
1769 case 'f': c
= TARGET_FF
; break;
1770 case 'n': c
= TARGET_NEWLINE
; break;
1771 case 'r': c
= TARGET_CR
; break;
1772 case 't': c
= TARGET_TAB
; break;
1773 case 'v': c
= TARGET_VT
; break;
1775 case '(': case '{': case '[': case '%':
1776 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1777 '\%' is used to prevent SCCS from getting confused. */
/* These count as unknown only under CPP_PEDANTIC.  */
1778 unknown
= CPP_PEDANTIC (pfile
);
1782 if (CPP_WTRADITIONAL (pfile
))
1783 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
1789 if (CPP_PEDANTIC (pfile
))
1790 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
1794 /* Warnings and support checks handled by read_ucs(). */
/* Universal character names are interpreted only with the cplusplus or
   c99 option; 'u' takes 4 hex digits, otherwise 8.  */
1796 if (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
))
1798 if (CPP_WTRADITIONAL (pfile
))
1800 "the meaning of '\\%c' varies with -traditional", c
);
1801 c
= read_ucs (pfile
, &str
, limit
, c
== 'u' ? 4 : 8);
1808 if (CPP_WTRADITIONAL (pfile
))
1809 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
/* Hex escape: I accumulates the value and OVERFLOW collects any bits
   that would be shifted out of I.  */
1813 unsigned int i
= 0, overflow
= 0;
1814 int digits_found
= 0;
/* Nonzero bits appear here exactly when the top four bits of I are
   set, that is, when the next shift would lose them.  */
1822 overflow
|= i
^ (i
<< 4 >> 4);
1823 i
= (i
<< 4) + hex_digit_value (c
);
1828 cpp_error (pfile
, "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit in MASK.  */
1830 if (overflow
| (i
!= (i
& mask
)))
1832 cpp_pedwarn (pfile
, "hex escape sequence out of range");
/* Octal escape: the first digit is already in C; the loop admits up to
   two more digits while text remains.  */
1839 case '0': case '1': case '2': case '3':
1840 case '4': case '5': case '6': case '7':
1842 unsigned int i
= c
- '0';
1845 while (str
< limit
&& ++count
< 3)
1848 if (c
< '0' || c
> '7')
1851 i
= (i
<< 3) + c
- '0';
1854 if (i
!= (i
& mask
))
1856 cpp_pedwarn (pfile
, "octal escape sequence out of range");
/* Two forms of the unknown-escape diagnostic: one shows the character,
   the other its octal code.  The test choosing between them is not
   visible in this listing.  */
1871 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1873 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
/* Fallbacks: when no MAX_ variant is already defined, use the plain
   CHAR_TYPE_SIZE / WCHAR_TYPE_SIZE macros.  The matching #endif lines
   are not visible in this listing.  */
1880 #ifndef MAX_CHAR_TYPE_SIZE
1881 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1884 #ifndef MAX_WCHAR_TYPE_SIZE
1885 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1888 /* Interpret a (possibly wide) character constant in TOKEN.
1889 WARN_MULTI warns about multi-character charconsts, if not
1890 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1891 that did not exist in traditional C. PCHARS_SEEN points to a
1892 variable that is filled in with the number of characters seen. */
/* NOTE(review): the main loop header, several else branches and the
   return statement are on lines not visible in this listing.  */
1894 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
1896 const cpp_token
*token
;
1899 unsigned int *pchars_seen
;
/* STR walks the token's stored text; LIMIT is one past its end.  */
1901 const unsigned char *str
= token
->val
.str
.text
;
1902 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1903 unsigned int chars_seen
= 0;
1904 unsigned int width
, max_chars
, c
;
1905 HOST_WIDE_INT result
= 0, mask
;
/* Call local_mbtowc with null arguments before parsing, presumably to
   reset its conversion state.  */
1907 #ifdef MULTIBYTE_CHARS
1908 (void) local_mbtowc (NULL
, NULL
, 0);
1911 /* Width in bits. */
1912 if (token
->type
== CPP_CHAR
)
1913 width
= MAX_CHAR_TYPE_SIZE
;
1915 width
= MAX_WCHAR_TYPE_SIZE
;
/* MASK selects the low WIDTH bits; its value when WIDTH is not smaller
   than the host wide int is set on a line not visible here.  */
1917 if (width
< HOST_BITS_PER_WIDE_INT
)
1918 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
/* At most this many characters fit in RESULT.  */
1921 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
1925 #ifdef MULTIBYTE_CHARS
1929 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1932 cpp_warning (pfile
, "ignoring invalid multibyte character");
/* Escape sequences are decoded by parse_escape, which advances STR.  */
1946 c
= parse_escape (pfile
, &str
, limit
, mask
, traditional
);
1947 if (width
< HOST_BITS_PER_WIDE_INT
&& c
> mask
)
1948 cpp_pedwarn (pfile
, "escape sequence out of range for character");
1951 #ifdef MAP_CHARACTER
1953 c
= MAP_CHARACTER (c
);
1956 /* Merge character into result; ignore excess chars. */
1957 if (++chars_seen
<= max_chars
)
1959 if (width
< HOST_BITS_PER_WIDE_INT
)
1960 result
= (result
<< width
) | (c
& mask
);
/* Diagnose empty and over-long constants; an over-long one is clamped
   to MAX_CHARS for the steps below.  */
1966 if (chars_seen
== 0)
1967 cpp_error (pfile
, "empty character constant");
1968 else if (chars_seen
> max_chars
)
1970 chars_seen
= max_chars
;
1971 cpp_error (pfile
, "character constant too long");
1973 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
1974 cpp_warning (pfile
, "multi-character character constant");
1976 /* If char type is signed, sign-extend the constant. The
1977 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1978 if (token
->type
== CPP_CHAR
&& chars_seen
)
/* NOTE(review): this inner MASK shadows the outer HOST_WIDE_INT MASK.
   The shift count HOST_BITS_PER_INT - NBITS would be negative if NBITS
   can exceed HOST_BITS_PER_INT (MAX_CHARS is derived from
   HOST_BITS_PER_WIDE_INT) -- confirm the two widths agree wherever this
   is built.  */
1980 unsigned int nbits
= chars_seen
* width
;
1981 unsigned int mask
= (unsigned int) ~0 >> (HOST_BITS_PER_INT
- nbits
);
/* Tests whether __CHAR_UNSIGNED__ is defined as a macro, or the top bit
   of the NBITS-wide value is clear; the statements selected by this
   test are not visible in this listing.  */
1983 if (pfile
->spec_nodes
.n__CHAR_UNSIGNED__
->type
== NT_MACRO
1984 || ((result
>> (nbits
- 1)) & 1) == 0)
/* Report the (possibly clamped) character count to the caller.  */
1990 *pchars_seen
= chars_seen
;
/* Default pool alignment: the offset of member U within struct dummy.
   The declaration of struct dummy is on lines not visible in this
   listing.  */
2006 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Returns nonzero when CHUNK is non-null, is not the chunk recorded in
   POOL->locked, and spans at least twice SIZE bytes between its base
   and its limit.  NOTE(review): the comment inside this function has
   lost its final line in this listing.  */
2009 chunk_suitable (pool
, chunk
, size
)
2014 /* Being at least twice SIZE means we can use memcpy in
2015 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2017 return (chunk
&& pool
->locked
!= chunk
2018 && (unsigned int) (chunk
->limit
- chunk
->base
) >= size
* 2);
2021 /* Returns the end of the new pool. PTR points to a char in the old
2022 pool, and is updated to point to the same char in the new pool. */
/* POOL is the pool to advance and LEN the extra room wanted.
   NOTE(review): _cpp_pool_reserve passes 0 for PTR, so a guard on PTR
   is presumably on a line not visible in this listing, as is the
   statement that makes CHUNK the pool's current chunk.  */
2024 _cpp_next_chunk (pool
, len
, ptr
)
2027 unsigned char **ptr
;
/* The candidate is the chunk linked after the current one.  */
2029 cpp_chunk
*chunk
= pool
->cur
->next
;
2031 /* LEN is the minimum size we want in the new pool. */
2032 len
+= POOL_ROOM (pool
);
/* If the candidate is unsuitable, allocate a new chunk of
   POOL_SIZE (pool) * 2 + LEN bytes and link it directly after the
   current chunk.  */
2033 if (! chunk_suitable (pool
, chunk
, len
))
2035 chunk
= new_chunk (POOL_SIZE (pool
) * 2 + len
);
2037 chunk
->next
= pool
->cur
->next
;
2038 pool
->cur
->next
= chunk
;
2041 /* Update the pointer before changing chunk's front. */
/* *PTR moves by the distance from the pool's front to the new chunk's
   base, keeping its offset.  */
2043 *ptr
+= chunk
->base
- POOL_FRONT (pool
);
/* Copy POOL_ROOM (pool) bytes starting at POOL_FRONT (pool) to the base
   of the new chunk, then reset the chunk's front to its base.  */
2045 memcpy (chunk
->base
, POOL_FRONT (pool
), POOL_ROOM (pool
));
2046 chunk
->front
= chunk
->base
;
2049 return POOL_LIMIT (pool
);
/* NOTE(review): the function header is not visible in this listing;
   judging by the calls elsewhere in the file this is presumably
   new_chunk, taking the requested SIZE in bytes.  The declaration and
   the return of RESULT are not visible either.  */
2056 unsigned char *base
;
/* Round SIZE up to DEFAULT_ALIGNMENT and allocate the data area plus
   room for one cpp_chunk descriptor in a single xmalloc block.  */
2059 size
= POOL_ALIGN (size
, DEFAULT_ALIGNMENT
);
2060 base
= (unsigned char *) xmalloc (size
+ sizeof (cpp_chunk
));
2061 /* Put the chunk descriptor at the end. Then chunk overruns will
2062 cause obvious chaos. */
/* The descriptor starts right after the SIZE data bytes; the chunk
   starts out empty, with its front at its base.  */
2063 result
= (cpp_chunk
*) (base
+ size
);
2064 result
->base
= base
;
2065 result
->front
= base
;
2066 result
->limit
= base
+ size
;
/* Initializes POOL with a first chunk of SIZE bytes and the alignment
   ALIGN.  NOTE(review): the tests guarding the default alignment and
   the self-link, the action taken on a bad alignment, and every use of
   TEMP are on lines not visible in this listing.  */
2073 _cpp_init_pool (pool
, size
, align
, temp
)
2075 unsigned int size
, align
, temp
;
/* Presumably applied only when the caller did not pass an alignment.  */
2078 align
= DEFAULT_ALIGNMENT
;
/* Nonzero only when ALIGN has more than one bit set, i.e. it is not a
   power of two.  */
2079 if (align
& (align
- 1))
2081 pool
->align
= align
;
2082 pool
->cur
= new_chunk (size
);
/* Links the first chunk to itself.  */
2086 pool
->cur
->next
= pool
->cur
;
/* Increments POOL's lock count.  On the first lock (count was zero) the
   current chunk is recorded in POOL->locked.  */
2090 _cpp_lock_pool (pool
)
2093 if (pool
->locks
++ == 0)
2094 pool
->locked
= pool
->cur
;
/* Decrements POOL's lock count.  The statement run when the count
   reaches zero is not visible in this listing.  */
2098 _cpp_unlock_pool (pool
)
2101 if (--pool
->locks
== 0)
/* Walks POOL's chunks starting from the current one.  The loop body is
   not visible in this listing; presumably it releases each chunk and
   advances through NEXT.  */
2106 _cpp_free_pool (pool
)
2109 cpp_chunk
*chunk
= pool
->cur
, *next
;
/* The walk stops when the chain ends or comes back round to the
   pool's current chunk.  */
2117 while (chunk
&& chunk
!= pool
->cur
);
2120 /* Reserve LEN bytes from a memory pool. */
/* Returns POOL_FRONT (pool) once at least LEN bytes, rounded up to the
   pool's alignment, are available there.  */
2122 _cpp_pool_reserve (pool
, len
)
2126 len
= POOL_ALIGN (len
, pool
->align
);
/* Not enough room in the current chunk: move on to another one.  No
   pointer needs relocating, so 0 is passed for PTR.  */
2127 if (len
> (unsigned int) POOL_ROOM (pool
))
2128 _cpp_next_chunk (pool
, len
, 0);
2130 return POOL_FRONT (pool
);
2133 /* Allocate LEN bytes from a memory pool. */
2135 _cpp_pool_alloc (pool
, len
)
2139 unsigned char *result
= _cpp_pool_reserve (pool
, len
);
2141 POOL_COMMIT (pool
, len
);