1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category
;
68 const unsigned char *name
;
71 const unsigned char *digraph_spellings
[] = {U
"%:", U
"%:%:", U
"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings
[N_TTYPES
] = {TTYPE_TABLE
};
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline
PARAMS ((cpp_buffer
*, cppchar_t
));
84 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_buffer
*, cppchar_t
));
85 static cppchar_t get_effective_char
PARAMS ((cpp_buffer
*));
87 static int skip_block_comment
PARAMS ((cpp_reader
*));
88 static int skip_line_comment
PARAMS ((cpp_reader
*));
89 static void adjust_column
PARAMS ((cpp_reader
*));
90 static void skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
91 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*, cppchar_t
));
92 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
93 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
94 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
95 static void unterminated
PARAMS ((cpp_reader
*, int));
96 static int trigraph_ok
PARAMS ((cpp_reader
*, cppchar_t
));
97 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
98 static void lex_percent
PARAMS ((cpp_buffer
*, cpp_token
*));
99 static void lex_dot
PARAMS ((cpp_reader
*, cpp_token
*));
100 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
101 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk
*new_chunk
PARAMS ((unsigned int));
105 static int chunk_suitable
PARAMS ((cpp_pool
*, cpp_chunk
*, unsigned int));
106 static unsigned int hex_digit_value
PARAMS ((unsigned int));
110 Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
114 cpp_ideq (token
, string
)
115 const cpp_token
*token
;
118 if (token
->type
!= CPP_NAME
)
121 return !ustrcmp (NODE_NAME (token
->val
.node
), (const U_CHAR
*) string
);
124 /* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
127 handle_newline (buffer
, newline_char
)
129 cppchar_t newline_char
;
131 cppchar_t next
= EOF
;
133 buffer
->col_adjust
= 0;
135 buffer
->line_base
= buffer
->cur
;
137 /* Handle CR-LF and LF-CR combinations, get the next character. */
138 if (buffer
->cur
< buffer
->rlimit
)
140 next
= *buffer
->cur
++;
141 if (next
+ newline_char
== '\r' + '\n')
143 buffer
->line_base
= buffer
->cur
;
144 if (buffer
->cur
< buffer
->rlimit
)
145 next
= *buffer
->cur
++;
151 buffer
->read_ahead
= next
;
155 /* Subroutine of skip_escaped_newlines; called when a trigraph is
156 encountered. It warns if necessary, and returns true if the
157 trigraph should be honoured. FROM_CHAR is the third character of a
158 trigraph, and presumed to be the previous character for position reporting. */
161 trigraph_ok (pfile
, from_char
)
165 int accept
= CPP_OPTION (pfile
, trigraphs
);
167 /* Don't warn about trigraphs in comments. */
168 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
170 cpp_buffer
*buffer
= pfile
->buffer
;
172 cpp_warning_with_line (pfile
, buffer
->lineno
, CPP_BUF_COL (buffer
) - 2,
173 "trigraph ??%c converted to %c",
175 (int) _cpp_trigraph_map
[from_char
]);
176 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
178 buffer
->last_Wtrigraphs
= buffer
->cur
;
179 cpp_warning_with_line (pfile
, buffer
->lineno
,
180 CPP_BUF_COL (buffer
) - 2,
181 "trigraph ??%c ignored", (int) from_char
);
188 /* Assumes local variables buffer and result. */
189 #define ACCEPT_CHAR(t) \
190 do { result->type = t; buffer->read_ahead = EOF; } while (0)
192 /* When we move to multibyte character sets, add to these something
193 that saves and restores the state of the multibyte conversion
194 library. This probably involves saving and restoring a "cookie".
195 In the case of glibc it is an 8-byte structure, so is not a high
196 overhead operation. In any case, it's out of the fast path. */
197 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
198 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
200 /* Skips any escaped newlines introduced by NEXT, which is either a
201 '?' or a '\\'. Returns the next character, which will also have
202 been placed in buffer->read_ahead. This routine performs
203 preprocessing stages 1 and 2 of the ISO C standard. */
205 skip_escaped_newlines (buffer
, next
)
209 /* Only do this if we apply stages 1 and 2. */
210 if (!buffer
->from_stage3
)
213 const unsigned char *saved_cur
;
218 if (buffer
->cur
== buffer
->rlimit
)
224 next1
= *buffer
->cur
++;
225 if (next1
!= '?' || buffer
->cur
== buffer
->rlimit
)
231 next1
= *buffer
->cur
++;
232 if (!_cpp_trigraph_map
[next1
]
233 || !trigraph_ok (buffer
->pfile
, next1
))
239 /* We have a full trigraph here. */
240 next
= _cpp_trigraph_map
[next1
];
241 if (next
!= '\\' || buffer
->cur
== buffer
->rlimit
)
246 /* We have a backslash, and room for at least one more character. */
250 next1
= *buffer
->cur
++;
251 if (!is_nvspace (next1
))
255 while (buffer
->cur
< buffer
->rlimit
);
257 if (!is_vspace (next1
))
263 if (space
&& !buffer
->pfile
->state
.lexing_comment
)
264 cpp_warning (buffer
->pfile
,
265 "backslash and newline separated by space");
267 next
= handle_newline (buffer
, next1
);
269 cpp_pedwarn (buffer
->pfile
, "backslash-newline at end of file");
271 while (next
== '\\' || next
== '?');
274 buffer
->read_ahead
= next
;
278 /* Obtain the next character, after trigraph conversion and skipping
279 an arbitrary string of escaped newlines. The common case of no
280 trigraphs or escaped newlines falls through quickly. */
282 get_effective_char (buffer
)
285 cppchar_t next
= EOF
;
287 if (buffer
->cur
< buffer
->rlimit
)
289 next
= *buffer
->cur
++;
291 /* '?' can introduce trigraphs (and therefore backslash); '\\'
292 can introduce escaped newlines, which we want to skip, or
293 UCNs, which, depending upon lexer state, we will handle in the future. */
295 if (next
== '?' || next
== '\\')
296 next
= skip_escaped_newlines (buffer
, next
);
299 buffer
->read_ahead
= next
;
303 /* Skip a C-style block comment. We find the end of the comment by
304 seeing if an asterisk is before every '/' we encounter. Returns
305 non-zero if comment terminated by EOF, zero otherwise. */
307 skip_block_comment (pfile
)
310 cpp_buffer
*buffer
= pfile
->buffer
;
311 cppchar_t c
= EOF
, prevc
= EOF
;
313 pfile
->state
.lexing_comment
= 1;
314 while (buffer
->cur
!= buffer
->rlimit
)
316 prevc
= c
, c
= *buffer
->cur
++;
319 /* FIXME: For speed, create a new character class of characters
320 of interest inside block comments. */
321 if (c
== '?' || c
== '\\')
322 c
= skip_escaped_newlines (buffer
, c
);
324 /* People like decorating comments with '*', so check for '/'
325 instead for efficiency. */
331 /* Warn about potential nested comments, but not if the '/'
332 comes immediately before the true comment delimiter.
333 Don't bother to get it right across escaped newlines. */
334 if (CPP_OPTION (pfile
, warn_comments
)
335 && buffer
->cur
!= buffer
->rlimit
)
337 prevc
= c
, c
= *buffer
->cur
++;
338 if (c
== '*' && buffer
->cur
!= buffer
->rlimit
)
340 prevc
= c
, c
= *buffer
->cur
++;
342 cpp_warning_with_line (pfile
, CPP_BUF_LINE (buffer
),
343 CPP_BUF_COL (buffer
),
344 "\"/*\" within comment");
349 else if (is_vspace (c
))
351 prevc
= c
, c
= handle_newline (buffer
, c
);
355 adjust_column (pfile
);
358 pfile
->state
.lexing_comment
= 0;
359 buffer
->read_ahead
= EOF
;
360 return c
!= '/' || prevc
!= '*';
363 /* Skip a C++ line comment. Handles escaped newlines. Returns
364 non-zero if a multiline comment. The following new line, if any,
365 is left in buffer->read_ahead. */
367 skip_line_comment (pfile
)
370 cpp_buffer
*buffer
= pfile
->buffer
;
371 unsigned int orig_lineno
= buffer
->lineno
;
374 pfile
->state
.lexing_comment
= 1;
378 if (buffer
->cur
== buffer
->rlimit
)
382 if (c
== '?' || c
== '\\')
383 c
= skip_escaped_newlines (buffer
, c
);
385 while (!is_vspace (c
));
387 pfile
->state
.lexing_comment
= 0;
388 buffer
->read_ahead
= c
; /* Leave any newline for caller. */
389 return orig_lineno
!= buffer
->lineno
;
392 /* pfile->buffer->cur is one beyond the \t character. Update
393 col_adjust so we track the column correctly. */
395 adjust_column (pfile
)
398 cpp_buffer
*buffer
= pfile
->buffer
;
399 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
401 /* Round it up to multiple of the tabstop, but subtract 1 since the
402 tab itself occupies a character position. */
403 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
404 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
407 /* Skips whitespace, saving the next non-whitespace character.
408 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
409 tokens might be assigned an incorrect column. */
411 skip_whitespace (pfile
, c
)
415 cpp_buffer
*buffer
= pfile
->buffer
;
416 unsigned int warned
= 0;
420 /* Horizontal space always OK. */
424 adjust_column (pfile
);
425 /* Just \f \v or \0 left. */
430 cpp_warning (pfile
, "null character(s) ignored");
434 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
435 cpp_pedwarn_with_line (pfile
, CPP_BUF_LINE (buffer
),
436 CPP_BUF_COL (buffer
),
437 "%s in preprocessing directive",
438 c
== '\f' ? "form feed" : "vertical tab");
441 if (buffer
->cur
== buffer
->rlimit
)
445 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
446 while (is_nvspace (c
));
448 /* Remember the next character. */
449 buffer
->read_ahead
= c
;
452 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
455 name_p (pfile
, string
)
457 const cpp_string
*string
;
461 for (i
= 0; i
< string
->len
; i
++)
462 if (!is_idchar (string
->text
[i
]))
468 /* Parse an identifier, skipping embedded backslash-newlines.
469 Calculate the hash value of the token while parsing, for improved
470 performance. The hashing algorithm *must* match cpp_lookup(). */
472 static cpp_hashnode
*
473 parse_identifier (pfile
, c
)
477 cpp_hashnode
*result
;
478 cpp_buffer
*buffer
= pfile
->buffer
;
479 unsigned int saw_dollar
= 0, len
;
480 struct obstack
*stack
= &pfile
->hash_table
->stack
;
486 obstack_1grow (stack
, c
);
492 if (buffer
->cur
== buffer
->rlimit
)
497 while (is_idchar (c
));
499 /* Potential escaped newline? */
500 if (c
!= '?' && c
!= '\\')
502 c
= skip_escaped_newlines (buffer
, c
);
504 while (is_idchar (c
));
506 /* Remember the next character. */
507 buffer
->read_ahead
= c
;
509 /* $ is not an identifier character in the standard, but is commonly
510 accepted as an extension. Don't warn about it in skipped
511 conditional blocks. */
512 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->skipping
)
513 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
515 /* Identifiers are null-terminated. */
516 len
= obstack_object_size (stack
);
517 obstack_1grow (stack
, '\0');
519 /* This routine commits the memory if necessary. */
520 result
= (cpp_hashnode
*)
521 ht_lookup (pfile
->hash_table
, obstack_finish (stack
), len
, HT_ALLOCED
);
523 /* Some identifiers require diagnostics when lexed. */
524 if (result
->flags
& NODE_DIAGNOSTIC
&& !pfile
->skipping
)
526 /* It is allowed to poison the same identifier twice. */
527 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
528 cpp_error (pfile
, "attempt to use poisoned \"%s\"",
531 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
532 replacement list of a variadic macro. */
533 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
534 && !pfile
->state
.va_args_ok
)
535 cpp_pedwarn (pfile
, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
541 /* Parse a number, skipping embedded backslash-newlines. */
543 parse_number (pfile
, number
, c
, leading_period
)
549 cpp_buffer
*buffer
= pfile
->buffer
;
550 cpp_pool
*pool
= &pfile
->ident_pool
;
551 unsigned char *dest
, *limit
;
553 dest
= POOL_FRONT (pool
);
554 limit
= POOL_LIMIT (pool
);
556 /* Place a leading period. */
560 limit
= _cpp_next_chunk (pool
, 0, &dest
);
568 /* Need room for terminating null. */
569 if (dest
+ 1 >= limit
)
570 limit
= _cpp_next_chunk (pool
, 0, &dest
);
574 if (buffer
->cur
== buffer
->rlimit
)
579 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
581 /* Potential escaped newline? */
582 if (c
!= '?' && c
!= '\\')
584 c
= skip_escaped_newlines (buffer
, c
);
586 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
588 /* Remember the next character. */
589 buffer
->read_ahead
= c
;
591 /* Null-terminate the number. */
594 number
->text
= POOL_FRONT (pool
);
595 number
->len
= dest
- number
->text
;
596 POOL_COMMIT (pool
, number
->len
+ 1);
599 /* Subroutine of parse_string. Emits error for unterminated strings. */
601 unterminated (pfile
, term
)
605 cpp_error (pfile
, "missing terminating %c character", term
);
607 if (term
== '\"' && pfile
->mlstring_pos
.line
608 && pfile
->mlstring_pos
.line
!= pfile
->lexer_pos
.line
)
610 cpp_error_with_line (pfile
, pfile
->mlstring_pos
.line
,
611 pfile
->mlstring_pos
.col
,
612 "possible start of unterminated string literal");
613 pfile
->mlstring_pos
.line
= 0;
617 /* Subroutine of parse_string. */
619 unescaped_terminator_p (pfile
, dest
)
621 const unsigned char *dest
;
623 const unsigned char *start
, *temp
;
625 /* In #include-style directives, terminators are not escapable. */
626 if (pfile
->state
.angled_headers
)
629 start
= POOL_FRONT (&pfile
->ident_pool
);
631 /* An odd number of consecutive backslashes represents an escaped terminator. */
633 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
636 return ((dest
- temp
) & 1) == 0;
639 /* Parses a string, character constant, or angle-bracketed header file
640 name. Handles embedded trigraphs and escaped newlines. The stored
641 string is guaranteed NUL-terminated, but it is not guaranteed that
642 this is the first NUL since embedded NULs are preserved.
644 Multi-line strings are allowed, but they are deprecated. */
646 parse_string (pfile
, token
, terminator
)
649 cppchar_t terminator
;
651 cpp_buffer
*buffer
= pfile
->buffer
;
652 cpp_pool
*pool
= &pfile
->ident_pool
;
653 unsigned char *dest
, *limit
;
655 unsigned int nulls
= 0;
657 dest
= POOL_FRONT (pool
);
658 limit
= POOL_LIMIT (pool
);
662 if (buffer
->cur
== buffer
->rlimit
)
668 /* We need space for the terminating NUL. */
670 limit
= _cpp_next_chunk (pool
, 0, &dest
);
674 unterminated (pfile
, terminator
);
678 /* Handle trigraphs, escaped newlines etc. */
679 if (c
== '?' || c
== '\\')
680 c
= skip_escaped_newlines (buffer
, c
);
682 if (c
== terminator
&& unescaped_terminator_p (pfile
, dest
))
687 else if (is_vspace (c
))
689 /* In assembly language, silently terminate string and
690 character literals at end of line. This is a kludge
691 around not knowing where comments are. */
692 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
695 /* Character constants and header names may not extend over
696 multiple lines. In Standard C, neither may strings.
697 Unfortunately, we accept multiline strings as an
698 extension, except in #include family directives. */
699 if (terminator
!= '"' || pfile
->state
.angled_headers
)
701 unterminated (pfile
, terminator
);
705 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
706 if (pfile
->mlstring_pos
.line
== 0)
707 pfile
->mlstring_pos
= pfile
->lexer_pos
;
709 c
= handle_newline (buffer
, c
);
716 cpp_warning (pfile
, "null character(s) preserved in literal");
722 /* Remember the next character. */
723 buffer
->read_ahead
= c
;
726 token
->val
.str
.text
= POOL_FRONT (pool
);
727 token
->val
.str
.len
= dest
- token
->val
.str
.text
;
728 POOL_COMMIT (pool
, token
->val
.str
.len
+ 1);
731 /* The stored comment includes the comment start and any terminator. */
733 save_comment (pfile
, token
, from
)
736 const unsigned char *from
;
738 unsigned char *buffer
;
741 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
742 /* C++ comments probably (not definitely) have moved past a new
743 line, which we don't want to save in the comment. */
744 if (pfile
->buffer
->read_ahead
!= EOF
)
746 buffer
= _cpp_pool_alloc (&pfile
->ident_pool
, len
);
748 token
->type
= CPP_COMMENT
;
749 token
->val
.str
.len
= len
;
750 token
->val
.str
.text
= buffer
;
753 memcpy (buffer
+ 1, from
, len
- 1);
756 /* Subroutine of lex_token to handle '%'. A little tricky, since we
757 want to avoid stepping back when lexing %:%X. */
759 lex_percent (buffer
, result
)
765 result
->type
= CPP_MOD
;
766 /* Parsing %:%X could leave an extra character. */
767 if (buffer
->extra_char
== EOF
)
768 c
= get_effective_char (buffer
);
771 c
= buffer
->read_ahead
= buffer
->extra_char
;
772 buffer
->extra_char
= EOF
;
776 ACCEPT_CHAR (CPP_MOD_EQ
);
777 else if (CPP_OPTION (buffer
->pfile
, digraphs
))
781 result
->flags
|= DIGRAPH
;
782 ACCEPT_CHAR (CPP_HASH
);
783 if (get_effective_char (buffer
) == '%')
785 buffer
->extra_char
= get_effective_char (buffer
);
786 if (buffer
->extra_char
== ':')
788 buffer
->extra_char
= EOF
;
789 ACCEPT_CHAR (CPP_PASTE
);
792 /* We'll catch the extra_char when we're called back. */
793 buffer
->read_ahead
= '%';
798 result
->flags
|= DIGRAPH
;
799 ACCEPT_CHAR (CPP_CLOSE_BRACE
);
804 /* Subroutine of lex_token to handle '.'. This is tricky, since we
805 want to avoid stepping back when lexing '...' or '.123'. In the
806 latter case we should also set a flag for parse_number. */
808 lex_dot (pfile
, result
)
812 cpp_buffer
*buffer
= pfile
->buffer
;
815 /* Parsing ..X could leave an extra character. */
816 if (buffer
->extra_char
== EOF
)
817 c
= get_effective_char (buffer
);
820 c
= buffer
->read_ahead
= buffer
->extra_char
;
821 buffer
->extra_char
= EOF
;
824 /* All known character sets have 0...9 contiguous. */
825 if (c
>= '0' && c
<= '9')
827 result
->type
= CPP_NUMBER
;
828 parse_number (pfile
, &result
->val
.str
, c
, 1);
832 result
->type
= CPP_DOT
;
835 buffer
->extra_char
= get_effective_char (buffer
);
836 if (buffer
->extra_char
== '.')
838 buffer
->extra_char
= EOF
;
839 ACCEPT_CHAR (CPP_ELLIPSIS
);
842 /* We'll catch the extra_char when we're called back. */
843 buffer
->read_ahead
= '.';
845 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
846 ACCEPT_CHAR (CPP_DOT_STAR
);
851 _cpp_lex_token (pfile
, result
)
857 const unsigned char *comment_start
;
861 bol
= pfile
->state
.next_bol
;
863 buffer
= pfile
->buffer
;
864 pfile
->state
.next_bol
= 0;
865 result
->flags
= buffer
->saved_flags
;
866 buffer
->saved_flags
= 0;
868 pfile
->lexer_pos
.line
= buffer
->lineno
;
870 pfile
->lexer_pos
.col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
872 c
= buffer
->read_ahead
;
873 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
876 pfile
->lexer_pos
.col
++;
880 buffer
->read_ahead
= EOF
;
884 /* Non-empty files should end in a newline. Checking "bol" too
885 prevents multiple warnings when hitting the EOF more than
886 once, like in a directive. Don't warn for command line and _Pragma buffers. */
888 if (pfile
->lexer_pos
.col
!= 0 && !bol
&& !buffer
->from_stage3
)
889 cpp_pedwarn (pfile
, "no newline at end of file");
890 pfile
->state
.next_bol
= 1;
891 pfile
->skipping
= 0; /* In case missing #endif. */
892 result
->type
= CPP_EOF
;
893 /* Don't do MI optimisation. */
896 case ' ': case '\t': case '\f': case '\v': case '\0':
897 skip_whitespace (pfile
, c
);
898 result
->flags
|= PREV_WHITE
;
901 case '\n': case '\r':
902 if (!pfile
->state
.in_directive
)
904 handle_newline (buffer
, c
);
906 pfile
->lexer_pos
.output_line
= buffer
->lineno
;
907 /* This is a new line, so clear any white space flag.
908 Newlines in arguments are white space (6.10.3.10);
909 parse_arg takes care of that. */
910 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
914 /* Don't let directives spill over to the next line. */
915 buffer
->read_ahead
= c
;
916 pfile
->state
.next_bol
= 1;
917 result
->type
= CPP_EOF
;
918 /* Don't break; pfile->skipping might be true. */
923 /* These could start an escaped newline, or '?' a trigraph. Let
924 skip_escaped_newlines do all the work. */
926 unsigned int lineno
= buffer
->lineno
;
928 c
= skip_escaped_newlines (buffer
, c
);
929 if (lineno
!= buffer
->lineno
)
930 /* We had at least one escaped newline of some sort, and the
931 next character is in buffer->read_ahead. Update the
932 token's line and column. */
935 /* We are either the original '?' or '\\', or a trigraph. */
936 result
->type
= CPP_QUERY
;
937 buffer
->read_ahead
= EOF
;
945 case '0': case '1': case '2': case '3': case '4':
946 case '5': case '6': case '7': case '8': case '9':
947 result
->type
= CPP_NUMBER
;
948 parse_number (pfile
, &result
->val
.str
, c
, 0);
952 if (!CPP_OPTION (pfile
, dollars_in_ident
))
954 /* Fall through... */
957 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
958 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
959 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
960 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
962 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
963 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
964 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
965 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
967 result
->type
= CPP_NAME
;
968 result
->val
.node
= parse_identifier (pfile
, c
);
970 /* 'L' may introduce wide characters or strings. */
971 if (result
->val
.node
== pfile
->spec_nodes
.n_L
)
973 c
= buffer
->read_ahead
; /* For make_string. */
974 if (c
== '\'' || c
== '"')
976 ACCEPT_CHAR (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
980 /* Convert named operators to their proper types. */
981 else if (result
->val
.node
->flags
& NODE_OPERATOR
)
983 result
->flags
|= NAMED_OP
;
984 result
->type
= result
->val
.node
->value
.operator;
990 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
992 parse_string (pfile
, result
, c
);
996 /* A potential block or line comment. */
997 comment_start
= buffer
->cur
;
998 result
->type
= CPP_DIV
;
999 c
= get_effective_char (buffer
);
1001 ACCEPT_CHAR (CPP_DIV_EQ
);
1002 if (c
!= '/' && c
!= '*')
1004 if (buffer
->from_stage3
)
1009 if (skip_block_comment (pfile
))
1010 cpp_error_with_line (pfile
, pfile
->lexer_pos
.line
,
1011 pfile
->lexer_pos
.col
,
1012 "unterminated comment");
1016 if (!CPP_OPTION (pfile
, cplusplus_comments
)
1017 && !CPP_IN_SYSTEM_HEADER (pfile
))
1020 /* Warn about comments only if pedantically GNUC89, and not
1021 in system headers. */
1022 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1023 && ! buffer
->warned_cplusplus_comments
)
1026 "C++ style comments are not allowed in ISO C89");
1028 "(this will be reported only once per input file)");
1029 buffer
->warned_cplusplus_comments
= 1;
1032 /* Skip_line_comment updates buffer->read_ahead. */
1033 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1034 cpp_warning_with_line (pfile
, pfile
->lexer_pos
.line
,
1035 pfile
->lexer_pos
.col
,
1036 "multi-line comment");
1039 /* Skipping the comment has updated buffer->read_ahead. */
1040 if (!pfile
->state
.save_comments
)
1042 result
->flags
|= PREV_WHITE
;
1046 /* Save the comment as a token in its own right. */
1047 save_comment (pfile
, result
, comment_start
);
1048 /* Don't do MI optimisation. */
1052 if (pfile
->state
.angled_headers
)
1054 result
->type
= CPP_HEADER_NAME
;
1055 c
= '>'; /* terminator. */
1059 result
->type
= CPP_LESS
;
1060 c
= get_effective_char (buffer
);
1062 ACCEPT_CHAR (CPP_LESS_EQ
);
1065 ACCEPT_CHAR (CPP_LSHIFT
);
1066 if (get_effective_char (buffer
) == '=')
1067 ACCEPT_CHAR (CPP_LSHIFT_EQ
);
1069 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1071 ACCEPT_CHAR (CPP_MIN
);
1072 if (get_effective_char (buffer
) == '=')
1073 ACCEPT_CHAR (CPP_MIN_EQ
);
1075 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1077 ACCEPT_CHAR (CPP_OPEN_SQUARE
);
1078 result
->flags
|= DIGRAPH
;
1080 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1082 ACCEPT_CHAR (CPP_OPEN_BRACE
);
1083 result
->flags
|= DIGRAPH
;
1088 result
->type
= CPP_GREATER
;
1089 c
= get_effective_char (buffer
);
1091 ACCEPT_CHAR (CPP_GREATER_EQ
);
1094 ACCEPT_CHAR (CPP_RSHIFT
);
1095 if (get_effective_char (buffer
) == '=')
1096 ACCEPT_CHAR (CPP_RSHIFT_EQ
);
1098 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1100 ACCEPT_CHAR (CPP_MAX
);
1101 if (get_effective_char (buffer
) == '=')
1102 ACCEPT_CHAR (CPP_MAX_EQ
);
1107 lex_percent (buffer
, result
);
1108 if (result
->type
== CPP_HASH
)
1113 lex_dot (pfile
, result
);
1117 result
->type
= CPP_PLUS
;
1118 c
= get_effective_char (buffer
);
1120 ACCEPT_CHAR (CPP_PLUS_EQ
);
1122 ACCEPT_CHAR (CPP_PLUS_PLUS
);
1126 result
->type
= CPP_MINUS
;
1127 c
= get_effective_char (buffer
);
1130 ACCEPT_CHAR (CPP_DEREF
);
1131 if (CPP_OPTION (pfile
, cplusplus
)
1132 && get_effective_char (buffer
) == '*')
1133 ACCEPT_CHAR (CPP_DEREF_STAR
);
1136 ACCEPT_CHAR (CPP_MINUS_EQ
);
1138 ACCEPT_CHAR (CPP_MINUS_MINUS
);
1142 result
->type
= CPP_MULT
;
1143 if (get_effective_char (buffer
) == '=')
1144 ACCEPT_CHAR (CPP_MULT_EQ
);
1148 result
->type
= CPP_EQ
;
1149 if (get_effective_char (buffer
) == '=')
1150 ACCEPT_CHAR (CPP_EQ_EQ
);
1154 result
->type
= CPP_NOT
;
1155 if (get_effective_char (buffer
) == '=')
1156 ACCEPT_CHAR (CPP_NOT_EQ
);
1160 result
->type
= CPP_AND
;
1161 c
= get_effective_char (buffer
);
1163 ACCEPT_CHAR (CPP_AND_EQ
);
1165 ACCEPT_CHAR (CPP_AND_AND
);
1169 c
= buffer
->extra_char
; /* Can be set by error condition below. */
1172 buffer
->read_ahead
= c
;
1173 buffer
->extra_char
= EOF
;
1176 c
= get_effective_char (buffer
);
1180 ACCEPT_CHAR (CPP_PASTE
);
1184 result
->type
= CPP_HASH
;
1188 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1189 tokens within the list of arguments that would otherwise act
1190 as preprocessing directives, the behavior is undefined.
1192 This implementation will report a hard error, terminate the
1193 macro invocation, and proceed to process the directive. */
1194 if (pfile
->state
.parsing_args
)
1196 if (pfile
->state
.parsing_args
== 2)
1198 "directives may not be used inside a macro argument");
1200 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1201 buffer
->extra_char
= buffer
->read_ahead
;
1202 buffer
->read_ahead
= '#';
1203 pfile
->state
.next_bol
= 1;
1204 result
->type
= CPP_EOF
;
1206 /* Get whitespace right - newline_in_args sets it. */
1207 if (pfile
->lexer_pos
.col
== 1)
1208 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
1212 /* This is the hash introducing a directive. */
1213 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1214 goto done_directive
; /* bol still 1. */
1215 /* This is in fact an assembler #. */
1220 result
->type
= CPP_OR
;
1221 c
= get_effective_char (buffer
);
1223 ACCEPT_CHAR (CPP_OR_EQ
);
1225 ACCEPT_CHAR (CPP_OR_OR
);
1229 result
->type
= CPP_XOR
;
1230 if (get_effective_char (buffer
) == '=')
1231 ACCEPT_CHAR (CPP_XOR_EQ
);
1235 result
->type
= CPP_COLON
;
1236 c
= get_effective_char (buffer
);
1237 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1238 ACCEPT_CHAR (CPP_SCOPE
);
1239 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1241 result
->flags
|= DIGRAPH
;
1242 ACCEPT_CHAR (CPP_CLOSE_SQUARE
);
1246 case '~': result
->type
= CPP_COMPL
; break;
1247 case ',': result
->type
= CPP_COMMA
; break;
1248 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1249 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1250 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1251 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1252 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1253 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1254 case ';': result
->type
= CPP_SEMICOLON
; break;
1256 /* @ is a punctuator in Objective C. */
1257 case '@': result
->type
= CPP_ATSIGN
; break;
1261 result
->type
= CPP_OTHER
;
1266 if (pfile
->skipping
)
1269 /* If not in a directive, this token invalidates controlling macros. */
1270 if (!pfile
->state
.in_directive
)
1271 pfile
->mi_state
= MI_FAILED
;
1274 /* An upper bound on the number of bytes needed to spell a token,
1275 including preceding whitespace. */
1277 cpp_token_len (token
)
1278 const cpp_token
*token
;
1282 switch (TOKEN_SPELL (token
))
1284 default: len
= 0; break;
1285 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1286 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1288 /* 1 for whitespace, 4 for comment delimiters. */
1292 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1293 already contain enough space to hold the token's spelling.
1294 Returns a pointer to the character after the last character written. */
1297 cpp_spell_token (pfile
, token
, buffer
)
1298 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1299 const cpp_token
*token
;
1300 unsigned char *buffer
;
/* Dispatch on the spelling category of TOKEN.  NOTE(review): several
   case labels and braces are missing from this listing, so the
   grouping described by the comments below is inferred from the
   statements that remain.  */
1302 switch (TOKEN_SPELL (token
))
1304 case SPELL_OPERATOR
:
1306 const unsigned char *spelling
;
/* A token flagged DIGRAPH takes its text from digraph_spellings,
   indexed by its type relative to CPP_FIRST_DIGRAPH.  The statement
   guarded by the NAMED_OP test is not visible here; TOKEN_NAME
   supplies the spelling on the assignment that follows.  */
1309 if (token
->flags
& DIGRAPH
)
1311 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1312 else if (token
->flags
& NAMED_OP
)
1315 spelling
= TOKEN_NAME (token
);
/* Walk the NUL-terminated spelling one character at a time; the loop
   body is not visible in this listing.  */
1317 while ((c
= *spelling
++) != '\0')
/* Copy NODE_LEN bytes of the name of the token's node into BUFFER,
   then advance BUFFER past them.  No terminator is written here.  */
1324 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1325 buffer
+= NODE_LEN (token
->val
.node
);
/* Tokens that carry text in val.str: LEFT and RIGHT are the opening
   and closing delimiters and TAG is an optional prefix character,
   all chosen from the token type.  A value of NUL means that nothing
   is emitted for that slot.  */
1330 int left
, right
, tag
;
1331 switch (token
->type
)
1333 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1334 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1335 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1336 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1337 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1338 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Emit prefix, opening delimiter, the stored text (val.str.len bytes)
   and closing delimiter, in that order.  */
1340 if (tag
) *buffer
++ = tag
;
1341 if (left
) *buffer
++ = left
;
1342 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1343 buffer
+= token
->val
.str
.len
;
1344 if (right
) *buffer
++ = right
;
/* Emit the single character stored in the token itself.  */
1349 *buffer
++ = token
->val
.c
;
/* Report the token through cpp_ice, naming its type.  NOTE(review):
   the case label guarding this call is not visible here.  */
1353 cpp_ice (pfile
, "Unspellable token %s", TOKEN_NAME (token
));
1360 /* Returns a token as a null-terminated string. The string is
1361 temporary, and automatically freed later. Useful for diagnostics. */
1363 cpp_token_as_text (pfile
, token
)
1365 const cpp_token
*token
;
/* Size the buffer with the upper bound computed by cpp_token_len.  */
1367 unsigned int len
= cpp_token_len (token
);
/* The buffer is allocated from the ident_pool of the reader.  */
1368 unsigned char *start
= _cpp_pool_alloc (&pfile
->ident_pool
, len
), *end
;
/* Spell the token into the buffer; END receives the value returned by
   cpp_spell_token.  NOTE(review): the statements that terminate the
   string and return it are not visible in this listing.  */
1370 end
= cpp_spell_token (pfile
, token
, start
);
1376 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1378 cpp_type2name (type
)
1379 enum cpp_ttype type
;
/* Look TYPE up in the token_spellings table and return the name field
   of that entry, cast to const char *.  No range check on TYPE is
   visible here.  */
1381 return (const char *) token_spellings
[type
].name
;
1384 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1385 for efficiency - to avoid double-buffering. Also, outputs a space
1386 if PREV_WHITE is flagged. */
1388 cpp_output_token (token
, fp
)
1389 const cpp_token
*token
;
/* Test for the PREV_WHITE flag.  NOTE(review): the statement guarded
   by this test is not visible in this listing.  */
1392 if (token
->flags
& PREV_WHITE
)
/* Dispatch on the spelling category of TOKEN.  NOTE(review): several
   case labels and braces are missing from this listing, so the
   grouping described by the comments below is inferred from the
   statements that remain.  */
1395 switch (TOKEN_SPELL (token
))
1397 case SPELL_OPERATOR
:
1399 const unsigned char *spelling
;
/* A token flagged DIGRAPH takes its text from digraph_spellings,
   indexed by its type relative to CPP_FIRST_DIGRAPH.  The statement
   guarded by the NAMED_OP test is not visible here.  The chosen
   spelling is written to FP with ufputs.  */
1401 if (token
->flags
& DIGRAPH
)
1403 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1404 else if (token
->flags
& NAMED_OP
)
1407 spelling
= TOKEN_NAME (token
);
1409 ufputs (spelling
, fp
);
/* Write the name of the token's node to FP.  */
1415 ufputs (NODE_NAME (token
->val
.node
), fp
);
/* Tokens that carry text in val.str: LEFT and RIGHT are the opening
   and closing delimiters and TAG is an optional prefix character,
   all chosen from the token type.  A value of NUL means that nothing
   is written for that slot.  */
1420 int left
, right
, tag
;
1421 switch (token
->type
)
1423 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1424 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1425 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1426 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1427 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1428 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Write prefix, opening delimiter, the stored text (val.str.len bytes
   via fwrite) and closing delimiter, in that order.  The return
   values of putc and fwrite are not checked here.  */
1430 if (tag
) putc (tag
, fp
);
1431 if (left
) putc (left
, fp
);
1432 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1433 if (right
) putc (right
, fp
);
/* Write the single character stored in the token itself.  */
1438 putc (token
->val
.c
, fp
);
1442 /* An error, most probably. */
1447 /* Compare two tokens. */
1449 _cpp_equiv_tokens (a
, b
)
1450 const cpp_token
*a
, *b
;
/* Only tokens with identical type and identical flags are compared
   further; the comparison then depends on the spelling category of A.
   NOTE(review): most case labels, and the result for tokens that fail
   this first test, are not visible in this listing.  */
1452 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1453 switch (TOKEN_SPELL (a
))
1455 default: /* Keep compiler happy. */
1456 case SPELL_OPERATOR
:
1459 return a
->val
.c
== b
->val
.c
; /* Character. */
/* A CPP_MACRO_ARG token must carry the same argument number in both
   tokens; any other token type reaching this return compares equal.  */
1461 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
/* Node-carrying tokens compare by node pointer identity.  */
1463 return a
->val
.node
== b
->val
.node
;
/* Text-carrying tokens need equal length and a memcmp result of zero.
   NOTE(review): the final argument of memcmp is cut off in this
   listing.  */
1465 return (a
->val
.str
.len
== b
->val
.str
.len
1466 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1473 /* Determine whether two tokens can be pasted together, and if so,
1474 what the resulting token is. Returns CPP_EOF if the tokens cannot
1475 be pasted, or the appropriate type for the merged token if they
can. */
1478 cpp_can_paste (pfile
, token1
, token2
, digraph
)
1480 const cpp_token
*token1
, *token2
;
1483 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1484 int cxx
= CPP_OPTION (pfile
, cplusplus
);
1486 /* Treat named operators as if they were ordinary NAMEs. */
1487 if (token1
->flags
& NAMED_OP
)
1489 if (token2
->flags
& NAMED_OP
)
/* Any first token whose type is at most CPP_LAST_EQ, followed by
   CPP_EQ, yields the type at the same distance from CPP_EQ_EQ as the
   first token is from CPP_EQ.  */
1492 if ((int) a
<= (int) CPP_LAST_EQ
&& b
== CPP_EQ
)
1493 return (enum cpp_ttype
) ((int) a
+ ((int) CPP_EQ_EQ
- (int) CPP_EQ
));
/* NOTE(review): the switch on A and many of its case labels are
   missing from this listing, so the groups of tests below appear
   without the label that selects them.  Within each group B is
   compared against A itself or against specific token types; tests
   that include CXX apply only when the cplusplus option is set, and
   digraph results are produced only when the digraphs option is set
   (they also store 1 through DIGRAPH).  */
1498 if (b
== a
) return CPP_RSHIFT
;
1499 if (b
== CPP_QUERY
&& cxx
) return CPP_MAX
;
1500 if (b
== CPP_GREATER_EQ
) return CPP_RSHIFT_EQ
;
1503 if (b
== a
) return CPP_LSHIFT
;
1504 if (b
== CPP_QUERY
&& cxx
) return CPP_MIN
;
1505 if (b
== CPP_LESS_EQ
) return CPP_LSHIFT_EQ
;
1506 if (CPP_OPTION (pfile
, digraphs
))
1509 {*digraph
= 1; return CPP_OPEN_SQUARE
;} /* <: digraph */
1511 {*digraph
= 1; return CPP_OPEN_BRACE
;} /* <% digraph */
/* A doubled CPP_PLUS, CPP_AND or CPP_OR yields the matching doubled
   operator type.  */
1515 case CPP_PLUS
: if (b
== a
) return CPP_PLUS_PLUS
; break;
1516 case CPP_AND
: if (b
== a
) return CPP_AND_AND
; break;
1517 case CPP_OR
: if (b
== a
) return CPP_OR_OR
; break;
1520 if (b
== a
) return CPP_MINUS_MINUS
;
1521 if (b
== CPP_GREATER
) return CPP_DEREF
;
1524 if (b
== a
&& cxx
) return CPP_SCOPE
;
1525 if (b
== CPP_GREATER
&& CPP_OPTION (pfile
, digraphs
))
1526 {*digraph
= 1; return CPP_CLOSE_SQUARE
;} /* :> digraph */
1530 if (CPP_OPTION (pfile
, digraphs
))
1532 if (b
== CPP_GREATER
)
1533 {*digraph
= 1; return CPP_CLOSE_BRACE
;} /* %> digraph */
1535 {*digraph
= 1; return CPP_HASH
;} /* %: digraph */
1539 if (b
== CPP_MULT
&& cxx
) return CPP_DEREF_STAR
;
1542 if (b
== CPP_MULT
&& cxx
) return CPP_DOT_STAR
;
1543 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
/* Two tokens of the same type paste to CPP_PASTE only when their
   DIGRAPH flags agree; the result inherits that flag through
   DIGRAPH.  */
1547 if (b
== a
&& (token1
->flags
& DIGRAPH
) == (token2
->flags
& DIGRAPH
))
1549 {*digraph
= (token1
->flags
& DIGRAPH
); return CPP_PASTE
;}
/* NOTE(review): the first halves of the next three conditions are on
   lines missing from this listing.  What remains shows a name_p test
   on the text of TOKEN2 yielding CPP_NAME, and a comparison of the
   node of TOKEN1 with spec_nodes.n_L yielding CPP_WCHAR or
   CPP_WSTRING.  */
1553 if (b
== CPP_NAME
) return CPP_NAME
;
1555 && name_p (pfile
, &token2
->val
.str
)) return CPP_NAME
;
1557 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WCHAR
;
1559 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WSTRING
;
/* A following CPP_NUMBER, CPP_NAME or CPP_DOT yields CPP_NUMBER.  A
   following CPP_PLUS or CPP_MINUS is additionally checked with
   VALID_SIGN against the last character of the text of TOKEN1; the
   statement guarded by that test is not visible here.  */
1563 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1564 if (b
== CPP_NAME
) return CPP_NUMBER
;
1565 if (b
== CPP_DOT
) return CPP_NUMBER
;
1566 /* Numbers cannot have length zero, so this is safe. */
1567 if ((b
== CPP_PLUS
|| b
== CPP_MINUS
)
1568 && VALID_SIGN ('+', token1
->val
.str
.text
[token1
->val
.str
.len
- 1]))
1579 /* Returns nonzero if a space should be inserted to avoid an
1580 accidental token paste for output. For simplicity, it is
1581 conservative, and occasionally advises a space where one is not
1582 needed, e.g. "." and ".2". */
1585 cpp_avoid_paste (pfile
, token1
, token2
)
1587 const cpp_token
*token1
, *token2
;
1589 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
/* NOTE(review): the statements guarded by the two NAMED_OP tests, and
   the declaration and initial value of C, are not visible in this
   listing.  */
1592 if (token1
->flags
& NAMED_OP
)
1594 if (token2
->flags
& NAMED_OP
)
/* C is set to the first character of the spelling of the second
   token: from digraph_spellings when TOKEN2 is flagged DIGRAPH,
   otherwise from token_spellings when the category of B is
   SPELL_OPERATOR.  */
1598 if (token2
->flags
& DIGRAPH
)
1599 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1600 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1601 c
= token_spellings
[b
].name
[0];
1603 /* Quickly get everything that can paste with an '='. */
1604 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* Per first-token type, the result is nonzero when the second token
   starts with one of the listed characters (or, where B is tested
   directly, has one of the listed types).  NOTE(review): the switch
   header and its default case are not visible here.  */
1609 case CPP_GREATER
: return c
== '>' || c
== '?';
1610 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1611 case CPP_PLUS
: return c
== '+';
1612 case CPP_MINUS
: return c
== '-' || c
== '>';
1613 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1614 case CPP_MOD
: return c
== ':' || c
== '>';
1615 case CPP_AND
: return c
== '&';
1616 case CPP_OR
: return c
== '|';
1617 case CPP_COLON
: return c
== ':' || c
== '>';
1618 case CPP_DEREF
: return c
== '*';
1619 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1620 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
1621 case CPP_NAME
: return ((b
== CPP_NUMBER
1622 && name_p (pfile
, &token2
->val
.str
))
1624 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1625 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1626 || c
== '.' || c
== '+' || c
== '-');
/* For CPP_OTHER the result is nonzero only when the objc option is
   set, the first token is the character '@', and the second token is
   a CPP_NAME or CPP_STRING.  */
1627 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1628 && token1
->val
.c
== '@'
1629 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1636 /* Output all the remaining tokens on the current line, and a newline
1637 character, to FP. Leading whitespace is removed. */
1639 cpp_output_line (pfile
, fp
)
/* Fetch the first token and clear its PREV_WHITE flag so that no
   space is written in front of it.  */
1645 cpp_get_token (pfile
, &token
);
1646 token
.flags
&= ~PREV_WHITE
;
/* Write each token and fetch the next one until a CPP_EOF token is
   returned.  NOTE(review): the statement that writes the trailing
   newline is not visible in this listing.  */
1647 while (token
.type
!= CPP_EOF
)
1649 cpp_output_token (&token
, fp
);
1650 cpp_get_token (pfile
, &token
);
1656 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the function header is missing from this listing; the
   name hex_digit_value is taken from the calls made elsewhere in this
   file.  Lower-case and upper-case letters a to f map to 10 through
   15.  The value returned for a decimal digit, and the behaviour for
   any other character, are on lines that are not visible here.  */
1661 if (c
>= 'a' && c
<= 'f')
1662 return c
- 'a' + 10;
1663 if (c
>= 'A' && c
<= 'F')
1664 return c
- 'A' + 10;
1665 if (c
>= '0' && c
<= '9')
1670 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1671 failure if cpplib is not parsing C++ or C99. Such failure is
1672 silent, and no variables are updated. Otherwise returns 0, and
1673 warns if -Wtraditional.
1675 [lex.charset]: The character designated by the universal character
1676 name \UNNNNNNNN is that character whose character short name in
1677 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1678 universal character name \uNNNN is that character whose character
1679 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1680 for a universal character name is less than 0x20 or in the range
1681 0x7F-0x9F (inclusive), or if the universal character name
1682 designates a character in the basic source character set, then the
1683 program is ill-formed.
1685 We assume that wchar_t is Unicode, so we don't need to do any
1686 mapping. Is this ever wrong?
1688 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1689 LIMIT is the end of the string or charconst. PSTR is updated to
1690 point after the UCS on return, and the UCS is written into PC. */
1693 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1695 const unsigned char **pstr
;
1696 const unsigned char *limit
;
/* P is the read cursor, CODE accumulates the value, and C starts out
   as the character that PC points to.  */
1699 const unsigned char *p
= *pstr
;
1700 unsigned int code
= 0;
1701 unsigned int c
= *pc
, length
;
1703 /* Only attempt to interpret a UCS for C++ and C99. */
1704 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1707 if (CPP_WTRADITIONAL (pfile
))
1708 cpp_warning (pfile
, "the meaning of '\\%c' varies with -traditional", c
);
/* The short form takes 4 hexadecimal digits, the long form 8.  */
1710 length
= (c
== 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1712 if ((size_t) (limit
- p
) < length
)
1714 cpp_error (pfile
, "incomplete universal-character-name");
1715 /* Skip to the end to avoid more diagnostics. */
/* Consume up to LENGTH characters, shifting each hexadecimal digit
   into CODE.  NOTE(review): the test that separates the digit case
   from the error case is not visible in this listing.  */
1720 for (; length
; length
--, p
++)
1724 code
= (code
<< 4) + hex_digit_value (c
);
1728 "non-hex digit '%c' in universal-character-name", c
);
1729 /* We shouldn't skip in case there are multibyte chars. */
1735 #ifdef TARGET_EBCDIC
1736 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1737 code
= 0x3f; /* EBCDIC invalid character */
1739 /* True extended characters are OK. */
/* NOTE(review): the first part of this condition is on a line that is
   not visible here.  The visible part requires the top bit of CODE to
   be clear and CODE to lie outside 0xD800 through 0xDFFF.  */
1741 && !(code
& 0x80000000)
1742 && !(code
>= 0xD800 && code
<= 0xDFFF))
1744 /* The standard permits $, @ and ` to be specified as UCNs. We use
1745 hex escapes so that this also works with EBCDIC hosts. */
1746 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
1748 /* Don't give another error if one occurred above. */
1749 else if (length
== 0)
1750 cpp_error (pfile
, "universal-character-name out of range");
1758 /* Interpret an escape sequence, and return its value. PSTR points to
1759 the input pointer, which is just after the backslash. LIMIT is how
1760 much text we have. MASK is a bitmask for the precision for the
1761 destination type (char or wchar_t). TRADITIONAL, if true, does not
1762 interpret escapes that did not exist in traditional C.
1764 Handles all relevant diagnostics. */
1767 cpp_parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1769 const unsigned char **pstr
;
1770 const unsigned char *limit
;
1771 unsigned HOST_WIDE_INT mask
;
/* C is the character after the backslash; STR is left pointing just
   past it.  NOTE(review): the switch header, several case labels and
   the final return are not visible in this listing.  */
1775 const unsigned char *str
= *pstr
;
1776 unsigned int c
= *str
++;
/* These four escapes stand for themselves; the next six map to the
   TARGET_ character constants.  */
1780 case '\\': case '\'': case '"': case '?': break;
1781 case 'b': c
= TARGET_BS
; break;
1782 case 'f': c
= TARGET_FF
; break;
1783 case 'n': c
= TARGET_NEWLINE
; break;
1784 case 'r': c
= TARGET_CR
; break;
1785 case 't': c
= TARGET_TAB
; break;
1786 case 'v': c
= TARGET_VT
; break;
1788 case '(': case '{': case '[': case '%':
1789 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1790 '\%' is used to prevent SCCS from getting confused. */
1791 unknown
= CPP_PEDANTIC (pfile
);
1795 if (CPP_WTRADITIONAL (pfile
))
1796 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
1802 if (CPP_PEDANTIC (pfile
))
1803 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
/* Universal character names are handed to maybe_read_ucs; its result
   becomes the UNKNOWN flag.  */
1808 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
1812 if (CPP_WTRADITIONAL (pfile
))
1813 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
/* Hexadecimal escape: I accumulates the value four bits per digit.
   Before each shift, OVERFLOW collects any bits that the shift would
   push out of I.  */
1817 unsigned int i
= 0, overflow
= 0;
1818 int digits_found
= 0;
1826 overflow
|= i
^ (i
<< 4 >> 4);
1827 i
= (i
<< 4) + hex_digit_value (c
);
1832 cpp_error (pfile
, "\\x used with no following hex digits");
/* Out of range when bits were lost or when I has bits outside MASK.  */
1834 if (overflow
| (i
!= (i
& mask
)))
1836 cpp_pedwarn (pfile
, "hex escape sequence out of range");
/* Octal escape: the first digit is already in C.  The loop stops at
   LIMIT, at a character outside 0 through 7, or once the pre-incremented
   COUNT reaches 3.  NOTE(review): the declaration and initial value of
   COUNT are not visible here.  */
1843 case '0': case '1': case '2': case '3':
1844 case '4': case '5': case '6': case '7':
1846 unsigned int i
= c
- '0';
1849 while (str
< limit
&& ++count
< 3)
1852 if (c
< '0' || c
> '7')
1855 i
= (i
<< 3) + c
- '0';
1858 if (i
!= (i
& mask
))
1860 cpp_pedwarn (pfile
, "octal escape sequence out of range");
/* Diagnostics for unknown escapes.  NOTE(review): the tests that
   choose between the two message forms, and the test guarding the
   last warning, are not visible in this listing.  */
1875 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1877 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
1881 cpp_pedwarn (pfile
, "escape sequence out of range for character");
/* When no MAX_ variants are defined, fall back to CHAR_TYPE_SIZE and
   WCHAR_TYPE_SIZE.  NOTE(review): the matching #endif lines are not
   visible in this listing.  */
1887 #ifndef MAX_CHAR_TYPE_SIZE
1888 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1891 #ifndef MAX_WCHAR_TYPE_SIZE
1892 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1895 /* Interpret a (possibly wide) character constant in TOKEN.
1896 WARN_MULTI warns about multi-character charconsts, if not
1897 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1898 that did not exist in traditional C. PCHARS_SEEN points to a
1899 variable that is filled in with the number of characters seen. */
1901 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
1903 const cpp_token
*token
;
1906 unsigned int *pchars_seen
;
/* STR walks the text of the constant and LIMIT marks its end.  */
1908 const unsigned char *str
= token
->val
.str
.text
;
1909 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1910 unsigned int chars_seen
= 0;
1911 unsigned int width
, max_chars
, c
;
1912 unsigned HOST_WIDE_INT mask
;
1913 HOST_WIDE_INT result
= 0;
1915 #ifdef MULTIBYTE_CHARS
1916 (void) local_mbtowc (NULL
, NULL
, 0);
1919 /* Width in bits. */
1920 if (token
->type
== CPP_CHAR
)
1921 width
= MAX_CHAR_TYPE_SIZE
;
1923 width
= MAX_WCHAR_TYPE_SIZE
;
/* MASK keeps the low WIDTH bits.  NOTE(review): the value used when
   WIDTH is not below HOST_BITS_PER_WIDE_INT is on a line that is not
   visible here.  MAX_CHARS is how many characters of WIDTH bits fit
   in a HOST_WIDE_INT.  */
1925 if (width
< HOST_BITS_PER_WIDE_INT
)
1926 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
1929 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
1933 #ifdef MULTIBYTE_CHARS
1937 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1940 cpp_warning (pfile
, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which also
   advances STR.  NOTE(review): the loop header and the test for a
   backslash are not visible in this listing.  */
1953 c
= cpp_parse_escape (pfile
, &str
, limit
, mask
, traditional
);
1955 #ifdef MAP_CHARACTER
1957 c
= MAP_CHARACTER (c
);
1960 /* Merge character into result; ignore excess chars. */
1961 if (++chars_seen
<= max_chars
)
1963 if (width
< HOST_BITS_PER_WIDE_INT
)
1964 result
= (result
<< width
) | (c
& mask
);
/* Diagnose an empty constant, clamp and warn about one with more than
   MAX_CHARS characters, and otherwise warn about a multi-character
   constant when WARN_MULTI is set and TRADITIONAL is not.  */
1970 if (chars_seen
== 0)
1971 cpp_error (pfile
, "empty character constant");
1972 else if (chars_seen
> max_chars
)
1974 chars_seen
= max_chars
;
1975 cpp_warning (pfile
, "character constant too long");
1977 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
1978 cpp_warning (pfile
, "multi-character character constant");
1980 /* If char type is signed, sign-extend the constant. The
1981 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1982 if (token
->type
== CPP_CHAR
&& chars_seen
)
/* NOTE(review): NBITS can be as large as MAX_CHARS * WIDTH, which is
   bounded by HOST_BITS_PER_WIDE_INT rather than HOST_BITS_PER_INT.
   If the former is larger, the shift count below could become
   negative - confirm against the host configuration.  */
1984 unsigned int nbits
= chars_seen
* width
;
1985 unsigned int mask
= (unsigned int) ~0 >> (HOST_BITS_PER_INT
- nbits
);
/* The test is true when __CHAR_UNSIGNED__ is a macro or when the top
   bit of the NBITS-wide value is clear.  The statements selected by
   it are not visible in this listing.  */
1987 if (pfile
->spec_nodes
.n__CHAR_UNSIGNED__
->type
== NT_MACRO
1988 || ((result
>> (nbits
- 1)) & 1) == 0)
1994 *pchars_seen
= chars_seen
;
/* Default alignment used by the pool code below: the offset of member
   u within struct dummy.  NOTE(review): the definition of struct
   dummy is not visible in this listing.  */
2010 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Tests whether CHUNK can serve a request of SIZE bytes from POOL:
   CHUNK must be non-null, must differ from the chunk recorded in
   POOL->locked, and must span at least twice SIZE bytes between its
   base and its limit.  NOTE(review): the comment inside the body
   below has lost its closing line in this listing.  */
2013 chunk_suitable (pool
, chunk
, size
)
2018 /* Being at least twice SIZE means we can use memcpy in
2019 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2021 return (chunk
&& pool
->locked
!= chunk
2022 && (unsigned int) (chunk
->limit
- chunk
->base
) >= size
* 2);
2025 /* Returns the end of the new pool. PTR points to a char in the old
2026 pool, and is updated to point to the same char in the new pool. */
2028 _cpp_next_chunk (pool
, len
, ptr
)
2031 unsigned char **ptr
;
/* The candidate is the chunk that follows the current one.  */
2033 cpp_chunk
*chunk
= pool
->cur
->next
;
2035 /* LEN is the minimum size we want in the new pool. */
2036 len
+= POOL_ROOM (pool
);
/* When the candidate is not suitable, a fresh chunk is allocated and
   linked in directly after the current chunk.  */
2037 if (! chunk_suitable (pool
, chunk
, len
))
2039 chunk
= new_chunk (POOL_SIZE (pool
) * 2 + len
);
2041 chunk
->next
= pool
->cur
->next
;
2042 pool
->cur
->next
= chunk
;
2045 /* Update the pointer before changing chunk's front. */
/* NOTE(review): _cpp_pool_reserve passes 0 for PTR, so this store
   must be guarded by a test on a line that is not visible in this
   listing - confirm.  */
2047 *ptr
+= chunk
->base
- POOL_FRONT (pool
);
/* Carry the POOL_ROOM bytes at the front of the pool over to the
   start of the chosen chunk.  NOTE(review): the statement that makes
   CHUNK the current chunk of POOL is not visible here.  */
2049 memcpy (chunk
->base
, POOL_FRONT (pool
), POOL_ROOM (pool
));
2050 chunk
->front
= chunk
->base
;
2053 return POOL_LIMIT (pool
);
/* NOTE(review): the function header is missing from this listing;
   judging by the calls elsewhere in this file this is presumably
   new_chunk.  The visible body rounds SIZE up to DEFAULT_ALIGNMENT,
   obtains SIZE plus one chunk descriptor from xmalloc, and
   initialises the descriptor so that base and front point at the
   start of the storage and limit points SIZE bytes further on.  */
2060 unsigned char *base
;
2063 size
= POOL_ALIGN (size
, DEFAULT_ALIGNMENT
);
2064 base
= (unsigned char *) xmalloc (size
+ sizeof (cpp_chunk
));
2065 /* Put the chunk descriptor at the end. Then chunk overruns will
2066 cause obvious chaos. */
2067 result
= (cpp_chunk
*) (base
+ size
);
2068 result
->base
= base
;
2069 result
->front
= base
;
2070 result
->limit
= base
+ size
;
/* Initialises POOL with alignment ALIGN and one chunk of SIZE bytes.
   NOTE(review): TEMP is not referenced on any visible line, and the
   tests guarding some of the statements below are missing from this
   listing.  */
2077 _cpp_init_pool (pool
, size
, align
, temp
)
2079 unsigned int size
, align
, temp
;
/* Presumably applied only when the caller gave no alignment; the
   guarding test is not visible here.  */
2082 align
= DEFAULT_ALIGNMENT
;
/* Nonzero when ALIGN has more than one bit set, that is, when it is
   not a power of two.  The statement guarded by this test is not
   visible here.  */
2083 if (align
& (align
- 1))
2085 pool
->align
= align
;
2086 pool
->cur
= new_chunk (size
);
/* Link the single chunk to itself, making the chunk list circular.  */
2090 pool
->cur
->next
= pool
->cur
;
/* Increments the lock count of POOL.  When the count was zero
   beforehand, the current chunk is recorded in POOL->locked.  */
2094 _cpp_lock_pool (pool
)
2097 if (pool
->locks
++ == 0)
2098 pool
->locked
= pool
->cur
;
/* Decrements the lock count of POOL.  NOTE(review): the statement
   executed when the count reaches zero is not visible in this
   listing.  */
2102 _cpp_unlock_pool (pool
)
2105 if (--pool
->locks
== 0)
/* Walks the chunks of POOL starting at the current chunk.  The loop
   ends when the cursor becomes null or arrives back at POOL->cur.
   NOTE(review): the loop body, which uses NEXT, is not visible in
   this listing.  */
2110 _cpp_free_pool (pool
)
2113 cpp_chunk
*chunk
= pool
->cur
, *next
;
2121 while (chunk
&& chunk
!= pool
->cur
);
2124 /* Reserve LEN bytes from a memory pool. */
2126 _cpp_pool_reserve (pool
, len
)
/* Round LEN up to the alignment of the pool.  */
2130 len
= POOL_ALIGN (len
, pool
->align
);
/* When the rounded request exceeds POOL_ROOM, move to another chunk.
   No pointer needs relocating, so 0 is passed for PTR.  */
2131 if (len
> (unsigned int) POOL_ROOM (pool
))
2132 _cpp_next_chunk (pool
, len
, 0);
/* The reserved space starts at the front of the pool.  Nothing on the
   visible lines of this function commits it.  */
2134 return POOL_FRONT (pool
);
2137 /* Allocate LEN bytes from a memory pool. */
2139 _cpp_pool_alloc (pool
, len
)
2143 unsigned char *result
= _cpp_pool_reserve (pool
, len
);
2145 POOL_COMMIT (pool
, len
);