1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straightforward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in the token->val.name.len. */
67 enum spell_type category
;
68 const unsigned char *name
;
71 const unsigned char *digraph_spellings
[] = {U
"%:", U
"%:%:", U
"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings
[N_TTYPES
] = {TTYPE_TABLE
};
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline
PARAMS ((cpp_buffer
*, cppchar_t
));
84 static cppchar_t skip_escaped_newlines
PARAMS ((cpp_buffer
*, cppchar_t
));
85 static cppchar_t get_effective_char
PARAMS ((cpp_buffer
*));
87 static int skip_block_comment
PARAMS ((cpp_reader
*));
88 static int skip_line_comment
PARAMS ((cpp_reader
*));
89 static void adjust_column
PARAMS ((cpp_reader
*));
90 static void skip_whitespace
PARAMS ((cpp_reader
*, cppchar_t
));
91 static cpp_hashnode
*parse_identifier
PARAMS ((cpp_reader
*, cppchar_t
));
92 static void parse_number
PARAMS ((cpp_reader
*, cpp_string
*, cppchar_t
, int));
93 static int unescaped_terminator_p
PARAMS ((cpp_reader
*, const U_CHAR
*));
94 static void parse_string
PARAMS ((cpp_reader
*, cpp_token
*, cppchar_t
));
95 static void unterminated
PARAMS ((cpp_reader
*, int));
96 static int trigraph_ok
PARAMS ((cpp_reader
*, cppchar_t
));
97 static void save_comment
PARAMS ((cpp_reader
*, cpp_token
*, const U_CHAR
*));
98 static void lex_percent
PARAMS ((cpp_buffer
*, cpp_token
*));
99 static void lex_dot
PARAMS ((cpp_reader
*, cpp_token
*));
100 static int name_p
PARAMS ((cpp_reader
*, const cpp_string
*));
101 static int maybe_read_ucs
PARAMS ((cpp_reader
*, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk
*new_chunk
PARAMS ((unsigned int));
105 static int chunk_suitable
PARAMS ((cpp_pool
*, cpp_chunk
*, unsigned int));
106 static unsigned int hex_digit_value
PARAMS ((unsigned int));
110 Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
114 cpp_ideq (token
, string
)
115 const cpp_token
*token
;
118 if (token
->type
!= CPP_NAME
)
121 return !ustrcmp (NODE_NAME (token
->val
.node
), (const U_CHAR
*) string
);
124 /* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
127 handle_newline (buffer
, newline_char
)
129 cppchar_t newline_char
;
131 cppchar_t next
= EOF
;
133 buffer
->col_adjust
= 0;
135 buffer
->line_base
= buffer
->cur
;
137 /* Handle CR-LF and LF-CR combinations, get the next character. */
138 if (buffer
->cur
< buffer
->rlimit
)
140 next
= *buffer
->cur
++;
141 if (next
+ newline_char
== '\r' + '\n')
143 buffer
->line_base
= buffer
->cur
;
144 if (buffer
->cur
< buffer
->rlimit
)
145 next
= *buffer
->cur
++;
151 buffer
->read_ahead
= next
;
155 /* Subroutine of skip_escaped_newlines; called when a trigraph is
156 encountered. It warns if necessary, and returns true if the
157 trigraph should be honoured. FROM_CHAR is the third character of a
158 trigraph, and presumed to be the previous character for position
161 trigraph_ok (pfile
, from_char
)
165 int accept
= CPP_OPTION (pfile
, trigraphs
);
167 /* Don't warn about trigraphs in comments. */
168 if (CPP_OPTION (pfile
, warn_trigraphs
) && !pfile
->state
.lexing_comment
)
170 cpp_buffer
*buffer
= pfile
->buffer
;
172 cpp_warning_with_line (pfile
, buffer
->lineno
, CPP_BUF_COL (buffer
) - 2,
173 "trigraph ??%c converted to %c",
175 (int) _cpp_trigraph_map
[from_char
]);
176 else if (buffer
->cur
!= buffer
->last_Wtrigraphs
)
178 buffer
->last_Wtrigraphs
= buffer
->cur
;
179 cpp_warning_with_line (pfile
, buffer
->lineno
,
180 CPP_BUF_COL (buffer
) - 2,
181 "trigraph ??%c ignored", (int) from_char
);
188 /* Assumes local variables buffer and result. */
189 #define ACCEPT_CHAR(t) \
190 do { result->type = t; buffer->read_ahead = EOF; } while (0)
192 /* When we move to multibyte character sets, add to these something
193 that saves and restores the state of the multibyte conversion
194 library. This probably involves saving and restoring a "cookie".
195 In the case of glibc it is an 8-byte structure, so is not a high
196 overhead operation. In any case, it's out of the fast path. */
197 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
198 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
200 /* Skips any escaped newlines introduced by NEXT, which is either a
201 '?' or a '\\'. Returns the next character, which will also have
202 been placed in buffer->read_ahead. This routine performs
203 preprocessing stages 1 and 2 of the ISO C standard. */
205 skip_escaped_newlines (buffer
, next
)
209 /* Only do this if we apply stages 1 and 2. */
210 if (!buffer
->from_stage3
)
213 const unsigned char *saved_cur
;
218 if (buffer
->cur
== buffer
->rlimit
)
224 next1
= *buffer
->cur
++;
225 if (next1
!= '?' || buffer
->cur
== buffer
->rlimit
)
231 next1
= *buffer
->cur
++;
232 if (!_cpp_trigraph_map
[next1
]
233 || !trigraph_ok (buffer
->pfile
, next1
))
239 /* We have a full trigraph here. */
240 next
= _cpp_trigraph_map
[next1
];
241 if (next
!= '\\' || buffer
->cur
== buffer
->rlimit
)
246 /* We have a backslash, and room for at least one more character. */
250 next1
= *buffer
->cur
++;
251 if (!is_nvspace (next1
))
255 while (buffer
->cur
< buffer
->rlimit
);
257 if (!is_vspace (next1
))
263 if (space
&& !buffer
->pfile
->state
.lexing_comment
)
264 cpp_warning (buffer
->pfile
,
265 "backslash and newline separated by space");
267 next
= handle_newline (buffer
, next1
);
269 cpp_pedwarn (buffer
->pfile
, "backslash-newline at end of file");
271 while (next
== '\\' || next
== '?');
274 buffer
->read_ahead
= next
;
278 /* Obtain the next character, after trigraph conversion and skipping
279 an arbitrary string of escaped newlines. The common case of no
280 trigraphs or escaped newlines falls through quickly. */
282 get_effective_char (buffer
)
285 cppchar_t next
= EOF
;
287 if (buffer
->cur
< buffer
->rlimit
)
289 next
= *buffer
->cur
++;
291 /* '?' can introduce trigraphs (and therefore backslash); '\\'
292 can introduce escaped newlines, which we want to skip, or
293 UCNs, which, depending upon lexer state, we will handle in
295 if (next
== '?' || next
== '\\')
296 next
= skip_escaped_newlines (buffer
, next
);
299 buffer
->read_ahead
= next
;
303 /* Skip a C-style block comment. We find the end of the comment by
304 seeing if an asterisk is before every '/' we encounter. Returns
305 non-zero if comment terminated by EOF, zero otherwise. */
307 skip_block_comment (pfile
)
310 cpp_buffer
*buffer
= pfile
->buffer
;
311 cppchar_t c
= EOF
, prevc
= EOF
;
313 pfile
->state
.lexing_comment
= 1;
314 while (buffer
->cur
!= buffer
->rlimit
)
316 prevc
= c
, c
= *buffer
->cur
++;
319 /* FIXME: For speed, create a new character class of characters
320 of interest inside block comments. */
321 if (c
== '?' || c
== '\\')
322 c
= skip_escaped_newlines (buffer
, c
);
324 /* People like decorating comments with '*', so check for '/'
325 instead for efficiency. */
331 /* Warn about potential nested comments, but not if the '/'
332 comes immediately before the true comment delimiter.
333 Don't bother to get it right across escaped newlines. */
334 if (CPP_OPTION (pfile
, warn_comments
)
335 && buffer
->cur
!= buffer
->rlimit
)
337 prevc
= c
, c
= *buffer
->cur
++;
338 if (c
== '*' && buffer
->cur
!= buffer
->rlimit
)
340 prevc
= c
, c
= *buffer
->cur
++;
342 cpp_warning_with_line (pfile
, CPP_BUF_LINE (buffer
),
343 CPP_BUF_COL (buffer
),
344 "\"/*\" within comment");
349 else if (is_vspace (c
))
351 prevc
= c
, c
= handle_newline (buffer
, c
);
355 adjust_column (pfile
);
358 pfile
->state
.lexing_comment
= 0;
359 buffer
->read_ahead
= EOF
;
360 return c
!= '/' || prevc
!= '*';
363 /* Skip a C++ line comment. Handles escaped newlines. Returns
364 non-zero if a multiline comment. The following new line, if any,
365 is left in buffer->read_ahead. */
367 skip_line_comment (pfile
)
370 cpp_buffer
*buffer
= pfile
->buffer
;
371 unsigned int orig_lineno
= buffer
->lineno
;
374 pfile
->state
.lexing_comment
= 1;
378 if (buffer
->cur
== buffer
->rlimit
)
382 if (c
== '?' || c
== '\\')
383 c
= skip_escaped_newlines (buffer
, c
);
385 while (!is_vspace (c
));
387 pfile
->state
.lexing_comment
= 0;
388 buffer
->read_ahead
= c
; /* Leave any newline for caller. */
389 return orig_lineno
!= buffer
->lineno
;
392 /* pfile->buffer->cur is one beyond the \t character. Update
393 col_adjust so we track the column correctly. */
395 adjust_column (pfile
)
398 cpp_buffer
*buffer
= pfile
->buffer
;
399 unsigned int col
= CPP_BUF_COL (buffer
) - 1; /* Zero-based column. */
401 /* Round it up to multiple of the tabstop, but subtract 1 since the
402 tab itself occupies a character position. */
403 buffer
->col_adjust
+= (CPP_OPTION (pfile
, tabstop
)
404 - col
% CPP_OPTION (pfile
, tabstop
)) - 1;
407 /* Skips whitespace, saving the next non-whitespace character.
408 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
409 tokens might be assigned an incorrect column. */
411 skip_whitespace (pfile
, c
)
415 cpp_buffer
*buffer
= pfile
->buffer
;
416 unsigned int warned
= 0;
420 /* Horizontal space always OK. */
424 adjust_column (pfile
);
425 /* Just \f \v or \0 left. */
430 cpp_warning (pfile
, "null character(s) ignored");
434 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
435 cpp_pedwarn_with_line (pfile
, CPP_BUF_LINE (buffer
),
436 CPP_BUF_COL (buffer
),
437 "%s in preprocessing directive",
438 c
== '\f' ? "form feed" : "vertical tab");
441 if (buffer
->cur
== buffer
->rlimit
)
445 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
446 while (is_nvspace (c
));
448 /* Remember the next character. */
449 buffer
->read_ahead
= c
;
452 /* See if the characters of a number token are valid in a name (no
455 name_p (pfile
, string
)
457 const cpp_string
*string
;
461 for (i
= 0; i
< string
->len
; i
++)
462 if (!is_idchar (string
->text
[i
]))
468 /* Parse an identifier, skipping embedded backslash-newlines.
469 Calculate the hash value of the token while parsing, for improved
470 performance. The hashing algorithm *must* match cpp_lookup(). */
472 static cpp_hashnode
*
473 parse_identifier (pfile
, c
)
477 cpp_hashnode
*result
;
478 cpp_buffer
*buffer
= pfile
->buffer
;
479 unsigned int saw_dollar
= 0, len
;
480 struct obstack
*stack
= &pfile
->hash_table
->stack
;
486 obstack_1grow (stack
, c
);
492 if (buffer
->cur
== buffer
->rlimit
)
497 while (is_idchar (c
));
499 /* Potential escaped newline? */
500 if (c
!= '?' && c
!= '\\')
502 c
= skip_escaped_newlines (buffer
, c
);
504 while (is_idchar (c
));
506 /* Remember the next character. */
507 buffer
->read_ahead
= c
;
509 /* $ is not an identifier character in the standard, but is commonly
510 accepted as an extension. Don't warn about it in skipped
511 conditional blocks. */
512 if (saw_dollar
&& CPP_PEDANTIC (pfile
) && ! pfile
->skipping
)
513 cpp_pedwarn (pfile
, "'$' character(s) in identifier");
515 /* Identifiers are null-terminated. */
516 len
= obstack_object_size (stack
);
517 obstack_1grow (stack
, '\0');
519 /* This routine commits the memory if necessary. */
520 result
= (cpp_hashnode
*)
521 ht_lookup (pfile
->hash_table
, obstack_finish (stack
), len
, HT_ALLOCED
);
523 /* Some identifiers require diagnostics when lexed. */
524 if (result
->flags
& NODE_DIAGNOSTIC
&& !pfile
->skipping
)
526 /* It is allowed to poison the same identifier twice. */
527 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
528 cpp_error (pfile
, "attempt to use poisoned \"%s\"",
531 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
532 replacement list of a variadic macro. */
533 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
534 && !pfile
->state
.va_args_ok
)
535 cpp_pedwarn (pfile
, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
541 /* Parse a number, skipping embedded backslash-newlines. */
543 parse_number (pfile
, number
, c
, leading_period
)
549 cpp_buffer
*buffer
= pfile
->buffer
;
550 cpp_pool
*pool
= &pfile
->ident_pool
;
551 unsigned char *dest
, *limit
;
553 dest
= POOL_FRONT (pool
);
554 limit
= POOL_LIMIT (pool
);
556 /* Place a leading period. */
560 limit
= _cpp_next_chunk (pool
, 0, &dest
);
568 /* Need room for terminating null. */
569 if (dest
+ 1 >= limit
)
570 limit
= _cpp_next_chunk (pool
, 0, &dest
);
574 if (buffer
->cur
== buffer
->rlimit
)
579 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
581 /* Potential escaped newline? */
582 if (c
!= '?' && c
!= '\\')
584 c
= skip_escaped_newlines (buffer
, c
);
586 while (is_numchar (c
) || c
== '.' || VALID_SIGN (c
, dest
[-1]));
588 /* Remember the next character. */
589 buffer
->read_ahead
= c
;
591 /* Null-terminate the number. */
594 number
->text
= POOL_FRONT (pool
);
595 number
->len
= dest
- number
->text
;
596 POOL_COMMIT (pool
, number
->len
+ 1);
599 /* Subroutine of parse_string. Emits error for unterminated strings. */
601 unterminated (pfile
, term
)
605 cpp_error (pfile
, "missing terminating %c character", term
);
607 if (term
== '\"' && pfile
->mlstring_pos
.line
608 && pfile
->mlstring_pos
.line
!= pfile
->lexer_pos
.line
)
610 cpp_error_with_line (pfile
, pfile
->mlstring_pos
.line
,
611 pfile
->mlstring_pos
.col
,
612 "possible start of unterminated string literal");
613 pfile
->mlstring_pos
.line
= 0;
617 /* Subroutine of parse_string. */
619 unescaped_terminator_p (pfile
, dest
)
621 const unsigned char *dest
;
623 const unsigned char *start
, *temp
;
625 /* In #include-style directives, terminators are not escapable. */
626 if (pfile
->state
.angled_headers
)
629 start
= POOL_FRONT (&pfile
->ident_pool
);
631 /* An odd number of consecutive backslashes represents an escaped
633 for (temp
= dest
; temp
> start
&& temp
[-1] == '\\'; temp
--)
636 return ((dest
- temp
) & 1) == 0;
639 /* Parses a string, character constant, or angle-bracketed header file
640 name. Handles embedded trigraphs and escaped newlines. The stored
641 string is guaranteed NUL-terminated, but it is not guaranteed that
642 this is the first NUL since embedded NULs are preserved.
644 Multi-line strings are allowed, but they are deprecated. */
646 parse_string (pfile
, token
, terminator
)
649 cppchar_t terminator
;
651 cpp_buffer
*buffer
= pfile
->buffer
;
652 cpp_pool
*pool
= &pfile
->ident_pool
;
653 unsigned char *dest
, *limit
;
655 unsigned int nulls
= 0;
657 dest
= POOL_FRONT (pool
);
658 limit
= POOL_LIMIT (pool
);
662 if (buffer
->cur
== buffer
->rlimit
)
668 /* We need space for the terminating NUL. */
670 limit
= _cpp_next_chunk (pool
, 0, &dest
);
674 unterminated (pfile
, terminator
);
678 /* Handle trigraphs, escaped newlines etc. */
679 if (c
== '?' || c
== '\\')
680 c
= skip_escaped_newlines (buffer
, c
);
682 if (c
== terminator
&& unescaped_terminator_p (pfile
, dest
))
687 else if (is_vspace (c
))
689 /* In assembly language, silently terminate string and
690 character literals at end of line. This is a kludge
691 around not knowing where comments are. */
692 if (CPP_OPTION (pfile
, lang
) == CLK_ASM
&& terminator
!= '>')
695 /* Character constants and header names may not extend over
696 multiple lines. In Standard C, neither may strings.
697 Unfortunately, we accept multiline strings as an
698 extension, except in #include family directives. */
699 if (terminator
!= '"' || pfile
->state
.angled_headers
)
701 unterminated (pfile
, terminator
);
705 cpp_pedwarn (pfile
, "multi-line string literals are deprecated");
706 if (pfile
->mlstring_pos
.line
== 0)
707 pfile
->mlstring_pos
= pfile
->lexer_pos
;
709 c
= handle_newline (buffer
, c
);
716 cpp_warning (pfile
, "null character(s) preserved in literal");
722 /* Remember the next character. */
723 buffer
->read_ahead
= c
;
726 token
->val
.str
.text
= POOL_FRONT (pool
);
727 token
->val
.str
.len
= dest
- token
->val
.str
.text
;
728 POOL_COMMIT (pool
, token
->val
.str
.len
+ 1);
731 /* The stored comment includes the comment start and any terminator. */
733 save_comment (pfile
, token
, from
)
736 const unsigned char *from
;
738 unsigned char *buffer
;
741 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
742 /* C++ comments probably (not definitely) have moved past a new
743 line, which we don't want to save in the comment. */
744 if (pfile
->buffer
->read_ahead
!= EOF
)
746 buffer
= _cpp_pool_alloc (&pfile
->ident_pool
, len
);
748 token
->type
= CPP_COMMENT
;
749 token
->val
.str
.len
= len
;
750 token
->val
.str
.text
= buffer
;
753 memcpy (buffer
+ 1, from
, len
- 1);
756 /* Subroutine of lex_token to handle '%'. A little tricky, since we
757 want to avoid stepping back when lexing %:%X. */
759 lex_percent (buffer
, result
)
765 result
->type
= CPP_MOD
;
766 /* Parsing %:%X could leave an extra character. */
767 if (buffer
->extra_char
== EOF
)
768 c
= get_effective_char (buffer
);
771 c
= buffer
->read_ahead
= buffer
->extra_char
;
772 buffer
->extra_char
= EOF
;
776 ACCEPT_CHAR (CPP_MOD_EQ
);
777 else if (CPP_OPTION (buffer
->pfile
, digraphs
))
781 result
->flags
|= DIGRAPH
;
782 ACCEPT_CHAR (CPP_HASH
);
783 if (get_effective_char (buffer
) == '%')
785 buffer
->extra_char
= get_effective_char (buffer
);
786 if (buffer
->extra_char
== ':')
788 buffer
->extra_char
= EOF
;
789 ACCEPT_CHAR (CPP_PASTE
);
792 /* We'll catch the extra_char when we're called back. */
793 buffer
->read_ahead
= '%';
798 result
->flags
|= DIGRAPH
;
799 ACCEPT_CHAR (CPP_CLOSE_BRACE
);
804 /* Subroutine of lex_token to handle '.'. This is tricky, since we
805 want to avoid stepping back when lexing '...' or '.123'. In the
806 latter case we should also set a flag for parse_number. */
808 lex_dot (pfile
, result
)
812 cpp_buffer
*buffer
= pfile
->buffer
;
815 /* Parsing ..X could leave an extra character. */
816 if (buffer
->extra_char
== EOF
)
817 c
= get_effective_char (buffer
);
820 c
= buffer
->read_ahead
= buffer
->extra_char
;
821 buffer
->extra_char
= EOF
;
824 /* All known character sets have 0...9 contiguous. */
825 if (c
>= '0' && c
<= '9')
827 result
->type
= CPP_NUMBER
;
828 parse_number (pfile
, &result
->val
.str
, c
, 1);
832 result
->type
= CPP_DOT
;
835 buffer
->extra_char
= get_effective_char (buffer
);
836 if (buffer
->extra_char
== '.')
838 buffer
->extra_char
= EOF
;
839 ACCEPT_CHAR (CPP_ELLIPSIS
);
842 /* We'll catch the extra_char when we're called back. */
843 buffer
->read_ahead
= '.';
845 else if (c
== '*' && CPP_OPTION (pfile
, cplusplus
))
846 ACCEPT_CHAR (CPP_DOT_STAR
);
851 _cpp_lex_token (pfile
, result
)
857 const unsigned char *comment_start
;
861 bol
= pfile
->state
.next_bol
;
863 buffer
= pfile
->buffer
;
864 pfile
->state
.next_bol
= 0;
865 result
->flags
= buffer
->saved_flags
;
866 buffer
->saved_flags
= 0;
868 pfile
->lexer_pos
.line
= buffer
->lineno
;
870 pfile
->lexer_pos
.col
= CPP_BUF_COLUMN (buffer
, buffer
->cur
);
872 c
= buffer
->read_ahead
;
873 if (c
== EOF
&& buffer
->cur
< buffer
->rlimit
)
876 pfile
->lexer_pos
.col
++;
880 buffer
->read_ahead
= EOF
;
884 /* Non-empty files should end in a newline. Checking "bol" too
885 prevents multiple warnings when hitting the EOF more than
886 once, like in a directive. Don't warn for command line and
888 if (pfile
->lexer_pos
.col
!= 0 && !bol
&& !buffer
->from_stage3
)
889 cpp_pedwarn (pfile
, "no newline at end of file");
890 pfile
->state
.next_bol
= 1;
891 pfile
->skipping
= 0; /* In case missing #endif. */
892 result
->type
= CPP_EOF
;
893 /* Don't do MI optimisation. */
896 case ' ': case '\t': case '\f': case '\v': case '\0':
897 skip_whitespace (pfile
, c
);
898 result
->flags
|= PREV_WHITE
;
901 case '\n': case '\r':
902 if (!pfile
->state
.in_directive
)
904 handle_newline (buffer
, c
);
906 pfile
->lexer_pos
.output_line
= buffer
->lineno
;
907 /* This is a new line, so clear any white space flag.
908 Newlines in arguments are white space (6.10.3.10);
909 parse_arg takes care of that. */
910 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
914 /* Don't let directives spill over to the next line. */
915 buffer
->read_ahead
= c
;
916 pfile
->state
.next_bol
= 1;
917 result
->type
= CPP_EOF
;
918 /* Don't break; pfile->skipping might be true. */
923 /* These could start an escaped newline, or '?' a trigraph. Let
924 skip_escaped_newlines do all the work. */
926 unsigned int lineno
= buffer
->lineno
;
928 c
= skip_escaped_newlines (buffer
, c
);
929 if (lineno
!= buffer
->lineno
)
930 /* We had at least one escaped newline of some sort, and the
931 next character is in buffer->read_ahead. Update the
932 token's line and column. */
935 /* We are either the original '?' or '\\', or a trigraph. */
936 result
->type
= CPP_QUERY
;
937 buffer
->read_ahead
= EOF
;
945 case '0': case '1': case '2': case '3': case '4':
946 case '5': case '6': case '7': case '8': case '9':
947 result
->type
= CPP_NUMBER
;
948 parse_number (pfile
, &result
->val
.str
, c
, 0);
952 if (!CPP_OPTION (pfile
, dollars_in_ident
))
954 /* Fall through... */
957 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
958 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
959 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
960 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
962 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
963 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
964 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
965 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
967 result
->type
= CPP_NAME
;
968 result
->val
.node
= parse_identifier (pfile
, c
);
970 /* 'L' may introduce wide characters or strings. */
971 if (result
->val
.node
== pfile
->spec_nodes
.n_L
)
973 c
= buffer
->read_ahead
; /* For make_string. */
974 if (c
== '\'' || c
== '"')
976 ACCEPT_CHAR (c
== '"' ? CPP_WSTRING
: CPP_WCHAR
);
980 /* Convert named operators to their proper types. */
981 else if (result
->val
.node
->flags
& NODE_OPERATOR
)
983 result
->flags
|= NAMED_OP
;
984 result
->type
= result
->val
.node
->value
.operator;
990 result
->type
= c
== '"' ? CPP_STRING
: CPP_CHAR
;
992 parse_string (pfile
, result
, c
);
996 /* A potential block or line comment. */
997 comment_start
= buffer
->cur
;
998 result
->type
= CPP_DIV
;
999 c
= get_effective_char (buffer
);
1001 ACCEPT_CHAR (CPP_DIV_EQ
);
1002 if (c
!= '/' && c
!= '*')
1007 if (skip_block_comment (pfile
))
1008 cpp_error_with_line (pfile
, pfile
->lexer_pos
.line
,
1009 pfile
->lexer_pos
.col
,
1010 "unterminated comment");
1014 if (!CPP_OPTION (pfile
, cplusplus_comments
)
1015 && !CPP_IN_SYSTEM_HEADER (pfile
))
1018 /* Warn about comments only if pedantically GNUC89, and not
1019 in system headers. */
1020 if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
&& CPP_PEDANTIC (pfile
)
1021 && ! buffer
->warned_cplusplus_comments
)
1024 "C++ style comments are not allowed in ISO C89");
1026 "(this will be reported only once per input file)");
1027 buffer
->warned_cplusplus_comments
= 1;
1030 /* Skip_line_comment updates buffer->read_ahead. */
1031 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
1032 cpp_warning_with_line (pfile
, pfile
->lexer_pos
.line
,
1033 pfile
->lexer_pos
.col
,
1034 "multi-line comment");
1037 /* Skipping the comment has updated buffer->read_ahead. */
1038 if (!pfile
->state
.save_comments
)
1040 result
->flags
|= PREV_WHITE
;
1044 /* Save the comment as a token in its own right. */
1045 save_comment (pfile
, result
, comment_start
);
1046 /* Don't do MI optimisation. */
1050 if (pfile
->state
.angled_headers
)
1052 result
->type
= CPP_HEADER_NAME
;
1053 c
= '>'; /* terminator. */
1057 result
->type
= CPP_LESS
;
1058 c
= get_effective_char (buffer
);
1060 ACCEPT_CHAR (CPP_LESS_EQ
);
1063 ACCEPT_CHAR (CPP_LSHIFT
);
1064 if (get_effective_char (buffer
) == '=')
1065 ACCEPT_CHAR (CPP_LSHIFT_EQ
);
1067 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1069 ACCEPT_CHAR (CPP_MIN
);
1070 if (get_effective_char (buffer
) == '=')
1071 ACCEPT_CHAR (CPP_MIN_EQ
);
1073 else if (c
== ':' && CPP_OPTION (pfile
, digraphs
))
1075 ACCEPT_CHAR (CPP_OPEN_SQUARE
);
1076 result
->flags
|= DIGRAPH
;
1078 else if (c
== '%' && CPP_OPTION (pfile
, digraphs
))
1080 ACCEPT_CHAR (CPP_OPEN_BRACE
);
1081 result
->flags
|= DIGRAPH
;
1086 result
->type
= CPP_GREATER
;
1087 c
= get_effective_char (buffer
);
1089 ACCEPT_CHAR (CPP_GREATER_EQ
);
1092 ACCEPT_CHAR (CPP_RSHIFT
);
1093 if (get_effective_char (buffer
) == '=')
1094 ACCEPT_CHAR (CPP_RSHIFT_EQ
);
1096 else if (c
== '?' && CPP_OPTION (pfile
, cplusplus
))
1098 ACCEPT_CHAR (CPP_MAX
);
1099 if (get_effective_char (buffer
) == '=')
1100 ACCEPT_CHAR (CPP_MAX_EQ
);
1105 lex_percent (buffer
, result
);
1106 if (result
->type
== CPP_HASH
)
1111 lex_dot (pfile
, result
);
1115 result
->type
= CPP_PLUS
;
1116 c
= get_effective_char (buffer
);
1118 ACCEPT_CHAR (CPP_PLUS_EQ
);
1120 ACCEPT_CHAR (CPP_PLUS_PLUS
);
1124 result
->type
= CPP_MINUS
;
1125 c
= get_effective_char (buffer
);
1128 ACCEPT_CHAR (CPP_DEREF
);
1129 if (CPP_OPTION (pfile
, cplusplus
)
1130 && get_effective_char (buffer
) == '*')
1131 ACCEPT_CHAR (CPP_DEREF_STAR
);
1134 ACCEPT_CHAR (CPP_MINUS_EQ
);
1136 ACCEPT_CHAR (CPP_MINUS_MINUS
);
1140 result
->type
= CPP_MULT
;
1141 if (get_effective_char (buffer
) == '=')
1142 ACCEPT_CHAR (CPP_MULT_EQ
);
1146 result
->type
= CPP_EQ
;
1147 if (get_effective_char (buffer
) == '=')
1148 ACCEPT_CHAR (CPP_EQ_EQ
);
1152 result
->type
= CPP_NOT
;
1153 if (get_effective_char (buffer
) == '=')
1154 ACCEPT_CHAR (CPP_NOT_EQ
);
1158 result
->type
= CPP_AND
;
1159 c
= get_effective_char (buffer
);
1161 ACCEPT_CHAR (CPP_AND_EQ
);
1163 ACCEPT_CHAR (CPP_AND_AND
);
1167 c
= buffer
->extra_char
; /* Can be set by error condition below. */
1170 buffer
->read_ahead
= c
;
1171 buffer
->extra_char
= EOF
;
1174 c
= get_effective_char (buffer
);
1178 ACCEPT_CHAR (CPP_PASTE
);
1182 result
->type
= CPP_HASH
;
1186 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1187 tokens within the list of arguments that would otherwise act
1188 as preprocessing directives, the behavior is undefined.
1190 This implementation will report a hard error, terminate the
1191 macro invocation, and proceed to process the directive. */
1192 if (pfile
->state
.parsing_args
)
1194 if (pfile
->state
.parsing_args
== 2)
1196 "directives may not be used inside a macro argument");
1198 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1199 buffer
->extra_char
= buffer
->read_ahead
;
1200 buffer
->read_ahead
= '#';
1201 pfile
->state
.next_bol
= 1;
1202 result
->type
= CPP_EOF
;
1204 /* Get whitespace right - newline_in_args sets it. */
1205 if (pfile
->lexer_pos
.col
== 1)
1206 result
->flags
&= ~(PREV_WHITE
| AVOID_LPASTE
);
1210 /* This is the hash introducing a directive. */
1211 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
1212 goto done_directive
; /* bol still 1. */
1213 /* This is in fact an assembler #. */
1218 result
->type
= CPP_OR
;
1219 c
= get_effective_char (buffer
);
1221 ACCEPT_CHAR (CPP_OR_EQ
);
1223 ACCEPT_CHAR (CPP_OR_OR
);
1227 result
->type
= CPP_XOR
;
1228 if (get_effective_char (buffer
) == '=')
1229 ACCEPT_CHAR (CPP_XOR_EQ
);
1233 result
->type
= CPP_COLON
;
1234 c
= get_effective_char (buffer
);
1235 if (c
== ':' && CPP_OPTION (pfile
, cplusplus
))
1236 ACCEPT_CHAR (CPP_SCOPE
);
1237 else if (c
== '>' && CPP_OPTION (pfile
, digraphs
))
1239 result
->flags
|= DIGRAPH
;
1240 ACCEPT_CHAR (CPP_CLOSE_SQUARE
);
1244 case '~': result
->type
= CPP_COMPL
; break;
1245 case ',': result
->type
= CPP_COMMA
; break;
1246 case '(': result
->type
= CPP_OPEN_PAREN
; break;
1247 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
1248 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
1249 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
1250 case '{': result
->type
= CPP_OPEN_BRACE
; break;
1251 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
1252 case ';': result
->type
= CPP_SEMICOLON
; break;
1254 /* @ is a punctuator in Objective C. */
1255 case '@': result
->type
= CPP_ATSIGN
; break;
1259 result
->type
= CPP_OTHER
;
1264 if (pfile
->skipping
)
1267 /* If not in a directive, this token invalidates controlling macros. */
1268 if (!pfile
->state
.in_directive
)
1269 pfile
->mi_state
= MI_FAILED
;
1272 /* An upper bound on the number of bytes needed to spell a token,
1273 including preceding whitespace. */
1275 cpp_token_len (token
)
1276 const cpp_token
*token
;
1280 switch (TOKEN_SPELL (token
))
1282 default: len
= 0; break;
1283 case SPELL_STRING
: len
= token
->val
.str
.len
; break;
1284 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
); break;
1286 /* 1 for whitespace, 4 for comment delimiters. */
1290 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1291 already contain enough space to hold the token's spelling.
1292 Returns a pointer to the character after the last character
/* PFILE is only used here for the internal-error report at the end.
   TOKEN is the token to spell and BUFFER the destination, which is
   advanced past each piece as it is written.  */
1295 cpp_spell_token (pfile
, token
, buffer
)
1296 cpp_reader
*pfile
; /* Would be nice to be rid of this... */
1297 const cpp_token
*token
;
1298 unsigned char *buffer
;
/* Dispatch on the spelling category of TOKEN.  */
1300 switch (TOKEN_SPELL (token
))
1302 case SPELL_OPERATOR
:
1304 const unsigned char *spelling
;
/* Digraph tokens take their spelling from digraph_spellings, indexed
   by the distance of the token type from CPP_FIRST_DIGRAPH.  The
   assignment made for NAMED_OP tokens is not shown here; all other
   operators use TOKEN_NAME.  */
1307 if (token
->flags
& DIGRAPH
)
1309 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1310 else if (token
->flags
& NAMED_OP
)
1313 spelling
= TOKEN_NAME (token
);
/* Walk the NUL-terminated spelling one character at a time; the loop
   body is not shown here (presumably it stores C into BUFFER).  */
1315 while ((c
= *spelling
++) != '\0')
/* Identifier: copy NODE_LEN bytes of the node name and step BUFFER
   past them.  */
1322 memcpy (buffer
, NODE_NAME (token
->val
.node
), NODE_LEN (token
->val
.node
));
1323 buffer
+= NODE_LEN (token
->val
.node
);
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG is an
   optional prefix character; a zero value means nothing is emitted
   for that slot.  */
1328 int left
, right
, tag
;
1329 switch (token
->type
)
1331 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1332 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1333 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1334 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1335 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1336 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Emit prefix, opening delimiter, the stored text (val.str.len bytes,
   copied verbatim) and the closing delimiter, in that order.  */
1338 if (tag
) *buffer
++ = tag
;
1339 if (left
) *buffer
++ = left
;
1340 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
1341 buffer
+= token
->val
.str
.len
;
1342 if (right
) *buffer
++ = right
;
/* Single-character token: its spelling is the character in val.c.  */
1347 *buffer
++ = token
->val
.c
;
/* Report a token that cannot be spelled through cpp_ice, naming it
   with TOKEN_NAME.  The case label selecting this path is not shown
   here.  */
1351 cpp_ice (pfile
, "Unspellable token %s", TOKEN_NAME (token
));
1358 /* Returns a token as a null-terminated string. The string is
1359 temporary, and automatically freed later. Useful for diagnostics. */
/* PFILE supplies the pool the text is allocated from and is passed on
   to cpp_spell_token; TOKEN is the token to render.  */
1361 cpp_token_as_text (pfile
, token
)
1363 const cpp_token
*token
;
/* Size the buffer with the upper bound from cpp_token_len and take it
   from the ident_pool of PFILE.  */
1365 unsigned int len
= cpp_token_len (token
);
1366 unsigned char *start
= _cpp_pool_alloc (&pfile
->ident_pool
, len
), *end
;
/* Spell the token into the buffer; END receives the pointer returned
   by cpp_spell_token.  NOTE(review): the terminating NUL is presumably
   stored through END in a statement not shown here - confirm.  */
1368 end
= cpp_spell_token (pfile
, token
, start
);
1374 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Returns the name field of the token_spellings entry for TYPE, cast
   to const char *.  TYPE indexes the table directly; no range check
   is made in the lines shown.  */
1376 cpp_type2name (type
)
1377 enum cpp_ttype type
;
1379 return (const char *) token_spellings
[type
].name
;
1382 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1383 for efficiency - to avoid double-buffering. Also, outputs a space
1384 if PREV_WHITE is flagged. */
/* TOKEN is the token to write and FP the stdio stream written to.  */
1386 cpp_output_token (token
, fp
)
1387 const cpp_token
*token
;
/* Test for the PREV_WHITE flag; the statement it controls is not shown
   here (per the comment above, a space is written).  */
1390 if (token
->flags
& PREV_WHITE
)
/* Dispatch on the spelling category of TOKEN.  */
1393 switch (TOKEN_SPELL (token
))
1395 case SPELL_OPERATOR
:
1397 const unsigned char *spelling
;
/* Digraph tokens take their spelling from digraph_spellings, indexed
   by the distance of the token type from CPP_FIRST_DIGRAPH.  The
   assignment made for NAMED_OP tokens is not shown here; all other
   operators use TOKEN_NAME.  */
1399 if (token
->flags
& DIGRAPH
)
1401 = digraph_spellings
[(int) token
->type
- (int) CPP_FIRST_DIGRAPH
];
1402 else if (token
->flags
& NAMED_OP
)
1405 spelling
= TOKEN_NAME (token
);
1407 ufputs (spelling
, fp
);
/* Identifier: write the node name with ufputs.  */
1413 ufputs (NODE_NAME (token
->val
.node
), fp
);
/* String-like tokens: LEFT and RIGHT are the delimiters and TAG is an
   optional prefix character; a zero value means nothing is written
   for that slot.  */
1418 int left
, right
, tag
;
1419 switch (token
->type
)
1421 case CPP_STRING
: left
= '"'; right
= '"'; tag
= '\0'; break;
1422 case CPP_WSTRING
: left
= '"'; right
= '"'; tag
= 'L'; break;
1423 case CPP_CHAR
: left
= '\''; right
= '\''; tag
= '\0'; break;
1424 case CPP_WCHAR
: left
= '\''; right
= '\''; tag
= 'L'; break;
1425 case CPP_HEADER_NAME
: left
= '<'; right
= '>'; tag
= '\0'; break;
1426 default: left
= '\0'; right
= '\0'; tag
= '\0'; break;
/* Write prefix, opening delimiter, the stored text and the closing
   delimiter.  The text goes out with fwrite using the recorded length
   val.str.len rather than a NUL terminator.  */
1428 if (tag
) putc (tag
, fp
);
1429 if (left
) putc (left
, fp
);
1430 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
1431 if (right
) putc (right
, fp
);
/* Single-character token: write the character in val.c.  */
1436 putc (token
->val
.c
, fp
);
1440 /* An error, most probably. */
1445 /* Compare two tokens. */
/* A and B are the tokens to compare.  The case labels selecting most
   of the return statements below are not shown here.  */
1447 _cpp_equiv_tokens (a
, b
)
1448 const cpp_token
*a
, *b
;
/* Payloads are only compared when both the type and the complete
   flags word are equal; the comparison then depends on the spelling
   category of A.  */
1450 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
1451 switch (TOKEN_SPELL (a
))
1453 default: /* Keep compiler happy. */
1454 case SPELL_OPERATOR
:
1457 return a
->val
.c
== b
->val
.c
; /* Character. */
/* Any token other than CPP_MACRO_ARG matches at this point; macro
   arguments must also carry the same arg_no.  */
1459 return (a
->type
!= CPP_MACRO_ARG
|| a
->val
.arg_no
== b
->val
.arg_no
);
/* Node payloads are compared by pointer identity.  */
1461 return a
->val
.node
== b
->val
.node
;
/* String payloads match when the lengths are equal and memcmp finds
   no difference in the text.  */
1463 return (a
->val
.str
.len
== b
->val
.str
.len
1464 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
1471 /* Determine whether two tokens can be pasted together, and if so,
1472 what the resulting token is. Returns CPP_EOF if the tokens cannot
1473 be pasted, or the appropriate type for the merged token if they can. */
/* PFILE supplies the language options consulted below.  TOKEN1 and
   TOKEN2 are the left and right operands of the paste.  DIGRAPH is an
   output: it is written through on the paths that produce a token
   which has a digraph spelling.  Most case labels of the switch on the
   left operand are not shown here, so the groups below are described
   by the results they return.  */
1476 cpp_can_paste (pfile
, token1
, token2
, digraph
)
1478 const cpp_token
*token1
, *token2
;
/* A and B are the types of the left and right operand; CXX caches the
   cplusplus option.  */
1481 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
1482 int cxx
= CPP_OPTION (pfile
, cplusplus
);
1484 /* Treat named operators as if they were ordinary NAMEs. */
1485 if (token1
->flags
& NAMED_OP
)
1487 if (token2
->flags
& NAMED_OP
)
/* Any type up to CPP_LAST_EQ followed by CPP_EQ pastes to the type
   that lies as far from A as CPP_EQ_EQ lies from CPP_EQ.  */
1490 if ((int) a
<= (int) CPP_LAST_EQ
&& b
== CPP_EQ
)
1491 return (enum cpp_ttype
) ((int) a
+ ((int) CPP_EQ_EQ
- (int) CPP_EQ
));
/* Group returning CPP_RSHIFT for a repeated token, CPP_MAX when
   followed by CPP_QUERY (C++ only) and CPP_RSHIFT_EQ when followed by
   CPP_GREATER_EQ.  */
1496 if (b
== a
) return CPP_RSHIFT
;
1497 if (b
== CPP_QUERY
&& cxx
) return CPP_MAX
;
1498 if (b
== CPP_GREATER_EQ
) return CPP_RSHIFT_EQ
;
/* Mirror-image group for CPP_LSHIFT, CPP_MIN (C++ only) and
   CPP_LSHIFT_EQ.  With the digraphs option on, this group can also
   form the two opening digraphs; the tests on B for those are not
   shown here.  */
1501 if (b
== a
) return CPP_LSHIFT
;
1502 if (b
== CPP_QUERY
&& cxx
) return CPP_MIN
;
1503 if (b
== CPP_LESS_EQ
) return CPP_LSHIFT_EQ
;
1504 if (CPP_OPTION (pfile
, digraphs
))
1507 {*digraph
= 1; return CPP_OPEN_SQUARE
;} /* <: digraph */
1509 {*digraph
= 1; return CPP_OPEN_BRACE
;} /* <% digraph */
/* Operators that double when followed by themselves.  */
1513 case CPP_PLUS
: if (b
== a
) return CPP_PLUS_PLUS
; break;
1514 case CPP_AND
: if (b
== a
) return CPP_AND_AND
; break;
1515 case CPP_OR
: if (b
== a
) return CPP_OR_OR
; break;
/* Group returning CPP_MINUS_MINUS for a repeated token and CPP_DEREF
   when followed by CPP_GREATER.  */
1518 if (b
== a
) return CPP_MINUS_MINUS
;
1519 if (b
== CPP_GREATER
) return CPP_DEREF
;
/* Group returning CPP_SCOPE for a repeated token (C++ only) and the
   closing square digraph when followed by CPP_GREATER with the
   digraphs option on.  */
1522 if (b
== a
&& cxx
) return CPP_SCOPE
;
1523 if (b
== CPP_GREATER
&& CPP_OPTION (pfile
, digraphs
))
1524 {*digraph
= 1; return CPP_CLOSE_SQUARE
;} /* :> digraph */
/* Group forming the remaining digraphs, only with the digraphs option
   on.  The test on B guarding the CPP_HASH result is not shown
   here.  */
1528 if (CPP_OPTION (pfile
, digraphs
))
1530 if (b
== CPP_GREATER
)
1531 {*digraph
= 1; return CPP_CLOSE_BRACE
;} /* %> digraph */
1533 {*digraph
= 1; return CPP_HASH
;} /* %: digraph */
/* C++ only: a following CPP_MULT gives CPP_DEREF_STAR.  */
1537 if (b
== CPP_MULT
&& cxx
) return CPP_DEREF_STAR
;
/* A following CPP_MULT gives CPP_DOT_STAR in C++ only; a following
   CPP_NUMBER gives CPP_NUMBER.  */
1540 if (b
== CPP_MULT
&& cxx
) return CPP_DOT_STAR
;
1541 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
/* Two tokens of the same type whose DIGRAPH flags agree paste to
   CPP_PASTE; *DIGRAPH is set to the DIGRAPH flag bit of the left
   operand.  */
1545 if (b
== a
&& (token1
->flags
& DIGRAPH
) == (token2
->flags
& DIGRAPH
))
1547 {*digraph
= (token1
->flags
& DIGRAPH
); return CPP_PASTE
;}
/* Group returning CPP_NAME when followed by CPP_NAME, or when the
   string of the right operand passes name_p (the first half of that
   condition is not shown here).  When the node of the left operand is
   the n_L special node, the result is CPP_WCHAR or CPP_WSTRING; the
   tests on B selecting between them are not shown here.  */
1551 if (b
== CPP_NAME
) return CPP_NAME
;
1553 && name_p (pfile
, &token2
->val
.str
)) return CPP_NAME
;
1555 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WCHAR
;
1557 && token1
->val
.node
== pfile
->spec_nodes
.n_L
) return CPP_WSTRING
;
/* Group returning CPP_NUMBER when followed by CPP_NUMBER, CPP_NAME or
   CPP_DOT.  A following CPP_PLUS or CPP_MINUS is additionally tested
   with VALID_SIGN against the last character of the text of the left
   operand; the statement that test controls is not shown here.  */
1561 if (b
== CPP_NUMBER
) return CPP_NUMBER
;
1562 if (b
== CPP_NAME
) return CPP_NUMBER
;
1563 if (b
== CPP_DOT
) return CPP_NUMBER
;
1564 /* Numbers cannot have length zero, so this is safe. */
1565 if ((b
== CPP_PLUS
|| b
== CPP_MINUS
)
1566 && VALID_SIGN ('+', token1
->val
.str
.text
[token1
->val
.str
.len
- 1]))
1577 /* Returns nonzero if a space should be inserted to avoid an
1578 accidental token paste for output. For simplicity, it is
1579 conservative, and occasionally advises a space where one is not
1580 needed, e.g. "." and ".2". */
/* PFILE supplies options and is passed to name_p.  TOKEN1 is the token
   already output and TOKEN2 the token about to follow it.  */
1583 cpp_avoid_paste (pfile
, token1
, token2
)
1585 const cpp_token
*token1
, *token2
;
/* A and B are the types of the left and right token.  */
1587 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
/* Tests for named operators; the statements they control are not
   shown here.  */
1590 if (token1
->flags
& NAMED_OP
)
1592 if (token2
->flags
& NAMED_OP
)
/* C becomes the first character of the spelling of TOKEN2 when it is
   a digraph or belongs to the SPELL_OPERATOR category.  The value C
   holds otherwise is set in a line not shown here.  */
1596 if (token2
->flags
& DIGRAPH
)
1597 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
1598 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
1599 c
= token_spellings
[b
].name
[0];
1601 /* Quickly get everything that can paste with an '='. */
1602 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
/* For each left token type, report whether the first character C (or
   the type B) of the right token could merge with it.  */
1607 case CPP_GREATER
: return c
== '>' || c
== '?';
1608 case CPP_LESS
: return c
== '<' || c
== '?' || c
== '%' || c
== ':';
1609 case CPP_PLUS
: return c
== '+';
1610 case CPP_MINUS
: return c
== '-' || c
== '>';
1611 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
1612 case CPP_MOD
: return c
== ':' || c
== '>';
1613 case CPP_AND
: return c
== '&';
1614 case CPP_OR
: return c
== '|';
1615 case CPP_COLON
: return c
== ':' || c
== '>';
1616 case CPP_DEREF
: return c
== '*';
1617 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
1618 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
/* After a name: a number whose text passes name_p, or a character or
   string constant.  One alternative of this condition is not shown
   here.  */
1619 case CPP_NAME
: return ((b
== CPP_NUMBER
1620 && name_p (pfile
, &token2
->val
.str
))
1622 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
1623 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
1624 || c
== '.' || c
== '+' || c
== '-');
/* An at-sign held in a CPP_OTHER token matters only when the objc
   option is on, before a name or a string.  */
1625 case CPP_OTHER
: return (CPP_OPTION (pfile
, objc
)
1626 && token1
->val
.c
== '@'
1627 && (b
== CPP_NAME
|| b
== CPP_STRING
));
1634 /* Output all the remaining tokens on the current line, and a newline
1635 character, to FP. Leading whitespace is removed. */
/* PFILE is the reader the tokens are fetched from; FP is the stream
   they are written to.  */
1637 cpp_output_line (pfile
, fp
)
/* Fetch the first token and clear its PREV_WHITE flag, so that
   cpp_output_token has no leading-whitespace flag to act on.  */
1643 cpp_get_token (pfile
, &token
);
1644 token
.flags
&= ~PREV_WHITE
;
/* Write each token and fetch the next until CPP_EOF is returned.  */
1645 while (token
.type
!= CPP_EOF
)
1647 cpp_output_token (&token
, fp
);
1648 cpp_get_token (pfile
, &token
);
1654 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the header of this function is not shown here; the
   name hex_digit_value is presumed from the calls made elsewhere in
   this file - confirm.  C is the character to convert.  */
/* Lower-case a to f map to 10 through 15.  */
1659 if (c
>= 'a' && c
<= 'f')
1660 return c
- 'a' + 10;
/* Upper-case A to F map to 10 through 15.  */
1661 if (c
>= 'A' && c
<= 'F')
1662 return c
- 'A' + 10;
/* Decimal digits; the statement controlled by this test, and whatever
   happens for any other character, are not shown here.  */
1663 if (c
>= '0' && c
<= '9')
1668 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1669 failure if cpplib is not parsing C++ or C99. Such failure is
1670 silent, and no variables are updated. Otherwise returns 0, and
1671 warns if -Wtraditional.
1673 [lex.charset]: The character designated by the universal character
1674 name \UNNNNNNNN is that character whose character short name in
1675 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676 universal character name \uNNNN is that character whose character
1677 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1678 for a universal character name is less than 0x20 or in the range
1679 0x7F-0x9F (inclusive), or if the universal character name
1680 designates a character in the basic source character set, then the
1681 program is ill-formed.
1683 We assume that wchar_t is Unicode, so we don't need to do any
1684 mapping. Is this ever wrong?
1686 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1687 LIMIT is the end of the string or charconst. PSTR is updated to
1688 point after the UCS on return, and the UCS is written into PC. */
1691 maybe_read_ucs (pfile
, pstr
, limit
, pc
)
1693 const unsigned char **pstr
;
1694 const unsigned char *limit
;
/* P is a local cursor starting at *PSTR, CODE accumulates the value
   and C starts out as the character read through PC.  */
1697 const unsigned char *p
= *pstr
;
1698 unsigned int code
= 0;
1699 unsigned int c
= *pc
, length
;
1701 /* Only attempt to interpret a UCS for C++ and C99. */
1702 if (! (CPP_OPTION (pfile
, cplusplus
) || CPP_OPTION (pfile
, c99
)))
1705 if (CPP_WTRADITIONAL (pfile
))
1706 cpp_warning (pfile
, "the meaning of '\\%c' varies with -traditional", c
);
/* The lower-case form takes four hex digits, the other form eight.  */
1708 length
= (c
== 'u' ? 4: 8);
/* Fewer than LENGTH characters remain before LIMIT.  */
1710 if ((size_t) (limit
- p
) < length
)
1712 cpp_error (pfile
, "incomplete universal-character-name");
1713 /* Skip to the end to avoid more diagnostics. */
/* Consume up to LENGTH characters, shifting each hex digit into CODE.
   The test separating the digit path from the error path is not shown
   here.  */
1718 for (; length
; length
--, p
++)
1722 code
= (code
<< 4) + hex_digit_value (c
);
1726 "non-hex digit '%c' in universal-character-name", c
);
1727 /* We shouldn't skip in case there are multibyte chars. */
/* On EBCDIC targets the sequence is rejected and CODE is replaced by
   0x3f.  */
1733 #ifdef TARGET_EBCDIC
1734 cpp_error (pfile
, "universal-character-name on EBCDIC target");
1735 code
= 0x3f; /* EBCDIC invalid character */
/* Range checks on CODE; the first line of this condition is not shown
   here.  The visible parts exclude values with the top bit set and
   the range 0xD800 to 0xDFFF.  */
1737 /* True extended characters are OK. */
1739 && !(code
& 0x80000000)
1740 && !(code
>= 0xD800 && code
<= 0xDFFF))
1742 /* The standard permits $, @ and ` to be specified as UCNs. We use
1743 hex escapes so that this also works with EBCDIC hosts. */
1744 else if (code
== 0x24 || code
== 0x40 || code
== 0x60)
/* LENGTH is zero only when the digit loop above ran to completion.  */
1746 /* Don't give another error if one occurred above. */
1747 else if (length
== 0)
1748 cpp_error (pfile
, "universal-character-name out of range");
1756 /* Interpret an escape sequence, and return its value. PSTR points to
1757 the input pointer, which is just after the backslash. LIMIT is how
1758 much text we have. MASK is a bitmask for the precision for the
1759 destination type (char or wchar_t). TRADITIONAL, if true, does not
1760 interpret escapes that did not exist in traditional C.
1762 Handles all relevant diagnostics. */
1765 cpp_parse_escape (pfile
, pstr
, limit
, mask
, traditional
)
1767 const unsigned char **pstr
;
1768 const unsigned char *limit
;
1769 unsigned HOST_WIDE_INT mask
;
/* STR is a local cursor starting at *PSTR; C is the character that
   follows the backslash, and STR is left pointing just past it.  */
1773 const unsigned char *str
= *pstr
;
1774 unsigned int c
= *str
++;
/* These characters stand for themselves.  */
1778 case '\\': case '\'': case '"': case '?': break;
/* Control-character escapes map to the TARGET_ values.  */
1779 case 'b': c
= TARGET_BS
; break;
1780 case 'f': c
= TARGET_FF
; break;
1781 case 'n': c
= TARGET_NEWLINE
; break;
1782 case 'r': c
= TARGET_CR
; break;
1783 case 't': c
= TARGET_TAB
; break;
1784 case 'v': c
= TARGET_VT
; break;
1786 case '(': case '{': case '[': case '%':
1787 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1788 '\%' is used to prevent SCCS from getting confused. */
/* These are flagged as unknown only when CPP_PEDANTIC holds.  */
1789 unknown
= CPP_PEDANTIC (pfile
);
/* Alert escape: warn under CPP_WTRADITIONAL.  The case label and the
   assignment to C are not shown here.  */
1793 if (CPP_WTRADITIONAL (pfile
))
1794 cpp_warning (pfile
, "the meaning of '\\a' varies with -traditional");
/* Non-ISO escape accepted with a pedantic warning; its case label is
   not shown here.  */
1800 if (CPP_PEDANTIC (pfile
))
1801 cpp_pedwarn (pfile
, "non-ISO-standard escape sequence, '\\%c'", c
);
/* Universal character name: maybe_read_ucs advances STR and rewrites
   C; its return value says whether the escape was left
   uninterpreted.  */
1806 unknown
= maybe_read_ucs (pfile
, &str
, limit
, &c
);
/* Hex escape.  */
1810 if (CPP_WTRADITIONAL (pfile
))
1811 cpp_warning (pfile
, "the meaning of '\\x' varies with -traditional");
1815 unsigned int i
= 0, overflow
= 0;
1816 int digits_found
= 0;
/* Record overflow when shifting I left by four bits would lose its
   top bits, then append the next digit.  The surrounding loop is not
   shown here.  */
1824 overflow
|= i
^ (i
<< 4 >> 4);
1825 i
= (i
<< 4) + hex_digit_value (c
);
1830 cpp_error (pfile
, "\\x used with no following hex digits");
/* Out of range when digits were lost or the value has bits outside
   MASK.  */
1832 if (overflow
| (i
!= (i
& mask
)))
1834 cpp_pedwarn (pfile
, "hex escape sequence out of range");
/* Octal escape: the first digit has already been read into C.  */
1841 case '0': case '1': case '2': case '3':
1842 case '4': case '5': case '6': case '7':
1844 unsigned int i
= c
- '0';
/* Read further octal digits while text remains and COUNT stays below
   three; COUNT is initialised in a line not shown here.  */
1847 while (str
< limit
&& ++count
< 3)
1850 if (c
< '0' || c
> '7')
1853 i
= (i
<< 3) + c
- '0';
1856 if (i
!= (i
& mask
))
1858 cpp_pedwarn (pfile
, "octal escape sequence out of range");
/* Diagnostics for unknown escapes, with the character shown literally
   or as three octal digits; the test choosing between the two forms
   is not shown here.  */
1873 cpp_pedwarn (pfile
, "unknown escape sequence '\\%c'", c
);
1875 cpp_pedwarn (pfile
, "unknown escape sequence: '\\%03o'", c
);
/* Final range diagnostic; the condition guarding it is not shown
   here.  */
1879 cpp_pedwarn (pfile
, "escape sequence out of range for character");
/* Default the maximum char and wchar_t widths to CHAR_TYPE_SIZE and
   WCHAR_TYPE_SIZE when they have not already been defined.  */
1885 #ifndef MAX_CHAR_TYPE_SIZE
1886 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1889 #ifndef MAX_WCHAR_TYPE_SIZE
1890 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1893 /* Interpret a (possibly wide) character constant in TOKEN.
1894 WARN_MULTI warns about multi-character charconsts, if not
1895 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1896 that did not exist in traditional C. PCHARS_SEEN points to a
1897 variable that is filled in with the number of characters seen. */
1899 cpp_interpret_charconst (pfile
, token
, warn_multi
, traditional
, pchars_seen
)
1901 const cpp_token
*token
;
1904 unsigned int *pchars_seen
;
/* STR and LIMIT delimit the text of the constant as stored in the
   token; RESULT accumulates the value.  */
1906 const unsigned char *str
= token
->val
.str
.text
;
1907 const unsigned char *limit
= str
+ token
->val
.str
.len
;
1908 unsigned int chars_seen
= 0;
1909 unsigned int width
, max_chars
, c
;
1910 unsigned HOST_WIDE_INT mask
;
1911 HOST_WIDE_INT result
= 0;
1913 #ifdef MULTIBYTE_CHARS
1914 (void) local_mbtowc (NULL
, NULL
, 0);
1917 /* Width in bits. */
1918 if (token
->type
== CPP_CHAR
)
1919 width
= MAX_CHAR_TYPE_SIZE
;
1921 width
= MAX_WCHAR_TYPE_SIZE
;
/* MASK selects the low WIDTH bits when WIDTH is narrower than
   HOST_WIDE_INT; the assignment for the other case is not shown
   here.  */
1923 if (width
< HOST_BITS_PER_WIDE_INT
)
1924 mask
= ((unsigned HOST_WIDE_INT
) 1 << width
) - 1;
/* MAX_CHARS is how many WIDTH-bit characters fit in a
   HOST_WIDE_INT.  */
1927 max_chars
= HOST_BITS_PER_WIDE_INT
/ width
;
1931 #ifdef MULTIBYTE_CHARS
1935 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
1938 cpp_warning (pfile
, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which advances
   STR.  */
1951 c
= cpp_parse_escape (pfile
, &str
, limit
, mask
, traditional
);
1953 #ifdef MAP_CHARACTER
1955 c
= MAP_CHARACTER (c
);
1958 /* Merge character into result; ignore excess chars. */
1959 if (++chars_seen
<= max_chars
)
1961 if (width
< HOST_BITS_PER_WIDE_INT
)
1962 result
= (result
<< width
) | (c
& mask
);
/* Diagnose empty, over-long and multi-character constants.  An
   over-long constant has CHARS_SEEN clamped to MAX_CHARS.  */
1968 if (chars_seen
== 0)
1969 cpp_error (pfile
, "empty character constant");
1970 else if (chars_seen
> max_chars
)
1972 chars_seen
= max_chars
;
1973 cpp_warning (pfile
, "character constant too long");
1975 else if (chars_seen
> 1 && !traditional
&& warn_multi
)
1976 cpp_warning (pfile
, "multi-character character constant");
1978 /* If char type is signed, sign-extend the constant. The
1979 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1980 if (token
->type
== CPP_CHAR
&& chars_seen
)
/* NBITS is the number of significant bits in RESULT.  The MASK
   declared here is a separate unsigned int, distinct from the
   HOST_WIDE_INT MASK above.
   NOTE(review): the shift count HOST_BITS_PER_INT - nbits assumes
   NBITS does not exceed HOST_BITS_PER_INT - confirm.  */
1982 unsigned int nbits
= chars_seen
* width
;
1983 unsigned int mask
= (unsigned int) ~0 >> (HOST_BITS_PER_INT
- nbits
);
/* True when __CHAR_UNSIGNED__ is defined as a macro or the top bit of
   the NBITS-bit value is clear; the statements selected by this test
   are not shown here.  */
1985 if (pfile
->spec_nodes
.n__CHAR_UNSIGNED__
->type
== NT_MACRO
1986 || ((result
>> (nbits
- 1)) & 1) == 0)
/* Report the (possibly clamped) character count to the caller.  */
1992 *pchars_seen
= chars_seen
;
/* Default alignment for pool memory: the offset of member u within
   struct dummy.  The definition of struct dummy is not shown here.  */
2008 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Returns nonzero if CHUNK is non-null, is not the chunk recorded in
   POOL->locked, and spans at least twice SIZE bytes between its base
   and its limit.  */
2011 chunk_suitable (pool
, chunk
, size
)
2016 /* Being at least twice SIZE means we can use memcpy in
2017 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2019 return (chunk
&& pool
->locked
!= chunk
2020 && (unsigned int) (chunk
->limit
- chunk
->base
) >= size
* 2);
2023 /* Returns the end of the new pool. PTR points to a char in the old
2024 pool, and is updated to point to the same char in the new pool. */
/* POOL is the pool to advance and LEN the number of additional bytes
   wanted.  */
2026 _cpp_next_chunk (pool
, len
, ptr
)
2029 unsigned char **ptr
;
/* The first candidate is the chunk that follows the current one.  */
2031 cpp_chunk
*chunk
= pool
->cur
->next
;
2033 /* LEN is the minimum size we want in the new pool. */
2034 len
+= POOL_ROOM (pool
);
/* If the candidate fails chunk_suitable, allocate a fresh chunk of
   twice POOL_SIZE plus LEN and link it in directly after the current
   chunk.  */
2035 if (! chunk_suitable (pool
, chunk
, len
))
2037 chunk
= new_chunk (POOL_SIZE (pool
) * 2 + len
);
2039 chunk
->next
= pool
->cur
->next
;
2040 pool
->cur
->next
= chunk
;
/* NOTE(review): _cpp_pool_reserve passes 0 for PTR, so the adjustment
   below is presumably guarded by a test not shown here - confirm.  */
2043 /* Update the pointer before changing chunk's front. */
2045 *ptr
+= chunk
->base
- POOL_FRONT (pool
);
/* Copy POOL_ROOM bytes starting at POOL_FRONT of the old chunk to the
   base of the new chunk, then reset the front of the new chunk to its
   base.  */
2047 memcpy (chunk
->base
, POOL_FRONT (pool
), POOL_ROOM (pool
));
2048 chunk
->front
= chunk
->base
;
2051 return POOL_LIMIT (pool
);
/* NOTE(review): the header of this function is not shown here; the
   name new_chunk is presumed from the calls made elsewhere in this
   file - confirm.  Allocates one block holding SIZE bytes of storage
   followed by the cpp_chunk descriptor that describes it.  */
2058 unsigned char *base
;
/* Round SIZE up with POOL_ALIGN to DEFAULT_ALIGNMENT and allocate room
   for the storage plus the descriptor in a single xmalloc call.  */
2061 size
= POOL_ALIGN (size
, DEFAULT_ALIGNMENT
);
2062 base
= (unsigned char *) xmalloc (size
+ sizeof (cpp_chunk
));
2063 /* Put the chunk descriptor at the end. Then chunk overruns will
2064 cause obvious chaos. */
2065 result
= (cpp_chunk
*) (base
+ size
);
/* The chunk starts out empty: front equals base, and limit lies SIZE
   bytes further on, where the descriptor begins.  */
2066 result
->base
= base
;
2067 result
->front
= base
;
2068 result
->limit
= base
+ size
;
/* Initialise POOL with a first chunk of SIZE bytes and alignment
   ALIGN.  How TEMP is used is not visible in the lines shown.  */
2075 _cpp_init_pool (pool
, size
, align
, temp
)
2077 unsigned int size
, align
, temp
;
/* Fall back to DEFAULT_ALIGNMENT; the condition guarding this
   assignment is not shown here.  */
2080 align
= DEFAULT_ALIGNMENT
;
/* Nonzero when ALIGN has more than one bit set, that is, when it is
   not a power of two; the statement this test controls is not shown
   here.  */
2081 if (align
& (align
- 1))
2083 pool
->align
= align
;
2084 pool
->cur
= new_chunk (size
);
/* Link the chunk to itself.  NOTE(review): any condition guarding this
   assignment is not shown here - presumably it depends on TEMP,
   confirm.  */
2088 pool
->cur
->next
= pool
->cur
;
/* Take one lock on POOL.  The lock count is always incremented; when
   it was zero beforehand, the current chunk is recorded in
   POOL->locked.  */
2092 _cpp_lock_pool (pool
)
2095 if (pool
->locks
++ == 0)
2096 pool
->locked
= pool
->cur
;
/* Release one lock on POOL by decrementing the lock count.  The
   statement run when the count reaches zero is not shown here
   (presumably it clears POOL->locked - confirm).  */
2100 _cpp_unlock_pool (pool
)
2103 if (--pool
->locks
== 0)
/* Walk the chunks of POOL starting from the current one.  The loop
   body is not shown here; presumably each chunk is released and NEXT
   holds its successor - confirm.  */
2108 _cpp_free_pool (pool
)
2111 cpp_chunk
*chunk
= pool
->cur
, *next
;
/* Stop when the chain ends or arrives back at POOL->cur.  */
2119 while (chunk
&& chunk
!= pool
->cur
);
2122 /* Reserve LEN bytes from a memory pool. */
/* Returns POOL_FRONT of POOL once at least LEN bytes, rounded up to
   the alignment of the pool, are available there.  Nothing in the
   lines shown advances the front.  */
2124 _cpp_pool_reserve (pool
, len
)
/* Round the request up to the alignment of the pool.  */
2128 len
= POOL_ALIGN (len
, pool
->align
);
/* Move to another chunk when the current one has too little room; no
   caller pointer needs relocating, so 0 is passed for PTR.  */
2129 if (len
> (unsigned int) POOL_ROOM (pool
))
2130 _cpp_next_chunk (pool
, len
, 0);
2132 return POOL_FRONT (pool
);
2135 /* Allocate LEN bytes from a memory pool. */
2137 _cpp_pool_alloc (pool
, len
)
2141 unsigned char *result
= _cpp_pool_reserve (pool
, len
);
2143 POOL_COMMIT (pool
, len
);