1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Raw buffer access primitives.  BUFFER must point to an object that
   has `cur' and `rlimit' members.  BUFFER is expanded more than once
   in PEEKBUF and GETBUF, so it must be free of side effects.  */
/* PEEKBUF: yield the character N positions past the cursor without
   moving the cursor, or EOF unless more than N characters lie between
   `cur' and `rlimit'.  */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* GETBUF: yield the character at the cursor and advance the cursor by
   one, or yield EOF (leaving the cursor alone) once `cur' has reached
   `rlimit'.  */
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* FORWARDBUF: move the cursor by N characters.  No bounds check is
   made against `rlimit'; the macro simply adds N to `cur'.  */
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthands that apply the buffer primitives to CPP_BUFFER (pfile).
   They name `pfile' directly, so a variable of that name must be in
   scope wherever they are used.  PEEKC is PEEKN with an offset of 0.  */
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment
PARAMS ((cpp_reader
*));
41 static void skip_line_comment
PARAMS ((cpp_reader
*));
42 static int maybe_macroexpand
PARAMS ((cpp_reader
*, long));
43 static int skip_comment
PARAMS ((cpp_reader
*, int));
44 static int copy_comment
PARAMS ((cpp_reader
*, int));
45 static void skip_string
PARAMS ((cpp_reader
*, int));
46 static void parse_string
PARAMS ((cpp_reader
*, int));
47 static U_CHAR
*find_position
PARAMS ((U_CHAR
*, U_CHAR
*, unsigned long *));
48 static void null_warning
PARAMS ((cpp_reader
*, unsigned int));
50 static void safe_fwrite
PARAMS ((cpp_reader
*, const U_CHAR
*,
52 static void output_line_command
PARAMS ((cpp_reader
*, cpp_printer
*,
54 static void bump_column
PARAMS ((cpp_printer
*, unsigned int,
56 static void expand_name_space
PARAMS ((cpp_toklist
*, unsigned int));
57 static void expand_token_space
PARAMS ((cpp_toklist
*));
58 static void init_token_list
PARAMS ((cpp_reader
*, cpp_toklist
*, int));
59 static void pedantic_whitespace
PARAMS ((cpp_reader
*, U_CHAR
*,
/* Enlarge LIST's name buffer through expand_name_space, asking for an
   additional half of its current name_cap.  LIST is expanded twice, so
   it must be free of side effects.  */
62 #define auto_expand_name_space(list) \
63 expand_name_space ((list), (list)->name_cap / 2)
65 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
68 _cpp_grow_token_buffer (pfile
, n
)
72 long old_written
= CPP_WRITTEN (pfile
);
73 pfile
->token_buffer_size
= n
+ 2 * pfile
->token_buffer_size
;
74 pfile
->token_buffer
= (U_CHAR
*)
75 xrealloc(pfile
->token_buffer
, pfile
->token_buffer_size
);
76 CPP_SET_WRITTEN (pfile
, old_written
);
79 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
80 If BUFFER != NULL, then use the LENGTH characters in BUFFER
81 as the new input buffer.
82 Return the new buffer, or NULL on failure. */
85 cpp_push_buffer (pfile
, buffer
, length
)
90 cpp_buffer
*buf
= CPP_BUFFER (pfile
);
92 if (++pfile
->buffer_stack_depth
== CPP_STACK_MAX
)
94 cpp_fatal (pfile
, "macro or `#include' recursion too deep");
98 new = (cpp_buffer
*) xcalloc (1, sizeof (cpp_buffer
));
100 new->if_stack
= pfile
->if_stack
;
101 new->buf
= new->cur
= buffer
;
102 new->rlimit
= buffer
+ length
;
105 new->line_base
= NULL
;
107 CPP_BUFFER (pfile
) = new;
112 cpp_pop_buffer (pfile
)
115 cpp_buffer
*buf
= CPP_BUFFER (pfile
);
116 if (ACTIVE_MARK_P (pfile
))
117 cpp_ice (pfile
, "mark active in cpp_pop_buffer");
121 _cpp_unwind_if_stack (pfile
, buf
);
123 free ((PTR
) buf
->buf
);
124 if (pfile
->system_include_depth
)
125 pfile
->system_include_depth
--;
126 if (pfile
->potential_control_macro
)
128 buf
->ihash
->control_macro
= pfile
->potential_control_macro
;
129 pfile
->potential_control_macro
= 0;
131 pfile
->input_stack_listing_current
= 0;
135 HASHNODE
*m
= buf
->macro
;
138 if ((m
->type
== T_FMACRO
&& buf
->mapped
)
139 || m
->type
== T_SPECLINE
|| m
->type
== T_FILE
140 || m
->type
== T_BASE_FILE
|| m
->type
== T_INCLUDE_LEVEL
141 || m
->type
== T_STDC
)
142 free ((PTR
) buf
->buf
);
144 CPP_BUFFER (pfile
) = CPP_PREV_BUFFER (buf
);
146 pfile
->buffer_stack_depth
--;
147 return CPP_BUFFER (pfile
);
150 /* Deal with the annoying semantics of fwrite. */
152 safe_fwrite (pfile
, buf
, len
, fp
)
162 count
= fwrite (buf
, 1, len
, fp
);
171 cpp_notice_from_errno (pfile
, CPP_OPTION (pfile
, out_fname
));
174 /* Notify the compiler proper that the current line number has jumped,
175 or the current file name has changed. */
178 output_line_command (pfile
, print
, line
)
183 cpp_buffer
*ip
= cpp_file_buffer (pfile
);
184 enum { same
= 0, enter
, leave
, rname
} change
;
185 static const char * const codes
[] = { "", " 1", " 2", "" };
187 if (CPP_OPTION (pfile
, no_line_commands
))
190 /* Determine whether the current filename has changed, and if so,
191 how. 'nominal_fname' values are unique, so they can be compared
192 by comparing pointers. */
193 if (ip
->nominal_fname
== print
->last_fname
)
197 if (pfile
->buffer_stack_depth
== print
->last_bsd
)
201 if (pfile
->buffer_stack_depth
> print
->last_bsd
)
205 print
->last_bsd
= pfile
->buffer_stack_depth
;
207 print
->last_fname
= ip
->nominal_fname
;
209 /* If the current file has not changed, we can output a few newlines
210 instead if we want to increase the line number by a small amount.
211 We cannot do this if print->lineno is zero, because that means we
212 haven't output any line commands yet. (The very first line
213 command output is a `same_file' command.) */
214 if (change
== same
&& print
->lineno
!= 0
215 && line
>= print
->lineno
&& line
< print
->lineno
+ 8)
217 while (line
> print
->lineno
)
219 putc ('\n', print
->outf
);
225 #ifndef NO_IMPLICIT_EXTERN_C
226 if (CPP_OPTION (pfile
, cplusplus
))
227 fprintf (print
->outf
, "# %u \"%s\"%s%s%s\n", line
, ip
->nominal_fname
,
229 ip
->system_header_p
? " 3" : "",
230 (ip
->system_header_p
== 2) ? " 4" : "");
233 fprintf (print
->outf
, "# %u \"%s\"%s%s\n", line
, ip
->nominal_fname
,
235 ip
->system_header_p
? " 3" : "");
236 print
->lineno
= line
;
239 /* Write the contents of the token_buffer to the output stream, and
240 clear the token_buffer. Also handles generating line commands and
241 keeping track of file transitions. */
244 cpp_output_tokens (pfile
, print
)
250 if (CPP_WRITTEN (pfile
) - print
->written
)
252 if (CPP_PWRITTEN (pfile
)[-1] == '\n' && print
->lineno
)
254 safe_fwrite (pfile
, pfile
->token_buffer
,
255 CPP_WRITTEN (pfile
) - print
->written
, print
->outf
);
258 ip
= cpp_file_buffer (pfile
);
260 output_line_command (pfile
, print
, CPP_BUF_LINE (ip
));
262 CPP_SET_WRITTEN (pfile
, print
->written
);
265 /* Helper for cpp_output_list - increases the column number to match
266 what we expect it to be. */
269 bump_column (print
, from
, to
)
271 unsigned int from
, to
;
273 unsigned int tabs
, spcs
;
274 unsigned int delta
= to
- from
;
276 /* Only if FROM is 0, advance by tabs. */
278 tabs
= delta
/ 8, spcs
= delta
% 8;
280 tabs
= 0, spcs
= delta
;
282 while (tabs
--) putc ('\t', print
->outf
);
283 while (spcs
--) putc (' ', print
->outf
);
286 /* Write out the list L onto pfile->token_buffer. This function is
287 incomplete:
289 1) pfile->token_buffer is not going to continue to exist.
290 2) At the moment, tokens don't carry the information described
291 in cpplib.h; they are all strings.
292 3) The list has to be a complete line, and has to be written starting
293 at the beginning of a line. */
296 cpp_output_list (pfile
, print
, list
)
299 const cpp_toklist
*list
;
302 unsigned int curcol
= 1;
304 /* XXX Probably does not do what is intended. */
305 if (print
->lineno
!= list
->line
)
306 output_line_command (pfile
, print
, list
->line
);
308 for (i
= 0; i
< list
->tokens_used
; i
++)
310 if (TOK_TYPE (list
, i
) == CPP_VSPACE
)
312 output_line_command (pfile
, print
, list
->tokens
[i
].aux
);
316 if (curcol
< TOK_COL (list
, i
))
318 /* Insert space to bring the column to what it should be. */
319 bump_column (print
, curcol
- 1, TOK_COL (list
, i
));
320 curcol
= TOK_COL (list
, i
);
322 /* XXX We may have to insert space to prevent an accidental
323 token paste. */
324 safe_fwrite (pfile
, TOK_NAME (list
, i
), TOK_LEN (list
, i
), print
->outf
);
325 curcol
+= TOK_LEN (list
, i
);
329 /* Scan a string (which may have escape marks), perform macro expansion,
330 and write the result to the token_buffer. */
333 _cpp_expand_to_buffer (pfile
, buf
, length
)
339 enum cpp_ttype token
;
344 cpp_ice (pfile
, "length < 0 in cpp_expand_to_buffer");
348 /* Copy the buffer, because it might be in an unsafe place - for
349 example, a sequence on the token_buffer, where the pointers will
350 be invalidated if we enlarge the token_buffer. */
351 buf1
= alloca (length
);
352 memcpy (buf1
, buf
, length
);
354 /* Set up the input on the input stack. */
355 stop
= CPP_BUFFER (pfile
);
356 if (cpp_push_buffer (pfile
, buf1
, length
) == NULL
)
358 CPP_BUFFER (pfile
)->has_escapes
= 1;
360 /* Scan the input, create the output. */
363 token
= cpp_get_token (pfile
);
364 if (token
== CPP_EOF
&& CPP_BUFFER (pfile
) == stop
)
369 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
372 cpp_scan_buffer_nooutput (pfile
)
375 cpp_buffer
*stop
= CPP_PREV_BUFFER (CPP_BUFFER (pfile
));
376 enum cpp_ttype token
;
377 unsigned int old_written
= CPP_WRITTEN (pfile
);
378 /* In no-output mode, we can ignore everything but directives. */
381 if (! pfile
->only_seen_white
)
382 _cpp_skip_rest_of_line (pfile
);
383 token
= cpp_get_token (pfile
);
384 if (token
== CPP_EOF
&& CPP_BUFFER (pfile
) == stop
)
387 CPP_SET_WRITTEN (pfile
, old_written
);
390 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
393 cpp_scan_buffer (pfile
, print
)
397 cpp_buffer
*stop
= CPP_PREV_BUFFER (CPP_BUFFER (pfile
));
398 enum cpp_ttype token
;
402 token
= cpp_get_token (pfile
);
403 if (token
== CPP_EOF
|| token
== CPP_VSPACE
404 /* XXX Temporary kluge - force flush after #include only */
405 || (token
== CPP_DIRECTIVE
406 && CPP_BUFFER (pfile
)->nominal_fname
!= print
->last_fname
))
408 cpp_output_tokens (pfile
, print
);
409 if (token
== CPP_EOF
&& CPP_BUFFER (pfile
) == stop
)
415 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
418 cpp_file_buffer (pfile
)
423 for (ip
= CPP_BUFFER (pfile
); ip
; ip
= CPP_PREV_BUFFER (ip
))
424 if (ip
->ihash
!= NULL
)
429 /* Token-buffer helper functions. */
431 /* Expand a token list's string space. */
433 expand_name_space (list
, len
)
437 list
->name_cap
+= len
;
438 list
->namebuf
= (unsigned char *) xrealloc (list
->namebuf
, list
->name_cap
);
441 /* Expand the number of tokens in a list. */
443 expand_token_space (list
)
446 list
->tokens_cap
*= 2;
447 list
->tokens
= (cpp_token
*)
448 xrealloc (list
->tokens
- 1, (list
->tokens_cap
+ 1) * sizeof (cpp_token
));
449 list
->tokens
++; /* Skip the dummy. */
452 /* Initialize a token list. We allocate an extra token in front of
453 the token list, as this allows us to always peek at the previous
454 token without worrying about underflowing the list. */
456 init_token_list (pfile
, list
, recycle
)
461 /* Recycling a used list saves 3 free-malloc pairs. */
464 /* Initialize token space. Put a dummy token before the start
465 that will fail matches. */
466 list
->tokens_cap
= 256; /* 4K's worth. */
467 list
->tokens
= (cpp_token
*)
468 xmalloc ((list
->tokens_cap
+ 1) * sizeof (cpp_token
));
469 list
->tokens
[0].type
= CPP_EOF
;
472 /* Initialize name space. */
473 list
->name_cap
= 1024;
474 list
->namebuf
= (unsigned char *) xmalloc (list
->name_cap
);
476 /* Only create a comment space on demand. */
477 list
->comments_cap
= 0;
481 list
->tokens_used
= 0;
483 list
->comments_used
= 0;
485 list
->line
= pfile
->buffer
->lineno
;
486 list
->dir_handler
= 0;
490 /* Scan an entire line and create a token list for it. Does not
491 macro-expand or execute directives. */
494 _cpp_scan_line (pfile
, list
)
503 init_token_list (pfile
, list
, 1);
505 written
= CPP_WRITTEN (pfile
);
510 col
= CPP_BUFFER (pfile
)->cur
- CPP_BUFFER (pfile
)->line_base
;
511 type
= _cpp_lex_token (pfile
);
512 len
= CPP_WRITTEN (pfile
) - written
;
513 CPP_SET_WRITTEN (pfile
, written
);
514 if (type
== CPP_HSPACE
)
516 if (CPP_PEDANTIC (pfile
))
517 pedantic_whitespace (pfile
, pfile
->token_buffer
+ written
, len
);
521 else if (type
== CPP_COMMENT
)
522 /* Only happens when processing -traditional macro definitions.
523 Do not give this a token entry, but do not change space_before
524 either. */
527 if (list
->tokens_used
>= list
->tokens_cap
)
528 expand_token_space (list
);
529 if (list
->name_used
+ len
>= list
->name_cap
)
530 expand_name_space (list
, list
->name_used
+ len
+ 1 - list
->name_cap
);
532 if (type
== CPP_MACRO
)
536 TOK_TYPE (list
, i
) = type
;
537 TOK_COL (list
, i
) = col
;
538 TOK_FLAGS (list
, i
) = space_before
? PREV_WHITESPACE
: 0;
540 if (type
== CPP_VSPACE
)
543 TOK_LEN (list
, i
) = len
;
544 TOK_OFFSET (list
, i
) = list
->name_used
;
545 memcpy (TOK_NAME (list
, i
), CPP_PWRITTEN (pfile
), len
);
546 list
->name_used
+= len
;
550 TOK_AUX (list
, i
) = CPP_BUFFER (pfile
)->lineno
+ 1;
552 /* XXX Temporary kluge: put back the newline. */
557 /* Skip a C-style block comment. We know it's a comment, and point is
558 at the second character of the starter. */
560 skip_block_comment (pfile
)
563 unsigned int line
, col
;
564 const U_CHAR
*limit
, *cur
;
567 line
= CPP_BUF_LINE (CPP_BUFFER (pfile
));
568 col
= CPP_BUF_COL (CPP_BUFFER (pfile
));
569 limit
= CPP_BUFFER (pfile
)->rlimit
;
570 cur
= CPP_BUFFER (pfile
)->cur
;
575 if (c
== '\n' || c
== '\r')
577 /* \r cannot be a macro escape marker here. */
578 if (!ACTIVE_MARK_P (pfile
))
579 CPP_BUMP_LINE_CUR (pfile
, cur
);
583 /* Check for terminator. */
584 if (cur
< limit
&& *cur
== '/')
587 /* Warn about comment starter embedded in comment. */
588 if (cur
[-2] == '/' && CPP_OPTION (pfile
, warn_comments
))
589 cpp_warning_with_line (pfile
, CPP_BUFFER (pfile
)->lineno
,
590 cur
- CPP_BUFFER (pfile
)->line_base
,
591 "'/*' within comment");
595 cpp_error_with_line (pfile
, line
, col
, "unterminated comment");
598 CPP_BUFFER (pfile
)->cur
= cur
+ 1;
601 /* Skip a C++/Chill line comment. We know it's a comment, and point
602 is at the second character of the initiator. */
604 skip_line_comment (pfile
)
612 /* We don't have to worry about EOF in here. */
615 /* Don't consider final '\n' to be part of comment. */
621 /* \r cannot be a macro escape marker here. */
622 if (!ACTIVE_MARK_P (pfile
))
623 CPP_BUMP_LINE (pfile
);
624 if (CPP_OPTION (pfile
, warn_comments
))
625 cpp_warning (pfile
, "backslash-newline within line comment");
630 /* Skip a comment - C, C++, or Chill style. M is the first character
631 of the comment marker. If this really is a comment, skip to its
632 end and return ' '. If this is not a comment, return M (which will
633 be '/' or '-'). */
636 skip_comment (pfile
, m
)
640 if (m
== '/' && PEEKC() == '*')
642 skip_block_comment (pfile
);
645 else if (m
== '/' && PEEKC() == '/')
647 if (CPP_BUFFER (pfile
)->system_header_p
)
649 /* We silently allow C++ comments in system headers, irrespective
650 of conformance mode, because lots of busted systems do that
651 and trying to clean it up in fixincludes is a nightmare. */
652 skip_line_comment (pfile
);
655 else if (CPP_OPTION (pfile
, cplusplus_comments
))
657 if (! CPP_BUFFER (pfile
)->warned_cplusplus_comments
)
659 if (CPP_WTRADITIONAL (pfile
))
661 "C++ style comments are not allowed in traditional C");
662 else if (CPP_OPTION (pfile
, c89
) && CPP_PEDANTIC (pfile
))
664 "C++ style comments are not allowed in ISO C89");
665 if (CPP_WTRADITIONAL (pfile
)
666 || (CPP_OPTION (pfile
, c89
) && CPP_PEDANTIC (pfile
)))
668 "(this will be reported only once per input file)");
669 CPP_BUFFER (pfile
)->warned_cplusplus_comments
= 1;
671 skip_line_comment (pfile
);
677 else if (m
== '-' && PEEKC() == '-'
678 && CPP_OPTION (pfile
, chill
))
680 skip_line_comment (pfile
);
687 /* Identical to skip_comment except that it copies the comment into the
688 token_buffer. This is used if !discard_comments. */
690 copy_comment (pfile
, m
)
694 const U_CHAR
*start
= CPP_BUFFER (pfile
)->cur
; /* XXX Layering violation */
697 if (skip_comment (pfile
, m
) == m
)
700 limit
= CPP_BUFFER (pfile
)->cur
;
701 CPP_RESERVE (pfile
, limit
- start
+ 2);
702 CPP_PUTC_Q (pfile
, m
);
703 for (; start
<= limit
; start
++)
705 CPP_PUTC_Q (pfile
, *start
);
711 null_warning (pfile
, count
)
716 cpp_warning (pfile
, "embedded null character ignored");
718 cpp_warning (pfile
, "embedded null characters ignored");
721 /* Skip whitespace \-newline and comments. Does not macro-expand. */
724 _cpp_skip_hspace (pfile
)
727 unsigned int null_count
= 0;
735 else if (is_hspace(c
))
737 if ((c
== '\f' || c
== '\v') && CPP_PEDANTIC (pfile
))
738 cpp_pedwarn (pfile
, "%s in preprocessing directive",
739 c
== '\f' ? "formfeed" : "vertical tab");
745 /* \r is a backslash-newline marker if !has_escapes, and
746 a deletable-whitespace or no-reexpansion marker otherwise. */
747 if (CPP_BUFFER (pfile
)->has_escapes
)
755 CPP_BUMP_LINE (pfile
);
757 else if (c
== '/' || c
== '-')
759 c
= skip_comment (pfile
, c
);
769 null_warning (pfile
, null_count
);
772 /* Read and discard the rest of the current line. */
775 _cpp_skip_rest_of_line (pfile
)
789 if (! CPP_BUFFER (pfile
)->has_escapes
)
790 CPP_BUMP_LINE (pfile
);
795 skip_string (pfile
, c
);
800 skip_comment (pfile
, c
);
805 if (CPP_PEDANTIC (pfile
))
806 cpp_pedwarn (pfile
, "%s in preprocessing directive",
807 c
== '\f' ? "formfeed" : "vertical tab");
814 /* Parse an identifier starting with C. */
817 _cpp_parse_name (pfile
, c
)
829 if (c
== '$' && CPP_PEDANTIC (pfile
))
830 cpp_pedwarn (pfile
, "`$' in identifier");
832 CPP_RESERVE(pfile
, 2); /* One more for final NUL. */
833 CPP_PUTC_Q (pfile
, c
);
841 /* Parse and skip over a string starting with C. A single quoted
842 string is treated like a double -- some programs (e.g., troff) are
843 perverse this way. (However, a single quoted string is not allowed
844 to extend over multiple lines.) */
846 skip_string (pfile
, c
)
850 unsigned int start_line
, start_column
;
851 unsigned int null_count
= 0;
853 start_line
= CPP_BUF_LINE (CPP_BUFFER (pfile
));
854 start_column
= CPP_BUF_COL (CPP_BUFFER (pfile
));
861 cpp_error_with_line (pfile
, start_line
, start_column
,
862 "unterminated string or character constant");
863 if (pfile
->multiline_string_line
!= start_line
864 && pfile
->multiline_string_line
!= 0)
865 cpp_error_with_line (pfile
,
866 pfile
->multiline_string_line
, -1,
867 "possible real start of unterminated constant");
868 pfile
->multiline_string_line
= 0;
876 CPP_BUMP_LINE (pfile
);
877 /* In Fortran and assembly language, silently terminate
878 strings of either variety at end of line. This is a
879 kludge around not knowing where comments are in these
880 languages. */
881 if (CPP_OPTION (pfile
, lang_fortran
)
882 || CPP_OPTION (pfile
, lang_asm
))
887 /* Character constants may not extend over multiple lines.
888 In Standard C, neither may strings. We accept multiline
889 strings as an extension. */
892 cpp_error_with_line (pfile
, start_line
, start_column
,
893 "unterminated character constant");
897 if (CPP_PEDANTIC (pfile
) && pfile
->multiline_string_line
== 0)
898 cpp_pedwarn_with_line (pfile
, start_line
, start_column
,
899 "string constant runs past end of line");
900 if (pfile
->multiline_string_line
== 0)
901 pfile
->multiline_string_line
= start_line
;
905 if (CPP_BUFFER (pfile
)->has_escapes
)
907 cpp_ice (pfile
, "\\r escape inside string constant");
911 /* Backslash newline is replaced by nothing at all. */
912 CPP_BUMP_LINE (pfile
);
929 cpp_warning (pfile
, "null character in string or character constant");
930 else if (null_count
> 1)
931 cpp_warning (pfile
, "null characters in string or character constant");
934 /* Parse a string and copy it to the output. */
937 parse_string (pfile
, c
)
941 const U_CHAR
*start
= CPP_BUFFER (pfile
)->cur
; /* XXX Layering violation */
944 skip_string (pfile
, c
);
946 limit
= CPP_BUFFER (pfile
)->cur
;
947 CPP_RESERVE (pfile
, limit
- start
+ 2);
948 CPP_PUTC_Q (pfile
, c
);
949 for (; start
< limit
; start
++)
951 CPP_PUTC_Q (pfile
, *start
);
954 /* Read an assertion into the token buffer, converting to
955 canonical form: `#predicate(a n swe r)' The next non-whitespace
956 character to read should be the first letter of the predicate.
957 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
958 with answer (see callers for why). In case of 0, an error has been
959 printed. */
961 _cpp_parse_assertion (pfile
)
965 _cpp_skip_hspace (pfile
);
969 cpp_error (pfile
, "assertion without predicate");
972 else if (! is_idstart(c
))
974 cpp_error (pfile
, "assertion predicate is not an identifier");
977 CPP_PUTC(pfile
, '#');
979 _cpp_parse_name (pfile
, c
);
984 if (is_hspace(c
) || c
== '\r')
985 _cpp_skip_hspace (pfile
);
991 CPP_PUTC(pfile
, '(');
994 while ((c
= GETC()) != ')')
1000 CPP_PUTC(pfile
, ' ');
1004 else if (c
== '\n' || c
== EOF
)
1006 if (c
== '\n') FORWARD(-1);
1007 cpp_error (pfile
, "un-terminated assertion answer");
1011 /* \r cannot be a macro escape here. */
1012 CPP_BUMP_LINE (pfile
);
1015 CPP_PUTC (pfile
, c
);
1020 if (pfile
->limit
[-1] == ' ')
1021 pfile
->limit
[-1] = ')';
1022 else if (pfile
->limit
[-1] == '(')
1024 cpp_error (pfile
, "empty token sequence in assertion");
1028 CPP_PUTC (pfile
, ')');
1033 /* Get the next token, and add it to the text in pfile->token_buffer.
1034 Return the kind of token we got. */
1037 _cpp_lex_token (pfile
)
1041 enum cpp_ttype token
;
1043 if (CPP_BUFFER (pfile
) == NULL
)
1054 if (PEEKC () == '=')
1058 if (CPP_OPTION (pfile
, discard_comments
))
1059 c
= skip_comment (pfile
, c
);
1061 c
= copy_comment (pfile
, c
);
1065 /* Comments are equivalent to spaces.
1066 For -traditional, a comment is equivalent to nothing. */
1067 if (!CPP_OPTION (pfile
, discard_comments
))
1069 else if (CPP_TRADITIONAL (pfile
))
1071 if (pfile
->parsing_define_directive
)
1077 CPP_PUTC (pfile
, c
);
1082 CPP_PUTC (pfile
, c
);
1085 if (pfile
->parsing_if_directive
)
1087 CPP_ADJUST_WRITTEN (pfile
, -1);
1088 if (_cpp_parse_assertion (pfile
))
1089 return CPP_ASSERTION
;
1093 if (pfile
->parsing_define_directive
)
1099 CPP_PUTC (pfile
, c2
);
1101 else if (c2
== '%' && PEEKN (1) == ':')
1103 /* Digraph: "%:" == "#". */
1105 CPP_RESERVE (pfile
, 2);
1106 CPP_PUTC_Q (pfile
, c2
);
1107 CPP_PUTC_Q (pfile
, GETC ());
1115 if (!pfile
->only_seen_white
)
1118 /* Remove the "#" or "%:" from the token buffer. */
1119 CPP_ADJUST_WRITTEN (pfile
, (c
== '#' ? -1 : -2));
1120 return CPP_DIRECTIVE
;
1124 parse_string (pfile
, c
);
1125 return c
== '\'' ? CPP_CHAR
: CPP_STRING
;
1128 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1134 /* Digraph: ":>" == "]". */
1136 || (c2
== ':' && CPP_OPTION (pfile
, cplusplus
)))
1144 if (c2
== c
|| c2
== '=')
1149 /* Digraphs: "%:" == "#", "%>" == "}". */
1154 CPP_RESERVE (pfile
, 2);
1155 CPP_PUTC_Q (pfile
, c
);
1156 CPP_PUTC_Q (pfile
, c2
);
1162 CPP_RESERVE (pfile
, 2);
1163 CPP_PUTC_Q (pfile
, c
);
1164 CPP_PUTC_Q (pfile
, c2
);
1165 return CPP_OPEN_BRACE
;
1167 /* else fall through */
1173 if (PEEKC () == '=')
1181 if (CPP_OPTION (pfile
, chill
))
1182 goto comment
; /* Chill style comment */
1190 if (CPP_OPTION (pfile
, cplusplus
) && PEEKN (1) == '*')
1192 /* In C++, there's a ->* operator. */
1194 CPP_RESERVE (pfile
, 4);
1195 CPP_PUTC_Q (pfile
, c
);
1196 CPP_PUTC_Q (pfile
, GETC ());
1197 CPP_PUTC_Q (pfile
, GETC ());
1205 if (pfile
->parsing_include_directive
)
1209 CPP_PUTC (pfile
, c
);
1213 if (c
== '\n' || c
== EOF
)
1216 "missing '>' in `#include <FILENAME>'");
1221 if (!CPP_BUFFER (pfile
)->has_escapes
)
1223 /* Backslash newline is replaced by nothing. */
1224 CPP_ADJUST_WRITTEN (pfile
, -1);
1225 CPP_BUMP_LINE (pfile
);
1229 /* We might conceivably get \r- or \r<space> in
1230 here. Just delete 'em. */
1232 if (d
!= '-' && d
!= ' ')
1233 cpp_ice (pfile
, "unrecognized escape \\r%c", d
);
1234 CPP_ADJUST_WRITTEN (pfile
, -1);
1240 /* Digraphs: "<%" == "{", "<:" == "[". */
1245 CPP_RESERVE (pfile
, 2);
1246 CPP_PUTC_Q (pfile
, c
);
1247 CPP_PUTC_Q (pfile
, c2
);
1248 return CPP_CLOSE_BRACE
;
1252 /* else fall through */
1257 /* GNU C++ supports MIN and MAX operators <? and >?. */
1258 if (c2
!= c
&& (!CPP_OPTION (pfile
, cplusplus
) || c2
!= '?'))
1261 CPP_RESERVE (pfile
, 3);
1262 CPP_PUTC_Q (pfile
, c
);
1263 CPP_PUTC_Q (pfile
, c2
);
1264 if (PEEKC () == '=')
1265 CPP_PUTC_Q (pfile
, GETC ());
1272 CPP_PUTC (pfile
, c
);
1277 /* In C++ there's a .* operator. */
1278 if (CPP_OPTION (pfile
, cplusplus
) && c2
== '*')
1281 if (c2
== '.' && PEEKN(1) == '.')
1283 CPP_RESERVE (pfile
, 3);
1284 CPP_PUTC_Q (pfile
, '.');
1285 CPP_PUTC_Q (pfile
, '.');
1286 CPP_PUTC_Q (pfile
, '.');
1288 return CPP_ELLIPSIS
;
1293 CPP_RESERVE (pfile
, 2);
1294 CPP_PUTC_Q (pfile
, c
);
1295 CPP_PUTC_Q (pfile
, GETC ());
1300 if ((c2
== '\'' || c2
== '\"') && !CPP_TRADITIONAL (pfile
))
1302 CPP_PUTC (pfile
, c
);
1304 parse_string (pfile
, c
);
1305 return c
== '\'' ? CPP_WCHAR
: CPP_WSTRING
;
1309 case '0': case '1': case '2': case '3': case '4':
1310 case '5': case '6': case '7': case '8': case '9':
1315 CPP_RESERVE (pfile
, 2);
1316 CPP_PUTC_Q (pfile
, c
);
1320 if (!is_numchar(c
) && c
!= '.'
1321 && ((c2
!= 'e' && c2
!= 'E'
1322 && ((c2
!= 'p' && c2
!= 'P')
1323 || CPP_OPTION (pfile
, c89
)))
1324 || (c
!= '+' && c
!= '-')))
1330 case 'b': case 'c': case 'd': case 'h': case 'o':
1331 case 'B': case 'C': case 'D': case 'H': case 'O':
1332 if (CPP_OPTION (pfile
, chill
) && PEEKC () == '\'')
1334 CPP_RESERVE (pfile
, 2);
1335 CPP_PUTC_Q (pfile
, c
);
1336 CPP_PUTC_Q (pfile
, '\'');
1342 goto chill_number_eof
;
1345 CPP_PUTC (pfile
, c
);
1349 CPP_RESERVE (pfile
, 2);
1350 CPP_PUTC_Q (pfile
, c
);
1363 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1364 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1365 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1366 case 'x': case 'y': case 'z':
1367 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1368 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1369 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1372 _cpp_parse_name (pfile
, c
);
1375 case ' ': case '\t': case '\v': case '\f': case '\0':
1384 CPP_PUTC (pfile
, c
);
1386 if (c
== EOF
|| !is_hspace(c
))
1391 null_warning (pfile
, null_count
);
1396 if (CPP_BUFFER (pfile
)->has_escapes
)
1401 if (pfile
->output_escapes
)
1402 CPP_PUTS (pfile
, "\r-", 2);
1403 _cpp_parse_name (pfile
, GETC ());
1408 /* "\r " means a space, but only if necessary to prevent
1409 accidental token concatenation. */
1410 CPP_RESERVE (pfile
, 2);
1411 if (pfile
->output_escapes
)
1412 CPP_PUTC_Q (pfile
, '\r');
1413 CPP_PUTC_Q (pfile
, c
);
1418 cpp_ice (pfile
, "unrecognized escape \\r%c", c
);
1424 /* Backslash newline is ignored. */
1425 if (!ACTIVE_MARK_P (pfile
))
1426 CPP_BUMP_LINE (pfile
);
1431 CPP_PUTC (pfile
, c
);
1434 case '(': token
= CPP_OPEN_PAREN
; goto char1
;
1435 case ')': token
= CPP_CLOSE_PAREN
; goto char1
;
1436 case '{': token
= CPP_OPEN_BRACE
; goto char1
;
1437 case '}': token
= CPP_CLOSE_BRACE
; goto char1
;
1438 case ',': token
= CPP_COMMA
; goto char1
;
1439 case ';': token
= CPP_SEMICOLON
; goto char1
;
1445 CPP_PUTC (pfile
, c
);
1450 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1451 Caller is expected to have checked no_macro_expand. */
1453 maybe_macroexpand (pfile
, written
)
1457 U_CHAR
*macro
= pfile
->token_buffer
+ written
;
1458 size_t len
= CPP_WRITTEN (pfile
) - written
;
1459 HASHNODE
*hp
= _cpp_lookup (pfile
, macro
, len
);
1461 /* _cpp_lookup never returns null. */
1462 if (hp
->type
== T_VOID
)
1464 if (hp
->disabled
|| hp
->type
== T_IDENTITY
)
1466 if (pfile
->output_escapes
)
1468 /* Insert a no-reexpand marker before IDENT. */
1469 CPP_RESERVE (pfile
, 2);
1470 CPP_ADJUST_WRITTEN (pfile
, 2);
1471 macro
= pfile
->token_buffer
+ written
;
1473 memmove (macro
+ 2, macro
, len
);
1479 if (hp
->type
== T_EMPTY
)
1481 /* Special case optimization: macro expands to nothing. */
1482 CPP_SET_WRITTEN (pfile
, written
);
1483 CPP_PUTC_Q (pfile
, ' ');
1487 /* If macro wants an arglist, verify that a '(' follows. */
1488 if (hp
->type
== T_FMACRO
)
1490 int macbuf_whitespace
= 0;
1493 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile
)))
1495 const U_CHAR
*point
= CPP_BUFFER (pfile
)->cur
;
1498 _cpp_skip_hspace (pfile
);
1505 if (point
!= CPP_BUFFER (pfile
)->cur
)
1506 macbuf_whitespace
= 1;
1510 goto not_macro_call
;
1511 cpp_pop_buffer (pfile
);
1514 CPP_SET_MARK (pfile
);
1517 _cpp_skip_hspace (pfile
);
1524 CPP_GOTO_MARK (pfile
);
1529 if (macbuf_whitespace
)
1530 CPP_PUTC (pfile
, ' ');
1536 /* This is now known to be a macro call.
1537 Expand the macro, reading arguments as needed,
1538 and push the expansion on the input stack. */
1539 _cpp_macroexpand (pfile
, hp
);
1540 CPP_SET_WRITTEN (pfile
, written
);
1544 /* Complain about \v or \f in a preprocessing directive (constraint
1545 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1547 pedantic_whitespace (pfile
, p
, len
)
1555 cpp_pedwarn (pfile
, "vertical tab in preprocessing directive");
1556 else if (*p
== '\f')
1557 cpp_pedwarn (pfile
, "form feed in preprocessing directive");
1565 cpp_get_token (pfile
)
1568 enum cpp_ttype token
;
1569 long written
= CPP_WRITTEN (pfile
);
1572 token
= _cpp_lex_token (pfile
);
1577 pfile
->potential_control_macro
= 0;
1578 pfile
->only_seen_white
= 0;
1582 if (pfile
->only_seen_white
== 0)
1583 pfile
->only_seen_white
= 1;
1584 CPP_BUMP_LINE (pfile
);
1592 pfile
->potential_control_macro
= 0;
1593 if (_cpp_handle_directive (pfile
))
1594 return CPP_DIRECTIVE
;
1595 pfile
->only_seen_white
= 0;
1596 CPP_PUTC (pfile
, '#');
1600 pfile
->potential_control_macro
= 0;
1601 pfile
->only_seen_white
= 0;
1602 if (! pfile
->no_macro_expand
1603 && maybe_macroexpand (pfile
, written
))
1608 if (CPP_BUFFER (pfile
) == NULL
)
1610 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile
)))
1612 cpp_pop_buffer (pfile
);
1615 cpp_pop_buffer (pfile
);
1620 /* Like cpp_get_token, but skip spaces and comments. */
1623 cpp_get_non_space_token (pfile
)
1626 int old_written
= CPP_WRITTEN (pfile
);
1629 enum cpp_ttype token
= cpp_get_token (pfile
);
1630 if (token
!= CPP_COMMENT
&& token
!= CPP_HSPACE
&& token
!= CPP_VSPACE
)
1632 CPP_SET_WRITTEN (pfile
, old_written
);
1636 /* Like cpp_get_token, except that it does not execute directives,
1637 does not consume vertical space, and discards horizontal space. */
1639 _cpp_get_directive_token (pfile
)
1643 enum cpp_ttype token
;
1647 at_bol
= (CPP_BUFFER (pfile
)->cur
== CPP_BUFFER (pfile
)->line_base
);
1648 old_written
= CPP_WRITTEN (pfile
);
1649 token
= _cpp_lex_token (pfile
);
1656 /* Put it back and return VSPACE. */
1658 CPP_ADJUST_WRITTEN (pfile
, -1);
1662 /* The purpose of this rather strange check is to prevent pedantic
1663 warnings for ^L in an #ifdefed out block. */
1664 if (CPP_PEDANTIC (pfile
) && ! at_bol
)
1665 pedantic_whitespace (pfile
, pfile
->token_buffer
+ old_written
,
1666 CPP_WRITTEN (pfile
) - old_written
);
1667 CPP_SET_WRITTEN (pfile
, old_written
);
1672 /* Don't execute the directive, but don't smash it to OTHER either. */
1673 CPP_PUTC (pfile
, '#');
1674 return CPP_DIRECTIVE
;
1677 if (! pfile
->no_macro_expand
1678 && maybe_macroexpand (pfile
, old_written
))
1683 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile
)))
1685 cpp_pop_buffer (pfile
);
1689 /* This can happen for files that don't end with a newline,
1690 and for cpp_define and friends. Pretend they do, so
1691 callers don't have to deal. A warning will be issued by
1692 someone else, if necessary. */
1697 /* Determine the current line and column. Used only by read_and_prescan. */
1699 find_position (start
, limit
, linep
)
1702 unsigned long *linep
;
1704 unsigned long line
= *linep
;
1705 U_CHAR
*lbase
= start
;
1706 while (start
< limit
)
1708 U_CHAR ch
= *start
++;
1709 if (ch
== '\n' || ch
== '\r')
1719 /* The following table is used by _cpp_read_and_prescan. If we have
1720 designated initializers, it can be constant data; otherwise, it is
1721 set up at runtime by _cpp_init_input_buffer. */
1724 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1727 #if (GCC_VERSION >= 2007)
1728 #define init_chartab() /* nothing */
1729 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1731 #define s(p, v) [p] = v,
1733 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1734 static void init_chartab PARAMS ((void)) { \
1735 unsigned char *x = chartab;
1737 #define s(p, v) x[p] = v;
1740 /* Table of characters that can't be handled in the inner loop.
1741 Also contains the mapping between trigraph third characters and their
1743 #define SPECCASE_CR 1
1744 #define SPECCASE_BACKSLASH 2
1745 #define SPECCASE_QUESTION 3
1748 s('\r', SPECCASE_CR
)
1749 s('\\', SPECCASE_BACKSLASH
)
1750 s('?', SPECCASE_QUESTION
)
1752 s('=', '#') s(')', ']') s('!', '|')
1753 s('(', '[') s('\'', '^') s('>', '}')
1754 s('/', '\\') s('<', '{') s('-', '~')
1761 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1762 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1764 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1765 much memory to allocate initially; more will be allocated if
1766 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1767 canonical form (\n). If enabled, convert and/or warn about
1768 trigraphs. Convert backslash-newline to a one-character escape
1769 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1770 token). If there is no newline at the end of the file, add one and
1771 warn. Returns -1 on failure, or the actual length of the data to
1774 This function does a lot of work, and can be a serious performance
1775 bottleneck. It has been tuned heavily; make sure you understand it
1776 before hacking. The common case - no trigraphs, Unix style line
1777 breaks, backslash-newline set off by whitespace, newline at EOF -
1778 has been optimized at the expense of the others. The performance
1779 penalty for DOS style line breaks (\r\n) is about 15%.
1781 Warnings lose particularly heavily since we have to determine the
1782 line number, which involves scanning from the beginning of the file
1783 or from the last warning. The penalty for the absence of a newline
1784 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1786 If your file has more than one kind of end-of-line marker, you
1787 will get messed-up line numbering.
1789 So that the cases of the switch statement do not have to concern
1790 themselves with the complications of reading beyond the end of the
1791 buffer, the buffer is guaranteed to have at least 3 characters in
1792 it (or however many are left in the file, if less) on entry to the
1793 switch. This is enough to handle trigraphs and the "\\\n\r" and
1796 The end of the buffer is marked by a '\\', which, being a special
1797 character, guarantees we will exit the fast-scan loops and perform
1801 _cpp_read_and_prescan (pfile
, fp
, desc
, len
)
1807 U_CHAR
*buf
= (U_CHAR
*) xmalloc (len
);
1808 U_CHAR
*ip
, *op
, *line_base
;
1811 unsigned int deferred_newlines
;
1816 deferred_newlines
= 0;
1820 ibase
= pfile
->input_buffer
+ 3;
1822 ip
[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1826 U_CHAR
*near_buff_end
;
1828 count
= read (desc
, ibase
, pfile
->input_buffer_len
);
1832 ibase
[count
] = '\\'; /* Marks end of buffer */
1835 near_buff_end
= pfile
->input_buffer
+ count
;
1840 size_t delta_line_base
;
1844 This could happen if the file is larger than half the
1845 maximum address space of the machine. */
1848 delta_op
= op
- buf
;
1849 delta_line_base
= line_base
- buf
;
1850 buf
= (U_CHAR
*) xrealloc (buf
, len
);
1851 op
= buf
+ delta_op
;
1852 line_base
= buf
+ delta_line_base
;
1859 /* Allow normal processing of the (at most 2) remaining
1860 characters. The end-of-buffer marker is still present
1861 and prevents false matches within the switch. */
1862 near_buff_end
= ibase
- 1;
1869 /* Deal with \-newline, potentially in the middle of a token. */
1870 if (deferred_newlines
)
1872 if (op
!= buf
&& ! is_space (op
[-1]) && op
[-1] != '\r')
1874 /* Previous was not white space. Skip to white
1875 space, if we can, before outputting the \r's */
1877 while (ip
[span
] != ' '
1880 && NORMAL(ip
[span
]))
1882 memcpy (op
, ip
, span
);
1885 if (! NORMAL(ip
[0]))
1888 while (deferred_newlines
)
1889 deferred_newlines
--, *op
++ = '\r';
1892 /* Copy as much as we can without special treatment. */
1894 while (NORMAL (ip
[span
])) span
++;
1895 memcpy (op
, ip
, span
);
1900 if (ip
> near_buff_end
) /* Do we have enough chars? */
1902 switch (chartab
[*ip
++])
1904 case SPECCASE_CR
: /* \r */
1913 case SPECCASE_BACKSLASH
: /* \ */
1916 deferred_newlines
++;
1918 if (*ip
== '\r') ip
++;
1920 else if (*ip
== '\r')
1922 deferred_newlines
++;
1924 if (*ip
== '\n') ip
++;
1930 case SPECCASE_QUESTION
: /* ? */
1934 *op
++ = '?'; /* Normal non-trigraph case */
1943 if (CPP_OPTION (pfile
, warn_trigraphs
))
1946 line_base
= find_position (line_base
, op
, &line
);
1947 col
= op
- line_base
+ 1;
1948 if (CPP_OPTION (pfile
, trigraphs
))
1949 cpp_warning_with_line (pfile
, line
, col
,
1950 "trigraph ??%c converted to %c", d
, t
);
1952 cpp_warning_with_line (pfile
, line
, col
,
1953 "trigraph ??%c ignored", d
);
1957 if (CPP_OPTION (pfile
, trigraphs
))
1959 op
[-1] = t
; /* Overwrite '?' */
1964 goto do_speccase
; /* May need buffer refill */
1976 /* Copy previous char plus unprocessed (at most 2) chars
1977 to beginning of buffer, refill it with another
1978 read(), and continue processing */
1979 memmove (ip
- count
- 1, ip
- 1, 4 - (ip
- near_buff_end
));
1989 line_base
= find_position (line_base
, op
, &line
);
1990 col
= op
- line_base
+ 1;
1991 cpp_warning_with_line (pfile
, line
, col
, "no newline at end of file");
1992 if (offset
+ 1 > len
)
1995 if (offset
+ 1 > len
)
1997 buf
= (U_CHAR
*) xrealloc (buf
, len
);
2003 fp
->buf
= ((len
- offset
< 20) ? buf
: (U_CHAR
*)xrealloc (buf
, op
- buf
));
2007 cpp_notice (pfile
, "%s is too large (>%lu bytes)", fp
->ihash
->name
,
2008 (unsigned long)offset
);
2013 cpp_error_from_errno (pfile
, fp
->ihash
->name
);
2018 /* Allocate pfile->input_buffer, and initialize chartab[]
2019 if it hasn't happened already. */
2022 _cpp_init_input_buffer (pfile
)
2028 init_token_list (pfile
, &pfile
->directbuf
, 0);
2030 /* Determine the appropriate size for the input buffer. Normal C
2031 source files are smaller than eight K. */
2032 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2033 address arithmetic all the time, and 3 for pushback during buffer
2034 refill, in case there's a potential trigraph or end-of-line
2035 digraph at the end of a block. */
2037 tmp
= (U_CHAR
*) xmalloc (8192 + 1 + 3);
2038 pfile
->input_buffer
= tmp
;
2039 pfile
->input_buffer_len
= 8192;
2043 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2044 and extending for LEN characters to the NUL-terminated string
2045 STRING. Typical usage:
2047 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2053 cpp_idcmp (token
, len
, string
)
2054 const U_CHAR
*token
;
2058 size_t len2
= strlen (string
);
2061 if ((r
= memcmp (token
, string
, MIN (len
, len2
))))
2064 /* The longer of the two strings sorts after the shorter. */
2067 else if (len
< len2
)
2075 /* Lexing algorithm.
2077 The original lexer in cpplib was made up of two passes: a first pass
2078 that replaced trigraphs and deleted escaped newlines, and a second
2079 pass that tokenized the result of the first pass. Tokenisation was
2080 performed by peeking at the next character in the input stream. For
2081 example, if the input stream contained "!=", the handler for the !
2082 character would peek at the next character, and if it were a '='
2083 would skip over it, and return a "!=" token, otherwise it would
2084 return just the "!" token.
2086 To implement a single-pass lexer, this peeking ahead is unworkable.
2087 An arbitrary number of escaped newlines, and trigraphs (in particular
2088 ??/ which translates to the escape \), could separate the '!' and '='
2089 in the input stream, yet the next token is still a "!=".
2091 Suppose instead that we lex by one logical line at a time, producing
2092 a token list or stack for each logical line, and when seeing the '!'
2093 push a CPP_NOT token on the list. Then if the '!' is part of a
2094 longer token ("!=") we know we must see the remainder of the token by
2095 the time we reach the end of the logical line. Thus we can have the
2096 '=' handler look at the previous token (at the end of the list / top
2097 of the stack) and see if it is a "!" token, and if so, instead of
2098 pushing a "=" token revise the existing token to be a "!=" token.
2100 This works in the presence of escaped newlines, because the '\' would
2101 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2102 newline ('\n' or '\r') handler looks at the token at the top of the
2103 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2104 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2105 the '=' handler would never see any intervening escaped newlines.
2107 To make trigraphs work in this context, as in precedence trigraphs
2108 are highest and converted before anything else, the '?' handler does
2109 lookahead to see if it is a trigraph, and if so skips the trigraph
2110 and pushes the token it represents onto the top of the stack. This
2111 also works in the particular case of a CPP_BACKSLASH trigraph.
2113 To the preprocessor, whitespace is only significant to the point of
2114 knowing whether whitespace precedes a particular token. For example,
2115 the '=' handler needs to know whether there was whitespace between it
2116 and a "!" token on the top of the stack, to make the token conversion
2117 decision correctly. So each token has a PREV_WHITESPACE flag to
2118 indicate this - the standard permits consecutive whitespace to be
2119 regarded as a single space. The compiler front ends are not
2120 interested in whitespace at all; they just require a token stream.
2121 Another place where whitespace is significant to the preprocessor is
2122 a #define statement - if there is whitespace between the macro name
2123 and an initial "(" token the macro is "object-like", otherwise it is
2124 a function-like macro that takes arguments.
2126 However, all is not rosy. Parsing of identifiers, numbers, comments
2127 and strings becomes trickier because of the possibility of raw
2128 trigraphs and escaped newlines in the input stream.
2130 The trigraphs are three consecutive characters beginning with two
2131 question marks. A question mark is not valid as part of a number or
2132 identifier, so parsing of a number or identifier terminates normally
2133 upon reaching it, returning to the mainloop which handles the
2134 trigraph just like it would in any other position. Similarly for the
2135 backslash of a backslash-newline combination. So we just need the
2136 escaped-newline dropper in the mainloop to check if the token on the
2137 top of the stack after dropping the escaped newline is a number or
2138 identifier, and if so to continue the processing it as if nothing had
2141 For strings, we replace trigraphs whenever we reach a quote or
2142 newline, because there might be a backslash trigraph escaping them.
2143 We need to be careful that we start trigraph replacing from where we
2144 left off previously, because it is possible for a first scan to leave
2145 "fake" trigraphs that a second scan would pick up as real (e.g. the
2146 sequence "????/\n=" would find a fake ??= trigraph after removing the
2149 For line comments, on reaching a newline we scan the previous
2150 character(s) to see if it is escaped, and continue if it is. Block
2151 comments ignore everything and just focus on finding the comment
2152 termination mark. The only difficult thing, and it is surprisingly
2153 tricky, is checking if an asterisk precedes the final slash since
2154 they could be separated by escaped newlines. If the preprocessor is
2155 invoked with the output comments option, we don't bother removing
2156 escaped newlines and replacing trigraphs for output.
2158 Finally, numbers can begin with a period, which is pushed initially
2159 as a CPP_DOT token in its own right. The digit handler checks if the
2160 previous token was a CPP_DOT not separated by whitespace, and if so
2161 pops it off the stack and pushes a period into the number's buffer
2162 before calling the number parser.
2166 static void expand_comment_space
PARAMS ((cpp_toklist
*));
2167 void init_trigraph_map
PARAMS ((void));
2168 static unsigned char* trigraph_replace
PARAMS ((cpp_reader
*, unsigned char *,
2170 static const unsigned char *backslash_start
PARAMS ((cpp_reader
*,
2171 const unsigned char *));
2172 static int skip_block_comment
PARAMS ((cpp_reader
*));
2173 static int skip_line_comment
PARAMS ((cpp_reader
*));
2174 static void skip_whitespace
PARAMS ((cpp_reader
*, int));
2175 static void parse_name
PARAMS ((cpp_reader
*, cpp_toklist
*, cpp_name
*));
2176 static void parse_number
PARAMS ((cpp_reader
*, cpp_toklist
*, cpp_name
*));
2177 static void parse_string
PARAMS ((cpp_reader
*, cpp_toklist
*, cpp_name
*,
2179 static int trigraph_ok
PARAMS ((cpp_reader
*, const unsigned char *));
2180 static void copy_comment
PARAMS ((cpp_toklist
*, const unsigned char *,
2181 unsigned int, unsigned int, unsigned int));
2182 void _cpp_lex_line
PARAMS ((cpp_reader
*, cpp_toklist
*));
2184 static void _cpp_output_list
PARAMS ((cpp_reader
*, cpp_toklist
*));
2186 unsigned int spell_string
PARAMS ((unsigned char *, cpp_toklist
*,
2188 unsigned int spell_comment
PARAMS ((unsigned char *, cpp_toklist
*,
2190 unsigned int spell_name
PARAMS ((unsigned char *, cpp_toklist
*,
2193 typedef unsigned int (* speller
) PARAMS ((unsigned char *, cpp_toklist
*,
2196 /* Macros on a cpp_name. */
2197 #define INIT_NAME(list, name) \
2198 do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
2200 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
2201 #define COLUMN(cur) ((cur) - buffer->line_base)
2203 /* Maybe put these in the ISTABLE eventually. */
2204 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
2205 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
2207 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
2208 character, if any, is in buffer. */
2209 #define handle_newline(cur, limit, c) \
2211 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
2213 CPP_BUMP_LINE_CUR (pfile, (cur)); \
2216 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
2217 #define PREV_TOKEN_TYPE (cur_token[-1].type)
2219 #define SPELL_TEXT 0
2220 #define SPELL_HANDLER 1
2221 #define SPELL_CHAR 2
2222 #define SPELL_NONE 3
2225 #define T(e, s) {SPELL_TEXT, s},
2226 #define H(e, s) {SPELL_HANDLER, s},
2227 #define C(e, s) {SPELL_CHAR, s},
2228 #define N(e, s) {SPELL_NONE, s},
2229 #define E(e, s) {SPELL_EOL, s},
2231 static const struct token_spelling
2235 } token_spellings
[N_TTYPES
+ 1] = {TTYPE_TABLE
{0, 0} };
2243 static const unsigned char *digraph_spellings
[] = {"%:", "%:%:", "<:",
2247 expand_comment_space (list
)
2250 if (list
->comments_cap
== 0)
2252 list
->comments_cap
= 10;
2253 list
->comments
= (cpp_token
*)
2254 xmalloc (list
->comments_cap
* sizeof (cpp_token
));
2258 list
->comments_cap
*= 2;
2259 list
->comments
= (cpp_token
*)
2260 xrealloc (list
->comments
, list
->comments_cap
);
/* Release the buffers that hang off LIST by passing list->comments,
   list->tokens - 1 and list->namebuf to free.  The visible lines do
   not free LIST itself.
   NOTE(review): a line between the opening of the body and the first
   free call is not visible here; it may guard the free of
   list->comments - confirm against the full file. */
2265 cpp_free_token_list (list
)
2269 free (list
->comments
);
2270 free (list
->tokens
- 1); /* Backup over dummy token: the address freed is one element before list->tokens. */
2271 free (list
->namebuf
);
/* Indexed by the third character of a "??x" sequence; holds the
   character that trigraph stands for.  Entries that are never set
   stay zero, which the users of the table treat as "not a trigraph".  */
static unsigned char trigraph_map[256];

/* Fill in trigraph_map with the nine trigraph replacement characters.  */
void
init_trigraph_map ()
{
  /* Each row is { third character of the trigraph, replacement }.  */
  static const unsigned char pairs[][2] = {
    { '=',  '#'  },
    { '(',  '['  },
    { ')',  ']'  },
    { '/',  '\\' },
    { '\'', '^'  },
    { '<',  '{'  },
    { '>',  '}'  },
    { '!',  '|'  },
    { '-',  '~'  }
  };
  unsigned int i;

  for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    trigraph_map[pairs[i][0]] = pairs[i][1];
}
2291 /* Call when a trigraph is encountered. It warns if necessary, and
2292 returns true if the trigraph should be honoured. END is the third
2293 character of a trigraph in the input stream. */
2295 trigraph_ok (pfile
, end
)
2297 const unsigned char *end
;
2299 int accept
= CPP_OPTION (pfile
, trigraphs
);
2301 if (CPP_OPTION (pfile
, warn_trigraphs
))
2303 unsigned int col
= end
- 1 - pfile
->buffer
->line_base
;
2305 cpp_warning_with_line (pfile
, pfile
->buffer
->lineno
, col
,
2306 "trigraph ??%c converted to %c",
2307 (int) *end
, (int) trigraph_map
[*end
]);
2309 cpp_warning_with_line (pfile
, pfile
->buffer
->lineno
, col
,
2310 "trigraph ??%c ignored", (int) *end
);
2315 /* Scan a string for trigraphs, warning or replacing them inline as
2316 appropriate. When parsing a string, we must call this routine
2317 before processing a newline character (if trigraphs are enabled),
2318 since the newline might be escaped by a preceding backslash
2319 trigraph sequence. Returns a pointer to the end of the name after
2322 static unsigned char*
2323 trigraph_replace (pfile
, src
, limit
)
2326 unsigned char* limit
;
2328 unsigned char *dest
;
2330 /* Starting with src[1], find two consecutive '?'. The case of no
2331 trigraphs is streamlined. */
2333 for (; src
+ 1 < limit
; src
+= 2)
2338 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2341 else if (src
+ 2 == limit
|| src
[1] != '?')
2344 /* Check if it really is a trigraph. */
2345 if (trigraph_map
[src
[2]] == 0)
2349 goto trigraph_found
;
2353 /* Now we have a trigraph, we need to scan the remaining buffer, and
2354 copy-shifting its contents left if replacement is enabled. */
2355 for (; src
+ 2 < limit
; dest
++, src
++)
2356 if ((*dest
= *src
) == '?' && src
[1] == '?' && trigraph_map
[src
[2]])
2360 if (trigraph_ok (pfile
, pfile
->buffer
->cur
- (limit
- src
)))
2361 *dest
= trigraph_map
[*src
];
2364 /* Copy remaining (at most 2) characters. */
2370 /* If CUR is a backslash or the end of a trigraphed backslash, return
2371 a pointer to its beginning, otherwise NULL. We don't read beyond
2372 the buffer start, because there is the start of the comment in the
2374 static const unsigned char *
2375 backslash_start (pfile
, cur
)
2377 const unsigned char *cur
;
2381 if (cur
[0] == '/' && cur
[-1] == '?' && cur
[-2] == '?'
2382 && trigraph_ok (pfile
, cur
))
2387 /* Skip a C-style block comment. This is probably the trickiest
2388 handler. We find the end of the comment by seeing if an asterisk
2389 is before every '/' we encounter. The nasty complication is that a
2390 previous asterisk may be separated by one or more escaped newlines.
2391 Returns non-zero if comment terminated by EOF, zero otherwise. */
2393 skip_block_comment (pfile
)
2396 cpp_buffer
*buffer
= pfile
->buffer
;
2397 const unsigned char *char_after_star
= 0;
2398 register const unsigned char *cur
= buffer
->cur
;
2401 /* Inner loop would think the comment has ended if the first comment
2402 character is a '/'. Avoid this and keep the inner loop clean by
2403 skipping such a character. */
2404 if (cur
< buffer
->rlimit
&& cur
[0] == '/')
2407 for (; cur
< buffer
->rlimit
; )
2409 unsigned char c
= *cur
++;
2411 /* People like decorating comments with '*', so check for
2412 '/' instead for efficiency. */
2415 if (cur
[-2] == '*' || cur
- 1 == char_after_star
)
2418 /* Warn about potential nested comments, but not when
2419 the final character inside the comment is a '/'.
2420 Don't bother to get it right across escaped newlines. */
2421 if (CPP_OPTION (pfile
, warn_comments
) && cur
+ 1 < buffer
->rlimit
2422 && cur
[0] == '*' && cur
[1] != '/')
2425 cpp_warning (pfile
, "'/*' within comment");
2428 else if (IS_NEWLINE(c
))
2430 const unsigned char* bslash
= backslash_start (pfile
, cur
- 2);
2432 handle_newline (cur
, buffer
->rlimit
, c
);
2433 /* Work correctly if there is an asterisk before an
2434 arbitrarily long sequence of escaped newlines. */
2435 if (bslash
&& (bslash
[-1] == '*' || bslash
== char_after_star
))
2436 char_after_star
= cur
;
2438 char_after_star
= 0;
2448 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2449 Returns non-zero if a multiline comment. */
2451 skip_line_comment (pfile
)
2454 cpp_buffer
*buffer
= pfile
->buffer
;
2455 register const unsigned char *cur
= buffer
->cur
;
2458 for (; cur
< buffer
->rlimit
; )
2460 unsigned char c
= *cur
++;
2464 /* Check for a (trigraph?) backslash escaping the newline. */
2465 if (!backslash_start (pfile
, cur
- 2))
2468 handle_newline (cur
, buffer
->rlimit
, c
);
2474 buffer
->cur
= cur
- 1; /* Leave newline for caller. */
2478 /* Skips whitespace, stopping at next non-whitespace character. */
2480 skip_whitespace (pfile
, in_directive
)
2484 cpp_buffer
*buffer
= pfile
->buffer
;
2485 register const unsigned char *cur
= buffer
->cur
;
2486 unsigned short null_count
= 0;
2488 for (; cur
< buffer
->rlimit
; )
2490 unsigned char c
= *cur
++;
2492 if (IS_HSPACE(c
)) /* FIXME: Fix ISTABLE. */
2494 if (!is_space(c
) || IS_NEWLINE (c
)) /* Main loop handles newlines. */
2498 /* Must be '\f' or '\v' */
2499 else if (in_directive
&& CPP_PEDANTIC (pfile
))
2500 cpp_pedwarn (pfile
, "%s in preprocessing directive",
2501 c
== '\f' ? "formfeed" : "vertical tab");
2506 buffer
->cur
= cur
- 1;
2508 cpp_warning (pfile
, null_count
> 1 ? "embedded null characters ignored"
2509 : "embedded null character ignored");
2512 /* Parse (append) an identifier. */
2514 parse_name (pfile
, list
, name
)
2519 const unsigned char *name_limit
;
2520 unsigned char *namebuf
;
2521 cpp_buffer
*buffer
= pfile
->buffer
;
2522 register const unsigned char *cur
= buffer
->cur
;
2525 name_limit
= list
->namebuf
+ list
->name_cap
;
2526 namebuf
= list
->namebuf
+ list
->name_used
;
2528 for (; cur
< buffer
->rlimit
&& namebuf
< name_limit
; )
2530 unsigned char c
= *namebuf
= *cur
; /* Copy a single char. */
2536 if (c
== '$' && CPP_PEDANTIC (pfile
))
2539 cpp_pedwarn (pfile
, "'$' character in identifier");
2543 /* Run out of name space? */
2544 if (cur
< buffer
->rlimit
)
2546 list
->name_used
= namebuf
- list
->namebuf
;
2547 auto_expand_name_space (list
);
2553 name
->len
= namebuf
- (list
->namebuf
+ name
->offset
);
2554 list
->name_used
= namebuf
- list
->namebuf
;
2557 /* Parse (append) a number. */
2559 #define VALID_SIGN(c, prevc) \
2560 (((c) == '+' || (c) == '-') && \
2561 ((prevc) == 'e' || (prevc) == 'E' \
2562 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2565 parse_number (pfile
, list
, name
)
2570 const unsigned char *name_limit
;
2571 unsigned char *namebuf
;
2572 cpp_buffer
*buffer
= pfile
->buffer
;
2573 register const unsigned char *cur
= buffer
->cur
;
2576 name_limit
= list
->namebuf
+ list
->name_cap
;
2577 namebuf
= list
->namebuf
+ list
->name_used
;
2579 for (; cur
< buffer
->rlimit
&& namebuf
< name_limit
; )
2581 unsigned char c
= *namebuf
= *cur
; /* Copy a single char. */
2583 /* Perhaps we should accept '$' here if we accept it for
2584 identifiers. We know namebuf[-1] is safe, because for c to
2585 be a sign we must have pushed at least one character. */
2586 if (!is_numchar (c
) && c
!= '.' && ! VALID_SIGN (c
, namebuf
[-1]))
2593 /* Run out of name space? */
2594 if (cur
< buffer
->rlimit
)
2596 list
->name_used
= namebuf
- list
->namebuf
;
2597 auto_expand_name_space (list
);
2603 name
->len
= namebuf
- (list
->namebuf
+ name
->offset
);
2604 list
->name_used
= namebuf
- list
->namebuf
;
2607 /* Places a string terminated by an unescaped TERMINATOR into a
2608 cpp_name, which should be expandable and thus at the top of the
2609 list's stack. Handles embedded trigraphs, if necessary, and
2612 Can be used for character constants (terminator = '\''), string
2613 constants ('"'), angled headers ('>') and assertions (')'). */
2616 parse_string (pfile
, list
, name
, terminator
)
2620 unsigned int terminator
;
2622 cpp_buffer
*buffer
= pfile
->buffer
;
2623 register const unsigned char *cur
= buffer
->cur
;
2624 const unsigned char *name_limit
;
2625 unsigned char *namebuf
;
2626 unsigned int null_count
= 0;
2627 int trigraphed_len
= 0;
2630 name_limit
= list
->namebuf
+ list
->name_cap
;
2631 namebuf
= list
->namebuf
+ list
->name_used
;
2633 for (; cur
< buffer
->rlimit
&& namebuf
< name_limit
; )
2635 unsigned int c
= *namebuf
++ = *cur
++; /* Copy a single char. */
2639 else if (c
== terminator
|| IS_NEWLINE (c
))
2641 unsigned char* name_start
= list
->namebuf
+ name
->offset
;
2643 /* Needed for trigraph_replace and multiline string warning. */
2646 /* Scan for trigraphs before checking if backslash-escaped. */
2647 if (CPP_OPTION (pfile
, trigraphs
)
2648 || CPP_OPTION (pfile
, warn_trigraphs
))
2650 namebuf
= trigraph_replace (pfile
, name_start
+ trigraphed_len
,
2652 trigraphed_len
= namebuf
- 2 - (name_start
+ trigraphed_len
);
2653 if (trigraphed_len
< 0)
2657 namebuf
--; /* Drop the newline / terminator from the name. */
2660 /* Drop a backslash newline, and continue. */
2661 if (namebuf
[-1] == '\\')
2663 handle_newline (cur
, buffer
->rlimit
, c
);
2670 /* In Fortran and assembly language, silently terminate
2671 strings of either variety at end of line. This is a
2672 kludge around not knowing where comments are in these
2674 if (CPP_OPTION (pfile
, lang_fortran
)
2675 || CPP_OPTION (pfile
, lang_asm
))
2678 /* Character constants, headers and asserts may not
2679 extend over multiple lines. In Standard C, neither
2680 may strings. We accept multiline strings as an
2681 extension, but not in directives. */
2682 if (terminator
!= '"' || IS_DIRECTIVE (list
))
2685 cur
++; /* Move forwards again. */
2687 if (pfile
->multiline_string_line
== 0)
2689 pfile
->multiline_string_line
= list
->line
;
2690 if (CPP_PEDANTIC (pfile
))
2691 cpp_pedwarn (pfile
, "multi-line string constant");
2695 handle_newline (cur
, buffer
->rlimit
, c
);
2699 unsigned char *temp
;
2701 /* An odd number of consecutive backslashes represents
2702 an escaped terminator. */
2704 while (temp
>= name_start
&& *temp
== '\\')
2707 if ((namebuf
- temp
) & 1)
2714 /* Run out of name space? */
2715 if (cur
< buffer
->rlimit
)
2717 list
->name_used
= namebuf
- list
->namebuf
;
2718 auto_expand_name_space (list
);
2722 /* We may not have trigraph-replaced the input for this code path,
2723 but as the input is in error by being unterminated we don't
2724 bother. Prevent warnings about no newlines at EOF. */
2725 if (IS_NEWLINE(cur
[-1]))
2729 cpp_error (pfile
, "missing terminating %c character", (int) terminator
);
2731 if (terminator
== '\"' && pfile
->multiline_string_line
!= list
->line
2732 && pfile
->multiline_string_line
!= 0)
2734 cpp_error_with_line (pfile
, pfile
->multiline_string_line
, -1,
2735 "possible start of unterminated string literal");
2736 pfile
->multiline_string_line
= 0;
2741 name
->len
= namebuf
- (list
->namebuf
+ name
->offset
);
2742 list
->name_used
= namebuf
- list
->namebuf
;
2745 cpp_warning (pfile
, (null_count
> 1 ? "null characters preserved"
2746 : "null character preserved"));
2749 /* The character C helps us distinguish comment types: '*' = C style,
2750 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2751 stored comment includes any C-style comment terminator. */
2753 copy_comment (list
, from
, len
, tok_no
, type
)
2755 const unsigned char *from
;
2757 unsigned int tok_no
;
2762 if (list
->comments_used
== list
->comments_cap
)
2763 expand_comment_space (list
);
2765 if (list
->name_used
+ len
> list
->name_cap
)
2766 expand_name_space (list
, len
);
2768 comment
= &list
->comments
[list
->comments_used
++];
2769 comment
->type
= type
;
2770 comment
->aux
= tok_no
;
2771 comment
->val
.name
.len
= len
;
2772 comment
->val
.name
.offset
= list
->name_used
;
2774 memcpy (list
->namebuf
+ list
->name_used
, from
, len
);
2775 list
->name_used
+= len
;
2779 * The tokenizer's main loop. Returns a token list, representing a
2780 * logical line in the input file, terminated with a CPP_VSPACE
2781 * token. On EOF, a token list containing the single CPP_EOF token
2784 * Implementation relies almost entirely on lookback, rather than
2785 * looking forwards. This means that tokenization requires just
2786 * a single pass of the file, even in the presence of trigraphs and
2787 * escaped newlines, providing significant performance benefits.
2788 * Trigraph overhead is negligible if they are disabled, and low
2789 * even when enabled.
2792 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
2793 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
2794 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
2795 #define BACKUP_DIGRAPH(ttype) do { \
2796 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
2799 _cpp_lex_line (pfile
, list
)
2803 cpp_token
*cur_token
, *token_limit
;
2804 cpp_buffer
*buffer
= pfile
->buffer
;
2805 register const unsigned char *cur
= buffer
->cur
;
2806 unsigned char flags
= 0;
2809 token_limit
= list
->tokens
+ list
->tokens_cap
;
2810 cur_token
= list
->tokens
+ list
->tokens_used
;
2812 for (; cur
< buffer
->rlimit
&& cur_token
< token_limit
;)
2814 unsigned char c
= *cur
++;
2816 /* Optimize whitespace skipping, in particular the case of a
2817 single whitespace character, as every other token is probably
2818 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2819 if (is_hspace ((unsigned int) c
))
2821 if (c
== '\0' || (cur
< buffer
->rlimit
&& is_hspace (*cur
)))
2823 buffer
->cur
= cur
- (c
== '\0'); /* Get the null warning. */
2824 skip_whitespace (pfile
, IS_DIRECTIVE (list
));
2827 flags
= PREV_WHITESPACE
;
2828 if (cur
== buffer
->rlimit
)
2833 /* Initialize current token. Its type is set in the switch. */
2834 cur_token
->col
= COLUMN (cur
);
2835 cur_token
->flags
= flags
;
2840 case '0': case '1': case '2': case '3': case '4':
2841 case '5': case '6': case '7': case '8': case '9':
2842 /* Prepend an immediately previous CPP_DOT token. */
2843 if (PREV_TOKEN_TYPE
== CPP_DOT
&& IMMED_TOKEN ())
2846 if (list
->name_cap
== list
->name_used
)
2847 auto_expand_name_space (list
);
2849 cur_token
->val
.name
.len
= 1;
2850 cur_token
->val
.name
.offset
= list
->name_used
;
2851 list
->namebuf
[list
->name_used
++] = '.';
2854 INIT_NAME (list
, cur_token
->val
.name
);
2855 cur
--; /* Backup character. */
2859 parse_number (pfile
, list
, &cur_token
->val
.name
);
2862 PUSH_TOKEN (CPP_NUMBER
); /* Number not yet interpreted. */
2867 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2868 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2869 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2870 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2872 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2873 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2874 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2875 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2877 INIT_NAME (list
, cur_token
->val
.name
);
2878 cur
--; /* Backup character. */
2879 cur_token
->type
= CPP_NAME
; /* Identifier, macro etc. */
2883 parse_name (pfile
, list
, &cur_token
->val
.name
);
2886 /* Find handler for newly created / extended directive. */
2887 if (IS_DIRECTIVE (list
) && cur_token
== &list
->tokens
[1])
2888 _cpp_check_directive (list
, cur_token
);
2895 cur_token
->type
= c
== '\'' ? CPP_CHAR
: CPP_STRING
;
2896 /* Do we have a wide string? */
2897 if (cur_token
[-1].type
== CPP_NAME
&& IMMED_TOKEN ()
2898 && cur_token
[-1].val
.name
.len
== 1
2899 && *(list
->namebuf
+ cur_token
[-1].val
.name
.offset
) == 'L'
2900 && !CPP_TRADITIONAL (pfile
))
2902 /* No need for 'L' any more. */
2904 (--cur_token
)->type
= (c
== '\'' ? CPP_WCHAR
: CPP_WSTRING
);
2908 /* Here c is one of ' " > or ). */
2909 INIT_NAME (list
, cur_token
->val
.name
);
2911 parse_string (pfile
, list
, &cur_token
->val
.name
, c
);
2917 cur_token
->type
= CPP_DIV
;
2920 if (PREV_TOKEN_TYPE
== CPP_DIV
)
2922 /* We silently allow C++ comments in system headers,
2923 irrespective of conformance mode, because lots of
2924 broken systems do that and trying to clean it up
2925 in fixincludes is a nightmare. */
2926 if (buffer
->system_header_p
)
2927 goto do_line_comment
;
2928 else if (CPP_OPTION (pfile
, cplusplus_comments
))
2930 if (CPP_OPTION (pfile
, c89
) && CPP_PEDANTIC (pfile
)
2931 && ! buffer
->warned_cplusplus_comments
)
2935 "C++ style comments are not allowed in ISO C89");
2937 "(this will be reported only once per input file)");
2938 buffer
->warned_cplusplus_comments
= 1;
2944 "comment start split across lines");
2945 if (skip_line_comment (pfile
))
2946 cpp_error_with_line (pfile
, list
->line
,
2948 "multi-line comment");
2949 if (!CPP_OPTION (pfile
, discard_comments
))
2950 copy_comment (list
, cur
, buffer
->cur
- cur
,
2951 cur_token
- 1 - list
->tokens
, c
== '/'
2952 ? CPP_CPP_COMMENT
: CPP_CHILL_COMMENT
);
2955 /* Back-up to first '-' or '/'. */
2957 if (!CPP_OPTION (pfile
, traditional
))
2958 flags
= PREV_WHITESPACE
;
2966 cur_token
->type
= CPP_MULT
;
2969 if (PREV_TOKEN_TYPE
== CPP_DIV
)
2974 "comment start '/*' split across lines");
2975 if (skip_block_comment (pfile
))
2976 cpp_error_with_line (pfile
, list
->line
, cur_token
[-1].col
,
2977 "unterminated comment");
2978 else if (buffer
->cur
[-2] != '*')
2980 "comment end '*/' split across lines");
2981 if (!CPP_OPTION (pfile
, discard_comments
))
2982 copy_comment (list
, cur
, buffer
->cur
- cur
,
2983 cur_token
- 1 - list
->tokens
, CPP_C_COMMENT
);
2987 if (!CPP_OPTION (pfile
, traditional
))
2988 flags
= PREV_WHITESPACE
;
2990 else if (CPP_OPTION (pfile
, cplusplus
))
2992 /* In C++, there are .* and ->* operators. */
2993 if (PREV_TOKEN_TYPE
== CPP_DEREF
)
2994 BACKUP_TOKEN (CPP_DEREF_STAR
);
2995 else if (PREV_TOKEN_TYPE
== CPP_DOT
)
2996 BACKUP_TOKEN (CPP_DOT_STAR
);
3004 handle_newline (cur
, buffer
->rlimit
, c
);
3005 if (PREV_TOKEN_TYPE
!= CPP_BACKSLASH
|| !IMMED_TOKEN ())
3007 if (PREV_TOKEN_TYPE
== CPP_BACKSLASH
)
3011 "backslash and newline separated by space");
3013 PUSH_TOKEN (CPP_VSPACE
);
3016 /* Remove the escaped newline. Then continue to process
3017 any interrupted name or number. */
3022 if (cur_token
->type
== CPP_NAME
)
3024 else if (cur_token
->type
== CPP_NUMBER
)
3025 goto continue_number
;
3031 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
== CPP_MINUS
)
3033 if (CPP_OPTION (pfile
, chill
))
3034 goto do_line_comment
;
3035 REVISE_TOKEN (CPP_MINUS_MINUS
);
3038 PUSH_TOKEN (CPP_MINUS
);
3041 /* The digraph flag checking ensures that ## and %:%:
3042 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3045 if (PREV_TOKEN_TYPE
== CPP_HASH
&& IMMED_TOKEN ()
3046 && ((cur_token
->flags
^ cur_token
[-1].flags
) & DIGRAPH
) == 0)
3047 REVISE_TOKEN (CPP_PASTE
);
3049 PUSH_TOKEN (CPP_HASH
);
3053 cur_token
->type
= CPP_COLON
;
3056 if (PREV_TOKEN_TYPE
== CPP_COLON
3057 && CPP_OPTION (pfile
, cplusplus
))
3058 BACKUP_TOKEN (CPP_SCOPE
);
3059 /* Digraph: "<:" is a '[' */
3060 else if (PREV_TOKEN_TYPE
== CPP_LESS
)
3061 BACKUP_DIGRAPH (CPP_OPEN_SQUARE
);
3062 /* Digraph: "%:" is a '#' */
3063 else if (PREV_TOKEN_TYPE
== CPP_MOD
)
3065 (--cur_token
)->flags
|= DIGRAPH
;
3073 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
== CPP_AND
)
3074 REVISE_TOKEN (CPP_AND_AND
);
3076 PUSH_TOKEN (CPP_AND
);
3081 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
== CPP_OR
)
3082 REVISE_TOKEN (CPP_OR_OR
);
3084 PUSH_TOKEN (CPP_OR
);
3088 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
== CPP_PLUS
)
3089 REVISE_TOKEN (CPP_PLUS_PLUS
);
3091 PUSH_TOKEN (CPP_PLUS
);
3095 /* This relies on equidistance of "?=" and "?" tokens. */
3096 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
<= CPP_LAST_EQ
)
3097 REVISE_TOKEN (PREV_TOKEN_TYPE
+ (CPP_EQ_EQ
- CPP_EQ
));
3099 PUSH_TOKEN (CPP_EQ
);
3103 cur_token
->type
= CPP_GREATER
;
3106 if (PREV_TOKEN_TYPE
== CPP_GREATER
)
3107 BACKUP_TOKEN (CPP_RSHIFT
);
3108 else if (PREV_TOKEN_TYPE
== CPP_MINUS
)
3109 BACKUP_TOKEN (CPP_DEREF
);
3110 /* Digraph: ":>" is a ']' */
3111 else if (PREV_TOKEN_TYPE
== CPP_COLON
)
3112 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE
);
3113 /* Digraph: "%>" is a '}' */
3114 else if (PREV_TOKEN_TYPE
== CPP_MOD
)
3115 BACKUP_DIGRAPH (CPP_CLOSE_BRACE
);
3121 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
== CPP_LESS
)
3123 REVISE_TOKEN (CPP_LSHIFT
);
3126 /* Is this the beginning of a header name? */
3127 if (list
->dir_flags
& SYNTAX_INCLUDE
)
3129 c
= '>'; /* Terminator. */
3130 cur_token
->type
= CPP_HEADER_NAME
;
3131 goto do_parse_string
;
3133 PUSH_TOKEN (CPP_LESS
);
3137 /* Digraph: "<%" is a '{' */
3138 cur_token
->type
= CPP_MOD
;
3139 if (IMMED_TOKEN () && PREV_TOKEN_TYPE
== CPP_LESS
)
3140 BACKUP_DIGRAPH (CPP_OPEN_BRACE
);
3145 /* Is this the beginning of an assertion string? */
3146 if (list
->dir_flags
& SYNTAX_ASSERT
)
3148 c
= ')'; /* Terminator. */
3149 cur_token
->type
= CPP_ASSERTION
;
3150 goto do_parse_string
;
3152 PUSH_TOKEN (CPP_OPEN_PAREN
);
3156 if (cur
+ 1 < buffer
->rlimit
&& *cur
== '?'
3157 && trigraph_map
[cur
[1]] && trigraph_ok (pfile
, cur
+ 1))
3159 /* Handle trigraph. */
3163 case '(': goto make_open_square
;
3164 case ')': goto make_close_square
;
3165 case '<': goto make_open_brace
;
3166 case '>': goto make_close_brace
;
3167 case '=': goto make_hash
;
3168 case '!': goto make_or
;
3169 case '-': goto make_complement
;
3170 case '/': goto make_backslash
;
3171 case '\'': goto make_xor
;
3174 if (IMMED_TOKEN () && CPP_OPTION (pfile
, cplusplus
))
3176 /* GNU C++ defines <? and >? operators. */
3177 if (PREV_TOKEN_TYPE
== CPP_LESS
)
3179 REVISE_TOKEN (CPP_MIN
);
3182 else if (PREV_TOKEN_TYPE
== CPP_GREATER
)
3184 REVISE_TOKEN (CPP_MAX
);
3188 PUSH_TOKEN (CPP_QUERY
);
3192 if (PREV_TOKEN_TYPE
== CPP_DOT
&& cur_token
[-2].type
== CPP_DOT
3194 && !(cur_token
[-1].flags
& PREV_WHITESPACE
))
3197 PUSH_TOKEN (CPP_ELLIPSIS
);
3200 PUSH_TOKEN (CPP_DOT
);
3204 case '~': PUSH_TOKEN (CPP_COMPL
); break;
3206 case '^': PUSH_TOKEN (CPP_XOR
); break;
3208 case '{': PUSH_TOKEN (CPP_OPEN_BRACE
); break;
3210 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE
); break;
3212 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE
); break;
3214 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE
); break;
3216 case '\\': PUSH_TOKEN (CPP_BACKSLASH
); break;
3217 case '!': PUSH_TOKEN (CPP_NOT
); break;
3218 case ',': PUSH_TOKEN (CPP_COMMA
); break;
3219 case ';': PUSH_TOKEN (CPP_SEMICOLON
); break;
3220 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN
); break;
3223 if (CPP_OPTION (pfile
, dollars_in_ident
))
3228 PUSH_TOKEN (CPP_OTHER
);
3233 /* Run out of token space? */
3234 if (cur_token
== token_limit
)
3236 list
->tokens_used
= cur_token
- list
->tokens
;
3237 expand_token_space (list
);
3241 cur_token
->type
= CPP_EOF
;
3242 cur_token
->flags
= flags
;
3244 if (cur_token
!= &list
->tokens
[0])
3246 /* Next call back will get just a CPP_EOF. */
3248 cpp_warning (pfile
, "no newline at end of file");
3249 PUSH_TOKEN (CPP_VSPACE
);
3255 list
->tokens_used
= cur_token
- list
->tokens
;
3257 /* FIXME: take this check out and put it in the caller.
3258 list->directive == 0 indicates an unknown directive (but null
3259 directive is OK). This is the first time we can be sure the
3260 directive is invalid, and thus warn about it, because it might
3261 have been split by escaped newlines. Also, don't complain about
3262 invalid directives in assembly source, we don't know where the
3263 comments are, and # may introduce assembler pseudo-ops. */
3265 if (IS_DIRECTIVE (list
) && list
->dir_handler
== 0
3266 && list
->tokens
[1].type
!= CPP_VSPACE
3267 && !CPP_OPTION (pfile
, lang_asm
))
3268 cpp_error_with_line (pfile
, list
->line
, list
->tokens
[1].col
,
3269 "invalid preprocessing directive");
3272 /* Token spelling functions, used for output of a preprocessed file,
3273 stringizing and token pasting. They all assume a sufficiently large
3274 buffer has been allocated, and return exactly how many bytes they used. */
3276 /* Needs buffer of 3 + len.
   Spells a string or character-constant TOKEN into BUFFER.  The token
   text (LEN bytes) is copied from LIST->namebuf at the token's name
   offset, and the value returned is how far BUFFER has advanced from
   its starting position.
   NOTE(review): the statements that emit the wide prefix and the
   delimiters are not visible in this excerpt; presumably they account
   for the extra 3 bytes -- confirm against the full file.  */
3278 spell_string (buffer
, list
, token
)
3279 unsigned char *buffer
;
/* ORIG_BUFF remembers where output started so the amount written can
   be computed on return.  */
3283 unsigned char c
, *orig_buff
= buffer
;
/* Wide string and wide character tokens are singled out here.  */
3286 if (token
->type
== CPP_WSTRING
|| token
->type
== CPP_WCHAR
)
/* Delimiter choice: a double quote for CPP_STRING and CPP_WSTRING,
   a single quote for every other token type.  */
3288 c
= token
->type
== CPP_STRING
|| token
->type
== CPP_WSTRING
? '"': '\'';
3291 len
= token
->val
.name
.len
;
/* Copy the token text out of LIST->namebuf.  */
3292 memcpy (buffer
, list
->namebuf
+ token
->val
.name
.offset
, len
);
/* Number of bytes between the starting and current output position.  */
3295 return buffer
- orig_buff
;
3298 /* Needs buffer of len + 2.
   Spells a comment TOKEN into BUFFER.  The comment text (LEN bytes) is
   copied from LIST->namebuf at the token's name offset.
   NOTE(review): the bodies of the type tests below are not visible in
   this excerpt; presumably they emit the two-character comment opener
   that the "+ 2" allows for -- confirm against the full file.  */
3300 spell_comment (buffer
, list
, token
)
3301 unsigned char *buffer
;
/* Dispatch on the comment flavour: C block comment first ...  */
3307 if (token
->type
== CPP_C_COMMENT
)
/* ... then C++ line comment; any other type falls through both tests.  */
3312 else if (token
->type
== CPP_CPP_COMMENT
)
3323 len
= token
->val
.name
.len
;
/* Copy the comment text out of LIST->namebuf.  */
3324 memcpy (buffer
, list
->namebuf
+ token
->val
.name
.offset
, len
);
3329 /* Needs buffer of len.
   Spells a name TOKEN into BUFFER by copying its LEN bytes of text
   verbatim from LIST->namebuf at the token's name offset; nothing is
   added around the text in the visible code.  */
3331 spell_name (buffer
, list
, token
)
3332 unsigned char *buffer
;
/* Length of the token's spelling, as recorded in the token itself.  */
3338 len
= token
->val
.name
.len
;
3339 memcpy (buffer
, list
->namebuf
+ token
->val
.name
.offset
, len
);
/* Driver loop for PFILE: repeatedly lexes one line into a token list
   with _cpp_lex_line, then either hands the list to its directive
   handler or writes it out with _cpp_output_list.
   NOTE(review): the loop exits, the else branches and any updates to
   RECYCLE are not visible in this excerpt; the comments below describe
   only the statements that can be seen.  */
3346 _cpp_lex_file (pfile
)
/* One-time setup before lexing starts.  */
3352 init_trigraph_map ();
/* The token list is heap-allocated with xmalloc.  */
3353 list
= (cpp_toklist
*) xmalloc (sizeof (cpp_toklist
));
/* No loop condition: termination must come from inside the body.  */
3355 for (recycle
= 0; ;)
3357 init_token_list (pfile
, list
, recycle
);
3360 _cpp_lex_line (pfile
, list
);
/* A list whose first token is CPP_EOF is tested for here (presumably
   the end-of-input exit -- confirm).  */
3361 if (list
->tokens
[0].type
== CPP_EOF
)
/* A non-null dir_handler means the line is a recognised directive.  */
3364 if (list
->dir_handler
)
/* A nonzero return from the handler is followed by allocating a fresh
   list (presumably because the handler kept the old one -- confirm).  */
3366 if (list
->dir_handler (pfile
))
3368 list
= (cpp_toklist
*) xmalloc (sizeof (cpp_toklist
));
3373 _cpp_output_list (pfile
, list
);
3377 /* This could be useful to other routines. If you allocate this many
3378 bytes, you have enough room to spell the token.
   The result is 4, plus the token's name length when the token's
   spelling type is SPELL_HANDLER.
   NOTE(review): TOKEN is not parenthesized in the expansion and is
   evaluated twice, so pass only a plain pointer expression with no
   side effects.  */
3379 #define TOKEN_LEN(token) (4 + (token_spellings[token->type].type == \
3380 SPELL_HANDLER ? token->val.name.len: 0))
3383 _cpp_output_list (pfile
, list
)
3387 unsigned int comment_no
= 0;
3388 cpp_token
*token
, *comment_token
= 0;
3390 if (list
->comments_used
> 0)
3391 comment_token
= list
->tokens
+ list
->comments
[0].aux
;
3393 CPP_RESERVE (pfile
, 2); /* Always have room for " \n". */
3394 for (token
= &list
->tokens
[0];; token
++)
3396 if (token
->flags
& PREV_WHITESPACE
)
3398 /* Output comments if -C. Otherwise a space will do. */
3399 if (token
== comment_token
)
3401 cpp_token
*comment
= &list
->comments
[comment_no
];
3404 CPP_RESERVE (pfile
, 2 + TOKEN_LEN (comment
));
3405 pfile
->limit
+= spell_comment (pfile
->limit
, list
, comment
);
3406 comment_no
++, comment
++;
3407 if (comment_no
== list
->comments_used
)
3409 comment_token
= comment
->aux
+ list
->tokens
;
3411 while (comment_token
== token
);
3414 CPP_PUTC_Q (pfile
, ' ');
3417 CPP_RESERVE (pfile
, 2 + TOKEN_LEN (token
));
3418 switch (token_spellings
[token
->type
].type
)
3422 const unsigned char *spelling
;
3425 if (token
->flags
& DIGRAPH
)
3426 spelling
= digraph_spellings
[token
->type
- CPP_FIRST_DIGRAPH
];
3428 spelling
= token_spellings
[token
->type
].speller
;
3430 while ((c
= *spelling
++) != '\0')
3431 CPP_PUTC_Q (pfile
, c
);
3439 s
= (speller
) token_spellings
[token
->type
].speller
;
3440 pfile
->limit
+= s (pfile
->limit
, list
, token
);
3445 *pfile
->limit
++ = token
->aux
;
3449 CPP_PUTC_Q (pfile
, '\n');
3453 cpp_error (pfile
, "Unwriteable token");