Use new tail-calling mechanism on ARM.
[official-gcc.git] / gcc / cpplex.c
blob78df852670dd4ae62b45af674f7323f6f8658b8a
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "intl.h"
26 #include "cpplib.h"
27 #include "cpphash.h"
/* Primitive operations on a buffer's read cursor.  BUFFER must have
   `cur' and `rlimit' members; PEEKBUF and GETBUF evaluate it more
   than once, so it must be free of side effects.  */

/* The character N places ahead of the cursor, or EOF when fewer than
   N + 1 characters remain before rlimit.  The cursor is not moved.  */
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)

/* The character at the cursor, stepping past it; EOF (cursor left
   alone) once the cursor is no longer below rlimit.  */
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)

/* Move the cursor by N characters.  N may be negative; there is no
   bounds check.  */
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to CPP_BUFFER (pfile); these need a
   variable named `pfile' in scope at the point of use.  */
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
51 size_t, FILE *));
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
53 unsigned int));
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
55 unsigned int));
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void expand_token_space PARAMS ((cpp_toklist *));
58 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
59 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
60 unsigned int));
62 #define auto_expand_name_space(list) \
63 expand_name_space ((list), (list)->name_cap / 2)
65 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
67 void
68 _cpp_grow_token_buffer (pfile, n)
69 cpp_reader *pfile;
70 long n;
72 long old_written = CPP_WRITTEN (pfile);
73 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
74 pfile->token_buffer = (U_CHAR *)
75 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
76 CPP_SET_WRITTEN (pfile, old_written);
79 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
80 If BUFFER != NULL, then use the LENGTH characters in BUFFER
81 as the new input buffer.
82 Return the new buffer, or NULL on failure. */
84 cpp_buffer *
85 cpp_push_buffer (pfile, buffer, length)
86 cpp_reader *pfile;
87 const U_CHAR *buffer;
88 long length;
90 cpp_buffer *buf = CPP_BUFFER (pfile);
91 cpp_buffer *new;
92 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
94 cpp_fatal (pfile, "macro or `#include' recursion too deep");
95 return NULL;
98 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
100 new->if_stack = pfile->if_stack;
101 new->buf = new->cur = buffer;
102 new->rlimit = buffer + length;
103 new->prev = buf;
104 new->mark = NULL;
105 new->line_base = NULL;
107 CPP_BUFFER (pfile) = new;
108 return new;
111 cpp_buffer *
112 cpp_pop_buffer (pfile)
113 cpp_reader *pfile;
115 cpp_buffer *buf = CPP_BUFFER (pfile);
116 if (ACTIVE_MARK_P (pfile))
117 cpp_ice (pfile, "mark active in cpp_pop_buffer");
119 if (buf->ihash)
121 _cpp_unwind_if_stack (pfile, buf);
122 if (buf->buf)
123 free ((PTR) buf->buf);
124 if (pfile->system_include_depth)
125 pfile->system_include_depth--;
126 if (pfile->potential_control_macro)
128 buf->ihash->control_macro = pfile->potential_control_macro;
129 pfile->potential_control_macro = 0;
131 pfile->input_stack_listing_current = 0;
133 else if (buf->macro)
135 HASHNODE *m = buf->macro;
137 m->disabled = 0;
138 if ((m->type == T_FMACRO && buf->mapped)
139 || m->type == T_SPECLINE || m->type == T_FILE
140 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
141 || m->type == T_STDC)
142 free ((PTR) buf->buf);
144 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
145 free (buf);
146 pfile->buffer_stack_depth--;
147 return CPP_BUFFER (pfile);
150 /* Deal with the annoying semantics of fwrite. */
151 static void
152 safe_fwrite (pfile, buf, len, fp)
153 cpp_reader *pfile;
154 const U_CHAR *buf;
155 size_t len;
156 FILE *fp;
158 size_t count;
160 while (len)
162 count = fwrite (buf, 1, len, fp);
163 if (count == 0)
164 goto error;
165 len -= count;
166 buf += count;
168 return;
170 error:
171 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
174 /* Notify the compiler proper that the current line number has jumped,
175 or the current file name has changed. */
177 static void
178 output_line_command (pfile, print, line)
179 cpp_reader *pfile;
180 cpp_printer *print;
181 unsigned int line;
183 cpp_buffer *ip = cpp_file_buffer (pfile);
184 enum { same = 0, enter, leave, rname } change;
185 static const char * const codes[] = { "", " 1", " 2", "" };
187 if (CPP_OPTION (pfile, no_line_commands))
188 return;
190 /* Determine whether the current filename has changed, and if so,
191 how. 'nominal_fname' values are unique, so they can be compared
192 by comparing pointers. */
193 if (ip->nominal_fname == print->last_fname)
194 change = same;
195 else
197 if (pfile->buffer_stack_depth == print->last_bsd)
198 change = rname;
199 else
201 if (pfile->buffer_stack_depth > print->last_bsd)
202 change = enter;
203 else
204 change = leave;
205 print->last_bsd = pfile->buffer_stack_depth;
207 print->last_fname = ip->nominal_fname;
209 /* If the current file has not changed, we can output a few newlines
210 instead if we want to increase the line number by a small amount.
211 We cannot do this if print->lineno is zero, because that means we
212 haven't output any line commands yet. (The very first line
213 command output is a `same_file' command.) */
214 if (change == same && print->lineno != 0
215 && line >= print->lineno && line < print->lineno + 8)
217 while (line > print->lineno)
219 putc ('\n', print->outf);
220 print->lineno++;
222 return;
225 #ifndef NO_IMPLICIT_EXTERN_C
226 if (CPP_OPTION (pfile, cplusplus))
227 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
228 codes[change],
229 ip->system_header_p ? " 3" : "",
230 (ip->system_header_p == 2) ? " 4" : "");
231 else
232 #endif
233 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
234 codes[change],
235 ip->system_header_p ? " 3" : "");
236 print->lineno = line;
239 /* Write the contents of the token_buffer to the output stream, and
240 clear the token_buffer. Also handles generating line commands and
241 keeping track of file transitions. */
243 void
244 cpp_output_tokens (pfile, print)
245 cpp_reader *pfile;
246 cpp_printer *print;
248 cpp_buffer *ip;
250 if (CPP_WRITTEN (pfile) - print->written)
252 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
253 print->lineno++;
254 safe_fwrite (pfile, pfile->token_buffer,
255 CPP_WRITTEN (pfile) - print->written, print->outf);
258 ip = cpp_file_buffer (pfile);
259 if (ip)
260 output_line_command (pfile, print, CPP_BUF_LINE (ip));
262 CPP_SET_WRITTEN (pfile, print->written);
265 /* Helper for cpp_output_list - increases the column number to match
266 what we expect it to be. */
268 static void
269 bump_column (print, from, to)
270 cpp_printer *print;
271 unsigned int from, to;
273 unsigned int tabs, spcs;
274 unsigned int delta = to - from;
276 /* Only if FROM is 0, advance by tabs. */
277 if (from == 0)
278 tabs = delta / 8, spcs = delta % 8;
279 else
280 tabs = 0, spcs = delta;
282 while (tabs--) putc ('\t', print->outf);
283 while (spcs--) putc (' ', print->outf);
286 /* Write out the list L onto pfile->token_buffer. This function is
287 incomplete:
289 1) pfile->token_buffer is not going to continue to exist.
290 2) At the moment, tokens don't carry the information described
291 in cpplib.h; they are all strings.
292 3) The list has to be a complete line, and has to be written starting
293 at the beginning of a line. */
295 void
296 cpp_output_list (pfile, print, list)
297 cpp_reader *pfile;
298 cpp_printer *print;
299 const cpp_toklist *list;
301 unsigned int i;
302 unsigned int curcol = 1;
304 /* XXX Probably does not do what is intended. */
305 if (print->lineno != list->line)
306 output_line_command (pfile, print, list->line);
308 for (i = 0; i < list->tokens_used; i++)
310 if (TOK_TYPE (list, i) == CPP_VSPACE)
312 output_line_command (pfile, print, list->tokens[i].aux);
313 continue;
316 if (curcol < TOK_COL (list, i))
318 /* Insert space to bring the column to what it should be. */
319 bump_column (print, curcol - 1, TOK_COL (list, i));
320 curcol = TOK_COL (list, i);
322 /* XXX We may have to insert space to prevent an accidental
323 token paste. */
324 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
325 curcol += TOK_LEN (list, i);
329 /* Scan a string (which may have escape marks), perform macro expansion,
330 and write the result to the token_buffer. */
332 void
333 _cpp_expand_to_buffer (pfile, buf, length)
334 cpp_reader *pfile;
335 const U_CHAR *buf;
336 int length;
338 cpp_buffer *stop;
339 enum cpp_ttype token;
340 U_CHAR *buf1;
342 if (length < 0)
344 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
345 return;
348 /* Copy the buffer, because it might be in an unsafe place - for
349 example, a sequence on the token_buffer, where the pointers will
350 be invalidated if we enlarge the token_buffer. */
351 buf1 = alloca (length);
352 memcpy (buf1, buf, length);
354 /* Set up the input on the input stack. */
355 stop = CPP_BUFFER (pfile);
356 if (cpp_push_buffer (pfile, buf1, length) == NULL)
357 return;
358 CPP_BUFFER (pfile)->has_escapes = 1;
360 /* Scan the input, create the output. */
361 for (;;)
363 token = cpp_get_token (pfile);
364 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
365 break;
369 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
371 void
372 cpp_scan_buffer_nooutput (pfile)
373 cpp_reader *pfile;
375 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
376 enum cpp_ttype token;
377 unsigned int old_written = CPP_WRITTEN (pfile);
378 /* In no-output mode, we can ignore everything but directives. */
379 for (;;)
381 if (! pfile->only_seen_white)
382 _cpp_skip_rest_of_line (pfile);
383 token = cpp_get_token (pfile);
384 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
385 break;
387 CPP_SET_WRITTEN (pfile, old_written);
390 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
392 void
393 cpp_scan_buffer (pfile, print)
394 cpp_reader *pfile;
395 cpp_printer *print;
397 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
398 enum cpp_ttype token;
400 for (;;)
402 token = cpp_get_token (pfile);
403 if (token == CPP_EOF || token == CPP_VSPACE
404 /* XXX Temporary kluge - force flush after #include only */
405 || (token == CPP_DIRECTIVE
406 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
408 cpp_output_tokens (pfile, print);
409 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
410 return;
415 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
417 cpp_buffer *
418 cpp_file_buffer (pfile)
419 cpp_reader *pfile;
421 cpp_buffer *ip;
423 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
424 if (ip->ihash != NULL)
425 return ip;
426 return NULL;
429 /* Token-buffer helper functions. */
431 /* Expand a token list's string space. */
432 static void
433 expand_name_space (list, len)
434 cpp_toklist *list;
435 unsigned int len;
437 list->name_cap += len;
438 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
441 /* Expand the number of tokens in a list. */
442 static void
443 expand_token_space (list)
444 cpp_toklist *list;
446 list->tokens_cap *= 2;
447 list->tokens = (cpp_token *)
448 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
449 list->tokens++; /* Skip the dummy. */
452 /* Initialize a token list. We allocate an extra token in front of
453 the token list, as this allows us to always peek at the previous
454 token without worrying about underflowing the list. */
455 static void
456 init_token_list (pfile, list, recycle)
457 cpp_reader *pfile;
458 cpp_toklist *list;
459 int recycle;
461 /* Recycling a used list saves 3 free-malloc pairs. */
462 if (!recycle)
464 /* Initialize token space. Put a dummy token before the start
465 that will fail matches. */
466 list->tokens_cap = 256; /* 4K's worth. */
467 list->tokens = (cpp_token *)
468 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
469 list->tokens[0].type = CPP_EOF;
470 list->tokens++;
472 /* Initialize name space. */
473 list->name_cap = 1024;
474 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
476 /* Only create a comment space on demand. */
477 list->comments_cap = 0;
478 list->comments = 0;
481 list->tokens_used = 0;
482 list->name_used = 0;
483 list->comments_used = 0;
484 if (pfile->buffer)
485 list->line = pfile->buffer->lineno;
486 list->dir_handler = 0;
487 list->dir_flags = 0;
490 /* Scan an entire line and create a token list for it. Does not
491 macro-expand or execute directives. */
493 void
494 _cpp_scan_line (pfile, list)
495 cpp_reader *pfile;
496 cpp_toklist *list;
498 int i, col;
499 long written, len;
500 enum cpp_ttype type;
501 int space_before;
503 init_token_list (pfile, list, 1);
505 written = CPP_WRITTEN (pfile);
506 i = 0;
507 space_before = 0;
508 for (;;)
510 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
511 type = _cpp_lex_token (pfile);
512 len = CPP_WRITTEN (pfile) - written;
513 CPP_SET_WRITTEN (pfile, written);
514 if (type == CPP_HSPACE)
516 if (CPP_PEDANTIC (pfile))
517 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
518 space_before = 1;
519 continue;
521 else if (type == CPP_COMMENT)
522 /* Only happens when processing -traditional macro definitions.
523 Do not give this a token entry, but do not change space_before
524 either. */
525 continue;
527 if (list->tokens_used >= list->tokens_cap)
528 expand_token_space (list);
529 if (list->name_used + len >= list->name_cap)
530 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
532 if (type == CPP_MACRO)
533 type = CPP_NAME;
535 list->tokens_used++;
536 TOK_TYPE (list, i) = type;
537 TOK_COL (list, i) = col;
538 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
540 if (type == CPP_VSPACE)
541 break;
543 TOK_LEN (list, i) = len;
544 TOK_OFFSET (list, i) = list->name_used;
545 memcpy (TOK_NAME (list, i), CPP_PWRITTEN (pfile), len);
546 list->name_used += len;
547 i++;
548 space_before = 0;
550 TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
552 /* XXX Temporary kluge: put back the newline. */
553 FORWARD(-1);
557 /* Skip a C-style block comment. We know it's a comment, and point is
558 at the second character of the starter. */
559 static void
560 skip_block_comment (pfile)
561 cpp_reader *pfile;
563 unsigned int line, col;
564 const U_CHAR *limit, *cur;
566 FORWARD(1);
567 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
568 col = CPP_BUF_COL (CPP_BUFFER (pfile));
569 limit = CPP_BUFFER (pfile)->rlimit;
570 cur = CPP_BUFFER (pfile)->cur;
572 while (cur < limit)
574 char c = *cur++;
575 if (c == '\n' || c == '\r')
577 /* \r cannot be a macro escape marker here. */
578 if (!ACTIVE_MARK_P (pfile))
579 CPP_BUMP_LINE_CUR (pfile, cur);
581 else if (c == '*')
583 /* Check for teminator. */
584 if (cur < limit && *cur == '/')
585 goto out;
587 /* Warn about comment starter embedded in comment. */
588 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
589 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
590 cur - CPP_BUFFER (pfile)->line_base,
591 "'/*' within comment");
595 cpp_error_with_line (pfile, line, col, "unterminated comment");
596 cur--;
597 out:
598 CPP_BUFFER (pfile)->cur = cur + 1;
601 /* Skip a C++/Chill line comment. We know it's a comment, and point
602 is at the second character of the initiator. */
603 static void
604 skip_line_comment (pfile)
605 cpp_reader *pfile;
607 FORWARD(1);
608 for (;;)
610 int c = GETC ();
612 /* We don't have to worry about EOF in here. */
613 if (c == '\n')
615 /* Don't consider final '\n' to be part of comment. */
616 FORWARD(-1);
617 return;
619 else if (c == '\r')
621 /* \r cannot be a macro escape marker here. */
622 if (!ACTIVE_MARK_P (pfile))
623 CPP_BUMP_LINE (pfile);
624 if (CPP_OPTION (pfile, warn_comments))
625 cpp_warning (pfile, "backslash-newline within line comment");
630 /* Skip a comment - C, C++, or Chill style. M is the first character
631 of the comment marker. If this really is a comment, skip to its
632 end and return ' '. If this is not a comment, return M (which will
633 be '/' or '-'). */
635 static int
636 skip_comment (pfile, m)
637 cpp_reader *pfile;
638 int m;
640 if (m == '/' && PEEKC() == '*')
642 skip_block_comment (pfile);
643 return ' ';
645 else if (m == '/' && PEEKC() == '/')
647 if (CPP_BUFFER (pfile)->system_header_p)
649 /* We silently allow C++ comments in system headers, irrespective
650 of conformance mode, because lots of busted systems do that
651 and trying to clean it up in fixincludes is a nightmare. */
652 skip_line_comment (pfile);
653 return ' ';
655 else if (CPP_OPTION (pfile, cplusplus_comments))
657 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
659 if (CPP_WTRADITIONAL (pfile))
660 cpp_pedwarn (pfile,
661 "C++ style comments are not allowed in traditional C");
662 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
663 cpp_pedwarn (pfile,
664 "C++ style comments are not allowed in ISO C89");
665 if (CPP_WTRADITIONAL (pfile)
666 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
667 cpp_pedwarn (pfile,
668 "(this will be reported only once per input file)");
669 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
671 skip_line_comment (pfile);
672 return ' ';
674 else
675 return m;
677 else if (m == '-' && PEEKC() == '-'
678 && CPP_OPTION (pfile, chill))
680 skip_line_comment (pfile);
681 return ' ';
683 else
684 return m;
687 /* Identical to skip_comment except that it copies the comment into the
688 token_buffer. This is used if !discard_comments. */
689 static int
690 copy_comment (pfile, m)
691 cpp_reader *pfile;
692 int m;
694 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
695 const U_CHAR *limit;
697 if (skip_comment (pfile, m) == m)
698 return m;
700 limit = CPP_BUFFER (pfile)->cur;
701 CPP_RESERVE (pfile, limit - start + 2);
702 CPP_PUTC_Q (pfile, m);
703 for (; start <= limit; start++)
704 if (*start != '\r')
705 CPP_PUTC_Q (pfile, *start);
707 return ' ';
710 static void
711 null_warning (pfile, count)
712 cpp_reader *pfile;
713 unsigned int count;
715 if (count == 1)
716 cpp_warning (pfile, "embedded null character ignored");
717 else
718 cpp_warning (pfile, "embedded null characters ignored");
721 /* Skip whitespace \-newline and comments. Does not macro-expand. */
723 void
724 _cpp_skip_hspace (pfile)
725 cpp_reader *pfile;
727 unsigned int null_count = 0;
728 int c;
730 while (1)
732 c = GETC();
733 if (c == EOF)
734 goto out;
735 else if (is_hspace(c))
737 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
738 cpp_pedwarn (pfile, "%s in preprocessing directive",
739 c == '\f' ? "formfeed" : "vertical tab");
740 else if (c == '\0')
741 null_count++;
743 else if (c == '\r')
745 /* \r is a backslash-newline marker if !has_escapes, and
746 a deletable-whitespace or no-reexpansion marker otherwise. */
747 if (CPP_BUFFER (pfile)->has_escapes)
749 if (PEEKC() == ' ')
750 FORWARD(1);
751 else
752 break;
754 else
755 CPP_BUMP_LINE (pfile);
757 else if (c == '/' || c == '-')
759 c = skip_comment (pfile, c);
760 if (c != ' ')
761 break;
763 else
764 break;
766 FORWARD(-1);
767 out:
768 if (null_count)
769 null_warning (pfile, null_count);
772 /* Read and discard the rest of the current line. */
774 void
775 _cpp_skip_rest_of_line (pfile)
776 cpp_reader *pfile;
778 for (;;)
780 int c = GETC();
781 switch (c)
783 case '\n':
784 FORWARD(-1);
785 case EOF:
786 return;
788 case '\r':
789 if (! CPP_BUFFER (pfile)->has_escapes)
790 CPP_BUMP_LINE (pfile);
791 break;
793 case '\'':
794 case '\"':
795 skip_string (pfile, c);
796 break;
798 case '/':
799 case '-':
800 skip_comment (pfile, c);
801 break;
803 case '\f':
804 case '\v':
805 if (CPP_PEDANTIC (pfile))
806 cpp_pedwarn (pfile, "%s in preprocessing directive",
807 c == '\f' ? "formfeed" : "vertical tab");
808 break;
814 /* Parse an identifier starting with C. */
816 void
817 _cpp_parse_name (pfile, c)
818 cpp_reader *pfile;
819 int c;
821 for (;;)
823 if (! is_idchar(c))
825 FORWARD (-1);
826 break;
829 if (c == '$' && CPP_PEDANTIC (pfile))
830 cpp_pedwarn (pfile, "`$' in identifier");
832 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
833 CPP_PUTC_Q (pfile, c);
834 c = GETC();
835 if (c == EOF)
836 break;
838 return;
841 /* Parse and skip over a string starting with C. A single quoted
842 string is treated like a double -- some programs (e.g., troff) are
843 perverse this way. (However, a single quoted string is not allowed
844 to extend over multiple lines.) */
845 static void
846 skip_string (pfile, c)
847 cpp_reader *pfile;
848 int c;
850 unsigned int start_line, start_column;
851 unsigned int null_count = 0;
853 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
854 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
855 while (1)
857 int cc = GETC();
858 switch (cc)
860 case EOF:
861 cpp_error_with_line (pfile, start_line, start_column,
862 "unterminated string or character constant");
863 if (pfile->multiline_string_line != start_line
864 && pfile->multiline_string_line != 0)
865 cpp_error_with_line (pfile,
866 pfile->multiline_string_line, -1,
867 "possible real start of unterminated constant");
868 pfile->multiline_string_line = 0;
869 goto out;
871 case '\0':
872 null_count++;
873 break;
875 case '\n':
876 CPP_BUMP_LINE (pfile);
877 /* In Fortran and assembly language, silently terminate
878 strings of either variety at end of line. This is a
879 kludge around not knowing where comments are in these
880 languages. */
881 if (CPP_OPTION (pfile, lang_fortran)
882 || CPP_OPTION (pfile, lang_asm))
884 FORWARD(-1);
885 goto out;
887 /* Character constants may not extend over multiple lines.
888 In Standard C, neither may strings. We accept multiline
889 strings as an extension. */
890 if (c == '\'')
892 cpp_error_with_line (pfile, start_line, start_column,
893 "unterminated character constant");
894 FORWARD(-1);
895 goto out;
897 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
898 cpp_pedwarn_with_line (pfile, start_line, start_column,
899 "string constant runs past end of line");
900 if (pfile->multiline_string_line == 0)
901 pfile->multiline_string_line = start_line;
902 break;
904 case '\r':
905 if (CPP_BUFFER (pfile)->has_escapes)
907 cpp_ice (pfile, "\\r escape inside string constant");
908 FORWARD(1);
910 else
911 /* Backslash newline is replaced by nothing at all. */
912 CPP_BUMP_LINE (pfile);
913 break;
915 case '\\':
916 FORWARD(1);
917 break;
919 case '\"':
920 case '\'':
921 if (cc == c)
922 goto out;
923 break;
927 out:
928 if (null_count == 1)
929 cpp_warning (pfile, "null character in string or character constant");
930 else if (null_count > 1)
931 cpp_warning (pfile, "null characters in string or character constant");
934 /* Parse a string and copy it to the output. */
936 static void
937 parse_string (pfile, c)
938 cpp_reader *pfile;
939 int c;
941 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
942 const U_CHAR *limit;
944 skip_string (pfile, c);
946 limit = CPP_BUFFER (pfile)->cur;
947 CPP_RESERVE (pfile, limit - start + 2);
948 CPP_PUTC_Q (pfile, c);
949 for (; start < limit; start++)
950 if (*start != '\r')
951 CPP_PUTC_Q (pfile, *start);
954 /* Read an assertion into the token buffer, converting to
955 canonical form: `#predicate(a n swe r)' The next non-whitespace
956 character to read should be the first letter of the predicate.
957 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
958 with answer (see callers for why). In case of 0, an error has been
959 printed. */
961 _cpp_parse_assertion (pfile)
962 cpp_reader *pfile;
964 int c, dropwhite;
965 _cpp_skip_hspace (pfile);
966 c = PEEKC();
967 if (c == '\n')
969 cpp_error (pfile, "assertion without predicate");
970 return 0;
972 else if (! is_idstart(c))
974 cpp_error (pfile, "assertion predicate is not an identifier");
975 return 0;
977 CPP_PUTC(pfile, '#');
978 FORWARD(1);
979 _cpp_parse_name (pfile, c);
981 c = PEEKC();
982 if (c != '(')
984 if (is_hspace(c) || c == '\r')
985 _cpp_skip_hspace (pfile);
986 c = PEEKC();
988 if (c != '(')
989 return 1;
991 CPP_PUTC(pfile, '(');
992 FORWARD(1);
993 dropwhite = 1;
994 while ((c = GETC()) != ')')
996 if (is_space(c))
998 if (! dropwhite)
1000 CPP_PUTC(pfile, ' ');
1001 dropwhite = 1;
1004 else if (c == '\n' || c == EOF)
1006 if (c == '\n') FORWARD(-1);
1007 cpp_error (pfile, "un-terminated assertion answer");
1008 return 0;
1010 else if (c == '\r')
1011 /* \r cannot be a macro escape here. */
1012 CPP_BUMP_LINE (pfile);
1013 else
1015 CPP_PUTC (pfile, c);
1016 dropwhite = 0;
1020 if (pfile->limit[-1] == ' ')
1021 pfile->limit[-1] = ')';
1022 else if (pfile->limit[-1] == '(')
1024 cpp_error (pfile, "empty token sequence in assertion");
1025 return 0;
1027 else
1028 CPP_PUTC (pfile, ')');
1030 return 2;
1033 /* Get the next token, and add it to the text in pfile->token_buffer.
1034 Return the kind of token we got. */
1036 enum cpp_ttype
1037 _cpp_lex_token (pfile)
1038 cpp_reader *pfile;
1040 register int c, c2;
1041 enum cpp_ttype token;
1043 if (CPP_BUFFER (pfile) == NULL)
1044 return CPP_EOF;
1046 get_next:
1047 c = GETC();
1048 switch (c)
1050 case EOF:
1051 return CPP_EOF;
1053 case '/':
1054 if (PEEKC () == '=')
1055 goto op2;
1057 comment:
1058 if (CPP_OPTION (pfile, discard_comments))
1059 c = skip_comment (pfile, c);
1060 else
1061 c = copy_comment (pfile, c);
1062 if (c != ' ')
1063 goto randomchar;
1065 /* Comments are equivalent to spaces.
1066 For -traditional, a comment is equivalent to nothing. */
1067 if (!CPP_OPTION (pfile, discard_comments))
1068 return CPP_COMMENT;
1069 else if (CPP_TRADITIONAL (pfile))
1071 if (pfile->parsing_define_directive)
1072 return CPP_COMMENT;
1073 goto get_next;
1075 else
1077 CPP_PUTC (pfile, c);
1078 return CPP_HSPACE;
1081 case '#':
1082 CPP_PUTC (pfile, c);
1084 hash:
1085 if (pfile->parsing_if_directive)
1087 CPP_ADJUST_WRITTEN (pfile, -1);
1088 if (_cpp_parse_assertion (pfile))
1089 return CPP_ASSERTION;
1090 return CPP_OTHER;
1093 if (pfile->parsing_define_directive)
1095 c2 = PEEKC ();
1096 if (c2 == '#')
1098 FORWARD (1);
1099 CPP_PUTC (pfile, c2);
1101 else if (c2 == '%' && PEEKN (1) == ':')
1103 /* Digraph: "%:" == "#". */
1104 FORWARD (1);
1105 CPP_RESERVE (pfile, 2);
1106 CPP_PUTC_Q (pfile, c2);
1107 CPP_PUTC_Q (pfile, GETC ());
1109 else
1110 return CPP_HASH;
1112 return CPP_PASTE;
1115 if (!pfile->only_seen_white)
1116 return CPP_OTHER;
1118 /* Remove the "#" or "%:" from the token buffer. */
1119 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1120 return CPP_DIRECTIVE;
1122 case '\"':
1123 case '\'':
1124 parse_string (pfile, c);
1125 return c == '\'' ? CPP_CHAR : CPP_STRING;
1127 case '$':
1128 if (!CPP_OPTION (pfile, dollars_in_ident))
1129 goto randomchar;
1130 goto letter;
1132 case ':':
1133 c2 = PEEKC ();
1134 /* Digraph: ":>" == "]". */
1135 if (c2 == '>'
1136 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1137 goto op2;
1138 goto randomchar;
1140 case '&':
1141 case '+':
1142 case '|':
1143 c2 = PEEKC ();
1144 if (c2 == c || c2 == '=')
1145 goto op2;
1146 goto randomchar;
1148 case '%':
1149 /* Digraphs: "%:" == "#", "%>" == "}". */
1150 c2 = PEEKC ();
1151 if (c2 == ':')
1153 FORWARD (1);
1154 CPP_RESERVE (pfile, 2);
1155 CPP_PUTC_Q (pfile, c);
1156 CPP_PUTC_Q (pfile, c2);
1157 goto hash;
1159 else if (c2 == '>')
1161 FORWARD (1);
1162 CPP_RESERVE (pfile, 2);
1163 CPP_PUTC_Q (pfile, c);
1164 CPP_PUTC_Q (pfile, c2);
1165 return CPP_OPEN_BRACE;
1167 /* else fall through */
1169 case '*':
1170 case '!':
1171 case '=':
1172 case '^':
1173 if (PEEKC () == '=')
1174 goto op2;
1175 goto randomchar;
1177 case '-':
1178 c2 = PEEKC ();
1179 if (c2 == '-')
1181 if (CPP_OPTION (pfile, chill))
1182 goto comment; /* Chill style comment */
1183 else
1184 goto op2;
1186 else if (c2 == '=')
1187 goto op2;
1188 else if (c2 == '>')
1190 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1192 /* In C++, there's a ->* operator. */
1193 token = CPP_OTHER;
1194 CPP_RESERVE (pfile, 4);
1195 CPP_PUTC_Q (pfile, c);
1196 CPP_PUTC_Q (pfile, GETC ());
1197 CPP_PUTC_Q (pfile, GETC ());
1198 return token;
1200 goto op2;
1202 goto randomchar;
1204 case '<':
1205 if (pfile->parsing_include_directive)
1207 for (;;)
1209 CPP_PUTC (pfile, c);
1210 if (c == '>')
1211 break;
1212 c = GETC ();
1213 if (c == '\n' || c == EOF)
1215 cpp_error (pfile,
1216 "missing '>' in `#include <FILENAME>'");
1217 break;
1219 else if (c == '\r')
1221 if (!CPP_BUFFER (pfile)->has_escapes)
1223 /* Backslash newline is replaced by nothing. */
1224 CPP_ADJUST_WRITTEN (pfile, -1);
1225 CPP_BUMP_LINE (pfile);
1227 else
1229 /* We might conceivably get \r- or \r<space> in
1230 here. Just delete 'em. */
1231 int d = GETC();
1232 if (d != '-' && d != ' ')
1233 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1234 CPP_ADJUST_WRITTEN (pfile, -1);
1238 return CPP_STRING;
1240 /* Digraphs: "<%" == "{", "<:" == "[". */
1241 c2 = PEEKC ();
1242 if (c2 == '%')
1244 FORWARD (1);
1245 CPP_RESERVE (pfile, 2);
1246 CPP_PUTC_Q (pfile, c);
1247 CPP_PUTC_Q (pfile, c2);
1248 return CPP_CLOSE_BRACE;
1250 else if (c2 == ':')
1251 goto op2;
1252 /* else fall through */
1253 case '>':
1254 c2 = PEEKC ();
1255 if (c2 == '=')
1256 goto op2;
1257 /* GNU C++ supports MIN and MAX operators <? and >?. */
1258 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1259 goto randomchar;
1260 FORWARD(1);
1261 CPP_RESERVE (pfile, 3);
1262 CPP_PUTC_Q (pfile, c);
1263 CPP_PUTC_Q (pfile, c2);
1264 if (PEEKC () == '=')
1265 CPP_PUTC_Q (pfile, GETC ());
1266 return CPP_OTHER;
1268 case '.':
1269 c2 = PEEKC ();
1270 if (ISDIGIT (c2))
1272 CPP_PUTC (pfile, c);
1273 c = GETC ();
1274 goto number;
1277 /* In C++ there's a .* operator. */
1278 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1279 goto op2;
1281 if (c2 == '.' && PEEKN(1) == '.')
1283 CPP_RESERVE (pfile, 3);
1284 CPP_PUTC_Q (pfile, '.');
1285 CPP_PUTC_Q (pfile, '.');
1286 CPP_PUTC_Q (pfile, '.');
1287 FORWARD (2);
1288 return CPP_ELLIPSIS;
1290 goto randomchar;
1292 op2:
1293 CPP_RESERVE (pfile, 2);
1294 CPP_PUTC_Q (pfile, c);
1295 CPP_PUTC_Q (pfile, GETC ());
1296 return CPP_OTHER;
1298 case 'L':
1299 c2 = PEEKC ();
1300 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1302 CPP_PUTC (pfile, c);
1303 c = GETC ();
1304 parse_string (pfile, c);
1305 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1307 goto letter;
1309 case '0': case '1': case '2': case '3': case '4':
1310 case '5': case '6': case '7': case '8': case '9':
1311 number:
1312 c2 = '.';
1313 for (;;)
1315 CPP_RESERVE (pfile, 2);
1316 CPP_PUTC_Q (pfile, c);
1317 c = PEEKC ();
1318 if (c == EOF)
1319 break;
1320 if (!is_numchar(c) && c != '.'
1321 && ((c2 != 'e' && c2 != 'E'
1322 && ((c2 != 'p' && c2 != 'P')
1323 || CPP_OPTION (pfile, c89)))
1324 || (c != '+' && c != '-')))
1325 break;
1326 FORWARD(1);
1327 c2= c;
1329 return CPP_NUMBER;
1330 case 'b': case 'c': case 'd': case 'h': case 'o':
1331 case 'B': case 'C': case 'D': case 'H': case 'O':
1332 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1334 CPP_RESERVE (pfile, 2);
1335 CPP_PUTC_Q (pfile, c);
1336 CPP_PUTC_Q (pfile, '\'');
1337 FORWARD(1);
1338 for (;;)
1340 c = GETC();
1341 if (c == EOF)
1342 goto chill_number_eof;
1343 if (!is_numchar(c))
1344 break;
1345 CPP_PUTC (pfile, c);
1347 if (c == '\'')
1349 CPP_RESERVE (pfile, 2);
1350 CPP_PUTC_Q (pfile, c);
1351 return CPP_STRING;
1353 else
1355 FORWARD(-1);
1356 chill_number_eof:
1357 return CPP_NUMBER;
1360 else
1361 goto letter;
1362 case '_':
1363 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1364 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1365 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1366 case 'x': case 'y': case 'z':
1367 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1368 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1369 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1370 case 'Y': case 'Z':
1371 letter:
1372 _cpp_parse_name (pfile, c);
1373 return CPP_MACRO;
1375 case ' ': case '\t': case '\v': case '\f': case '\0':
1377 int null_count = 0;
1379 for (;;)
1381 if (c == '\0')
1382 null_count++;
1383 else
1384 CPP_PUTC (pfile, c);
1385 c = PEEKC ();
1386 if (c == EOF || !is_hspace(c))
1387 break;
1388 FORWARD(1);
1390 if (null_count)
1391 null_warning (pfile, null_count);
1392 return CPP_HSPACE;
1395 case '\r':
1396 if (CPP_BUFFER (pfile)->has_escapes)
1398 c = GETC ();
1399 if (c == '-')
1401 if (pfile->output_escapes)
1402 CPP_PUTS (pfile, "\r-", 2);
1403 _cpp_parse_name (pfile, GETC ());
1404 return CPP_NAME;
1406 else if (c == ' ')
1408 /* "\r " means a space, but only if necessary to prevent
1409 accidental token concatenation. */
1410 CPP_RESERVE (pfile, 2);
1411 if (pfile->output_escapes)
1412 CPP_PUTC_Q (pfile, '\r');
1413 CPP_PUTC_Q (pfile, c);
1414 return CPP_HSPACE;
1416 else
1418 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1419 goto get_next;
1422 else
1424 /* Backslash newline is ignored. */
1425 if (!ACTIVE_MARK_P (pfile))
1426 CPP_BUMP_LINE (pfile);
1427 goto get_next;
1430 case '\n':
1431 CPP_PUTC (pfile, c);
1432 return CPP_VSPACE;
1434 case '(': token = CPP_OPEN_PAREN; goto char1;
1435 case ')': token = CPP_CLOSE_PAREN; goto char1;
1436 case '{': token = CPP_OPEN_BRACE; goto char1;
1437 case '}': token = CPP_CLOSE_BRACE; goto char1;
1438 case ',': token = CPP_COMMA; goto char1;
1439 case ';': token = CPP_SEMICOLON; goto char1;
1441 randomchar:
1442 default:
1443 token = CPP_OTHER;
1444 char1:
1445 CPP_PUTC (pfile, c);
1446 return token;
1450 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1451 Caller is expected to have checked no_macro_expand. */
/* PFILE is the reader; WRITTEN is the offset in pfile->token_buffer at
   which the candidate name starts.  Returns 1 when the name has been
   removed from the token buffer (replaced by a single space for an
   empty macro, or handed to _cpp_macroexpand); the callers in this
   file then lex again.  Returns 0 when the name is to be kept, in
   which case the callers return CPP_NAME. */
1452 static int
1453 maybe_macroexpand (pfile, written)
1454 cpp_reader *pfile;
1455 long written;
1457 U_CHAR *macro = pfile->token_buffer + written;
1458 size_t len = CPP_WRITTEN (pfile) - written;
1459 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1461 /* _cpp_lookup never returns null. */
/* T_VOID: the name is left untouched and reported as not expanded. */
1462 if (hp->type == T_VOID)
1463 return 0;
/* Disabled or T_IDENTITY entries are not expanded.  When escapes are
   being output, the name is shifted right by two bytes and prefixed
   with the two-character marker "\r-". */
1464 if (hp->disabled || hp->type == T_IDENTITY)
1466 if (pfile->output_escapes)
1468 /* Insert a no-reexpand marker before IDENT. */
1469 CPP_RESERVE (pfile, 2);
1470 CPP_ADJUST_WRITTEN (pfile, 2);
/* MACRO is recomputed here; presumably CPP_RESERVE may move
   token_buffer -- TODO confirm. */
1471 macro = pfile->token_buffer + written;
1473 memmove (macro + 2, macro, len);
1474 macro[0] = '\r';
1475 macro[1] = '-';
1477 return 0;
1479 if (hp->type == T_EMPTY)
1481 /* Special case optimization: macro expands to nothing. */
/* The name is discarded and one space is written in its place. */
1482 CPP_SET_WRITTEN (pfile, written);
1483 CPP_PUTC_Q (pfile, ' ');
1484 return 1;
1487 /* If macro wants an arglist, verify that a '(' follows. */
1488 if (hp->type == T_FMACRO)
/* macbuf_whitespace records that horizontal space or newlines were
   skipped inside a macro buffer, so that a space can be emitted if
   this turns out not to be a call. */
1490 int macbuf_whitespace = 0;
1491 int c;
/* First look through any macro buffers on top of the buffer stack:
   skip horizontal space and newlines, and pop each macro buffer
   that is exhausted before a decisive character is seen. */
1493 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1495 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1496 for (;;)
1498 _cpp_skip_hspace (pfile);
1499 c = PEEKC ();
1500 if (c == '\n')
1501 FORWARD(1);
1502 else
1503 break;
1505 if (point != CPP_BUFFER (pfile)->cur)
1506 macbuf_whitespace = 1;
1507 if (c == '(')
1508 goto is_macro_call;
1509 else if (c != EOF)
1510 goto not_macro_call;
1511 cpp_pop_buffer (pfile);
/* Now in a non-macro buffer.  The same skip is performed between
   CPP_SET_MARK and CPP_GOTO_MARK; presumably the latter restores
   the read position so this is a pure look-ahead -- TODO confirm. */
1514 CPP_SET_MARK (pfile);
1515 for (;;)
1517 _cpp_skip_hspace (pfile);
1518 c = PEEKC ();
1519 if (c == '\n')
1520 FORWARD(1);
1521 else
1522 break;
1524 CPP_GOTO_MARK (pfile);
1526 if (c != '(')
/* Also reached by goto from the macro-buffer loop above. */
1528 not_macro_call:
1529 if (macbuf_whitespace)
1530 CPP_PUTC (pfile, ' ');
1531 return 0;
1535 is_macro_call:
1536 /* This is now known to be a macro call.
1537 Expand the macro, reading arguments as needed,
1538 and push the expansion on the input stack. */
1539 _cpp_macroexpand (pfile, hp);
1540 CPP_SET_WRITTEN (pfile, written);
1541 return 1;
1544 /* Complain about \v or \f in a preprocessing directive (constraint
1545 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1546 static void
1547 pedantic_whitespace (pfile, p, len)
1548 cpp_reader *pfile;
1549 U_CHAR *p;
1550 unsigned int len;
1552 while (len)
1554 if (*p == '\v')
1555 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1556 else if (*p == '\f')
1557 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1558 p++;
1559 len--;
1564 enum cpp_ttype
1565 cpp_get_token (pfile)
1566 cpp_reader *pfile;
1568 enum cpp_ttype token;
1569 long written = CPP_WRITTEN (pfile);
1571 get_next:
1572 token = _cpp_lex_token (pfile);
1574 switch (token)
1576 default:
1577 pfile->potential_control_macro = 0;
1578 pfile->only_seen_white = 0;
1579 return token;
1581 case CPP_VSPACE:
1582 if (pfile->only_seen_white == 0)
1583 pfile->only_seen_white = 1;
1584 CPP_BUMP_LINE (pfile);
1585 return token;
1587 case CPP_HSPACE:
1588 case CPP_COMMENT:
1589 return token;
1591 case CPP_DIRECTIVE:
1592 pfile->potential_control_macro = 0;
1593 if (_cpp_handle_directive (pfile))
1594 return CPP_DIRECTIVE;
1595 pfile->only_seen_white = 0;
1596 CPP_PUTC (pfile, '#');
1597 return CPP_OTHER;
1599 case CPP_MACRO:
1600 pfile->potential_control_macro = 0;
1601 pfile->only_seen_white = 0;
1602 if (! pfile->no_macro_expand
1603 && maybe_macroexpand (pfile, written))
1604 goto get_next;
1605 return CPP_NAME;
1607 case CPP_EOF:
1608 if (CPP_BUFFER (pfile) == NULL)
1609 return CPP_EOF;
1610 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1612 cpp_pop_buffer (pfile);
1613 goto get_next;
1615 cpp_pop_buffer (pfile);
1616 return CPP_EOF;
1620 /* Like cpp_get_token, but skip spaces and comments. */
1622 enum cpp_ttype
1623 cpp_get_non_space_token (pfile)
1624 cpp_reader *pfile;
1626 int old_written = CPP_WRITTEN (pfile);
1627 for (;;)
1629 enum cpp_ttype token = cpp_get_token (pfile);
1630 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1631 return token;
1632 CPP_SET_WRITTEN (pfile, old_written);
1636 /* Like cpp_get_token, except that it does not execute directives,
1637 does not consume vertical space, and discards horizontal space. */
1638 enum cpp_ttype
1639 _cpp_get_directive_token (pfile)
1640 cpp_reader *pfile;
1642 long old_written;
1643 enum cpp_ttype token;
1644 int at_bol;
1646 get_next:
1647 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1648 old_written = CPP_WRITTEN (pfile);
1649 token = _cpp_lex_token (pfile);
1650 switch (token)
1652 default:
1653 return token;
1655 case CPP_VSPACE:
1656 /* Put it back and return VSPACE. */
1657 FORWARD(-1);
1658 CPP_ADJUST_WRITTEN (pfile, -1);
1659 return CPP_VSPACE;
1661 case CPP_HSPACE:
1662 /* The purpose of this rather strange check is to prevent pedantic
1663 warnings for ^L in an #ifdefed out block. */
1664 if (CPP_PEDANTIC (pfile) && ! at_bol)
1665 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1666 CPP_WRITTEN (pfile) - old_written);
1667 CPP_SET_WRITTEN (pfile, old_written);
1668 goto get_next;
1669 return CPP_HSPACE;
1671 case CPP_DIRECTIVE:
1672 /* Don't execute the directive, but don't smash it to OTHER either. */
1673 CPP_PUTC (pfile, '#');
1674 return CPP_DIRECTIVE;
1676 case CPP_MACRO:
1677 if (! pfile->no_macro_expand
1678 && maybe_macroexpand (pfile, old_written))
1679 goto get_next;
1680 return CPP_NAME;
1682 case CPP_EOF:
1683 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1685 cpp_pop_buffer (pfile);
1686 goto get_next;
1688 else
1689 /* This can happen for files that don't end with a newline,
1690 and for cpp_define and friends. Pretend they do, so
1691 callers don't have to deal. A warning will be issued by
1692 someone else, if necessary. */
1693 return CPP_VSPACE;
1697 /* Determine the current line and column. Used only by read_and_prescan. */
1698 static U_CHAR *
1699 find_position (start, limit, linep)
1700 U_CHAR *start;
1701 U_CHAR *limit;
1702 unsigned long *linep;
1704 unsigned long line = *linep;
1705 U_CHAR *lbase = start;
1706 while (start < limit)
1708 U_CHAR ch = *start++;
1709 if (ch == '\n' || ch == '\r')
1711 line++;
1712 lbase = start;
1715 *linep = line;
1716 return lbase;
1719 /* The following table is used by _cpp_read_and_prescan. If we have
1720 designated initializers, it can be constant data; otherwise, it is
1721 set up at runtime by _cpp_init_input_buffer. */
1723 #ifndef UCHAR_MAX
1724 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1725 #endif
/* CHARTAB, s() and END are scaffolding with two expansions.
   With GCC_VERSION >= 2007 the rows below form a designated
   initializer for a const array and init_chartab() expands to nothing.
   Otherwise the array is a zeroed, writable array and the same rows
   become assignments inside a generated init_chartab function. */
1727 #if (GCC_VERSION >= 2007)
1728 #define init_chartab() /* nothing */
1729 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1730 #define END };
1731 #define s(p, v) [p] = v,
1732 #else
1733 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1734 static void init_chartab PARAMS ((void)) { \
1735 unsigned char *x = chartab;
1736 #define END }
1737 #define s(p, v) x[p] = v;
1738 #endif
1740 /* Table of characters that can't be handled in the inner loop.
1741 Also contains the mapping between trigraph third characters and their
1742 replacements. */
/* Values 1..3 tag the three characters needing special handling.
   Every other non-zero entry is a trigraph replacement character;
   all of those are greater than SPECCASE_QUESTION. */
1743 #define SPECCASE_CR 1
1744 #define SPECCASE_BACKSLASH 2
1745 #define SPECCASE_QUESTION 3
/* Each s(index, value) row sets chartab[index] = value; entries not
   listed stay 0.
   NOTE(review): END is defined and undefined here but no use of it is
   visible after the last row; presumably it closes the table --
   confirm against the complete file. */
1747 CHARTAB
1748 s('\r', SPECCASE_CR)
1749 s('\\', SPECCASE_BACKSLASH)
1750 s('?', SPECCASE_QUESTION)
1752 s('=', '#') s(')', ']') s('!', '|')
1753 s('(', '[') s('\'', '^') s('>', '}')
1754 s('/', '\\') s('<', '{') s('-', '~')
1757 #undef CHARTAB
1758 #undef END
1759 #undef s
/* NORMAL(c) takes a character: true when chartab[c] is 0 or a trigraph
   replacement value, i.e. c is not '\r', '\\' or '?'.
   NONTRI(c) takes a chartab VALUE: true when that value is not a
   trigraph replacement. */
1761 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1762 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1764 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1765 much memory to allocate initially; more will be allocated if
1766 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1767 canonical form (\n). If enabled, convert and/or warn about
1768 trigraphs. Convert backslash-newline to a one-character escape
1769 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1770 token). If there is no newline at the end of the file, add one and
1771 warn. Returns -1 on failure, or the actual length of the data to
1772 be scanned.
1774 This function does a lot of work, and can be a serious performance
1775 bottleneck. It has been tuned heavily; make sure you understand it
1776 before hacking. The common case - no trigraphs, Unix style line
1777 breaks, backslash-newline set off by whitespace, newline at EOF -
1778 has been optimized at the expense of the others. The performance
1779 penalty for DOS style line breaks (\r\n) is about 15%.
1781 Warnings lose particularly heavily since we have to determine the
1782 line number, which involves scanning from the beginning of the file
1783 or from the last warning. The penalty for the absence of a newline
1784 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1786 If your file has more than one kind of end-of-line marker, you
1787 will get messed-up line numbering.
1789 So that the cases of the switch statement do not have to concern
1790 themselves with the complications of reading beyond the end of the
1791 buffer, the buffer is guaranteed to have at least 3 characters in
1792 it (or however many are left in the file, if less) on entry to the
1793 switch. This is enough to handle trigraphs and the "\\\n\r" and
1794 "\\\r\n" cases.
1796 The end of the buffer is marked by a '\\', which, being a special
1797 character, guarantees we will exit the fast-scan loops and perform
1798 a refill. */
1800 long
1801 _cpp_read_and_prescan (pfile, fp, desc, len)
1802 cpp_reader *pfile;
1803 cpp_buffer *fp;
1804 int desc;
1805 size_t len;
1807 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1808 U_CHAR *ip, *op, *line_base;
1809 U_CHAR *ibase;
1810 unsigned long line;
1811 unsigned int deferred_newlines;
1812 size_t offset;
1813 int count = 0;
1815 offset = 0;
1816 deferred_newlines = 0;
1817 op = buf;
1818 line_base = buf;
1819 line = 1;
1820 ibase = pfile->input_buffer + 3;
1821 ip = ibase;
1822 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1824 for (;;)
1826 U_CHAR *near_buff_end;
1828 count = read (desc, ibase, pfile->input_buffer_len);
1829 if (count < 0)
1830 goto error;
1832 ibase[count] = '\\'; /* Marks end of buffer */
1833 if (count)
1835 near_buff_end = pfile->input_buffer + count;
1836 offset += count;
1837 if (offset > len)
1839 size_t delta_op;
1840 size_t delta_line_base;
1841 len = offset * 2;
1842 if (offset > len)
1843 /* len overflowed.
1844 This could happen if the file is larger than half the
1845 maximum address space of the machine. */
1846 goto too_big;
1848 delta_op = op - buf;
1849 delta_line_base = line_base - buf;
1850 buf = (U_CHAR *) xrealloc (buf, len);
1851 op = buf + delta_op;
1852 line_base = buf + delta_line_base;
1855 else
1857 if (ip == ibase)
1858 break;
1859 /* Allow normal processing of the (at most 2) remaining
1860 characters. The end-of-buffer marker is still present
1861 and prevents false matches within the switch. */
1862 near_buff_end = ibase - 1;
1865 for (;;)
1867 unsigned int span;
1869 /* Deal with \-newline, potentially in the middle of a token. */
1870 if (deferred_newlines)
1872 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1874 /* Previous was not white space. Skip to white
1875 space, if we can, before outputting the \r's */
1876 span = 0;
1877 while (ip[span] != ' '
1878 && ip[span] != '\t'
1879 && ip[span] != '\n'
1880 && NORMAL(ip[span]))
1881 span++;
1882 memcpy (op, ip, span);
1883 op += span;
1884 ip += span;
1885 if (! NORMAL(ip[0]))
1886 goto do_speccase;
1888 while (deferred_newlines)
1889 deferred_newlines--, *op++ = '\r';
1892 /* Copy as much as we can without special treatment. */
1893 span = 0;
1894 while (NORMAL (ip[span])) span++;
1895 memcpy (op, ip, span);
1896 op += span;
1897 ip += span;
1899 do_speccase:
1900 if (ip > near_buff_end) /* Do we have enough chars? */
1901 break;
1902 switch (chartab[*ip++])
1904 case SPECCASE_CR: /* \r */
1905 if (ip[-2] != '\n')
1907 if (*ip == '\n')
1908 ip++;
1909 *op++ = '\n';
1911 break;
1913 case SPECCASE_BACKSLASH: /* \ */
1914 if (*ip == '\n')
1916 deferred_newlines++;
1917 ip++;
1918 if (*ip == '\r') ip++;
1920 else if (*ip == '\r')
1922 deferred_newlines++;
1923 ip++;
1924 if (*ip == '\n') ip++;
1926 else
1927 *op++ = '\\';
1928 break;
1930 case SPECCASE_QUESTION: /* ? */
1932 unsigned int d, t;
1934 *op++ = '?'; /* Normal non-trigraph case */
1935 if (ip[0] != '?')
1936 break;
1938 d = ip[1];
1939 t = chartab[d];
1940 if (NONTRI (t))
1941 break;
1943 if (CPP_OPTION (pfile, warn_trigraphs))
1945 unsigned long col;
1946 line_base = find_position (line_base, op, &line);
1947 col = op - line_base + 1;
1948 if (CPP_OPTION (pfile, trigraphs))
1949 cpp_warning_with_line (pfile, line, col,
1950 "trigraph ??%c converted to %c", d, t);
1951 else
1952 cpp_warning_with_line (pfile, line, col,
1953 "trigraph ??%c ignored", d);
1956 ip += 2;
1957 if (CPP_OPTION (pfile, trigraphs))
1959 op[-1] = t; /* Overwrite '?' */
1960 if (t == '\\')
1962 op--;
1963 *--ip = '\\';
1964 goto do_speccase; /* May need buffer refill */
1967 else
1969 *op++ = '?';
1970 *op++ = d;
1973 break;
1976 /* Copy previous char plus unprocessed (at most 2) chars
1977 to beginning of buffer, refill it with another
1978 read(), and continue processing */
1979 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
1980 ip -= count;
1983 if (offset == 0)
1984 return 0;
1986 if (op[-1] != '\n')
1988 unsigned long col;
1989 line_base = find_position (line_base, op, &line);
1990 col = op - line_base + 1;
1991 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
1992 if (offset + 1 > len)
1994 len += 1;
1995 if (offset + 1 > len)
1996 goto too_big;
1997 buf = (U_CHAR *) xrealloc (buf, len);
1998 op = buf + offset;
2000 *op++ = '\n';
2003 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2004 return op - buf;
2006 too_big:
2007 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2008 (unsigned long)offset);
2009 free (buf);
2010 return -1;
2012 error:
2013 cpp_error_from_errno (pfile, fp->ihash->name);
2014 free (buf);
2015 return -1;
2018 /* Allocate pfile->input_buffer, and initialize chartab[]
2019 if it hasn't happened already. */
2021 void
2022 _cpp_init_input_buffer (pfile)
2023 cpp_reader *pfile;
2025 U_CHAR *tmp;
2027 init_chartab ();
2028 init_token_list (pfile, &pfile->directbuf, 0);
2030 /* Determine the appropriate size for the input buffer. Normal C
2031 source files are smaller than eight K. */
2032 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2033 address arithmetic all the time, and 3 for pushback during buffer
2034 refill, in case there's a potential trigraph or end-of-line
2035 digraph at the end of a block. */
2037 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2038 pfile->input_buffer = tmp;
2039 pfile->input_buffer_len = 8192;
2042 /* Utility routine:
2043 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2044 and extending for LEN characters to the NUL-terminated string
2045 STRING. Typical usage:
2047 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2048 "inline"))
2049 { ... }
2053 cpp_idcmp (token, len, string)
2054 const U_CHAR *token;
2055 size_t len;
2056 const char *string;
2058 size_t len2 = strlen (string);
2059 int r;
2061 if ((r = memcmp (token, string, MIN (len, len2))))
2062 return r;
2064 /* The longer of the two strings sorts after the shorter. */
2065 if (len == len2)
2066 return 0;
2067 else if (len < len2)
2068 return -1;
2069 else
2070 return 1;
2073 #if 0
2075 /* Lexing algorithm.
2077 The original lexer in cpplib was made up of two passes: a first pass
2078 that replaced trigraphs and deleted escaped newlines, and a second
2079 pass that tokenized the result of the first pass. Tokenisation was
2080 performed by peeking at the next character in the input stream. For
2081 example, if the input stream contained "!=", the handler for the !
2082 character would peek at the next character, and if it were a '='
2083 would skip over it, and return a "!=" token, otherwise it would
2084 return just the "!" token.
2086 To implement a single-pass lexer, this peeking ahead is unworkable.
2087 An arbitrary number of escaped newlines, and trigraphs (in particular
2088 ??/ which translates to the escape \), could separate the '!' and '='
2089 in the input stream, yet the next token is still a "!=".
2091 Suppose instead that we lex by one logical line at a time, producing
2092 a token list or stack for each logical line, and when seeing the '!'
2093 push a CPP_NOT token on the list. Then if the '!' is part of a
2094 longer token ("!=") we know we must see the remainder of the token by
2095 the time we reach the end of the logical line. Thus we can have the
2096 '=' handler look at the previous token (at the end of the list / top
2097 of the stack) and see if it is a "!" token, and if so, instead of
2098 pushing a "=" token revise the existing token to be a "!=" token.
2100 This works in the presence of escaped newlines, because the '\' would
2101 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2102 newline ('\n' or '\r') handler looks at the token at the top of the
2103 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2104 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2105 the '=' handler would never see any intervening escaped newlines.
2107 To make trigraphs work in this context, as in precedence trigraphs
2108 are highest and converted before anything else, the '?' handler does
2109 lookahead to see if it is a trigraph, and if so skips the trigraph
2110 and pushes the token it represents onto the top of the stack. This
2111 also works in the particular case of a CPP_BACKSLASH trigraph.
2113 To the preprocessor, whitespace is only significant to the point of
2114 knowing whether whitespace precedes a particular token. For example,
2115 the '=' handler needs to know whether there was whitespace between it
2116 and a "!" token on the top of the stack, to make the token conversion
2117 decision correctly. So each token has a PREV_WHITESPACE flag to
2118 indicate this - the standard permits consecutive whitespace to be
2119 regarded as a single space. The compiler front ends are not
2120 interested in whitespace at all; they just require a token stream.
2121 Another place where whitespace is significant to the preprocessor is
2122 a #define statement - if there is whitespace between the macro name
2123 and an initial "(" token the macro is "object-like", otherwise it is
2124 a function-like macro that takes arguments.
2126 However, all is not rosy. Parsing of identifiers, numbers, comments
2127 and strings becomes trickier because of the possibility of raw
2128 trigraphs and escaped newlines in the input stream.
2130 The trigraphs are three consecutive characters beginning with two
2131 question marks. A question mark is not valid as part of a number or
2132 identifier, so parsing of a number or identifier terminates normally
2133 upon reaching it, returning to the mainloop which handles the
2134 trigraph just like it would in any other position. Similarly for the
2135 backslash of a backslash-newline combination. So we just need the
2136 escaped-newline dropper in the mainloop to check if the token on the
2137 top of the stack after dropping the escaped newline is a number or
2138 identifier, and if so to continue the processing it as if nothing had
2139 happened.
2141 For strings, we replace trigraphs whenever we reach a quote or
2142 newline, because there might be a backslash trigraph escaping them.
2143 We need to be careful that we start trigraph replacing from where we
2144 left off previously, because it is possible for a first scan to leave
2145 "fake" trigraphs that a second scan would pick up as real (e.g. the
2146 sequence "????/\n=" would find a fake ??= trigraph after removing the
2147 escaped newline.)
2149 For line comments, on reaching a newline we scan the previous
2150 character(s) to see if it escaped, and continue if it is. Block
2151 comments ignore everything and just focus on finding the comment
2152 termination mark. The only difficult thing, and it is surprisingly
2153 tricky, is checking if an asterisk precedes the final slash since
2154 they could be separated by escaped newlines. If the preprocessor is
2155 invoked with the output comments option, we don't bother removing
2156 escaped newlines and replacing trigraphs for output.
2158 Finally, numbers can begin with a period, which is pushed initially
2159 as a CPP_DOT token in its own right. The digit handler checks if the
2160 previous token was a CPP_DOT not separated by whitespace, and if so
2161 pops it off the stack and pushes a period into the number's buffer
2162 before calling the number parser. */
/* Forward declarations for the single-pass lexer that follows.
   This region sits after the `#if 0' above, so it is not compiled.
   skip_block_comment, skip_line_comment and copy_comment are declared
   here with signatures that differ from the declarations of the same
   names at the head of the file. */
2166 static void expand_comment_space PARAMS ((cpp_toklist *));
2167 void init_trigraph_map PARAMS ((void));
2168 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
2169 unsigned char *));
2170 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
2171 const unsigned char *));
2172 static int skip_block_comment PARAMS ((cpp_reader *));
2173 static int skip_line_comment PARAMS ((cpp_reader *));
2174 static void skip_whitespace PARAMS ((cpp_reader *, int));
2175 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2176 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2177 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
2178 unsigned int));
2179 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
2180 static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *,
2181 unsigned int, unsigned int, unsigned int));
2182 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
2184 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
/* The three spell_* routines share the signature captured by the
   `speller' typedef below. */
2186 unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
2187 cpp_token *token));
2188 unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
2189 cpp_token *token));
2190 unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
2191 cpp_token *token));
2193 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
2194 cpp_token *));
2196 /* Macros on a cpp_name. */
/* INIT_NAME resets NAME to zero length, starting at LIST's current
   name_used offset. */
2197 #define INIT_NAME(list, name) \
2198 do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
/* IS_DIRECTIVE: true when the first token of LIST is CPP_HASH.
   COLUMN: offset of CUR from the current line base; it relies on a
   variable named `buffer' being in scope at the point of use. */
2200 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
2201 #define COLUMN(cur) ((cur) - buffer->line_base)
2203 /* Maybe put these in the ISTABLE eventually. */
/* IS_HSPACE accepts only space and tab; IS_NEWLINE accepts '\n' and
   '\r'. */
2204 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
2205 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character just consumed and CUR points just past
   it.  '\r' + '\n' - (c) is the other newline character; if it is
   the next one before LIMIT it is consumed too, so a two-character
   line ending counts once.  CPP_BUMP_LINE_CUR is then invoked with
   the updated CUR.  Relies on a variable named `pfile' being in
   scope at the point of use.  */
#define handle_newline(cur, limit, c) \
  do {\
  if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
    (cur)++; \
  CPP_BUMP_LINE_CUR (pfile, (cur)); \
  } while (0)
/* Both macros rely on a variable named `cur_token' being in scope.
   IMMED_TOKEN: true when the current token does not have the
   PREV_WHITESPACE flag set.  PREV_TOKEN_TYPE: type of the token just
   before the current one. */
2216 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
2217 #define PREV_TOKEN_TYPE (cur_token[-1].type)
/* Values stored in the `type' member of token_spellings entries. */
2219 #define SPELL_TEXT 0
2220 #define SPELL_HANDLER 1
2221 #define SPELL_CHAR 2
2222 #define SPELL_NONE 3
2223 #define SPELL_EOL 4
/* Each of T, H, C, N and E turns one TTYPE_TABLE entry into a
   {spelling kind, s} initializer; the first argument is ignored. */
2225 #define T(e, s) {SPELL_TEXT, s},
2226 #define H(e, s) {SPELL_HANDLER, s},
2227 #define C(e, s) {SPELL_CHAR, s},
2228 #define N(e, s) {SPELL_NONE, s},
2229 #define E(e, s) {SPELL_EOL, s},
/* Table built by expanding TTYPE_TABLE with the macros above, plus a
   final {0, 0} entry.  `speller' carries the second macro argument,
   whatever TTYPE_TABLE supplies for that kind of entry. */
2231 static const struct token_spelling
2233 unsigned char type;
2234 PTR speller;
2235 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
2237 #undef T
2238 #undef H
2239 #undef C
2240 #undef N
2241 #undef E
/* Spellings of the six digraph tokens. */
2243 static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:",
2244 ":>", "<%", "%>"};
2246 static void
2247 expand_comment_space (list)
2248 cpp_toklist *list;
2250 if (list->comments_cap == 0)
2252 list->comments_cap = 10;
2253 list->comments = (cpp_token *)
2254 xmalloc (list->comments_cap * sizeof (cpp_token));
2256 else
2258 list->comments_cap *= 2;
2259 list->comments = (cpp_token *)
2260 xrealloc (list->comments, list->comments_cap);
2264 void
2265 cpp_free_token_list (list)
2266 cpp_toklist *list;
2268 if (list->comments)
2269 free (list->comments);
2270 free (list->tokens - 1); /* Backup over dummy token. */
2271 free (list->namebuf);
2272 free (list);
/* Indexed by the third character of a candidate trigraph.  Holds the
   replacement character for the nine real trigraphs and 0 for every
   other index.  */
static unsigned char trigraph_map[256];

/* Fill in the nine non-zero entries of trigraph_map.  */
void
init_trigraph_map ()
{
  /* Consecutive pairs: third trigraph character, then replacement.  */
  static const char pairs[] = "=#([)]/\\'^<{>}!|-~";
  unsigned int i;

  for (i = 0; pairs[i] != '\0'; i += 2)
    trigraph_map[(unsigned char) pairs[i]] = (unsigned char) pairs[i + 1];
}
2291 /* Call when a trigraph is encountered. It warns if necessary, and
2292 returns true if the trigraph should be honoured. END is the third
2293 character of a trigraph in the input stream. */
2294 static int
2295 trigraph_ok (pfile, end)
2296 cpp_reader *pfile;
2297 const unsigned char *end;
2299 int accept = CPP_OPTION (pfile, trigraphs);
2301 if (CPP_OPTION (pfile, warn_trigraphs))
2303 unsigned int col = end - 1 - pfile->buffer->line_base;
2304 if (accept)
2305 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2306 "trigraph ??%c converted to %c",
2307 (int) *end, (int) trigraph_map[*end]);
2308 else
2309 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2310 "trigraph ??%c ignored", (int) *end);
2312 return accept;
2315 /* Scan a string for trigraphs, warning or replacing them inline as
2316 appropriate. When parsing a string, we must call this routine
2317 before processing a newline character (if trigraphs are enabled),
2318 since the newline might be escaped by a preceding backslash
2319 trigraph sequence. Returns a pointer to the end of the name after
2320 replacement. */
/* SRC and LIMIT delimit the text to scan, which is edited in place.
   When no trigraph is found LIMIT is returned unchanged. */
2322 static unsigned char*
2323 trigraph_replace (pfile, src, limit)
2324 cpp_reader *pfile;
2325 unsigned char *src;
2326 unsigned char* limit;
2328 unsigned char *dest;
2330 /* Starting with src[1], find two consecutive '?'. The case of no
2331 trigraphs is streamlined. */
/* The scan advances two characters at a time and inspects both
   neighbours of each '?' it lands on.
   NOTE(review): the comment above says the scan starts at src[1], but
   the loop has no initializer and starts at src[0], so the first
   iteration can read src[-1] -- confirm callers guarantee a readable
   byte before SRC, or that the loop was meant to start one later. */
2333 for (; src + 1 < limit; src += 2)
2335 if (src[0] != '?')
2336 continue;
2338 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2339 if (src[-1] == '?')
2340 src--;
2341 else if (src + 2 == limit || src[1] != '?')
2342 continue;
2344 /* Check if it really is a trigraph. */
2345 if (trigraph_map[src[2]] == 0)
2346 continue;
/* First trigraph found: output now starts lagging behind input, so
   jump into the body of the copy loop below. */
2348 dest = src;
2349 goto trigraph_found;
2351 return limit;
2353 /* Now we have a trigraph, we need to scan the remaining buffer, and
2354 copy-shifting its contents left if replacement is enabled. */
/* NOTE(review): SRC is advanced by two for every trigraph whether or
   not trigraph_ok accepts it, so a rejected trigraph also appears to
   be shortened to a single '?' -- confirm this is intended. */
2355 for (; src + 2 < limit; dest++, src++)
2356 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2358 trigraph_found:
2359 src += 2;
/* The position handed to trigraph_ok is derived from the buffer's
   current read pointer and the distance from SRC to LIMIT. */
2360 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2361 *dest = trigraph_map[*src];
2364 /* Copy remaining (at most 2) characters. */
2365 while (src < limit)
2366 *dest++ = *src++;
2367 return dest;
2370 /* If CUR is a backslash or the end of a trigraphed backslash, return
2371 a pointer to its beginning, otherwise NULL. We don't read beyond
2372 the buffer start, because there is the start of the comment in the
2373 buffer. */
2374 static const unsigned char *
2375 backslash_start (pfile, cur)
2376 cpp_reader *pfile;
2377 const unsigned char *cur;
2379 if (cur[0] == '\\')
2380 return cur;
2381 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2382 && trigraph_ok (pfile, cur))
2383 return cur - 2;
2384 return 0;
2387 /* Skip a C-style block comment. This is probably the trickiest
2388 handler. We find the end of the comment by seeing if an asterisk
2389 is before every '/' we encounter. The nasty complication is that a
2390 previous asterisk may be separated by one or more escaped newlines.
2391 Returns non-zero if comment terminated by EOF, zero otherwise. */
/* Scanning starts at pfile->buffer->cur; on return buffer->cur is
   just past the closing slash, or at the point where the scan ran
   out of input. */
2392 static int
2393 skip_block_comment (pfile)
2394 cpp_reader *pfile;
2396 cpp_buffer *buffer = pfile->buffer;
/* When non-null, the position just after a run of escaped newlines
   that itself directly followed an asterisk. */
2397 const unsigned char *char_after_star = 0;
2398 register const unsigned char *cur = buffer->cur;
2399 int seen_eof = 0;
2401 /* Inner loop would think the comment has ended if the first comment
2402 character is a '/'. Avoid this and keep the inner loop clean by
2403 skipping such a character. */
2404 if (cur < buffer->rlimit && cur[0] == '/')
2405 cur++;
2407 for (; cur < buffer->rlimit; )
2409 unsigned char c = *cur++;
2411 /* People like decorating comments with '*', so check for
2412 '/' instead for efficiency. */
2413 if (c == '/')
/* The comment ends if the slash is directly preceded by an asterisk,
   or sits exactly at the position recorded in char_after_star. */
2415 if (cur[-2] == '*' || cur - 1 == char_after_star)
2416 goto out;
2418 /* Warn about potential nested comments, but not when
2419 the final character inside the comment is a '/'.
2420 Don't bother to get it right across escaped newlines. */
2421 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2422 && cur[0] == '*' && cur[1] != '/')
/* buffer->cur is updated first, presumably so the warning is
   reported at the current position -- TODO confirm. */
2424 buffer->cur = cur;
2425 cpp_warning (pfile, "'/*' within comment");
2428 else if (IS_NEWLINE(c))
/* Look for a backslash (literal or trigraph) just before the newline;
   this is done before handle_newline can advance cur. */
2430 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2432 handle_newline (cur, buffer->rlimit, c);
2433 /* Work correctly if there is an asterisk before an
2434 arbitrarily long sequence of escaped newlines. */
2435 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2436 char_after_star = cur;
2437 else
2438 char_after_star = 0;
/* Reached only when the loop runs out of input without finding the
   end of the comment. */
2441 seen_eof = 1;
2443 out:
2444 buffer->cur = cur;
2445 return seen_eof;
2448 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2449 Returns non-zero if a multiline comment. */
2450 static int
2451 skip_line_comment (pfile)
2452 cpp_reader *pfile;
2454 cpp_buffer *buffer = pfile->buffer;
2455 register const unsigned char *cur = buffer->cur;
2456 int multiline = 0;
2458 for (; cur < buffer->rlimit; )
2460 unsigned char c = *cur++;
2462 if (IS_NEWLINE (c))
2464 /* Check for a (trigaph?) backslash escaping the newline. */
2465 if (!backslash_start (pfile, cur - 2))
2466 goto out;
2467 multiline = 1;
2468 handle_newline (cur, buffer->rlimit, c);
2471 cur++;
2473 out:
2474 buffer->cur = cur - 1; /* Leave newline for caller. */
2475 return multiline;
2478 /* Skips whitespace, stopping at next non-whitespace character. */
2479 static void
2480 skip_whitespace (pfile, in_directive)
2481 cpp_reader *pfile;
2482 int in_directive;
2484 cpp_buffer *buffer = pfile->buffer;
2485 register const unsigned char *cur = buffer->cur;
2486 unsigned short null_count = 0;
2488 for (; cur < buffer->rlimit; )
2490 unsigned char c = *cur++;
2492 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2493 continue;
2494 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2495 goto out;
2496 if (c == '\0')
2497 null_count++;
2498 /* Mut be '\f' or '\v' */
2499 else if (in_directive && CPP_PEDANTIC (pfile))
2500 cpp_pedwarn (pfile, "%s in preprocessing directive",
2501 c == '\f' ? "formfeed" : "vertical tab");
2503 cur++;
2505 out:
2506 buffer->cur = cur - 1;
2507 if (null_count)
2508 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2509 : "embedded null character ignored");
2512 /* Parse (append) an identifier. */
2513 static void
2514 parse_name (pfile, list, name)
2515 cpp_reader *pfile;
2516 cpp_toklist *list;
2517 cpp_name *name;
2519 const unsigned char *name_limit;
2520 unsigned char *namebuf;
2521 cpp_buffer *buffer = pfile->buffer;
2522 register const unsigned char *cur = buffer->cur;
2524 expanded:
2525 name_limit = list->namebuf + list->name_cap;
2526 namebuf = list->namebuf + list->name_used;
2528 for (; cur < buffer->rlimit && namebuf < name_limit; )
2530 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2532 if (! is_idchar(c))
2533 goto out;
2534 namebuf++;
2535 cur++;
2536 if (c == '$' && CPP_PEDANTIC (pfile))
2538 buffer->cur = cur;
2539 cpp_pedwarn (pfile, "'$' character in identifier");
2543 /* Run out of name space? */
2544 if (cur < buffer->rlimit)
2546 list->name_used = namebuf - list->namebuf;
2547 auto_expand_name_space (list);
2548 goto expanded;
2551 out:
2552 buffer->cur = cur;
2553 name->len = namebuf - (list->namebuf + name->offset);
2554 list->name_used = namebuf - list->namebuf;
2557 /* Parse (append) a number. */
2559 #define VALID_SIGN(c, prevc) \
2560 (((c) == '+' || (c) == '-') && \
2561 ((prevc) == 'e' || (prevc) == 'E' \
2562 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2564 static void
2565 parse_number (pfile, list, name)
2566 cpp_reader *pfile;
2567 cpp_toklist *list;
2568 cpp_name *name;
2570 const unsigned char *name_limit;
2571 unsigned char *namebuf;
2572 cpp_buffer *buffer = pfile->buffer;
2573 register const unsigned char *cur = buffer->cur;
2575 expanded:
2576 name_limit = list->namebuf + list->name_cap;
2577 namebuf = list->namebuf + list->name_used;
2579 for (; cur < buffer->rlimit && namebuf < name_limit; )
2581 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2583 /* Perhaps we should accept '$' here if we accept it for
2584 identifiers. We know namebuf[-1] is safe, because for c to
2585 be a sign we must have pushed at least one character. */
2586 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2587 goto out;
2589 namebuf++;
2590 cur++;
2593 /* Run out of name space? */
2594 if (cur < buffer->rlimit)
2596 list->name_used = namebuf - list->namebuf;
2597 auto_expand_name_space (list);
2598 goto expanded;
2601 out:
2602 buffer->cur = cur;
2603 name->len = namebuf - (list->namebuf + name->offset);
2604 list->name_used = namebuf - list->namebuf;
2607 /* Places a string terminated by an unescaped TERMINATOR into a
2608 cpp_name, which should be expandable and thus at the top of the
2609 list's stack. Handles embedded trigraphs, if necessary, and
2610 escaped newlines.
2612 Can be used for character constants (terminator = '\''), string
2613 constants ('"'), angled headers ('>') and assertions (')').
Reading starts at pfile->buffer->cur. On exit buffer->cur, NAME->len
and list->name_used are updated; the closing terminator is consumed
but not stored in the name. A missing terminator is reported with
cpp_error. */
2615 static void
2616 parse_string (pfile, list, name, terminator)
2617 cpp_reader *pfile;
2618 cpp_toklist *list;
2619 cpp_name *name;
2620 unsigned int terminator;
2622 cpp_buffer *buffer = pfile->buffer;
2623 register const unsigned char *cur = buffer->cur;
2624 const unsigned char *name_limit;
2625 unsigned char *namebuf;
2626 unsigned int null_count = 0;
2627 int trigraphed_len = 0;
/* Re-entered after the name space has been expanded, so that the
pointers into list->namebuf are recomputed. */
2629 expanded:
2630 name_limit = list->namebuf + list->name_cap;
2631 namebuf = list->namebuf + list->name_used;
2633 for (; cur < buffer->rlimit && namebuf < name_limit; )
2635 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2637 if (c == '\0')
2638 null_count++;
2639 else if (c == terminator || IS_NEWLINE (c))
2641 unsigned char* name_start = list->namebuf + name->offset;
2643 /* Needed for trigraph_replace and multiline string warning. */
2644 buffer->cur = cur;
2646 /* Scan for trigraphs before checking if backslash-escaped. */
2647 if (CPP_OPTION (pfile, trigraphs)
2648 || CPP_OPTION (pfile, warn_trigraphs))
2650 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2651 namebuf);
/* NOTE(review): trigraphed_len is used above as an offset from
name_start, but the value stored here also subtracts the previous
offset; confirm this is intended when more than one newline or
escaped terminator occurs in the same string. */
2652 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2653 if (trigraphed_len < 0)
2654 trigraphed_len = 0;
2657 namebuf--; /* Drop the newline / terminator from the name. */
2658 if (IS_NEWLINE (c))
2660 /* Drop a backslash newline, and continue. */
2661 if (namebuf[-1] == '\\')
2663 handle_newline (cur, buffer->rlimit, c);
2664 namebuf--;
2665 continue;
/* Step back onto the newline so that the early exits below leave it
unconsumed in the buffer. */
2668 cur--;
2670 /* In Fortran and assembly language, silently terminate
2671 strings of either variety at end of line. This is a
2672 kludge around not knowing where comments are in these
2673 languages. */
2674 if (CPP_OPTION (pfile, lang_fortran)
2675 || CPP_OPTION (pfile, lang_asm))
2676 goto out;
2678 /* Character constants, headers and asserts may not
2679 extend over multiple lines. In Standard C, neither
2680 may strings. We accept multiline strings as an
2681 extension, but not in directives. */
2682 if (terminator != '"' || IS_DIRECTIVE (list))
2683 goto unterminated;
2685 cur++; /* Move forwards again. */
/* Remember the line of the first multi-line string seen, for the
"possible start of unterminated string literal" message below. */
2687 if (pfile->multiline_string_line == 0)
2689 pfile->multiline_string_line = list->line;
2690 if (CPP_PEDANTIC (pfile))
2691 cpp_pedwarn (pfile, "multi-line string constant");
/* The newline is stored in the name as '\n', whichever newline
character was read. */
2694 *namebuf++ = '\n';
2695 handle_newline (cur, buffer->rlimit, c);
2697 else
2699 unsigned char *temp;
2701 /* An odd number of consecutive backslashes represents
2702 an escaped terminator. */
2703 temp = namebuf - 1;
2704 while (temp >= name_start && *temp == '\\')
2705 temp--;
/* namebuf - temp is one more than the number of trailing backslashes,
so it is odd exactly when the terminator is not escaped. */
2707 if ((namebuf - temp) & 1)
2708 goto out;
/* Escaped terminator: keep it as part of the name. */
2709 namebuf++;
2714 /* Run out of name space? */
2715 if (cur < buffer->rlimit)
2717 list->name_used = namebuf - list->namebuf;
2718 auto_expand_name_space (list);
2719 goto expanded;
2722 /* We may not have trigraph-replaced the input for this code path,
2723 but as the input is in error by being unterminated we don't
2724 bother. Prevent warnings about no newlines at EOF. */
2725 if (IS_NEWLINE(cur[-1]))
2726 cur--;
2728 unterminated:
2729 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2731 if (terminator == '\"' && pfile->multiline_string_line != list->line
2732 && pfile->multiline_string_line != 0)
2734 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2735 "possible start of unterminated string literal");
2736 pfile->multiline_string_line = 0;
2739 out:
2740 buffer->cur = cur;
2741 name->len = namebuf - (list->namebuf + name->offset);
2742 list->name_used = namebuf - list->namebuf;
2744 if (null_count > 0)
2745 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2746 : "null character preserved"));
2749 /* The character C helps us distinguish comment types: '*' = C style,
2750 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2751 stored comment includes any C-style comment terminator. */
2752 static void
2753 copy_comment (list, from, len, tok_no, type)
2754 cpp_toklist *list;
2755 const unsigned char *from;
2756 unsigned int len;
2757 unsigned int tok_no;
2758 unsigned int type;
2760 cpp_token *comment;
2762 if (list->comments_used == list->comments_cap)
2763 expand_comment_space (list);
2765 if (list->name_used + len > list->name_cap)
2766 expand_name_space (list, len);
2768 comment = &list->comments[list->comments_used++];
2769 comment->type = type;
2770 comment->aux = tok_no;
2771 comment->val.name.len = len;
2772 comment->val.name.offset = list->name_used;
2774 memcpy (list->namebuf + list->name_used, from, len);
2775 list->name_used += len;
2779 * The tokenizer's main loop. Returns a token list, representing a
2780 * logical line in the input file, terminated with a CPP_VSPACE
2781 * token. On EOF, a token list containing the single CPP_EOF token
2782 * is returned.
2784 * Implementation relies almost entirely on lookback, rather than
2785 * looking forwards. This means that tokenization requires just
2786 * a single pass of the file, even in the presence of trigraphs and
2787 * escaped newlines, providing significant performance benefits.
2788 * Trigraph overhead is negligible if they are disabled, and low
2789 * even when enabled.
/* Helpers for _cpp_lex_line; all of them operate on its local variable
cur_token.
PUSH_TOKEN sets the type of the current token and advances past it.
REVISE_TOKEN overwrites the type of the previous token.
BACKUP_TOKEN steps back one token and sets that token's type.
BACKUP_DIGRAPH does the same and also sets its DIGRAPH flag. */
2792 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
2793 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
2794 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
2795 #define BACKUP_DIGRAPH(ttype) do { \
2796 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
/* Lex characters from pfile->buffer, appending tokens to LIST starting
at list->tokens_used. Lexing stops after CPP_VSPACE has been pushed
for an unescaped newline, or when the buffer is exhausted. In the
latter case the current slot is marked CPP_EOF; if tokens were already
lexed, "no newline at end of file" is warned about and that slot
becomes CPP_VSPACE instead. When the token array fills up it is
expanded and lexing resumes. On exit buffer->cur and list->tokens_used
are updated. */
2798 void
2799 _cpp_lex_line (pfile, list)
2800 cpp_reader *pfile;
2801 cpp_toklist *list;
2803 cpp_token *cur_token, *token_limit;
2804 cpp_buffer *buffer = pfile->buffer;
2805 register const unsigned char *cur = buffer->cur;
/* Flags to give the next token; holds PREV_WHITESPACE after skipped
whitespace or, outside traditional mode, a skipped comment. */
2806 unsigned char flags = 0;
/* Re-entered after the token array has been expanded, so that the
pointers into list->tokens are recomputed. */
2808 expanded:
2809 token_limit = list->tokens + list->tokens_cap;
2810 cur_token = list->tokens + list->tokens_used;
2812 for (; cur < buffer->rlimit && cur_token < token_limit;)
2814 unsigned char c = *cur++;
2816 /* Optimize whitespace skipping, in particular the case of a
2817 single whitespace character, as every other token is probably
2818 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2819 if (is_hspace ((unsigned int) c))
2821 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2823 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2824 skip_whitespace (pfile, IS_DIRECTIVE (list));
2825 cur = buffer->cur;
2827 flags = PREV_WHITESPACE;
2828 if (cur == buffer->rlimit)
2829 break;
2830 c = *cur++;
2833 /* Initialize current token. Its type is set in the switch. */
2834 cur_token->col = COLUMN (cur);
2835 cur_token->flags = flags;
2836 flags = 0;
2838 switch (c)
2840 case '0': case '1': case '2': case '3': case '4':
2841 case '5': case '6': case '7': case '8': case '9':
2842 /* Prepend an immediately previous CPP_DOT token. */
2843 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2845 cur_token--;
2846 if (list->name_cap == list->name_used)
2847 auto_expand_name_space (list);
2849 cur_token->val.name.len = 1;
2850 cur_token->val.name.offset = list->name_used;
2851 list->namebuf[list->name_used++] = '.';
2853 else
2854 INIT_NAME (list, cur_token->val.name);
2855 cur--; /* Backup character. */
/* Also the re-entry point for a number interrupted by an escaped
newline (see the newline case below). */
2857 continue_number:
2858 buffer->cur = cur;
2859 parse_number (pfile, list, &cur_token->val.name);
2860 cur = buffer->cur;
2862 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2863 break;
2865 letter:
2866 case '_':
2867 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2868 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2869 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2870 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2871 case 'y': case 'z':
2872 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2873 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2874 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2875 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2876 case 'Y': case 'Z':
2877 INIT_NAME (list, cur_token->val.name);
2878 cur--; /* Backup character. */
2879 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
/* Also the re-entry point for a name interrupted by an escaped
newline (see the newline case below). */
2881 continue_name:
2882 buffer->cur = cur;
2883 parse_name (pfile, list, &cur_token->val.name);
2884 cur = buffer->cur;
2886 /* Find handler for newly created / extended directive. */
2887 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2888 _cpp_check_directive (list, cur_token);
2889 cur_token++;
2890 break;
2892 case '\'':
2893 /* Fall through. */
2894 case '\"':
2895 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2896 /* Do we have a wide string? */
2897 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2898 && cur_token[-1].val.name.len == 1
2899 && *(list->namebuf + cur_token[-1].val.name.offset) == 'L'
2900 && !CPP_TRADITIONAL (pfile))
2902 /* No need for 'L' any more. */
2903 list->name_used--;
2904 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2907 do_parse_string:
2908 /* Here c is one of ' " > or ). */
2909 INIT_NAME (list, cur_token->val.name);
2910 buffer->cur = cur;
2911 parse_string (pfile, list, &cur_token->val.name, c);
2912 cur = buffer->cur;
2913 cur_token++;
2914 break;
2916 case '/':
2917 cur_token->type = CPP_DIV;
2918 if (IMMED_TOKEN ())
2920 if (PREV_TOKEN_TYPE == CPP_DIV)
2922 /* We silently allow C++ comments in system headers,
2923 irrespective of conformance mode, because lots of
2924 broken systems do that and trying to clean it up
2925 in fixincludes is a nightmare. */
2926 if (buffer->system_header_p)
2927 goto do_line_comment;
2928 else if (CPP_OPTION (pfile, cplusplus_comments))
2930 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2931 && ! buffer->warned_cplusplus_comments)
2933 buffer->cur = cur;
2934 cpp_pedwarn (pfile,
2935 "C++ style comments are not allowed in ISO C89");
2936 cpp_pedwarn (pfile,
2937 "(this will be reported only once per input file)");
2938 buffer->warned_cplusplus_comments = 1;
/* Shared with the '-' case, which jumps here for Chill "--"
comments; c tells the two apart. */
2940 do_line_comment:
2941 buffer->cur = cur;
2942 if (cur[-2] != c)
2943 cpp_warning (pfile,
2944 "comment start split across lines");
2945 if (skip_line_comment (pfile))
2946 cpp_error_with_line (pfile, list->line,
2947 cur_token[-1].col,
2948 "multi-line comment");
2949 if (!CPP_OPTION (pfile, discard_comments))
2950 copy_comment (list, cur, buffer->cur - cur,
2951 cur_token - 1 - list->tokens, c == '/'
2952 ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
2953 cur = buffer->cur;
2955 /* Back-up to first '-' or '/'. */
2956 cur_token -= 2;
2957 if (!CPP_OPTION (pfile, traditional))
2958 flags = PREV_WHITESPACE;
2962 cur_token++;
2963 break;
2965 case '*':
2966 cur_token->type = CPP_MULT;
2967 if (IMMED_TOKEN ())
2969 if (PREV_TOKEN_TYPE == CPP_DIV)
2971 buffer->cur = cur;
2972 if (cur[-2] != '/')
2973 cpp_warning (pfile,
2974 "comment start '/*' split across lines");
2975 if (skip_block_comment (pfile))
2976 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
2977 "unterminated comment");
2978 else if (buffer->cur[-2] != '*')
2979 cpp_warning (pfile,
2980 "comment end '*/' split across lines");
2981 if (!CPP_OPTION (pfile, discard_comments))
2982 copy_comment (list, cur, buffer->cur - cur,
2983 cur_token - 1 - list->tokens, CPP_C_COMMENT);
2984 cur = buffer->cur;
/* Back up so that, after the increment below, the CPP_DIV slot that
started the comment is reused for the next token. */
2986 cur_token -= 2;
2987 if (!CPP_OPTION (pfile, traditional))
2988 flags = PREV_WHITESPACE;
2990 else if (CPP_OPTION (pfile, cplusplus))
2992 /* In C++, there are .* and ->* operators. */
2993 if (PREV_TOKEN_TYPE == CPP_DEREF)
2994 BACKUP_TOKEN (CPP_DEREF_STAR);
2995 else if (PREV_TOKEN_TYPE == CPP_DOT)
2996 BACKUP_TOKEN (CPP_DOT_STAR);
2999 cur_token++;
3000 break;
3002 case '\n':
3003 case '\r':
3004 handle_newline (cur, buffer->rlimit, c);
3005 if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
3007 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3009 buffer->cur = cur;
3010 cpp_warning (pfile,
3011 "backslash and newline separated by space");
3013 PUSH_TOKEN (CPP_VSPACE);
3014 goto out;
3016 /* Remove the escaped newline. Then continue to process
3017 any interrupted name or number. */
3018 cur_token--;
3019 if (IMMED_TOKEN ())
3021 cur_token--;
3022 if (cur_token->type == CPP_NAME)
3023 goto continue_name;
3024 else if (cur_token->type == CPP_NUMBER)
3025 goto continue_number;
3026 cur_token++;
3028 break;
3030 case '-':
3031 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3033 if (CPP_OPTION (pfile, chill))
3034 goto do_line_comment;
3035 REVISE_TOKEN (CPP_MINUS_MINUS);
3037 else
3038 PUSH_TOKEN (CPP_MINUS);
3039 break;
3041 /* The digraph flag checking ensures that ## and %:%:
3042 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3043 make_hash:
3044 case '#':
3045 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3046 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3047 REVISE_TOKEN (CPP_PASTE);
3048 else
3049 PUSH_TOKEN (CPP_HASH);
3050 break;
3052 case ':':
3053 cur_token->type = CPP_COLON;
3054 if (IMMED_TOKEN ())
3056 if (PREV_TOKEN_TYPE == CPP_COLON
3057 && CPP_OPTION (pfile, cplusplus))
3058 BACKUP_TOKEN (CPP_SCOPE);
3059 /* Digraph: "<:" is a '[' */
3060 else if (PREV_TOKEN_TYPE == CPP_LESS)
3061 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3062 /* Digraph: "%:" is a '#' */
3063 else if (PREV_TOKEN_TYPE == CPP_MOD)
3065 (--cur_token)->flags |= DIGRAPH;
3066 goto make_hash;
3069 cur_token++;
3070 break;
3072 case '&':
3073 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3074 REVISE_TOKEN (CPP_AND_AND);
3075 else
3076 PUSH_TOKEN (CPP_AND);
3077 break;
3079 make_or:
3080 case '|':
3081 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3082 REVISE_TOKEN (CPP_OR_OR);
3083 else
3084 PUSH_TOKEN (CPP_OR);
3085 break;
3087 case '+':
3088 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3089 REVISE_TOKEN (CPP_PLUS_PLUS);
3090 else
3091 PUSH_TOKEN (CPP_PLUS);
3092 break;
3094 case '=':
3095 /* This relies on equidistance of "?=" and "?" tokens. */
3096 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3097 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3098 else
3099 PUSH_TOKEN (CPP_EQ);
3100 break;
3102 case '>':
3103 cur_token->type = CPP_GREATER;
3104 if (IMMED_TOKEN ())
3106 if (PREV_TOKEN_TYPE == CPP_GREATER)
3107 BACKUP_TOKEN (CPP_RSHIFT);
3108 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3109 BACKUP_TOKEN (CPP_DEREF);
3110 /* Digraph: ":>" is a ']' */
3111 else if (PREV_TOKEN_TYPE == CPP_COLON)
3112 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3113 /* Digraph: "%>" is a '}' */
3114 else if (PREV_TOKEN_TYPE == CPP_MOD)
3115 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3117 cur_token++;
3118 break;
3120 case '<':
3121 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3123 REVISE_TOKEN (CPP_LSHIFT);
3124 break;
3126 /* Is this the beginning of a header name? */
3127 if (list->dir_flags & SYNTAX_INCLUDE)
3129 c = '>'; /* Terminator. */
3130 cur_token->type = CPP_HEADER_NAME;
3131 goto do_parse_string;
3133 PUSH_TOKEN (CPP_LESS);
3134 break;
3136 case '%':
3137 /* Digraph: "<%" is a '{' */
3138 cur_token->type = CPP_MOD;
3139 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3140 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3141 cur_token++;
3142 break;
3144 case '(':
3145 /* Is this the beginning of an assertion string? */
3146 if (list->dir_flags & SYNTAX_ASSERT)
3148 c = ')'; /* Terminator. */
3149 cur_token->type = CPP_ASSERTION;
3150 goto do_parse_string;
3152 PUSH_TOKEN (CPP_OPEN_PAREN);
3153 break;
3155 case '?':
3156 if (cur + 1 < buffer->rlimit && *cur == '?'
3157 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3159 /* Handle trigraph. */
3160 cur++;
3161 switch (*cur++)
3163 case '(': goto make_open_square;
3164 case ')': goto make_close_square;
3165 case '<': goto make_open_brace;
3166 case '>': goto make_close_brace;
3167 case '=': goto make_hash;
3168 case '!': goto make_or;
3169 case '-': goto make_complement;
3170 case '/': goto make_backslash;
3171 case '\'': goto make_xor;
3174 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3176 /* GNU C++ defines <? and >? operators. */
3177 if (PREV_TOKEN_TYPE == CPP_LESS)
3179 REVISE_TOKEN (CPP_MIN);
3180 break;
3182 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3184 REVISE_TOKEN (CPP_MAX);
3185 break;
3188 PUSH_TOKEN (CPP_QUERY);
3189 break;
3191 case '.':
/* A third consecutive '.': the two CPP_DOT tokens already stored are
replaced by a single CPP_ELLIPSIS. */
3192 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3193 && IMMED_TOKEN ()
3194 && !(cur_token[-1].flags & PREV_WHITESPACE))
3196 cur_token -= 2;
3197 PUSH_TOKEN (CPP_ELLIPSIS);
3199 else
3200 PUSH_TOKEN (CPP_DOT);
3201 break;
/* The make_* labels are the targets of the trigraph switch in the
'?' case above. */
3203 make_complement:
3204 case '~': PUSH_TOKEN (CPP_COMPL); break;
3205 make_xor:
3206 case '^': PUSH_TOKEN (CPP_XOR); break;
3207 make_open_brace:
3208 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3209 make_close_brace:
3210 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3211 make_open_square:
3212 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3213 make_close_square:
3214 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3215 make_backslash:
3216 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3217 case '!': PUSH_TOKEN (CPP_NOT); break;
3218 case ',': PUSH_TOKEN (CPP_COMMA); break;
3219 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3220 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3222 case '$':
3223 if (CPP_OPTION (pfile, dollars_in_ident))
3224 goto letter;
3225 /* Fall through */
3226 default:
/* Any other character becomes a CPP_OTHER token carrying the
character itself in aux. */
3227 cur_token->aux = c;
3228 PUSH_TOKEN (CPP_OTHER);
3229 break;
3233 /* Run out of token space? */
3234 if (cur_token == token_limit)
3236 list->tokens_used = cur_token - list->tokens;
3237 expand_token_space (list);
3238 goto expanded;
/* Reached only when the input buffer is exhausted. */
3241 cur_token->type = CPP_EOF;
3242 cur_token->flags = flags;
3244 if (cur_token != &list->tokens[0])
3246 /* Next call back will get just a CPP_EOF. */
3247 buffer->cur = cur;
3248 cpp_warning (pfile, "no newline at end of file");
3249 PUSH_TOKEN (CPP_VSPACE);
3252 out:
3253 buffer->cur = cur;
3255 list->tokens_used = cur_token - list->tokens;
3257 /* FIXME: take this check out and put it in the caller.
3258 list->directive == 0 indicates an unknown directive (but null
3259 directive is OK). This is the first time we can be sure the
3260 directive is invalid, and thus warn about it, because it might
3261 have been split by escaped newlines. Also, don't complain about
3262 invalid directives in assembly source, we don't know where the
3263 comments are, and # may introduce assembler pseudo-ops. */
3265 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3266 && list->tokens[1].type != CPP_VSPACE
3267 && !CPP_OPTION (pfile, lang_asm))
3268 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3269 "invalid preprocessing directive");
3272 /* Token spelling functions. Used for output of a preprocessed file,
3273 stringizing and token pasting. They all assume sufficient buffer
3274 is allocated, and return exactly how much they used. */
3276 /* Needs buffer of 3 + len. */
3277 unsigned int
3278 spell_string (buffer, list, token)
3279 unsigned char *buffer;
3280 cpp_toklist *list;
3281 cpp_token *token;
3283 unsigned char c, *orig_buff = buffer;
3284 size_t len;
3286 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3287 *buffer++ = 'L';
3288 c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
3289 *buffer++ = c;
3291 len = token->val.name.len;
3292 memcpy (buffer, list->namebuf + token->val.name.offset, len);
3293 buffer += len;
3294 *buffer++ = c;
3295 return buffer - orig_buff;
3298 /* Needs buffer of len + 2. */
3299 unsigned int
3300 spell_comment (buffer, list, token)
3301 unsigned char *buffer;
3302 cpp_toklist *list;
3303 cpp_token *token;
3305 size_t len;
3307 if (token->type == CPP_C_COMMENT)
3309 *buffer++ = '/';
3310 *buffer++ = '*';
3312 else if (token->type == CPP_CPP_COMMENT)
3314 *buffer++ = '/';
3315 *buffer++ = '/';
3317 else
3319 *buffer++ = '-';
3320 *buffer++ = '-';
3323 len = token->val.name.len;
3324 memcpy (buffer, list->namebuf + token->val.name.offset, len);
3326 return len + 2;
3329 /* Needs buffer of len. */
3330 unsigned int
3331 spell_name (buffer, list, token)
3332 unsigned char *buffer;
3333 cpp_toklist *list;
3334 cpp_token *token;
3336 size_t len;
3338 len = token->val.name.len;
3339 memcpy (buffer, list->namebuf + token->val.name.offset, len);
3340 buffer += len;
3342 return len;
3345 void
3346 _cpp_lex_file (pfile)
3347 cpp_reader* pfile;
3349 int recycle;
3350 cpp_toklist* list;
3352 init_trigraph_map ();
3353 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3355 for (recycle = 0; ;)
3357 init_token_list (pfile, list, recycle);
3358 recycle = 1;
3360 _cpp_lex_line (pfile, list);
3361 if (list->tokens[0].type == CPP_EOF)
3362 break;
3364 if (list->dir_handler)
3366 if (list->dir_handler (pfile))
3368 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3369 recycle = 0;
3372 else
3373 _cpp_output_list (pfile, list);
3377 /* This could be useful to other routines. If you allocate this many
3378 bytes, you have enough room to spell the token.
The token's name length is only counted for token types whose
spelling is done by a handler (SPELL_HANDLER). */
3379 #define TOKEN_LEN(token) (4 + (token_spellings[token->type].type == \
3380 SPELL_HANDLER ? token->val.name.len: 0))
/* Write the spelling of every token in LIST to PFILE's output area
(advancing pfile->limit), stopping after the end-of-line token has
been written as '\n'. A token flagged PREV_WHITESPACE is preceded
either by the comments recorded for its position in list->comments
or, if there are none, by a single space. */
3382 static void
3383 _cpp_output_list (pfile, list)
3384 cpp_reader *pfile;
3385 cpp_toklist *list;
/* comment_no indexes the next unwritten entry of list->comments;
comment_token is the token that entry belongs in front of. */
3387 unsigned int comment_no = 0;
3388 cpp_token *token, *comment_token = 0;
3390 if (list->comments_used > 0)
3391 comment_token = list->tokens + list->comments[0].aux;
3393 CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
3394 for (token = &list->tokens[0];; token++)
3396 if (token->flags & PREV_WHITESPACE)
3398 /* Output comments if -C. Otherwise a space will do. */
3399 if (token == comment_token)
3401 cpp_token *comment = &list->comments[comment_no];
/* Several comments may be attached to the same token; write them all
before the token itself. */
3404 CPP_RESERVE (pfile, 2 + TOKEN_LEN (comment));
3405 pfile->limit += spell_comment (pfile->limit, list, comment);
3406 comment_no++, comment++;
3407 if (comment_no == list->comments_used)
3408 break;
3409 comment_token = comment->aux + list->tokens;
3411 while (comment_token == token);
3413 else
3414 CPP_PUTC_Q (pfile, ' ');
3417 CPP_RESERVE (pfile, 2 + TOKEN_LEN (token));
/* How a token is written depends on the spelling class of its type. */
3418 switch (token_spellings[token->type].type)
3420 case SPELL_TEXT:
3422 const unsigned char *spelling;
3423 unsigned char c;
/* Fixed text, with an alternative table for tokens that were written
as digraphs. */
3425 if (token->flags & DIGRAPH)
3426 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3427 else
3428 spelling = token_spellings[token->type].speller;
3430 while ((c = *spelling++) != '\0')
3431 CPP_PUTC_Q (pfile, c);
3433 break;
3435 case SPELL_HANDLER:
3437 speller s;
/* The speller writes at pfile->limit and returns the number of bytes
it used. */
3439 s = (speller) token_spellings[token->type].speller;
3440 pfile->limit += s (pfile->limit, list, token);
3442 break;
3444 case SPELL_CHAR:
/* The character itself is kept in the token's aux field. */
3445 *pfile->limit++ = token->aux;
3446 break;
3448 case SPELL_EOL:
/* End of the logical line: this is the only exit from the loop. */
3449 CPP_PUTC_Q (pfile, '\n');
3450 return;
3452 case SPELL_NONE:
3453 cpp_error (pfile, "Unwriteable token");
3454 break;
3459 #endif