Only create gcc/configargs.h if gcc build directory is present
[official-gcc.git] / gcc / c-lex.c
blob343c9ccc6595ff47914c10a420a3772416b05b1a
1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "tree.h"
28 #include "input.h"
29 #include "output.h"
30 #include "c-lex.h"
31 #include "c-tree.h"
32 #include "flags.h"
33 #include "timevar.h"
34 #include "cpplib.h"
35 #include "c-pragma.h"
36 #include "toplev.h"
37 #include "intl.h"
38 #include "ggc.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45 #ifdef CROSS_COMPILE
46 #undef MULTIBYTE_CHARS
47 #endif
49 #ifdef MULTIBYTE_CHARS
50 #include "mbchar.h"
51 #include <locale.h>
52 #endif /* MULTIBYTE_CHARS */
53 #ifndef GET_ENVIRONMENT
54 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
55 #endif
57 #if USE_CPPLIB
58 extern cpp_reader parse_in;
59 #else
60 /* Stream for reading from the input file. */
61 FILE *finput;
62 #endif
64 /* Private idea of the line number. See discussion in c_lex(). */
65 static int lex_lineno;
67 /* We may keep statistics about how long which files took to compile. */
68 static int header_time, body_time;
69 static splay_tree file_info_tree;
71 /* Cause the `yydebug' variable to be defined. */
72 #define YYDEBUG 1
74 #if !USE_CPPLIB
/* A small stack of characters that have been pushed back onto the
   input.  getch () pops from it before reading the file; put_back ()
   pushes onto it.  */
struct putback_buffer
{
  unsigned char *buffer;	/* Storage for the pushed-back characters.  */
  int   buffer_size;		/* Number of bytes allocated for BUFFER.  */
  int   index;			/* Index of the top element, -1 if empty.  */
};

static struct putback_buffer putback = {NULL, 0, -1};
85 static inline int getch PARAMS ((void));
87 static inline int
88 getch ()
90 if (putback.index != -1)
92 int ch = putback.buffer[putback.index];
93 --putback.index;
94 return ch;
96 return getc (finput);
99 static inline void put_back PARAMS ((int));
101 static inline void
102 put_back (ch)
103 int ch;
105 if (ch != EOF)
107 if (putback.index == putback.buffer_size - 1)
109 putback.buffer_size += 16;
110 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
112 putback.buffer[++putback.index] = ch;
116 int linemode;
118 #endif
120 /* File used for outputting assembler code. */
121 extern FILE *asm_out_file;
123 #undef WCHAR_TYPE_SIZE
124 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
126 /* Number of bytes in a wide character. */
127 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
129 #if !USE_CPPLIB
130 static int maxtoken; /* Current nominal length of token buffer. */
131 static char *token_buffer; /* Pointer to token buffer.
132 Actual allocated length is maxtoken + 2. */
133 #endif
135 int indent_level; /* Number of { minus number of }. */
136 int pending_lang_change; /* If we need to switch languages - C++ only */
137 int c_header_level; /* depth in C headers - C++ only */
139 /* Nonzero tells yylex to ignore \ in string constants. */
140 static int ignore_escape_flag;
142 static const char *readescape PARAMS ((const char *, const char *,
143 unsigned int *));
144 static const char *read_ucs PARAMS ((const char *, const char *,
145 unsigned int *, int));
146 static void parse_float PARAMS ((PTR));
147 static tree lex_number PARAMS ((const char *, unsigned int));
148 static tree lex_string PARAMS ((const char *, unsigned int, int));
149 static tree lex_charconst PARAMS ((const char *, unsigned int, int));
150 static void update_header_times PARAMS ((const char *));
151 static int dump_one_header PARAMS ((splay_tree_node, void *));
152 static int mark_splay_tree_node PARAMS ((splay_tree_node, void *));
153 static void mark_splay_tree PARAMS ((void *));
155 #if !USE_CPPLIB
156 static int skip_white_space PARAMS ((int));
157 static char *extend_token_buffer PARAMS ((const char *));
158 static void extend_token_buffer_to PARAMS ((int));
159 static int read_line_number PARAMS ((int *));
160 static void process_directive PARAMS ((void));
161 #else
162 static void cb_ident PARAMS ((cpp_reader *, const unsigned char *,
163 unsigned int));
164 static void cb_enter_file PARAMS ((cpp_reader *));
165 static void cb_leave_file PARAMS ((cpp_reader *));
166 static void cb_rename_file PARAMS ((cpp_reader *));
167 static void cb_def_pragma PARAMS ((cpp_reader *));
168 #endif
171 const char *
172 init_c_lex (filename)
173 const char *filename;
175 struct c_fileinfo *toplevel;
177 /* Set up filename timing. Must happen before cpp_start_read. */
178 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
180 (splay_tree_delete_value_fn)free);
181 /* Make sure to mark the filenames in the tree for GC. */
182 ggc_add_root (&file_info_tree, 1, sizeof (file_info_tree),
183 mark_splay_tree);
184 toplevel = get_fileinfo (ggc_strdup ("<top level>"));
185 if (flag_detailed_statistics)
187 header_time = 0;
188 body_time = get_run_time ();
189 toplevel->time = body_time;
192 #ifdef MULTIBYTE_CHARS
193 /* Change to the native locale for multibyte conversions. */
194 setlocale (LC_CTYPE, "");
195 GET_ENVIRONMENT (literal_codeset, "LANG");
196 #endif
198 #if !USE_CPPLIB
199 /* Open input file. */
200 if (filename == 0 || !strcmp (filename, "-"))
202 finput = stdin;
203 filename = "stdin";
205 else
206 finput = fopen (filename, "r");
207 if (finput == 0)
208 pfatal_with_name (filename);
210 #ifdef IO_BUFFER_SIZE
211 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
212 #endif
213 #else /* !USE_CPPLIB */
215 parse_in.cb.ident = cb_ident;
216 parse_in.cb.enter_file = cb_enter_file;
217 parse_in.cb.leave_file = cb_leave_file;
218 parse_in.cb.rename_file = cb_rename_file;
219 parse_in.cb.def_pragma = cb_def_pragma;
221 /* Make sure parse_in.digraphs matches flag_digraphs. */
222 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
224 if (! cpp_start_read (&parse_in, 0 /* no printer */, filename))
225 abort ();
227 if (filename == 0 || !strcmp (filename, "-"))
228 filename = "stdin";
229 #endif
231 #if !USE_CPPLIB
232 maxtoken = 40;
233 token_buffer = (char *) xmalloc (maxtoken + 2);
234 #endif
235 /* Start it at 0, because check_newline is called at the very beginning
236 and will increment it to 1. */
237 lineno = lex_lineno = 0;
239 return filename;
242 struct c_fileinfo *
243 get_fileinfo (name)
244 const char *name;
246 splay_tree_node n;
247 struct c_fileinfo *fi;
249 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
250 if (n)
251 return (struct c_fileinfo *) n->value;
253 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
254 fi->time = 0;
255 fi->interface_only = 0;
256 fi->interface_unknown = 1;
257 splay_tree_insert (file_info_tree, (splay_tree_key) name,
258 (splay_tree_value) fi);
259 return fi;
262 static void
263 update_header_times (name)
264 const char *name;
266 /* Changing files again. This means currently collected time
267 is charged against header time, and body time starts back at 0. */
268 if (flag_detailed_statistics)
270 int this_time = get_run_time ();
271 struct c_fileinfo *file = get_fileinfo (name);
272 header_time += this_time - body_time;
273 file->time += this_time - body_time;
274 body_time = this_time;
278 static int
279 dump_one_header (n, dummy)
280 splay_tree_node n;
281 void *dummy ATTRIBUTE_UNUSED;
283 print_time ((const char *) n->key,
284 ((struct c_fileinfo *) n->value)->time);
285 return 0;
288 void
289 dump_time_statistics ()
291 struct c_fileinfo *file = get_fileinfo (input_filename);
292 int this_time = get_run_time ();
293 file->time += this_time - body_time;
295 fprintf (stderr, "\n******\n");
296 print_time ("header files (total)", header_time);
297 print_time ("main file (total)", this_time - body_time);
298 fprintf (stderr, "ratio = %g : 1\n",
299 (double)header_time / (double)(this_time - body_time));
300 fprintf (stderr, "\n******\n");
302 splay_tree_foreach (file_info_tree, dump_one_header, 0);
305 #if !USE_CPPLIB
307 /* If C is not whitespace, return C.
308 Otherwise skip whitespace and return first nonwhite char read. */
310 static int
311 skip_white_space (c)
312 register int c;
314 for (;;)
316 switch (c)
318 /* There is no need to process comments or backslash-newline
319 here. None can occur in the output of cpp. Do handle \r
320 in case someone sent us a .i file. */
322 case '\n':
323 if (linemode)
325 put_back (c);
326 return EOF;
328 c = check_newline ();
329 break;
331 case '\r':
332 /* Per C99, horizontal whitespace is just these four characters. */
333 case ' ':
334 case '\t':
335 case '\f':
336 case '\v':
337 c = getch ();
338 break;
340 case '\\':
341 error ("stray '\\' in program");
342 c = getch ();
343 break;
345 default:
346 return (c);
/* Skips all of the white space at the current location in the input file.
   The first non-white character found is pushed back so the next
   getch () returns it.  */

void
position_after_white_space ()
{
  register int c;

  c = getch ();

  put_back (skip_white_space (c));
}
363 /* Make the token buffer longer, preserving the data in it.
364 P should point to just beyond the last valid character in the old buffer.
365 The value we return is a pointer to the new buffer
366 at a place corresponding to P. */
368 static void
369 extend_token_buffer_to (size)
370 int size;
373 maxtoken = maxtoken * 2 + 10;
374 while (maxtoken < size);
375 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
378 static char *
379 extend_token_buffer (p)
380 const char *p;
382 int offset = p - token_buffer;
383 extend_token_buffer_to (offset);
384 return token_buffer + offset;
388 static int
389 read_line_number (num)
390 int *num;
392 tree value;
393 enum cpp_ttype token = c_lex (&value);
395 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
397 *num = TREE_INT_CST_LOW (value);
398 return 1;
400 else
402 if (token != CPP_EOF)
403 error ("invalid #-line");
404 return 0;
408 /* At the beginning of a line, increment the line number
409 and process any #-directive on this line.
410 If the line is a #-directive, read the entire line and return a newline.
411 Otherwise, return the line's first non-whitespace character. */
414 check_newline ()
416 register int c;
418 /* Loop till we get a nonblank, non-directive line. */
419 for (;;)
421 /* Read first nonwhite char on the line. */
423 c = getch ();
424 while (c == ' ' || c == '\t');
426 lex_lineno++;
427 if (c == '#')
429 process_directive ();
430 return '\n';
433 else if (c != '\n')
434 break;
436 return c;
439 static void
440 process_directive ()
442 enum cpp_ttype token;
443 tree value;
444 int saw_line;
445 enum { act_none, act_push, act_pop } action;
446 int action_number, l;
447 char *new_file;
448 #ifndef NO_IMPLICIT_EXTERN_C
449 int entering_c_header = 0;
450 #endif
452 /* Don't read beyond this line. */
453 saw_line = 0;
454 linemode = 1;
456 token = c_lex (&value);
458 if (token == CPP_NAME)
460 /* If a letter follows, then if the word here is `line', skip
461 it and ignore it; otherwise, ignore the line, with an error
462 if the word isn't `pragma'. */
464 const char *name = IDENTIFIER_POINTER (value);
466 if (!strcmp (name, "pragma"))
468 dispatch_pragma ();
469 goto skipline;
471 else if (!strcmp (name, "define"))
473 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
474 goto skipline;
476 else if (!strcmp (name, "undef"))
478 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
479 goto skipline;
481 else if (!strcmp (name, "line"))
483 saw_line = 1;
484 token = c_lex (&value);
485 goto linenum;
487 else if (!strcmp (name, "ident"))
489 /* #ident. We expect a string constant here.
490 The pedantic warning and syntax error are now in cpp. */
492 token = c_lex (&value);
493 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
494 goto skipline;
496 #ifdef ASM_OUTPUT_IDENT
497 if (! flag_no_ident)
499 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
501 #endif
503 /* Skip the rest of this line. */
504 goto skipline;
507 error ("undefined or invalid # directive `%s'", name);
508 goto skipline;
511 /* If the # is the only nonwhite char on the line,
512 just ignore it. Check the new newline. */
513 if (token == CPP_EOF)
514 goto skipline;
516 linenum:
517 /* Here we have either `#line' or `# <nonletter>'.
518 In either case, it should be a line number; a digit should follow. */
520 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
522 error ("invalid #-line");
523 goto skipline;
526 /* subtract one, because it is the following line that
527 gets the specified number */
529 l = TREE_INT_CST_LOW (value) - 1;
531 /* More follows: it must be a string constant (filename).
532 It would be neat to use cpplib to quickly process the string, but
533 (1) we don't have a handy tokenization of the string, and
534 (2) I don't know how well that would work in the presense
535 of filenames that contain wide characters. */
537 if (saw_line)
539 /* Don't treat \ as special if we are processing #line 1 "...".
540 If you want it to be treated specially, use # 1 "...". */
541 ignore_escape_flag = 1;
544 /* Read the string constant. */
545 token = c_lex (&value);
547 ignore_escape_flag = 0;
549 if (token == CPP_EOF)
551 /* No more: store the line number and check following line. */
552 lex_lineno = l;
553 goto skipline;
556 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
558 error ("invalid #line");
559 goto skipline;
562 new_file = TREE_STRING_POINTER (value);
564 if (main_input_filename == 0)
565 main_input_filename = new_file;
567 action = act_none;
568 action_number = 0;
570 /* Each change of file name
571 reinitializes whether we are now in a system header. */
572 in_system_header = 0;
574 if (!read_line_number (&action_number))
576 /* Update the name in the top element of input_file_stack. */
577 if (input_file_stack)
578 input_file_stack->name = input_filename;
581 /* `1' after file name means entering new file.
582 `2' after file name means just left a file. */
584 if (action_number == 1)
586 action = act_push;
587 read_line_number (&action_number);
589 else if (action_number == 2)
591 action = act_pop;
592 read_line_number (&action_number);
594 if (action_number == 3)
596 /* `3' after file name means this is a system header file. */
597 in_system_header = 1;
598 read_line_number (&action_number);
600 #ifndef NO_IMPLICIT_EXTERN_C
601 if (action_number == 4)
603 /* `4' after file name means this is a C header file. */
604 entering_c_header = 1;
605 read_line_number (&action_number);
607 #endif
609 /* Do the actions implied by the preceding numbers. */
610 if (action == act_push)
612 lineno = lex_lineno;
613 push_srcloc (input_filename, 1);
614 input_file_stack->indent_level = indent_level;
615 debug_start_source_file (input_filename);
616 #ifndef NO_IMPLICIT_EXTERN_C
617 if (c_header_level)
618 ++c_header_level;
619 else if (entering_c_header)
621 c_header_level = 1;
622 ++pending_lang_change;
624 #endif
626 else if (action == act_pop)
628 /* Popping out of a file. */
629 if (input_file_stack->next)
631 #ifndef NO_IMPLICIT_EXTERN_C
632 if (c_header_level && --c_header_level == 0)
634 if (entering_c_header)
635 warning ("badly nested C headers from preprocessor");
636 --pending_lang_change;
638 #endif
639 #if 0
640 if (indent_level != input_file_stack->indent_level)
642 warning_with_file_and_line
643 (input_filename, lex_lineno,
644 "This file contains more '%c's than '%c's.",
645 indent_level > input_file_stack->indent_level ? '{' : '}',
646 indent_level > input_file_stack->indent_level ? '}' : '{');
648 #endif
649 pop_srcloc ();
650 debug_end_source_file (input_file_stack->line);
652 else
653 error ("#-lines for entering and leaving files don't match");
656 update_header_times (new_file);
658 input_filename = new_file;
659 lex_lineno = l;
661 /* Hook for C++. */
662 extract_interface_info ();
664 /* skip the rest of this line. */
665 skipline:
666 linemode = 0;
668 while (getch () != '\n');
670 #else /* USE_CPPLIB */
672 /* Not yet handled: #pragma, #define, #undef.
673 No need to deal with linemarkers under normal conditions. */
675 static void
676 cb_ident (pfile, str, len)
677 cpp_reader *pfile ATTRIBUTE_UNUSED;
678 const unsigned char *str;
679 unsigned int len;
681 #ifdef ASM_OUTPUT_IDENT
682 if (! flag_no_ident)
684 /* Convert escapes in the string. */
685 tree value = lex_string ((const char *)str, len, 0);
686 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
688 #endif
691 static void
692 cb_enter_file (pfile)
693 cpp_reader *pfile;
695 cpp_buffer *ip = CPP_BUFFER (pfile);
696 /* Bleah, need a better interface to this. */
697 const char *flags = cpp_syshdr_flags (pfile, ip);
699 /* Mustn't stack the main buffer on the input stack. (Ick.) */
700 if (ip->prev)
702 lex_lineno = lineno = ip->prev->lineno - 1;
703 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
704 input_file_stack->indent_level = indent_level;
705 debug_start_source_file (ip->nominal_fname);
707 else
708 lex_lineno = 1;
710 update_header_times (ip->nominal_fname);
712 /* Hook for C++. */
713 extract_interface_info ();
715 in_system_header = (flags[0] != 0);
716 #ifndef NO_IMPLICIT_EXTERN_C
717 if (c_header_level)
718 ++c_header_level;
719 else if (flags[2] != 0)
721 c_header_level = 1;
722 ++pending_lang_change;
724 #endif
727 static void
728 cb_leave_file (pfile)
729 cpp_reader *pfile;
731 /* Bleah, need a better interface to this. */
732 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
733 #if 0
734 if (indent_level != input_file_stack->indent_level)
736 warning_with_file_and_line
737 (input_filename, lex_lineno,
738 "This file contains more '%c's than '%c's.",
739 indent_level > input_file_stack->indent_level ? '{' : '}',
740 indent_level > input_file_stack->indent_level ? '}' : '{');
742 #endif
743 /* We get called for the main buffer, but we mustn't pop it. */
744 if (input_file_stack->next)
745 pop_srcloc ();
746 in_system_header = (flags[0] != 0);
747 #ifndef NO_IMPLICIT_EXTERN_C
748 if (c_header_level && --c_header_level == 0)
750 if (flags[2] != 0)
751 warning ("badly nested C headers from preprocessor");
752 --pending_lang_change;
754 #endif
755 lex_lineno = CPP_BUFFER (pfile)->lineno;
756 debug_end_source_file (input_file_stack->line);
758 update_header_times (input_file_stack->name);
759 /* Hook for C++. */
760 extract_interface_info ();
763 static void
764 cb_rename_file (pfile)
765 cpp_reader *pfile;
767 cpp_buffer *ip = CPP_BUFFER (pfile);
768 /* Bleah, need a better interface to this. */
769 const char *flags = cpp_syshdr_flags (pfile, ip);
770 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
771 lex_lineno = ip->lineno;
772 in_system_header = (flags[0] != 0);
774 update_header_times (ip->nominal_fname);
775 /* Hook for C++. */
776 extract_interface_info ();
779 static void
780 cb_def_pragma (pfile)
781 cpp_reader *pfile;
783 /* Issue a warning message if we have been asked to do so. Ignore
784 unknown pragmas in system headers unless an explicit
785 -Wunknown-pragmas has been given. */
786 if (warn_unknown_pragmas > in_system_header)
788 const unsigned char *space, *name;
789 const cpp_token *t = pfile->first_directive_token + 2;
791 space = t[0].val.node->name;
792 name = t[1].type == CPP_NAME ? t[1].val.node->name : 0;
793 if (name)
794 warning ("ignoring #pragma %s %s", space, name);
795 else
796 warning ("ignoring #pragma %s", space);
799 #endif /* USE_CPPLIB */
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.

   [lex.charset]: The character designated by the universal-character-name
   \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
   is NNNNNNNN; the character designated by the universal-character-name
   \uNNNN is that character whose character short name in ISO/IEC 10646 is
   0000NNNN. If the hexadecimal value for a universal character name is
   less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
   universal character name designates a character in the basic source
   character set, then the program is ill-formed.

   We assume that wchar_t is Unicode, so we don't need to do any
   mapping.  Is this ever wrong?

   P points just past the `u' or `U', LIMIT is the end of the available
   text and LENGTH is the number of hex digits expected.  The decoded
   value is stored in *CPTR even when an error was reported; the return
   value is the updated input pointer.  */

static const char *
read_ucs (p, limit, cptr, length)
     const char *p;
     const char *limit;
     unsigned int *cptr;
     int length;
{
  unsigned int code = 0;
  int c;

  for (; length; --length)
    {
      if (p >= limit)
        {
          error ("incomplete universal-character-name");
          break;
        }

      c = *p++;
      if (! ISXDIGIT (c))
        {
          error ("non hex digit '%c' in universal-character-name", c);
          /* Leave the offending character for the caller.  */
          p--;
          break;
        }

      code <<= 4;
      if (c >= 'a' && c <= 'f')
        code += c - 'a' + 10;
      if (c >= 'A' && c <= 'F')
        code += c - 'A' + 10;
      if (c >= '0' && c <= '9')
        code += c - '0';
    }

#ifdef TARGET_EBCDIC
  sorry ("universal-character-name on EBCDIC target");
  *cptr = 0x3f;  /* EBCDIC invalid character */
  return p;
#endif

  if (code > 0x9f && !(code & 0x80000000))
    /* True extended character, OK.  */;
  else if (code >= 0x20 && code < 0x7f)
    {
      /* ASCII printable character.  The C character set consists of all of
         these except $, @ and `.  We use hex escapes so that this also
         works with EBCDIC hosts.  */
      if (code != 0x24 && code != 0x40 && code != 0x60)
        error ("universal-character-name used for '%c'", code);
    }
  else
    error ("invalid universal-character-name");

  *cptr = code;
  return p;
}
873 /* Read an escape sequence and write its character equivalent into *CPTR.
874 P is the input pointer, which is just after the backslash. LIMIT
875 is how much text we have.
876 Returns the updated input pointer. */
878 static const char *
879 readescape (p, limit, cptr)
880 const char *p;
881 const char *limit;
882 unsigned int *cptr;
884 unsigned int c, code, count;
885 unsigned firstdig = 0;
886 int nonnull;
888 if (p == limit)
890 /* cpp has already issued an error for this. */
891 *cptr = 0;
892 return p;
895 c = *p++;
897 switch (c)
899 case 'x':
900 if (warn_traditional && !in_system_header)
901 warning ("the meaning of `\\x' varies with -traditional");
903 if (flag_traditional)
905 *cptr = 'x';
906 return p;
909 code = 0;
910 count = 0;
911 nonnull = 0;
912 while (p < limit)
914 c = *p++;
915 if (! ISXDIGIT (c))
917 p--;
918 break;
920 code *= 16;
921 if (c >= 'a' && c <= 'f')
922 code += c - 'a' + 10;
923 if (c >= 'A' && c <= 'F')
924 code += c - 'A' + 10;
925 if (c >= '0' && c <= '9')
926 code += c - '0';
927 if (code != 0 || count != 0)
929 if (count == 0)
930 firstdig = code;
931 count++;
933 nonnull = 1;
935 if (! nonnull)
937 warning ("\\x used with no following hex digits");
938 *cptr = 'x';
939 return p;
941 else if (count == 0)
942 /* Digits are all 0's. Ok. */
944 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
945 || (count > 1
946 && (((unsigned)1
947 << (TYPE_PRECISION (integer_type_node)
948 - (count - 1) * 4))
949 <= firstdig)))
950 pedwarn ("hex escape out of range");
951 *cptr = code;
952 return p;
954 case '0': case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7':
956 code = 0;
957 for (count = 0; count < 3; count++)
959 if (c < '0' || c > '7')
961 p--;
962 break;
964 code = (code * 8) + (c - '0');
965 if (p == limit)
966 break;
967 c = *p++;
970 if (count == 3)
971 p--;
973 *cptr = code;
974 return p;
976 case '\\': case '\'': case '"': case '?':
977 *cptr = c;
978 return p;
980 case 'n': *cptr = TARGET_NEWLINE; return p;
981 case 't': *cptr = TARGET_TAB; return p;
982 case 'r': *cptr = TARGET_CR; return p;
983 case 'f': *cptr = TARGET_FF; return p;
984 case 'b': *cptr = TARGET_BS; return p;
985 case 'v': *cptr = TARGET_VT; return p;
986 case 'a':
987 if (warn_traditional && !in_system_header)
988 warning ("the meaning of '\\a' varies with -traditional");
989 *cptr = flag_traditional ? c : TARGET_BELL;
990 return p;
992 /* Warnings and support checks handled by read_ucs(). */
993 case 'u': case 'U':
994 if (c_language != clk_cplusplus && !flag_isoc99)
995 break;
997 if (warn_traditional && !in_system_header)
998 warning ("the meaning of '\\%c' varies with -traditional", c);
1000 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
1002 case 'e': case 'E':
1003 if (pedantic)
1004 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
1005 *cptr = TARGET_ESC; return p;
1007 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1008 '\%' is used to prevent SCCS from getting confused. */
1009 case '(': case '{': case '[': case '%':
1010 if (pedantic)
1011 pedwarn ("unknown escape sequence '\\%c'", c);
1012 *cptr = c;
1013 return p;
1016 if (ISGRAPH (c))
1017 pedwarn ("unknown escape sequence '\\%c'", c);
1018 else
1019 pedwarn ("unknown escape sequence: '\\' followed by char 0x%x", c);
1021 *cptr = c;
1022 return p;
1025 #if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.

   Returns 0 for values below 0x7f (and always on EBCDIC targets), 1 for
   everything else; values outside the table of permitted ranges also
   get an error.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive ranges of code points permitted in identifiers.  */
  static const struct { int lo, hi; } valid[] =
  {
    /* Latin */
    {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, {0x00f8, 0x01f5}, {0x01fa, 0x0217},
    {0x0250, 0x02a8}, {0x1e00, 0x1e9a}, {0x1ea0, 0x1ef9},

    /* Greek */
    {0x0384, 0x0384}, {0x0388, 0x038a}, {0x038c, 0x038c}, {0x038e, 0x03a1},
    {0x03a3, 0x03ce}, {0x03d0, 0x03d6}, {0x03da, 0x03da}, {0x03dc, 0x03dc},
    {0x03de, 0x03de}, {0x03e0, 0x03e0}, {0x03e2, 0x03f3}, {0x1f00, 0x1f15},
    {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
    {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d},
    {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc},
    {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4},
    {0x1ff6, 0x1ffc},

    /* Cyrillic */
    {0x0401, 0x040d}, {0x040f, 0x044f}, {0x0451, 0x045c}, {0x045e, 0x0481},
    {0x0490, 0x04c4}, {0x04c7, 0x04c8}, {0x04cb, 0x04cc}, {0x04d0, 0x04eb},
    {0x04ee, 0x04f5}, {0x04f8, 0x04f9},

    /* Armenian */
    {0x0531, 0x0556}, {0x0561, 0x0587},

    /* Hebrew */
    {0x05d0, 0x05ea}, {0x05f0, 0x05f4},

    /* Arabic */
    {0x0621, 0x063a}, {0x0640, 0x0652}, {0x0670, 0x06b7}, {0x06ba, 0x06be},
    {0x06c0, 0x06ce}, {0x06e5, 0x06e7},

    /* Devanagari */
    {0x0905, 0x0939}, {0x0958, 0x0962},

    /* Bengali */
    {0x0985, 0x098c}, {0x098f, 0x0990}, {0x0993, 0x09a8}, {0x09aa, 0x09b0},
    {0x09b2, 0x09b2}, {0x09b6, 0x09b9}, {0x09dc, 0x09dd}, {0x09df, 0x09e1},
    {0x09f0, 0x09f1},

    /* Gurmukhi */
    {0x0a05, 0x0a0a}, {0x0a0f, 0x0a10}, {0x0a13, 0x0a28}, {0x0a2a, 0x0a30},
    {0x0a32, 0x0a33}, {0x0a35, 0x0a36}, {0x0a38, 0x0a39}, {0x0a59, 0x0a5c},
    {0x0a5e, 0x0a5e},

    /* Gujarati */
    {0x0a85, 0x0a8b}, {0x0a8d, 0x0a8d}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8},
    {0x0aaa, 0x0ab0}, {0x0ab2, 0x0ab3}, {0x0ab5, 0x0ab9}, {0x0ae0, 0x0ae0},

    /* Oriya */
    {0x0b05, 0x0b0c}, {0x0b0f, 0x0b10}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
    {0x0b32, 0x0b33}, {0x0b36, 0x0b39}, {0x0b5c, 0x0b5d}, {0x0b5f, 0x0b61},

    /* Tamil */
    {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0b99, 0x0b9a},
    {0x0b9c, 0x0b9c}, {0x0b9e, 0x0b9f}, {0x0ba3, 0x0ba4}, {0x0ba8, 0x0baa},
    {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},

    /* Telugu */
    {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
    {0x0c35, 0x0c39}, {0x0c60, 0x0c61},

    /* Kannada */
    {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
    {0x0cb5, 0x0cb9}, {0x0ce0, 0x0ce1},

    /* Malayalam */
    {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d28}, {0x0d2a, 0x0d39},
    {0x0d60, 0x0d61},

    /* Thai */
    {0x0e01, 0x0e30}, {0x0e32, 0x0e33}, {0x0e40, 0x0e46}, {0x0e4f, 0x0e5b},

    /* Lao.  NOTE(review): 0x0e0d lies inside the Thai range above, so
       this entry has no effect; ISO C99 Annex D appears to list 0x0e8d
       at this position -- confirm before changing.  */
    {0x0e81, 0x0e82}, {0x0e84, 0x0e84}, {0x0e87, 0x0e87}, {0x0e88, 0x0e88},
    {0x0e8a, 0x0e8a}, {0x0e0d, 0x0e0d}, {0x0e94, 0x0e97}, {0x0e99, 0x0e9f},
    {0x0ea1, 0x0ea3}, {0x0ea5, 0x0ea5}, {0x0ea7, 0x0ea7}, {0x0eaa, 0x0eaa},
    {0x0eab, 0x0eab}, {0x0ead, 0x0eb0}, {0x0eb2, 0x0eb2}, {0x0eb3, 0x0eb3},
    {0x0ebd, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec6, 0x0ec6},

    /* Georgian */
    {0x10a0, 0x10c5}, {0x10d0, 0x10f6},

    /* Hiragana */
    {0x3041, 0x3094}, {0x309b, 0x309e},

    /* Katakana */
    {0x30a1, 0x30fe},

    /* Bopomofo */
    {0x3105, 0x312c},

    /* Hangul */
    {0x1100, 0x1159}, {0x1161, 0x11a2}, {0x11a8, 0x11f9},

    /* CJK Unified Ideographs */
    {0xf900, 0xfa2d}, {0xfb1f, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb3e, 0xfb3e},
    {0xfb40, 0xfb41}, {0xfb42, 0xfb44}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3f},
    {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
    {0xfe74, 0xfe74}, {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a},
    {0xff66, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7},
    {0xffda, 0xffdc}, {0x4e00, 0x9fa5}
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid / sizeof valid[0]; i++)
    if (c >= valid[i].lo && c <= valid[i].hi)
      return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are passed to extend_token as a single byte.
   Larger values produce a lead byte, made of the length marker MASK
   and the most significant payload bits, followed by one continuation
   byte of the form 10xxxxxx for every remaining group of six bits.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: SHIFT was chosen so that what remains after the shift
     fits below the marker bits in MASK.  */
  extend_token (mask | (c >> shift));

  do
    {
      shift -= 6;
      /* Keep exactly six payload bits.  Without the 0x3f mask the bits
         above them spill into the two marker bits and the byte is no
         longer a valid continuation byte (U+07FF came out as DF FF
         instead of DF BF).  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
1332 #endif
#if 0
/* Disabled code: nothing below is compiled.

   One row of type_sequence: a pointer to a type node together with
   three flags saying whether that type is unsigned, long and
   long long.  */
struct try_type
{
  tree *node_var;
  char unsigned_flag;
  char long_flag;
  char long_long_flag;
};

/* The integer types from int up to unsigned long long, each signed
   type immediately followed by its unsigned counterpart.  */
struct try_type type_sequence[] =
{
  { &integer_type_node,            0, 0, 0},
  { &unsigned_type_node,           1, 0, 0},
  { &long_integer_type_node,       0, 1, 0},
  { &long_unsigned_type_node,      1, 1, 0},
  { &long_long_integer_type_node,  0, 1, 1},
  { &long_long_unsigned_type_node, 1, 1, 1}
};
#endif /* 0 */
1354 struct pf_args
1356 /* Input */
1357 const char *str;
1358 int fflag;
1359 int lflag;
1360 int base;
1361 /* Output */
1362 int conversion_errno;
1363 REAL_VALUE_TYPE value;
1364 tree type;
1367 static void
1368 parse_float (data)
1369 PTR data;
1371 struct pf_args * args = (struct pf_args *) data;
1372 const char *typename;
1374 args->conversion_errno = 0;
1375 args->type = double_type_node;
1376 typename = "double";
1378 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1379 tells the desired precision of the binary result
1380 of decimal-to-binary conversion. */
1382 if (args->fflag)
1384 if (args->lflag)
1385 error ("both 'f' and 'l' suffixes on floating constant");
1387 args->type = float_type_node;
1388 typename = "float";
1390 else if (args->lflag)
1392 args->type = long_double_type_node;
1393 typename = "long double";
1395 else if (flag_single_precision_constant)
1397 args->type = float_type_node;
1398 typename = "float";
1401 errno = 0;
1402 if (args->base == 16)
1403 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1404 else
1405 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1407 args->conversion_errno = errno;
1408 /* A diagnostic is required here by some ISO C testsuites.
1409 This is not pedwarn, because some people don't want
1410 an error for this. */
1411 if (REAL_VALUE_ISINF (args->value) && pedantic)
1412 warning ("floating point number exceeds range of '%s'", typename);
/* Return the type of the next token and store its value, if it has
   one, in *VALUE.

   *VALUE is first cleared to NULL_TREE and is then set only for
   identifiers (get_identifier), numbers (lex_number), character
   constants (lex_charconst) and strings (lex_string).

   With USE_CPPLIB the token comes from cpp_get_token; otherwise the
   characters are fetched with getch and assembled in token_buffer.
   In both variants a stray character is reported with error and
   lexing resumes at the `retry' label.

   NOTE(review): the line numbering embedded in this copy skips
   entries (for instance 1413-1415 just above), so brace-only lines
   and possibly the return-type line appear to be missing here;
   compare with the upstream file before compiling.  */
1416 c_lex (value)
1417 tree *value;
1419 #if USE_CPPLIB
/* cpplib variant: take a finished token from cpp_get_token and
   translate it.  */
1420 const cpp_token *tok;
1421 enum cpp_ttype type;
1423 retry:
1424 timevar_push (TV_CPP);
1425 tok = cpp_get_token (&parse_in);
1426 timevar_pop (TV_CPP);
1428 /* The C++ front end does horrible things with the current line
1429 number. To ensure an accurate line number, we must reset it
1430 every time we return a token. If we reset it from tok->line
1431 every time, we'll get line numbers inside macros referring to the
1432 macro definition; this is nice, but we don't want to change the
1433 behavior until integrated mode is the only option. So we keep our
1434 own idea of the line number, and reset it from tok->line at each
1435 new line (which never happens inside a macro). */
1436 if (tok->flags & BOL)
1437 lex_lineno = tok->line;
1439 *value = NULL_TREE;
1440 lineno = lex_lineno;
1441 type = tok->type;
1442 switch (type)
1444 case CPP_OPEN_BRACE: indent_level++; break;
1445 case CPP_CLOSE_BRACE: indent_level--; break;
1447 /* Issue this error here, where we can get at tok->val.aux. */
1448 case CPP_OTHER:
1449 if (ISGRAPH (tok->val.aux))
1450 error ("stray '%c' in program", tok->val.aux);
1451 else
1452 error ("stray '\\%#o' in program", tok->val.aux);
1453 goto retry;
/* No break after the assignment: a CPP_DEFINED token is retagged as
   CPP_NAME and then handled by the CPP_NAME case.  */
1455 case CPP_DEFINED:
1456 type = CPP_NAME;
1457 case CPP_NAME:
1458 *value = get_identifier ((const char *)tok->val.node->name);
1459 break;
1461 case CPP_INT:
1462 case CPP_FLOAT:
1463 case CPP_NUMBER:
1464 *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
1465 break;
1467 case CPP_CHAR:
1468 case CPP_WCHAR:
1469 *value = lex_charconst ((const char *)tok->val.str.text,
1470 tok->val.str.len, tok->type == CPP_WCHAR);
1471 break;
1473 case CPP_STRING:
1474 case CPP_WSTRING:
1475 case CPP_OSTRING:
1476 *value = lex_string ((const char *)tok->val.str.text,
1477 tok->val.str.len, tok->type == CPP_WSTRING);
1478 break;
1480 /* These tokens should not be visible outside cpplib. */
1481 case CPP_HEADER_NAME:
1482 case CPP_COMMENT:
1483 case CPP_MACRO_ARG:
1484 case CPP_PLACEMARKER:
1485 abort ();
1487 default: break;
1490 return type;
1492 #else
/* Stand-alone variant: read characters with getch and build the
   tokens by hand.  */
1493 int c;
1494 char *p;
1495 int wide_flag = 0;
1496 int objc_flag = 0;
1497 int charconst = 0;
1499 *value = NULL_TREE;
1501 retry:
1502 c = getch ();
1504 /* Effectively do c = skip_white_space (c)
1505 but do it faster in the usual cases. */
1506 while (1)
1507 switch (c)
1509 case ' ':
1510 case '\t':
1511 case '\f':
1512 case '\v':
1513 c = getch ();
1514 break;
/* Line ends are handed to skip_white_space; there is no break, so
   control continues into the default case and leaves the loop.  */
1516 case '\r':
1517 case '\n':
1518 c = skip_white_space (c);
1519 default:
1520 goto found_nonwhite;
1522 found_nonwhite:
1524 lineno = lex_lineno;
1526 switch (c)
1528 case EOF:
1529 return CPP_EOF;
1531 case 'L':
1532 /* Capital L may start a wide-string or wide-character constant. */
1534 register int c1 = getch();
1535 if (c1 == '\'')
1537 wide_flag = 1;
1538 goto char_constant;
1540 if (c1 == '"')
1542 wide_flag = 1;
1543 goto string_constant;
1545 put_back (c1);
1547 goto letter;
1549 case '@':
1550 if (!doing_objc_thang)
1551 goto straychar;
1552 else
1554 /* '@' may start a constant string object. */
1555 register int c1 = getch ();
1556 if (c1 == '"')
1558 objc_flag = 1;
1559 goto string_constant;
1561 put_back (c1);
1562 /* Fall through to treat '@' as the start of an identifier. */
/* 'L' is not in the list below because it has its own case above,
   which jumps to `letter' when no constant follows.  */
1565 case 'A': case 'B': case 'C': case 'D': case 'E':
1566 case 'F': case 'G': case 'H': case 'I': case 'J':
1567 case 'K': case 'M': case 'N': case 'O':
1568 case 'P': case 'Q': case 'R': case 'S': case 'T':
1569 case 'U': case 'V': case 'W': case 'X': case 'Y':
1570 case 'Z':
1571 case 'a': case 'b': case 'c': case 'd': case 'e':
1572 case 'f': case 'g': case 'h': case 'i': case 'j':
1573 case 'k': case 'l': case 'm': case 'n': case 'o':
1574 case 'p': case 'q': case 'r': case 's': case 't':
1575 case 'u': case 'v': case 'w': case 'x': case 'y':
1576 case 'z':
1577 case '_':
1578 case '$':
1579 letter:
1580 p = token_buffer;
1581 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1583 /* Make sure this char really belongs in an identifier. */
1584 if (c == '$')
1586 if (! dollars_in_ident)
1587 error ("'$' in identifier");
1588 else if (pedantic)
1589 pedwarn ("'$' in identifier");
1592 if (p >= token_buffer + maxtoken)
1593 p = extend_token_buffer (p);
1595 *p++ = c;
1596 c = getch();
1599 put_back (c);
1601 if (p >= token_buffer + maxtoken)
1602 p = extend_token_buffer (p);
1603 *p = 0;
1605 *value = get_identifier (token_buffer);
1606 return CPP_NAME;
1608 case '.':
1610 /* It's hard to preserve tokenization on '.' because
1611 it could be a symbol by itself, or it could be the
1612 start of a floating point number and cpp won't tell us. */
1613 int c1 = getch ();
1614 if (c1 == '.')
1616 int c2 = getch ();
1617 if (c2 == '.')
1618 return CPP_ELLIPSIS;
1620 put_back (c2);
1621 error ("parse error at '..'");
1623 else if (c1 == '*' && c_language == clk_cplusplus)
1624 return CPP_DOT_STAR;
1626 put_back (c1);
1627 if (ISDIGIT (c1))
1628 goto number;
1630 return CPP_DOT;
1632 case '0': case '1': case '2': case '3': case '4':
1633 case '5': case '6': case '7': case '8': case '9':
1634 number:
1635 p = token_buffer;
1636 /* Scan the next preprocessing number. All C numeric constants
1637 are preprocessing numbers, but not all preprocessing numbers
1638 are valid numeric constants. Preprocessing numbers fit the
1639 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1640 See C99 section 6.4.8. */
1641 for (;;)
1643 if (p >= token_buffer + maxtoken)
1644 p = extend_token_buffer (p);
1646 *p++ = c;
1647 c = getch();
1649 if (c == '+' || c == '-')
1651 int d = p[-1];
1652 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1653 continue;
1655 if (ISALNUM (c) || c == '_' || c == '.')
1656 continue;
1657 break;
1659 put_back (c);
1661 *value = lex_number (token_buffer, p - token_buffer);
1662 return CPP_NUMBER;
1664 case '\'':
1665 char_constant:
1666 charconst = 1;
1668 case '"':
1669 string_constant:
1671 int delimiter = charconst ? '\'' : '"';
1672 #ifdef MULTIBYTE_CHARS
1673 int longest_char = local_mb_cur_max ();
1674 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1675 #endif
1676 c = getch ();
/* The body of the constant is collected from token_buffer[1] on; the
   delimiters themselves are never stored.  */
1677 p = token_buffer + 1;
1679 while (c != delimiter && c != EOF)
1681 if (p + 2 > token_buffer + maxtoken)
1682 p = extend_token_buffer (p);
1684 /* ignore_escape_flag is set for reading the filename in #line. */
1685 if (!ignore_escape_flag && c == '\\')
1687 *p++ = c;
1688 *p++ = getch (); /* escaped character */
1689 c = getch ();
1690 continue;
1692 else
1694 #ifdef MULTIBYTE_CHARS
1695 int i;
1696 int char_len = -1;
1697 for (i = 0; i < longest_char; ++i)
1699 if (p + i >= token_buffer + maxtoken)
1700 p = extend_token_buffer (p);
1701 p[i] = c;
1703 char_len = local_mblen (p, i + 1);
1704 if (char_len != -1)
1705 break;
1706 c = getch ();
1708 if (char_len == -1)
1710 /* Replace all except the first byte. */
1711 put_back (c);
1712 for (--i; i > 0; --i)
1713 put_back (p[i]);
1714 char_len = 1;
1716 /* mbtowc sometimes needs an extra char before accepting */
1717 else if (char_len <= i)
1718 put_back (c);
1720 p += char_len;
1721 #else
1722 *p++ = c;
1723 #endif
1724 c = getch ();
1729 if (charconst)
1731 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1732 wide_flag);
1733 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1735 else
1737 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1738 wide_flag);
1739 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1742 case '+':
1743 case '-':
1744 case '&':
1745 case '|':
1746 case ':':
1747 case '<':
1748 case '>':
1749 case '*':
1750 case '/':
1751 case '%':
1752 case '^':
1753 case '!':
1754 case '=':
1756 int c1;
1757 enum cpp_ttype type = CPP_EOF;
1759 switch (c)
1761 case '+': type = CPP_PLUS; break;
1762 case '-': type = CPP_MINUS; break;
1763 case '&': type = CPP_AND; break;
1764 case '|': type = CPP_OR; break;
1765 case ':': type = CPP_COLON; break;
1766 case '<': type = CPP_LESS; break;
1767 case '>': type = CPP_GREATER; break;
1768 case '*': type = CPP_MULT; break;
1769 case '/': type = CPP_DIV; break;
1770 case '%': type = CPP_MOD; break;
1771 case '^': type = CPP_XOR; break;
1772 case '!': type = CPP_NOT; break;
1773 case '=': type = CPP_EQ; break;
1776 c1 = getch ();
/* Operator followed by '='.  NOTE(review): adding CPP_EQ_EQ - CPP_EQ
   presumably relies on the "op=" token types sitting at a fixed
   offset from the plain ones in enum cpp_ttype -- confirm against
   cpplib.h.  */
1778 if (c1 == '=' && type < CPP_LAST_EQ)
1779 return type + (CPP_EQ_EQ - CPP_EQ);
1780 else if (c == c1)
1781 switch (c)
1783 case '+': return CPP_PLUS_PLUS;
1784 case '-': return CPP_MINUS_MINUS;
1785 case '&': return CPP_AND_AND;
1786 case '|': return CPP_OR_OR;
1787 case ':':
1788 if (c_language == clk_cplusplus)
1789 return CPP_SCOPE;
1790 break;
1792 case '<': type = CPP_LSHIFT; goto do_triad;
1793 case '>': type = CPP_RSHIFT; goto do_triad;
1795 else
1796 switch (c)
1798 case '-':
1799 if (c1 == '>')
1801 if (c_language == clk_cplusplus)
1803 c1 = getch ();
1804 if (c1 == '*')
1805 return CPP_DEREF_STAR;
1806 put_back (c1);
1808 return CPP_DEREF;
1810 break;
1812 case '>':
1813 if (c1 == '?' && c_language == clk_cplusplus)
1814 { type = CPP_MAX; goto do_triad; }
1815 break;
1817 case '<':
1818 if (c1 == ':' && flag_digraphs)
1819 return CPP_OPEN_SQUARE;
1820 if (c1 == '%' && flag_digraphs)
1821 { indent_level++; return CPP_OPEN_BRACE; }
1822 if (c1 == '?' && c_language == clk_cplusplus)
1823 { type = CPP_MIN; goto do_triad; }
1824 break;
1826 case ':':
1827 if (c1 == '>' && flag_digraphs)
1828 return CPP_CLOSE_SQUARE;
1829 break;
1830 case '%':
1831 if (c1 == '>' && flag_digraphs)
1832 { indent_level--; return CPP_CLOSE_BRACE; }
1833 break;
1836 put_back (c1);
1837 return type;
/* Reached after a two-character operator: one further '=' selects
   the corresponding assignment token, anything else is put back.  */
1839 do_triad:
1840 c1 = getch ();
1841 if (c1 == '=')
1842 type += (CPP_EQ_EQ - CPP_EQ);
1843 else
1844 put_back (c1);
1845 return type;
1848 case '~': return CPP_COMPL;
1849 case '?': return CPP_QUERY;
1850 case ',': return CPP_COMMA;
1851 case '(': return CPP_OPEN_PAREN;
1852 case ')': return CPP_CLOSE_PAREN;
1853 case '[': return CPP_OPEN_SQUARE;
1854 case ']': return CPP_CLOSE_SQUARE;
1855 case '{': indent_level++; return CPP_OPEN_BRACE;
1856 case '}': indent_level--; return CPP_CLOSE_BRACE;
1857 case ';': return CPP_SEMICOLON;
1859 straychar:
1860 default:
1861 if (ISGRAPH (c))
1862 error ("stray '%c' in program", c);
1863 else
1864 error ("stray '\\%#o' in program", c);
1865 goto retry;
1867 /* NOTREACHED */
1868 #endif
1872 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
1874 static tree
1875 lex_number (str, len)
1876 const char *str;
1877 unsigned int len;
1879 int base = 10;
1880 int count = 0;
1881 int largest_digit = 0;
1882 int numdigits = 0;
1883 int overflow = 0;
1884 int c;
1885 tree value;
1886 const char *p;
1887 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1889 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1890 The code below which fills the parts array assumes that a host
1891 int is at least twice as wide as a host char, and that
1892 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1893 Two HOST_WIDE_INTs is the largest int literal we can store.
1894 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1895 must be exactly the number of parts needed to hold the bits
1896 of two HOST_WIDE_INTs. */
1897 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1898 unsigned int parts[TOTAL_PARTS];
1900 /* Optimize for most frequent case. */
1901 if (len == 1)
1903 if (*str == '0')
1904 return integer_zero_node;
1905 else if (*str == '1')
1906 return integer_one_node;
1907 else
1908 return build_int_2 (*str - '0', 0);
1911 for (count = 0; count < TOTAL_PARTS; count++)
1912 parts[count] = 0;
1914 /* len is known to be >1 at this point. */
1915 p = str;
1917 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1919 base = 16;
1920 p = str + 2;
1922 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1923 else if (str[0] == '0' && ISDIGIT (str[1]))
1925 base = 8;
1926 p = str + 1;
1931 c = *p++;
1933 if (c == '.')
1935 if (base == 16 && pedantic && !flag_isoc99)
1936 pedwarn ("floating constant may not be in radix 16");
1937 else if (floatflag == AFTER_POINT)
1938 ERROR ("too many decimal points in floating constant");
1939 else if (floatflag == AFTER_EXPON)
1940 ERROR ("decimal point in exponent - impossible!");
1941 else
1942 floatflag = AFTER_POINT;
1944 if (base == 8)
1945 base = 10;
1947 else if (c == '_')
1948 /* Possible future extension: silently ignore _ in numbers,
1949 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1950 but somewhat easier to read. Ada has this? */
1951 ERROR ("underscore in number");
1952 else
1954 int n;
1955 /* It is not a decimal point.
1956 It should be a digit (perhaps a hex digit). */
1958 if (ISDIGIT (c))
1960 n = c - '0';
1962 else if (base <= 10 && (c == 'e' || c == 'E'))
1964 base = 10;
1965 floatflag = AFTER_EXPON;
1966 break;
1968 else if (base == 16 && (c == 'p' || c == 'P'))
1970 floatflag = AFTER_EXPON;
1971 break; /* start of exponent */
1973 else if (base == 16 && c >= 'a' && c <= 'f')
1975 n = c - 'a' + 10;
1977 else if (base == 16 && c >= 'A' && c <= 'F')
1979 n = c - 'A' + 10;
1981 else
1983 p--;
1984 break; /* start of suffix */
1987 if (n >= largest_digit)
1988 largest_digit = n;
1989 numdigits++;
1991 for (count = 0; count < TOTAL_PARTS; count++)
1993 parts[count] *= base;
1994 if (count)
1996 parts[count]
1997 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1998 parts[count-1]
1999 &= (1 << HOST_BITS_PER_CHAR) - 1;
2001 else
2002 parts[0] += n;
2005 /* If the highest-order part overflows (gets larger than
2006 a host char will hold) then the whole number has
2007 overflowed. Record this and truncate the highest-order
2008 part. */
2009 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2011 overflow = 1;
2012 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2016 while (p < str + len);
2018 /* This can happen on input like `int i = 0x;' */
2019 if (numdigits == 0)
2020 ERROR ("numeric constant with no digits");
2022 if (largest_digit >= base)
2023 ERROR ("numeric constant contains digits beyond the radix");
2025 if (floatflag != NOT_FLOAT)
2027 tree type;
2028 int imag, fflag, lflag, conversion_errno;
2029 REAL_VALUE_TYPE real;
2030 struct pf_args args;
2031 char *copy;
2033 if (base == 16 && floatflag != AFTER_EXPON)
2034 ERROR ("hexadecimal floating constant has no exponent");
2036 /* Read explicit exponent if any, and put it in tokenbuf. */
2037 if ((base == 10 && ((c == 'e') || (c == 'E')))
2038 || (base == 16 && (c == 'p' || c == 'P')))
2040 if (p < str + len)
2041 c = *p++;
2042 if (p < str + len && (c == '+' || c == '-'))
2043 c = *p++;
2044 /* Exponent is decimal, even if string is a hex float. */
2045 if (! ISDIGIT (c))
2046 ERROR ("floating constant exponent has no digits");
2047 while (p < str + len && ISDIGIT (c))
2048 c = *p++;
2049 if (! ISDIGIT (c))
2050 p--;
2053 /* Copy the float constant now; we don't want any suffixes in the
2054 string passed to parse_float. */
2055 copy = alloca (p - str + 1);
2056 memcpy (copy, str, p - str);
2057 copy[p - str] = '\0';
2059 /* Now parse suffixes. */
2060 fflag = lflag = imag = 0;
2061 while (p < str + len)
2062 switch (*p++)
2064 case 'f': case 'F':
2065 if (fflag)
2066 ERROR ("more than one 'f' suffix on floating constant");
2067 else if (warn_traditional && !in_system_header)
2068 warning ("traditional C rejects the 'f' suffix");
2070 fflag = 1;
2071 break;
2073 case 'l': case 'L':
2074 if (lflag)
2075 ERROR ("more than one 'l' suffix on floating constant");
2076 else if (warn_traditional && !in_system_header)
2077 warning ("traditional C rejects the 'l' suffix");
2079 lflag = 1;
2080 break;
2082 case 'i': case 'I':
2083 case 'j': case 'J':
2084 if (imag)
2085 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2086 else if (pedantic)
2087 pedwarn ("ISO C forbids imaginary numeric constants");
2088 imag = 1;
2089 break;
2091 default:
2092 ERROR ("invalid suffix on floating constant");
2095 /* Setup input for parse_float() */
2096 args.str = copy;
2097 args.fflag = fflag;
2098 args.lflag = lflag;
2099 args.base = base;
2101 /* Convert string to a double, checking for overflow. */
2102 if (do_float_handler (parse_float, (PTR) &args))
2104 /* Receive output from parse_float() */
2105 real = args.value;
2107 else
2108 /* We got an exception from parse_float() */
2109 ERROR ("floating constant out of range");
2111 /* Receive output from parse_float() */
2112 conversion_errno = args.conversion_errno;
2113 type = args.type;
2115 #ifdef ERANGE
2116 /* ERANGE is also reported for underflow,
2117 so test the value to distinguish overflow from that. */
2118 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2119 && (REAL_VALUES_LESS (dconst1, real)
2120 || REAL_VALUES_LESS (real, dconstm1)))
2121 warning ("floating point number exceeds range of 'double'");
2122 #endif
2124 /* Create a node with determined type and value. */
2125 if (imag)
2126 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2127 build_real (type, real));
2128 else
2129 value = build_real (type, real);
2131 else
2133 tree trad_type, ansi_type, type;
2134 HOST_WIDE_INT high, low;
2135 int spec_unsigned = 0;
2136 int spec_long = 0;
2137 int spec_long_long = 0;
2138 int spec_imag = 0;
2139 int suffix_lu = 0;
2140 int warn = 0, i;
2142 trad_type = ansi_type = type = NULL_TREE;
2143 while (p < str + len)
2145 c = *p++;
2146 switch (c)
2148 case 'u': case 'U':
2149 if (spec_unsigned)
2150 error ("two 'u' suffixes on integer constant");
2151 else if (warn_traditional && !in_system_header)
2152 warning ("traditional C rejects the 'u' suffix");
2154 spec_unsigned = 1;
2155 if (spec_long)
2156 suffix_lu = 1;
2157 break;
2159 case 'l': case 'L':
2160 if (spec_long)
2162 if (spec_long_long)
2163 error ("three 'l' suffixes on integer constant");
2164 else if (suffix_lu)
2165 error ("'lul' is not a valid integer suffix");
2166 else if (c != spec_long)
2167 error ("'Ll' and 'lL' are not valid integer suffixes");
2168 else if (pedantic && ! flag_isoc99
2169 && ! in_system_header && warn_long_long)
2170 pedwarn ("ISO C89 forbids long long integer constants");
2171 spec_long_long = 1;
2173 spec_long = c;
2174 break;
2176 case 'i': case 'I': case 'j': case 'J':
2177 if (spec_imag)
2178 error ("more than one 'i' or 'j' suffix on integer constant");
2179 else if (pedantic)
2180 pedwarn ("ISO C forbids imaginary numeric constants");
2181 spec_imag = 1;
2182 break;
2184 default:
2185 ERROR ("invalid suffix on integer constant");
2189 /* If the literal overflowed, pedwarn about it now. */
2190 if (overflow)
2192 warn = 1;
2193 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2196 /* This is simplified by the fact that our constant
2197 is always positive. */
2199 high = low = 0;
2201 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2203 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2204 / HOST_BITS_PER_CHAR)]
2205 << (i * HOST_BITS_PER_CHAR));
2206 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2209 value = build_int_2 (low, high);
2210 TREE_TYPE (value) = long_long_unsigned_type_node;
2212 /* If warn_traditional, calculate both the ISO type and the
2213 traditional type, then see if they disagree.
2214 Otherwise, calculate only the type for the dialect in use. */
2215 if (warn_traditional || flag_traditional)
2217 /* Calculate the traditional type. */
2218 /* Traditionally, any constant is signed; but if unsigned is
2219 specified explicitly, obey that. Use the smallest size
2220 with the right number of bits, except for one special
2221 case with decimal constants. */
2222 if (! spec_long && base != 10
2223 && int_fits_type_p (value, unsigned_type_node))
2224 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2225 /* A decimal constant must be long if it does not fit in
2226 type int. I think this is independent of whether the
2227 constant is signed. */
2228 else if (! spec_long && base == 10
2229 && int_fits_type_p (value, integer_type_node))
2230 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2231 else if (! spec_long_long)
2232 trad_type = (spec_unsigned
2233 ? long_unsigned_type_node
2234 : long_integer_type_node);
2235 else if (int_fits_type_p (value,
2236 spec_unsigned
2237 ? long_long_unsigned_type_node
2238 : long_long_integer_type_node))
2239 trad_type = (spec_unsigned
2240 ? long_long_unsigned_type_node
2241 : long_long_integer_type_node);
2242 else
2243 trad_type = (spec_unsigned
2244 ? widest_unsigned_literal_type_node
2245 : widest_integer_literal_type_node);
2247 if (warn_traditional || ! flag_traditional)
2249 /* Calculate the ISO type. */
2250 if (! spec_long && ! spec_unsigned
2251 && int_fits_type_p (value, integer_type_node))
2252 ansi_type = integer_type_node;
2253 else if (! spec_long && (base != 10 || spec_unsigned)
2254 && int_fits_type_p (value, unsigned_type_node))
2255 ansi_type = unsigned_type_node;
2256 else if (! spec_unsigned && !spec_long_long
2257 && int_fits_type_p (value, long_integer_type_node))
2258 ansi_type = long_integer_type_node;
2259 else if (! spec_long_long
2260 && int_fits_type_p (value, long_unsigned_type_node))
2261 ansi_type = long_unsigned_type_node;
2262 else if (! spec_unsigned
2263 && int_fits_type_p (value, long_long_integer_type_node))
2264 ansi_type = long_long_integer_type_node;
2265 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2266 ansi_type = long_long_unsigned_type_node;
2267 else if (! spec_unsigned
2268 && int_fits_type_p (value, widest_integer_literal_type_node))
2269 ansi_type = widest_integer_literal_type_node;
2270 else
2271 ansi_type = widest_unsigned_literal_type_node;
2274 type = flag_traditional ? trad_type : ansi_type;
2276 /* We assume that constants specified in a non-decimal
2277 base are bit patterns, and that the programmer really
2278 meant what they wrote. */
2279 if (warn_traditional && !in_system_header
2280 && base == 10 && trad_type != ansi_type)
2282 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2283 warning ("width of integer constant changes with -traditional");
2284 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2285 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2286 else
2287 warning ("width of integer constant may change on other systems with -traditional");
2290 if (pedantic && !flag_traditional && (flag_isoc99 || !spec_long_long)
2291 && !warn
2292 && ((flag_isoc99
2293 ? TYPE_PRECISION (long_long_integer_type_node)
2294 : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type)))
2296 warn = 1;
2297 pedwarn ("integer constant larger than the maximum value of %s",
2298 (flag_isoc99
2299 ? (TREE_UNSIGNED (type)
2300 ? "an unsigned long long int"
2301 : "a long long int")
2302 : "an unsigned long int"));
2305 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2306 warning ("decimal constant is so large that it is unsigned");
2308 if (spec_imag)
2310 if (TYPE_PRECISION (type)
2311 <= TYPE_PRECISION (integer_type_node))
2312 value = build_complex (NULL_TREE, integer_zero_node,
2313 convert (integer_type_node, value));
2314 else
2315 ERROR ("complex integer constant is too wide for 'complex int'");
2317 else if (flag_traditional && !int_fits_type_p (value, type))
2318 /* The traditional constant 0x80000000 is signed
2319 but doesn't fit in the range of int.
2320 This will change it to -0x80000000, which does fit. */
2322 TREE_TYPE (value) = unsigned_type (type);
2323 value = convert (type, value);
2324 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2326 else
2327 TREE_TYPE (value) = type;
2329 /* If it's still an integer (not a complex), and it doesn't
2330 fit in the type we choose for it, then pedwarn. */
2332 if (! warn
2333 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2334 && ! int_fits_type_p (value, TREE_TYPE (value)))
2335 pedwarn ("integer constant is larger than the maximum value for its type");
2338 if (p < str + len)
2339 error ("missing white space after number '%.*s'", (int) (p - str), str);
2341 return value;
2343 syntax_error:
2344 return integer_zero_node;
2347 static tree
2348 lex_string (str, len, wide)
2349 const char *str;
2350 unsigned int len;
2351 int wide;
2353 tree value;
2354 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2355 char *q = buf;
2356 const char *p = str, *limit = str + len;
2357 unsigned int c;
2358 unsigned width = wide ? WCHAR_TYPE_SIZE
2359 : TYPE_PRECISION (char_type_node);
2361 #ifdef MULTIBYTE_CHARS
2362 /* Reset multibyte conversion state. */
2363 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2364 #endif
2366 while (p < limit)
2368 #ifdef MULTIBYTE_CHARS
2369 wchar_t wc;
2370 int char_len;
2372 char_len = local_mbtowc (&wc, p, limit - p);
2373 if (char_len == -1)
2375 warning ("Ignoring invalid multibyte character");
2376 char_len = 1;
2377 c = *p++;
2379 else
2381 p += char_len;
2382 c = wc;
2384 #else
2385 c = *p++;
2386 #endif
2388 if (c == '\\' && !ignore_escape_flag)
2390 p = readescape (p, limit, &c);
2391 if (width < HOST_BITS_PER_INT
2392 && (unsigned) c >= ((unsigned)1 << width))
2393 pedwarn ("escape sequence out of range for character");
2396 /* Add this single character into the buffer either as a wchar_t
2397 or as a single byte. */
2398 if (wide)
2400 unsigned charwidth = TYPE_PRECISION (char_type_node);
2401 unsigned bytemask = (1 << charwidth) - 1;
2402 int byte;
2404 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2406 int n;
2407 if (byte >= (int) sizeof (c))
2408 n = 0;
2409 else
2410 n = (c >> (byte * charwidth)) & bytemask;
2411 if (BYTES_BIG_ENDIAN)
2412 q[WCHAR_BYTES - byte - 1] = n;
2413 else
2414 q[byte] = n;
2416 q += WCHAR_BYTES;
2418 else
2420 *q++ = c;
2424 /* Terminate the string value, either with a single byte zero
2425 or with a wide zero. */
2427 if (wide)
2429 memset (q, 0, WCHAR_BYTES);
2430 q += WCHAR_BYTES;
2432 else
2434 *q++ = '\0';
2437 value = build_string (q - buf, buf);
2439 if (wide)
2440 TREE_TYPE (value) = wchar_array_type_node;
2441 else
2442 TREE_TYPE (value) = char_array_type_node;
2443 return value;
2446 static tree
2447 lex_charconst (str, len, wide)
2448 const char *str;
2449 unsigned int len;
2450 int wide;
2452 const char *limit = str + len;
2453 int result = 0;
2454 int num_chars = 0;
2455 int chars_seen = 0;
2456 unsigned width = TYPE_PRECISION (char_type_node);
2457 int max_chars;
2458 unsigned int c;
2459 tree value;
2461 #ifdef MULTIBYTE_CHARS
2462 int longest_char = local_mb_cur_max ();
2463 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2464 #endif
2466 max_chars = TYPE_PRECISION (integer_type_node) / width;
2467 if (wide)
2468 width = WCHAR_TYPE_SIZE;
2470 while (str < limit)
2472 #ifdef MULTIBYTE_CHARS
2473 wchar_t wc;
2474 int char_len;
2476 char_len = local_mbtowc (&wc, str, limit - str);
2477 if (char_len == -1)
2479 warning ("Ignoring invalid multibyte character");
2480 char_len = 1;
2481 c = *str++;
2483 else
2485 p += char_len;
2486 c = wc;
2488 #else
2489 c = *str++;
2490 #endif
2492 ++chars_seen;
2493 if (c == '\\')
2495 str = readescape (str, limit, &c);
2496 if (width < HOST_BITS_PER_INT
2497 && (unsigned) c >= ((unsigned)1 << width))
2498 pedwarn ("escape sequence out of range for character");
2500 #ifdef MAP_CHARACTER
2501 if (ISPRINT (c))
2502 c = MAP_CHARACTER (c);
2503 #endif
2505 /* Merge character into result; ignore excess chars. */
2506 num_chars += (width / TYPE_PRECISION (char_type_node));
2507 if (num_chars < max_chars + 1)
2509 if (width < HOST_BITS_PER_INT)
2510 result = (result << width) | (c & ((1 << width) - 1));
2511 else
2512 result = c;
2516 if (chars_seen == 0)
2517 error ("empty character constant");
2518 else if (num_chars > max_chars)
2520 num_chars = max_chars;
2521 error ("character constant too long");
2523 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2524 warning ("multi-character character constant");
2526 /* If char type is signed, sign-extend the constant. */
2527 if (! wide)
2529 int num_bits = num_chars * width;
2530 if (num_bits == 0)
2531 /* We already got an error; avoid invalid shift. */
2532 value = build_int_2 (0, 0);
2533 else if (TREE_UNSIGNED (char_type_node)
2534 || ((result >> (num_bits - 1)) & 1) == 0)
2535 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2536 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2538 else
2539 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2540 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2541 -1);
2542 /* In C, a character constant has type 'int'; in C++, 'char'. */
2543 if (chars_seen <= 1 && c_language == clk_cplusplus)
2544 TREE_TYPE (value) = char_type_node;
2545 else
2546 TREE_TYPE (value) = integer_type_node;
2548 else
2550 value = build_int_2 (result, 0);
2551 TREE_TYPE (value) = wchar_type_node;
2554 return value;
2557 /* Mark for GC a node in a splay tree whose keys are strings. */
2559 static int
2560 mark_splay_tree_node (n, data)
2561 splay_tree_node n;
2562 void *data ATTRIBUTE_UNUSED;
2564 ggc_mark_string ((char *) n->key);
2565 return 0;
2568 /* Mark for GC a splay tree whose keys are strings. */
2570 static void
2571 mark_splay_tree (p)
2572 void *p;
2574 splay_tree st = *(splay_tree *) p;
2576 splay_tree_foreach (st, mark_splay_tree_node, NULL);