* config/i386/i386.md (mmx_pinsrw): Output operands in correct
[official-gcc.git] / gcc / c-lex.c
blob42b8c3978dada6353beeef7b6b01ee0364965c43
1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "tree.h"
28 #include "input.h"
29 #include "output.h"
30 #include "c-lex.h"
31 #include "c-tree.h"
32 #include "flags.h"
33 #include "timevar.h"
34 #include "cpplib.h"
35 #include "c-pragma.h"
36 #include "toplev.h"
37 #include "intl.h"
38 #include "ggc.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45 #ifdef CROSS_COMPILE
46 #undef MULTIBYTE_CHARS
47 #endif
49 #ifdef MULTIBYTE_CHARS
50 #include "mbchar.h"
51 #include <locale.h>
52 #endif /* MULTIBYTE_CHARS */
53 #ifndef GET_ENVIRONMENT
54 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
55 #endif
57 #if USE_CPPLIB
58 extern cpp_reader parse_in;
59 #else
60 /* Stream for reading from the input file. */
61 FILE *finput;
62 #endif
64 /* Private idea of the line number. See discussion in c_lex(). */
65 static int lex_lineno;
67 /* We may keep statistics about how long which files took to compile. */
68 static int header_time, body_time;
69 static splay_tree file_info_tree;
71 /* Cause the `yydebug' variable to be defined. */
72 #define YYDEBUG 1
74 #if !USE_CPPLIB
/* Stack of characters that have been pushed back onto the input and
   must be re-read before anything else is taken from the file.  */
struct putback_buffer
{
  unsigned char *buffer;	/* Storage for the pushed-back characters.  */
  int buffer_size;		/* Number of bytes allocated for BUFFER.  */
  int index;			/* Slot of the most recently pushed
				   character, or -1 when empty.  */
};

/* The single put-back stack used by getch and put_back; starts empty
   with no storage allocated.  */
static struct putback_buffer putback = {NULL, 0, -1};
85 static inline int getch PARAMS ((void));
87 static inline int
88 getch ()
90 if (putback.index != -1)
92 int ch = putback.buffer[putback.index];
93 --putback.index;
94 return ch;
96 return getc (finput);
99 static inline void put_back PARAMS ((int));
101 static inline void
102 put_back (ch)
103 int ch;
105 if (ch != EOF)
107 if (putback.index == putback.buffer_size - 1)
109 putback.buffer_size += 16;
110 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
112 putback.buffer[++putback.index] = ch;
116 int linemode;
118 #endif
120 /* File used for outputting assembler code. */
121 extern FILE *asm_out_file;
123 #undef WCHAR_TYPE_SIZE
124 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
126 /* Number of bytes in a wide character. */
127 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
129 #if !USE_CPPLIB
130 static int maxtoken; /* Current nominal length of token buffer. */
131 static char *token_buffer; /* Pointer to token buffer.
132 Actual allocated length is maxtoken + 2. */
133 #endif
135 int indent_level; /* Number of { minus number of }. */
136 int pending_lang_change; /* If we need to switch languages - C++ only */
137 int c_header_level; /* depth in C headers - C++ only */
139 /* Nonzero tells yylex to ignore \ in string constants. */
140 static int ignore_escape_flag;
142 static const char *readescape PARAMS ((const char *, const char *,
143 unsigned int *));
144 static const char *read_ucs PARAMS ((const char *, const char *,
145 unsigned int *, int));
146 static void parse_float PARAMS ((PTR));
147 static tree lex_number PARAMS ((const char *, unsigned int));
148 static tree lex_string PARAMS ((const char *, unsigned int, int));
149 static tree lex_charconst PARAMS ((const char *, unsigned int, int));
150 static void update_header_times PARAMS ((const char *));
151 static int dump_one_header PARAMS ((splay_tree_node, void *));
152 static int mark_splay_tree_node PARAMS ((splay_tree_node, void *));
153 static void mark_splay_tree PARAMS ((void *));
155 #if !USE_CPPLIB
156 static int skip_white_space PARAMS ((int));
157 static char *extend_token_buffer PARAMS ((const char *));
158 static void extend_token_buffer_to PARAMS ((int));
159 static int read_line_number PARAMS ((int *));
160 static void process_directive PARAMS ((void));
161 #else
162 static void cb_ident PARAMS ((cpp_reader *, const cpp_string *));
163 static void cb_enter_file PARAMS ((cpp_reader *));
164 static void cb_leave_file PARAMS ((cpp_reader *));
165 static void cb_rename_file PARAMS ((cpp_reader *));
166 static void cb_def_pragma PARAMS ((cpp_reader *));
167 #endif
170 const char *
171 init_c_lex (filename)
172 const char *filename;
174 struct c_fileinfo *toplevel;
176 /* Set up filename timing. Must happen before cpp_start_read. */
177 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
179 (splay_tree_delete_value_fn)free);
180 /* Make sure to mark the filenames in the tree for GC. */
181 ggc_add_root (&file_info_tree, 1, sizeof (file_info_tree),
182 mark_splay_tree);
183 toplevel = get_fileinfo (ggc_strdup ("<top level>"));
184 if (flag_detailed_statistics)
186 header_time = 0;
187 body_time = get_run_time ();
188 toplevel->time = body_time;
191 #ifdef MULTIBYTE_CHARS
192 /* Change to the native locale for multibyte conversions. */
193 setlocale (LC_CTYPE, "");
194 GET_ENVIRONMENT (literal_codeset, "LANG");
195 #endif
197 #if !USE_CPPLIB
198 /* Open input file. */
199 if (filename == 0 || !strcmp (filename, "-"))
201 finput = stdin;
202 filename = "stdin";
204 else
205 finput = fopen (filename, "r");
206 if (finput == 0)
207 pfatal_with_name (filename);
209 #ifdef IO_BUFFER_SIZE
210 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
211 #endif
212 #else /* !USE_CPPLIB */
214 parse_in.cb.ident = cb_ident;
215 parse_in.cb.enter_file = cb_enter_file;
216 parse_in.cb.leave_file = cb_leave_file;
217 parse_in.cb.rename_file = cb_rename_file;
218 parse_in.cb.def_pragma = cb_def_pragma;
220 /* Make sure parse_in.digraphs matches flag_digraphs. */
221 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
223 if (! cpp_start_read (&parse_in, filename))
224 exit (FATAL_EXIT_CODE); /* cpplib has emitted an error. */
226 if (filename == 0 || !strcmp (filename, "-"))
227 filename = "stdin";
228 #endif
230 #if !USE_CPPLIB
231 maxtoken = 40;
232 token_buffer = (char *) xmalloc (maxtoken + 2);
233 #endif
234 /* Start it at 0, because check_newline is called at the very beginning
235 and will increment it to 1. */
236 lineno = lex_lineno = 0;
238 return filename;
241 struct c_fileinfo *
242 get_fileinfo (name)
243 const char *name;
245 splay_tree_node n;
246 struct c_fileinfo *fi;
248 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
249 if (n)
250 return (struct c_fileinfo *) n->value;
252 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
253 fi->time = 0;
254 fi->interface_only = 0;
255 fi->interface_unknown = 1;
256 splay_tree_insert (file_info_tree, (splay_tree_key) name,
257 (splay_tree_value) fi);
258 return fi;
261 static void
262 update_header_times (name)
263 const char *name;
265 /* Changing files again. This means currently collected time
266 is charged against header time, and body time starts back at 0. */
267 if (flag_detailed_statistics)
269 int this_time = get_run_time ();
270 struct c_fileinfo *file = get_fileinfo (name);
271 header_time += this_time - body_time;
272 file->time += this_time - body_time;
273 body_time = this_time;
277 static int
278 dump_one_header (n, dummy)
279 splay_tree_node n;
280 void *dummy ATTRIBUTE_UNUSED;
282 print_time ((const char *) n->key,
283 ((struct c_fileinfo *) n->value)->time);
284 return 0;
287 void
288 dump_time_statistics ()
290 struct c_fileinfo *file = get_fileinfo (input_filename);
291 int this_time = get_run_time ();
292 file->time += this_time - body_time;
294 fprintf (stderr, "\n******\n");
295 print_time ("header files (total)", header_time);
296 print_time ("main file (total)", this_time - body_time);
297 fprintf (stderr, "ratio = %g : 1\n",
298 (double)header_time / (double)(this_time - body_time));
299 fprintf (stderr, "\n******\n");
301 splay_tree_foreach (file_info_tree, dump_one_header, 0);
304 #if !USE_CPPLIB
306 /* If C is not whitespace, return C.
307 Otherwise skip whitespace and return first nonwhite char read. */
309 static int
310 skip_white_space (c)
311 register int c;
313 for (;;)
315 switch (c)
317 /* There is no need to process comments or backslash-newline
318 here. None can occur in the output of cpp. Do handle \r
319 in case someone sent us a .i file. */
321 case '\n':
322 if (linemode)
324 put_back (c);
325 return EOF;
327 c = check_newline ();
328 break;
330 case '\r':
331 /* Per C99, horizontal whitespace is just these four characters. */
332 case ' ':
333 case '\t':
334 case '\f':
335 case '\v':
336 c = getch ();
337 break;
339 case '\\':
340 error ("stray '\\' in program");
341 c = getch ();
342 break;
344 default:
345 return (c);
350 /* Skips all of the white space at the current location in the input file. */
/* Consume white space at the current input position, leaving the
   first non-white character pushed back as the next one to be read.  */
void
position_after_white_space ()
{
  put_back (skip_white_space (getch ()));
}
/* Token buffer growth.  extend_token_buffer_to (SIZE) enlarges the
   buffer, preserving the data in it, until its nominal length is at
   least SIZE.  extend_token_buffer (P) does the same for a pointer P
   just beyond the last valid character in the old buffer, and returns
   a pointer to the corresponding place in the new buffer.  */
367 static void
368 extend_token_buffer_to (size)
369 int size;
372 maxtoken = maxtoken * 2 + 10;
373 while (maxtoken < size);
374 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
377 static char *
378 extend_token_buffer (p)
379 const char *p;
381 int offset = p - token_buffer;
382 extend_token_buffer_to (offset);
383 return token_buffer + offset;
387 static int
388 read_line_number (num)
389 int *num;
391 tree value;
392 enum cpp_ttype token = c_lex (&value);
394 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
396 *num = TREE_INT_CST_LOW (value);
397 return 1;
399 else
401 if (token != CPP_EOF)
402 error ("invalid #-line");
403 return 0;
407 /* At the beginning of a line, increment the line number
408 and process any #-directive on this line.
409 If the line is a #-directive, read the entire line and return a newline.
410 Otherwise, return the line's first non-whitespace character. */
413 check_newline ()
415 register int c;
417 /* Loop till we get a nonblank, non-directive line. */
418 for (;;)
420 /* Read first nonwhite char on the line. */
422 c = getch ();
423 while (c == ' ' || c == '\t');
425 lex_lineno++;
426 if (c == '#')
428 process_directive ();
429 return '\n';
432 else if (c != '\n')
433 break;
435 return c;
438 static void
439 process_directive ()
441 enum cpp_ttype token;
442 tree value;
443 int saw_line;
444 enum { act_none, act_push, act_pop } action;
445 int action_number, l;
446 char *new_file;
447 #ifndef NO_IMPLICIT_EXTERN_C
448 int entering_c_header = 0;
449 #endif
451 /* Don't read beyond this line. */
452 saw_line = 0;
453 linemode = 1;
455 token = c_lex (&value);
457 if (token == CPP_NAME)
459 /* If a letter follows, then if the word here is `line', skip
460 it and ignore it; otherwise, ignore the line, with an error
461 if the word isn't `pragma'. */
463 const char *name = IDENTIFIER_POINTER (value);
465 if (!strcmp (name, "pragma"))
467 dispatch_pragma ();
468 goto skipline;
470 else if (!strcmp (name, "define"))
472 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
473 goto skipline;
475 else if (!strcmp (name, "undef"))
477 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
478 goto skipline;
480 else if (!strcmp (name, "line"))
482 saw_line = 1;
483 token = c_lex (&value);
484 goto linenum;
486 else if (!strcmp (name, "ident"))
488 /* #ident. We expect a string constant here.
489 The pedantic warning and syntax error are now in cpp. */
491 token = c_lex (&value);
492 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
493 goto skipline;
495 #ifdef ASM_OUTPUT_IDENT
496 if (! flag_no_ident)
498 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
500 #endif
502 /* Skip the rest of this line. */
503 goto skipline;
506 error ("undefined or invalid # directive `%s'", name);
507 goto skipline;
510 /* If the # is the only nonwhite char on the line,
511 just ignore it. Check the new newline. */
512 if (token == CPP_EOF)
513 goto skipline;
515 linenum:
516 /* Here we have either `#line' or `# <nonletter>'.
517 In either case, it should be a line number; a digit should follow. */
519 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
521 error ("invalid #-line");
522 goto skipline;
525 /* subtract one, because it is the following line that
526 gets the specified number */
528 l = TREE_INT_CST_LOW (value) - 1;
530 /* More follows: it must be a string constant (filename).
531 It would be neat to use cpplib to quickly process the string, but
532 (1) we don't have a handy tokenization of the string, and
533 (2) I don't know how well that would work in the presense
534 of filenames that contain wide characters. */
536 if (saw_line)
538 /* Don't treat \ as special if we are processing #line 1 "...".
539 If you want it to be treated specially, use # 1 "...". */
540 ignore_escape_flag = 1;
543 /* Read the string constant. */
544 token = c_lex (&value);
546 ignore_escape_flag = 0;
548 if (token == CPP_EOF)
550 /* No more: store the line number and check following line. */
551 lex_lineno = l;
552 goto skipline;
555 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
557 error ("invalid #line");
558 goto skipline;
561 new_file = TREE_STRING_POINTER (value);
563 if (main_input_filename == 0)
564 main_input_filename = new_file;
566 action = act_none;
567 action_number = 0;
569 /* Each change of file name
570 reinitializes whether we are now in a system header. */
571 in_system_header = 0;
573 if (!read_line_number (&action_number))
575 /* Update the name in the top element of input_file_stack. */
576 if (input_file_stack)
577 input_file_stack->name = input_filename;
580 /* `1' after file name means entering new file.
581 `2' after file name means just left a file. */
583 if (action_number == 1)
585 action = act_push;
586 read_line_number (&action_number);
588 else if (action_number == 2)
590 action = act_pop;
591 read_line_number (&action_number);
593 if (action_number == 3)
595 /* `3' after file name means this is a system header file. */
596 in_system_header = 1;
597 read_line_number (&action_number);
599 #ifndef NO_IMPLICIT_EXTERN_C
600 if (action_number == 4)
602 /* `4' after file name means this is a C header file. */
603 entering_c_header = 1;
604 read_line_number (&action_number);
606 #endif
608 /* Do the actions implied by the preceding numbers. */
609 if (action == act_push)
611 lineno = lex_lineno;
612 push_srcloc (input_filename, 1);
613 input_file_stack->indent_level = indent_level;
614 debug_start_source_file (input_filename);
615 #ifndef NO_IMPLICIT_EXTERN_C
616 if (c_header_level)
617 ++c_header_level;
618 else if (entering_c_header)
620 c_header_level = 1;
621 ++pending_lang_change;
623 #endif
625 else if (action == act_pop)
627 /* Popping out of a file. */
628 if (input_file_stack->next)
630 #ifndef NO_IMPLICIT_EXTERN_C
631 if (c_header_level && --c_header_level == 0)
633 if (entering_c_header)
634 warning ("badly nested C headers from preprocessor");
635 --pending_lang_change;
637 #endif
638 #if 0
639 if (indent_level != input_file_stack->indent_level)
641 warning_with_file_and_line
642 (input_filename, lex_lineno,
643 "This file contains more '%c's than '%c's.",
644 indent_level > input_file_stack->indent_level ? '{' : '}',
645 indent_level > input_file_stack->indent_level ? '}' : '{');
647 #endif
648 pop_srcloc ();
649 debug_end_source_file (input_file_stack->line);
651 else
652 error ("#-lines for entering and leaving files don't match");
655 update_header_times (new_file);
657 input_filename = new_file;
658 lex_lineno = l;
660 /* Hook for C++. */
661 extract_interface_info ();
663 /* skip the rest of this line. */
664 skipline:
665 linemode = 0;
667 while (getch () != '\n');
669 #else /* USE_CPPLIB */
671 /* Not yet handled: #pragma, #define, #undef.
672 No need to deal with linemarkers under normal conditions. */
674 static void
675 cb_ident (pfile, str)
676 cpp_reader *pfile ATTRIBUTE_UNUSED;
677 const cpp_string *str;
679 #ifdef ASM_OUTPUT_IDENT
680 if (! flag_no_ident)
682 /* Convert escapes in the string. */
683 tree value = lex_string ((const char *)str->text, str->len, 0);
684 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
686 #endif
689 static void
690 cb_enter_file (pfile)
691 cpp_reader *pfile;
693 cpp_buffer *ip = CPP_BUFFER (pfile);
694 /* Bleah, need a better interface to this. */
695 const char *flags = cpp_syshdr_flags (pfile, ip);
697 /* Mustn't stack the main buffer on the input stack. (Ick.) */
698 if (ip->prev)
700 lex_lineno = lineno = ip->prev->lineno - 1;
701 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
702 input_file_stack->indent_level = indent_level;
703 debug_start_source_file (ip->nominal_fname);
705 else
706 lex_lineno = 1;
708 update_header_times (ip->nominal_fname);
710 /* Hook for C++. */
711 extract_interface_info ();
713 in_system_header = (flags[0] != 0);
714 #ifndef NO_IMPLICIT_EXTERN_C
715 if (c_header_level)
716 ++c_header_level;
717 else if (flags[2] != 0)
719 c_header_level = 1;
720 ++pending_lang_change;
722 #endif
725 static void
726 cb_leave_file (pfile)
727 cpp_reader *pfile;
729 /* Bleah, need a better interface to this. */
730 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
731 #if 0
732 if (indent_level != input_file_stack->indent_level)
734 warning_with_file_and_line
735 (input_filename, lex_lineno,
736 "This file contains more '%c's than '%c's.",
737 indent_level > input_file_stack->indent_level ? '{' : '}',
738 indent_level > input_file_stack->indent_level ? '}' : '{');
740 #endif
741 /* We get called for the main buffer, but we mustn't pop it. */
742 if (input_file_stack->next)
743 pop_srcloc ();
744 in_system_header = (flags[0] != 0);
745 #ifndef NO_IMPLICIT_EXTERN_C
746 if (c_header_level && --c_header_level == 0)
748 if (flags[2] != 0)
749 warning ("badly nested C headers from preprocessor");
750 --pending_lang_change;
752 #endif
753 lex_lineno = CPP_BUFFER (pfile)->lineno;
754 debug_end_source_file (input_file_stack->line);
756 update_header_times (input_file_stack->name);
757 /* Hook for C++. */
758 extract_interface_info ();
761 static void
762 cb_rename_file (pfile)
763 cpp_reader *pfile;
765 cpp_buffer *ip = CPP_BUFFER (pfile);
766 /* Bleah, need a better interface to this. */
767 const char *flags = cpp_syshdr_flags (pfile, ip);
768 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
769 lex_lineno = ip->lineno;
770 in_system_header = (flags[0] != 0);
772 update_header_times (ip->nominal_fname);
773 /* Hook for C++. */
774 extract_interface_info ();
777 static void
778 cb_def_pragma (pfile)
779 cpp_reader *pfile;
781 /* Issue a warning message if we have been asked to do so. Ignore
782 unknown pragmas in system headers unless an explicit
783 -Wunknown-pragmas has been given. */
784 if (warn_unknown_pragmas > in_system_header)
786 const unsigned char *space, *name = 0;
787 cpp_token s;
789 cpp_get_token (pfile, &s);
790 space = cpp_token_as_text (pfile, &s);
791 cpp_get_token (pfile, &s);
792 if (s.type == CPP_NAME)
793 name = cpp_token_as_text (pfile, &s);
795 if (name)
796 warning ("ignoring #pragma %s %s", space, name);
797 else
798 warning ("ignoring #pragma %s", space);
801 #endif /* USE_CPPLIB */
803 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
805 [lex.charset]: The character designated by the universal-character-name
806 \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
807 is NNNNNNNN; the character designated by the universal-character-name
808 \uNNNN is that character whose character short name in ISO/IEC 10646 is
809 0000NNNN. If the hexadecimal value for a universal character name is
810 less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
811 universal character name designates a character in the basic source
812 character set, then the program is ill-formed.
814 We assume that wchar_t is Unicode, so we don't need to do any
815 mapping. Is this ever wrong? */
/* Read up to LENGTH hex digits of a universal-character-name from
   [P, LIMIT), store the accumulated value in *CPTR and return the
   updated input pointer.  Reading stops early, with an error, when
   the text runs out or a non-hex character is met (that character is
   left unread).  The resulting code is then validated.  On EBCDIC
   targets the construct is unsupported and 0x3f is stored instead.  */
static const char *
read_ucs (p, limit, cptr, length)
     const char *p;
     const char *limit;
     unsigned int *cptr;
     int length;
{
  unsigned int code = 0;
  int c;

  while (length != 0)
    {
      if (p >= limit)
	{
	  error ("incomplete universal-character-name");
	  break;
	}

      c = *p++;
      if (! ISXDIGIT (c))
	{
	  error ("non hex digit '%c' in universal-character-name", c);
	  p--;
	  break;
	}

      /* Shift in one more hex digit.  */
      code <<= 4;
      if (c >= '0' && c <= '9')
	code += c - '0';
      else if (c >= 'a' && c <= 'f')
	code += c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
	code += c - 'A' + 10;

      --length;
    }

#ifdef TARGET_EBCDIC
  sorry ("universal-character-name on EBCDIC target");
  *cptr = 0x3f;  /* EBCDIC invalid character */
  return p;
#endif

  if (code >= 0x20 && code < 0x7f)
    {
      /* ASCII printable character.  The C character set consists of all of
	 these except $, @ and `.  We use hex escapes so that this also
	 works with EBCDIC hosts.  */
      if (code != 0x24 && code != 0x40 && code != 0x60)
	error ("universal-character-name used for '%c'", code);
    }
  else if (code <= 0x9f || (code & 0x80000000))
    /* Below 0x20, in 0x7f-0x9f, or with the top bit set.  */
    error ("invalid universal-character-name");
  /* Otherwise a true extended character, OK.  */

  *cptr = code;
  return p;
}
875 /* Read an escape sequence and write its character equivalent into *CPTR.
876 P is the input pointer, which is just after the backslash. LIMIT
877 is how much text we have.
878 Returns the updated input pointer. */
880 static const char *
881 readescape (p, limit, cptr)
882 const char *p;
883 const char *limit;
884 unsigned int *cptr;
886 unsigned int c, code, count;
887 unsigned firstdig = 0;
888 int nonnull;
890 if (p == limit)
892 /* cpp has already issued an error for this. */
893 *cptr = 0;
894 return p;
897 c = *p++;
899 switch (c)
901 case 'x':
902 if (warn_traditional && !in_system_header)
903 warning ("the meaning of `\\x' varies with -traditional");
905 if (flag_traditional)
907 *cptr = 'x';
908 return p;
911 code = 0;
912 count = 0;
913 nonnull = 0;
914 while (p < limit)
916 c = *p++;
917 if (! ISXDIGIT (c))
919 p--;
920 break;
922 code *= 16;
923 if (c >= 'a' && c <= 'f')
924 code += c - 'a' + 10;
925 if (c >= 'A' && c <= 'F')
926 code += c - 'A' + 10;
927 if (c >= '0' && c <= '9')
928 code += c - '0';
929 if (code != 0 || count != 0)
931 if (count == 0)
932 firstdig = code;
933 count++;
935 nonnull = 1;
937 if (! nonnull)
939 warning ("\\x used with no following hex digits");
940 *cptr = 'x';
941 return p;
943 else if (count == 0)
944 /* Digits are all 0's. Ok. */
946 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
947 || (count > 1
948 && (((unsigned)1
949 << (TYPE_PRECISION (integer_type_node)
950 - (count - 1) * 4))
951 <= firstdig)))
952 pedwarn ("hex escape out of range");
953 *cptr = code;
954 return p;
956 case '0': case '1': case '2': case '3': case '4':
957 case '5': case '6': case '7':
958 code = 0;
959 for (count = 0; count < 3; count++)
961 if (c < '0' || c > '7')
963 p--;
964 break;
966 code = (code * 8) + (c - '0');
967 if (p == limit)
968 break;
969 c = *p++;
972 if (count == 3)
973 p--;
975 *cptr = code;
976 return p;
978 case '\\': case '\'': case '"': case '?':
979 *cptr = c;
980 return p;
982 case 'n': *cptr = TARGET_NEWLINE; return p;
983 case 't': *cptr = TARGET_TAB; return p;
984 case 'r': *cptr = TARGET_CR; return p;
985 case 'f': *cptr = TARGET_FF; return p;
986 case 'b': *cptr = TARGET_BS; return p;
987 case 'v': *cptr = TARGET_VT; return p;
988 case 'a':
989 if (warn_traditional && !in_system_header)
990 warning ("the meaning of '\\a' varies with -traditional");
991 *cptr = flag_traditional ? c : TARGET_BELL;
992 return p;
994 /* Warnings and support checks handled by read_ucs(). */
995 case 'u': case 'U':
996 if (c_language != clk_cplusplus && !flag_isoc99)
997 break;
999 if (warn_traditional && !in_system_header)
1000 warning ("the meaning of '\\%c' varies with -traditional", c);
1002 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
1004 case 'e': case 'E':
1005 if (pedantic)
1006 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
1007 *cptr = TARGET_ESC; return p;
1009 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1010 '\%' is used to prevent SCCS from getting confused. */
1011 case '(': case '{': case '[': case '%':
1012 if (pedantic)
1013 pedwarn ("unknown escape sequence '\\%c'", c);
1014 *cptr = c;
1015 return p;
1018 if (ISGRAPH (c))
1019 pedwarn ("unknown escape sequence '\\%c'", c);
1020 else
1021 pedwarn ("unknown escape sequence: '\\' followed by char 0x%x", c);
1023 *cptr = c;
1024 return p;
1027 #if 0 /* not yet */
1028 /* Returns nonzero if C is a universal-character-name. Give an error if it
1029 is not one which may appear in an identifier, as per [extendid].
1031 Note that extended character support in identifiers has not yet been
1032 implemented. It is my personal opinion that this is not a desirable
1033 feature. Portable code cannot count on support for more than the basic
1034 identifier character set. */
1036 static inline int
1037 is_extended_char (c)
1038 int c;
1040 #ifdef TARGET_EBCDIC
1041 return 0;
1042 #else
1043 /* ASCII. */
1044 if (c < 0x7f)
1045 return 0;
1047 /* None of the valid chars are outside the Basic Multilingual Plane (the
1048 low 16 bits). */
1049 if (c > 0xffff)
1051 error ("universal-character-name '\\U%08x' not valid in identifier", c);
1052 return 1;
1055 /* Latin */
1056 if ((c >= 0x00c0 && c <= 0x00d6)
1057 || (c >= 0x00d8 && c <= 0x00f6)
1058 || (c >= 0x00f8 && c <= 0x01f5)
1059 || (c >= 0x01fa && c <= 0x0217)
1060 || (c >= 0x0250 && c <= 0x02a8)
1061 || (c >= 0x1e00 && c <= 0x1e9a)
1062 || (c >= 0x1ea0 && c <= 0x1ef9))
1063 return 1;
1065 /* Greek */
1066 if ((c == 0x0384)
1067 || (c >= 0x0388 && c <= 0x038a)
1068 || (c == 0x038c)
1069 || (c >= 0x038e && c <= 0x03a1)
1070 || (c >= 0x03a3 && c <= 0x03ce)
1071 || (c >= 0x03d0 && c <= 0x03d6)
1072 || (c == 0x03da)
1073 || (c == 0x03dc)
1074 || (c == 0x03de)
1075 || (c == 0x03e0)
1076 || (c >= 0x03e2 && c <= 0x03f3)
1077 || (c >= 0x1f00 && c <= 0x1f15)
1078 || (c >= 0x1f18 && c <= 0x1f1d)
1079 || (c >= 0x1f20 && c <= 0x1f45)
1080 || (c >= 0x1f48 && c <= 0x1f4d)
1081 || (c >= 0x1f50 && c <= 0x1f57)
1082 || (c == 0x1f59)
1083 || (c == 0x1f5b)
1084 || (c == 0x1f5d)
1085 || (c >= 0x1f5f && c <= 0x1f7d)
1086 || (c >= 0x1f80 && c <= 0x1fb4)
1087 || (c >= 0x1fb6 && c <= 0x1fbc)
1088 || (c >= 0x1fc2 && c <= 0x1fc4)
1089 || (c >= 0x1fc6 && c <= 0x1fcc)
1090 || (c >= 0x1fd0 && c <= 0x1fd3)
1091 || (c >= 0x1fd6 && c <= 0x1fdb)
1092 || (c >= 0x1fe0 && c <= 0x1fec)
1093 || (c >= 0x1ff2 && c <= 0x1ff4)
1094 || (c >= 0x1ff6 && c <= 0x1ffc))
1095 return 1;
1097 /* Cyrillic */
1098 if ((c >= 0x0401 && c <= 0x040d)
1099 || (c >= 0x040f && c <= 0x044f)
1100 || (c >= 0x0451 && c <= 0x045c)
1101 || (c >= 0x045e && c <= 0x0481)
1102 || (c >= 0x0490 && c <= 0x04c4)
1103 || (c >= 0x04c7 && c <= 0x04c8)
1104 || (c >= 0x04cb && c <= 0x04cc)
1105 || (c >= 0x04d0 && c <= 0x04eb)
1106 || (c >= 0x04ee && c <= 0x04f5)
1107 || (c >= 0x04f8 && c <= 0x04f9))
1108 return 1;
1110 /* Armenian */
1111 if ((c >= 0x0531 && c <= 0x0556)
1112 || (c >= 0x0561 && c <= 0x0587))
1113 return 1;
1115 /* Hebrew */
1116 if ((c >= 0x05d0 && c <= 0x05ea)
1117 || (c >= 0x05f0 && c <= 0x05f4))
1118 return 1;
1120 /* Arabic */
1121 if ((c >= 0x0621 && c <= 0x063a)
1122 || (c >= 0x0640 && c <= 0x0652)
1123 || (c >= 0x0670 && c <= 0x06b7)
1124 || (c >= 0x06ba && c <= 0x06be)
1125 || (c >= 0x06c0 && c <= 0x06ce)
1126 || (c >= 0x06e5 && c <= 0x06e7))
1127 return 1;
1129 /* Devanagari */
1130 if ((c >= 0x0905 && c <= 0x0939)
1131 || (c >= 0x0958 && c <= 0x0962))
1132 return 1;
1134 /* Bengali */
1135 if ((c >= 0x0985 && c <= 0x098c)
1136 || (c >= 0x098f && c <= 0x0990)
1137 || (c >= 0x0993 && c <= 0x09a8)
1138 || (c >= 0x09aa && c <= 0x09b0)
1139 || (c == 0x09b2)
1140 || (c >= 0x09b6 && c <= 0x09b9)
1141 || (c >= 0x09dc && c <= 0x09dd)
1142 || (c >= 0x09df && c <= 0x09e1)
1143 || (c >= 0x09f0 && c <= 0x09f1))
1144 return 1;
1146 /* Gurmukhi */
1147 if ((c >= 0x0a05 && c <= 0x0a0a)
1148 || (c >= 0x0a0f && c <= 0x0a10)
1149 || (c >= 0x0a13 && c <= 0x0a28)
1150 || (c >= 0x0a2a && c <= 0x0a30)
1151 || (c >= 0x0a32 && c <= 0x0a33)
1152 || (c >= 0x0a35 && c <= 0x0a36)
1153 || (c >= 0x0a38 && c <= 0x0a39)
1154 || (c >= 0x0a59 && c <= 0x0a5c)
1155 || (c == 0x0a5e))
1156 return 1;
1158 /* Gujarati */
1159 if ((c >= 0x0a85 && c <= 0x0a8b)
1160 || (c == 0x0a8d)
1161 || (c >= 0x0a8f && c <= 0x0a91)
1162 || (c >= 0x0a93 && c <= 0x0aa8)
1163 || (c >= 0x0aaa && c <= 0x0ab0)
1164 || (c >= 0x0ab2 && c <= 0x0ab3)
1165 || (c >= 0x0ab5 && c <= 0x0ab9)
1166 || (c == 0x0ae0))
1167 return 1;
1169 /* Oriya */
1170 if ((c >= 0x0b05 && c <= 0x0b0c)
1171 || (c >= 0x0b0f && c <= 0x0b10)
1172 || (c >= 0x0b13 && c <= 0x0b28)
1173 || (c >= 0x0b2a && c <= 0x0b30)
1174 || (c >= 0x0b32 && c <= 0x0b33)
1175 || (c >= 0x0b36 && c <= 0x0b39)
1176 || (c >= 0x0b5c && c <= 0x0b5d)
1177 || (c >= 0x0b5f && c <= 0x0b61))
1178 return 1;
1180 /* Tamil */
1181 if ((c >= 0x0b85 && c <= 0x0b8a)
1182 || (c >= 0x0b8e && c <= 0x0b90)
1183 || (c >= 0x0b92 && c <= 0x0b95)
1184 || (c >= 0x0b99 && c <= 0x0b9a)
1185 || (c == 0x0b9c)
1186 || (c >= 0x0b9e && c <= 0x0b9f)
1187 || (c >= 0x0ba3 && c <= 0x0ba4)
1188 || (c >= 0x0ba8 && c <= 0x0baa)
1189 || (c >= 0x0bae && c <= 0x0bb5)
1190 || (c >= 0x0bb7 && c <= 0x0bb9))
1191 return 1;
1193 /* Telugu */
1194 if ((c >= 0x0c05 && c <= 0x0c0c)
1195 || (c >= 0x0c0e && c <= 0x0c10)
1196 || (c >= 0x0c12 && c <= 0x0c28)
1197 || (c >= 0x0c2a && c <= 0x0c33)
1198 || (c >= 0x0c35 && c <= 0x0c39)
1199 || (c >= 0x0c60 && c <= 0x0c61))
1200 return 1;
1202 /* Kannada */
1203 if ((c >= 0x0c85 && c <= 0x0c8c)
1204 || (c >= 0x0c8e && c <= 0x0c90)
1205 || (c >= 0x0c92 && c <= 0x0ca8)
1206 || (c >= 0x0caa && c <= 0x0cb3)
1207 || (c >= 0x0cb5 && c <= 0x0cb9)
1208 || (c >= 0x0ce0 && c <= 0x0ce1))
1209 return 1;
1211 /* Malayalam */
1212 if ((c >= 0x0d05 && c <= 0x0d0c)
1213 || (c >= 0x0d0e && c <= 0x0d10)
1214 || (c >= 0x0d12 && c <= 0x0d28)
1215 || (c >= 0x0d2a && c <= 0x0d39)
1216 || (c >= 0x0d60 && c <= 0x0d61))
1217 return 1;
1219 /* Thai */
1220 if ((c >= 0x0e01 && c <= 0x0e30)
1221 || (c >= 0x0e32 && c <= 0x0e33)
1222 || (c >= 0x0e40 && c <= 0x0e46)
1223 || (c >= 0x0e4f && c <= 0x0e5b))
1224 return 1;
1226 /* Lao */
1227 if ((c >= 0x0e81 && c <= 0x0e82)
1228 || (c == 0x0e84)
1229 || (c == 0x0e87)
1230 || (c == 0x0e88)
1231 || (c == 0x0e8a)
1232 || (c == 0x0e0d)
1233 || (c >= 0x0e94 && c <= 0x0e97)
1234 || (c >= 0x0e99 && c <= 0x0e9f)
1235 || (c >= 0x0ea1 && c <= 0x0ea3)
1236 || (c == 0x0ea5)
1237 || (c == 0x0ea7)
1238 || (c == 0x0eaa)
1239 || (c == 0x0eab)
1240 || (c >= 0x0ead && c <= 0x0eb0)
1241 || (c == 0x0eb2)
1242 || (c == 0x0eb3)
1243 || (c == 0x0ebd)
1244 || (c >= 0x0ec0 && c <= 0x0ec4)
1245 || (c == 0x0ec6))
1246 return 1;
1248 /* Georgian */
1249 if ((c >= 0x10a0 && c <= 0x10c5)
1250 || (c >= 0x10d0 && c <= 0x10f6))
1251 return 1;
1253 /* Hiragana */
1254 if ((c >= 0x3041 && c <= 0x3094)
1255 || (c >= 0x309b && c <= 0x309e))
1256 return 1;
1258 /* Katakana */
1259 if ((c >= 0x30a1 && c <= 0x30fe))
1260 return 1;
1262 /* Bopomofo */
1263 if ((c >= 0x3105 && c <= 0x312c))
1264 return 1;
1266 /* Hangul */
1267 if ((c >= 0x1100 && c <= 0x1159)
1268 || (c >= 0x1161 && c <= 0x11a2)
1269 || (c >= 0x11a8 && c <= 0x11f9))
1270 return 1;
1272 /* CJK Unified Ideographs */
1273 if ((c >= 0xf900 && c <= 0xfa2d)
1274 || (c >= 0xfb1f && c <= 0xfb36)
1275 || (c >= 0xfb38 && c <= 0xfb3c)
1276 || (c == 0xfb3e)
1277 || (c >= 0xfb40 && c <= 0xfb41)
1278 || (c >= 0xfb42 && c <= 0xfb44)
1279 || (c >= 0xfb46 && c <= 0xfbb1)
1280 || (c >= 0xfbd3 && c <= 0xfd3f)
1281 || (c >= 0xfd50 && c <= 0xfd8f)
1282 || (c >= 0xfd92 && c <= 0xfdc7)
1283 || (c >= 0xfdf0 && c <= 0xfdfb)
1284 || (c >= 0xfe70 && c <= 0xfe72)
1285 || (c == 0xfe74)
1286 || (c >= 0xfe76 && c <= 0xfefc)
1287 || (c >= 0xff21 && c <= 0xff3a)
1288 || (c >= 0xff41 && c <= 0xff5a)
1289 || (c >= 0xff66 && c <= 0xffbe)
1290 || (c >= 0xffc2 && c <= 0xffc7)
1291 || (c >= 0xffca && c <= 0xffcf)
1292 || (c >= 0xffd2 && c <= 0xffd7)
1293 || (c >= 0xffda && c <= 0xffdc)
1294 || (c >= 0x4e00 && c <= 0x9fa5))
1295 return 1;
1297 error ("universal-character-name '\\u%04x' not valid in identifier", c);
1298 return 1;
1299 #endif
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are passed to extend_token as a single byte.
   Larger values get a lead byte (MASK holds its marker bits, ORed
   with the top bits of C) followed by one continuation byte for each
   remaining group of six bits.  A continuation byte is the marker
   0x80 plus exactly six payload bits, so each shifted group is
   masked with 0x3f; without the mask, higher bits of C leak into the
   byte and the sequence is not valid UTF-8.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: marker bits plus the most significant payload bits.  */
  extend_token (mask | (c >> shift));

  /* Continuation bytes, most significant group first.  */
  do
    {
      shift -= 6;
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
1334 #endif
/* Compiled out by the surrounding `#if 0': a table whose entries pair
   the address of an integer type node (int, unsigned, long, unsigned
   long, long long, unsigned long long) with three flag bytes.
   Nothing in this block reaches the compiler.  */
1336 #if 0
1337 struct try_type
1339 tree *node_var;
1340 char unsigned_flag;
1341 char long_flag;
1342 char long_long_flag;
1345 struct try_type type_sequence[] =
1347 { &integer_type_node, 0, 0, 0},
1348 { &unsigned_type_node, 1, 0, 0},
1349 { &long_integer_type_node, 0, 1, 0},
1350 { &long_unsigned_type_node, 1, 1, 0},
1351 { &long_long_integer_type_node, 0, 1, 1},
1352 { &long_long_unsigned_type_node, 1, 1, 1}
1354 #endif /* 0 */
/* Argument block handed to parse_float through its PTR parameter.
   It carries the inputs of the conversion and receives its results.  */
1356 struct pf_args
1358 /* Input */
/* Text of the constant; passed to REAL_VALUE_ATOF or REAL_VALUE_HTOF.  */
1359 const char *str;
/* Nonzero if an 'f' suffix was seen: the result type is float.  */
1360 int fflag;
/* Nonzero if an 'l' suffix was seen: the result type is long double
   (parse_float reports an error when fflag is set as well).  */
1361 int lflag;
/* 16 selects the hexadecimal converter; any other value selects the
   decimal one.  */
1362 int base;
1363 /* Output */
/* Value of errno immediately after the conversion.  */
1364 int conversion_errno;
/* The converted constant.  */
1365 REAL_VALUE_TYPE value;
/* Type node chosen for the constant (float, double or long double).  */
1366 tree type;
1369 static void
1370 parse_float (data)
1371 PTR data;
1373 struct pf_args * args = (struct pf_args *) data;
1374 const char *typename;
1376 args->conversion_errno = 0;
1377 args->type = double_type_node;
1378 typename = "double";
1380 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1381 tells the desired precision of the binary result
1382 of decimal-to-binary conversion. */
1384 if (args->fflag)
1386 if (args->lflag)
1387 error ("both 'f' and 'l' suffixes on floating constant");
1389 args->type = float_type_node;
1390 typename = "float";
1392 else if (args->lflag)
1394 args->type = long_double_type_node;
1395 typename = "long double";
1397 else if (flag_single_precision_constant)
1399 args->type = float_type_node;
1400 typename = "float";
1403 errno = 0;
1404 if (args->base == 16)
1405 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1406 else
1407 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1409 args->conversion_errno = errno;
1410 /* A diagnostic is required here by some ISO C testsuites.
1411 This is not pedwarn, because some people don't want
1412 an error for this. */
1413 if (REAL_VALUE_ISINF (args->value) && pedantic)
1414 warning ("floating point number exceeds range of '%s'", typename);
/* Read the next token.  *VALUE is cleared to NULL_TREE and then, for
   identifiers, numbers, character constants and strings, set to the
   tree built by get_identifier, lex_number, lex_charconst or
   lex_string.  The return value is the cpp_ttype code of the token.

   With USE_CPPLIB the token is obtained from cpp_get_token; otherwise
   characters are read with getch () and tokenized by hand, returning
   CPP_EOF at end of input.  In both variants a stray character is
   reported with error () and lexing continues with the next token.  */
1418 c_lex (value)
1419 tree *value;
1421 #if USE_CPPLIB
1422 cpp_token tok;
1423 enum cpp_ttype type;
1425 retry:
1426 timevar_push (TV_CPP);
1427 cpp_get_token (&parse_in, &tok);
1428 timevar_pop (TV_CPP);
1430 /* The C++ front end does horrible things with the current line
1431 number. To ensure an accurate line number, we must reset it
1432 every time we return a token. */
1433 lex_lineno = cpp_get_line (&parse_in)->line;
1435 *value = NULL_TREE;
1436 lineno = lex_lineno;
1437 type = tok.type;
1438 switch (type)
1440 case CPP_OPEN_BRACE: indent_level++; break;
1441 case CPP_CLOSE_BRACE: indent_level--; break;
1443 /* Issue this error here, where we can get at tok.val.c. */
1444 case CPP_OTHER:
1445 if (ISGRAPH (tok.val.c))
1446 error ("stray '%c' in program", tok.val.c);
1447 else
1448 error ("stray '\\%#o' in program", tok.val.c);
1449 goto retry;
1451 case CPP_NAME:
1452 *value = get_identifier ((const char *)tok.val.node->name);
1453 break;
1455 case CPP_INT:
1456 case CPP_FLOAT:
1457 case CPP_NUMBER:
1458 *value = lex_number ((const char *)tok.val.str.text, tok.val.str.len);
1459 break;
1461 case CPP_CHAR:
1462 case CPP_WCHAR:
1463 *value = lex_charconst ((const char *)tok.val.str.text,
1464 tok.val.str.len, tok.type == CPP_WCHAR);
1465 break;
1467 case CPP_STRING:
1468 case CPP_WSTRING:
1469 case CPP_OSTRING:
1470 *value = lex_string ((const char *)tok.val.str.text,
1471 tok.val.str.len, tok.type == CPP_WSTRING);
1472 break;
1474 /* These tokens should not be visible outside cpplib. */
1475 case CPP_HEADER_NAME:
1476 case CPP_COMMENT:
1477 case CPP_MACRO_ARG:
1478 case CPP_PLACEMARKER:
1479 abort ();
1481 default: break;
1484 return type;
1486 #else
/* Hand-written tokenizer, used when USE_CPPLIB is zero.  */
1487 int c;
1488 char *p;
1489 int wide_flag = 0;
1490 int objc_flag = 0;
1491 int charconst = 0;
1493 *value = NULL_TREE;
1495 retry:
1496 c = getch ();
1498 /* Effectively do c = skip_white_space (c)
1499 but do it faster in the usual cases. */
1500 while (1)
1501 switch (c)
1503 case ' ':
1504 case '\t':
1505 case '\f':
1506 case '\v':
1507 c = getch ();
1508 break;
1510 case '\r':
1511 case '\n':
1512 c = skip_white_space (c);
1513 default:
1514 goto found_nonwhite;
1516 found_nonwhite:
1518 lineno = lex_lineno;
1520 switch (c)
1522 case EOF:
1523 return CPP_EOF;
1525 case 'L':
1526 /* Capital L may start a wide-string or wide-character constant. */
1528 register int c1 = getch();
1529 if (c1 == '\'')
1531 wide_flag = 1;
1532 goto char_constant;
1534 if (c1 == '"')
1536 wide_flag = 1;
1537 goto string_constant;
/* Not a wide constant: push the lookahead back and lex the 'L' as
   the first letter of an identifier.  */
1539 put_back (c1);
1541 goto letter;
1543 case '@':
1544 if (!doing_objc_thang)
1545 goto straychar;
1546 else
1548 /* '@' may start a constant string object. */
1549 register int c1 = getch ();
1550 if (c1 == '"')
1552 objc_flag = 1;
1553 goto string_constant;
1555 put_back (c1);
1556 /* Fall through to treat '@' as the start of an identifier. */
1559 case 'A': case 'B': case 'C': case 'D': case 'E':
1560 case 'F': case 'G': case 'H': case 'I': case 'J':
1561 case 'K': case 'M': case 'N': case 'O':
1562 case 'P': case 'Q': case 'R': case 'S': case 'T':
1563 case 'U': case 'V': case 'W': case 'X': case 'Y':
1564 case 'Z':
1565 case 'a': case 'b': case 'c': case 'd': case 'e':
1566 case 'f': case 'g': case 'h': case 'i': case 'j':
1567 case 'k': case 'l': case 'm': case 'n': case 'o':
1568 case 'p': case 'q': case 'r': case 's': case 't':
1569 case 'u': case 'v': case 'w': case 'x': case 'y':
1570 case 'z':
1571 case '_':
1572 case '$':
1573 letter:
/* Collect the identifier into token_buffer, growing the buffer
   whenever P reaches its end.  */
1574 p = token_buffer;
1575 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1577 /* Make sure this char really belongs in an identifier. */
1578 if (c == '$')
1580 if (! dollars_in_ident)
1581 error ("'$' in identifier");
1582 else if (pedantic)
1583 pedwarn ("'$' in identifier");
1586 if (p >= token_buffer + maxtoken)
1587 p = extend_token_buffer (p);
1589 *p++ = c;
1590 c = getch();
1593 put_back (c);
1595 if (p >= token_buffer + maxtoken)
1596 p = extend_token_buffer (p);
1597 *p = 0;
1599 *value = get_identifier (token_buffer);
1600 return CPP_NAME;
1602 case '.':
1604 /* It's hard to preserve tokenization on '.' because
1605 it could be a symbol by itself, or it could be the
1606 start of a floating point number and cpp won't tell us. */
1607 int c1 = getch ();
1608 if (c1 == '.')
1610 int c2 = getch ();
1611 if (c2 == '.')
1612 return CPP_ELLIPSIS;
1614 put_back (c2);
1615 error ("parse error at '..'");
1617 else if (c1 == '*' && c_language == clk_cplusplus)
1618 return CPP_DOT_STAR;
1620 put_back (c1);
1621 if (ISDIGIT (c1))
1622 goto number;
1624 return CPP_DOT;
1626 case '0': case '1': case '2': case '3': case '4':
1627 case '5': case '6': case '7': case '8': case '9':
1628 number:
1629 p = token_buffer;
1630 /* Scan the next preprocessing number. All C numeric constants
1631 are preprocessing numbers, but not all preprocessing numbers
1632 are valid numeric constants. Preprocessing numbers fit the
1633 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1634 See C99 section 6.4.8. */
1635 for (;;)
1637 if (p >= token_buffer + maxtoken)
1638 p = extend_token_buffer (p);
1640 *p++ = c;
1641 c = getch();
/* A sign continues the number only directly after an exponent
   letter.  */
1643 if (c == '+' || c == '-')
1645 int d = p[-1];
1646 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1647 continue;
1649 if (ISALNUM (c) || c == '_' || c == '.')
1650 continue;
1651 break;
1653 put_back (c);
1655 *value = lex_number (token_buffer, p - token_buffer);
1656 return CPP_NUMBER;
1658 case '\'':
1659 char_constant:
1660 charconst = 1;
1662 case '"':
1663 string_constant:
1665 int delimiter = charconst ? '\'' : '"';
1666 #ifdef MULTIBYTE_CHARS
1667 int longest_char = local_mb_cur_max ();
1668 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1669 #endif
1670 c = getch ();
/* The text of the constant is stored from token_buffer + 1 onward;
   that is also the address handed to lex_charconst / lex_string
   below.  */
1671 p = token_buffer + 1;
1673 while (c != delimiter && c != EOF)
1675 if (p + 2 > token_buffer + maxtoken)
1676 p = extend_token_buffer (p);
1678 /* ignore_escape_flag is set for reading the filename in #line. */
1679 if (!ignore_escape_flag && c == '\\')
1681 *p++ = c;
1682 *p++ = getch (); /* escaped character */
1683 c = getch ();
1684 continue;
1686 else
1688 #ifdef MULTIBYTE_CHARS
1689 int i;
1690 int char_len = -1;
1691 for (i = 0; i < longest_char; ++i)
1693 if (p + i >= token_buffer + maxtoken)
1694 p = extend_token_buffer (p);
1695 p[i] = c;
1697 char_len = local_mblen (p, i + 1);
1698 if (char_len != -1)
1699 break;
1700 c = getch ();
1702 if (char_len == -1)
1704 /* Replace all except the first byte. */
1705 put_back (c);
1706 for (--i; i > 0; --i)
1707 put_back (p[i]);
1708 char_len = 1;
1710 /* mbtowc sometimes needs an extra char before accepting */
1711 else if (char_len <= i)
1712 put_back (c);
1714 p += char_len;
1715 #else
1716 *p++ = c;
1717 #endif
1718 c = getch ();
1723 if (charconst)
1725 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1726 wide_flag);
1727 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1729 else
1731 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1732 wide_flag);
1733 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1736 case '+':
1737 case '-':
1738 case '&':
1739 case '|':
1740 case ':':
1741 case '<':
1742 case '>':
1743 case '*':
1744 case '/':
1745 case '%':
1746 case '^':
1747 case '!':
1748 case '=':
1750 int c1;
1751 enum cpp_ttype type = CPP_EOF;
/* Start from the single-character token; the lookahead read below
   may upgrade it to a two- or three-character operator.  */
1753 switch (c)
1755 case '+': type = CPP_PLUS; break;
1756 case '-': type = CPP_MINUS; break;
1757 case '&': type = CPP_AND; break;
1758 case '|': type = CPP_OR; break;
1759 case ':': type = CPP_COLON; break;
1760 case '<': type = CPP_LESS; break;
1761 case '>': type = CPP_GREATER; break;
1762 case '*': type = CPP_MULT; break;
1763 case '/': type = CPP_DIV; break;
1764 case '%': type = CPP_MOD; break;
1765 case '^': type = CPP_XOR; break;
1766 case '!': type = CPP_NOT; break;
1767 case '=': type = CPP_EQ; break;
1770 c1 = getch ();
/* NOTE(review): this arithmetic assumes each `OP=' token code sits at
   the fixed offset (CPP_EQ_EQ - CPP_EQ) from the code of `OP' for all
   codes below CPP_LAST_EQ -- confirm against the cpp_ttype
   definition.  */
1772 if (c1 == '=' && type < CPP_LAST_EQ)
1773 return type + (CPP_EQ_EQ - CPP_EQ);
1774 else if (c == c1)
1775 switch (c)
1777 case '+': return CPP_PLUS_PLUS;
1778 case '-': return CPP_MINUS_MINUS;
1779 case '&': return CPP_AND_AND;
1780 case '|': return CPP_OR_OR;
1781 case ':':
1782 if (c_language == clk_cplusplus)
1783 return CPP_SCOPE;
1784 break;
1786 case '<': type = CPP_LSHIFT; goto do_triad;
1787 case '>': type = CPP_RSHIFT; goto do_triad;
1789 else
1790 switch (c)
1792 case '-':
1793 if (c1 == '>')
1795 if (c_language == clk_cplusplus)
1797 c1 = getch ();
1798 if (c1 == '*')
1799 return CPP_DEREF_STAR;
1800 put_back (c1);
1802 return CPP_DEREF;
1804 break;
1806 case '>':
1807 if (c1 == '?' && c_language == clk_cplusplus)
1808 { type = CPP_MAX; goto do_triad; }
1809 break;
1811 case '<':
1812 if (c1 == ':' && flag_digraphs)
1813 return CPP_OPEN_SQUARE;
1814 if (c1 == '%' && flag_digraphs)
1815 { indent_level++; return CPP_OPEN_BRACE; }
1816 if (c1 == '?' && c_language == clk_cplusplus)
1817 { type = CPP_MIN; goto do_triad; }
1818 break;
1820 case ':':
1821 if (c1 == '>' && flag_digraphs)
1822 return CPP_CLOSE_SQUARE;
1823 break;
1824 case '%':
1825 if (c1 == '>' && flag_digraphs)
1826 { indent_level--; return CPP_CLOSE_BRACE; }
1827 break;
/* No multi-character operator matched: push the lookahead back and
   return the single-character token.  */
1830 put_back (c1);
1831 return type;
/* A two-character operator that may still be followed by '='.  */
1833 do_triad:
1834 c1 = getch ();
1835 if (c1 == '=')
1836 type += (CPP_EQ_EQ - CPP_EQ);
1837 else
1838 put_back (c1);
1839 return type;
1842 case '~': return CPP_COMPL;
1843 case '?': return CPP_QUERY;
1844 case ',': return CPP_COMMA;
1845 case '(': return CPP_OPEN_PAREN;
1846 case ')': return CPP_CLOSE_PAREN;
1847 case '[': return CPP_OPEN_SQUARE;
1848 case ']': return CPP_CLOSE_SQUARE;
1849 case '{': indent_level++; return CPP_OPEN_BRACE;
1850 case '}': indent_level--; return CPP_CLOSE_BRACE;
1851 case ';': return CPP_SEMICOLON;
1853 straychar:
1854 default:
1855 if (ISGRAPH (c))
1856 error ("stray '%c' in program", c);
1857 else
1858 error ("stray '\\%#o' in program", c);
1859 goto retry;
1861 /* NOTREACHED */
1862 #endif
1866 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
1868 static tree
1869 lex_number (str, len)
1870 const char *str;
1871 unsigned int len;
1873 int base = 10;
1874 int count = 0;
1875 int largest_digit = 0;
1876 int numdigits = 0;
1877 int overflow = 0;
1878 int c;
1879 tree value;
1880 const char *p;
1881 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1883 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1884 The code below which fills the parts array assumes that a host
1885 int is at least twice as wide as a host char, and that
1886 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1887 Two HOST_WIDE_INTs is the largest int literal we can store.
1888 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1889 must be exactly the number of parts needed to hold the bits
1890 of two HOST_WIDE_INTs. */
1891 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1892 unsigned int parts[TOTAL_PARTS];
1894 /* Optimize for most frequent case. */
1895 if (len == 1)
1897 if (*str == '0')
1898 return integer_zero_node;
1899 else if (*str == '1')
1900 return integer_one_node;
1901 else
1902 return build_int_2 (*str - '0', 0);
1905 for (count = 0; count < TOTAL_PARTS; count++)
1906 parts[count] = 0;
1908 /* len is known to be >1 at this point. */
1909 p = str;
1911 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1913 base = 16;
1914 p = str + 2;
1916 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1917 else if (str[0] == '0' && ISDIGIT (str[1]))
1919 base = 8;
1920 p = str + 1;
1925 c = *p++;
1927 if (c == '.')
1929 if (base == 16 && pedantic && !flag_isoc99)
1930 pedwarn ("floating constant may not be in radix 16");
1931 else if (floatflag == AFTER_POINT)
1932 ERROR ("too many decimal points in floating constant");
1933 else if (floatflag == AFTER_EXPON)
1934 ERROR ("decimal point in exponent - impossible!");
1935 else
1936 floatflag = AFTER_POINT;
1938 if (base == 8)
1939 base = 10;
1941 else if (c == '_')
1942 /* Possible future extension: silently ignore _ in numbers,
1943 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1944 but somewhat easier to read. Ada has this? */
1945 ERROR ("underscore in number");
1946 else
1948 int n;
1949 /* It is not a decimal point.
1950 It should be a digit (perhaps a hex digit). */
1952 if (ISDIGIT (c))
1954 n = c - '0';
1956 else if (base <= 10 && (c == 'e' || c == 'E'))
1958 base = 10;
1959 floatflag = AFTER_EXPON;
1960 break;
1962 else if (base == 16 && (c == 'p' || c == 'P'))
1964 floatflag = AFTER_EXPON;
1965 break; /* start of exponent */
1967 else if (base == 16 && c >= 'a' && c <= 'f')
1969 n = c - 'a' + 10;
1971 else if (base == 16 && c >= 'A' && c <= 'F')
1973 n = c - 'A' + 10;
1975 else
1977 p--;
1978 break; /* start of suffix */
1981 if (n >= largest_digit)
1982 largest_digit = n;
1983 numdigits++;
1985 for (count = 0; count < TOTAL_PARTS; count++)
1987 parts[count] *= base;
1988 if (count)
1990 parts[count]
1991 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1992 parts[count-1]
1993 &= (1 << HOST_BITS_PER_CHAR) - 1;
1995 else
1996 parts[0] += n;
1999 /* If the highest-order part overflows (gets larger than
2000 a host char will hold) then the whole number has
2001 overflowed. Record this and truncate the highest-order
2002 part. */
2003 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2005 overflow = 1;
2006 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2010 while (p < str + len);
2012 /* This can happen on input like `int i = 0x;' */
2013 if (numdigits == 0)
2014 ERROR ("numeric constant with no digits");
2016 if (largest_digit >= base)
2017 ERROR ("numeric constant contains digits beyond the radix");
2019 if (floatflag != NOT_FLOAT)
2021 tree type;
2022 int imag, fflag, lflag, conversion_errno;
2023 REAL_VALUE_TYPE real;
2024 struct pf_args args;
2025 char *copy;
2027 if (base == 16 && floatflag != AFTER_EXPON)
2028 ERROR ("hexadecimal floating constant has no exponent");
2030 /* Read explicit exponent if any, and put it in tokenbuf. */
2031 if ((base == 10 && ((c == 'e') || (c == 'E')))
2032 || (base == 16 && (c == 'p' || c == 'P')))
2034 if (p < str + len)
2035 c = *p++;
2036 if (p < str + len && (c == '+' || c == '-'))
2037 c = *p++;
2038 /* Exponent is decimal, even if string is a hex float. */
2039 if (! ISDIGIT (c))
2040 ERROR ("floating constant exponent has no digits");
2041 while (p < str + len && ISDIGIT (c))
2042 c = *p++;
2043 if (! ISDIGIT (c))
2044 p--;
2047 /* Copy the float constant now; we don't want any suffixes in the
2048 string passed to parse_float. */
2049 copy = alloca (p - str + 1);
2050 memcpy (copy, str, p - str);
2051 copy[p - str] = '\0';
2053 /* Now parse suffixes. */
2054 fflag = lflag = imag = 0;
2055 while (p < str + len)
2056 switch (*p++)
2058 case 'f': case 'F':
2059 if (fflag)
2060 ERROR ("more than one 'f' suffix on floating constant");
2061 else if (warn_traditional && !in_system_header)
2062 warning ("traditional C rejects the 'f' suffix");
2064 fflag = 1;
2065 break;
2067 case 'l': case 'L':
2068 if (lflag)
2069 ERROR ("more than one 'l' suffix on floating constant");
2070 else if (warn_traditional && !in_system_header)
2071 warning ("traditional C rejects the 'l' suffix");
2073 lflag = 1;
2074 break;
2076 case 'i': case 'I':
2077 case 'j': case 'J':
2078 if (imag)
2079 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2080 else if (pedantic)
2081 pedwarn ("ISO C forbids imaginary numeric constants");
2082 imag = 1;
2083 break;
2085 default:
2086 ERROR ("invalid suffix on floating constant");
2089 /* Setup input for parse_float() */
2090 args.str = copy;
2091 args.fflag = fflag;
2092 args.lflag = lflag;
2093 args.base = base;
2095 /* Convert string to a double, checking for overflow. */
2096 if (do_float_handler (parse_float, (PTR) &args))
2098 /* Receive output from parse_float() */
2099 real = args.value;
2101 else
2102 /* We got an exception from parse_float() */
2103 ERROR ("floating constant out of range");
2105 /* Receive output from parse_float() */
2106 conversion_errno = args.conversion_errno;
2107 type = args.type;
2109 #ifdef ERANGE
2110 /* ERANGE is also reported for underflow,
2111 so test the value to distinguish overflow from that. */
2112 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2113 && (REAL_VALUES_LESS (dconst1, real)
2114 || REAL_VALUES_LESS (real, dconstm1)))
2115 warning ("floating point number exceeds range of 'double'");
2116 #endif
2118 /* Create a node with determined type and value. */
2119 if (imag)
2120 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2121 build_real (type, real));
2122 else
2123 value = build_real (type, real);
2125 else
2127 tree trad_type, ansi_type, type;
2128 HOST_WIDE_INT high, low;
2129 int spec_unsigned = 0;
2130 int spec_long = 0;
2131 int spec_long_long = 0;
2132 int spec_imag = 0;
2133 int suffix_lu = 0;
2134 int warn = 0, i;
2136 trad_type = ansi_type = type = NULL_TREE;
2137 while (p < str + len)
2139 c = *p++;
2140 switch (c)
2142 case 'u': case 'U':
2143 if (spec_unsigned)
2144 error ("two 'u' suffixes on integer constant");
2145 else if (warn_traditional && !in_system_header)
2146 warning ("traditional C rejects the 'u' suffix");
2148 spec_unsigned = 1;
2149 if (spec_long)
2150 suffix_lu = 1;
2151 break;
2153 case 'l': case 'L':
2154 if (spec_long)
2156 if (spec_long_long)
2157 error ("three 'l' suffixes on integer constant");
2158 else if (suffix_lu)
2159 error ("'lul' is not a valid integer suffix");
2160 else if (c != spec_long)
2161 error ("'Ll' and 'lL' are not valid integer suffixes");
2162 else if (pedantic && ! flag_isoc99
2163 && ! in_system_header && warn_long_long)
2164 pedwarn ("ISO C89 forbids long long integer constants");
2165 spec_long_long = 1;
2167 spec_long = c;
2168 break;
2170 case 'i': case 'I': case 'j': case 'J':
2171 if (spec_imag)
2172 error ("more than one 'i' or 'j' suffix on integer constant");
2173 else if (pedantic)
2174 pedwarn ("ISO C forbids imaginary numeric constants");
2175 spec_imag = 1;
2176 break;
2178 default:
2179 ERROR ("invalid suffix on integer constant");
2183 /* If the literal overflowed, pedwarn about it now. */
2184 if (overflow)
2186 warn = 1;
2187 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2190 /* This is simplified by the fact that our constant
2191 is always positive. */
2193 high = low = 0;
2195 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2197 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2198 / HOST_BITS_PER_CHAR)]
2199 << (i * HOST_BITS_PER_CHAR));
2200 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2203 value = build_int_2 (low, high);
2204 TREE_TYPE (value) = long_long_unsigned_type_node;
2206 /* If warn_traditional, calculate both the ISO type and the
2207 traditional type, then see if they disagree.
2208 Otherwise, calculate only the type for the dialect in use. */
2209 if (warn_traditional || flag_traditional)
2211 /* Calculate the traditional type. */
2212 /* Traditionally, any constant is signed; but if unsigned is
2213 specified explicitly, obey that. Use the smallest size
2214 with the right number of bits, except for one special
2215 case with decimal constants. */
2216 if (! spec_long && base != 10
2217 && int_fits_type_p (value, unsigned_type_node))
2218 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2219 /* A decimal constant must be long if it does not fit in
2220 type int. I think this is independent of whether the
2221 constant is signed. */
2222 else if (! spec_long && base == 10
2223 && int_fits_type_p (value, integer_type_node))
2224 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2225 else if (! spec_long_long)
2226 trad_type = (spec_unsigned
2227 ? long_unsigned_type_node
2228 : long_integer_type_node);
2229 else if (int_fits_type_p (value,
2230 spec_unsigned
2231 ? long_long_unsigned_type_node
2232 : long_long_integer_type_node))
2233 trad_type = (spec_unsigned
2234 ? long_long_unsigned_type_node
2235 : long_long_integer_type_node);
2236 else
2237 trad_type = (spec_unsigned
2238 ? widest_unsigned_literal_type_node
2239 : widest_integer_literal_type_node);
2241 if (warn_traditional || ! flag_traditional)
2243 /* Calculate the ISO type. */
2244 if (! spec_long && ! spec_unsigned
2245 && int_fits_type_p (value, integer_type_node))
2246 ansi_type = integer_type_node;
2247 else if (! spec_long && (base != 10 || spec_unsigned)
2248 && int_fits_type_p (value, unsigned_type_node))
2249 ansi_type = unsigned_type_node;
2250 else if (! spec_unsigned && !spec_long_long
2251 && int_fits_type_p (value, long_integer_type_node))
2252 ansi_type = long_integer_type_node;
2253 else if (! spec_long_long
2254 && int_fits_type_p (value, long_unsigned_type_node))
2255 ansi_type = long_unsigned_type_node;
2256 else if (! spec_unsigned
2257 && int_fits_type_p (value, long_long_integer_type_node))
2258 ansi_type = long_long_integer_type_node;
2259 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2260 ansi_type = long_long_unsigned_type_node;
2261 else if (! spec_unsigned
2262 && int_fits_type_p (value, widest_integer_literal_type_node))
2263 ansi_type = widest_integer_literal_type_node;
2264 else
2265 ansi_type = widest_unsigned_literal_type_node;
2268 type = flag_traditional ? trad_type : ansi_type;
2270 /* We assume that constants specified in a non-decimal
2271 base are bit patterns, and that the programmer really
2272 meant what they wrote. */
2273 if (warn_traditional && !in_system_header
2274 && base == 10 && trad_type != ansi_type)
2276 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2277 warning ("width of integer constant changes with -traditional");
2278 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2279 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2280 else
2281 warning ("width of integer constant may change on other systems with -traditional");
2284 if (pedantic && !flag_traditional && (flag_isoc99 || !spec_long_long)
2285 && !warn
2286 && ((flag_isoc99
2287 ? TYPE_PRECISION (long_long_integer_type_node)
2288 : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type)))
2290 warn = 1;
2291 pedwarn ("integer constant larger than the maximum value of %s",
2292 (flag_isoc99
2293 ? (TREE_UNSIGNED (type)
2294 ? "an unsigned long long int"
2295 : "a long long int")
2296 : "an unsigned long int"));
2299 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2300 warning ("decimal constant is so large that it is unsigned");
2302 if (spec_imag)
2304 if (TYPE_PRECISION (type)
2305 <= TYPE_PRECISION (integer_type_node))
2306 value = build_complex (NULL_TREE, integer_zero_node,
2307 convert (integer_type_node, value));
2308 else
2309 ERROR ("complex integer constant is too wide for 'complex int'");
2311 else if (flag_traditional && !int_fits_type_p (value, type))
2312 /* The traditional constant 0x80000000 is signed
2313 but doesn't fit in the range of int.
2314 This will change it to -0x80000000, which does fit. */
2316 TREE_TYPE (value) = unsigned_type (type);
2317 value = convert (type, value);
2318 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2320 else
2321 TREE_TYPE (value) = type;
2323 /* If it's still an integer (not a complex), and it doesn't
2324 fit in the type we choose for it, then pedwarn. */
2326 if (! warn
2327 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2328 && ! int_fits_type_p (value, TREE_TYPE (value)))
2329 pedwarn ("integer constant is larger than the maximum value for its type");
2332 if (p < str + len)
2333 error ("missing white space after number '%.*s'", (int) (p - str), str);
2335 return value;
2337 syntax_error:
2338 return integer_zero_node;
2341 static tree
2342 lex_string (str, len, wide)
2343 const char *str;
2344 unsigned int len;
2345 int wide;
2347 tree value;
2348 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2349 char *q = buf;
2350 const char *p = str, *limit = str + len;
2351 unsigned int c;
2352 unsigned width = wide ? WCHAR_TYPE_SIZE
2353 : TYPE_PRECISION (char_type_node);
2355 #ifdef MULTIBYTE_CHARS
2356 /* Reset multibyte conversion state. */
2357 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2358 #endif
2360 while (p < limit)
2362 #ifdef MULTIBYTE_CHARS
2363 wchar_t wc;
2364 int char_len;
2366 char_len = local_mbtowc (&wc, p, limit - p);
2367 if (char_len == -1)
2369 warning ("Ignoring invalid multibyte character");
2370 char_len = 1;
2371 c = *p++;
2373 else
2375 p += char_len;
2376 c = wc;
2378 #else
2379 c = *p++;
2380 #endif
2382 if (c == '\\' && !ignore_escape_flag)
2384 p = readescape (p, limit, &c);
2385 if (width < HOST_BITS_PER_INT
2386 && (unsigned) c >= ((unsigned)1 << width))
2387 pedwarn ("escape sequence out of range for character");
2390 /* Add this single character into the buffer either as a wchar_t
2391 or as a single byte. */
2392 if (wide)
2394 unsigned charwidth = TYPE_PRECISION (char_type_node);
2395 unsigned bytemask = (1 << charwidth) - 1;
2396 int byte;
2398 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2400 int n;
2401 if (byte >= (int) sizeof (c))
2402 n = 0;
2403 else
2404 n = (c >> (byte * charwidth)) & bytemask;
2405 if (BYTES_BIG_ENDIAN)
2406 q[WCHAR_BYTES - byte - 1] = n;
2407 else
2408 q[byte] = n;
2410 q += WCHAR_BYTES;
2412 else
2414 *q++ = c;
2418 /* Terminate the string value, either with a single byte zero
2419 or with a wide zero. */
2421 if (wide)
2423 memset (q, 0, WCHAR_BYTES);
2424 q += WCHAR_BYTES;
2426 else
2428 *q++ = '\0';
2431 value = build_string (q - buf, buf);
2433 if (wide)
2434 TREE_TYPE (value) = wchar_array_type_node;
2435 else
2436 TREE_TYPE (value) = char_array_type_node;
2437 return value;
2440 static tree
2441 lex_charconst (str, len, wide)
2442 const char *str;
2443 unsigned int len;
2444 int wide;
2446 const char *limit = str + len;
2447 int result = 0;
2448 int num_chars = 0;
2449 int chars_seen = 0;
2450 unsigned width = TYPE_PRECISION (char_type_node);
2451 int max_chars;
2452 unsigned int c;
2453 tree value;
2455 #ifdef MULTIBYTE_CHARS
2456 int longest_char = local_mb_cur_max ();
2457 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2458 #endif
2460 max_chars = TYPE_PRECISION (integer_type_node) / width;
2461 if (wide)
2462 width = WCHAR_TYPE_SIZE;
2464 while (str < limit)
2466 #ifdef MULTIBYTE_CHARS
2467 wchar_t wc;
2468 int char_len;
2470 char_len = local_mbtowc (&wc, str, limit - str);
2471 if (char_len == -1)
2473 warning ("Ignoring invalid multibyte character");
2474 char_len = 1;
2475 c = *str++;
2477 else
2479 p += char_len;
2480 c = wc;
2482 #else
2483 c = *str++;
2484 #endif
2486 ++chars_seen;
2487 if (c == '\\')
2489 str = readescape (str, limit, &c);
2490 if (width < HOST_BITS_PER_INT
2491 && (unsigned) c >= ((unsigned)1 << width))
2492 pedwarn ("escape sequence out of range for character");
2494 #ifdef MAP_CHARACTER
2495 if (ISPRINT (c))
2496 c = MAP_CHARACTER (c);
2497 #endif
2499 /* Merge character into result; ignore excess chars. */
2500 num_chars += (width / TYPE_PRECISION (char_type_node));
2501 if (num_chars < max_chars + 1)
2503 if (width < HOST_BITS_PER_INT)
2504 result = (result << width) | (c & ((1 << width) - 1));
2505 else
2506 result = c;
2510 if (chars_seen == 0)
2511 error ("empty character constant");
2512 else if (num_chars > max_chars)
2514 num_chars = max_chars;
2515 error ("character constant too long");
2517 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2518 warning ("multi-character character constant");
2520 /* If char type is signed, sign-extend the constant. */
2521 if (! wide)
2523 int num_bits = num_chars * width;
2524 if (num_bits == 0)
2525 /* We already got an error; avoid invalid shift. */
2526 value = build_int_2 (0, 0);
2527 else if (TREE_UNSIGNED (char_type_node)
2528 || ((result >> (num_bits - 1)) & 1) == 0)
2529 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2530 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2532 else
2533 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2534 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2535 -1);
2536 /* In C, a character constant has type 'int'; in C++, 'char'. */
2537 if (chars_seen <= 1 && c_language == clk_cplusplus)
2538 TREE_TYPE (value) = char_type_node;
2539 else
2540 TREE_TYPE (value) = integer_type_node;
2542 else
2544 value = build_int_2 (result, 0);
2545 TREE_TYPE (value) = wchar_type_node;
2548 return value;
2551 /* Mark for GC a node in a splay tree whose keys are strings. */
2553 static int
2554 mark_splay_tree_node (n, data)
2555 splay_tree_node n;
2556 void *data ATTRIBUTE_UNUSED;
2558 ggc_mark_string ((char *) n->key);
2559 return 0;
2562 /* Mark for GC a splay tree whose keys are strings. */
2564 static void
2565 mark_splay_tree (p)
2566 void *p;
2568 splay_tree st = *(splay_tree *) p;
2570 splay_tree_foreach (st, mark_splay_tree_node, NULL);