1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
40 #include "splay-tree.h"
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
52 #endif /* MULTIBYTE_CHARS */
53 #ifndef GET_ENVIRONMENT
54 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
58 extern cpp_reader parse_in
;
60 /* Stream for reading from the input file. */
64 /* Private idea of the line number. See discussion in c_lex(). */
65 static int lex_lineno
;
67 /* We may keep statistics about how long each file took to compile. */
68 static int header_time
, body_time
;
69 static splay_tree file_info_tree
;
71 /* Cause the `yydebug' variable to be defined. */
78 unsigned char *buffer
;
83 static struct putback_buffer putback
= {NULL
, 0, -1};
85 static inline int getch
PARAMS ((void));
90 if (putback
.index
!= -1)
92 int ch
= putback
.buffer
[putback
.index
];
99 static inline void put_back
PARAMS ((int));
/* Visible part of put_back's body: store CH at the next slot of the
   putback buffer.  When putback.index already equals
   putback.buffer_size - 1, the buffer is first enlarged by 16 bytes with
   xrealloc.
   NOTE(review): with the `{NULL, 0, -1}' initializer this test presumably
   also holds on the very first call (size 0, index -1), so the first
   put_back allocates the buffer -- the struct's field order is not visible
   in this extract; confirm.  */
107 if (putback
.index
== putback
.buffer_size
- 1)
109 putback
.buffer_size
+= 16;
110 putback
.buffer
= xrealloc (putback
.buffer
, putback
.buffer_size
);
/* Advance the index, then store CH at the new position.  */
112 putback
.buffer
[++putback
.index
] = ch
;
120 /* File used for outputting assembler code. */
121 extern FILE *asm_out_file
;
123 #undef WCHAR_TYPE_SIZE
124 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
126 /* Number of bytes in a wide character. */
127 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
130 static int maxtoken
; /* Current nominal length of token buffer. */
131 static char *token_buffer
; /* Pointer to token buffer.
132 Actual allocated length is maxtoken + 2. */
135 int indent_level
; /* Number of { minus number of }. */
136 int pending_lang_change
; /* If we need to switch languages - C++ only */
137 int c_header_level
; /* depth in C headers - C++ only */
139 /* Nonzero tells yylex to ignore \ in string constants. */
140 static int ignore_escape_flag
;
142 static const char *readescape
PARAMS ((const char *, const char *,
144 static const char *read_ucs
PARAMS ((const char *, const char *,
145 unsigned int *, int));
146 static void parse_float
PARAMS ((PTR
));
147 static tree lex_number
PARAMS ((const char *, unsigned int));
148 static tree lex_string
PARAMS ((const char *, unsigned int, int));
149 static tree lex_charconst
PARAMS ((const char *, unsigned int, int));
150 static void update_header_times
PARAMS ((const char *));
151 static int dump_one_header
PARAMS ((splay_tree_node
, void *));
152 static int mark_splay_tree_node
PARAMS ((splay_tree_node
, void *));
153 static void mark_splay_tree
PARAMS ((void *));
156 static int skip_white_space
PARAMS ((int));
157 static char *extend_token_buffer
PARAMS ((const char *));
158 static void extend_token_buffer_to
PARAMS ((int));
159 static int read_line_number
PARAMS ((int *));
160 static void process_directive
PARAMS ((void));
162 static void cb_ident
PARAMS ((cpp_reader
*, const unsigned char *,
164 static void cb_enter_file
PARAMS ((cpp_reader
*));
165 static void cb_leave_file
PARAMS ((cpp_reader
*));
166 static void cb_rename_file
PARAMS ((cpp_reader
*));
167 static void cb_def_pragma
PARAMS ((cpp_reader
*));
172 init_c_lex (filename
)
173 const char *filename
;
175 struct c_fileinfo
*toplevel
;
177 /* Set up filename timing. Must happen before cpp_start_read. */
178 file_info_tree
= splay_tree_new ((splay_tree_compare_fn
)strcmp
,
180 (splay_tree_delete_value_fn
)free
);
181 /* Make sure to mark the filenames in the tree for GC. */
182 ggc_add_root (&file_info_tree
, 1, sizeof (file_info_tree
),
184 toplevel
= get_fileinfo (ggc_strdup ("<top level>"));
185 if (flag_detailed_statistics
)
188 body_time
= get_run_time ();
189 toplevel
->time
= body_time
;
192 #ifdef MULTIBYTE_CHARS
193 /* Change to the native locale for multibyte conversions. */
194 setlocale (LC_CTYPE
, "");
195 GET_ENVIRONMENT (literal_codeset
, "LANG");
199 /* Open input file. */
200 if (filename
== 0 || !strcmp (filename
, "-"))
206 finput
= fopen (filename
, "r");
208 pfatal_with_name (filename
);
210 #ifdef IO_BUFFER_SIZE
211 setvbuf (finput
, (char *) xmalloc (IO_BUFFER_SIZE
), _IOFBF
, IO_BUFFER_SIZE
);
213 #else /* !USE_CPPLIB */
215 parse_in
.cb
.ident
= cb_ident
;
216 parse_in
.cb
.enter_file
= cb_enter_file
;
217 parse_in
.cb
.leave_file
= cb_leave_file
;
218 parse_in
.cb
.rename_file
= cb_rename_file
;
219 parse_in
.cb
.def_pragma
= cb_def_pragma
;
221 /* Make sure parse_in.digraphs matches flag_digraphs. */
222 CPP_OPTION (&parse_in
, digraphs
) = flag_digraphs
;
224 if (! cpp_start_read (&parse_in
, 0 /* no printer */, filename
))
227 if (filename
== 0 || !strcmp (filename
, "-"))
233 token_buffer
= (char *) xmalloc (maxtoken
+ 2);
235 /* Start it at 0, because check_newline is called at the very beginning
236 and will increment it to 1. */
237 lineno
= lex_lineno
= 0;
247 struct c_fileinfo
*fi
;
249 n
= splay_tree_lookup (file_info_tree
, (splay_tree_key
) name
);
251 return (struct c_fileinfo
*) n
->value
;
253 fi
= (struct c_fileinfo
*) xmalloc (sizeof (struct c_fileinfo
));
255 fi
->interface_only
= 0;
256 fi
->interface_unknown
= 1;
257 splay_tree_insert (file_info_tree
, (splay_tree_key
) name
,
258 (splay_tree_value
) fi
);
/* Charge the run time elapsed since body_time both to the running
   header_time total and to the `time' field of the c_fileinfo record that
   get_fileinfo returns for NAME, then restart the interval by setting
   body_time to the current run time.
   NOTE(review): the braces and the parameter declaration are missing from
   this extract; the statements below are presumably all guarded by the
   flag_detailed_statistics test -- confirm against the full file.  */
263 update_header_times (name
)
266 /* Changing files again. This means currently collected time
267 is charged against header time, and body time starts back at 0. */
268 if (flag_detailed_statistics
)
270 int this_time
= get_run_time ();
271 struct c_fileinfo
*file
= get_fileinfo (name
);
272 header_time
+= this_time
- body_time
;
273 file
->time
+= this_time
- body_time
;
/* Start the next measurement interval at the current run time.  */
274 body_time
= this_time
;
/* Callback handed to splay_tree_foreach over file_info_tree: report one
   entry through print_time, using the node's key as the label string and
   the `time' field of the c_fileinfo stored as the node's value.
   DUMMY is unused.
   NOTE(review): the return statement is not visible in this extract.  */
279 dump_one_header (n
, dummy
)
281 void *dummy ATTRIBUTE_UNUSED
;
283 print_time ((const char *) n
->key
,
284 ((struct c_fileinfo
*) n
->value
)->time
);
/* Print the collected timing statistics to stderr.  The time elapsed since
   body_time is first added to the record for input_filename; then the
   header total, the main-file time (this_time - body_time) and their ratio
   are printed, followed by one line per entry of file_info_tree via
   dump_one_header.
   NOTE(review): the ratio divides by (this_time - body_time); if no run
   time has elapsed since body_time was last set, that divisor is 0 and the
   floating-point division will not yield a meaningful ratio -- confirm
   whether a guard is wanted.  */
289 dump_time_statistics ()
291 struct c_fileinfo
*file
= get_fileinfo (input_filename
);
292 int this_time
= get_run_time ();
293 file
->time
+= this_time
- body_time
;
295 fprintf (stderr
, "\n******\n");
296 print_time ("header files (total)", header_time
);
297 print_time ("main file (total)", this_time
- body_time
);
298 fprintf (stderr
, "ratio = %g : 1\n",
299 (double)header_time
/ (double)(this_time
- body_time
));
300 fprintf (stderr
, "\n******\n");
/* Walk every recorded file and print its individual time.  */
302 splay_tree_foreach (file_info_tree
, dump_one_header
, 0);
307 /* If C is not whitespace, return C.
308 Otherwise skip whitespace and return first nonwhite char read. */
318 /* There is no need to process comments or backslash-newline
319 here. None can occur in the output of cpp. Do handle \r
320 in case someone sent us a .i file. */
328 c
= check_newline ();
332 /* Per C99, horizontal whitespace is just these four characters. */
341 error ("stray '\\' in program");
351 /* Skips all of the white space at the current location in the input file. */
354 position_after_white_space ()
360 put_back (skip_white_space (c
));
363 /* Make the token buffer longer, preserving the data in it.
364 P should point to just beyond the last valid character in the old buffer.
365 The value we return is a pointer to the new buffer
366 at a place corresponding to P. */
/* Grow the token buffer: maxtoken is replaced by maxtoken * 2 + 10, and
   token_buffer is then reallocated with xrealloc to maxtoken + 2 bytes.
   NOTE(review): the loop keyword and braces are missing from this extract;
   the `while (maxtoken < size)' line presumably closes a do-while around
   the growth step, so that growth repeats until maxtoken reaches SIZE --
   confirm against the full file.  */
369 extend_token_buffer_to (size
)
373 maxtoken
= maxtoken
* 2 + 10;
374 while (maxtoken
< size
);
375 token_buffer
= (char *) xrealloc (token_buffer
, maxtoken
+ 2);
/* Enlarge the token buffer and return the position in the (possibly
   reallocated) buffer that corresponds to P.  P is converted to an offset
   from token_buffer before extend_token_buffer_to is called, because that
   call reassigns token_buffer; the offset is then re-applied to the new
   token_buffer value.  The offset itself is passed as the requested
   size.  */
379 extend_token_buffer (p
)
382 int offset
= p
- token_buffer
;
383 extend_token_buffer_to (offset
);
384 return token_buffer
+ offset
;
/* Fetch the next token with c_lex.  If it is a CPP_NUMBER whose value is an
   INTEGER_CST, store the low part of that constant in *NUM.  Otherwise,
   unless the token is CPP_EOF, report "invalid #-line".
   NOTE(review): the return statements, the declaration of `value' and the
   braces are missing from this extract; one visible caller tests
   `!read_line_number (...)', so the result is presumably nonzero on
   success -- confirm against the full file.  */
389 read_line_number (num
)
393 enum cpp_ttype token
= c_lex (&value
);
395 if (token
== CPP_NUMBER
&& TREE_CODE (value
) == INTEGER_CST
)
397 *num
= TREE_INT_CST_LOW (value
);
402 if (token
!= CPP_EOF
)
403 error ("invalid #-line");
408 /* At the beginning of a line, increment the line number
409 and process any #-directive on this line.
410 If the line is a #-directive, read the entire line and return a newline.
411 Otherwise, return the line's first non-whitespace character. */
418 /* Loop till we get a nonblank, non-directive line. */
421 /* Read first nonwhite char on the line. */
424 while (c
== ' ' || c
== '\t');
429 process_directive ();
442 enum cpp_ttype token
;
445 enum { act_none
, act_push
, act_pop
} action
;
446 int action_number
, l
;
448 #ifndef NO_IMPLICIT_EXTERN_C
449 int entering_c_header
= 0;
452 /* Don't read beyond this line. */
456 token
= c_lex (&value
);
458 if (token
== CPP_NAME
)
460 /* If a letter follows, then if the word here is `line', skip
461 it and ignore it; otherwise, ignore the line, with an error
462 if the word isn't `pragma'. */
464 const char *name
= IDENTIFIER_POINTER (value
);
466 if (!strcmp (name
, "pragma"))
471 else if (!strcmp (name
, "define"))
473 debug_define (lex_lineno
, GET_DIRECTIVE_LINE ());
476 else if (!strcmp (name
, "undef"))
478 debug_undef (lex_lineno
, GET_DIRECTIVE_LINE ());
481 else if (!strcmp (name
, "line"))
484 token
= c_lex (&value
);
487 else if (!strcmp (name
, "ident"))
489 /* #ident. We expect a string constant here.
490 The pedantic warning and syntax error are now in cpp. */
492 token
= c_lex (&value
);
493 if (token
!= CPP_STRING
|| TREE_CODE (value
) != STRING_CST
)
496 #ifdef ASM_OUTPUT_IDENT
499 ASM_OUTPUT_IDENT (asm_out_file
, TREE_STRING_POINTER (value
));
503 /* Skip the rest of this line. */
507 error ("undefined or invalid # directive `%s'", name
);
511 /* If the # is the only nonwhite char on the line,
512 just ignore it. Check the new newline. */
513 if (token
== CPP_EOF
)
517 /* Here we have either `#line' or `# <nonletter>'.
518 In either case, it should be a line number; a digit should follow. */
520 if (token
!= CPP_NUMBER
|| TREE_CODE (value
) != INTEGER_CST
)
522 error ("invalid #-line");
526 /* subtract one, because it is the following line that
527 gets the specified number */
529 l
= TREE_INT_CST_LOW (value
) - 1;
531 /* More follows: it must be a string constant (filename).
532 It would be neat to use cpplib to quickly process the string, but
533 (1) we don't have a handy tokenization of the string, and
534 (2) I don't know how well that would work in the presence
535 of filenames that contain wide characters. */
539 /* Don't treat \ as special if we are processing #line 1 "...".
540 If you want it to be treated specially, use # 1 "...". */
541 ignore_escape_flag
= 1;
544 /* Read the string constant. */
545 token
= c_lex (&value
);
547 ignore_escape_flag
= 0;
549 if (token
== CPP_EOF
)
551 /* No more: store the line number and check following line. */
556 if (token
!= CPP_STRING
|| TREE_CODE (value
) != STRING_CST
)
558 error ("invalid #line");
562 new_file
= TREE_STRING_POINTER (value
);
564 if (main_input_filename
== 0)
565 main_input_filename
= new_file
;
570 /* Each change of file name
571 reinitializes whether we are now in a system header. */
572 in_system_header
= 0;
574 if (!read_line_number (&action_number
))
576 /* Update the name in the top element of input_file_stack. */
577 if (input_file_stack
)
578 input_file_stack
->name
= input_filename
;
581 /* `1' after file name means entering new file.
582 `2' after file name means just left a file. */
584 if (action_number
== 1)
587 read_line_number (&action_number
);
589 else if (action_number
== 2)
592 read_line_number (&action_number
);
594 if (action_number
== 3)
596 /* `3' after file name means this is a system header file. */
597 in_system_header
= 1;
598 read_line_number (&action_number
);
600 #ifndef NO_IMPLICIT_EXTERN_C
601 if (action_number
== 4)
603 /* `4' after file name means this is a C header file. */
604 entering_c_header
= 1;
605 read_line_number (&action_number
);
609 /* Do the actions implied by the preceding numbers. */
610 if (action
== act_push
)
613 push_srcloc (input_filename
, 1);
614 input_file_stack
->indent_level
= indent_level
;
615 debug_start_source_file (input_filename
);
616 #ifndef NO_IMPLICIT_EXTERN_C
619 else if (entering_c_header
)
622 ++pending_lang_change
;
626 else if (action
== act_pop
)
628 /* Popping out of a file. */
629 if (input_file_stack
->next
)
631 #ifndef NO_IMPLICIT_EXTERN_C
632 if (c_header_level
&& --c_header_level
== 0)
634 if (entering_c_header
)
635 warning ("badly nested C headers from preprocessor");
636 --pending_lang_change
;
640 if (indent_level
!= input_file_stack
->indent_level
)
642 warning_with_file_and_line
643 (input_filename
, lex_lineno
,
644 "This file contains more '%c's than '%c's.",
645 indent_level
> input_file_stack
->indent_level
? '{' : '}',
646 indent_level
> input_file_stack
->indent_level
? '}' : '{');
650 debug_end_source_file (input_file_stack
->line
);
653 error ("#-lines for entering and leaving files don't match");
656 update_header_times (new_file
);
658 input_filename
= new_file
;
662 extract_interface_info ();
664 /* skip the rest of this line. */
668 while (getch () != '\n');
670 #else /* USE_CPPLIB */
672 /* Not yet handled: #pragma, #define, #undef.
673 No need to deal with linemarkers under normal conditions. */
676 cb_ident (pfile
, str
, len
)
677 cpp_reader
*pfile ATTRIBUTE_UNUSED
;
678 const unsigned char *str
;
681 #ifdef ASM_OUTPUT_IDENT
684 /* Convert escapes in the string. */
685 tree value
= lex_string ((const char *)str
, len
, 0);
686 ASM_OUTPUT_IDENT (asm_out_file
, TREE_STRING_POINTER (value
));
692 cb_enter_file (pfile
)
695 cpp_buffer
*ip
= CPP_BUFFER (pfile
);
696 /* Bleah, need a better interface to this. */
697 const char *flags
= cpp_syshdr_flags (pfile
, ip
);
699 /* Mustn't stack the main buffer on the input stack. (Ick.) */
702 lex_lineno
= lineno
= ip
->prev
->lineno
- 1;
703 push_srcloc (ggc_alloc_string (ip
->nominal_fname
, -1), 1);
704 input_file_stack
->indent_level
= indent_level
;
705 debug_start_source_file (ip
->nominal_fname
);
710 update_header_times (ip
->nominal_fname
);
713 extract_interface_info ();
715 in_system_header
= (flags
[0] != 0);
716 #ifndef NO_IMPLICIT_EXTERN_C
719 else if (flags
[2] != 0)
722 ++pending_lang_change
;
728 cb_leave_file (pfile
)
731 /* Bleah, need a better interface to this. */
732 const char *flags
= cpp_syshdr_flags (pfile
, CPP_BUFFER (pfile
));
734 if (indent_level
!= input_file_stack
->indent_level
)
736 warning_with_file_and_line
737 (input_filename
, lex_lineno
,
738 "This file contains more '%c's than '%c's.",
739 indent_level
> input_file_stack
->indent_level
? '{' : '}',
740 indent_level
> input_file_stack
->indent_level
? '}' : '{');
743 /* We get called for the main buffer, but we mustn't pop it. */
744 if (input_file_stack
->next
)
746 in_system_header
= (flags
[0] != 0);
747 #ifndef NO_IMPLICIT_EXTERN_C
748 if (c_header_level
&& --c_header_level
== 0)
751 warning ("badly nested C headers from preprocessor");
752 --pending_lang_change
;
755 lex_lineno
= CPP_BUFFER (pfile
)->lineno
;
756 debug_end_source_file (input_file_stack
->line
);
758 update_header_times (input_file_stack
->name
);
760 extract_interface_info ();
764 cb_rename_file (pfile
)
767 cpp_buffer
*ip
= CPP_BUFFER (pfile
);
768 /* Bleah, need a better interface to this. */
769 const char *flags
= cpp_syshdr_flags (pfile
, ip
);
770 input_filename
= ggc_alloc_string (ip
->nominal_fname
, -1);
771 lex_lineno
= ip
->lineno
;
772 in_system_header
= (flags
[0] != 0);
774 update_header_times (ip
->nominal_fname
);
776 extract_interface_info ();
780 cb_def_pragma (pfile
)
783 /* Issue a warning message if we have been asked to do so. Ignore
784 unknown pragmas in system headers unless an explicit
785 -Wunknown-pragmas has been given. */
786 if (warn_unknown_pragmas
> in_system_header
)
788 const unsigned char *space
, *name
;
789 const cpp_token
*t
= pfile
->first_directive_token
+ 2;
791 space
= t
[0].val
.node
->name
;
792 name
= t
[1].type
== CPP_NAME
? t
[1].val
.node
->name
: 0;
794 warning ("ignoring #pragma %s %s", space
, name
);
796 warning ("ignoring #pragma %s", space
);
799 #endif /* USE_CPPLIB */
801 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
803 [lex.charset]: The character designated by the universal-character-name
804 \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
805 is NNNNNNNN; the character designated by the universal-character-name
806 \uNNNN is that character whose character short name in ISO/IEC 10646 is
807 0000NNNN. If the hexadecimal value for a universal character name is
808 less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
809 universal character name designates a character in the basic source
810 character set, then the program is ill-formed.
812 We assume that wchar_t is Unicode, so we don't need to do any
813 mapping. Is this ever wrong? */
/* Accumulate LENGTH hexadecimal digits into CODE and validate the result
   as a universal-character-name.  The visible diagnostics cover running
   out of input ("incomplete ..."), a non-hex digit, and an EBCDIC target
   (where *CPTR is set to 0x3f).  After the digits are read, CODE is
   accepted if it is above 0x9f with the top bit clear; in the range
   0x20..0x7e it is an error unless it is 0x24, 0x40 or 0x60; anything
   else is "invalid universal-character-name".
   NOTE(review): the digit-fetch statements, the shift of CODE between
   digits, the '0'..'9' accumulation, the conditional-compilation lines
   around the EBCDIC branch, the final store to *CPTR and the return are
   all missing from this extract -- confirm against the full file.  */
816 read_ucs (p
, limit
, cptr
, length
)
822 unsigned int code
= 0;
825 for (; length
; --length
)
829 error ("incomplete universal-character-name");
836 error ("non hex digit '%c' in universal-character-name", c
);
/* Add the value of one hex digit; each range test handles one case of
   digit.  */
842 if (c
>= 'a' && c
<= 'f')
843 code
+= c
- 'a' + 10;
844 if (c
>= 'A' && c
<= 'F')
845 code
+= c
- 'A' + 10;
846 if (c
>= '0' && c
<= '9')
851 sorry ("universal-character-name on EBCDIC target");
852 *cptr
= 0x3f; /* EBCDIC invalid character */
856 if (code
> 0x9f && !(code
& 0x80000000))
857 /* True extended character, OK. */;
858 else if (code
>= 0x20 && code
< 0x7f)
860 /* ASCII printable character. The C character set consists of all of
861 these except $, @ and `. We use hex escapes so that this also
862 works with EBCDIC hosts. */
863 if (code
!= 0x24 && code
!= 0x40 && code
!= 0x60)
864 error ("universal-character-name used for '%c'", code
);
867 error ("invalid universal-character-name");
873 /* Read an escape sequence and write its character equivalent into *CPTR.
874 P is the input pointer, which is just after the backslash. LIMIT
875 is how much text we have.
876 Returns the updated input pointer. */
879 readescape (p
, limit
, cptr
)
884 unsigned int c
, code
, count
;
885 unsigned firstdig
= 0;
890 /* cpp has already issued an error for this. */
900 if (warn_traditional
&& !in_system_header
)
901 warning ("the meaning of `\\x' varies with -traditional");
903 if (flag_traditional
)
921 if (c
>= 'a' && c
<= 'f')
922 code
+= c
- 'a' + 10;
923 if (c
>= 'A' && c
<= 'F')
924 code
+= c
- 'A' + 10;
925 if (c
>= '0' && c
<= '9')
927 if (code
!= 0 || count
!= 0)
937 warning ("\\x used with no following hex digits");
942 /* Digits are all 0's. Ok. */
944 else if ((count
- 1) * 4 >= TYPE_PRECISION (integer_type_node
)
947 << (TYPE_PRECISION (integer_type_node
)
950 pedwarn ("hex escape out of range");
954 case '0': case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7':
957 for (count
= 0; count
< 3; count
++)
959 if (c
< '0' || c
> '7')
964 code
= (code
* 8) + (c
- '0');
976 case '\\': case '\'': case '"': case '?':
980 case 'n': *cptr
= TARGET_NEWLINE
; return p
;
981 case 't': *cptr
= TARGET_TAB
; return p
;
982 case 'r': *cptr
= TARGET_CR
; return p
;
983 case 'f': *cptr
= TARGET_FF
; return p
;
984 case 'b': *cptr
= TARGET_BS
; return p
;
985 case 'v': *cptr
= TARGET_VT
; return p
;
987 if (warn_traditional
&& !in_system_header
)
988 warning ("the meaning of '\\a' varies with -traditional");
989 *cptr
= flag_traditional
? c
: TARGET_BELL
;
992 /* Warnings and support checks handled by read_ucs(). */
994 if (c_language
!= clk_cplusplus
&& !flag_isoc99
)
997 if (warn_traditional
&& !in_system_header
)
998 warning ("the meaning of '\\%c' varies with -traditional", c
);
1000 return read_ucs (p
, limit
, cptr
, c
== 'u' ? 4 : 8);
1004 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c
);
1005 *cptr
= TARGET_ESC
; return p
;
1007 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1008 '\%' is used to prevent SCCS from getting confused. */
1009 case '(': case '{': case '[': case '%':
1011 pedwarn ("unknown escape sequence '\\%c'", c
);
1017 pedwarn ("unknown escape sequence '\\%c'", c
);
1019 pedwarn ("unknown escape sequence: '\\' followed by char 0x%x", c
);
1026 /* Returns nonzero if C is a universal-character-name. Give an error if it
1027 is not one which may appear in an identifier, as per [extendid].
1029 Note that extended character support in identifiers has not yet been
1030 implemented. It is my personal opinion that this is not a desirable
1031 feature. Portable code cannot count on support for more than the basic
1032 identifier character set. */
1035 is_extended_char (c
)
1038 #ifdef TARGET_EBCDIC
1045 /* None of the valid chars are outside the Basic Multilingual Plane (the
1049 error ("universal-character-name '\\U%08x' not valid in identifier", c
);
1054 if ((c
>= 0x00c0 && c
<= 0x00d6)
1055 || (c
>= 0x00d8 && c
<= 0x00f6)
1056 || (c
>= 0x00f8 && c
<= 0x01f5)
1057 || (c
>= 0x01fa && c
<= 0x0217)
1058 || (c
>= 0x0250 && c
<= 0x02a8)
1059 || (c
>= 0x1e00 && c
<= 0x1e9a)
1060 || (c
>= 0x1ea0 && c
<= 0x1ef9))
1065 || (c
>= 0x0388 && c
<= 0x038a)
1067 || (c
>= 0x038e && c
<= 0x03a1)
1068 || (c
>= 0x03a3 && c
<= 0x03ce)
1069 || (c
>= 0x03d0 && c
<= 0x03d6)
1074 || (c
>= 0x03e2 && c
<= 0x03f3)
1075 || (c
>= 0x1f00 && c
<= 0x1f15)
1076 || (c
>= 0x1f18 && c
<= 0x1f1d)
1077 || (c
>= 0x1f20 && c
<= 0x1f45)
1078 || (c
>= 0x1f48 && c
<= 0x1f4d)
1079 || (c
>= 0x1f50 && c
<= 0x1f57)
1083 || (c
>= 0x1f5f && c
<= 0x1f7d)
1084 || (c
>= 0x1f80 && c
<= 0x1fb4)
1085 || (c
>= 0x1fb6 && c
<= 0x1fbc)
1086 || (c
>= 0x1fc2 && c
<= 0x1fc4)
1087 || (c
>= 0x1fc6 && c
<= 0x1fcc)
1088 || (c
>= 0x1fd0 && c
<= 0x1fd3)
1089 || (c
>= 0x1fd6 && c
<= 0x1fdb)
1090 || (c
>= 0x1fe0 && c
<= 0x1fec)
1091 || (c
>= 0x1ff2 && c
<= 0x1ff4)
1092 || (c
>= 0x1ff6 && c
<= 0x1ffc))
1096 if ((c
>= 0x0401 && c
<= 0x040d)
1097 || (c
>= 0x040f && c
<= 0x044f)
1098 || (c
>= 0x0451 && c
<= 0x045c)
1099 || (c
>= 0x045e && c
<= 0x0481)
1100 || (c
>= 0x0490 && c
<= 0x04c4)
1101 || (c
>= 0x04c7 && c
<= 0x04c8)
1102 || (c
>= 0x04cb && c
<= 0x04cc)
1103 || (c
>= 0x04d0 && c
<= 0x04eb)
1104 || (c
>= 0x04ee && c
<= 0x04f5)
1105 || (c
>= 0x04f8 && c
<= 0x04f9))
1109 if ((c
>= 0x0531 && c
<= 0x0556)
1110 || (c
>= 0x0561 && c
<= 0x0587))
1114 if ((c
>= 0x05d0 && c
<= 0x05ea)
1115 || (c
>= 0x05f0 && c
<= 0x05f4))
1119 if ((c
>= 0x0621 && c
<= 0x063a)
1120 || (c
>= 0x0640 && c
<= 0x0652)
1121 || (c
>= 0x0670 && c
<= 0x06b7)
1122 || (c
>= 0x06ba && c
<= 0x06be)
1123 || (c
>= 0x06c0 && c
<= 0x06ce)
1124 || (c
>= 0x06e5 && c
<= 0x06e7))
1128 if ((c
>= 0x0905 && c
<= 0x0939)
1129 || (c
>= 0x0958 && c
<= 0x0962))
1133 if ((c
>= 0x0985 && c
<= 0x098c)
1134 || (c
>= 0x098f && c
<= 0x0990)
1135 || (c
>= 0x0993 && c
<= 0x09a8)
1136 || (c
>= 0x09aa && c
<= 0x09b0)
1138 || (c
>= 0x09b6 && c
<= 0x09b9)
1139 || (c
>= 0x09dc && c
<= 0x09dd)
1140 || (c
>= 0x09df && c
<= 0x09e1)
1141 || (c
>= 0x09f0 && c
<= 0x09f1))
1145 if ((c
>= 0x0a05 && c
<= 0x0a0a)
1146 || (c
>= 0x0a0f && c
<= 0x0a10)
1147 || (c
>= 0x0a13 && c
<= 0x0a28)
1148 || (c
>= 0x0a2a && c
<= 0x0a30)
1149 || (c
>= 0x0a32 && c
<= 0x0a33)
1150 || (c
>= 0x0a35 && c
<= 0x0a36)
1151 || (c
>= 0x0a38 && c
<= 0x0a39)
1152 || (c
>= 0x0a59 && c
<= 0x0a5c)
1157 if ((c
>= 0x0a85 && c
<= 0x0a8b)
1159 || (c
>= 0x0a8f && c
<= 0x0a91)
1160 || (c
>= 0x0a93 && c
<= 0x0aa8)
1161 || (c
>= 0x0aaa && c
<= 0x0ab0)
1162 || (c
>= 0x0ab2 && c
<= 0x0ab3)
1163 || (c
>= 0x0ab5 && c
<= 0x0ab9)
1168 if ((c
>= 0x0b05 && c
<= 0x0b0c)
1169 || (c
>= 0x0b0f && c
<= 0x0b10)
1170 || (c
>= 0x0b13 && c
<= 0x0b28)
1171 || (c
>= 0x0b2a && c
<= 0x0b30)
1172 || (c
>= 0x0b32 && c
<= 0x0b33)
1173 || (c
>= 0x0b36 && c
<= 0x0b39)
1174 || (c
>= 0x0b5c && c
<= 0x0b5d)
1175 || (c
>= 0x0b5f && c
<= 0x0b61))
1179 if ((c
>= 0x0b85 && c
<= 0x0b8a)
1180 || (c
>= 0x0b8e && c
<= 0x0b90)
1181 || (c
>= 0x0b92 && c
<= 0x0b95)
1182 || (c
>= 0x0b99 && c
<= 0x0b9a)
1184 || (c
>= 0x0b9e && c
<= 0x0b9f)
1185 || (c
>= 0x0ba3 && c
<= 0x0ba4)
1186 || (c
>= 0x0ba8 && c
<= 0x0baa)
1187 || (c
>= 0x0bae && c
<= 0x0bb5)
1188 || (c
>= 0x0bb7 && c
<= 0x0bb9))
1192 if ((c
>= 0x0c05 && c
<= 0x0c0c)
1193 || (c
>= 0x0c0e && c
<= 0x0c10)
1194 || (c
>= 0x0c12 && c
<= 0x0c28)
1195 || (c
>= 0x0c2a && c
<= 0x0c33)
1196 || (c
>= 0x0c35 && c
<= 0x0c39)
1197 || (c
>= 0x0c60 && c
<= 0x0c61))
1201 if ((c
>= 0x0c85 && c
<= 0x0c8c)
1202 || (c
>= 0x0c8e && c
<= 0x0c90)
1203 || (c
>= 0x0c92 && c
<= 0x0ca8)
1204 || (c
>= 0x0caa && c
<= 0x0cb3)
1205 || (c
>= 0x0cb5 && c
<= 0x0cb9)
1206 || (c
>= 0x0ce0 && c
<= 0x0ce1))
1210 if ((c
>= 0x0d05 && c
<= 0x0d0c)
1211 || (c
>= 0x0d0e && c
<= 0x0d10)
1212 || (c
>= 0x0d12 && c
<= 0x0d28)
1213 || (c
>= 0x0d2a && c
<= 0x0d39)
1214 || (c
>= 0x0d60 && c
<= 0x0d61))
1218 if ((c
>= 0x0e01 && c
<= 0x0e30)
1219 || (c
>= 0x0e32 && c
<= 0x0e33)
1220 || (c
>= 0x0e40 && c
<= 0x0e46)
1221 || (c
>= 0x0e4f && c
<= 0x0e5b))
1225 if ((c
>= 0x0e81 && c
<= 0x0e82)
1231 || (c
>= 0x0e94 && c
<= 0x0e97)
1232 || (c
>= 0x0e99 && c
<= 0x0e9f)
1233 || (c
>= 0x0ea1 && c
<= 0x0ea3)
1238 || (c
>= 0x0ead && c
<= 0x0eb0)
1242 || (c
>= 0x0ec0 && c
<= 0x0ec4)
1247 if ((c
>= 0x10a0 && c
<= 0x10c5)
1248 || (c
>= 0x10d0 && c
<= 0x10f6))
1252 if ((c
>= 0x3041 && c
<= 0x3094)
1253 || (c
>= 0x309b && c
<= 0x309e))
1257 if ((c
>= 0x30a1 && c
<= 0x30fe))
1261 if ((c
>= 0x3105 && c
<= 0x312c))
1265 if ((c
>= 0x1100 && c
<= 0x1159)
1266 || (c
>= 0x1161 && c
<= 0x11a2)
1267 || (c
>= 0x11a8 && c
<= 0x11f9))
1270 /* CJK Unified Ideographs */
1271 if ((c
>= 0xf900 && c
<= 0xfa2d)
1272 || (c
>= 0xfb1f && c
<= 0xfb36)
1273 || (c
>= 0xfb38 && c
<= 0xfb3c)
1275 || (c
>= 0xfb40 && c
<= 0xfb41)
1276 || (c
>= 0xfb42 && c
<= 0xfb44)
1277 || (c
>= 0xfb46 && c
<= 0xfbb1)
1278 || (c
>= 0xfbd3 && c
<= 0xfd3f)
1279 || (c
>= 0xfd50 && c
<= 0xfd8f)
1280 || (c
>= 0xfd92 && c
<= 0xfdc7)
1281 || (c
>= 0xfdf0 && c
<= 0xfdfb)
1282 || (c
>= 0xfe70 && c
<= 0xfe72)
1284 || (c
>= 0xfe76 && c
<= 0xfefc)
1285 || (c
>= 0xff21 && c
<= 0xff3a)
1286 || (c
>= 0xff41 && c
<= 0xff5a)
1287 || (c
>= 0xff66 && c
<= 0xffbe)
1288 || (c
>= 0xffc2 && c
<= 0xffc7)
1289 || (c
>= 0xffca && c
<= 0xffcf)
1290 || (c
>= 0xffd2 && c
<= 0xffd7)
1291 || (c
>= 0xffda && c
<= 0xffdc)
1292 || (c
>= 0x4e00 && c
<= 0x9fa5))
1295 error ("universal-character-name '\\u%04x' not valid in identifier", c
);
1300 /* Add the UTF-8 representation of C to the token_buffer. */
/* Append a multi-byte encoding of C to the token via extend_token.  The
   magnitude of C selects the initial SHIFT and the lead-byte MASK:
     <= 0x7ff      shift  6, mask 0xc0
     <= 0xffff     shift 12, mask 0xe0
     <= 0x1fffff   shift 18, mask 0xf0
     <= 0x3ffffff  shift 24, mask 0xf8
     otherwise     shift 30, mask 0xfc
   NOTE(review): the handling of the <= 0x7f case, the final `else', and
   the loop that steps SHIFT for the trailing bytes are missing from this
   extract -- confirm against the full file.  */
1303 utf8_extend_token (c
)
1308 if (c
<= 0x0000007f)
1313 else if (c
<= 0x000007ff)
1314 shift
= 6, mask
= 0xc0;
1315 else if (c
<= 0x0000ffff)
1316 shift
= 12, mask
= 0xe0;
1317 else if (c
<= 0x001fffff)
1318 shift
= 18, mask
= 0xf0;
1319 else if (c
<= 0x03ffffff)
1320 shift
= 24, mask
= 0xf8;
1322 shift
= 30, mask
= 0xfc;
/* Lead byte: the length-marker bits in MASK combined with the top bits
   of C.  */
1324 extend_token (mask
| (c
>> shift
));
/* NOTE(review): a trailing byte is formed as 0x80 | (c >> shift) with no
   `& 0x3f' visible.  UTF-8 continuation bytes carry only six payload
   bits, so higher bits of C that survive the unsigned char cast would
   corrupt the byte -- verify whether a mask is needed here.  */
1328 extend_token ((unsigned char) (0x80 | (c
>> shift
)));
1340 char long_long_flag
;
1343 struct try_type type_sequence
[] =
1345 { &integer_type_node
, 0, 0, 0},
1346 { &unsigned_type_node
, 1, 0, 0},
1347 { &long_integer_type_node
, 0, 1, 0},
1348 { &long_unsigned_type_node
, 1, 1, 0},
1349 { &long_long_integer_type_node
, 0, 1, 1},
1350 { &long_long_unsigned_type_node
, 1, 1, 1}
1362 int conversion_errno
;
1363 REAL_VALUE_TYPE value
;
1371 struct pf_args
* args
= (struct pf_args
*) data
;
1372 const char *typename
;
1374 args
->conversion_errno
= 0;
1375 args
->type
= double_type_node
;
1376 typename
= "double";
1378 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1379 tells the desired precision of the binary result
1380 of decimal-to-binary conversion. */
1385 error ("both 'f' and 'l' suffixes on floating constant");
1387 args
->type
= float_type_node
;
1390 else if (args
->lflag
)
1392 args
->type
= long_double_type_node
;
1393 typename
= "long double";
1395 else if (flag_single_precision_constant
)
1397 args
->type
= float_type_node
;
1402 if (args
->base
== 16)
1403 args
->value
= REAL_VALUE_HTOF (args
->str
, TYPE_MODE (args
->type
));
1405 args
->value
= REAL_VALUE_ATOF (args
->str
, TYPE_MODE (args
->type
));
1407 args
->conversion_errno
= errno
;
1408 /* A diagnostic is required here by some ISO C testsuites.
1409 This is not pedwarn, because some people don't want
1410 an error for this. */
1411 if (REAL_VALUE_ISINF (args
->value
) && pedantic
)
1412 warning ("floating point number exceeds range of '%s'", typename
);
1420 const cpp_token
*tok
;
1421 enum cpp_ttype type
;
1424 timevar_push (TV_CPP
);
1425 tok
= cpp_get_token (&parse_in
);
1426 timevar_pop (TV_CPP
);
1428 /* The C++ front end does horrible things with the current line
1429 number. To ensure an accurate line number, we must reset it
1430 every time we return a token. If we reset it from tok->line
1431 every time, we'll get line numbers inside macros referring to the
1432 macro definition; this is nice, but we don't want to change the
1433 behavior until integrated mode is the only option. So we keep our
1434 own idea of the line number, and reset it from tok->line at each
1435 new line (which never happens inside a macro). */
1436 if (tok
->flags
& BOL
)
1437 lex_lineno
= tok
->line
;
1440 lineno
= lex_lineno
;
1444 case CPP_OPEN_BRACE
: indent_level
++; break;
1445 case CPP_CLOSE_BRACE
: indent_level
--; break;
1447 /* Issue this error here, where we can get at tok->val.aux. */
1449 if (ISGRAPH (tok
->val
.aux
))
1450 error ("stray '%c' in program", tok
->val
.aux
);
1452 error ("stray '\\%#o' in program", tok
->val
.aux
);
1458 *value
= get_identifier ((const char *)tok
->val
.node
->name
);
1464 *value
= lex_number ((const char *)tok
->val
.str
.text
, tok
->val
.str
.len
);
1469 *value
= lex_charconst ((const char *)tok
->val
.str
.text
,
1470 tok
->val
.str
.len
, tok
->type
== CPP_WCHAR
);
1476 *value
= lex_string ((const char *)tok
->val
.str
.text
,
1477 tok
->val
.str
.len
, tok
->type
== CPP_WSTRING
);
1480 /* These tokens should not be visible outside cpplib. */
1481 case CPP_HEADER_NAME
:
1484 case CPP_PLACEMARKER
:
1504 /* Effectively do c = skip_white_space (c)
1505 but do it faster in the usual cases. */
1518 c
= skip_white_space (c
);
1520 goto found_nonwhite
;
1524 lineno
= lex_lineno
;
1532 /* Capital L may start a wide-string or wide-character constant. */
1534 register int c1
= getch();
1543 goto string_constant
;
1550 if (!doing_objc_thang
)
1554 /* '@' may start a constant string object. */
1555 register int c1
= getch ();
1559 goto string_constant
;
1562 /* Fall through to treat '@' as the start of an identifier. */
1565 case 'A': case 'B': case 'C': case 'D': case 'E':
1566 case 'F': case 'G': case 'H': case 'I': case 'J':
1567 case 'K': case 'M': case 'N': case 'O':
1568 case 'P': case 'Q': case 'R': case 'S': case 'T':
1569 case 'U': case 'V': case 'W': case 'X': case 'Y':
1571 case 'a': case 'b': case 'c': case 'd': case 'e':
1572 case 'f': case 'g': case 'h': case 'i': case 'j':
1573 case 'k': case 'l': case 'm': case 'n': case 'o':
1574 case 'p': case 'q': case 'r': case 's': case 't':
1575 case 'u': case 'v': case 'w': case 'x': case 'y':
1581 while (ISALNUM (c
) || c
== '_' || c
== '$' || c
== '@')
1583 /* Make sure this char really belongs in an identifier. */
1586 if (! dollars_in_ident
)
1587 error ("'$' in identifier");
1589 pedwarn ("'$' in identifier");
1592 if (p
>= token_buffer
+ maxtoken
)
1593 p
= extend_token_buffer (p
);
1601 if (p
>= token_buffer
+ maxtoken
)
1602 p
= extend_token_buffer (p
);
1605 *value
= get_identifier (token_buffer
);
1610 /* It's hard to preserve tokenization on '.' because
1611 it could be a symbol by itself, or it could be the
1612 start of a floating point number and cpp won't tell us. */
1618 return CPP_ELLIPSIS
;
1621 error ("parse error at '..'");
1623 else if (c1
== '*' && c_language
== clk_cplusplus
)
1624 return CPP_DOT_STAR
;
1632 case '0': case '1': case '2': case '3': case '4':
1633 case '5': case '6': case '7': case '8': case '9':
1636 /* Scan the next preprocessing number. All C numeric constants
1637 are preprocessing numbers, but not all preprocessing numbers
1638 are valid numeric constants. Preprocessing numbers fit the
1639 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1640 See C99 section 6.4.8. */
1643 if (p
>= token_buffer
+ maxtoken
)
1644 p
= extend_token_buffer (p
);
1649 if (c
== '+' || c
== '-')
1652 if (d
== 'e' || d
== 'E' || d
== 'p' || d
== 'P')
1655 if (ISALNUM (c
) || c
== '_' || c
== '.')
1661 *value
= lex_number (token_buffer
, p
- token_buffer
);
1671 int delimiter
= charconst
? '\'' : '"';
1672 #ifdef MULTIBYTE_CHARS
1673 int longest_char
= local_mb_cur_max ();
1674 (void) local_mbtowc (NULL_PTR
, NULL_PTR
, 0);
1677 p
= token_buffer
+ 1;
1679 while (c
!= delimiter
&& c
!= EOF
)
1681 if (p
+ 2 > token_buffer
+ maxtoken
)
1682 p
= extend_token_buffer (p
);
1684 /* ignore_escape_flag is set for reading the filename in #line. */
1685 if (!ignore_escape_flag
&& c
== '\\')
1688 *p
++ = getch (); /* escaped character */
1694 #ifdef MULTIBYTE_CHARS
1697 for (i
= 0; i
< longest_char
; ++i
)
1699 if (p
+ i
>= token_buffer
+ maxtoken
)
1700 p
= extend_token_buffer (p
);
1703 char_len
= local_mblen (p
, i
+ 1);
1710 /* Replace all except the first byte. */
1712 for (--i
; i
> 0; --i
)
1716 /* mbtowc sometimes needs an extra char before accepting */
1717 else if (char_len
<= i
)
1731 *value
= lex_charconst (token_buffer
+ 1, p
- (token_buffer
+ 1),
1733 return wide_flag
? CPP_WCHAR
: CPP_CHAR
;
1737 *value
= lex_string (token_buffer
+ 1, p
- (token_buffer
+ 1),
1739 return wide_flag
? CPP_WSTRING
: objc_flag
? CPP_OSTRING
: CPP_STRING
;
1757 enum cpp_ttype type
= CPP_EOF
;
1761 case '+': type
= CPP_PLUS
; break;
1762 case '-': type
= CPP_MINUS
; break;
1763 case '&': type
= CPP_AND
; break;
1764 case '|': type
= CPP_OR
; break;
1765 case ':': type
= CPP_COLON
; break;
1766 case '<': type
= CPP_LESS
; break;
1767 case '>': type
= CPP_GREATER
; break;
1768 case '*': type
= CPP_MULT
; break;
1769 case '/': type
= CPP_DIV
; break;
1770 case '%': type
= CPP_MOD
; break;
1771 case '^': type
= CPP_XOR
; break;
1772 case '!': type
= CPP_NOT
; break;
1773 case '=': type
= CPP_EQ
; break;
1778 if (c1
== '=' && type
< CPP_LAST_EQ
)
1779 return type
+ (CPP_EQ_EQ
- CPP_EQ
);
1783 case '+': return CPP_PLUS_PLUS
;
1784 case '-': return CPP_MINUS_MINUS
;
1785 case '&': return CPP_AND_AND
;
1786 case '|': return CPP_OR_OR
;
1788 if (c_language
== clk_cplusplus
)
1792 case '<': type
= CPP_LSHIFT
; goto do_triad
;
1793 case '>': type
= CPP_RSHIFT
; goto do_triad
;
1801 if (c_language
== clk_cplusplus
)
1805 return CPP_DEREF_STAR
;
1813 if (c1
== '?' && c_language
== clk_cplusplus
)
1814 { type
= CPP_MAX
; goto do_triad
; }
1818 if (c1
== ':' && flag_digraphs
)
1819 return CPP_OPEN_SQUARE
;
1820 if (c1
== '%' && flag_digraphs
)
1821 { indent_level
++; return CPP_OPEN_BRACE
; }
1822 if (c1
== '?' && c_language
== clk_cplusplus
)
1823 { type
= CPP_MIN
; goto do_triad
; }
1827 if (c1
== '>' && flag_digraphs
)
1828 return CPP_CLOSE_SQUARE
;
1831 if (c1
== '>' && flag_digraphs
)
1832 { indent_level
--; return CPP_CLOSE_BRACE
; }
1842 type
+= (CPP_EQ_EQ
- CPP_EQ
);
1848 case '~': return CPP_COMPL
;
1849 case '?': return CPP_QUERY
;
1850 case ',': return CPP_COMMA
;
1851 case '(': return CPP_OPEN_PAREN
;
1852 case ')': return CPP_CLOSE_PAREN
;
1853 case '[': return CPP_OPEN_SQUARE
;
1854 case ']': return CPP_CLOSE_SQUARE
;
1855 case '{': indent_level
++; return CPP_OPEN_BRACE
;
1856 case '}': indent_level
--; return CPP_CLOSE_BRACE
;
1857 case ';': return CPP_SEMICOLON
;
1862 error ("stray '%c' in program", c
);
1864 error ("stray '\\%#o' in program", c
);
/* Report MSGID through error () and jump to the syntax_error label.
   The do/while (0) wrapper makes the expansion act as one statement.  */
1872 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
/* Convert the LEN-character numeric token starting at STR into a tree
   constant.

   The diagnostics issued here cover hexadecimal floats, repeated or
   misplaced radix points, underscores, digits beyond the radix, missing
   digits, missing exponent digits, bad or repeated suffixes (f, l, u,
   i, j), out-of-range floating values, and integer constants that are
   too large for two HOST_WIDE_INTs or for the type chosen for them.

   Integer results are built with build_int_2 and floating results with
   build_real; an imaginary suffix wraps the value with build_complex.
   ERROR jumps to the syntax_error label.  NOTE(review): the closing
   return of integer_zero_node is presumably that error exit - confirm.  */
1875 lex_number (str
, len
)
1881 int largest_digit
= 0;
1887 enum anon1
{ NOT_FLOAT
= 0, AFTER_POINT
, AFTER_EXPON
} floatflag
= NOT_FLOAT
;
1889 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1890 The code below which fills the parts array assumes that a host
1891 int is at least twice as wide as a host char, and that
1892 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1893 Two HOST_WIDE_INTs is the largest int literal we can store.
1894 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1895 must be exactly the number of parts needed to hold the bits
1896 of two HOST_WIDE_INTs. */
1897 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1898 unsigned int parts
[TOTAL_PARTS
];
1900 /* Optimize for most frequent case. */
/* Shortcut results: the shared zero and one nodes, or a fresh node
   built from the value of the single digit at *STR.  */
1904 return integer_zero_node
;
1905 else if (*str
== '1')
1906 return integer_one_node
;
1908 return build_int_2 (*str
- '0', 0);
/* Walk every element of PARTS before any digit is accumulated.
   NOTE(review): presumably this clears the array - confirm.  */
1911 for (count
= 0; count
< TOTAL_PARTS
; count
++)
1914 /* len is known to be >1 at this point. */
/* Hexadecimal prefix: 0x or 0X with at least one character after it.  */
1917 if (len
> 2 && str
[0] == '0' && (str
[1] == 'x' || str
[1] == 'X'))
1922 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1923 else if (str
[0] == '0' && ISDIGIT (str
[1]))
/* These tests form one if / else-if chain, so when the radix-16 pedwarn
   fires, the two error checks after it are not evaluated.  The pedwarn
   itself is limited to pedantic mode without C99.  */
1935 if (base
== 16 && pedantic
&& !flag_isoc99
)
1936 pedwarn ("floating constant may not be in radix 16");
1937 else if (floatflag
== AFTER_POINT
)
1938 ERROR ("too many decimal points in floating constant");
1939 else if (floatflag
== AFTER_EXPON
)
1940 ERROR ("decimal point in exponent - impossible!");
1942 floatflag
= AFTER_POINT
;
1948 /* Possible future extension: silently ignore _ in numbers,
1949 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1950 but somewhat easier to read. Ada has this? */
1951 ERROR ("underscore in number");
1955 /* It is not a decimal point.
1956 It should be a digit (perhaps a hex digit). */
/* An exponent is introduced by e or E when the radix is at most 10 and
   by p or P when the radix is 16.  */
1962 else if (base
<= 10 && (c
== 'e' || c
== 'E'))
1965 floatflag
= AFTER_EXPON
;
1968 else if (base
== 16 && (c
== 'p' || c
== 'P'))
1970 floatflag
= AFTER_EXPON
;
1971 break; /* start of exponent */
/* Letters a-f and A-F count as digits only in radix 16.  */
1973 else if (base
== 16 && c
>= 'a' && c
<= 'f')
1977 else if (base
== 16 && c
>= 'A' && c
<= 'F')
1984 break; /* start of suffix */
/* LARGEST_DIGIT is compared against the radix once scanning is done.
   NOTE(review): presumably N becomes the new maximum here - confirm.  */
1987 if (n
>= largest_digit
)
/* Scale the value accumulated so far by the radix.  Each element of
   PARTS is multiplied, takes in the bits that spilled above
   HOST_BITS_PER_CHAR in the element below it, and is then masked back
   down to HOST_BITS_PER_CHAR bits.  */
1991 for (count
= 0; count
< TOTAL_PARTS
; count
++)
1993 parts
[count
] *= base
;
1997 += (parts
[count
-1] >> HOST_BITS_PER_CHAR
);
1999 &= (1 << HOST_BITS_PER_CHAR
) - 1;
2005 /* If the highest-order part overflows (gets larger than
2006 a host char will hold) then the whole number has
2007 overflowed. Record this and truncate the highest-order
2009 if (parts
[TOTAL_PARTS
- 1] >> HOST_BITS_PER_CHAR
)
2012 parts
[TOTAL_PARTS
- 1] &= (1 << HOST_BITS_PER_CHAR
) - 1;
2016 while (p
< str
+ len
);
2018 /* This can happen on input like `int i = 0x;' */
2020 ERROR ("numeric constant with no digits");
/* Reject digits that are not valid in the radix that was selected.  */
2022 if (largest_digit
>= base
)
2023 ERROR ("numeric constant contains digits beyond the radix");
/* Floating constant.  */
2025 if (floatflag
!= NOT_FLOAT
)
2028 int imag
, fflag
, lflag
, conversion_errno
;
2029 REAL_VALUE_TYPE real
;
2030 struct pf_args args
;
/* A radix-16 float is accepted only if an exponent marker was seen.  */
2033 if (base
== 16 && floatflag
!= AFTER_EXPON
)
2034 ERROR ("hexadecimal floating constant has no exponent");
2036 /* Read explicit exponent if any, and put it in tokenbuf. */
2037 if ((base
== 10 && ((c
== 'e') || (c
== 'E')))
2038 || (base
== 16 && (c
== 'p' || c
== 'P')))
/* An optional sign may follow the exponent marker.  */
2042 if (p
< str
+ len
&& (c
== '+' || c
== '-'))
2044 /* Exponent is decimal, even if string is a hex float. */
2046 ERROR ("floating constant exponent has no digits");
2047 while (p
< str
+ len
&& ISDIGIT (c
))
2053 /* Copy the float constant now; we don't want any suffixes in the
2054 string passed to parse_float. */
/* COPY is stack storage from alloca: the P - STR characters scanned so
   far followed by a terminating zero byte.  */
2055 copy
= alloca (p
- str
+ 1);
2056 memcpy (copy
, str
, p
- str
);
2057 copy
[p
- str
] = '\0';
2059 /* Now parse suffixes. */
2060 fflag
= lflag
= imag
= 0;
2061 while (p
< str
+ len
)
2066 ERROR ("more than one 'f' suffix on floating constant");
2067 else if (warn_traditional
&& !in_system_header
)
2068 warning ("traditional C rejects the 'f' suffix");
2075 ERROR ("more than one 'l' suffix on floating constant");
2076 else if (warn_traditional
&& !in_system_header
)
2077 warning ("traditional C rejects the 'l' suffix");
2085 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2087 pedwarn ("ISO C forbids imaginary numeric constants");
2092 ERROR ("invalid suffix on floating constant");
2095 /* Setup input for parse_float() */
2101 /* Convert string to a double, checking for overflow. */
/* parse_float is run through do_float_handler with ARGS as its
   argument block.  */
2102 if (do_float_handler (parse_float
, (PTR
) &args
))
2104 /* Receive output from parse_float() */
2108 /* We got an exception from parse_float() */
2109 ERROR ("floating constant out of range");
2111 /* Receive output from parse_float() */
2112 conversion_errno
= args
.conversion_errno
;
2116 /* ERANGE is also reported for underflow,
2117 so test the value to distinguish overflow from that. */
/* The warning is limited to pedantic, non-traditional mode and to
   values above dconst1 or below dconstm1.  */
2118 if (conversion_errno
== ERANGE
&& !flag_traditional
&& pedantic
2119 && (REAL_VALUES_LESS (dconst1
, real
)
2120 || REAL_VALUES_LESS (real
, dconstm1
)))
2121 warning ("floating point number exceeds range of 'double'");
2124 /* Create a node with determined type and value. */
/* The build_complex form passes zero converted to TYPE as its first
   value operand and the parsed real as its second; the plain form is
   just the real constant.  */
2126 value
= build_complex (NULL_TREE
, convert (type
, integer_zero_node
),
2127 build_real (type
, real
));
2129 value
= build_real (type
, real
);
/* Integer constant: declarations, suffix parsing, then type selection.  */
2133 tree trad_type
, ansi_type
, type
;
2134 HOST_WIDE_INT high
, low
;
2135 int spec_unsigned
= 0;
2137 int spec_long_long
= 0;
2142 trad_type
= ansi_type
= type
= NULL_TREE
;
2143 while (p
< str
+ len
)
2150 error ("two 'u' suffixes on integer constant");
2151 else if (warn_traditional
&& !in_system_header
)
2152 warning ("traditional C rejects the 'u' suffix");
2163 error ("three 'l' suffixes on integer constant");
2165 error ("'lul' is not a valid integer suffix");
2166 else if (c
!= spec_long
)
2167 error ("'Ll' and 'lL' are not valid integer suffixes");
2168 else if (pedantic
&& ! flag_isoc99
2169 && ! in_system_header
&& warn_long_long
)
2170 pedwarn ("ISO C89 forbids long long integer constants");
2176 case 'i': case 'I': case 'j': case 'J':
2178 error ("more than one 'i' or 'j' suffix on integer constant");
2180 pedwarn ("ISO C forbids imaginary numeric constants");
2185 ERROR ("invalid suffix on integer constant");
2189 /* If the literal overflowed, pedwarn about it now. */
2193 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT
* 2);
2196 /* This is simplified by the fact that our constant
2197 is always positive. */
/* Reassemble the two halves: the lower half of PARTS is merged into LOW
   and the upper half into HIGH, each part shifted to its position.  */
2201 for (i
= 0; i
< HOST_BITS_PER_WIDE_INT
/ HOST_BITS_PER_CHAR
; i
++)
2203 high
|= ((HOST_WIDE_INT
) parts
[i
+ (HOST_BITS_PER_WIDE_INT
2204 / HOST_BITS_PER_CHAR
)]
2205 << (i
* HOST_BITS_PER_CHAR
));
2206 low
|= (HOST_WIDE_INT
) parts
[i
] << (i
* HOST_BITS_PER_CHAR
);
/* The node is first typed long_long_unsigned_type_node; the type that
   is finally chosen is stored into it further down.  */
2209 value
= build_int_2 (low
, high
);
2210 TREE_TYPE (value
) = long_long_unsigned_type_node
;
2212 /* If warn_traditional, calculate both the ISO type and the
2213 traditional type, then see if they disagree.
2214 Otherwise, calculate only the type for the dialect in use. */
2215 if (warn_traditional
|| flag_traditional
)
2217 /* Calculate the traditional type. */
2218 /* Traditionally, any constant is signed; but if unsigned is
2219 specified explicitly, obey that. Use the smallest size
2220 with the right number of bits, except for one special
2221 case with decimal constants. */
2222 if (! spec_long
&& base
!= 10
2223 && int_fits_type_p (value
, unsigned_type_node
))
2224 trad_type
= spec_unsigned
? unsigned_type_node
: integer_type_node
;
2225 /* A decimal constant must be long if it does not fit in
2226 type int. I think this is independent of whether the
2227 constant is signed. */
2228 else if (! spec_long
&& base
== 10
2229 && int_fits_type_p (value
, integer_type_node
))
2230 trad_type
= spec_unsigned
? unsigned_type_node
: integer_type_node
;
2231 else if (! spec_long_long
)
2232 trad_type
= (spec_unsigned
2233 ? long_unsigned_type_node
2234 : long_integer_type_node
);
2235 else if (int_fits_type_p (value
,
2237 ? long_long_unsigned_type_node
2238 : long_long_integer_type_node
))
2239 trad_type
= (spec_unsigned
2240 ? long_long_unsigned_type_node
2241 : long_long_integer_type_node
);
2243 trad_type
= (spec_unsigned
2244 ? widest_unsigned_literal_type_node
2245 : widest_integer_literal_type_node
);
2247 if (warn_traditional
|| ! flag_traditional
)
2249 /* Calculate the ISO type. */
/* The candidates are tried from narrowest to widest: int, unsigned int,
   long, unsigned long, long long, unsigned long long, then the widest
   literal types.  The unsigned int step is open only to non-decimal
   constants or constants with an explicit unsigned suffix.  */
2250 if (! spec_long
&& ! spec_unsigned
2251 && int_fits_type_p (value
, integer_type_node
))
2252 ansi_type
= integer_type_node
;
2253 else if (! spec_long
&& (base
!= 10 || spec_unsigned
)
2254 && int_fits_type_p (value
, unsigned_type_node
))
2255 ansi_type
= unsigned_type_node
;
2256 else if (! spec_unsigned
&& !spec_long_long
2257 && int_fits_type_p (value
, long_integer_type_node
))
2258 ansi_type
= long_integer_type_node
;
2259 else if (! spec_long_long
2260 && int_fits_type_p (value
, long_unsigned_type_node
))
2261 ansi_type
= long_unsigned_type_node
;
2262 else if (! spec_unsigned
2263 && int_fits_type_p (value
, long_long_integer_type_node
))
2264 ansi_type
= long_long_integer_type_node
;
2265 else if (int_fits_type_p (value
, long_long_unsigned_type_node
))
2266 ansi_type
= long_long_unsigned_type_node
;
2267 else if (! spec_unsigned
2268 && int_fits_type_p (value
, widest_integer_literal_type_node
))
2269 ansi_type
= widest_integer_literal_type_node
;
2271 ansi_type
= widest_unsigned_literal_type_node
;
/* The dialect in use decides which of the two computed types applies.  */
2274 type
= flag_traditional
? trad_type
: ansi_type
;
2276 /* We assume that constants specified in a non-decimal
2277 base are bit patterns, and that the programmer really
2278 meant what they wrote. */
2279 if (warn_traditional
&& !in_system_header
2280 && base
== 10 && trad_type
!= ansi_type
)
2282 if (TYPE_PRECISION (trad_type
) != TYPE_PRECISION (ansi_type
))
2283 warning ("width of integer constant changes with -traditional");
2284 else if (TREE_UNSIGNED (trad_type
) != TREE_UNSIGNED (ansi_type
))
2285 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2287 warning ("width of integer constant may change on other systems with -traditional");
2290 if (pedantic
&& !flag_traditional
&& (flag_isoc99
|| !spec_long_long
)
2293 ? TYPE_PRECISION (long_long_integer_type_node
)
2294 : TYPE_PRECISION (long_integer_type_node
)) < TYPE_PRECISION (type
)))
2297 pedwarn ("integer constant larger than the maximum value of %s",
2299 ? (TREE_UNSIGNED (type
)
2300 ? "an unsigned long long int"
2301 : "a long long int")
2302 : "an unsigned long int"));
2305 if (base
== 10 && ! spec_unsigned
&& TREE_UNSIGNED (type
))
2306 warning ("decimal constant is so large that it is unsigned");
/* An imaginary integer constant is accepted only when its type is no
   wider than int; the value is then converted to int and passed as the
   second value operand of build_complex, with integer zero as the first.  */
2310 if (TYPE_PRECISION (type
)
2311 <= TYPE_PRECISION (integer_type_node
))
2312 value
= build_complex (NULL_TREE
, integer_zero_node
,
2313 convert (integer_type_node
, value
));
2315 ERROR ("complex integer constant is too wide for 'complex int'");
2317 else if (flag_traditional
&& !int_fits_type_p (value
, type
))
2318 /* The traditional constant 0x80000000 is signed
2319 but doesn't fit in the range of int.
2320 This will change it to -0x80000000, which does fit. */
/* Retype as the unsigned variant of TYPE, convert back to TYPE, then
   clear both overflow flags on the converted node.  */
2322 TREE_TYPE (value
) = unsigned_type (type
);
2323 value
= convert (type
, value
);
2324 TREE_OVERFLOW (value
) = TREE_CONSTANT_OVERFLOW (value
) = 0;
2327 TREE_TYPE (value
) = type
;
2329 /* If it's still an integer (not a complex), and it doesn't
2330 fit in the type we choose for it, then pedwarn. */
2333 && TREE_CODE (TREE_TYPE (value
)) == INTEGER_TYPE
2334 && ! int_fits_type_p (value
, TREE_TYPE (value
)))
2335 pedwarn ("integer constant is larger than the maximum value for its type");
/* The message prints the P - STR characters consumed so far.  */
2339 error ("missing white space after number '%.*s'", (int) (p
- str
), str
);
2344 return integer_zero_node
;
/* Build a string constant from the LEN characters starting at STR.

   The working buffer comes from alloca and is sized for LEN + 1
   characters, each WCHAR_BYTES bytes when WIDE is nonzero and one byte
   otherwise.  Backslash escapes are decoded with readescape unless
   ignore_escape_flag is set.  The result is made with build_string and
   is given either wchar_array_type_node or char_array_type_node as its
   type (NOTE(review): presumably selected by WIDE - confirm).  */
2348 lex_string (str
, len
, wide
)
2354 char *buf
= alloca ((len
+ 1) * (wide
? WCHAR_BYTES
: 1));
2356 const char *p
= str
, *limit
= str
+ len
;
/* WIDTH is the number of bits available for one character.  */
2358 unsigned width
= wide
? WCHAR_TYPE_SIZE
2359 : TYPE_PRECISION (char_type_node
);
2361 #ifdef MULTIBYTE_CHARS
2362 /* Reset multibyte conversion state. */
2363 (void) local_mbtowc (NULL_PTR
, NULL_PTR
, 0);
2368 #ifdef MULTIBYTE_CHARS
2372 char_len
= local_mbtowc (&wc
, p
, limit
- p
);
2375 warning ("Ignoring invalid multibyte character");
2388 if (c
== '\\' && !ignore_escape_flag
)
/* readescape stores the decoded character in C and returns the updated
   scan position.  The range test is made only when WIDTH is narrower
   than a host int, which keeps the shift by WIDTH within range.  */
2390 p
= readescape (p
, limit
, &c
);
2391 if (width
< HOST_BITS_PER_INT
2392 && (unsigned) c
>= ((unsigned)1 << width
))
2393 pedwarn ("escape sequence out of range for character");
2396 /* Add this single character into the buffer either as a wchar_t
2397 or as a single byte. */
2400 unsigned charwidth
= TYPE_PRECISION (char_type_node
);
2401 unsigned bytemask
= (1 << charwidth
) - 1;
2404 for (byte
= 0; byte
< WCHAR_BYTES
; ++byte
)
/* Bytes at or beyond sizeof (c) cannot be taken from C by shifting.
   NOTE(review): presumably they are filled with zero - confirm.  */
2407 if (byte
>= (int) sizeof (c
))
2410 n
= (c
>> (byte
* charwidth
)) & bytemask
;
/* BYTE counts up from the least significant end of C, so on a
   big-endian target it is stored counting back from the last slot.  */
2411 if (BYTES_BIG_ENDIAN
)
2412 q
[WCHAR_BYTES
- byte
- 1] = n
;
2424 /* Terminate the string value, either with a single byte zero
2425 or with a wide zero. */
2429 memset (q
, 0, WCHAR_BYTES
);
/* Q - BUF is the number of bytes written into the buffer.  */
2437 value
= build_string (q
- buf
, buf
);
2440 TREE_TYPE (value
) = wchar_array_type_node
;
2442 TREE_TYPE (value
) = char_array_type_node
;
/* Build an integer constant for the character constant whose LEN
   characters start at STR; WIDE is the wide-character flag.

   Escapes are decoded with readescape, and each character is passed
   through MAP_CHARACTER when that macro is defined.  Characters are
   merged into RESULT by shifting; an empty constant and a constant with
   more than MAX_CHARS characters are errors, and a multi-character
   constant may draw a warning.  One result form is sign-extended
   according to the signedness of char and typed char_type_node (C++,
   at most one character) or integer_type_node; the other is built
   unextended and typed wchar_type_node.  */
2447 lex_charconst (str
, len
, wide
)
2452 const char *limit
= str
+ len
;
2456 unsigned width
= TYPE_PRECISION (char_type_node
);
2461 #ifdef MULTIBYTE_CHARS
2462 int longest_char
= local_mb_cur_max ();
2463 (void) local_mbtowc (NULL_PTR
, NULL_PTR
, 0);
/* MAX_CHARS is how many characters of char width fit in an int.  */
2466 max_chars
= TYPE_PRECISION (integer_type_node
) / width
;
2468 width
= WCHAR_TYPE_SIZE
;
2472 #ifdef MULTIBYTE_CHARS
2476 char_len
= local_mbtowc (&wc
, str
, limit
- str
);
2479 warning ("Ignoring invalid multibyte character");
/* readescape stores the decoded character in C and returns the updated
   scan position.  The range test is made only when WIDTH is narrower
   than a host int, which keeps the shift by WIDTH within range.  */
2495 str
= readescape (str
, limit
, &c
);
2496 if (width
< HOST_BITS_PER_INT
2497 && (unsigned) c
>= ((unsigned)1 << width
))
2498 pedwarn ("escape sequence out of range for character");
2500 #ifdef MAP_CHARACTER
2502 c
= MAP_CHARACTER (c
);
2505 /* Merge character into result; ignore excess chars. */
/* NUM_CHARS advances in units of the width of char, so one character
   may add more than one when WIDTH is larger than that.  */
2506 num_chars
+= (width
/ TYPE_PRECISION (char_type_node
));
2507 if (num_chars
< max_chars
+ 1)
2509 if (width
< HOST_BITS_PER_INT
)
2510 result
= (result
<< width
) | (c
& ((1 << width
) - 1));
2516 if (chars_seen
== 0)
2517 error ("empty character constant");
2518 else if (num_chars
> max_chars
)
/* Clamp NUM_CHARS so that NUM_BITS below stays within range.  */
2520 num_chars
= max_chars
;
2521 error ("character constant too long");
2523 else if (chars_seen
!= 1 && ! flag_traditional
&& warn_multichar
)
2524 warning ("multi-character character constant");
2526 /* If char type is signed, sign-extend the constant. */
2529 int num_bits
= num_chars
* width
;
2531 /* We already got an error; avoid invalid shift. */
2532 value
= build_int_2 (0, 0);
/* When char is unsigned or the top bit of the constant is clear, RESULT
   is masked down to its low NUM_BITS bits; otherwise every bit above
   NUM_BITS is set to one.  */
2533 else if (TREE_UNSIGNED (char_type_node
)
2534 || ((result
>> (num_bits
- 1)) & 1) == 0)
2535 value
= build_int_2 (result
& (~(unsigned HOST_WIDE_INT
) 0
2536 >> (HOST_BITS_PER_WIDE_INT
- num_bits
)),
2539 value
= build_int_2 (result
| ~(~(unsigned HOST_WIDE_INT
) 0
2540 >> (HOST_BITS_PER_WIDE_INT
- num_bits
)),
2542 /* In C, a character constant has type 'int'; in C++, 'char'. */
2543 if (chars_seen
<= 1 && c_language
== clk_cplusplus
)
2544 TREE_TYPE (value
) = char_type_node
;
2546 TREE_TYPE (value
) = integer_type_node
;
/* This form takes RESULT as the low word with a zero high word and is
   typed wchar_type_node.  */
2550 value
= build_int_2 (result
, 0);
2551 TREE_TYPE (value
) = wchar_type_node
;
2557 /* Mark for GC a node in a splay tree whose keys are strings.
   N is the node to mark: its key is cast to char * and handed to
   ggc_mark_string.  DATA is declared ATTRIBUTE_UNUSED and is not
   referenced.  The (N, DATA) parameter pair matches the way this
   function is handed to splay_tree_foreach elsewhere in the file.  */
2560 mark_splay_tree_node (n
, data
)
2562 void *data ATTRIBUTE_UNUSED
;
2564 ggc_mark_string ((char *) n
->key
);
2568 /* Mark for GC a splay tree whose keys are strings. */
2574 splay_tree st
= *(splay_tree
*) p
;
2576 splay_tree_foreach (st
, mark_splay_tree_node
, NULL
);