re PR target/8343 ([m68k] [3.2 regression] m68k-elf/rtems ICE at instantiate_virtual_...
[official-gcc.git] / gcc / c-lex.c
blob5507e6311d0f65ad43ad650b527fce784836a2af
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 #ifdef MULTIBYTE_CHARS
46 #include "mbchar.h"
47 #include <locale.h>
48 #endif /* MULTIBYTE_CHARS */
50 /* The current line map. */
51 static const struct line_map *map;
53 /* The line used to refresh the lineno global variable after each token. */
54 static unsigned int src_lineno;
56 /* We may keep statistics about how long which files took to compile. */
57 static int header_time, body_time;
58 static splay_tree file_info_tree;
60 /* File used for outputting assembler code. */
61 extern FILE *asm_out_file;
63 #undef WCHAR_TYPE_SIZE
64 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
66 /* Number of bytes in a wide character. */
67 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
69 int pending_lang_change; /* If we need to switch languages - C++ only */
70 int c_header_level; /* depth in C headers - C++ only */
72 /* Nonzero tells yylex to ignore \ in string constants. */
73 static int ignore_escape_flag;
75 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
76 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
77 static enum integer_type_kind
78 narrowest_unsigned_type PARAMS ((tree, unsigned int));
79 static enum integer_type_kind
80 narrowest_signed_type PARAMS ((tree, unsigned int));
81 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
82 int));
83 static tree lex_charconst PARAMS ((const cpp_token *));
84 static void update_header_times PARAMS ((const char *));
85 static int dump_one_header PARAMS ((splay_tree_node, void *));
86 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
87 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
88 const cpp_string *));
89 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
90 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
91 static void cb_define PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
93 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
94 cpp_hashnode *));
96 const char *
97 init_c_lex (filename)
98 const char *filename;
100 struct cpp_callbacks *cb;
101 struct c_fileinfo *toplevel;
103 /* Set up filename timing. Must happen before cpp_read_main_file. */
104 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
106 (splay_tree_delete_value_fn)free);
107 toplevel = get_fileinfo ("<top level>");
108 if (flag_detailed_statistics)
110 header_time = 0;
111 body_time = get_run_time ();
112 toplevel->time = body_time;
115 #ifdef MULTIBYTE_CHARS
116 /* Change to the native locale for multibyte conversions. */
117 setlocale (LC_CTYPE, "");
118 GET_ENVIRONMENT (literal_codeset, "LANG");
119 #endif
121 cb = cpp_get_callbacks (parse_in);
123 cb->line_change = cb_line_change;
124 cb->ident = cb_ident;
125 cb->file_change = cb_file_change;
126 cb->def_pragma = cb_def_pragma;
128 /* Set the debug callbacks if we can use them. */
129 if (debug_info_level == DINFO_LEVEL_VERBOSE
130 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
131 || write_symbols == VMS_AND_DWARF2_DEBUG))
133 cb->define = cb_define;
134 cb->undef = cb_undef;
137 /* Start it at 0. */
138 lineno = 0;
140 return cpp_read_main_file (parse_in, filename, ident_hash);
143 /* A thin wrapper around the real parser that initializes the
144 integrated preprocessor after debug output has been initialized.
145 Also, make sure the start_source_file debug hook gets called for
146 the primary source file. */
148 void
149 c_common_parse_file (set_yydebug)
150 int set_yydebug ATTRIBUTE_UNUSED;
152 #if YYDEBUG != 0
153 yydebug = set_yydebug;
154 #else
155 warning ("YYDEBUG not defined");
156 #endif
158 (*debug_hooks->start_source_file) (lineno, input_filename);
159 cpp_finish_options (parse_in);
161 yyparse ();
162 free_parser_stacks ();
165 struct c_fileinfo *
166 get_fileinfo (name)
167 const char *name;
169 splay_tree_node n;
170 struct c_fileinfo *fi;
172 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
173 if (n)
174 return (struct c_fileinfo *) n->value;
176 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
177 fi->time = 0;
178 fi->interface_only = 0;
179 fi->interface_unknown = 1;
180 splay_tree_insert (file_info_tree, (splay_tree_key) name,
181 (splay_tree_value) fi);
182 return fi;
185 static void
186 update_header_times (name)
187 const char *name;
189 /* Changing files again. This means currently collected time
190 is charged against header time, and body time starts back at 0. */
191 if (flag_detailed_statistics)
193 int this_time = get_run_time ();
194 struct c_fileinfo *file = get_fileinfo (name);
195 header_time += this_time - body_time;
196 file->time += this_time - body_time;
197 body_time = this_time;
201 static int
202 dump_one_header (n, dummy)
203 splay_tree_node n;
204 void *dummy ATTRIBUTE_UNUSED;
206 print_time ((const char *) n->key,
207 ((struct c_fileinfo *) n->value)->time);
208 return 0;
211 void
212 dump_time_statistics ()
214 struct c_fileinfo *file = get_fileinfo (input_filename);
215 int this_time = get_run_time ();
216 file->time += this_time - body_time;
218 fprintf (stderr, "\n******\n");
219 print_time ("header files (total)", header_time);
220 print_time ("main file (total)", this_time - body_time);
221 fprintf (stderr, "ratio = %g : 1\n",
222 (double)header_time / (double)(this_time - body_time));
223 fprintf (stderr, "\n******\n");
225 splay_tree_foreach (file_info_tree, dump_one_header, 0);
228 static void
229 cb_ident (pfile, line, str)
230 cpp_reader *pfile ATTRIBUTE_UNUSED;
231 unsigned int line ATTRIBUTE_UNUSED;
232 const cpp_string *str ATTRIBUTE_UNUSED;
234 #ifdef ASM_OUTPUT_IDENT
235 if (! flag_no_ident)
237 /* Convert escapes in the string. */
238 tree value ATTRIBUTE_UNUSED = lex_string (str->text, str->len, 0);
239 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
241 #endif
244 /* Called at the start of every non-empty line. TOKEN is the first
245 lexed token on the line. Used for diagnostic line numbers. */
246 static void
247 cb_line_change (pfile, token, parsing_args)
248 cpp_reader *pfile ATTRIBUTE_UNUSED;
249 const cpp_token *token;
250 int parsing_args ATTRIBUTE_UNUSED;
252 src_lineno = SOURCE_LINE (map, token->line);
255 static void
256 cb_file_change (pfile, new_map)
257 cpp_reader *pfile ATTRIBUTE_UNUSED;
258 const struct line_map *new_map;
260 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
262 if (new_map->reason == LC_ENTER)
264 /* Don't stack the main buffer on the input stack;
265 we already did in compile_file. */
266 if (map == NULL)
267 main_input_filename = new_map->to_file;
268 else
270 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
272 lineno = included_at;
273 push_srcloc (new_map->to_file, 1);
274 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
275 #ifndef NO_IMPLICIT_EXTERN_C
276 if (c_header_level)
277 ++c_header_level;
278 else if (new_map->sysp == 2)
280 c_header_level = 1;
281 ++pending_lang_change;
283 #endif
286 else if (new_map->reason == LC_LEAVE)
288 #ifndef NO_IMPLICIT_EXTERN_C
289 if (c_header_level && --c_header_level == 0)
291 if (new_map->sysp == 2)
292 warning ("badly nested C headers from preprocessor");
293 --pending_lang_change;
295 #endif
296 pop_srcloc ();
298 (*debug_hooks->end_source_file) (to_line);
301 update_header_times (new_map->to_file);
302 in_system_header = new_map->sysp != 0;
303 input_filename = new_map->to_file;
304 lineno = to_line;
305 map = new_map;
307 /* Hook for C++. */
308 extract_interface_info ();
311 static void
312 cb_def_pragma (pfile, line)
313 cpp_reader *pfile;
314 unsigned int line;
316 /* Issue a warning message if we have been asked to do so. Ignore
317 unknown pragmas in system headers unless an explicit
318 -Wunknown-pragmas has been given. */
319 if (warn_unknown_pragmas > in_system_header)
321 const unsigned char *space, *name;
322 const cpp_token *s;
324 space = name = (const unsigned char *) "";
325 s = cpp_get_token (pfile);
326 if (s->type != CPP_EOF)
328 space = cpp_token_as_text (pfile, s);
329 s = cpp_get_token (pfile);
330 if (s->type == CPP_NAME)
331 name = cpp_token_as_text (pfile, s);
334 lineno = SOURCE_LINE (map, line);
335 warning ("ignoring #pragma %s %s", space, name);
339 /* #define callback for DWARF and DWARF2 debug info. */
340 static void
341 cb_define (pfile, line, node)
342 cpp_reader *pfile;
343 unsigned int line;
344 cpp_hashnode *node;
346 (*debug_hooks->define) (SOURCE_LINE (map, line),
347 (const char *) cpp_macro_definition (pfile, node));
350 /* #undef callback for DWARF and DWARF2 debug info. */
351 static void
352 cb_undef (pfile, line, node)
353 cpp_reader *pfile ATTRIBUTE_UNUSED;
354 unsigned int line;
355 cpp_hashnode *node;
357 (*debug_hooks->undef) (SOURCE_LINE (map, line),
358 (const char *) NODE_NAME (node));
#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao.  The single code point after 0x0e8a is 0x0e8d; the previous
     0x0e0d lies in the Thai range already accepted above, so it left
     0x0e8d rejected.  NOTE(review): confirm against the identifier
     character table this list was transcribed from.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopmofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  /* Not in any permitted range: diagnose, but still report it as a
     universal-character-name.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}

/* Add the UTF-8 representation of C to the token_buffer.  A lead byte
   carrying the high bits is followed by one continuation byte per
   remaining group of six bits.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* A continuation byte is 10xxxxxx: keep only six payload bits so
	 higher bits of C cannot leak into the marker bits.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
#endif
671 c_lex (value)
672 tree *value;
674 const cpp_token *tok;
676 retry:
677 timevar_push (TV_CPP);
679 tok = cpp_get_token (parse_in);
680 while (tok->type == CPP_PADDING);
681 timevar_pop (TV_CPP);
683 /* The C++ front end does horrible things with the current line
684 number. To ensure an accurate line number, we must reset it
685 every time we return a token. */
686 lineno = src_lineno;
688 *value = NULL_TREE;
689 switch (tok->type)
691 /* Issue this error here, where we can get at tok->val.c. */
692 case CPP_OTHER:
693 if (ISGRAPH (tok->val.c))
694 error ("stray '%c' in program", tok->val.c);
695 else
696 error ("stray '\\%o' in program", tok->val.c);
697 goto retry;
699 case CPP_NAME:
700 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
701 break;
703 case CPP_NUMBER:
705 unsigned int flags = cpp_classify_number (parse_in, tok);
707 switch (flags & CPP_N_CATEGORY)
709 case CPP_N_INVALID:
710 /* cpplib has issued an error. */
711 break;
713 case CPP_N_INTEGER:
714 *value = interpret_integer (tok, flags);
715 break;
717 case CPP_N_FLOATING:
718 *value = interpret_float (tok, flags);
719 break;
721 default:
722 abort ();
725 break;
727 case CPP_CHAR:
728 case CPP_WCHAR:
729 *value = lex_charconst (tok);
730 break;
732 case CPP_STRING:
733 case CPP_WSTRING:
734 *value = lex_string (tok->val.str.text, tok->val.str.len,
735 tok->type == CPP_WSTRING);
736 break;
738 /* These tokens should not be visible outside cpplib. */
739 case CPP_HEADER_NAME:
740 case CPP_COMMENT:
741 case CPP_MACRO_ARG:
742 abort ();
744 default: break;
747 return tok->type;
750 /* Returns the narrowest C-visible unsigned type, starting with the
751 minimum specified by FLAGS, that can fit VALUE, or itk_none if
752 there isn't one. */
753 static enum integer_type_kind
754 narrowest_unsigned_type (value, flags)
755 tree value;
756 unsigned int flags;
758 enum integer_type_kind itk;
760 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
761 itk = itk_unsigned_int;
762 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
763 itk = itk_unsigned_long;
764 else
765 itk = itk_unsigned_long_long;
767 /* int_fits_type_p must think the type of its first argument is
768 wider than its second argument, or it won't do the proper check. */
769 TREE_TYPE (value) = widest_unsigned_literal_type_node;
771 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
772 if (int_fits_type_p (value, integer_types[itk]))
773 return itk;
775 return itk_none;
778 /* Ditto, but narrowest signed type. */
779 static enum integer_type_kind
780 narrowest_signed_type (value, flags)
781 tree value;
782 unsigned int flags;
784 enum integer_type_kind itk;
786 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
787 itk = itk_int;
788 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
789 itk = itk_long;
790 else
791 itk = itk_long_long;
793 /* int_fits_type_p must think the type of its first argument is
794 wider than its second argument, or it won't do the proper check. */
795 TREE_TYPE (value) = widest_unsigned_literal_type_node;
797 for (; itk < itk_none; itk += 2 /* skip signed types */)
798 if (int_fits_type_p (value, integer_types[itk]))
799 return itk;
801 return itk_none;
804 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
805 static tree
806 interpret_integer (token, flags)
807 const cpp_token *token;
808 unsigned int flags;
810 tree value, type;
811 enum integer_type_kind itk;
812 cpp_num integer;
813 cpp_options *options = cpp_get_options (parse_in);
815 integer = cpp_interpret_integer (parse_in, token, flags);
816 integer = cpp_num_sign_extend (integer, options->precision);
817 value = build_int_2_wide (integer.low, integer.high);
819 /* The type of a constant with a U suffix is straightforward. */
820 if (flags & CPP_N_UNSIGNED)
821 itk = narrowest_unsigned_type (value, flags);
822 else
824 /* The type of a potentially-signed integer constant varies
825 depending on the base it's in, the standard in use, and the
826 length suffixes. */
827 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
828 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
830 /* In both C89 and C99, octal and hex constants may be signed or
831 unsigned, whichever fits tighter. We do not warn about this
832 choice differing from the traditional choice, as the constant
833 is probably a bit pattern and either way will work. */
834 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
835 itk = MIN (itk_u, itk_s);
836 else
838 /* In C99, decimal constants are always signed.
839 In C89, decimal constants that don't fit in long have
840 undefined behavior; we try to make them unsigned long.
841 In GCC's extended C89, that last is true of decimal
842 constants that don't fit in long long, too. */
844 itk = itk_s;
845 if (itk_s > itk_u && itk_s > itk_long)
847 if (!flag_isoc99)
849 if (itk_u < itk_unsigned_long)
850 itk_u = itk_unsigned_long;
851 itk = itk_u;
852 warning ("this decimal constant is unsigned only in ISO C90");
854 else if (warn_traditional)
855 warning ("this decimal constant would be unsigned in ISO C90");
860 if (itk == itk_none)
861 /* cpplib has already issued a warning for overflow. */
862 type = ((flags & CPP_N_UNSIGNED)
863 ? widest_unsigned_literal_type_node
864 : widest_integer_literal_type_node);
865 else
866 type = integer_types[itk];
868 if (itk > itk_unsigned_long
869 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
870 && ! in_system_header && ! flag_isoc99)
871 pedwarn ("integer constant is too large for \"%s\" type",
872 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
874 TREE_TYPE (value) = type;
876 /* Convert imaginary to a complex type. */
877 if (flags & CPP_N_IMAGINARY)
878 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
880 return value;
883 /* Interpret TOKEN, a floating point number with FLAGS as classified
884 by cpplib. */
885 static tree
886 interpret_float (token, flags)
887 const cpp_token *token;
888 unsigned int flags;
890 tree type;
891 tree value;
892 REAL_VALUE_TYPE real;
893 char *copy;
894 size_t copylen;
895 const char *typename;
897 /* FIXME: make %T work in error/warning, then we don't need typename. */
898 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
900 type = long_double_type_node;
901 typename = "long double";
903 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
904 || flag_single_precision_constant)
906 type = float_type_node;
907 typename = "float";
909 else
911 type = double_type_node;
912 typename = "double";
915 /* Copy the constant to a nul-terminated buffer. If the constant
916 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
917 can't handle them. */
918 copylen = token->val.str.len;
919 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
920 /* Must be an F or L suffix. */
921 copylen--;
922 if (flags & CPP_N_IMAGINARY)
923 /* I or J suffix. */
924 copylen--;
926 copy = alloca (copylen + 1);
927 memcpy (copy, token->val.str.text, copylen);
928 copy[copylen] = '\0';
930 real_from_string (&real, copy);
931 real_convert (&real, TYPE_MODE (type), &real);
933 /* A diagnostic is required for "soft" overflow by some ISO C
934 testsuites. This is not pedwarn, because some people don't want
935 an error for this.
936 ??? That's a dubious reason... is this a mandatory diagnostic or
937 isn't it? -- zw, 2001-08-21. */
938 if (REAL_VALUE_ISINF (real) && pedantic)
939 warning ("floating constant exceeds range of \"%s\"", typename);
941 /* Create a node with determined type and value. */
942 value = build_real (type, real);
943 if (flags & CPP_N_IMAGINARY)
944 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
946 return value;
949 static tree
950 lex_string (str, len, wide)
951 const unsigned char *str;
952 unsigned int len;
953 int wide;
955 tree value;
956 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
957 char *q = buf;
958 const unsigned char *p = str, *limit = str + len;
959 cppchar_t c;
961 #ifdef MULTIBYTE_CHARS
962 /* Reset multibyte conversion state. */
963 (void) local_mbtowc (NULL, NULL, 0);
964 #endif
966 while (p < limit)
968 #ifdef MULTIBYTE_CHARS
969 wchar_t wc;
970 int char_len;
972 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
973 if (char_len == -1)
975 warning ("ignoring invalid multibyte character");
976 char_len = 1;
977 c = *p++;
979 else
981 p += char_len;
982 c = wc;
984 #else
985 c = *p++;
986 #endif
988 if (c == '\\' && !ignore_escape_flag)
989 c = cpp_parse_escape (parse_in, &p, limit, wide);
991 /* Add this single character into the buffer either as a wchar_t,
992 a multibyte sequence, or as a single byte. */
993 if (wide)
995 unsigned charwidth = TYPE_PRECISION (char_type_node);
996 unsigned bytemask = (1 << charwidth) - 1;
997 int byte;
999 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1001 int n;
1002 if (byte >= (int) sizeof (c))
1003 n = 0;
1004 else
1005 n = (c >> (byte * charwidth)) & bytemask;
1006 if (BYTES_BIG_ENDIAN)
1007 q[WCHAR_BYTES - byte - 1] = n;
1008 else
1009 q[byte] = n;
1011 q += WCHAR_BYTES;
1013 #ifdef MULTIBYTE_CHARS
1014 else if (char_len > 1)
1016 /* We're dealing with a multibyte character. */
1017 for ( ; char_len >0; --char_len)
1019 *q++ = *(p - char_len);
1022 #endif
1023 else
1025 *q++ = c;
1029 /* Terminate the string value, either with a single byte zero
1030 or with a wide zero. */
1032 if (wide)
1034 memset (q, 0, WCHAR_BYTES);
1035 q += WCHAR_BYTES;
1037 else
1039 *q++ = '\0';
1042 value = build_string (q - buf, buf);
1044 if (wide)
1045 TREE_TYPE (value) = wchar_array_type_node;
1046 else
1047 TREE_TYPE (value) = char_array_type_node;
1048 return value;
1051 /* Converts a (possibly wide) character constant token into a tree. */
1052 static tree
1053 lex_charconst (token)
1054 const cpp_token *token;
1056 cppchar_t result;
1057 tree type, value;
1058 unsigned int chars_seen;
1059 int unsignedp;
1061 result = cpp_interpret_charconst (parse_in, token,
1062 &chars_seen, &unsignedp);
1064 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1065 before possibly widening to HOST_WIDE_INT for build_int_2. */
1066 if (unsignedp || (cppchar_signed_t) result >= 0)
1067 value = build_int_2 (result, 0);
1068 else
1069 value = build_int_2 ((cppchar_signed_t) result, -1);
1071 if (token->type == CPP_WCHAR)
1072 type = wchar_type_node;
1073 /* In C, a character constant has type 'int'.
1074 In C++ 'char', but multi-char charconsts have type 'int'. */
1075 else if ((c_language == clk_c) || chars_seen > 1)
1076 type = integer_type_node;
1077 else
1078 type = char_type_node;
1080 TREE_TYPE (value) = type;
1081 return value;