FSF GCC merge 02/23/03
[official-gcc.git] / gcc / c-lex.c
blobab068207e1d7a224aa9c81980889b32cd6dce396
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 #ifdef MULTIBYTE_CHARS
46 #include "mbchar.h"
47 #include <locale.h>
48 #endif /* MULTIBYTE_CHARS */
50 /* The current line map. */
51 static const struct line_map *map;
53 /* The line used to refresh the lineno global variable after each token. */
54 static unsigned int src_lineno;
56 /* We may keep statistics about how long which files took to compile. */
57 static int header_time, body_time;
58 static splay_tree file_info_tree;
60 /* File used for outputting assembler code. */
61 extern FILE *asm_out_file;
63 #undef WCHAR_TYPE_SIZE
64 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
66 /* Number of bytes in a wide character. */
67 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
69 int pending_lang_change; /* If we need to switch languages - C++ only */
70 int c_header_level; /* depth in C headers - C++ only */
72 /* Nonzero tells yylex to ignore \ in string constants. */
73 static int ignore_escape_flag;
75 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
76 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
77 static enum integer_type_kind
78 narrowest_unsigned_type PARAMS ((tree, unsigned int));
79 static enum integer_type_kind
80 narrowest_signed_type PARAMS ((tree, unsigned int));
81 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
82 int));
83 static tree lex_charconst PARAMS ((const cpp_token *));
84 static void update_header_times PARAMS ((const char *));
85 static int dump_one_header PARAMS ((splay_tree_node, void *));
86 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
87 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
88 const cpp_string *));
89 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
90 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
91 static void cb_define PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
93 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
94 cpp_hashnode *));
96 const char *
97 init_c_lex (filename)
98 const char *filename;
100 struct cpp_callbacks *cb;
101 struct c_fileinfo *toplevel;
103 /* Set up filename timing. Must happen before cpp_read_main_file. */
104 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
106 (splay_tree_delete_value_fn)free);
107 toplevel = get_fileinfo ("<top level>");
108 if (flag_detailed_statistics)
110 header_time = 0;
111 body_time = get_run_time ();
112 toplevel->time = body_time;
115 #ifdef MULTIBYTE_CHARS
116 /* Change to the native locale for multibyte conversions. */
117 setlocale (LC_CTYPE, "");
118 GET_ENVIRONMENT (literal_codeset, "LANG");
119 #endif
121 cb = cpp_get_callbacks (parse_in);
123 cb->line_change = cb_line_change;
124 cb->ident = cb_ident;
125 cb->file_change = cb_file_change;
126 cb->def_pragma = cb_def_pragma;
127 cb->valid_pch = c_common_valid_pch;
128 cb->read_pch = c_common_read_pch;
130 /* Set the debug callbacks if we can use them. */
131 if (debug_info_level == DINFO_LEVEL_VERBOSE
132 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
133 || write_symbols == VMS_AND_DWARF2_DEBUG))
135 cb->define = cb_define;
136 cb->undef = cb_undef;
139 /* Start it at 0. */
140 lineno = 0;
142 return cpp_read_main_file (parse_in, filename, ident_hash);
145 /* A thin wrapper around the real parser that initializes the
146 integrated preprocessor after debug output has been initialized.
147 Also, make sure the start_source_file debug hook gets called for
148 the primary source file. */
150 void
151 c_common_parse_file (set_yydebug)
152 int set_yydebug ATTRIBUTE_UNUSED;
154 #if YYDEBUG != 0
155 yydebug = set_yydebug;
156 #else
157 warning ("YYDEBUG not defined");
158 #endif
160 (*debug_hooks->start_source_file) (lineno, input_filename);
161 cpp_finish_options (parse_in);
163 pch_init();
165 yyparse ();
166 free_parser_stacks ();
169 struct c_fileinfo *
170 get_fileinfo (name)
171 const char *name;
173 splay_tree_node n;
174 struct c_fileinfo *fi;
176 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
177 if (n)
178 return (struct c_fileinfo *) n->value;
180 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
181 fi->time = 0;
182 fi->interface_only = 0;
183 fi->interface_unknown = 1;
184 splay_tree_insert (file_info_tree, (splay_tree_key) name,
185 (splay_tree_value) fi);
186 return fi;
189 static void
190 update_header_times (name)
191 const char *name;
193 /* Changing files again. This means currently collected time
194 is charged against header time, and body time starts back at 0. */
195 if (flag_detailed_statistics)
197 int this_time = get_run_time ();
198 struct c_fileinfo *file = get_fileinfo (name);
199 header_time += this_time - body_time;
200 file->time += this_time - body_time;
201 body_time = this_time;
205 static int
206 dump_one_header (n, dummy)
207 splay_tree_node n;
208 void *dummy ATTRIBUTE_UNUSED;
210 print_time ((const char *) n->key,
211 ((struct c_fileinfo *) n->value)->time);
212 return 0;
215 void
216 dump_time_statistics ()
218 struct c_fileinfo *file = get_fileinfo (input_filename);
219 int this_time = get_run_time ();
220 file->time += this_time - body_time;
222 fprintf (stderr, "\n******\n");
223 print_time ("header files (total)", header_time);
224 print_time ("main file (total)", this_time - body_time);
225 fprintf (stderr, "ratio = %g : 1\n",
226 (double)header_time / (double)(this_time - body_time));
227 fprintf (stderr, "\n******\n");
229 splay_tree_foreach (file_info_tree, dump_one_header, 0);
232 static void
233 cb_ident (pfile, line, str)
234 cpp_reader *pfile ATTRIBUTE_UNUSED;
235 unsigned int line ATTRIBUTE_UNUSED;
236 const cpp_string *str ATTRIBUTE_UNUSED;
238 #ifdef ASM_OUTPUT_IDENT
239 if (! flag_no_ident)
241 /* Convert escapes in the string. */
242 tree value ATTRIBUTE_UNUSED = lex_string (str->text, str->len, 0);
243 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
245 #endif
248 /* Called at the start of every non-empty line. TOKEN is the first
249 lexed token on the line. Used for diagnostic line numbers. */
250 static void
251 cb_line_change (pfile, token, parsing_args)
252 cpp_reader *pfile ATTRIBUTE_UNUSED;
253 const cpp_token *token;
254 int parsing_args ATTRIBUTE_UNUSED;
256 src_lineno = SOURCE_LINE (map, token->line);
259 static void
260 cb_file_change (pfile, new_map)
261 cpp_reader *pfile ATTRIBUTE_UNUSED;
262 const struct line_map *new_map;
264 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
266 if (new_map->reason == LC_ENTER)
268 /* Don't stack the main buffer on the input stack;
269 we already did in compile_file. */
270 if (map == NULL)
271 main_input_filename = new_map->to_file;
272 else
274 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
276 lineno = included_at;
277 push_srcloc (new_map->to_file, 1);
278 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
279 #ifndef NO_IMPLICIT_EXTERN_C
280 if (c_header_level)
281 ++c_header_level;
282 else if (new_map->sysp == 2)
284 c_header_level = 1;
285 ++pending_lang_change;
287 #endif
290 else if (new_map->reason == LC_LEAVE)
292 #ifndef NO_IMPLICIT_EXTERN_C
293 if (c_header_level && --c_header_level == 0)
295 if (new_map->sysp == 2)
296 warning ("badly nested C headers from preprocessor");
297 --pending_lang_change;
299 #endif
300 pop_srcloc ();
302 (*debug_hooks->end_source_file) (to_line);
305 update_header_times (new_map->to_file);
306 in_system_header = new_map->sysp != 0;
307 input_filename = new_map->to_file;
308 lineno = to_line;
309 map = new_map;
311 /* Hook for C++. */
312 extract_interface_info ();
315 static void
316 cb_def_pragma (pfile, line)
317 cpp_reader *pfile;
318 unsigned int line;
320 /* Issue a warning message if we have been asked to do so. Ignore
321 unknown pragmas in system headers unless an explicit
322 -Wunknown-pragmas has been given. */
323 if (warn_unknown_pragmas > in_system_header)
325 const unsigned char *space, *name;
326 const cpp_token *s;
328 space = name = (const unsigned char *) "";
329 s = cpp_get_token (pfile);
330 if (s->type != CPP_EOF)
332 space = cpp_token_as_text (pfile, s);
333 s = cpp_get_token (pfile);
334 if (s->type == CPP_NAME)
335 name = cpp_token_as_text (pfile, s);
338 lineno = SOURCE_LINE (map, line);
339 warning ("ignoring #pragma %s %s", space, name);
343 /* #define callback for DWARF and DWARF2 debug info. */
344 static void
345 cb_define (pfile, line, node)
346 cpp_reader *pfile;
347 unsigned int line;
348 cpp_hashnode *node;
350 (*debug_hooks->define) (SOURCE_LINE (map, line),
351 (const char *) cpp_macro_definition (pfile, node));
354 /* #undef callback for DWARF and DWARF2 debug info. */
355 static void
356 cb_undef (pfile, line, node)
357 cpp_reader *pfile ATTRIBUTE_UNUSED;
358 unsigned int line;
359 cpp_hashnode *node;
361 (*debug_hooks->undef) (SOURCE_LINE (map, line),
362 (const char *) NODE_NAME (node));
365 #if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  An error is
   given when C is not one of the characters that may appear in an
   identifier, as per [extendid]; the function still returns 1 then.
   Values below 0x7f return 0, as does everything on EBCDIC targets.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive code point ranges accepted in identifiers, grouped by
     script.  A single code point is written with LO equal to HI.  */
  static const struct { unsigned short lo, hi; } valid_ranges[] =
  {
    /* Latin */
    {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, {0x00f8, 0x01f5}, {0x01fa, 0x0217},
    {0x0250, 0x02a8}, {0x1e00, 0x1e9a}, {0x1ea0, 0x1ef9},

    /* Greek */
    {0x0384, 0x0384}, {0x0388, 0x038a}, {0x038c, 0x038c}, {0x038e, 0x03a1},
    {0x03a3, 0x03ce}, {0x03d0, 0x03d6}, {0x03da, 0x03da}, {0x03dc, 0x03dc},
    {0x03de, 0x03de}, {0x03e0, 0x03e0}, {0x03e2, 0x03f3}, {0x1f00, 0x1f15},
    {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
    {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d},
    {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc},
    {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4},
    {0x1ff6, 0x1ffc},

    /* Cyrillic */
    {0x0401, 0x040d}, {0x040f, 0x044f}, {0x0451, 0x045c}, {0x045e, 0x0481},
    {0x0490, 0x04c4}, {0x04c7, 0x04c8}, {0x04cb, 0x04cc}, {0x04d0, 0x04eb},
    {0x04ee, 0x04f5}, {0x04f8, 0x04f9},

    /* Armenian */
    {0x0531, 0x0556}, {0x0561, 0x0587},

    /* Hebrew */
    {0x05d0, 0x05ea}, {0x05f0, 0x05f4},

    /* Arabic */
    {0x0621, 0x063a}, {0x0640, 0x0652}, {0x0670, 0x06b7}, {0x06ba, 0x06be},
    {0x06c0, 0x06ce}, {0x06e5, 0x06e7},

    /* Devanagari */
    {0x0905, 0x0939}, {0x0958, 0x0962},

    /* Bengali */
    {0x0985, 0x098c}, {0x098f, 0x0990}, {0x0993, 0x09a8}, {0x09aa, 0x09b0},
    {0x09b2, 0x09b2}, {0x09b6, 0x09b9}, {0x09dc, 0x09dd}, {0x09df, 0x09e1},
    {0x09f0, 0x09f1},

    /* Gurmukhi */
    {0x0a05, 0x0a0a}, {0x0a0f, 0x0a10}, {0x0a13, 0x0a28}, {0x0a2a, 0x0a30},
    {0x0a32, 0x0a33}, {0x0a35, 0x0a36}, {0x0a38, 0x0a39}, {0x0a59, 0x0a5c},
    {0x0a5e, 0x0a5e},

    /* Gujarati */
    {0x0a85, 0x0a8b}, {0x0a8d, 0x0a8d}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8},
    {0x0aaa, 0x0ab0}, {0x0ab2, 0x0ab3}, {0x0ab5, 0x0ab9}, {0x0ae0, 0x0ae0},

    /* Oriya */
    {0x0b05, 0x0b0c}, {0x0b0f, 0x0b10}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
    {0x0b32, 0x0b33}, {0x0b36, 0x0b39}, {0x0b5c, 0x0b5d}, {0x0b5f, 0x0b61},

    /* Tamil */
    {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0b99, 0x0b9a},
    {0x0b9c, 0x0b9c}, {0x0b9e, 0x0b9f}, {0x0ba3, 0x0ba4}, {0x0ba8, 0x0baa},
    {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},

    /* Telugu */
    {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
    {0x0c35, 0x0c39}, {0x0c60, 0x0c61},

    /* Kannada */
    {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
    {0x0cb5, 0x0cb9}, {0x0ce0, 0x0ce1},

    /* Malayalam */
    {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d28}, {0x0d2a, 0x0d39},
    {0x0d60, 0x0d61},

    /* Thai */
    {0x0e01, 0x0e30}, {0x0e32, 0x0e33}, {0x0e40, 0x0e46}, {0x0e4f, 0x0e5b},

    /* Lao.  NOTE(review): 0x0e0d already falls in the Thai range above;
       the neighbouring entries suggest 0x0e8d may have been meant --
       confirm against the [extendid] table before changing it.  */
    {0x0e81, 0x0e82}, {0x0e84, 0x0e84}, {0x0e87, 0x0e87}, {0x0e88, 0x0e88},
    {0x0e8a, 0x0e8a}, {0x0e0d, 0x0e0d}, {0x0e94, 0x0e97}, {0x0e99, 0x0e9f},
    {0x0ea1, 0x0ea3}, {0x0ea5, 0x0ea5}, {0x0ea7, 0x0ea7}, {0x0eaa, 0x0eaa},
    {0x0eab, 0x0eab}, {0x0ead, 0x0eb0}, {0x0eb2, 0x0eb2}, {0x0eb3, 0x0eb3},
    {0x0ebd, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec6, 0x0ec6},

    /* Georgian */
    {0x10a0, 0x10c5}, {0x10d0, 0x10f6},

    /* Hiragana */
    {0x3041, 0x3094}, {0x309b, 0x309e},

    /* Katakana */
    {0x30a1, 0x30fe},

    /* Bopomofo */
    {0x3105, 0x312c},

    /* Hangul */
    {0x1100, 0x1159}, {0x1161, 0x11a2}, {0x11a8, 0x11f9},

    /* CJK Unified Ideographs */
    {0xf900, 0xfa2d}, {0xfb1f, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb3e, 0xfb3e},
    {0xfb40, 0xfb41}, {0xfb42, 0xfb44}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3f},
    {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
    {0xfe74, 0xfe74}, {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a},
    {0xff66, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7},
    {0xffda, 0xffdc}, {0x4e00, 0x9fa5}
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane
     (the low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid_ranges / sizeof valid_ranges[0]; i++)
    if (c >= valid_ranges[i].lo && c <= valid_ranges[i].hi)
      return 1;

  /* Inside the BMP but in none of the listed ranges.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are emitted as a single byte.  Larger values get a
   lead byte carrying the length marker and the topmost payload bits,
   followed by one continuation byte per remaining group of six bits.
   Every continuation byte has the form 10xxxxxx, so each group is
   masked with 0x3f before the 0x80 marker is added; without the mask,
   bits 6 and 7 of the shifted value would corrupt the marker.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the bits above SHIFT.  */
  extend_token (mask | (c >> shift));

  /* Continuation bytes, most significant group first.  */
  do
    {
      shift -= 6;
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
672 #endif
675 c_lex (value)
676 tree *value;
678 const cpp_token *tok;
680 retry:
681 timevar_push (TV_CPP);
683 tok = cpp_get_token (parse_in);
684 while (tok->type == CPP_PADDING);
685 timevar_pop (TV_CPP);
687 /* The C++ front end does horrible things with the current line
688 number. To ensure an accurate line number, we must reset it
689 every time we return a token. */
690 lineno = src_lineno;
692 *value = NULL_TREE;
693 switch (tok->type)
695 /* Issue this error here, where we can get at tok->val.c. */
696 case CPP_OTHER:
697 if (ISGRAPH (tok->val.c))
698 error ("stray '%c' in program", tok->val.c);
699 else
700 error ("stray '\\%o' in program", tok->val.c);
701 goto retry;
703 case CPP_NAME:
704 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
705 break;
707 case CPP_NUMBER:
709 unsigned int flags = cpp_classify_number (parse_in, tok);
711 switch (flags & CPP_N_CATEGORY)
713 case CPP_N_INVALID:
714 /* cpplib has issued an error. */
715 *value = error_mark_node;
716 break;
718 case CPP_N_INTEGER:
719 *value = interpret_integer (tok, flags);
720 break;
722 case CPP_N_FLOATING:
723 *value = interpret_float (tok, flags);
724 break;
726 default:
727 abort ();
730 break;
732 case CPP_CHAR:
733 case CPP_WCHAR:
734 *value = lex_charconst (tok);
735 break;
737 case CPP_STRING:
738 case CPP_WSTRING:
739 *value = lex_string (tok->val.str.text, tok->val.str.len,
740 tok->type == CPP_WSTRING);
741 break;
743 /* These tokens should not be visible outside cpplib. */
744 case CPP_HEADER_NAME:
745 case CPP_COMMENT:
746 case CPP_MACRO_ARG:
747 abort ();
749 default: break;
752 return tok->type;
755 /* Returns the narrowest C-visible unsigned type, starting with the
756 minimum specified by FLAGS, that can fit VALUE, or itk_none if
757 there isn't one. */
758 static enum integer_type_kind
759 narrowest_unsigned_type (value, flags)
760 tree value;
761 unsigned int flags;
763 enum integer_type_kind itk;
765 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
766 itk = itk_unsigned_int;
767 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
768 itk = itk_unsigned_long;
769 else
770 itk = itk_unsigned_long_long;
772 /* int_fits_type_p must think the type of its first argument is
773 wider than its second argument, or it won't do the proper check. */
774 TREE_TYPE (value) = widest_unsigned_literal_type_node;
776 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
777 if (int_fits_type_p (value, integer_types[itk]))
778 return itk;
780 return itk_none;
783 /* Ditto, but narrowest signed type. */
784 static enum integer_type_kind
785 narrowest_signed_type (value, flags)
786 tree value;
787 unsigned int flags;
789 enum integer_type_kind itk;
791 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
792 itk = itk_int;
793 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
794 itk = itk_long;
795 else
796 itk = itk_long_long;
798 /* int_fits_type_p must think the type of its first argument is
799 wider than its second argument, or it won't do the proper check. */
800 TREE_TYPE (value) = widest_unsigned_literal_type_node;
802 for (; itk < itk_none; itk += 2 /* skip signed types */)
803 if (int_fits_type_p (value, integer_types[itk]))
804 return itk;
806 return itk_none;
809 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
810 static tree
811 interpret_integer (token, flags)
812 const cpp_token *token;
813 unsigned int flags;
815 tree value, type;
816 enum integer_type_kind itk;
817 cpp_num integer;
818 cpp_options *options = cpp_get_options (parse_in);
820 integer = cpp_interpret_integer (parse_in, token, flags);
821 integer = cpp_num_sign_extend (integer, options->precision);
822 value = build_int_2_wide (integer.low, integer.high);
824 /* The type of a constant with a U suffix is straightforward. */
825 if (flags & CPP_N_UNSIGNED)
826 itk = narrowest_unsigned_type (value, flags);
827 else
829 /* The type of a potentially-signed integer constant varies
830 depending on the base it's in, the standard in use, and the
831 length suffixes. */
832 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
833 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
835 /* In both C89 and C99, octal and hex constants may be signed or
836 unsigned, whichever fits tighter. We do not warn about this
837 choice differing from the traditional choice, as the constant
838 is probably a bit pattern and either way will work. */
839 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
840 itk = MIN (itk_u, itk_s);
841 else
843 /* In C99, decimal constants are always signed.
844 In C89, decimal constants that don't fit in long have
845 undefined behavior; we try to make them unsigned long.
846 In GCC's extended C89, that last is true of decimal
847 constants that don't fit in long long, too. */
849 itk = itk_s;
850 if (itk_s > itk_u && itk_s > itk_long)
852 if (!flag_isoc99)
854 if (itk_u < itk_unsigned_long)
855 itk_u = itk_unsigned_long;
856 itk = itk_u;
857 warning ("this decimal constant is unsigned only in ISO C90");
859 else if (warn_traditional)
860 warning ("this decimal constant would be unsigned in ISO C90");
865 if (itk == itk_none)
866 /* cpplib has already issued a warning for overflow. */
867 type = ((flags & CPP_N_UNSIGNED)
868 ? widest_unsigned_literal_type_node
869 : widest_integer_literal_type_node);
870 else
871 type = integer_types[itk];
873 if (itk > itk_unsigned_long
874 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
875 && ! in_system_header && ! flag_isoc99)
876 pedwarn ("integer constant is too large for \"%s\" type",
877 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
879 TREE_TYPE (value) = type;
881 /* Convert imaginary to a complex type. */
882 if (flags & CPP_N_IMAGINARY)
883 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
885 return value;
888 /* Interpret TOKEN, a floating point number with FLAGS as classified
889 by cpplib. */
890 static tree
891 interpret_float (token, flags)
892 const cpp_token *token;
893 unsigned int flags;
895 tree type;
896 tree value;
897 REAL_VALUE_TYPE real;
898 char *copy;
899 size_t copylen;
900 const char *typename;
902 /* FIXME: make %T work in error/warning, then we don't need typename. */
903 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
905 type = long_double_type_node;
906 typename = "long double";
908 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
909 || flag_single_precision_constant)
911 type = float_type_node;
912 typename = "float";
914 else
916 type = double_type_node;
917 typename = "double";
920 /* Copy the constant to a nul-terminated buffer. If the constant
921 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
922 can't handle them. */
923 copylen = token->val.str.len;
924 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
925 /* Must be an F or L suffix. */
926 copylen--;
927 if (flags & CPP_N_IMAGINARY)
928 /* I or J suffix. */
929 copylen--;
931 copy = alloca (copylen + 1);
932 memcpy (copy, token->val.str.text, copylen);
933 copy[copylen] = '\0';
935 real_from_string (&real, copy);
936 real_convert (&real, TYPE_MODE (type), &real);
938 /* A diagnostic is required for "soft" overflow by some ISO C
939 testsuites. This is not pedwarn, because some people don't want
940 an error for this.
941 ??? That's a dubious reason... is this a mandatory diagnostic or
942 isn't it? -- zw, 2001-08-21. */
943 if (REAL_VALUE_ISINF (real) && pedantic)
944 warning ("floating constant exceeds range of \"%s\"", typename);
946 /* Create a node with determined type and value. */
947 value = build_real (type, real);
948 if (flags & CPP_N_IMAGINARY)
949 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
951 return value;
954 static tree
955 lex_string (str, len, wide)
956 const unsigned char *str;
957 unsigned int len;
958 int wide;
960 tree value;
961 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
962 char *q = buf;
963 const unsigned char *p = str, *limit = str + len;
964 cppchar_t c;
966 #ifdef MULTIBYTE_CHARS
967 /* Reset multibyte conversion state. */
968 (void) local_mbtowc (NULL, NULL, 0);
969 #endif
971 while (p < limit)
973 #ifdef MULTIBYTE_CHARS
974 wchar_t wc;
975 int char_len;
977 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
978 if (char_len == -1)
980 warning ("ignoring invalid multibyte character");
981 char_len = 1;
982 c = *p++;
984 else
986 p += char_len;
987 c = wc;
989 #else
990 c = *p++;
991 #endif
993 if (c == '\\' && !ignore_escape_flag)
994 c = cpp_parse_escape (parse_in, &p, limit, wide);
996 /* Add this single character into the buffer either as a wchar_t,
997 a multibyte sequence, or as a single byte. */
998 if (wide)
1000 unsigned charwidth = TYPE_PRECISION (char_type_node);
1001 unsigned bytemask = (1 << charwidth) - 1;
1002 int byte;
1004 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1006 int n;
1007 if (byte >= (int) sizeof (c))
1008 n = 0;
1009 else
1010 n = (c >> (byte * charwidth)) & bytemask;
1011 if (BYTES_BIG_ENDIAN)
1012 q[WCHAR_BYTES - byte - 1] = n;
1013 else
1014 q[byte] = n;
1016 q += WCHAR_BYTES;
1018 #ifdef MULTIBYTE_CHARS
1019 else if (char_len > 1)
1021 /* We're dealing with a multibyte character. */
1022 for ( ; char_len >0; --char_len)
1024 *q++ = *(p - char_len);
1027 #endif
1028 else
1030 *q++ = c;
1034 /* Terminate the string value, either with a single byte zero
1035 or with a wide zero. */
1037 if (wide)
1039 memset (q, 0, WCHAR_BYTES);
1040 q += WCHAR_BYTES;
1042 else
1044 *q++ = '\0';
1047 value = build_string (q - buf, buf);
1049 if (wide)
1050 TREE_TYPE (value) = wchar_array_type_node;
1051 else
1052 TREE_TYPE (value) = char_array_type_node;
1053 return value;
1056 /* Converts a (possibly wide) character constant token into a tree. */
1057 static tree
1058 lex_charconst (token)
1059 const cpp_token *token;
1061 cppchar_t result;
1062 tree type, value;
1063 unsigned int chars_seen;
1064 int unsignedp;
1066 result = cpp_interpret_charconst (parse_in, token,
1067 &chars_seen, &unsignedp);
1069 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1070 before possibly widening to HOST_WIDE_INT for build_int_2. */
1071 if (unsignedp || (cppchar_signed_t) result >= 0)
1072 value = build_int_2 (result, 0);
1073 else
1074 value = build_int_2 ((cppchar_signed_t) result, -1);
1076 if (token->type == CPP_WCHAR)
1077 type = wchar_type_node;
1078 /* In C, a character constant has type 'int'.
1079 In C++ 'char', but multi-char charconsts have type 'int'. */
1080 else if ((c_language == clk_c) || chars_seen > 1)
1081 type = integer_type_node;
1082 else
1083 type = char_type_node;
1085 TREE_TYPE (value) = type;
1086 return value;