2003-03-10 Aldy Hernandez <aldyh@redhat.com>
[official-gcc.git] / gcc / c-lex.c
blob9e97ad76b61675517fc4892139dbd829f392fe6a
/* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 #ifdef MULTIBYTE_CHARS
46 #include "mbchar.h"
47 #include <locale.h>
48 #endif /* MULTIBYTE_CHARS */
50 /* The current line map. */
51 static const struct line_map *map;
53 /* The line used to refresh the lineno global variable after each token. */
54 static unsigned int src_lineno;
56 /* We may keep statistics about how long which files took to compile. */
57 static int header_time, body_time;
58 static splay_tree file_info_tree;
60 /* File used for outputting assembler code. */
61 extern FILE *asm_out_file;
63 #undef WCHAR_TYPE_SIZE
64 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
66 /* Number of bytes in a wide character. */
67 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
69 int pending_lang_change; /* If we need to switch languages - C++ only */
70 int c_header_level; /* depth in C headers - C++ only */
72 /* Nonzero tells yylex to ignore \ in string constants. */
73 static int ignore_escape_flag;
75 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
76 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
77 static enum integer_type_kind
78 narrowest_unsigned_type PARAMS ((tree, unsigned int));
79 static enum integer_type_kind
80 narrowest_signed_type PARAMS ((tree, unsigned int));
81 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
82 int));
83 static tree lex_charconst PARAMS ((const cpp_token *));
84 static void update_header_times PARAMS ((const char *));
85 static int dump_one_header PARAMS ((splay_tree_node, void *));
86 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
87 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
88 const cpp_string *));
89 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
90 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
91 static void cb_define PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
93 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
94 cpp_hashnode *));
96 void
97 init_c_lex ()
99 struct cpp_callbacks *cb;
100 struct c_fileinfo *toplevel;
102 /* Set up filename timing. Must happen before cpp_read_main_file. */
103 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
105 (splay_tree_delete_value_fn)free);
106 toplevel = get_fileinfo ("<top level>");
107 if (flag_detailed_statistics)
109 header_time = 0;
110 body_time = get_run_time ();
111 toplevel->time = body_time;
114 cb = cpp_get_callbacks (parse_in);
116 cb->register_builtins = cb_register_builtins;
117 cb->line_change = cb_line_change;
118 cb->ident = cb_ident;
119 cb->file_change = cb_file_change;
120 cb->def_pragma = cb_def_pragma;
121 cb->valid_pch = c_common_valid_pch;
122 cb->read_pch = c_common_read_pch;
124 /* Set the debug callbacks if we can use them. */
125 if (debug_info_level == DINFO_LEVEL_VERBOSE
126 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
127 || write_symbols == VMS_AND_DWARF2_DEBUG))
129 cb->define = cb_define;
130 cb->undef = cb_undef;
134 /* A thin wrapper around the real parser that initializes the
135 integrated preprocessor after debug output has been initialized.
136 Also, make sure the start_source_file debug hook gets called for
137 the primary source file. */
139 void
140 c_common_parse_file (set_yydebug)
141 int set_yydebug ATTRIBUTE_UNUSED;
143 #if YYDEBUG != 0
144 yydebug = set_yydebug;
145 #else
146 warning ("YYDEBUG not defined");
147 #endif
149 (*debug_hooks->start_source_file) (lineno, input_filename);
150 cpp_finish_options (parse_in);
152 pch_init();
154 yyparse ();
155 free_parser_stacks ();
158 struct c_fileinfo *
159 get_fileinfo (name)
160 const char *name;
162 splay_tree_node n;
163 struct c_fileinfo *fi;
165 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
166 if (n)
167 return (struct c_fileinfo *) n->value;
169 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
170 fi->time = 0;
171 fi->interface_only = 0;
172 fi->interface_unknown = 1;
173 splay_tree_insert (file_info_tree, (splay_tree_key) name,
174 (splay_tree_value) fi);
175 return fi;
178 static void
179 update_header_times (name)
180 const char *name;
182 /* Changing files again. This means currently collected time
183 is charged against header time, and body time starts back at 0. */
184 if (flag_detailed_statistics)
186 int this_time = get_run_time ();
187 struct c_fileinfo *file = get_fileinfo (name);
188 header_time += this_time - body_time;
189 file->time += this_time - body_time;
190 body_time = this_time;
194 static int
195 dump_one_header (n, dummy)
196 splay_tree_node n;
197 void *dummy ATTRIBUTE_UNUSED;
199 print_time ((const char *) n->key,
200 ((struct c_fileinfo *) n->value)->time);
201 return 0;
204 void
205 dump_time_statistics ()
207 struct c_fileinfo *file = get_fileinfo (input_filename);
208 int this_time = get_run_time ();
209 file->time += this_time - body_time;
211 fprintf (stderr, "\n******\n");
212 print_time ("header files (total)", header_time);
213 print_time ("main file (total)", this_time - body_time);
214 fprintf (stderr, "ratio = %g : 1\n",
215 (double)header_time / (double)(this_time - body_time));
216 fprintf (stderr, "\n******\n");
218 splay_tree_foreach (file_info_tree, dump_one_header, 0);
221 static void
222 cb_ident (pfile, line, str)
223 cpp_reader *pfile ATTRIBUTE_UNUSED;
224 unsigned int line ATTRIBUTE_UNUSED;
225 const cpp_string *str ATTRIBUTE_UNUSED;
227 #ifdef ASM_OUTPUT_IDENT
228 if (! flag_no_ident)
230 /* Convert escapes in the string. */
231 tree value ATTRIBUTE_UNUSED = lex_string (str->text, str->len, 0);
232 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
234 #endif
237 /* Called at the start of every non-empty line. TOKEN is the first
238 lexed token on the line. Used for diagnostic line numbers. */
239 static void
240 cb_line_change (pfile, token, parsing_args)
241 cpp_reader *pfile ATTRIBUTE_UNUSED;
242 const cpp_token *token;
243 int parsing_args ATTRIBUTE_UNUSED;
245 src_lineno = SOURCE_LINE (map, token->line);
248 static void
249 cb_file_change (pfile, new_map)
250 cpp_reader *pfile ATTRIBUTE_UNUSED;
251 const struct line_map *new_map;
253 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
255 if (new_map->reason == LC_ENTER)
257 /* Don't stack the main buffer on the input stack;
258 we already did in compile_file. */
259 if (map == NULL)
260 main_input_filename = new_map->to_file;
261 else
263 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
265 lineno = included_at;
266 push_srcloc (new_map->to_file, 1);
267 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
268 #ifndef NO_IMPLICIT_EXTERN_C
269 if (c_header_level)
270 ++c_header_level;
271 else if (new_map->sysp == 2)
273 c_header_level = 1;
274 ++pending_lang_change;
276 #endif
279 else if (new_map->reason == LC_LEAVE)
281 #ifndef NO_IMPLICIT_EXTERN_C
282 if (c_header_level && --c_header_level == 0)
284 if (new_map->sysp == 2)
285 warning ("badly nested C headers from preprocessor");
286 --pending_lang_change;
288 #endif
289 pop_srcloc ();
291 (*debug_hooks->end_source_file) (to_line);
294 update_header_times (new_map->to_file);
295 in_system_header = new_map->sysp != 0;
296 input_filename = new_map->to_file;
297 lineno = to_line;
298 map = new_map;
300 /* Hook for C++. */
301 extract_interface_info ();
304 static void
305 cb_def_pragma (pfile, line)
306 cpp_reader *pfile;
307 unsigned int line;
309 /* Issue a warning message if we have been asked to do so. Ignore
310 unknown pragmas in system headers unless an explicit
311 -Wunknown-pragmas has been given. */
312 if (warn_unknown_pragmas > in_system_header)
314 const unsigned char *space, *name;
315 const cpp_token *s;
317 space = name = (const unsigned char *) "";
318 s = cpp_get_token (pfile);
319 if (s->type != CPP_EOF)
321 space = cpp_token_as_text (pfile, s);
322 s = cpp_get_token (pfile);
323 if (s->type == CPP_NAME)
324 name = cpp_token_as_text (pfile, s);
327 lineno = SOURCE_LINE (map, line);
328 warning ("ignoring #pragma %s %s", space, name);
332 /* #define callback for DWARF and DWARF2 debug info. */
333 static void
334 cb_define (pfile, line, node)
335 cpp_reader *pfile;
336 unsigned int line;
337 cpp_hashnode *node;
339 (*debug_hooks->define) (SOURCE_LINE (map, line),
340 (const char *) cpp_macro_definition (pfile, node));
343 /* #undef callback for DWARF and DWARF2 debug info. */
344 static void
345 cb_undef (pfile, line, node)
346 cpp_reader *pfile ATTRIBUTE_UNUSED;
347 unsigned int line;
348 cpp_hashnode *node;
350 (*debug_hooks->undef) (SOURCE_LINE (map, line),
351 (const char *) NODE_NAME (node));
354 #if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Values below 0x7f are treated as plain ASCII and yield 0, as does
   every value when TARGET_EBCDIC is defined.  Anything else yields 1;
   an error is reported first when the value lies above the Basic
   Multilingual Plane or falls in none of the ranges listed below.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao.  The single-value test after 0x0e8a is 0x0e8d: the value
     0x0e0d previously written here lies in the Thai range handled
     above (so it is still accepted) and left 0x0e8d rejected.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopomofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are emitted as a single byte.  Larger values get
   a lead byte (a length marker ORed with the topmost payload bits)
   followed by one continuation byte per remaining group of six bits.
   Each continuation byte carries exactly six payload bits under the
   0x80 marker, so the shifted value is masked with 0x3f before the
   marker is applied; without the mask, higher bits of C would leak
   into the two marker bits and corrupt the sequence.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte.  */
  extend_token (mask | (c >> shift));

  /* Continuation bytes, most significant group first.  */
  do
    {
      shift -= 6;
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
661 #endif
664 c_lex (value)
665 tree *value;
667 const cpp_token *tok;
669 retry:
670 timevar_push (TV_CPP);
672 tok = cpp_get_token (parse_in);
673 while (tok->type == CPP_PADDING);
674 timevar_pop (TV_CPP);
676 /* The C++ front end does horrible things with the current line
677 number. To ensure an accurate line number, we must reset it
678 every time we return a token. */
679 lineno = src_lineno;
681 *value = NULL_TREE;
682 switch (tok->type)
684 /* Issue this error here, where we can get at tok->val.c. */
685 case CPP_OTHER:
686 if (ISGRAPH (tok->val.c))
687 error ("stray '%c' in program", tok->val.c);
688 else
689 error ("stray '\\%o' in program", tok->val.c);
690 goto retry;
692 case CPP_NAME:
693 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
694 break;
696 case CPP_NUMBER:
698 unsigned int flags = cpp_classify_number (parse_in, tok);
700 switch (flags & CPP_N_CATEGORY)
702 case CPP_N_INVALID:
703 /* cpplib has issued an error. */
704 *value = error_mark_node;
705 break;
707 case CPP_N_INTEGER:
708 *value = interpret_integer (tok, flags);
709 break;
711 case CPP_N_FLOATING:
712 *value = interpret_float (tok, flags);
713 break;
715 default:
716 abort ();
719 break;
721 case CPP_CHAR:
722 case CPP_WCHAR:
723 *value = lex_charconst (tok);
724 break;
726 case CPP_STRING:
727 case CPP_WSTRING:
728 *value = lex_string (tok->val.str.text, tok->val.str.len,
729 tok->type == CPP_WSTRING);
730 break;
732 /* These tokens should not be visible outside cpplib. */
733 case CPP_HEADER_NAME:
734 case CPP_COMMENT:
735 case CPP_MACRO_ARG:
736 abort ();
738 default: break;
741 return tok->type;
744 /* Returns the narrowest C-visible unsigned type, starting with the
745 minimum specified by FLAGS, that can fit VALUE, or itk_none if
746 there isn't one. */
747 static enum integer_type_kind
748 narrowest_unsigned_type (value, flags)
749 tree value;
750 unsigned int flags;
752 enum integer_type_kind itk;
754 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
755 itk = itk_unsigned_int;
756 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
757 itk = itk_unsigned_long;
758 else
759 itk = itk_unsigned_long_long;
761 /* int_fits_type_p must think the type of its first argument is
762 wider than its second argument, or it won't do the proper check. */
763 TREE_TYPE (value) = widest_unsigned_literal_type_node;
765 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
766 if (int_fits_type_p (value, integer_types[itk]))
767 return itk;
769 return itk_none;
772 /* Ditto, but narrowest signed type. */
773 static enum integer_type_kind
774 narrowest_signed_type (value, flags)
775 tree value;
776 unsigned int flags;
778 enum integer_type_kind itk;
780 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
781 itk = itk_int;
782 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
783 itk = itk_long;
784 else
785 itk = itk_long_long;
787 /* int_fits_type_p must think the type of its first argument is
788 wider than its second argument, or it won't do the proper check. */
789 TREE_TYPE (value) = widest_unsigned_literal_type_node;
791 for (; itk < itk_none; itk += 2 /* skip signed types */)
792 if (int_fits_type_p (value, integer_types[itk]))
793 return itk;
795 return itk_none;
798 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
799 static tree
800 interpret_integer (token, flags)
801 const cpp_token *token;
802 unsigned int flags;
804 tree value, type;
805 enum integer_type_kind itk;
806 cpp_num integer;
807 cpp_options *options = cpp_get_options (parse_in);
809 integer = cpp_interpret_integer (parse_in, token, flags);
810 integer = cpp_num_sign_extend (integer, options->precision);
811 value = build_int_2_wide (integer.low, integer.high);
813 /* The type of a constant with a U suffix is straightforward. */
814 if (flags & CPP_N_UNSIGNED)
815 itk = narrowest_unsigned_type (value, flags);
816 else
818 /* The type of a potentially-signed integer constant varies
819 depending on the base it's in, the standard in use, and the
820 length suffixes. */
821 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
822 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
824 /* In both C89 and C99, octal and hex constants may be signed or
825 unsigned, whichever fits tighter. We do not warn about this
826 choice differing from the traditional choice, as the constant
827 is probably a bit pattern and either way will work. */
828 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
829 itk = MIN (itk_u, itk_s);
830 else
832 /* In C99, decimal constants are always signed.
833 In C89, decimal constants that don't fit in long have
834 undefined behavior; we try to make them unsigned long.
835 In GCC's extended C89, that last is true of decimal
836 constants that don't fit in long long, too. */
838 itk = itk_s;
839 if (itk_s > itk_u && itk_s > itk_long)
841 if (!flag_isoc99)
843 if (itk_u < itk_unsigned_long)
844 itk_u = itk_unsigned_long;
845 itk = itk_u;
846 warning ("this decimal constant is unsigned only in ISO C90");
848 else if (warn_traditional)
849 warning ("this decimal constant would be unsigned in ISO C90");
854 if (itk == itk_none)
855 /* cpplib has already issued a warning for overflow. */
856 type = ((flags & CPP_N_UNSIGNED)
857 ? widest_unsigned_literal_type_node
858 : widest_integer_literal_type_node);
859 else
860 type = integer_types[itk];
862 if (itk > itk_unsigned_long
863 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
864 && ! in_system_header && ! flag_isoc99)
865 pedwarn ("integer constant is too large for \"%s\" type",
866 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
868 TREE_TYPE (value) = type;
870 /* Convert imaginary to a complex type. */
871 if (flags & CPP_N_IMAGINARY)
872 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
874 return value;
877 /* Interpret TOKEN, a floating point number with FLAGS as classified
878 by cpplib. */
879 static tree
880 interpret_float (token, flags)
881 const cpp_token *token;
882 unsigned int flags;
884 tree type;
885 tree value;
886 REAL_VALUE_TYPE real;
887 char *copy;
888 size_t copylen;
889 const char *typename;
891 /* FIXME: make %T work in error/warning, then we don't need typename. */
892 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
894 type = long_double_type_node;
895 typename = "long double";
897 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
898 || flag_single_precision_constant)
900 type = float_type_node;
901 typename = "float";
903 else
905 type = double_type_node;
906 typename = "double";
909 /* Copy the constant to a nul-terminated buffer. If the constant
910 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
911 can't handle them. */
912 copylen = token->val.str.len;
913 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
914 /* Must be an F or L suffix. */
915 copylen--;
916 if (flags & CPP_N_IMAGINARY)
917 /* I or J suffix. */
918 copylen--;
920 copy = alloca (copylen + 1);
921 memcpy (copy, token->val.str.text, copylen);
922 copy[copylen] = '\0';
924 real_from_string (&real, copy);
925 real_convert (&real, TYPE_MODE (type), &real);
927 /* A diagnostic is required for "soft" overflow by some ISO C
928 testsuites. This is not pedwarn, because some people don't want
929 an error for this.
930 ??? That's a dubious reason... is this a mandatory diagnostic or
931 isn't it? -- zw, 2001-08-21. */
932 if (REAL_VALUE_ISINF (real) && pedantic)
933 warning ("floating constant exceeds range of \"%s\"", typename);
935 /* Create a node with determined type and value. */
936 value = build_real (type, real);
937 if (flags & CPP_N_IMAGINARY)
938 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
940 return value;
943 static tree
944 lex_string (str, len, wide)
945 const unsigned char *str;
946 unsigned int len;
947 int wide;
949 tree value;
950 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
951 char *q = buf;
952 const unsigned char *p = str, *limit = str + len;
953 cppchar_t c;
955 #ifdef MULTIBYTE_CHARS
956 /* Reset multibyte conversion state. */
957 (void) local_mbtowc (NULL, NULL, 0);
958 #endif
960 while (p < limit)
962 #ifdef MULTIBYTE_CHARS
963 wchar_t wc;
964 int char_len;
966 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
967 if (char_len == -1)
969 warning ("ignoring invalid multibyte character");
970 char_len = 1;
971 c = *p++;
973 else
975 p += char_len;
976 c = wc;
978 #else
979 c = *p++;
980 #endif
982 if (c == '\\' && !ignore_escape_flag)
983 c = cpp_parse_escape (parse_in, &p, limit, wide);
985 /* Add this single character into the buffer either as a wchar_t,
986 a multibyte sequence, or as a single byte. */
987 if (wide)
989 unsigned charwidth = TYPE_PRECISION (char_type_node);
990 unsigned bytemask = (1 << charwidth) - 1;
991 int byte;
993 for (byte = 0; byte < WCHAR_BYTES; ++byte)
995 int n;
996 if (byte >= (int) sizeof (c))
997 n = 0;
998 else
999 n = (c >> (byte * charwidth)) & bytemask;
1000 if (BYTES_BIG_ENDIAN)
1001 q[WCHAR_BYTES - byte - 1] = n;
1002 else
1003 q[byte] = n;
1005 q += WCHAR_BYTES;
1007 #ifdef MULTIBYTE_CHARS
1008 else if (char_len > 1)
1010 /* We're dealing with a multibyte character. */
1011 for ( ; char_len >0; --char_len)
1013 *q++ = *(p - char_len);
1016 #endif
1017 else
1019 *q++ = c;
1023 /* Terminate the string value, either with a single byte zero
1024 or with a wide zero. */
1026 if (wide)
1028 memset (q, 0, WCHAR_BYTES);
1029 q += WCHAR_BYTES;
1031 else
1033 *q++ = '\0';
1036 value = build_string (q - buf, buf);
1038 if (wide)
1039 TREE_TYPE (value) = wchar_array_type_node;
1040 else
1041 TREE_TYPE (value) = char_array_type_node;
1042 return value;
1045 /* Converts a (possibly wide) character constant token into a tree. */
1046 static tree
1047 lex_charconst (token)
1048 const cpp_token *token;
1050 cppchar_t result;
1051 tree type, value;
1052 unsigned int chars_seen;
1053 int unsignedp;
1055 result = cpp_interpret_charconst (parse_in, token,
1056 &chars_seen, &unsignedp);
1058 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1059 before possibly widening to HOST_WIDE_INT for build_int_2. */
1060 if (unsignedp || (cppchar_signed_t) result >= 0)
1061 value = build_int_2 (result, 0);
1062 else
1063 value = build_int_2 ((cppchar_signed_t) result, -1);
1065 if (token->type == CPP_WCHAR)
1066 type = wchar_type_node;
1067 /* In C, a character constant has type 'int'.
1068 In C++ 'char', but multi-char charconsts have type 'int'. */
1069 else if ((c_language == clk_c) || chars_seen > 1)
1070 type = integer_type_node;
1071 else
1072 type = char_type_node;
1074 TREE_TYPE (value) = type;
1075 return value;