gcc:
[official-gcc.git] / gcc / c-lex.c
blob95419c5b37ec263a502e6f8ef82c985901f04581
/* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 #ifdef MULTIBYTE_CHARS
46 #include "mbchar.h"
47 #include <locale.h>
48 #endif /* MULTIBYTE_CHARS */
50 /* The current line map. */
51 static const struct line_map *map;
53 /* The line used to refresh the lineno global variable after each token. */
54 static unsigned int src_lineno;
56 /* We may keep statistics about how long which files took to compile. */
57 static int header_time, body_time;
58 static splay_tree file_info_tree;
60 /* File used for outputting assembler code. */
61 extern FILE *asm_out_file;
63 #undef WCHAR_TYPE_SIZE
64 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
66 /* Number of bytes in a wide character. */
67 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
69 int pending_lang_change; /* If we need to switch languages - C++ only */
70 int c_header_level; /* depth in C headers - C++ only */
72 /* Nonzero tells yylex to ignore \ in string constants. */
73 static int ignore_escape_flag;
75 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
76 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
77 static enum integer_type_kind
78 narrowest_unsigned_type PARAMS ((tree, unsigned int));
79 static enum integer_type_kind
80 narrowest_signed_type PARAMS ((tree, unsigned int));
81 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
82 int));
83 static tree lex_charconst PARAMS ((const cpp_token *));
84 static void update_header_times PARAMS ((const char *));
85 static int dump_one_header PARAMS ((splay_tree_node, void *));
86 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
87 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
88 const cpp_string *));
89 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
90 static void cb_define PARAMS ((cpp_reader *, unsigned int,
91 cpp_hashnode *));
92 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
93 cpp_hashnode *));
95 void
96 init_c_lex ()
98 struct cpp_callbacks *cb;
99 struct c_fileinfo *toplevel;
101 /* Set up filename timing. Must happen before cpp_read_main_file. */
102 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
104 (splay_tree_delete_value_fn)free);
105 toplevel = get_fileinfo ("<top level>");
106 if (flag_detailed_statistics)
108 header_time = 0;
109 body_time = get_run_time ();
110 toplevel->time = body_time;
113 cb = cpp_get_callbacks (parse_in);
115 cb->line_change = cb_line_change;
116 cb->ident = cb_ident;
117 cb->def_pragma = cb_def_pragma;
118 cb->valid_pch = c_common_valid_pch;
119 cb->read_pch = c_common_read_pch;
121 /* Set the debug callbacks if we can use them. */
122 if (debug_info_level == DINFO_LEVEL_VERBOSE
123 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
124 || write_symbols == VMS_AND_DWARF2_DEBUG))
126 cb->define = cb_define;
127 cb->undef = cb_undef;
131 struct c_fileinfo *
132 get_fileinfo (name)
133 const char *name;
135 splay_tree_node n;
136 struct c_fileinfo *fi;
138 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
139 if (n)
140 return (struct c_fileinfo *) n->value;
142 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
143 fi->time = 0;
144 fi->interface_only = 0;
145 fi->interface_unknown = 1;
146 splay_tree_insert (file_info_tree, (splay_tree_key) name,
147 (splay_tree_value) fi);
148 return fi;
151 static void
152 update_header_times (name)
153 const char *name;
155 /* Changing files again. This means currently collected time
156 is charged against header time, and body time starts back at 0. */
157 if (flag_detailed_statistics)
159 int this_time = get_run_time ();
160 struct c_fileinfo *file = get_fileinfo (name);
161 header_time += this_time - body_time;
162 file->time += this_time - body_time;
163 body_time = this_time;
167 static int
168 dump_one_header (n, dummy)
169 splay_tree_node n;
170 void *dummy ATTRIBUTE_UNUSED;
172 print_time ((const char *) n->key,
173 ((struct c_fileinfo *) n->value)->time);
174 return 0;
177 void
178 dump_time_statistics ()
180 struct c_fileinfo *file = get_fileinfo (input_filename);
181 int this_time = get_run_time ();
182 file->time += this_time - body_time;
184 fprintf (stderr, "\n******\n");
185 print_time ("header files (total)", header_time);
186 print_time ("main file (total)", this_time - body_time);
187 fprintf (stderr, "ratio = %g : 1\n",
188 (double)header_time / (double)(this_time - body_time));
189 fprintf (stderr, "\n******\n");
191 splay_tree_foreach (file_info_tree, dump_one_header, 0);
194 static void
195 cb_ident (pfile, line, str)
196 cpp_reader *pfile ATTRIBUTE_UNUSED;
197 unsigned int line ATTRIBUTE_UNUSED;
198 const cpp_string *str ATTRIBUTE_UNUSED;
200 #ifdef ASM_OUTPUT_IDENT
201 if (! flag_no_ident)
203 /* Convert escapes in the string. */
204 tree value ATTRIBUTE_UNUSED = lex_string (str->text, str->len, 0);
205 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
207 #endif
210 /* Called at the start of every non-empty line. TOKEN is the first
211 lexed token on the line. Used for diagnostic line numbers. */
212 static void
213 cb_line_change (pfile, token, parsing_args)
214 cpp_reader *pfile ATTRIBUTE_UNUSED;
215 const cpp_token *token;
216 int parsing_args ATTRIBUTE_UNUSED;
218 src_lineno = SOURCE_LINE (map, token->line);
221 void
222 fe_file_change (new_map)
223 const struct line_map *new_map;
225 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
227 if (new_map->reason == LC_ENTER)
229 /* Don't stack the main buffer on the input stack;
230 we already did in compile_file. */
231 if (map == NULL)
232 main_input_filename = new_map->to_file;
233 else
235 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
237 lineno = included_at;
238 push_srcloc (new_map->to_file, 1);
239 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
240 #ifndef NO_IMPLICIT_EXTERN_C
241 if (c_header_level)
242 ++c_header_level;
243 else if (new_map->sysp == 2)
245 c_header_level = 1;
246 ++pending_lang_change;
248 #endif
251 else if (new_map->reason == LC_LEAVE)
253 #ifndef NO_IMPLICIT_EXTERN_C
254 if (c_header_level && --c_header_level == 0)
256 if (new_map->sysp == 2)
257 warning ("badly nested C headers from preprocessor");
258 --pending_lang_change;
260 #endif
261 pop_srcloc ();
263 (*debug_hooks->end_source_file) (to_line);
266 update_header_times (new_map->to_file);
267 in_system_header = new_map->sysp != 0;
268 input_filename = new_map->to_file;
269 lineno = to_line;
270 map = new_map;
272 /* Hook for C++. */
273 extract_interface_info ();
276 static void
277 cb_def_pragma (pfile, line)
278 cpp_reader *pfile;
279 unsigned int line;
281 /* Issue a warning message if we have been asked to do so. Ignore
282 unknown pragmas in system headers unless an explicit
283 -Wunknown-pragmas has been given. */
284 if (warn_unknown_pragmas > in_system_header)
286 const unsigned char *space, *name;
287 const cpp_token *s;
289 space = name = (const unsigned char *) "";
290 s = cpp_get_token (pfile);
291 if (s->type != CPP_EOF)
293 space = cpp_token_as_text (pfile, s);
294 s = cpp_get_token (pfile);
295 if (s->type == CPP_NAME)
296 name = cpp_token_as_text (pfile, s);
299 lineno = SOURCE_LINE (map, line);
300 warning ("ignoring #pragma %s %s", space, name);
304 /* #define callback for DWARF and DWARF2 debug info. */
305 static void
306 cb_define (pfile, line, node)
307 cpp_reader *pfile;
308 unsigned int line;
309 cpp_hashnode *node;
311 (*debug_hooks->define) (SOURCE_LINE (map, line),
312 (const char *) cpp_macro_definition (pfile, node));
315 /* #undef callback for DWARF and DWARF2 debug info. */
316 static void
317 cb_undef (pfile, line, node)
318 cpp_reader *pfile ATTRIBUTE_UNUSED;
319 unsigned int line;
320 cpp_hashnode *node;
322 (*debug_hooks->undef) (SOURCE_LINE (map, line),
323 (const char *) NODE_NAME (node));
#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao */
  /* NOTE(review): 0x0e0d below is already covered by the Thai range
     0x0e01-0x0e30 tested above, so this entry can never be what
     matches; it may be a transcription of 0x0e8d.  Confirm against
     the standard's table before changing it.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e0d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopmofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  /* Not in any of the tables above: still a universal-character-name,
     but not one allowed in an identifier.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}

/* Add the UTF-8 representation of C to the token_buffer.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  /* SHIFT is the bit position of the payload carried by the leading
     byte; MASK is that byte's length-marker prefix.  */
  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Each continuation byte is 10xxxxxx: keep only the six payload
         bits for this position so higher bits of C cannot leak into
         the 0x80 marker.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
#endif
636 c_lex (value)
637 tree *value;
639 const cpp_token *tok;
641 retry:
642 timevar_push (TV_CPP);
644 tok = cpp_get_token (parse_in);
645 while (tok->type == CPP_PADDING);
646 timevar_pop (TV_CPP);
648 /* The C++ front end does horrible things with the current line
649 number. To ensure an accurate line number, we must reset it
650 every time we return a token. */
651 lineno = src_lineno;
653 *value = NULL_TREE;
654 switch (tok->type)
656 /* Issue this error here, where we can get at tok->val.c. */
657 case CPP_OTHER:
658 if (ISGRAPH (tok->val.c))
659 error ("stray '%c' in program", tok->val.c);
660 else
661 error ("stray '\\%o' in program", tok->val.c);
662 goto retry;
664 case CPP_NAME:
665 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
666 break;
668 case CPP_NUMBER:
670 unsigned int flags = cpp_classify_number (parse_in, tok);
672 switch (flags & CPP_N_CATEGORY)
674 case CPP_N_INVALID:
675 /* cpplib has issued an error. */
676 *value = error_mark_node;
677 break;
679 case CPP_N_INTEGER:
680 *value = interpret_integer (tok, flags);
681 break;
683 case CPP_N_FLOATING:
684 *value = interpret_float (tok, flags);
685 break;
687 default:
688 abort ();
691 break;
693 case CPP_CHAR:
694 case CPP_WCHAR:
695 *value = lex_charconst (tok);
696 break;
698 case CPP_STRING:
699 case CPP_WSTRING:
700 *value = lex_string (tok->val.str.text, tok->val.str.len,
701 tok->type == CPP_WSTRING);
702 break;
704 /* These tokens should not be visible outside cpplib. */
705 case CPP_HEADER_NAME:
706 case CPP_COMMENT:
707 case CPP_MACRO_ARG:
708 abort ();
710 default: break;
713 return tok->type;
716 /* Returns the narrowest C-visible unsigned type, starting with the
717 minimum specified by FLAGS, that can fit VALUE, or itk_none if
718 there isn't one. */
719 static enum integer_type_kind
720 narrowest_unsigned_type (value, flags)
721 tree value;
722 unsigned int flags;
724 enum integer_type_kind itk;
726 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
727 itk = itk_unsigned_int;
728 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
729 itk = itk_unsigned_long;
730 else
731 itk = itk_unsigned_long_long;
733 /* int_fits_type_p must think the type of its first argument is
734 wider than its second argument, or it won't do the proper check. */
735 TREE_TYPE (value) = widest_unsigned_literal_type_node;
737 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
738 if (int_fits_type_p (value, integer_types[itk]))
739 return itk;
741 return itk_none;
744 /* Ditto, but narrowest signed type. */
745 static enum integer_type_kind
746 narrowest_signed_type (value, flags)
747 tree value;
748 unsigned int flags;
750 enum integer_type_kind itk;
752 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
753 itk = itk_int;
754 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
755 itk = itk_long;
756 else
757 itk = itk_long_long;
759 /* int_fits_type_p must think the type of its first argument is
760 wider than its second argument, or it won't do the proper check. */
761 TREE_TYPE (value) = widest_unsigned_literal_type_node;
763 for (; itk < itk_none; itk += 2 /* skip signed types */)
764 if (int_fits_type_p (value, integer_types[itk]))
765 return itk;
767 return itk_none;
770 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
771 static tree
772 interpret_integer (token, flags)
773 const cpp_token *token;
774 unsigned int flags;
776 tree value, type;
777 enum integer_type_kind itk;
778 cpp_num integer;
779 cpp_options *options = cpp_get_options (parse_in);
781 integer = cpp_interpret_integer (parse_in, token, flags);
782 integer = cpp_num_sign_extend (integer, options->precision);
783 value = build_int_2_wide (integer.low, integer.high);
785 /* The type of a constant with a U suffix is straightforward. */
786 if (flags & CPP_N_UNSIGNED)
787 itk = narrowest_unsigned_type (value, flags);
788 else
790 /* The type of a potentially-signed integer constant varies
791 depending on the base it's in, the standard in use, and the
792 length suffixes. */
793 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
794 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
796 /* In both C89 and C99, octal and hex constants may be signed or
797 unsigned, whichever fits tighter. We do not warn about this
798 choice differing from the traditional choice, as the constant
799 is probably a bit pattern and either way will work. */
800 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
801 itk = MIN (itk_u, itk_s);
802 else
804 /* In C99, decimal constants are always signed.
805 In C89, decimal constants that don't fit in long have
806 undefined behavior; we try to make them unsigned long.
807 In GCC's extended C89, that last is true of decimal
808 constants that don't fit in long long, too. */
810 itk = itk_s;
811 if (itk_s > itk_u && itk_s > itk_long)
813 if (!flag_isoc99)
815 if (itk_u < itk_unsigned_long)
816 itk_u = itk_unsigned_long;
817 itk = itk_u;
818 warning ("this decimal constant is unsigned only in ISO C90");
820 else if (warn_traditional)
821 warning ("this decimal constant would be unsigned in ISO C90");
826 if (itk == itk_none)
827 /* cpplib has already issued a warning for overflow. */
828 type = ((flags & CPP_N_UNSIGNED)
829 ? widest_unsigned_literal_type_node
830 : widest_integer_literal_type_node);
831 else
832 type = integer_types[itk];
834 if (itk > itk_unsigned_long
835 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
836 && ! in_system_header && ! flag_isoc99)
837 pedwarn ("integer constant is too large for \"%s\" type",
838 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
840 TREE_TYPE (value) = type;
842 /* Convert imaginary to a complex type. */
843 if (flags & CPP_N_IMAGINARY)
844 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
846 return value;
849 /* Interpret TOKEN, a floating point number with FLAGS as classified
850 by cpplib. */
851 static tree
852 interpret_float (token, flags)
853 const cpp_token *token;
854 unsigned int flags;
856 tree type;
857 tree value;
858 REAL_VALUE_TYPE real;
859 char *copy;
860 size_t copylen;
861 const char *typename;
863 /* FIXME: make %T work in error/warning, then we don't need typename. */
864 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
866 type = long_double_type_node;
867 typename = "long double";
869 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
870 || flag_single_precision_constant)
872 type = float_type_node;
873 typename = "float";
875 else
877 type = double_type_node;
878 typename = "double";
881 /* Copy the constant to a nul-terminated buffer. If the constant
882 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
883 can't handle them. */
884 copylen = token->val.str.len;
885 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
886 /* Must be an F or L suffix. */
887 copylen--;
888 if (flags & CPP_N_IMAGINARY)
889 /* I or J suffix. */
890 copylen--;
892 copy = alloca (copylen + 1);
893 memcpy (copy, token->val.str.text, copylen);
894 copy[copylen] = '\0';
896 real_from_string (&real, copy);
897 real_convert (&real, TYPE_MODE (type), &real);
899 /* A diagnostic is required for "soft" overflow by some ISO C
900 testsuites. This is not pedwarn, because some people don't want
901 an error for this.
902 ??? That's a dubious reason... is this a mandatory diagnostic or
903 isn't it? -- zw, 2001-08-21. */
904 if (REAL_VALUE_ISINF (real) && pedantic)
905 warning ("floating constant exceeds range of \"%s\"", typename);
907 /* Create a node with determined type and value. */
908 value = build_real (type, real);
909 if (flags & CPP_N_IMAGINARY)
910 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
912 return value;
915 static tree
916 lex_string (str, len, wide)
917 const unsigned char *str;
918 unsigned int len;
919 int wide;
921 tree value;
922 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
923 char *q = buf;
924 const unsigned char *p = str, *limit = str + len;
925 cppchar_t c;
927 #ifdef MULTIBYTE_CHARS
928 /* Reset multibyte conversion state. */
929 (void) local_mbtowc (NULL, NULL, 0);
930 #endif
932 while (p < limit)
934 #ifdef MULTIBYTE_CHARS
935 wchar_t wc;
936 int char_len;
938 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
939 if (char_len == -1)
941 warning ("ignoring invalid multibyte character");
942 char_len = 1;
943 c = *p++;
945 else
947 p += char_len;
948 c = wc;
950 #else
951 c = *p++;
952 #endif
954 if (c == '\\' && !ignore_escape_flag)
955 c = cpp_parse_escape (parse_in, &p, limit, wide);
957 /* Add this single character into the buffer either as a wchar_t,
958 a multibyte sequence, or as a single byte. */
959 if (wide)
961 unsigned charwidth = TYPE_PRECISION (char_type_node);
962 unsigned bytemask = (1 << charwidth) - 1;
963 int byte;
965 for (byte = 0; byte < WCHAR_BYTES; ++byte)
967 int n;
968 if (byte >= (int) sizeof (c))
969 n = 0;
970 else
971 n = (c >> (byte * charwidth)) & bytemask;
972 if (BYTES_BIG_ENDIAN)
973 q[WCHAR_BYTES - byte - 1] = n;
974 else
975 q[byte] = n;
977 q += WCHAR_BYTES;
979 #ifdef MULTIBYTE_CHARS
980 else if (char_len > 1)
982 /* We're dealing with a multibyte character. */
983 for ( ; char_len >0; --char_len)
985 *q++ = *(p - char_len);
988 #endif
989 else
991 *q++ = c;
995 /* Terminate the string value, either with a single byte zero
996 or with a wide zero. */
998 if (wide)
1000 memset (q, 0, WCHAR_BYTES);
1001 q += WCHAR_BYTES;
1003 else
1005 *q++ = '\0';
1008 value = build_string (q - buf, buf);
1010 if (wide)
1011 TREE_TYPE (value) = wchar_array_type_node;
1012 else
1013 TREE_TYPE (value) = char_array_type_node;
1014 return value;
1017 /* Converts a (possibly wide) character constant token into a tree. */
1018 static tree
1019 lex_charconst (token)
1020 const cpp_token *token;
1022 cppchar_t result;
1023 tree type, value;
1024 unsigned int chars_seen;
1025 int unsignedp;
1027 result = cpp_interpret_charconst (parse_in, token,
1028 &chars_seen, &unsignedp);
1030 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1031 before possibly widening to HOST_WIDE_INT for build_int_2. */
1032 if (unsignedp || (cppchar_signed_t) result >= 0)
1033 value = build_int_2 (result, 0);
1034 else
1035 value = build_int_2 ((cppchar_signed_t) result, -1);
1037 if (token->type == CPP_WCHAR)
1038 type = wchar_type_node;
1039 /* In C, a character constant has type 'int'.
1040 In C++ 'char', but multi-char charconsts have type 'int'. */
1041 else if ((c_language == clk_c) || chars_seen > 1)
1042 type = integer_type_node;
1043 else
1044 type = char_type_node;
1046 TREE_TYPE (value) = type;
1047 return value;