2002-08-22 Paolo Carlini <pcarlini@unitus.it>
[official-gcc.git] / gcc / c-lex.c
blob1805ccbed0317722118bf9a597d5b6e4211aec82
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
25 #include "real.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "expr.h"
29 #include "input.h"
30 #include "output.h"
31 #include "c-tree.h"
32 #include "c-common.h"
33 #include "flags.h"
34 #include "timevar.h"
35 #include "cpplib.h"
36 #include "c-pragma.h"
37 #include "toplev.h"
38 #include "intl.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
41 #include "debug.h"
43 #ifdef MULTIBYTE_CHARS
44 #include "mbchar.h"
45 #include <locale.h>
46 #endif /* MULTIBYTE_CHARS */
48 /* The current line map. */
49 static const struct line_map *map;
51 /* The line used to refresh the lineno global variable after each token. */
52 static unsigned int src_lineno;
54 /* We may keep statistics about how long which files took to compile. */
55 static int header_time, body_time;
56 static splay_tree file_info_tree;
58 /* File used for outputting assembler code. */
59 extern FILE *asm_out_file;
61 #undef WCHAR_TYPE_SIZE
62 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
64 /* Number of bytes in a wide character. */
65 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
67 int pending_lang_change; /* If we need to switch languages - C++ only */
68 int c_header_level; /* depth in C headers - C++ only */
70 /* Nonzero tells yylex to ignore \ in string constants. */
71 static int ignore_escape_flag;
73 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
74 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
75 static enum integer_type_kind
76 narrowest_unsigned_type PARAMS ((tree, unsigned int));
77 static enum integer_type_kind
78 narrowest_signed_type PARAMS ((tree, unsigned int));
79 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
80 int));
81 static tree lex_charconst PARAMS ((const cpp_token *));
82 static void update_header_times PARAMS ((const char *));
83 static int dump_one_header PARAMS ((splay_tree_node, void *));
84 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
85 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
86 const cpp_string *));
87 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
88 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
89 static void cb_define PARAMS ((cpp_reader *, unsigned int,
90 cpp_hashnode *));
91 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
92 cpp_hashnode *));
94 const char *
95 init_c_lex (filename)
96 const char *filename;
98 struct cpp_callbacks *cb;
99 struct c_fileinfo *toplevel;
101 /* Set up filename timing. Must happen before cpp_read_main_file. */
102 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
104 (splay_tree_delete_value_fn)free);
105 toplevel = get_fileinfo ("<top level>");
106 if (flag_detailed_statistics)
108 header_time = 0;
109 body_time = get_run_time ();
110 toplevel->time = body_time;
113 #ifdef MULTIBYTE_CHARS
114 /* Change to the native locale for multibyte conversions. */
115 setlocale (LC_CTYPE, "");
116 GET_ENVIRONMENT (literal_codeset, "LANG");
117 #endif
119 cb = cpp_get_callbacks (parse_in);
121 cb->line_change = cb_line_change;
122 cb->ident = cb_ident;
123 cb->file_change = cb_file_change;
124 cb->def_pragma = cb_def_pragma;
126 /* Set the debug callbacks if we can use them. */
127 if (debug_info_level == DINFO_LEVEL_VERBOSE
128 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
129 || write_symbols == VMS_AND_DWARF2_DEBUG))
131 cb->define = cb_define;
132 cb->undef = cb_undef;
135 /* Start it at 0. */
136 lineno = 0;
138 return cpp_read_main_file (parse_in, filename, ident_hash);
141 /* A thin wrapper around the real parser that initializes the
142 integrated preprocessor after debug output has been initialized.
143 Also, make sure the start_source_file debug hook gets called for
144 the primary source file. */
146 void
147 c_common_parse_file (set_yydebug)
148 int set_yydebug ATTRIBUTE_UNUSED;
150 #if YYDEBUG != 0
151 yydebug = set_yydebug;
152 #else
153 warning ("YYDEBUG not defined");
154 #endif
156 (*debug_hooks->start_source_file) (lineno, input_filename);
157 cpp_finish_options (parse_in);
159 yyparse ();
160 free_parser_stacks ();
163 struct c_fileinfo *
164 get_fileinfo (name)
165 const char *name;
167 splay_tree_node n;
168 struct c_fileinfo *fi;
170 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
171 if (n)
172 return (struct c_fileinfo *) n->value;
174 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
175 fi->time = 0;
176 fi->interface_only = 0;
177 fi->interface_unknown = 1;
178 splay_tree_insert (file_info_tree, (splay_tree_key) name,
179 (splay_tree_value) fi);
180 return fi;
183 static void
184 update_header_times (name)
185 const char *name;
187 /* Changing files again. This means currently collected time
188 is charged against header time, and body time starts back at 0. */
189 if (flag_detailed_statistics)
191 int this_time = get_run_time ();
192 struct c_fileinfo *file = get_fileinfo (name);
193 header_time += this_time - body_time;
194 file->time += this_time - body_time;
195 body_time = this_time;
199 static int
200 dump_one_header (n, dummy)
201 splay_tree_node n;
202 void *dummy ATTRIBUTE_UNUSED;
204 print_time ((const char *) n->key,
205 ((struct c_fileinfo *) n->value)->time);
206 return 0;
209 void
210 dump_time_statistics ()
212 struct c_fileinfo *file = get_fileinfo (input_filename);
213 int this_time = get_run_time ();
214 file->time += this_time - body_time;
216 fprintf (stderr, "\n******\n");
217 print_time ("header files (total)", header_time);
218 print_time ("main file (total)", this_time - body_time);
219 fprintf (stderr, "ratio = %g : 1\n",
220 (double)header_time / (double)(this_time - body_time));
221 fprintf (stderr, "\n******\n");
223 splay_tree_foreach (file_info_tree, dump_one_header, 0);
226 static void
227 cb_ident (pfile, line, str)
228 cpp_reader *pfile ATTRIBUTE_UNUSED;
229 unsigned int line ATTRIBUTE_UNUSED;
230 const cpp_string *str ATTRIBUTE_UNUSED;
232 #ifdef ASM_OUTPUT_IDENT
233 if (! flag_no_ident)
235 /* Convert escapes in the string. */
236 tree value = lex_string (str->text, str->len, 0);
237 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
239 #endif
242 /* Called at the start of every non-empty line. TOKEN is the first
243 lexed token on the line. Used for diagnostic line numbers. */
244 static void
245 cb_line_change (pfile, token, parsing_args)
246 cpp_reader *pfile ATTRIBUTE_UNUSED;
247 const cpp_token *token;
248 int parsing_args ATTRIBUTE_UNUSED;
250 src_lineno = SOURCE_LINE (map, token->line);
253 static void
254 cb_file_change (pfile, new_map)
255 cpp_reader *pfile ATTRIBUTE_UNUSED;
256 const struct line_map *new_map;
258 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
260 if (new_map->reason == LC_ENTER)
262 /* Don't stack the main buffer on the input stack;
263 we already did in compile_file. */
264 if (map == NULL)
265 main_input_filename = new_map->to_file;
266 else
268 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
270 lineno = included_at;
271 push_srcloc (new_map->to_file, 1);
272 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
273 #ifndef NO_IMPLICIT_EXTERN_C
274 if (c_header_level)
275 ++c_header_level;
276 else if (new_map->sysp == 2)
278 c_header_level = 1;
279 ++pending_lang_change;
281 #endif
284 else if (new_map->reason == LC_LEAVE)
286 #ifndef NO_IMPLICIT_EXTERN_C
287 if (c_header_level && --c_header_level == 0)
289 if (new_map->sysp == 2)
290 warning ("badly nested C headers from preprocessor");
291 --pending_lang_change;
293 #endif
294 pop_srcloc ();
296 (*debug_hooks->end_source_file) (to_line);
299 update_header_times (new_map->to_file);
300 in_system_header = new_map->sysp != 0;
301 input_filename = new_map->to_file;
302 lineno = to_line;
303 map = new_map;
305 /* Hook for C++. */
306 extract_interface_info ();
309 static void
310 cb_def_pragma (pfile, line)
311 cpp_reader *pfile;
312 unsigned int line;
314 /* Issue a warning message if we have been asked to do so. Ignore
315 unknown pragmas in system headers unless an explicit
316 -Wunknown-pragmas has been given. */
317 if (warn_unknown_pragmas > in_system_header)
319 const unsigned char *space, *name;
320 const cpp_token *s;
322 space = name = (const unsigned char *) "";
323 s = cpp_get_token (pfile);
324 if (s->type != CPP_EOF)
326 space = cpp_token_as_text (pfile, s);
327 s = cpp_get_token (pfile);
328 if (s->type == CPP_NAME)
329 name = cpp_token_as_text (pfile, s);
332 lineno = SOURCE_LINE (map, line);
333 warning ("ignoring #pragma %s %s", space, name);
337 /* #define callback for DWARF and DWARF2 debug info. */
338 static void
339 cb_define (pfile, line, node)
340 cpp_reader *pfile;
341 unsigned int line;
342 cpp_hashnode *node;
344 (*debug_hooks->define) (SOURCE_LINE (map, line),
345 (const char *) cpp_macro_definition (pfile, node));
348 /* #undef callback for DWARF and DWARF2 debug info. */
349 static void
350 cb_undef (pfile, line, node)
351 cpp_reader *pfile ATTRIBUTE_UNUSED;
352 unsigned int line;
353 cpp_hashnode *node;
355 (*debug_hooks->undef) (SOURCE_LINE (map, line),
356 (const char *) NODE_NAME (node));
359 #if 0 /* not yet */
360 /* Returns nonzero if C is a universal-character-name. Give an error if it
361 is not one which may appear in an identifier, as per [extendid].
363 Note that extended character support in identifiers has not yet been
364 implemented. It is my personal opinion that this is not a desirable
365 feature. Portable code cannot count on support for more than the basic
366 identifier character set. */
368 static inline int
369 is_extended_char (c)
370 int c;
372 #ifdef TARGET_EBCDIC
373 return 0;
374 #else
375 /* ASCII. */
376 if (c < 0x7f)
377 return 0;
379 /* None of the valid chars are outside the Basic Multilingual Plane (the
380 low 16 bits). */
381 if (c > 0xffff)
383 error ("universal-character-name '\\U%08x' not valid in identifier", c);
384 return 1;
387 /* Latin */
388 if ((c >= 0x00c0 && c <= 0x00d6)
389 || (c >= 0x00d8 && c <= 0x00f6)
390 || (c >= 0x00f8 && c <= 0x01f5)
391 || (c >= 0x01fa && c <= 0x0217)
392 || (c >= 0x0250 && c <= 0x02a8)
393 || (c >= 0x1e00 && c <= 0x1e9a)
394 || (c >= 0x1ea0 && c <= 0x1ef9))
395 return 1;
397 /* Greek */
398 if ((c == 0x0384)
399 || (c >= 0x0388 && c <= 0x038a)
400 || (c == 0x038c)
401 || (c >= 0x038e && c <= 0x03a1)
402 || (c >= 0x03a3 && c <= 0x03ce)
403 || (c >= 0x03d0 && c <= 0x03d6)
404 || (c == 0x03da)
405 || (c == 0x03dc)
406 || (c == 0x03de)
407 || (c == 0x03e0)
408 || (c >= 0x03e2 && c <= 0x03f3)
409 || (c >= 0x1f00 && c <= 0x1f15)
410 || (c >= 0x1f18 && c <= 0x1f1d)
411 || (c >= 0x1f20 && c <= 0x1f45)
412 || (c >= 0x1f48 && c <= 0x1f4d)
413 || (c >= 0x1f50 && c <= 0x1f57)
414 || (c == 0x1f59)
415 || (c == 0x1f5b)
416 || (c == 0x1f5d)
417 || (c >= 0x1f5f && c <= 0x1f7d)
418 || (c >= 0x1f80 && c <= 0x1fb4)
419 || (c >= 0x1fb6 && c <= 0x1fbc)
420 || (c >= 0x1fc2 && c <= 0x1fc4)
421 || (c >= 0x1fc6 && c <= 0x1fcc)
422 || (c >= 0x1fd0 && c <= 0x1fd3)
423 || (c >= 0x1fd6 && c <= 0x1fdb)
424 || (c >= 0x1fe0 && c <= 0x1fec)
425 || (c >= 0x1ff2 && c <= 0x1ff4)
426 || (c >= 0x1ff6 && c <= 0x1ffc))
427 return 1;
429 /* Cyrillic */
430 if ((c >= 0x0401 && c <= 0x040d)
431 || (c >= 0x040f && c <= 0x044f)
432 || (c >= 0x0451 && c <= 0x045c)
433 || (c >= 0x045e && c <= 0x0481)
434 || (c >= 0x0490 && c <= 0x04c4)
435 || (c >= 0x04c7 && c <= 0x04c8)
436 || (c >= 0x04cb && c <= 0x04cc)
437 || (c >= 0x04d0 && c <= 0x04eb)
438 || (c >= 0x04ee && c <= 0x04f5)
439 || (c >= 0x04f8 && c <= 0x04f9))
440 return 1;
442 /* Armenian */
443 if ((c >= 0x0531 && c <= 0x0556)
444 || (c >= 0x0561 && c <= 0x0587))
445 return 1;
447 /* Hebrew */
448 if ((c >= 0x05d0 && c <= 0x05ea)
449 || (c >= 0x05f0 && c <= 0x05f4))
450 return 1;
452 /* Arabic */
453 if ((c >= 0x0621 && c <= 0x063a)
454 || (c >= 0x0640 && c <= 0x0652)
455 || (c >= 0x0670 && c <= 0x06b7)
456 || (c >= 0x06ba && c <= 0x06be)
457 || (c >= 0x06c0 && c <= 0x06ce)
458 || (c >= 0x06e5 && c <= 0x06e7))
459 return 1;
461 /* Devanagari */
462 if ((c >= 0x0905 && c <= 0x0939)
463 || (c >= 0x0958 && c <= 0x0962))
464 return 1;
466 /* Bengali */
467 if ((c >= 0x0985 && c <= 0x098c)
468 || (c >= 0x098f && c <= 0x0990)
469 || (c >= 0x0993 && c <= 0x09a8)
470 || (c >= 0x09aa && c <= 0x09b0)
471 || (c == 0x09b2)
472 || (c >= 0x09b6 && c <= 0x09b9)
473 || (c >= 0x09dc && c <= 0x09dd)
474 || (c >= 0x09df && c <= 0x09e1)
475 || (c >= 0x09f0 && c <= 0x09f1))
476 return 1;
478 /* Gurmukhi */
479 if ((c >= 0x0a05 && c <= 0x0a0a)
480 || (c >= 0x0a0f && c <= 0x0a10)
481 || (c >= 0x0a13 && c <= 0x0a28)
482 || (c >= 0x0a2a && c <= 0x0a30)
483 || (c >= 0x0a32 && c <= 0x0a33)
484 || (c >= 0x0a35 && c <= 0x0a36)
485 || (c >= 0x0a38 && c <= 0x0a39)
486 || (c >= 0x0a59 && c <= 0x0a5c)
487 || (c == 0x0a5e))
488 return 1;
490 /* Gujarati */
491 if ((c >= 0x0a85 && c <= 0x0a8b)
492 || (c == 0x0a8d)
493 || (c >= 0x0a8f && c <= 0x0a91)
494 || (c >= 0x0a93 && c <= 0x0aa8)
495 || (c >= 0x0aaa && c <= 0x0ab0)
496 || (c >= 0x0ab2 && c <= 0x0ab3)
497 || (c >= 0x0ab5 && c <= 0x0ab9)
498 || (c == 0x0ae0))
499 return 1;
501 /* Oriya */
502 if ((c >= 0x0b05 && c <= 0x0b0c)
503 || (c >= 0x0b0f && c <= 0x0b10)
504 || (c >= 0x0b13 && c <= 0x0b28)
505 || (c >= 0x0b2a && c <= 0x0b30)
506 || (c >= 0x0b32 && c <= 0x0b33)
507 || (c >= 0x0b36 && c <= 0x0b39)
508 || (c >= 0x0b5c && c <= 0x0b5d)
509 || (c >= 0x0b5f && c <= 0x0b61))
510 return 1;
512 /* Tamil */
513 if ((c >= 0x0b85 && c <= 0x0b8a)
514 || (c >= 0x0b8e && c <= 0x0b90)
515 || (c >= 0x0b92 && c <= 0x0b95)
516 || (c >= 0x0b99 && c <= 0x0b9a)
517 || (c == 0x0b9c)
518 || (c >= 0x0b9e && c <= 0x0b9f)
519 || (c >= 0x0ba3 && c <= 0x0ba4)
520 || (c >= 0x0ba8 && c <= 0x0baa)
521 || (c >= 0x0bae && c <= 0x0bb5)
522 || (c >= 0x0bb7 && c <= 0x0bb9))
523 return 1;
525 /* Telugu */
526 if ((c >= 0x0c05 && c <= 0x0c0c)
527 || (c >= 0x0c0e && c <= 0x0c10)
528 || (c >= 0x0c12 && c <= 0x0c28)
529 || (c >= 0x0c2a && c <= 0x0c33)
530 || (c >= 0x0c35 && c <= 0x0c39)
531 || (c >= 0x0c60 && c <= 0x0c61))
532 return 1;
534 /* Kannada */
535 if ((c >= 0x0c85 && c <= 0x0c8c)
536 || (c >= 0x0c8e && c <= 0x0c90)
537 || (c >= 0x0c92 && c <= 0x0ca8)
538 || (c >= 0x0caa && c <= 0x0cb3)
539 || (c >= 0x0cb5 && c <= 0x0cb9)
540 || (c >= 0x0ce0 && c <= 0x0ce1))
541 return 1;
543 /* Malayalam */
544 if ((c >= 0x0d05 && c <= 0x0d0c)
545 || (c >= 0x0d0e && c <= 0x0d10)
546 || (c >= 0x0d12 && c <= 0x0d28)
547 || (c >= 0x0d2a && c <= 0x0d39)
548 || (c >= 0x0d60 && c <= 0x0d61))
549 return 1;
551 /* Thai */
552 if ((c >= 0x0e01 && c <= 0x0e30)
553 || (c >= 0x0e32 && c <= 0x0e33)
554 || (c >= 0x0e40 && c <= 0x0e46)
555 || (c >= 0x0e4f && c <= 0x0e5b))
556 return 1;
558 /* Lao */
559 if ((c >= 0x0e81 && c <= 0x0e82)
560 || (c == 0x0e84)
561 || (c == 0x0e87)
562 || (c == 0x0e88)
563 || (c == 0x0e8a)
564 || (c == 0x0e0d)
565 || (c >= 0x0e94 && c <= 0x0e97)
566 || (c >= 0x0e99 && c <= 0x0e9f)
567 || (c >= 0x0ea1 && c <= 0x0ea3)
568 || (c == 0x0ea5)
569 || (c == 0x0ea7)
570 || (c == 0x0eaa)
571 || (c == 0x0eab)
572 || (c >= 0x0ead && c <= 0x0eb0)
573 || (c == 0x0eb2)
574 || (c == 0x0eb3)
575 || (c == 0x0ebd)
576 || (c >= 0x0ec0 && c <= 0x0ec4)
577 || (c == 0x0ec6))
578 return 1;
580 /* Georgian */
581 if ((c >= 0x10a0 && c <= 0x10c5)
582 || (c >= 0x10d0 && c <= 0x10f6))
583 return 1;
585 /* Hiragana */
586 if ((c >= 0x3041 && c <= 0x3094)
587 || (c >= 0x309b && c <= 0x309e))
588 return 1;
590 /* Katakana */
591 if ((c >= 0x30a1 && c <= 0x30fe))
592 return 1;
594 /* Bopmofo */
595 if ((c >= 0x3105 && c <= 0x312c))
596 return 1;
598 /* Hangul */
599 if ((c >= 0x1100 && c <= 0x1159)
600 || (c >= 0x1161 && c <= 0x11a2)
601 || (c >= 0x11a8 && c <= 0x11f9))
602 return 1;
604 /* CJK Unified Ideographs */
605 if ((c >= 0xf900 && c <= 0xfa2d)
606 || (c >= 0xfb1f && c <= 0xfb36)
607 || (c >= 0xfb38 && c <= 0xfb3c)
608 || (c == 0xfb3e)
609 || (c >= 0xfb40 && c <= 0xfb41)
610 || (c >= 0xfb42 && c <= 0xfb44)
611 || (c >= 0xfb46 && c <= 0xfbb1)
612 || (c >= 0xfbd3 && c <= 0xfd3f)
613 || (c >= 0xfd50 && c <= 0xfd8f)
614 || (c >= 0xfd92 && c <= 0xfdc7)
615 || (c >= 0xfdf0 && c <= 0xfdfb)
616 || (c >= 0xfe70 && c <= 0xfe72)
617 || (c == 0xfe74)
618 || (c >= 0xfe76 && c <= 0xfefc)
619 || (c >= 0xff21 && c <= 0xff3a)
620 || (c >= 0xff41 && c <= 0xff5a)
621 || (c >= 0xff66 && c <= 0xffbe)
622 || (c >= 0xffc2 && c <= 0xffc7)
623 || (c >= 0xffca && c <= 0xffcf)
624 || (c >= 0xffd2 && c <= 0xffd7)
625 || (c >= 0xffda && c <= 0xffdc)
626 || (c >= 0x4e00 && c <= 0x9fa5))
627 return 1;
629 error ("universal-character-name '\\u%04x' not valid in identifier", c);
630 return 1;
631 #endif
634 /* Add the UTF-8 representation of C to the token_buffer. */
636 static void
637 utf8_extend_token (c)
638 int c;
640 int shift, mask;
642 if (c <= 0x0000007f)
644 extend_token (c);
645 return;
647 else if (c <= 0x000007ff)
648 shift = 6, mask = 0xc0;
649 else if (c <= 0x0000ffff)
650 shift = 12, mask = 0xe0;
651 else if (c <= 0x001fffff)
652 shift = 18, mask = 0xf0;
653 else if (c <= 0x03ffffff)
654 shift = 24, mask = 0xf8;
655 else
656 shift = 30, mask = 0xfc;
658 extend_token (mask | (c >> shift));
661 shift -= 6;
662 extend_token ((unsigned char) (0x80 | (c >> shift)));
664 while (shift);
666 #endif
669 c_lex (value)
670 tree *value;
672 const cpp_token *tok;
674 retry:
675 timevar_push (TV_CPP);
677 tok = cpp_get_token (parse_in);
678 while (tok->type == CPP_PADDING);
679 timevar_pop (TV_CPP);
681 /* The C++ front end does horrible things with the current line
682 number. To ensure an accurate line number, we must reset it
683 every time we return a token. */
684 lineno = src_lineno;
686 *value = NULL_TREE;
687 switch (tok->type)
689 /* Issue this error here, where we can get at tok->val.c. */
690 case CPP_OTHER:
691 if (ISGRAPH (tok->val.c))
692 error ("stray '%c' in program", tok->val.c);
693 else
694 error ("stray '\\%o' in program", tok->val.c);
695 goto retry;
697 case CPP_NAME:
698 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
699 break;
701 case CPP_NUMBER:
703 unsigned int flags = cpp_classify_number (parse_in, tok);
705 switch (flags & CPP_N_CATEGORY)
707 case CPP_N_INVALID:
708 /* cpplib has issued an error. */
709 break;
711 case CPP_N_INTEGER:
712 *value = interpret_integer (tok, flags);
713 break;
715 case CPP_N_FLOATING:
716 *value = interpret_float (tok, flags);
717 break;
719 default:
720 abort ();
723 break;
725 case CPP_CHAR:
726 case CPP_WCHAR:
727 *value = lex_charconst (tok);
728 break;
730 case CPP_STRING:
731 case CPP_WSTRING:
732 *value = lex_string (tok->val.str.text, tok->val.str.len,
733 tok->type == CPP_WSTRING);
734 break;
736 /* These tokens should not be visible outside cpplib. */
737 case CPP_HEADER_NAME:
738 case CPP_COMMENT:
739 case CPP_MACRO_ARG:
740 abort ();
742 default: break;
745 return tok->type;
748 /* Returns the narrowest C-visible unsigned type, starting with the
749 minimum specified by FLAGS, that can fit VALUE, or itk_none if
750 there isn't one. */
751 static enum integer_type_kind
752 narrowest_unsigned_type (value, flags)
753 tree value;
754 unsigned int flags;
756 enum integer_type_kind itk;
758 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
759 itk = itk_unsigned_int;
760 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
761 itk = itk_unsigned_long;
762 else
763 itk = itk_unsigned_long_long;
765 /* int_fits_type_p must think the type of its first argument is
766 wider than its second argument, or it won't do the proper check. */
767 TREE_TYPE (value) = widest_unsigned_literal_type_node;
769 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
770 if (int_fits_type_p (value, integer_types[itk]))
771 return itk;
773 return itk_none;
776 /* Ditto, but narrowest signed type. */
777 static enum integer_type_kind
778 narrowest_signed_type (value, flags)
779 tree value;
780 unsigned int flags;
782 enum integer_type_kind itk;
784 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
785 itk = itk_int;
786 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
787 itk = itk_long;
788 else
789 itk = itk_long_long;
791 /* int_fits_type_p must think the type of its first argument is
792 wider than its second argument, or it won't do the proper check. */
793 TREE_TYPE (value) = widest_unsigned_literal_type_node;
795 for (; itk < itk_none; itk += 2 /* skip signed types */)
796 if (int_fits_type_p (value, integer_types[itk]))
797 return itk;
799 return itk_none;
802 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
803 static tree
804 interpret_integer (token, flags)
805 const cpp_token *token;
806 unsigned int flags;
808 tree value, type;
809 enum integer_type_kind itk;
810 cpp_num integer;
811 cpp_options *options = cpp_get_options (parse_in);
813 integer = cpp_interpret_integer (parse_in, token, flags);
814 integer = cpp_num_sign_extend (integer, options->precision);
815 value = build_int_2_wide (integer.low, integer.high);
817 /* The type of a constant with a U suffix is straightforward. */
818 if (flags & CPP_N_UNSIGNED)
819 itk = narrowest_unsigned_type (value, flags);
820 else
822 /* The type of a potentially-signed integer constant varies
823 depending on the base it's in, the standard in use, and the
824 length suffixes. */
825 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
826 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
828 /* In both C89 and C99, octal and hex constants may be signed or
829 unsigned, whichever fits tighter. We do not warn about this
830 choice differing from the traditional choice, as the constant
831 is probably a bit pattern and either way will work. */
832 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
833 itk = MIN (itk_u, itk_s);
834 else
836 /* In C99, decimal constants are always signed.
837 In C89, decimal constants that don't fit in long have
838 undefined behaviour; we try to make them unsigned long.
839 In GCC's extended C89, that last is true of decimal
840 constants that don't fit in long long, too. */
842 itk = itk_s;
843 if (itk_s > itk_u && itk_s > itk_long)
845 if (!flag_isoc99)
847 if (itk_u < itk_unsigned_long)
848 itk_u = itk_unsigned_long;
849 itk = itk_u;
850 warning ("this decimal constant is unsigned only in ISO C90");
852 else if (warn_traditional)
853 warning ("this decimal constant would be unsigned in ISO C90");
858 if (itk == itk_none)
859 /* cpplib has already issued a warning for overflow. */
860 type = ((flags & CPP_N_UNSIGNED)
861 ? widest_unsigned_literal_type_node
862 : widest_integer_literal_type_node);
863 else
864 type = integer_types[itk];
866 if (itk > itk_unsigned_long
867 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
868 && ! in_system_header && ! flag_isoc99)
869 pedwarn ("integer constant is too large for \"%s\" type",
870 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
872 TREE_TYPE (value) = type;
874 /* Convert imaginary to a complex type. */
875 if (flags & CPP_N_IMAGINARY)
876 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
878 return value;
881 /* Interpret TOKEN, a floating point number with FLAGS as classified
882 by cpplib. */
883 static tree
884 interpret_float (token, flags)
885 const cpp_token *token;
886 unsigned int flags;
888 tree type;
889 tree value;
890 REAL_VALUE_TYPE real;
891 char *copy;
892 size_t copylen;
893 const char *typename;
895 /* FIXME: make %T work in error/warning, then we don't need typename. */
896 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
898 type = long_double_type_node;
899 typename = "long double";
901 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
902 || flag_single_precision_constant)
904 type = float_type_node;
905 typename = "float";
907 else
909 type = double_type_node;
910 typename = "double";
913 /* Copy the constant to a nul-terminated buffer. If the constant
914 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
915 can't handle them. */
916 copylen = token->val.str.len;
917 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
918 /* Must be an F or L suffix. */
919 copylen--;
920 if (flags & CPP_N_IMAGINARY)
921 /* I or J suffix. */
922 copylen--;
924 copy = alloca (copylen + 1);
925 memcpy (copy, token->val.str.text, copylen);
926 copy[copylen] = '\0';
928 /* The second argument, machine_mode, of REAL_VALUE_ATOF tells the
929 desired precision of the binary result of decimal-to-binary
930 conversion. */
931 if (flags & CPP_N_HEX)
932 real = REAL_VALUE_HTOF (copy, TYPE_MODE (type));
933 else
934 real = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
936 /* A diagnostic is required for "soft" overflow by some ISO C
937 testsuites. This is not pedwarn, because some people don't want
938 an error for this.
939 ??? That's a dubious reason... is this a mandatory diagnostic or
940 isn't it? -- zw, 2001-08-21. */
941 if (REAL_VALUE_ISINF (real) && pedantic)
942 warning ("floating constant exceeds range of \"%s\"", typename);
944 /* Create a node with determined type and value. */
945 value = build_real (type, real);
946 if (flags & CPP_N_IMAGINARY)
947 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
949 return value;
952 static tree
953 lex_string (str, len, wide)
954 const unsigned char *str;
955 unsigned int len;
956 int wide;
958 tree value;
959 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
960 char *q = buf;
961 const unsigned char *p = str, *limit = str + len;
962 cppchar_t c;
964 #ifdef MULTIBYTE_CHARS
965 /* Reset multibyte conversion state. */
966 (void) local_mbtowc (NULL, NULL, 0);
967 #endif
969 while (p < limit)
971 #ifdef MULTIBYTE_CHARS
972 wchar_t wc;
973 int char_len;
975 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
976 if (char_len == -1)
978 warning ("ignoring invalid multibyte character");
979 char_len = 1;
980 c = *p++;
982 else
984 p += char_len;
985 c = wc;
987 #else
988 c = *p++;
989 #endif
991 if (c == '\\' && !ignore_escape_flag)
992 c = cpp_parse_escape (parse_in, &p, limit, wide);
994 /* Add this single character into the buffer either as a wchar_t,
995 a multibyte sequence, or as a single byte. */
996 if (wide)
998 unsigned charwidth = TYPE_PRECISION (char_type_node);
999 unsigned bytemask = (1 << charwidth) - 1;
1000 int byte;
1002 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1004 int n;
1005 if (byte >= (int) sizeof (c))
1006 n = 0;
1007 else
1008 n = (c >> (byte * charwidth)) & bytemask;
1009 if (BYTES_BIG_ENDIAN)
1010 q[WCHAR_BYTES - byte - 1] = n;
1011 else
1012 q[byte] = n;
1014 q += WCHAR_BYTES;
1016 #ifdef MULTIBYTE_CHARS
1017 else if (char_len > 1)
1019 /* We're dealing with a multibyte character. */
1020 for ( ; char_len >0; --char_len)
1022 *q++ = *(p - char_len);
1025 #endif
1026 else
1028 *q++ = c;
1032 /* Terminate the string value, either with a single byte zero
1033 or with a wide zero. */
1035 if (wide)
1037 memset (q, 0, WCHAR_BYTES);
1038 q += WCHAR_BYTES;
1040 else
1042 *q++ = '\0';
1045 value = build_string (q - buf, buf);
1047 if (wide)
1048 TREE_TYPE (value) = wchar_array_type_node;
1049 else
1050 TREE_TYPE (value) = char_array_type_node;
1051 return value;
1054 /* Converts a (possibly wide) character constant token into a tree. */
1055 static tree
1056 lex_charconst (token)
1057 const cpp_token *token;
1059 cppchar_t result;
1060 tree type, value;
1061 unsigned int chars_seen;
1062 int unsignedp;
1064 result = cpp_interpret_charconst (parse_in, token,
1065 &chars_seen, &unsignedp);
1067 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1068 before possibly widening to HOST_WIDE_INT for build_int_2. */
1069 if (unsignedp || (cppchar_signed_t) result >= 0)
1070 value = build_int_2 (result, 0);
1071 else
1072 value = build_int_2 ((cppchar_signed_t) result, -1);
1074 if (token->type == CPP_WCHAR)
1075 type = wchar_type_node;
1076 /* In C, a character constant has type 'int'.
1077 In C++ 'char', but multi-char charconsts have type 'int'. */
1078 else if ((c_language == clk_c) || chars_seen > 1)
1079 type = integer_type_node;
1080 else
1081 type = char_type_node;
1083 TREE_TYPE (value) = type;
1084 return value;