* predict.c (estimate_bb_frequencies): Correctly set
[official-gcc.git] / gcc / c-lex.c
blobe0617bd142d07f4c3953521dc676de7bfdd14a52
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
44 #include "c-incpath.h"
46 #ifdef MULTIBYTE_CHARS
47 #include "mbchar.h"
48 #include <locale.h>
49 #endif /* MULTIBYTE_CHARS */
51 /* The current line map. */
52 static const struct line_map *map;
54 /* The line used to refresh the lineno global variable after each token. */
55 static unsigned int src_lineno;
57 /* We may keep statistics about how long which files took to compile. */
/* header_time accumulates the run time charged to headers; body_time
   holds the run-time reading taken at the most recent timing update
   (see init_c_lex and update_header_times).  */
58 static int header_time, body_time;
/* Per-file c_fileinfo records, keyed by file name; created in
   init_c_lex and looked up or extended through get_fileinfo.  */
59 static splay_tree file_info_tree;
61 /* File used for outputting assembler code. */
62 extern FILE *asm_out_file;
/* Replace any existing WCHAR_TYPE_SIZE with the precision of
   wchar_type_node.  */
64 #undef WCHAR_TYPE_SIZE
65 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
67 /* Number of bytes in a wide character. */
68 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
/* Both of the following are updated by cb_file_change as headers are
   entered and left.  */
70 int pending_lang_change; /* If we need to switch languages - C++ only */
71 int c_header_level; /* depth in C headers - C++ only */
73 /* Nonzero tells yylex to ignore \ in string constants. */
/* In this file the flag is consulted by lex_string before it parses
   an escape sequence.  */
74 static int ignore_escape_flag;
/* Forward declarations for the file-local helpers defined below:
   first the token interpreters, then the timing helpers, then the
   callbacks that init_c_lex installs into cpplib.  */
76 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
77 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
78 static enum integer_type_kind
79 narrowest_unsigned_type PARAMS ((tree, unsigned int));
80 static enum integer_type_kind
81 narrowest_signed_type PARAMS ((tree, unsigned int));
82 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
83 int));
84 static tree lex_charconst PARAMS ((const cpp_token *));
/* Per-file timing statistics.  */
85 static void update_header_times PARAMS ((const char *));
86 static int dump_one_header PARAMS ((splay_tree_node, void *));
/* cpplib callbacks.  */
87 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
88 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
89 const cpp_string *));
90 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
91 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
92 static void cb_define PARAMS ((cpp_reader *, unsigned int,
93 cpp_hashnode *));
94 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
95 cpp_hashnode *));
97 const char *
98 init_c_lex (filename)
99 const char *filename;
101 struct cpp_callbacks *cb;
102 struct c_fileinfo *toplevel;
104 /* Set up filename timing. Must happen before cpp_read_main_file. */
105 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
107 (splay_tree_delete_value_fn)free);
108 toplevel = get_fileinfo ("<top level>");
109 if (flag_detailed_statistics)
111 header_time = 0;
112 body_time = get_run_time ();
113 toplevel->time = body_time;
116 #ifdef MULTIBYTE_CHARS
117 /* Change to the native locale for multibyte conversions. */
118 setlocale (LC_CTYPE, "");
119 GET_ENVIRONMENT (literal_codeset, "LANG");
120 #endif
122 cb = cpp_get_callbacks (parse_in);
124 cb->line_change = cb_line_change;
125 cb->ident = cb_ident;
126 cb->file_change = cb_file_change;
127 cb->def_pragma = cb_def_pragma;
128 cb->simplify_path = simplify_path;
129 cb->valid_pch = c_common_valid_pch;
130 cb->read_pch = c_common_read_pch;
132 /* Set the debug callbacks if we can use them. */
133 if (debug_info_level == DINFO_LEVEL_VERBOSE
134 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
135 || write_symbols == VMS_AND_DWARF2_DEBUG))
137 cb->define = cb_define;
138 cb->undef = cb_undef;
141 /* Start it at 0. */
142 lineno = 0;
144 return cpp_read_main_file (parse_in, filename, ident_hash);
147 /* A thin wrapper around the real parser that initializes the
148 integrated preprocessor after debug output has been initialized.
149 Also, make sure the start_source_file debug hook gets called for
150 the primary source file. */
152 void
153 c_common_parse_file (set_yydebug)
154 int set_yydebug ATTRIBUTE_UNUSED;
156 #if YYDEBUG != 0
157 yydebug = set_yydebug;
158 #else
159 warning ("YYDEBUG not defined");
160 #endif
162 (*debug_hooks->start_source_file) (lineno, input_filename);
163 cpp_finish_options (parse_in);
165 pch_init();
167 yyparse ();
168 free_parser_stacks ();
171 struct c_fileinfo *
172 get_fileinfo (name)
173 const char *name;
175 splay_tree_node n;
176 struct c_fileinfo *fi;
178 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
179 if (n)
180 return (struct c_fileinfo *) n->value;
182 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
183 fi->time = 0;
184 fi->interface_only = 0;
185 fi->interface_unknown = 1;
186 splay_tree_insert (file_info_tree, (splay_tree_key) name,
187 (splay_tree_value) fi);
188 return fi;
191 static void
192 update_header_times (name)
193 const char *name;
195 /* Changing files again. This means currently collected time
196 is charged against header time, and body time starts back at 0. */
197 if (flag_detailed_statistics)
199 int this_time = get_run_time ();
200 struct c_fileinfo *file = get_fileinfo (name);
201 header_time += this_time - body_time;
202 file->time += this_time - body_time;
203 body_time = this_time;
207 static int
208 dump_one_header (n, dummy)
209 splay_tree_node n;
210 void *dummy ATTRIBUTE_UNUSED;
212 print_time ((const char *) n->key,
213 ((struct c_fileinfo *) n->value)->time);
214 return 0;
217 void
218 dump_time_statistics ()
220 struct c_fileinfo *file = get_fileinfo (input_filename);
221 int this_time = get_run_time ();
222 file->time += this_time - body_time;
224 fprintf (stderr, "\n******\n");
225 print_time ("header files (total)", header_time);
226 print_time ("main file (total)", this_time - body_time);
227 fprintf (stderr, "ratio = %g : 1\n",
228 (double)header_time / (double)(this_time - body_time));
229 fprintf (stderr, "\n******\n");
231 splay_tree_foreach (file_info_tree, dump_one_header, 0);
234 static void
235 cb_ident (pfile, line, str)
236 cpp_reader *pfile ATTRIBUTE_UNUSED;
237 unsigned int line ATTRIBUTE_UNUSED;
238 const cpp_string *str ATTRIBUTE_UNUSED;
240 #ifdef ASM_OUTPUT_IDENT
241 if (! flag_no_ident)
243 /* Convert escapes in the string. */
244 tree value ATTRIBUTE_UNUSED = lex_string (str->text, str->len, 0);
245 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
247 #endif
250 /* Called at the start of every non-empty line. TOKEN is the first
251 lexed token on the line. Used for diagnostic line numbers. */
252 static void
253 cb_line_change (pfile, token, parsing_args)
254 cpp_reader *pfile ATTRIBUTE_UNUSED;
255 const cpp_token *token;
256 int parsing_args ATTRIBUTE_UNUSED;
258 src_lineno = SOURCE_LINE (map, token->line);
261 static void
262 cb_file_change (pfile, new_map)
263 cpp_reader *pfile ATTRIBUTE_UNUSED;
264 const struct line_map *new_map;
266 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
268 if (new_map->reason == LC_ENTER)
270 /* Don't stack the main buffer on the input stack;
271 we already did in compile_file. */
272 if (map == NULL)
273 main_input_filename = new_map->to_file;
274 else
276 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
278 lineno = included_at;
279 push_srcloc (new_map->to_file, 1);
280 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
281 #ifndef NO_IMPLICIT_EXTERN_C
282 if (c_header_level)
283 ++c_header_level;
284 else if (new_map->sysp == 2)
286 c_header_level = 1;
287 ++pending_lang_change;
289 #endif
292 else if (new_map->reason == LC_LEAVE)
294 #ifndef NO_IMPLICIT_EXTERN_C
295 if (c_header_level && --c_header_level == 0)
297 if (new_map->sysp == 2)
298 warning ("badly nested C headers from preprocessor");
299 --pending_lang_change;
301 #endif
302 pop_srcloc ();
304 (*debug_hooks->end_source_file) (to_line);
307 update_header_times (new_map->to_file);
308 in_system_header = new_map->sysp != 0;
309 input_filename = new_map->to_file;
310 lineno = to_line;
311 map = new_map;
313 /* Hook for C++. */
314 extract_interface_info ();
317 static void
318 cb_def_pragma (pfile, line)
319 cpp_reader *pfile;
320 unsigned int line;
322 /* Issue a warning message if we have been asked to do so. Ignore
323 unknown pragmas in system headers unless an explicit
324 -Wunknown-pragmas has been given. */
325 if (warn_unknown_pragmas > in_system_header)
327 const unsigned char *space, *name;
328 const cpp_token *s;
330 space = name = (const unsigned char *) "";
331 s = cpp_get_token (pfile);
332 if (s->type != CPP_EOF)
334 space = cpp_token_as_text (pfile, s);
335 s = cpp_get_token (pfile);
336 if (s->type == CPP_NAME)
337 name = cpp_token_as_text (pfile, s);
340 lineno = SOURCE_LINE (map, line);
341 warning ("ignoring #pragma %s %s", space, name);
345 /* #define callback for DWARF and DWARF2 debug info. */
346 static void
347 cb_define (pfile, line, node)
348 cpp_reader *pfile;
349 unsigned int line;
350 cpp_hashnode *node;
352 (*debug_hooks->define) (SOURCE_LINE (map, line),
353 (const char *) cpp_macro_definition (pfile, node));
356 /* #undef callback for DWARF and DWARF2 debug info. */
357 static void
358 cb_undef (pfile, line, node)
359 cpp_reader *pfile ATTRIBUTE_UNUSED;
360 unsigned int line;
361 cpp_hashnode *node;
363 (*debug_hooks->undef) (SOURCE_LINE (map, line),
364 (const char *) NODE_NAME (node));
367 #if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name, and gives an
   error if it is not one which may appear in an identifier, as per
   [extendid].  Values below 0x7f (plain ASCII) yield zero, as does
   every value on an EBCDIC target.

   Note that extended character support in identifiers has not yet
   been implemented.  The original author considered it an undesirable
   feature, since portable code cannot count on support for more than
   the basic identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive code point ranges that are valid in identifiers.  A
     single code point appears as a range whose ends coincide.  */
  static const struct { int first, last; } valid[] =
  {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  NOTE(review): 0x0e0d lies inside the Thai range above, so
       the entry is redundant here; 0x0e8d may have been intended.
       Kept exactly as in the original -- confirm against the
       standard's table before changing.  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e0d, 0x0e0d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane
     (the low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid / sizeof valid[0]; i++)
    if (c >= valid[i].first && c <= valid[i].last)
      return 1;

  /* A universal-character-name, but not one allowed in identifiers.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer, one byte per
   extend_token call.  Values up to 0x7f are emitted unchanged as a
   single byte.  Larger values get a lead byte carrying the length
   marker and the top bits, followed by continuation bytes that each
   carry exactly six payload bits under a 10xxxxxx marker.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the bits above the continuation
     bytes.  */
  extend_token (mask | (c >> shift));

  /* Continuation bytes.  The payload must be limited to six bits;
     otherwise bits belonging to earlier bytes leak into this one
     (e.g. 0xff would end in 0xff instead of 0xbf).  */
  do
    {
      shift -= 6;
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
674 #endif
677 c_lex (value)
678 tree *value;
680 const cpp_token *tok;
682 retry:
683 timevar_push (TV_CPP);
685 tok = cpp_get_token (parse_in);
686 while (tok->type == CPP_PADDING);
687 timevar_pop (TV_CPP);
689 /* The C++ front end does horrible things with the current line
690 number. To ensure an accurate line number, we must reset it
691 every time we return a token. */
692 lineno = src_lineno;
694 *value = NULL_TREE;
695 switch (tok->type)
697 /* Issue this error here, where we can get at tok->val.c. */
698 case CPP_OTHER:
699 if (ISGRAPH (tok->val.c))
700 error ("stray '%c' in program", tok->val.c);
701 else
702 error ("stray '\\%o' in program", tok->val.c);
703 goto retry;
705 case CPP_NAME:
706 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
707 break;
709 case CPP_NUMBER:
711 unsigned int flags = cpp_classify_number (parse_in, tok);
713 switch (flags & CPP_N_CATEGORY)
715 case CPP_N_INVALID:
716 /* cpplib has issued an error. */
717 *value = error_mark_node;
718 break;
720 case CPP_N_INTEGER:
721 *value = interpret_integer (tok, flags);
722 break;
724 case CPP_N_FLOATING:
725 *value = interpret_float (tok, flags);
726 break;
728 default:
729 abort ();
732 break;
734 case CPP_CHAR:
735 case CPP_WCHAR:
736 *value = lex_charconst (tok);
737 break;
739 case CPP_STRING:
740 case CPP_WSTRING:
741 *value = lex_string (tok->val.str.text, tok->val.str.len,
742 tok->type == CPP_WSTRING);
743 break;
745 /* These tokens should not be visible outside cpplib. */
746 case CPP_HEADER_NAME:
747 case CPP_COMMENT:
748 case CPP_MACRO_ARG:
749 abort ();
751 default: break;
754 return tok->type;
757 /* Returns the narrowest C-visible unsigned type, starting with the
758 minimum specified by FLAGS, that can fit VALUE, or itk_none if
759 there isn't one. */
760 static enum integer_type_kind
761 narrowest_unsigned_type (value, flags)
762 tree value;
763 unsigned int flags;
765 enum integer_type_kind itk;
767 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
768 itk = itk_unsigned_int;
769 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
770 itk = itk_unsigned_long;
771 else
772 itk = itk_unsigned_long_long;
774 /* int_fits_type_p must think the type of its first argument is
775 wider than its second argument, or it won't do the proper check. */
776 TREE_TYPE (value) = widest_unsigned_literal_type_node;
778 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
779 if (int_fits_type_p (value, integer_types[itk]))
780 return itk;
782 return itk_none;
785 /* Ditto, but narrowest signed type. */
786 static enum integer_type_kind
787 narrowest_signed_type (value, flags)
788 tree value;
789 unsigned int flags;
791 enum integer_type_kind itk;
793 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
794 itk = itk_int;
795 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
796 itk = itk_long;
797 else
798 itk = itk_long_long;
800 /* int_fits_type_p must think the type of its first argument is
801 wider than its second argument, or it won't do the proper check. */
802 TREE_TYPE (value) = widest_unsigned_literal_type_node;
804 for (; itk < itk_none; itk += 2 /* skip signed types */)
805 if (int_fits_type_p (value, integer_types[itk]))
806 return itk;
808 return itk_none;
811 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
812 static tree
813 interpret_integer (token, flags)
814 const cpp_token *token;
815 unsigned int flags;
817 tree value, type;
818 enum integer_type_kind itk;
819 cpp_num integer;
820 cpp_options *options = cpp_get_options (parse_in);
822 integer = cpp_interpret_integer (parse_in, token, flags);
823 integer = cpp_num_sign_extend (integer, options->precision);
824 value = build_int_2_wide (integer.low, integer.high);
826 /* The type of a constant with a U suffix is straightforward. */
827 if (flags & CPP_N_UNSIGNED)
828 itk = narrowest_unsigned_type (value, flags);
829 else
831 /* The type of a potentially-signed integer constant varies
832 depending on the base it's in, the standard in use, and the
833 length suffixes. */
834 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
835 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
837 /* In both C89 and C99, octal and hex constants may be signed or
838 unsigned, whichever fits tighter. We do not warn about this
839 choice differing from the traditional choice, as the constant
840 is probably a bit pattern and either way will work. */
841 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
842 itk = MIN (itk_u, itk_s);
843 else
845 /* In C99, decimal constants are always signed.
846 In C89, decimal constants that don't fit in long have
847 undefined behavior; we try to make them unsigned long.
848 In GCC's extended C89, that last is true of decimal
849 constants that don't fit in long long, too. */
851 itk = itk_s;
852 if (itk_s > itk_u && itk_s > itk_long)
854 if (!flag_isoc99)
856 if (itk_u < itk_unsigned_long)
857 itk_u = itk_unsigned_long;
858 itk = itk_u;
859 warning ("this decimal constant is unsigned only in ISO C90");
861 else if (warn_traditional)
862 warning ("this decimal constant would be unsigned in ISO C90");
867 if (itk == itk_none)
868 /* cpplib has already issued a warning for overflow. */
869 type = ((flags & CPP_N_UNSIGNED)
870 ? widest_unsigned_literal_type_node
871 : widest_integer_literal_type_node);
872 else
873 type = integer_types[itk];
875 if (itk > itk_unsigned_long
876 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
877 && ! in_system_header && ! flag_isoc99)
878 pedwarn ("integer constant is too large for \"%s\" type",
879 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
881 TREE_TYPE (value) = type;
883 /* Convert imaginary to a complex type. */
884 if (flags & CPP_N_IMAGINARY)
885 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
887 return value;
890 /* Interpret TOKEN, a floating point number with FLAGS as classified
891 by cpplib. */
892 static tree
893 interpret_float (token, flags)
894 const cpp_token *token;
895 unsigned int flags;
897 tree type;
898 tree value;
899 REAL_VALUE_TYPE real;
900 char *copy;
901 size_t copylen;
902 const char *typename;
904 /* FIXME: make %T work in error/warning, then we don't need typename. */
905 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
907 type = long_double_type_node;
908 typename = "long double";
910 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
911 || flag_single_precision_constant)
913 type = float_type_node;
914 typename = "float";
916 else
918 type = double_type_node;
919 typename = "double";
922 /* Copy the constant to a nul-terminated buffer. If the constant
923 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
924 can't handle them. */
925 copylen = token->val.str.len;
926 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
927 /* Must be an F or L suffix. */
928 copylen--;
929 if (flags & CPP_N_IMAGINARY)
930 /* I or J suffix. */
931 copylen--;
933 copy = alloca (copylen + 1);
934 memcpy (copy, token->val.str.text, copylen);
935 copy[copylen] = '\0';
937 real_from_string (&real, copy);
938 real_convert (&real, TYPE_MODE (type), &real);
940 /* A diagnostic is required for "soft" overflow by some ISO C
941 testsuites. This is not pedwarn, because some people don't want
942 an error for this.
943 ??? That's a dubious reason... is this a mandatory diagnostic or
944 isn't it? -- zw, 2001-08-21. */
945 if (REAL_VALUE_ISINF (real) && pedantic)
946 warning ("floating constant exceeds range of \"%s\"", typename);
948 /* Create a node with determined type and value. */
949 value = build_real (type, real);
950 if (flags & CPP_N_IMAGINARY)
951 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
953 return value;
956 static tree
957 lex_string (str, len, wide)
958 const unsigned char *str;
959 unsigned int len;
960 int wide;
962 tree value;
963 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
964 char *q = buf;
965 const unsigned char *p = str, *limit = str + len;
966 cppchar_t c;
968 #ifdef MULTIBYTE_CHARS
969 /* Reset multibyte conversion state. */
970 (void) local_mbtowc (NULL, NULL, 0);
971 #endif
973 while (p < limit)
975 #ifdef MULTIBYTE_CHARS
976 wchar_t wc;
977 int char_len;
979 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
980 if (char_len == -1)
982 warning ("ignoring invalid multibyte character");
983 char_len = 1;
984 c = *p++;
986 else
988 p += char_len;
989 c = wc;
991 #else
992 c = *p++;
993 #endif
995 if (c == '\\' && !ignore_escape_flag)
996 c = cpp_parse_escape (parse_in, &p, limit, wide);
998 /* Add this single character into the buffer either as a wchar_t,
999 a multibyte sequence, or as a single byte. */
1000 if (wide)
1002 unsigned charwidth = TYPE_PRECISION (char_type_node);
1003 unsigned bytemask = (1 << charwidth) - 1;
1004 int byte;
1006 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1008 int n;
1009 if (byte >= (int) sizeof (c))
1010 n = 0;
1011 else
1012 n = (c >> (byte * charwidth)) & bytemask;
1013 if (BYTES_BIG_ENDIAN)
1014 q[WCHAR_BYTES - byte - 1] = n;
1015 else
1016 q[byte] = n;
1018 q += WCHAR_BYTES;
1020 #ifdef MULTIBYTE_CHARS
1021 else if (char_len > 1)
1023 /* We're dealing with a multibyte character. */
1024 for ( ; char_len >0; --char_len)
1026 *q++ = *(p - char_len);
1029 #endif
1030 else
1032 *q++ = c;
1036 /* Terminate the string value, either with a single byte zero
1037 or with a wide zero. */
1039 if (wide)
1041 memset (q, 0, WCHAR_BYTES);
1042 q += WCHAR_BYTES;
1044 else
1046 *q++ = '\0';
1049 value = build_string (q - buf, buf);
1051 if (wide)
1052 TREE_TYPE (value) = wchar_array_type_node;
1053 else
1054 TREE_TYPE (value) = char_array_type_node;
1055 return value;
1058 /* Converts a (possibly wide) character constant token into a tree. */
1059 static tree
1060 lex_charconst (token)
1061 const cpp_token *token;
1063 cppchar_t result;
1064 tree type, value;
1065 unsigned int chars_seen;
1066 int unsignedp;
1068 result = cpp_interpret_charconst (parse_in, token,
1069 &chars_seen, &unsignedp);
1071 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1072 before possibly widening to HOST_WIDE_INT for build_int_2. */
1073 if (unsignedp || (cppchar_signed_t) result >= 0)
1074 value = build_int_2 (result, 0);
1075 else
1076 value = build_int_2 ((cppchar_signed_t) result, -1);
1078 if (token->type == CPP_WCHAR)
1079 type = wchar_type_node;
1080 /* In C, a character constant has type 'int'.
1081 In C++ 'char', but multi-char charconsts have type 'int'. */
1082 else if ((c_language == clk_c) || chars_seen > 1)
1083 type = integer_type_node;
1084 else
1085 type = char_type_node;
1087 TREE_TYPE (value) = type;
1088 return value;