* system.h: Poison NO_RECURSIVE_FUNCTION_CSE.
[official-gcc.git] / gcc / c-lex.c
blob3986b2771bba87af8be967f6da04f39fec4c159d
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 /* We may keep statistics about how long which files took to compile. */
46 static int header_time, body_time;
47 static splay_tree file_info_tree;
49 #undef WCHAR_TYPE_SIZE
50 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
52 /* Number of bytes in a wide character. */
53 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
55 int pending_lang_change; /* If we need to switch languages - C++ only */
56 int c_header_level; /* depth in C headers - C++ only */
57 bool c_lex_string_translate = true; /* If we need to translate characters received. */
59 static tree interpret_integer (const cpp_token *, unsigned int);
60 static tree interpret_float (const cpp_token *, unsigned int);
61 static enum integer_type_kind
62 narrowest_unsigned_type (tree, unsigned int);
63 static enum integer_type_kind
64 narrowest_signed_type (tree, unsigned int);
65 static enum cpp_ttype lex_string (const cpp_token *, tree *, bool);
66 static tree lex_charconst (const cpp_token *);
67 static void update_header_times (const char *);
68 static int dump_one_header (splay_tree_node, void *);
69 static void cb_line_change (cpp_reader *, const cpp_token *, int);
70 static void cb_ident (cpp_reader *, unsigned int, const cpp_string *);
71 static void cb_def_pragma (cpp_reader *, unsigned int);
72 static void cb_define (cpp_reader *, unsigned int, cpp_hashnode *);
73 static void cb_undef (cpp_reader *, unsigned int, cpp_hashnode *);
75 void
76 init_c_lex (void)
78 struct cpp_callbacks *cb;
79 struct c_fileinfo *toplevel;
81 /* Set up filename timing. Must happen before cpp_read_main_file. */
82 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
84 (splay_tree_delete_value_fn)free);
85 toplevel = get_fileinfo ("<top level>");
86 if (flag_detailed_statistics)
88 header_time = 0;
89 body_time = get_run_time ();
90 toplevel->time = body_time;
93 cb = cpp_get_callbacks (parse_in);
95 cb->line_change = cb_line_change;
96 cb->ident = cb_ident;
97 cb->def_pragma = cb_def_pragma;
98 cb->valid_pch = c_common_valid_pch;
99 cb->read_pch = c_common_read_pch;
101 /* Set the debug callbacks if we can use them. */
102 if (debug_info_level == DINFO_LEVEL_VERBOSE
103 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
104 || write_symbols == VMS_AND_DWARF2_DEBUG))
106 cb->define = cb_define;
107 cb->undef = cb_undef;
111 struct c_fileinfo *
112 get_fileinfo (const char *name)
114 splay_tree_node n;
115 struct c_fileinfo *fi;
117 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
118 if (n)
119 return (struct c_fileinfo *) n->value;
121 fi = xmalloc (sizeof (struct c_fileinfo));
122 fi->time = 0;
123 fi->interface_only = 0;
124 fi->interface_unknown = 1;
125 splay_tree_insert (file_info_tree, (splay_tree_key) name,
126 (splay_tree_value) fi);
127 return fi;
130 static void
131 update_header_times (const char *name)
133 /* Changing files again. This means currently collected time
134 is charged against header time, and body time starts back at 0. */
135 if (flag_detailed_statistics)
137 int this_time = get_run_time ();
138 struct c_fileinfo *file = get_fileinfo (name);
139 header_time += this_time - body_time;
140 file->time += this_time - body_time;
141 body_time = this_time;
145 static int
146 dump_one_header (splay_tree_node n, void *dummy ATTRIBUTE_UNUSED)
148 print_time ((const char *) n->key,
149 ((struct c_fileinfo *) n->value)->time);
150 return 0;
153 void
154 dump_time_statistics (void)
156 struct c_fileinfo *file = get_fileinfo (input_filename);
157 int this_time = get_run_time ();
158 file->time += this_time - body_time;
160 fprintf (stderr, "\n******\n");
161 print_time ("header files (total)", header_time);
162 print_time ("main file (total)", this_time - body_time);
163 fprintf (stderr, "ratio = %g : 1\n",
164 (double)header_time / (double)(this_time - body_time));
165 fprintf (stderr, "\n******\n");
167 splay_tree_foreach (file_info_tree, dump_one_header, 0);
170 static void
171 cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
172 unsigned int line ATTRIBUTE_UNUSED,
173 const cpp_string *str ATTRIBUTE_UNUSED)
175 #ifdef ASM_OUTPUT_IDENT
176 if (! flag_no_ident)
178 /* Convert escapes in the string. */
179 cpp_string cstr = { 0, 0 };
180 if (cpp_interpret_string (pfile, str, 1, &cstr, false))
182 ASM_OUTPUT_IDENT (asm_out_file, (const char *) cstr.text);
183 free ((void *)cstr.text);
186 #endif
189 /* Called at the start of every non-empty line. TOKEN is the first
190 lexed token on the line. Used for diagnostic line numbers. */
191 static void
192 cb_line_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const cpp_token *token,
193 int parsing_args)
195 if (token->type != CPP_EOF && !parsing_args)
197 source_location loc = token->src_loc;
198 const struct line_map *map = linemap_lookup (&line_table, loc);
199 input_line = SOURCE_LINE (map, loc);
203 void
204 fe_file_change (const struct line_map *new_map)
206 if (new_map == NULL)
207 return;
209 if (new_map->reason == LC_ENTER)
211 /* Don't stack the main buffer on the input stack;
212 we already did in compile_file. */
213 if (! MAIN_FILE_P (new_map))
215 int included_at = LAST_SOURCE_LINE (new_map - 1);
217 input_line = included_at;
218 push_srcloc (new_map->to_file, 1);
219 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
220 #ifndef NO_IMPLICIT_EXTERN_C
221 if (c_header_level)
222 ++c_header_level;
223 else if (new_map->sysp == 2)
225 c_header_level = 1;
226 ++pending_lang_change;
228 #endif
231 else if (new_map->reason == LC_LEAVE)
233 #ifndef NO_IMPLICIT_EXTERN_C
234 if (c_header_level && --c_header_level == 0)
236 if (new_map->sysp == 2)
237 warning ("badly nested C headers from preprocessor");
238 --pending_lang_change;
240 #endif
241 pop_srcloc ();
243 (*debug_hooks->end_source_file) (new_map->to_line);
246 update_header_times (new_map->to_file);
247 in_system_header = new_map->sysp != 0;
248 input_filename = new_map->to_file;
249 input_line = new_map->to_line;
251 /* Hook for C++. */
252 extract_interface_info ();
255 static void
256 cb_def_pragma (cpp_reader *pfile, source_location loc)
258 /* Issue a warning message if we have been asked to do so. Ignore
259 unknown pragmas in system headers unless an explicit
260 -Wunknown-pragmas has been given. */
261 if (warn_unknown_pragmas > in_system_header)
263 const struct line_map *map = linemap_lookup (&line_table, loc);
264 const unsigned char *space, *name;
265 const cpp_token *s;
267 space = name = (const unsigned char *) "";
268 s = cpp_get_token (pfile);
269 if (s->type != CPP_EOF)
271 space = cpp_token_as_text (pfile, s);
272 s = cpp_get_token (pfile);
273 if (s->type == CPP_NAME)
274 name = cpp_token_as_text (pfile, s);
277 input_line = SOURCE_LINE (map, loc);
278 warning ("ignoring #pragma %s %s", space, name);
282 /* #define callback for DWARF and DWARF2 debug info. */
283 static void
284 cb_define (cpp_reader *pfile, source_location loc, cpp_hashnode *node)
286 const struct line_map *map = linemap_lookup (&line_table, loc);
287 (*debug_hooks->define) (SOURCE_LINE (map, loc),
288 (const char *) cpp_macro_definition (pfile, node));
291 /* #undef callback for DWARF and DWARF2 debug info. */
292 static void
293 cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, source_location loc,
294 cpp_hashnode *node)
296 const struct line_map *map = linemap_lookup (&line_table, loc);
297 (*debug_hooks->undef) (SOURCE_LINE (map, loc),
298 (const char *) NODE_NAME (node));
301 static inline const cpp_token *
302 get_nonpadding_token (void)
304 const cpp_token *tok;
305 timevar_push (TV_CPP);
307 tok = cpp_get_token (parse_in);
308 while (tok->type == CPP_PADDING);
309 timevar_pop (TV_CPP);
311 return tok;
315 c_lex_with_flags (tree *value, unsigned char *cpp_flags)
317 const cpp_token *tok;
318 location_t atloc;
319 static bool no_more_pch;
321 retry:
322 tok = get_nonpadding_token ();
324 retry_after_at:
325 switch (tok->type)
327 case CPP_NAME:
328 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
329 break;
331 case CPP_NUMBER:
333 unsigned int flags = cpp_classify_number (parse_in, tok);
335 switch (flags & CPP_N_CATEGORY)
337 case CPP_N_INVALID:
338 /* cpplib has issued an error. */
339 *value = error_mark_node;
340 break;
342 case CPP_N_INTEGER:
343 *value = interpret_integer (tok, flags);
344 break;
346 case CPP_N_FLOATING:
347 *value = interpret_float (tok, flags);
348 break;
350 default:
351 abort ();
354 break;
356 case CPP_ATSIGN:
357 /* An @ may give the next token special significance in Objective-C. */
358 atloc = input_location;
359 tok = get_nonpadding_token ();
360 if (c_dialect_objc ())
362 tree val;
363 switch (tok->type)
365 case CPP_NAME:
366 val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
367 if (C_IS_RESERVED_WORD (val)
368 && OBJC_IS_AT_KEYWORD (C_RID_CODE (val)))
370 *value = val;
371 return CPP_AT_NAME;
373 break;
375 case CPP_STRING:
376 case CPP_WSTRING:
377 return lex_string (tok, value, true);
379 default: break;
383 /* ... or not. */
384 error ("%Hstray '@' in program", &atloc);
385 goto retry_after_at;
387 case CPP_OTHER:
389 cppchar_t c = tok->val.str.text[0];
391 if (c == '"' || c == '\'')
392 error ("missing terminating %c character", (int) c);
393 else if (ISGRAPH (c))
394 error ("stray '%c' in program", (int) c);
395 else
396 error ("stray '\\%o' in program", (int) c);
398 goto retry;
400 case CPP_CHAR:
401 case CPP_WCHAR:
402 *value = lex_charconst (tok);
403 break;
405 case CPP_STRING:
406 case CPP_WSTRING:
407 return lex_string (tok, value, false);
408 break;
410 /* These tokens should not be visible outside cpplib. */
411 case CPP_HEADER_NAME:
412 case CPP_COMMENT:
413 case CPP_MACRO_ARG:
414 abort ();
416 default:
417 *value = NULL_TREE;
418 break;
421 if (! no_more_pch)
423 no_more_pch = true;
424 c_common_no_more_pch ();
427 if (cpp_flags)
428 *cpp_flags = tok->flags;
429 return tok->type;
433 c_lex (tree *value)
435 return c_lex_with_flags (value, NULL);
438 /* Returns the narrowest C-visible unsigned type, starting with the
439 minimum specified by FLAGS, that can fit VALUE, or itk_none if
440 there isn't one. */
441 static enum integer_type_kind
442 narrowest_unsigned_type (tree value, unsigned int flags)
444 enum integer_type_kind itk;
446 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
447 itk = itk_unsigned_int;
448 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
449 itk = itk_unsigned_long;
450 else
451 itk = itk_unsigned_long_long;
453 /* int_fits_type_p must think the type of its first argument is
454 wider than its second argument, or it won't do the proper check. */
455 TREE_TYPE (value) = widest_unsigned_literal_type_node;
457 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
458 if (int_fits_type_p (value, integer_types[itk]))
459 return itk;
461 return itk_none;
464 /* Ditto, but narrowest signed type. */
465 static enum integer_type_kind
466 narrowest_signed_type (tree value, unsigned int flags)
468 enum integer_type_kind itk;
470 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
471 itk = itk_int;
472 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
473 itk = itk_long;
474 else
475 itk = itk_long_long;
477 /* int_fits_type_p must think the type of its first argument is
478 wider than its second argument, or it won't do the proper check. */
479 TREE_TYPE (value) = widest_unsigned_literal_type_node;
481 for (; itk < itk_none; itk += 2 /* skip signed types */)
482 if (int_fits_type_p (value, integer_types[itk]))
483 return itk;
485 return itk_none;
488 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
489 static tree
490 interpret_integer (const cpp_token *token, unsigned int flags)
492 tree value, type;
493 enum integer_type_kind itk;
494 cpp_num integer;
495 cpp_options *options = cpp_get_options (parse_in);
497 integer = cpp_interpret_integer (parse_in, token, flags);
498 integer = cpp_num_sign_extend (integer, options->precision);
499 value = build_int_2_wide (integer.low, integer.high);
501 /* The type of a constant with a U suffix is straightforward. */
502 if (flags & CPP_N_UNSIGNED)
503 itk = narrowest_unsigned_type (value, flags);
504 else
506 /* The type of a potentially-signed integer constant varies
507 depending on the base it's in, the standard in use, and the
508 length suffixes. */
509 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
510 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
512 /* In both C89 and C99, octal and hex constants may be signed or
513 unsigned, whichever fits tighter. We do not warn about this
514 choice differing from the traditional choice, as the constant
515 is probably a bit pattern and either way will work. */
516 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
517 itk = MIN (itk_u, itk_s);
518 else
520 /* In C99, decimal constants are always signed.
521 In C89, decimal constants that don't fit in long have
522 undefined behavior; we try to make them unsigned long.
523 In GCC's extended C89, that last is true of decimal
524 constants that don't fit in long long, too. */
526 itk = itk_s;
527 if (itk_s > itk_u && itk_s > itk_long)
529 if (!flag_isoc99)
531 if (itk_u < itk_unsigned_long)
532 itk_u = itk_unsigned_long;
533 itk = itk_u;
534 warning ("this decimal constant is unsigned only in ISO C90");
536 else if (warn_traditional)
537 warning ("this decimal constant would be unsigned in ISO C90");
542 if (itk == itk_none)
543 /* cpplib has already issued a warning for overflow. */
544 type = ((flags & CPP_N_UNSIGNED)
545 ? widest_unsigned_literal_type_node
546 : widest_integer_literal_type_node);
547 else
548 type = integer_types[itk];
550 if (itk > itk_unsigned_long
551 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
552 && ! in_system_header && ! flag_isoc99)
553 pedwarn ("integer constant is too large for \"%s\" type",
554 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
556 TREE_TYPE (value) = type;
558 /* Convert imaginary to a complex type. */
559 if (flags & CPP_N_IMAGINARY)
560 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
562 return value;
565 /* Interpret TOKEN, a floating point number with FLAGS as classified
566 by cpplib. */
567 static tree
568 interpret_float (const cpp_token *token, unsigned int flags)
570 tree type;
571 tree value;
572 REAL_VALUE_TYPE real;
573 char *copy;
574 size_t copylen;
575 const char *typename;
577 /* FIXME: make %T work in error/warning, then we don't need typename. */
578 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
580 type = long_double_type_node;
581 typename = "long double";
583 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
584 || flag_single_precision_constant)
586 type = float_type_node;
587 typename = "float";
589 else
591 type = double_type_node;
592 typename = "double";
595 /* Copy the constant to a nul-terminated buffer. If the constant
596 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
597 can't handle them. */
598 copylen = token->val.str.len;
599 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
600 /* Must be an F or L suffix. */
601 copylen--;
602 if (flags & CPP_N_IMAGINARY)
603 /* I or J suffix. */
604 copylen--;
606 copy = alloca (copylen + 1);
607 memcpy (copy, token->val.str.text, copylen);
608 copy[copylen] = '\0';
610 real_from_string (&real, copy);
611 real_convert (&real, TYPE_MODE (type), &real);
613 /* A diagnostic is required for "soft" overflow by some ISO C
614 testsuites. This is not pedwarn, because some people don't want
615 an error for this.
616 ??? That's a dubious reason... is this a mandatory diagnostic or
617 isn't it? -- zw, 2001-08-21. */
618 if (REAL_VALUE_ISINF (real) && pedantic)
619 warning ("floating constant exceeds range of \"%s\"", typename);
621 /* Create a node with determined type and value. */
622 value = build_real (type, real);
623 if (flags & CPP_N_IMAGINARY)
624 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
626 return value;
629 /* Convert a series of STRING and/or WSTRING tokens into a tree,
630 performing string constant concatenation. TOK is the first of
631 these. VALP is the location to write the string into. OBJC_STRING
632 indicates whether an '@' token preceded the incoming token.
633 Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
634 or CPP_OBJC_STRING).
636 This is unfortunately more work than it should be. If any of the
637 strings in the series has an L prefix, the result is a wide string
638 (6.4.5p4). Whether or not the result is a wide string affects the
639 meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
640 sequences do not continue across the boundary between two strings in
641 a series (6.4.5p7), so we must not lose the boundaries. Therefore
642 cpp_interpret_string takes a vector of cpp_string structures, which
643 we must arrange to provide. */
645 static enum cpp_ttype
646 lex_string (const cpp_token *tok, tree *valp, bool objc_string)
648 tree value;
649 bool wide = false;
650 size_t count = 1;
651 struct obstack str_ob;
652 cpp_string istr;
654 /* Try to avoid the overhead of creating and destroying an obstack
655 for the common case of just one string. */
656 cpp_string str = tok->val.str;
657 cpp_string *strs = &str;
659 if (tok->type == CPP_WSTRING)
660 wide = true;
662 tok = get_nonpadding_token ();
663 if (c_dialect_objc () && tok->type == CPP_ATSIGN)
665 objc_string = true;
666 tok = get_nonpadding_token ();
668 if (tok->type == CPP_STRING || tok->type == CPP_WSTRING)
670 gcc_obstack_init (&str_ob);
671 obstack_grow (&str_ob, &str, sizeof (cpp_string));
675 count++;
676 if (tok->type == CPP_WSTRING)
677 wide = true;
678 obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
680 tok = get_nonpadding_token ();
681 if (c_dialect_objc () && tok->type == CPP_ATSIGN)
683 objc_string = true;
684 tok = get_nonpadding_token ();
687 while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
688 strs = obstack_finish (&str_ob);
691 /* We have read one more token than we want. */
692 _cpp_backup_tokens (parse_in, 1);
694 if (count > 1 && !objc_string && warn_traditional && !in_system_header)
695 warning ("traditional C rejects string constant concatenation");
697 if ((c_lex_string_translate
698 ? cpp_interpret_string : cpp_interpret_string_notranslate)
699 (parse_in, strs, count, &istr, wide))
701 value = build_string (istr.len, (char *)istr.text);
702 free ((void *)istr.text);
704 else
706 /* Callers cannot generally handle error_mark_node in this context,
707 so return the empty string instead. cpp_interpret_string has
708 issued an error. */
709 if (wide)
710 value = build_string (TYPE_PRECISION (wchar_type_node)
711 / TYPE_PRECISION (char_type_node),
712 "\0\0\0"); /* widest supported wchar_t
713 is 32 bits */
714 else
715 value = build_string (1, "");
718 TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node;
719 *valp = fix_string_type (value);
721 if (strs != &str)
722 obstack_free (&str_ob, 0);
724 return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING;
727 /* Converts a (possibly wide) character constant token into a tree. */
728 static tree
729 lex_charconst (const cpp_token *token)
731 cppchar_t result;
732 tree type, value;
733 unsigned int chars_seen;
734 int unsignedp;
736 result = cpp_interpret_charconst (parse_in, token,
737 &chars_seen, &unsignedp);
739 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
740 before possibly widening to HOST_WIDE_INT for build_int_2. */
741 if (unsignedp || (cppchar_signed_t) result >= 0)
742 value = build_int_2 (result, 0);
743 else
744 value = build_int_2 ((cppchar_signed_t) result, -1);
746 if (token->type == CPP_WCHAR)
747 type = wchar_type_node;
748 /* In C, a character constant has type 'int'.
749 In C++ 'char', but multi-char charconsts have type 'int'. */
750 else if (!c_dialect_cxx () || chars_seen > 1)
751 type = integer_type_node;
752 else
753 type = char_type_node;
755 TREE_TYPE (value) = type;
756 return value;