* c-ppoutput.c (cb_line_change): Revert 2003-08-04's change.
[official-gcc.git] / gcc / c-lex.c
blob2109eff1da93e5ac9230946914625c8e37542838
/* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 /* The current line map. */
46 static const struct line_map *map;
48 /* The line used to refresh the lineno global variable after each token. */
49 static unsigned int src_lineno;
51 /* We may keep statistics about how long which files took to compile. */
52 static int header_time, body_time;
53 static splay_tree file_info_tree;
55 #undef WCHAR_TYPE_SIZE
56 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
58 /* Number of bytes in a wide character. */
59 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
61 int pending_lang_change; /* If we need to switch languages - C++ only */
62 int c_header_level; /* depth in C headers - C++ only */
64 static tree interpret_integer (const cpp_token *, unsigned int);
65 static tree interpret_float (const cpp_token *, unsigned int);
66 static enum integer_type_kind
67 narrowest_unsigned_type (tree, unsigned int);
68 static enum integer_type_kind
69 narrowest_signed_type (tree, unsigned int);
70 static enum cpp_ttype lex_string (const cpp_token *, tree *, bool);
71 static tree lex_charconst (const cpp_token *);
72 static void update_header_times (const char *);
73 static int dump_one_header (splay_tree_node, void *);
74 static void cb_line_change (cpp_reader *, const cpp_token *, int);
75 static void cb_dir_change (cpp_reader *, const char *);
76 static void cb_ident (cpp_reader *, unsigned int, const cpp_string *);
77 static void cb_def_pragma (cpp_reader *, unsigned int);
78 static void cb_define (cpp_reader *, unsigned int, cpp_hashnode *);
79 static void cb_undef (cpp_reader *, unsigned int, cpp_hashnode *);
81 void
82 init_c_lex (void)
84 struct cpp_callbacks *cb;
85 struct c_fileinfo *toplevel;
87 /* Set up filename timing. Must happen before cpp_read_main_file. */
88 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
90 (splay_tree_delete_value_fn)free);
91 toplevel = get_fileinfo ("<top level>");
92 if (flag_detailed_statistics)
94 header_time = 0;
95 body_time = get_run_time ();
96 toplevel->time = body_time;
99 cb = cpp_get_callbacks (parse_in);
101 cb->line_change = cb_line_change;
102 cb->dir_change = cb_dir_change;
103 cb->ident = cb_ident;
104 cb->def_pragma = cb_def_pragma;
105 cb->valid_pch = c_common_valid_pch;
106 cb->read_pch = c_common_read_pch;
108 /* Set the debug callbacks if we can use them. */
109 if (debug_info_level == DINFO_LEVEL_VERBOSE
110 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
111 || write_symbols == VMS_AND_DWARF2_DEBUG))
113 cb->define = cb_define;
114 cb->undef = cb_undef;
118 struct c_fileinfo *
119 get_fileinfo (const char *name)
121 splay_tree_node n;
122 struct c_fileinfo *fi;
124 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
125 if (n)
126 return (struct c_fileinfo *) n->value;
128 fi = xmalloc (sizeof (struct c_fileinfo));
129 fi->time = 0;
130 fi->interface_only = 0;
131 fi->interface_unknown = 1;
132 splay_tree_insert (file_info_tree, (splay_tree_key) name,
133 (splay_tree_value) fi);
134 return fi;
137 static void
138 update_header_times (const char *name)
140 /* Changing files again. This means currently collected time
141 is charged against header time, and body time starts back at 0. */
142 if (flag_detailed_statistics)
144 int this_time = get_run_time ();
145 struct c_fileinfo *file = get_fileinfo (name);
146 header_time += this_time - body_time;
147 file->time += this_time - body_time;
148 body_time = this_time;
152 static int
153 dump_one_header (splay_tree_node n, void *dummy ATTRIBUTE_UNUSED)
155 print_time ((const char *) n->key,
156 ((struct c_fileinfo *) n->value)->time);
157 return 0;
160 void
161 dump_time_statistics (void)
163 struct c_fileinfo *file = get_fileinfo (input_filename);
164 int this_time = get_run_time ();
165 file->time += this_time - body_time;
167 fprintf (stderr, "\n******\n");
168 print_time ("header files (total)", header_time);
169 print_time ("main file (total)", this_time - body_time);
170 fprintf (stderr, "ratio = %g : 1\n",
171 (double)header_time / (double)(this_time - body_time));
172 fprintf (stderr, "\n******\n");
174 splay_tree_foreach (file_info_tree, dump_one_header, 0);
177 static void
178 cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
179 unsigned int line ATTRIBUTE_UNUSED,
180 const cpp_string *str ATTRIBUTE_UNUSED)
182 #ifdef ASM_OUTPUT_IDENT
183 if (! flag_no_ident)
185 /* Convert escapes in the string. */
186 cpp_string cstr = { 0, 0 };
187 if (cpp_interpret_string (pfile, str, 1, &cstr, false))
189 ASM_OUTPUT_IDENT (asm_out_file, (const char *) cstr.text);
190 free ((void *)cstr.text);
193 #endif
196 /* Called at the start of every non-empty line. TOKEN is the first
197 lexed token on the line. Used for diagnostic line numbers. */
198 static void
199 cb_line_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const cpp_token *token,
200 int parsing_args)
202 if (token->type == CPP_EOF || parsing_args)
203 return;
205 src_lineno = SOURCE_LINE (map, token->line);
208 static void
209 cb_dir_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const char *dir)
211 if (! set_src_pwd (dir))
212 warning ("too late for # directive to set debug directory");
215 void
216 fe_file_change (const struct line_map *new_map)
218 if (new_map->reason == LC_ENTER)
220 /* Don't stack the main buffer on the input stack;
221 we already did in compile_file. */
222 if (map == NULL)
223 main_input_filename = new_map->to_file;
224 else
226 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
228 input_line = included_at;
229 push_srcloc (new_map->to_file, 1);
230 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
231 #ifndef NO_IMPLICIT_EXTERN_C
232 if (c_header_level)
233 ++c_header_level;
234 else if (new_map->sysp == 2)
236 c_header_level = 1;
237 ++pending_lang_change;
239 #endif
242 else if (new_map->reason == LC_LEAVE)
244 #ifndef NO_IMPLICIT_EXTERN_C
245 if (c_header_level && --c_header_level == 0)
247 if (new_map->sysp == 2)
248 warning ("badly nested C headers from preprocessor");
249 --pending_lang_change;
251 #endif
252 pop_srcloc ();
254 (*debug_hooks->end_source_file) (new_map->to_line);
257 update_header_times (new_map->to_file);
258 in_system_header = new_map->sysp != 0;
259 input_filename = new_map->to_file;
260 input_line = new_map->to_line;
261 map = new_map;
263 /* Hook for C++. */
264 extract_interface_info ();
267 static void
268 cb_def_pragma (cpp_reader *pfile, unsigned int line)
270 /* Issue a warning message if we have been asked to do so. Ignore
271 unknown pragmas in system headers unless an explicit
272 -Wunknown-pragmas has been given. */
273 if (warn_unknown_pragmas > in_system_header)
275 const unsigned char *space, *name;
276 const cpp_token *s;
278 space = name = (const unsigned char *) "";
279 s = cpp_get_token (pfile);
280 if (s->type != CPP_EOF)
282 space = cpp_token_as_text (pfile, s);
283 s = cpp_get_token (pfile);
284 if (s->type == CPP_NAME)
285 name = cpp_token_as_text (pfile, s);
288 input_line = SOURCE_LINE (map, line);
289 warning ("ignoring #pragma %s %s", space, name);
293 /* #define callback for DWARF and DWARF2 debug info. */
294 static void
295 cb_define (cpp_reader *pfile, unsigned int line, cpp_hashnode *node)
297 (*debug_hooks->define) (SOURCE_LINE (map, line),
298 (const char *) cpp_macro_definition (pfile, node));
301 /* #undef callback for DWARF and DWARF2 debug info. */
302 static void
303 cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line,
304 cpp_hashnode *node)
306 (*debug_hooks->undef) (SOURCE_LINE (map, line),
307 (const char *) NODE_NAME (node));
310 static inline const cpp_token *
311 get_nonpadding_token (void)
313 const cpp_token *tok;
314 timevar_push (TV_CPP);
316 tok = cpp_get_token (parse_in);
317 while (tok->type == CPP_PADDING);
318 timevar_pop (TV_CPP);
320 /* The C++ front end does horrible things with the current line
321 number. To ensure an accurate line number, we must reset it
322 every time we advance a token. */
323 input_line = src_lineno;
325 return tok;
329 c_lex (tree *value)
331 const cpp_token *tok;
332 location_t atloc;
333 static bool no_more_pch;
335 retry:
336 tok = get_nonpadding_token ();
338 retry_after_at:
339 switch (tok->type)
341 case CPP_NAME:
342 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
343 break;
345 case CPP_NUMBER:
347 unsigned int flags = cpp_classify_number (parse_in, tok);
349 switch (flags & CPP_N_CATEGORY)
351 case CPP_N_INVALID:
352 /* cpplib has issued an error. */
353 *value = error_mark_node;
354 break;
356 case CPP_N_INTEGER:
357 *value = interpret_integer (tok, flags);
358 break;
360 case CPP_N_FLOATING:
361 *value = interpret_float (tok, flags);
362 break;
364 default:
365 abort ();
368 break;
370 case CPP_ATSIGN:
371 /* An @ may give the next token special significance in Objective-C. */
372 atloc = input_location;
373 tok = get_nonpadding_token ();
374 if (c_dialect_objc ())
376 tree val;
377 switch (tok->type)
379 case CPP_NAME:
380 val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
381 if (C_IS_RESERVED_WORD (val)
382 && OBJC_IS_AT_KEYWORD (C_RID_CODE (val)))
384 *value = val;
385 return CPP_AT_NAME;
387 break;
389 case CPP_STRING:
390 case CPP_WSTRING:
391 return lex_string (tok, value, true);
393 default: break;
397 /* ... or not. */
398 error ("%Hstray '@' in program", &atloc);
399 goto retry_after_at;
401 case CPP_OTHER:
403 cppchar_t c = tok->val.str.text[0];
405 if (c == '"' || c == '\'')
406 error ("missing terminating %c character", (int) c);
407 else if (ISGRAPH (c))
408 error ("stray '%c' in program", (int) c);
409 else
410 error ("stray '\\%o' in program", (int) c);
412 goto retry;
414 case CPP_CHAR:
415 case CPP_WCHAR:
416 *value = lex_charconst (tok);
417 break;
419 case CPP_STRING:
420 case CPP_WSTRING:
421 return lex_string (tok, value, false);
422 break;
424 /* These tokens should not be visible outside cpplib. */
425 case CPP_HEADER_NAME:
426 case CPP_COMMENT:
427 case CPP_MACRO_ARG:
428 abort ();
430 default:
431 *value = NULL_TREE;
432 break;
435 if (! no_more_pch)
437 no_more_pch = true;
438 c_common_no_more_pch ();
441 return tok->type;
444 /* Returns the narrowest C-visible unsigned type, starting with the
445 minimum specified by FLAGS, that can fit VALUE, or itk_none if
446 there isn't one. */
447 static enum integer_type_kind
448 narrowest_unsigned_type (tree value, unsigned int flags)
450 enum integer_type_kind itk;
452 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
453 itk = itk_unsigned_int;
454 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
455 itk = itk_unsigned_long;
456 else
457 itk = itk_unsigned_long_long;
459 /* int_fits_type_p must think the type of its first argument is
460 wider than its second argument, or it won't do the proper check. */
461 TREE_TYPE (value) = widest_unsigned_literal_type_node;
463 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
464 if (int_fits_type_p (value, integer_types[itk]))
465 return itk;
467 return itk_none;
470 /* Ditto, but narrowest signed type. */
471 static enum integer_type_kind
472 narrowest_signed_type (tree value, unsigned int flags)
474 enum integer_type_kind itk;
476 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
477 itk = itk_int;
478 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
479 itk = itk_long;
480 else
481 itk = itk_long_long;
483 /* int_fits_type_p must think the type of its first argument is
484 wider than its second argument, or it won't do the proper check. */
485 TREE_TYPE (value) = widest_unsigned_literal_type_node;
487 for (; itk < itk_none; itk += 2 /* skip signed types */)
488 if (int_fits_type_p (value, integer_types[itk]))
489 return itk;
491 return itk_none;
494 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
495 static tree
496 interpret_integer (const cpp_token *token, unsigned int flags)
498 tree value, type;
499 enum integer_type_kind itk;
500 cpp_num integer;
501 cpp_options *options = cpp_get_options (parse_in);
503 integer = cpp_interpret_integer (parse_in, token, flags);
504 integer = cpp_num_sign_extend (integer, options->precision);
505 value = build_int_2_wide (integer.low, integer.high);
507 /* The type of a constant with a U suffix is straightforward. */
508 if (flags & CPP_N_UNSIGNED)
509 itk = narrowest_unsigned_type (value, flags);
510 else
512 /* The type of a potentially-signed integer constant varies
513 depending on the base it's in, the standard in use, and the
514 length suffixes. */
515 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
516 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
518 /* In both C89 and C99, octal and hex constants may be signed or
519 unsigned, whichever fits tighter. We do not warn about this
520 choice differing from the traditional choice, as the constant
521 is probably a bit pattern and either way will work. */
522 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
523 itk = MIN (itk_u, itk_s);
524 else
526 /* In C99, decimal constants are always signed.
527 In C89, decimal constants that don't fit in long have
528 undefined behavior; we try to make them unsigned long.
529 In GCC's extended C89, that last is true of decimal
530 constants that don't fit in long long, too. */
532 itk = itk_s;
533 if (itk_s > itk_u && itk_s > itk_long)
535 if (!flag_isoc99)
537 if (itk_u < itk_unsigned_long)
538 itk_u = itk_unsigned_long;
539 itk = itk_u;
540 warning ("this decimal constant is unsigned only in ISO C90");
542 else if (warn_traditional)
543 warning ("this decimal constant would be unsigned in ISO C90");
548 if (itk == itk_none)
549 /* cpplib has already issued a warning for overflow. */
550 type = ((flags & CPP_N_UNSIGNED)
551 ? widest_unsigned_literal_type_node
552 : widest_integer_literal_type_node);
553 else
554 type = integer_types[itk];
556 if (itk > itk_unsigned_long
557 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
558 && ! in_system_header && ! flag_isoc99)
559 pedwarn ("integer constant is too large for \"%s\" type",
560 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
562 TREE_TYPE (value) = type;
564 /* Convert imaginary to a complex type. */
565 if (flags & CPP_N_IMAGINARY)
566 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
568 return value;
571 /* Interpret TOKEN, a floating point number with FLAGS as classified
572 by cpplib. */
573 static tree
574 interpret_float (const cpp_token *token, unsigned int flags)
576 tree type;
577 tree value;
578 REAL_VALUE_TYPE real;
579 char *copy;
580 size_t copylen;
581 const char *typename;
583 /* FIXME: make %T work in error/warning, then we don't need typename. */
584 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
586 type = long_double_type_node;
587 typename = "long double";
589 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
590 || flag_single_precision_constant)
592 type = float_type_node;
593 typename = "float";
595 else
597 type = double_type_node;
598 typename = "double";
601 /* Copy the constant to a nul-terminated buffer. If the constant
602 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
603 can't handle them. */
604 copylen = token->val.str.len;
605 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
606 /* Must be an F or L suffix. */
607 copylen--;
608 if (flags & CPP_N_IMAGINARY)
609 /* I or J suffix. */
610 copylen--;
612 copy = alloca (copylen + 1);
613 memcpy (copy, token->val.str.text, copylen);
614 copy[copylen] = '\0';
616 real_from_string (&real, copy);
617 real_convert (&real, TYPE_MODE (type), &real);
619 /* A diagnostic is required for "soft" overflow by some ISO C
620 testsuites. This is not pedwarn, because some people don't want
621 an error for this.
622 ??? That's a dubious reason... is this a mandatory diagnostic or
623 isn't it? -- zw, 2001-08-21. */
624 if (REAL_VALUE_ISINF (real) && pedantic)
625 warning ("floating constant exceeds range of \"%s\"", typename);
627 /* Create a node with determined type and value. */
628 value = build_real (type, real);
629 if (flags & CPP_N_IMAGINARY)
630 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
632 return value;
635 /* Convert a series of STRING and/or WSTRING tokens into a tree,
636 performing string constant concatenation. TOK is the first of
637 these. VALP is the location to write the string into. OBJC_STRING
638 indicates whether an '@' token preceded the incoming token.
639 Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
640 or CPP_OBJC_STRING).
642 This is unfortunately more work than it should be. If any of the
643 strings in the series has an L prefix, the result is a wide string
644 (6.4.5p4). Whether or not the result is a wide string affects the
645 meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
646 sequences do not continue across the boundary between two strings in
647 a series (6.4.5p7), so we must not lose the boundaries. Therefore
648 cpp_interpret_string takes a vector of cpp_string structures, which
649 we must arrange to provide. */
651 static enum cpp_ttype
652 lex_string (const cpp_token *tok, tree *valp, bool objc_string)
654 tree value;
655 bool wide = false;
656 size_t count = 1;
657 struct obstack str_ob;
658 cpp_string istr;
660 /* Try to avoid the overhead of creating and destroying an obstack
661 for the common case of just one string. */
662 cpp_string str = tok->val.str;
663 cpp_string *strs = &str;
665 if (tok->type == CPP_WSTRING)
666 wide = true;
668 tok = get_nonpadding_token ();
669 if (c_dialect_objc () && tok->type == CPP_ATSIGN)
671 objc_string = true;
672 tok = get_nonpadding_token ();
674 if (tok->type == CPP_STRING || tok->type == CPP_WSTRING)
676 gcc_obstack_init (&str_ob);
677 obstack_grow (&str_ob, &str, sizeof (cpp_string));
681 count++;
682 if (tok->type == CPP_WSTRING)
683 wide = true;
684 obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
686 tok = get_nonpadding_token ();
687 if (c_dialect_objc () && tok->type == CPP_ATSIGN)
689 objc_string = true;
690 tok = get_nonpadding_token ();
693 while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
694 strs = obstack_finish (&str_ob);
697 /* We have read one more token than we want. */
698 _cpp_backup_tokens (parse_in, 1);
700 if (count > 1 && !objc_string && warn_traditional && !in_system_header)
701 warning ("traditional C rejects string constant concatenation");
703 if (cpp_interpret_string (parse_in, strs, count, &istr, wide))
705 value = build_string (istr.len, (char *)istr.text);
706 free ((void *)istr.text);
708 else
710 /* Callers cannot generally handle error_mark_node in this context,
711 so return the empty string instead. cpp_interpret_string has
712 issued an error. */
713 if (wide)
714 value = build_string (TYPE_PRECISION (wchar_type_node)
715 / TYPE_PRECISION (char_type_node),
716 "\0\0\0"); /* widest supported wchar_t
717 is 32 bits */
718 else
719 value = build_string (1, "");
722 TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node;
723 *valp = fix_string_type (value);
725 if (strs != &str)
726 obstack_free (&str_ob, 0);
728 return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING;
731 /* Converts a (possibly wide) character constant token into a tree. */
732 static tree
733 lex_charconst (const cpp_token *token)
735 cppchar_t result;
736 tree type, value;
737 unsigned int chars_seen;
738 int unsignedp;
740 result = cpp_interpret_charconst (parse_in, token,
741 &chars_seen, &unsignedp);
743 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
744 before possibly widening to HOST_WIDE_INT for build_int_2. */
745 if (unsignedp || (cppchar_signed_t) result >= 0)
746 value = build_int_2 (result, 0);
747 else
748 value = build_int_2 ((cppchar_signed_t) result, -1);
750 if (token->type == CPP_WCHAR)
751 type = wchar_type_node;
752 /* In C, a character constant has type 'int'.
753 In C++ 'char', but multi-char charconsts have type 'int'. */
754 else if (!c_dialect_cxx () || chars_seen > 1)
755 type = integer_type_node;
756 else
757 type = char_type_node;
759 TREE_TYPE (value) = type;
760 return value;