Makefile.in (stmp-docobjdir): New target; ensure $docobjdir exists.
[official-gcc.git] / gcc / c-lex.c
blob b6720da2b5bb388a6ce4b56a11565543623a205b
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
/* File-scope lexer state.  */
45 /* The current line map. */
/* Installed by fe_file_change.  It is still NULL when fe_file_change
   sees its first LC_ENTER, which is how the main file is told apart
   from included files there.  */
46 static const struct line_map *map;
48 /* The line used to refresh the input_line global variable after each token. */
/* Written by cb_line_change; copied into input_line by
   get_nonpadding_token.  */
49 static unsigned int src_lineno;
51 /* We may keep statistics about how long which files took to compile. */
/* header_time accumulates the run time charged at each file change;
   body_time is the get_run_time () reading taken at the most recent
   change.  file_info_tree maps a file name to its struct c_fileinfo
   (see get_fileinfo).  */
52 static int header_time, body_time;
53 static splay_tree file_info_tree;
/* Redefine WCHAR_TYPE_SIZE as the precision of wchar_type_node.  */
55 #undef WCHAR_TYPE_SIZE
56 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
58 /* Number of bytes in a wide character. */
59 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
/* Within this file both counters are updated only by fe_file_change,
   inside #ifndef NO_IMPLICIT_EXTERN_C.  They have external linkage, so
   other files may read or write them as well.  */
61 int pending_lang_change; /* If we need to switch languages - C++ only */
62 int c_header_level; /* depth in C headers - C++ only */
/* Forward declarations of the file-local helpers and cpplib callbacks
   defined later in this file.  */
64 static tree interpret_integer (const cpp_token *, unsigned int);
65 static tree interpret_float (const cpp_token *, unsigned int);
66 static enum integer_type_kind
67 narrowest_unsigned_type (tree, unsigned int);
68 static enum integer_type_kind
69 narrowest_signed_type (tree, unsigned int);
70 static enum cpp_ttype lex_string (const cpp_token *, tree *, bool);
71 static tree lex_charconst (const cpp_token *);
72 static void update_header_times (const char *);
73 static int dump_one_header (splay_tree_node, void *);
/* Callbacks installed on the cpp_reader by init_c_lex.  */
74 static void cb_line_change (cpp_reader *, const cpp_token *, int);
75 static void cb_ident (cpp_reader *, unsigned int, const cpp_string *);
76 static void cb_def_pragma (cpp_reader *, unsigned int);
77 static void cb_define (cpp_reader *, unsigned int, cpp_hashnode *);
78 static void cb_undef (cpp_reader *, unsigned int, cpp_hashnode *);
/* Set up the lexer: create the per-file timing table, register the
   "<top level>" entry in it, and install this file's callbacks on the
   reader parse_in.
   NOTE(review): lines that held only a brace are absent from this
   listing (see the gaps in the embedded numbering), so the extent of
   each `if' body below is inferred from those gaps.  */
80 void
81 init_c_lex (void)
83 struct cpp_callbacks *cb;
84 struct c_fileinfo *toplevel;
86 /* Set up filename timing. Must happen before cpp_read_main_file. */
/* Keys are compared with strcmp; values are released with free.
   NOTE(review): embedded line 88, between the two argument lines
   below, is missing from this listing -- presumably one more argument
   stood there; confirm against the splay_tree_new prototype.  */
87 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
89 (splay_tree_delete_value_fn)free);
90 toplevel = get_fileinfo ("<top level>");
/* With detailed statistics requested, start the clocks: no header
   time yet, and the top-level record is seeded with the current run
   time.  */
91 if (flag_detailed_statistics)
93 header_time = 0;
94 body_time = get_run_time ();
95 toplevel->time = body_time;
98 cb = cpp_get_callbacks (parse_in);
/* Callbacks that are always installed.  */
100 cb->line_change = cb_line_change;
101 cb->ident = cb_ident;
102 cb->def_pragma = cb_def_pragma;
103 cb->valid_pch = c_common_valid_pch;
104 cb->read_pch = c_common_read_pch;
106 /* Set the debug callbacks if we can use them. */
/* That is: verbose debug info level and one of the DWARF formats
   listed in the condition.  */
107 if (debug_info_level == DINFO_LEVEL_VERBOSE
108 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
109 || write_symbols == VMS_AND_DWARF2_DEBUG))
111 cb->define = cb_define;
112 cb->undef = cb_undef;
/* Return the c_fileinfo record for NAME from file_info_tree, creating
   and inserting a fresh one on first request.  A fresh record has
   time 0, interface_only 0 and interface_unknown 1.
   The NAME pointer itself (not a copy of the string) is stored as the
   tree key, so it has to remain valid for as long as the entry is in
   the tree.  */
116 struct c_fileinfo *
117 get_fileinfo (const char *name)
119 splay_tree_node n;
120 struct c_fileinfo *fi;
/* Already known: hand back the existing record.  */
122 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
123 if (n)
124 return (struct c_fileinfo *) n->value;
/* First time NAME is seen: allocate and initialise a record.  */
126 fi = xmalloc (sizeof (struct c_fileinfo));
127 fi->time = 0;
128 fi->interface_only = 0;
129 fi->interface_unknown = 1;
130 splay_tree_insert (file_info_tree, (splay_tree_key) name,
131 (splay_tree_value) fi);
132 return fi;
/* Timing bookkeeping for a file change; NAME is the file whose record
   receives the charge.  Does nothing visible unless
   flag_detailed_statistics is set.  */
135 static void
136 update_header_times (const char *name)
138 /* Changing files again. This means currently collected time
139 is charged against header time, and body time starts back at 0. */
140 if (flag_detailed_statistics)
/* The run time elapsed since body_time is added both to header_time
   and to NAME's record; body_time is then reset to the current
   reading, so the next interval starts from now.  */
142 int this_time = get_run_time ();
143 struct c_fileinfo *file = get_fileinfo (name);
144 header_time += this_time - body_time;
145 file->time += this_time - body_time;
146 body_time = this_time;
/* Callback handed to splay_tree_foreach by dump_time_statistics:
   print the time stored in node N's c_fileinfo, labelled with the
   node's key (the file name).  DUMMY is unused.  Always returns 0
   (presumably "keep iterating" -- confirm against the
   splay_tree_foreach contract).  */
150 static int
151 dump_one_header (splay_tree_node n, void *dummy ATTRIBUTE_UNUSED)
153 print_time ((const char *) n->key,
154 ((struct c_fileinfo *) n->value)->time);
155 return 0;
/* Print the collected timing figures to stderr: the header total, the
   time since the last file change (reported as the main file), their
   ratio, and then one line per file recorded in file_info_tree.  */
158 void
159 dump_time_statistics (void)
/* Charge the time since the last file change to the record of the
   file currently named by input_filename.  body_time itself is left
   unchanged.  */
161 struct c_fileinfo *file = get_fileinfo (input_filename);
162 int this_time = get_run_time ();
163 file->time += this_time - body_time;
165 fprintf (stderr, "\n******\n");
166 print_time ("header files (total)", header_time);
167 print_time ("main file (total)", this_time - body_time);
/* NOTE(review): the divisor is zero when no run time has elapsed since
   the last file change; the division is done in double, so the output
   is then inf or nan rather than a trap on IEEE hosts -- confirm this
   is acceptable.  */
168 fprintf (stderr, "ratio = %g : 1\n",
169 (double)header_time / (double)(this_time - body_time));
170 fprintf (stderr, "\n******\n");
172 splay_tree_foreach (file_info_tree, dump_one_header, 0);
/* cpplib `ident' callback (installed by init_c_lex).  STR is the
   directive's string.  The whole body is compiled only when
   ASM_OUTPUT_IDENT is defined, which is why every parameter carries
   ATTRIBUTE_UNUSED.  */
175 static void
176 cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
177 unsigned int line ATTRIBUTE_UNUSED,
178 const cpp_string *str ATTRIBUTE_UNUSED)
180 #ifdef ASM_OUTPUT_IDENT
/* Nothing is emitted when flag_no_ident is set.  */
181 if (! flag_no_ident)
183 /* Convert escapes in the string. */
184 cpp_string cstr = { 0, 0 };
/* One string, not wide.  On success the converted text is written to
   asm_out_file and the buffer returned in CSTR is then freed.  */
185 if (cpp_interpret_string (pfile, str, 1, &cstr, false))
187 ASM_OUTPUT_IDENT (asm_out_file, (const char *) cstr.text);
188 free ((void *)cstr.text);
191 #endif
194 /* Called at the start of every non-empty line. TOKEN is the first
195 lexed token on the line. Used for diagnostic line numbers. */
/* Translates TOKEN->line through the current line map and stores the
   result in src_lineno; get_nonpadding_token later copies that into
   input_line.  PFILE and PARSING_ARGS are unused.  */
196 static void
197 cb_line_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const cpp_token *token,
198 int parsing_args ATTRIBUTE_UNUSED)
200 src_lineno = SOURCE_LINE (map, token->line);
/* Tell the front end that the line map changed; NEW_MAP is the map
   now in effect.  NEW_MAP->reason selects the extra work: LC_ENTER
   pushes a source location (except for the main file), LC_LEAVE pops
   one.  Any other reason only runs the common tail, which charges
   header time and updates in_system_header, input_filename,
   input_line and the file-static map.
   NOTE(review): brace-only lines are absent from this listing, so the
   nesting described below is inferred from the gaps in the embedded
   numbering.  */
203 void
204 fe_file_change (const struct line_map *new_map)
206 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
208 if (new_map->reason == LC_ENTER)
210 /* Don't stack the main buffer on the input stack;
211 we already did in compile_file. */
/* map is still NULL only on the first call, i.e. for the main file.  */
212 if (map == NULL)
213 main_input_filename = new_map->to_file;
214 else
/* INCLUDED_AT is derived from the preceding map entry (new_map - 1)
   and from_line - 1; input_line is set to it before the new location
   is pushed and the debug hook is told about the new file.  */
216 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
218 input_line = included_at;
219 push_srcloc (new_map->to_file, 1);
220 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
221 #ifndef NO_IMPLICIT_EXTERN_C
/* Already counting nested headers: go one level deeper.  Otherwise a
   map with sysp == 2 starts the count at 1 and requests a language
   change.  (Presumably sysp == 2 marks a system header needing C
   linkage -- confirm against the line-map documentation.)  */
222 if (c_header_level)
223 ++c_header_level;
224 else if (new_map->sysp == 2)
226 c_header_level = 1;
227 ++pending_lang_change;
229 #endif
232 else if (new_map->reason == LC_LEAVE)
234 #ifndef NO_IMPLICIT_EXTERN_C
/* Leaving the outermost counted header undoes the language change.
   The file being returned to is not expected to have sysp == 2 at
   that point, hence the warning.  */
235 if (c_header_level && --c_header_level == 0)
237 if (new_map->sysp == 2)
238 warning ("badly nested C headers from preprocessor");
239 --pending_lang_change;
241 #endif
242 pop_srcloc ();
244 (*debug_hooks->end_source_file) (to_line);
/* Common tail, run for every map change.  */
247 update_header_times (new_map->to_file);
248 in_system_header = new_map->sysp != 0;
249 input_filename = new_map->to_file;
250 input_line = to_line;
251 map = new_map;
253 /* Hook for C++. */
254 extract_interface_info ();
/* cpplib `def_pragma' callback (installed by init_c_lex).  Warns,
   subject to warn_unknown_pragmas, that the pragma on LINE is being
   ignored.  */
257 static void
258 cb_def_pragma (cpp_reader *pfile, unsigned int line)
260 /* Issue a warning message if we have been asked to do so. Ignore
261 unknown pragmas in system headers unless an explicit
262 -Wunknown-pragmas has been given. */
/* fe_file_change stores in_system_header as 0 or 1, so inside a
   system header the comparison only holds for warn_unknown_pragmas
   greater than 1.  */
263 if (warn_unknown_pragmas > in_system_header)
265 const unsigned char *space, *name;
266 const cpp_token *s;
/* Both parts default to the empty string.  SPACE becomes the spelling
   of the first token unless that token is CPP_EOF; NAME becomes the
   spelling of the following token only if it is a CPP_NAME.  */
268 space = name = (const unsigned char *) "";
269 s = cpp_get_token (pfile);
270 if (s->type != CPP_EOF)
272 space = cpp_token_as_text (pfile, s);
273 s = cpp_get_token (pfile);
274 if (s->type == CPP_NAME)
275 name = cpp_token_as_text (pfile, s);
/* Point input_line at the directive before issuing the warning.  */
278 input_line = SOURCE_LINE (map, line);
279 warning ("ignoring #pragma %s %s", space, name);
283 /* #define callback for DWARF and DWARF2 debug info. */
/* Forwards to the debug hook the source line (LINE mapped through the
   current line map) and the text returned by cpp_macro_definition for
   NODE.  Installed by init_c_lex only for verbose DWARF output.  */
284 static void
285 cb_define (cpp_reader *pfile, unsigned int line, cpp_hashnode *node)
287 (*debug_hooks->define) (SOURCE_LINE (map, line),
288 (const char *) cpp_macro_definition (pfile, node));
291 /* #undef callback for DWARF and DWARF2 debug info. */
/* Forwards to the debug hook the source line (LINE mapped through the
   current line map) and the name of NODE.  PFILE is unused.
   Installed by init_c_lex only for verbose DWARF output.  */
292 static void
293 cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line,
294 cpp_hashnode *node)
296 (*debug_hooks->undef) (SOURCE_LINE (map, line),
297 (const char *) NODE_NAME (node));
/* Return the next token from parse_in whose type is not CPP_PADDING.
   The fetch is timed under TV_CPP, and input_line is reset from
   src_lineno before returning.
   NOTE(review): embedded line 305 is missing from this listing; the
   `while' on line 307 reads as the tail of a do-while whose `do' stood
   there -- confirm against the real file.  */
300 static inline const cpp_token *
301 get_nonpadding_token (void)
303 const cpp_token *tok;
304 timevar_push (TV_CPP);
306 tok = cpp_get_token (parse_in);
307 while (tok->type == CPP_PADDING);
308 timevar_pop (TV_CPP);
310 /* The C++ front end does horrible things with the current line
311 number. To ensure an accurate line number, we must reset it
312 every time we advance a token. */
313 input_line = src_lineno;
315 return tok;
/* Fetch the next token for the parser.  Stores the token's tree value
   in *VALUE (NULL_TREE for token types with no value) and returns the
   token type.  Two cases return something other than the raw cpplib
   type: CPP_AT_NAME for an Objective-C '@' keyword, and whatever
   lex_string returns for string literals.
   NOTE(review): the return-type line (embedded line 318) and all
   brace-only lines are missing from this listing; nesting below is
   inferred from the gaps in the embedded numbering.  */
319 c_lex (tree *value)
321 const cpp_token *tok;
322 location_t atloc;
/* `retry' fetches a fresh token; `retry_after_at' re-dispatches on a
   token that has already been fetched.  */
324 retry:
325 tok = get_nonpadding_token ();
327 retry_after_at:
328 switch (tok->type)
330 case CPP_NAME:
331 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
332 break;
334 case CPP_NUMBER:
/* Let cpplib classify the number, then build the constant with the
   matching interpreter.  */
336 unsigned int flags = cpp_classify_number (parse_in, tok);
338 switch (flags & CPP_N_CATEGORY)
340 case CPP_N_INVALID:
341 /* cpplib has issued an error. */
342 *value = error_mark_node;
343 break;
345 case CPP_N_INTEGER:
346 *value = interpret_integer (tok, flags);
347 break;
349 case CPP_N_FLOATING:
350 *value = interpret_float (tok, flags);
351 break;
353 default:
354 abort ();
357 break;
359 case CPP_ATSIGN:
360 /* An @ may give the next token special significance in Objective-C. */
/* Remember where the '@' was for the diagnostic, then look at the
   token that follows it.  */
361 atloc = input_location;
362 tok = get_nonpadding_token ();
363 if (c_dialect_objc ())
365 tree val;
366 switch (tok->type)
368 case CPP_NAME:
/* '@' followed by a reserved word that is an Objective-C '@' keyword
   is returned as a single CPP_AT_NAME token.  */
369 val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
370 if (C_IS_RESERVED_WORD (val)
371 && OBJC_IS_AT_KEYWORD (C_RID_CODE (val)))
373 *value = val;
374 return CPP_AT_NAME;
376 break;
378 case CPP_STRING:
379 case CPP_WSTRING:
/* '@' followed by a string: lex it with OBJC_STRING set.  */
380 return lex_string (tok, value, true);
382 default: break;
386 /* ... or not. */
/* The '@' is diagnosed and dropped; the token read after it is still
   in TOK, so dispatch on that one without fetching again.  */
387 error ("%Hstray '@' in program", &atloc);
388 goto retry_after_at;
390 case CPP_OTHER:
/* A character cpplib could not turn into a token: diagnose it by its
   first byte and move on to the next token.  */
392 cppchar_t c = tok->val.str.text[0];
394 if (c == '"' || c == '\'')
395 error ("missing terminating %c character", (int) c);
396 else if (ISGRAPH (c))
397 error ("stray '%c' in program", (int) c);
398 else
399 error ("stray '\\%o' in program", (int) c);
401 goto retry;
403 case CPP_CHAR:
404 case CPP_WCHAR:
405 *value = lex_charconst (tok);
406 break;
408 case CPP_STRING:
409 case CPP_WSTRING:
/* The `break' after this return is unreachable.  */
410 return lex_string (tok, value, false);
411 break;
413 /* These tokens should not be visible outside cpplib. */
414 case CPP_HEADER_NAME:
415 case CPP_COMMENT:
416 case CPP_MACRO_ARG:
417 abort ();
/* Every other token type carries no tree value.  */
419 default:
420 *value = NULL_TREE;
421 break;
424 return tok->type;
427 /* Returns the narrowest C-visible unsigned type, starting with the
428 minimum specified by FLAGS, that can fit VALUE, or itk_none if
429 there isn't one. */
/* Side effect: TREE_TYPE (VALUE) is overwritten with
   widest_unsigned_literal_type_node.  interpret_integer, the caller in
   this file, assigns the final type afterwards.  */
430 static enum integer_type_kind
431 narrowest_unsigned_type (tree value, unsigned int flags)
433 enum integer_type_kind itk;
/* The width recorded in FLAGS selects the first candidate: unsigned
   int, unsigned long, or unsigned long long.  */
435 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
436 itk = itk_unsigned_int;
437 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
438 itk = itk_unsigned_long;
439 else
440 itk = itk_unsigned_long_long;
442 /* int_fits_type_p must think the type of its first argument is
443 wider than its second argument, or it won't do the proper check. */
444 TREE_TYPE (value) = widest_unsigned_literal_type_node;
/* The stride of 2 goes from one unsigned kind to the next, stepping
   over the signed kind in between (this assumes integer_type_kind
   alternates signed and unsigned kinds -- confirm against the enum).  */
446 for (; itk < itk_none; itk += 2 /* skip signed types */)
447 if (int_fits_type_p (value, integer_types[itk]))
448 return itk;
450 return itk_none;
453 /* Ditto, but narrowest signed type. */
/* Returns the first signed kind, starting from the minimum given by
   the width in FLAGS, whose type VALUE fits, or itk_none if none does.
   Side effect: TREE_TYPE (VALUE) is overwritten with
   widest_unsigned_literal_type_node.  */
454 static enum integer_type_kind
455 narrowest_signed_type (tree value, unsigned int flags)
457 enum integer_type_kind itk;
/* The width recorded in FLAGS selects the first candidate: int, long,
   or long long.  */
459 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
460 itk = itk_int;
461 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
462 itk = itk_long;
463 else
464 itk = itk_long_long;
466 /* int_fits_type_p must think the type of its first argument is
467 wider than its second argument, or it won't do the proper check. */
468 TREE_TYPE (value) = widest_unsigned_literal_type_node;
/* The stride of 2 goes from one signed kind to the next, stepping over
   the unsigned kind in between (this assumes integer_type_kind
   alternates signed and unsigned kinds -- confirm against the enum).  */
470 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
471 if (int_fits_type_p (value, integer_types[itk]))
472 return itk;
474 return itk_none;
477 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
/* Returns an integer constant node whose type is chosen from the
   suffixes, radix and language standard recorded in FLAGS and the
   global flags.  When FLAGS has CPP_N_IMAGINARY the result is a
   complex constant with a zero real part.
   NOTE(review): brace-only lines are absent from this listing, so the
   nesting of the if/else chains below is inferred from the gaps in the
   embedded numbering.  */
478 static tree
479 interpret_integer (const cpp_token *token, unsigned int flags)
481 tree value, type;
482 enum integer_type_kind itk;
483 cpp_num integer;
484 cpp_options *options = cpp_get_options (parse_in);
/* Let cpplib evaluate the token, sign-extend the result from the
   reader's precision, and wrap the two halves in a tree node.  */
486 integer = cpp_interpret_integer (parse_in, token, flags);
487 integer = cpp_num_sign_extend (integer, options->precision);
488 value = build_int_2_wide (integer.low, integer.high);
490 /* The type of a constant with a U suffix is straightforward. */
491 if (flags & CPP_N_UNSIGNED)
492 itk = narrowest_unsigned_type (value, flags);
493 else
495 /* The type of a potentially-signed integer constant varies
496 depending on the base it's in, the standard in use, and the
497 length suffixes. */
498 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
499 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
501 /* In both C89 and C99, octal and hex constants may be signed or
502 unsigned, whichever fits tighter. We do not warn about this
503 choice differing from the traditional choice, as the constant
504 is probably a bit pattern and either way will work. */
/* MIN picks whichever kind comes first in integer_type_kind order.  */
505 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
506 itk = MIN (itk_u, itk_s);
507 else
509 /* In C99, decimal constants are always signed.
510 In C89, decimal constants that don't fit in long have
511 undefined behavior; we try to make them unsigned long.
512 In GCC's extended C89, that last is true of decimal
513 constants that don't fit in long long, too. */
515 itk = itk_s;
/* Only decimal constants whose signed kind is beyond long, and for
   which an earlier unsigned kind exists, get special treatment.
   Outside C99 the unsigned kind is used, raised to at least unsigned
   long, with a warning.  In C99 the signed kind is kept and a warning
   is given only under warn_traditional.  */
516 if (itk_s > itk_u && itk_s > itk_long)
518 if (!flag_isoc99)
520 if (itk_u < itk_unsigned_long)
521 itk_u = itk_unsigned_long;
522 itk = itk_u;
523 warning ("this decimal constant is unsigned only in ISO C90");
525 else if (warn_traditional)
526 warning ("this decimal constant would be unsigned in ISO C90");
/* No listed kind fits: fall back to the widest literal type of the
   requested signedness.  */
531 if (itk == itk_none)
532 /* cpplib has already issued a warning for overflow. */
533 type = ((flags & CPP_N_UNSIGNED)
534 ? widest_unsigned_literal_type_node
535 : widest_integer_literal_type_node);
536 else
537 type = integer_types[itk];
/* Pedwarn when the chosen kind lies beyond unsigned long (itk_none
   included) although the constant was not written with the
   CPP_N_LARGE width.  System headers and C99 are exempt.  */
539 if (itk > itk_unsigned_long
540 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
541 && ! in_system_header && ! flag_isoc99)
542 pedwarn ("integer constant is too large for \"%s\" type",
543 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
/* Replaces the temporary type that the narrowest_*_type helpers left
   on VALUE.  */
545 TREE_TYPE (value) = type;
547 /* Convert imaginary to a complex type. */
548 if (flags & CPP_N_IMAGINARY)
549 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
551 return value;
554 /* Interpret TOKEN, a floating point number with FLAGS as classified
555 by cpplib. */
/* Returns a real constant node of type float, double or long double.
   When FLAGS has CPP_N_IMAGINARY the result is a complex constant with
   a zero real part.
   NOTE(review): brace-only lines are absent from this listing, so the
   extent of each branch below is inferred from the gaps in the
   embedded numbering.  */
556 static tree
557 interpret_float (const cpp_token *token, unsigned int flags)
559 tree type;
560 tree value;
561 REAL_VALUE_TYPE real;
562 char *copy;
563 size_t copylen;
564 const char *typename;
566 /* FIXME: make %T work in error/warning, then we don't need typename. */
/* CPP_N_LARGE width selects long double.  CPP_N_SMALL width, or
   flag_single_precision_constant, selects float.  Anything else is
   double.  */
567 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
569 type = long_double_type_node;
570 typename = "long double";
572 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
573 || flag_single_precision_constant)
575 type = float_type_node;
576 typename = "float";
578 else
580 type = double_type_node;
581 typename = "double";
584 /* Copy the constant to a nul-terminated buffer. If the constant
585 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
586 can't handle them. */
/* Each suffix is treated as exactly one character: one is dropped for
   a non-MEDIUM width and one more for an imaginary constant.  */
587 copylen = token->val.str.len;
588 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
589 /* Must be an F or L suffix. */
590 copylen--;
591 if (flags & CPP_N_IMAGINARY)
592 /* I or J suffix. */
593 copylen--;
/* The copy is made with alloca, so there is nothing to free.  */
595 copy = alloca (copylen + 1);
596 memcpy (copy, token->val.str.text, copylen);
597 copy[copylen] = '\0';
/* Parse the text, then convert the value in place to the mode of the
   chosen type.  */
599 real_from_string (&real, copy);
600 real_convert (&real, TYPE_MODE (type), &real);
602 /* A diagnostic is required for "soft" overflow by some ISO C
603 testsuites. This is not pedwarn, because some people don't want
604 an error for this.
605 ??? That's a dubious reason... is this a mandatory diagnostic or
606 isn't it? -- zw, 2001-08-21. */
/* The warning is issued only when `pedantic' is set.  */
607 if (REAL_VALUE_ISINF (real) && pedantic)
608 warning ("floating constant exceeds range of \"%s\"", typename);
610 /* Create a node with determined type and value. */
611 value = build_real (type, real);
612 if (flags & CPP_N_IMAGINARY)
613 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
615 return value;
618 /* Convert a series of STRING and/or WSTRING tokens into a tree,
619 performing string constant concatenation. TOK is the first of
620 these. VALP is the location to write the string into. OBJC_STRING
621 indicates whether an '@' token preceded the incoming token.
622 Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
623 or CPP_OBJC_STRING).
625 This is unfortunately more work than it should be. If any of the
626 strings in the series has an L prefix, the result is a wide string
627 (6.4.5p4). Whether or not the result is a wide string affects the
628 meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
629 sequences do not continue across the boundary between two strings in
630 a series (6.4.5p7), so we must not lose the boundaries. Therefore
631 cpp_interpret_string takes a vector of cpp_string structures, which
632 we must arrange to provide. */
/* NOTE(review): embedded lines 661-663 are missing from this listing;
   the `while' on line 676 reads as the tail of a do-while whose `do'
   stood there.  Brace-only lines are missing throughout, so nesting is
   inferred from the gaps in the embedded numbering.  */
634 static enum cpp_ttype
635 lex_string (const cpp_token *tok, tree *valp, bool objc_string)
637 tree value;
638 bool wide = false;
639 size_t count = 1;
640 struct obstack str_ob;
641 cpp_string istr;
643 /* Try to avoid the overhead of creating and destroying an obstack
644 for the common case of just one string. */
645 cpp_string str = tok->val.str;
646 cpp_string *strs = &str;
648 if (tok->type == CPP_WSTRING)
649 wide = true;
/* Look one token ahead.  In Objective-C an '@' between strings is
   consumed and marks the whole result as an Objective-C string.  */
651 tok = get_nonpadding_token ();
652 if (c_dialect_objc () && tok->type == CPP_ATSIGN)
654 objc_string = true;
655 tok = get_nonpadding_token ();
/* A second string follows: switch to the obstack and collect every
   cpp_string of the series in it, the first one included.  */
657 if (tok->type == CPP_STRING || tok->type == CPP_WSTRING)
659 gcc_obstack_init (&str_ob);
660 obstack_grow (&str_ob, &str, sizeof (cpp_string));
664 count++;
665 if (tok->type == CPP_WSTRING)
666 wide = true;
667 obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
669 tok = get_nonpadding_token ();
670 if (c_dialect_objc () && tok->type == CPP_ATSIGN)
672 objc_string = true;
673 tok = get_nonpadding_token ();
676 while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
677 strs = obstack_finish (&str_ob);
680 /* We have read one more token than we want. */
681 _cpp_backup_tokens (parse_in, 1);
683 if (count > 1 && !objc_string && warn_traditional && !in_system_header)
684 warning ("traditional C rejects string constant concatenation");
/* On success, build the tree from the converted text and free the
   buffer that cpp_interpret_string handed back.  */
686 if (cpp_interpret_string (parse_in, strs, count, &istr, wide))
688 value = build_string (istr.len, (char *)istr.text);
689 free ((void *)istr.text);
691 else
693 /* Callers cannot generally handle error_mark_node in this context,
694 so return the empty string instead. cpp_interpret_string has
695 issued an error. */
/* The wide empty string is a single wide NUL: its length is the ratio
   of the wchar_t and char precisions.  */
696 if (wide)
697 value = build_string (TYPE_PRECISION (wchar_type_node)
698 / TYPE_PRECISION (char_type_node),
699 "\0\0\0"); /* widest supported wchar_t
700 is 32 bits */
701 else
702 value = build_string (1, "");
705 TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node;
706 *valp = fix_string_type (value);
/* STRS differs from &str exactly when the obstack was used.  */
708 if (strs != &str)
709 obstack_free (&str_ob, 0);
/* An Objective-C string takes precedence over wideness in the
   returned token type.  */
711 return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING;
714 /* Converts a (possibly wide) character constant token into a tree. */
/* Returns an integer constant node.  Its type is wchar_t for a
   CPP_WCHAR token.  Otherwise it is int, except that in C++ a constant
   with a single character gets type char.
   NOTE(review): the listing ends at the return statement; the closing
   brace line is absent, like every other brace-only line here.  */
715 static tree
716 lex_charconst (const cpp_token *token)
718 cppchar_t result;
719 tree type, value;
720 unsigned int chars_seen;
721 int unsignedp;
/* cpplib computes the value and reports through the two out-parameters
   how many characters it saw and whether the value is unsigned.  */
723 result = cpp_interpret_charconst (parse_in, token,
724 &chars_seen, &unsignedp);
726 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
727 before possibly widening to HOST_WIDE_INT for build_int_2. */
/* The second argument of build_int_2 is 0 for unsigned or non-negative
   values and -1 for negative ones.  */
728 if (unsignedp || (cppchar_signed_t) result >= 0)
729 value = build_int_2 (result, 0);
730 else
731 value = build_int_2 ((cppchar_signed_t) result, -1);
733 if (token->type == CPP_WCHAR)
734 type = wchar_type_node;
735 /* In C, a character constant has type 'int'.
736 In C++ 'char', but multi-char charconsts have type 'int'. */
737 else if (!c_dialect_cxx () || chars_seen > 1)
738 type = integer_type_node;
739 else
740 type = char_type_node;
742 TREE_TYPE (value) = type;
743 return value;