/* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
24 #include "coretypes.h"
41 #include "splay-tree.h"
44 /* We may keep statistics about how long which files took to compile. */
45 static int header_time
, body_time
;
46 static splay_tree file_info_tree
;
48 #undef WCHAR_TYPE_SIZE
49 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
51 /* Number of bytes in a wide character. */
52 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
54 int pending_lang_change
; /* If we need to switch languages - C++ only */
55 int c_header_level
; /* depth in C headers - C++ only */
57 /* If we need to translate characters received. This is tri-state:
58 0 means use only the untranslated string; 1 means use only
59 the translated string; -1 means chain the translated string
60 to the untranslated one. */
61 int c_lex_string_translate
= 1;
63 static tree
interpret_integer (const cpp_token
*, unsigned int);
64 static tree
interpret_float (const cpp_token
*, unsigned int);
65 static enum integer_type_kind narrowest_unsigned_type
66 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
67 static enum integer_type_kind narrowest_signed_type
68 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
69 static enum cpp_ttype
lex_string (const cpp_token
*, tree
*, bool);
70 static tree
lex_charconst (const cpp_token
*);
71 static void update_header_times (const char *);
72 static int dump_one_header (splay_tree_node
, void *);
73 static void cb_line_change (cpp_reader
*, const cpp_token
*, int);
74 static void cb_ident (cpp_reader
*, unsigned int, const cpp_string
*);
75 static void cb_def_pragma (cpp_reader
*, unsigned int);
76 static void cb_define (cpp_reader
*, unsigned int, cpp_hashnode
*);
77 static void cb_undef (cpp_reader
*, unsigned int, cpp_hashnode
*);
/* Body of the lexer set-up routine.  NOTE(review): the function header
   is not visible in this listing; presumably this is init_c_lex.

   Visible behaviour:
   - creates (via get_fileinfo) the "<top level>" file record;
   - when flag_detailed_statistics is set, stamps body_time with the
     current run time and copies it into the top-level record;
   - fetches the callback table of parse_in and installs this file's
     line_change and def_pragma callbacks plus the common PCH
     validation and reading hooks;
   - installs the define and undef callbacks only when debug info is at
     DINFO_LEVEL_VERBOSE and the format is DWARF2 or VMS_AND_DWARF2.  */
82 struct cpp_callbacks
*cb
;
83 struct c_fileinfo
*toplevel
;
85 /* The get_fileinfo data structure must be initialized before
   cpp_read_main_file is called.  */
87 toplevel
= get_fileinfo ("<top level>");
88 if (flag_detailed_statistics
)
91 body_time
= get_run_time ();
92 toplevel
->time
= body_time
;
95 cb
= cpp_get_callbacks (parse_in
);
97 cb
->line_change
= cb_line_change
;
99 cb
->def_pragma
= cb_def_pragma
;
100 cb
->valid_pch
= c_common_valid_pch
;
101 cb
->read_pch
= c_common_read_pch
;
103 /* Set the debug callbacks if we can use them. */
104 if (debug_info_level
== DINFO_LEVEL_VERBOSE
105 && (write_symbols
== DWARF2_DEBUG
106 || write_symbols
== VMS_AND_DWARF2_DEBUG
))
108 cb
->define
= cb_define
;
109 cb
->undef
= cb_undef
;
/* Return the struct c_fileinfo record for NAME, looking it up in
   file_info_tree and allocating and inserting a fresh record when the
   lookup does not produce one.  A fresh record starts with
   interface_only = 0 and interface_unknown = 1.

   The tree compares keys with strcmp and frees values with free.  The
   key stored is the NAME pointer itself (no copy is made here), so
   NAME presumably has to stay valid for as long as the tree is used;
   confirm against callers.

   NOTE(review): the guard around tree creation, the test on the lookup
   result, any further field initialisation and the final return are
   not visible in this listing.  */
114 get_fileinfo (const char *name
)
117 struct c_fileinfo
*fi
;
120 file_info_tree
= splay_tree_new ((splay_tree_compare_fn
)strcmp
,
122 (splay_tree_delete_value_fn
)free
);
124 n
= splay_tree_lookup (file_info_tree
, (splay_tree_key
) name
);
126 return (struct c_fileinfo
*) n
->value
;
128 fi
= XNEW (struct c_fileinfo
);
130 fi
->interface_only
= 0;
131 fi
->interface_unknown
= 1;
132 splay_tree_insert (file_info_tree
, (splay_tree_key
) name
,
133 (splay_tree_value
) fi
);
138 update_header_times (const char *name
)
140 /* Changing files again. This means currently collected time
141 is charged against header time, and body time starts back at 0. */
142 if (flag_detailed_statistics
)
144 int this_time
= get_run_time ();
145 struct c_fileinfo
*file
= get_fileinfo (name
);
146 header_time
+= this_time
- body_time
;
147 file
->time
+= this_time
- body_time
;
148 body_time
= this_time
;
153 dump_one_header (splay_tree_node n
, void * ARG_UNUSED (dummy
))
155 print_time ((const char *) n
->key
,
156 ((struct c_fileinfo
*) n
->value
)->time
);
161 dump_time_statistics (void)
163 struct c_fileinfo
*file
= get_fileinfo (input_filename
);
164 int this_time
= get_run_time ();
165 file
->time
+= this_time
- body_time
;
167 fprintf (stderr
, "\n******\n");
168 print_time ("header files (total)", header_time
);
169 print_time ("main file (total)", this_time
- body_time
);
170 fprintf (stderr
, "ratio = %g : 1\n",
171 (double)header_time
/ (double)(this_time
- body_time
));
172 fprintf (stderr
, "\n******\n");
174 splay_tree_foreach (file_info_tree
, dump_one_header
, 0);
/* Callback for #ident.  When ASM_OUTPUT_IDENT is defined, the escapes
   in STR are interpreted with cpp_interpret_string; on success the
   resulting text is emitted to asm_out_file with ASM_OUTPUT_IDENT and
   the interpreted buffer is then freed.

   All three parameters are marked ARG_UNUSED; the only visible uses of
   PFILE and STR come after the ASM_OUTPUT_IDENT #ifdef.
   NOTE(review): any outer guard, the braces and the matching #endif
   are not visible in this listing.  */
178 cb_ident (cpp_reader
* ARG_UNUSED (pfile
),
179 unsigned int ARG_UNUSED (line
),
180 const cpp_string
* ARG_UNUSED (str
))
182 #ifdef ASM_OUTPUT_IDENT
185 /* Convert escapes in the string. */
186 cpp_string cstr
= { 0, 0 };
187 if (cpp_interpret_string (pfile
, str
, 1, &cstr
, false))
189 ASM_OUTPUT_IDENT (asm_out_file
, (const char *) cstr
.text
);
190 free ((void *)cstr
.text
);
196 /* Called at the start of every non-empty line. TOKEN is the first
197 lexed token on the line. Used for diagnostic line numbers. */
199 cb_line_change (cpp_reader
* ARG_UNUSED (pfile
), const cpp_token
*token
,
202 if (token
->type
!= CPP_EOF
&& !parsing_args
)
203 #ifdef USE_MAPPED_LOCATION
204 input_location
= token
->src_loc
;
207 source_location loc
= token
->src_loc
;
208 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
209 input_line
= SOURCE_LINE (map
, loc
);
/* Front-end notification that the current line map changed to NEW_MAP.

   Visible behaviour:
   - LC_ENTER of a file that is not the main file: remember where the
     include happened (taken from the map just before NEW_MAP), make it
     the current input position, push the new source location and call
     debug_hooks->start_source_file.
   - LC_LEAVE: call debug_hooks->end_source_file.
   - When NO_IMPLICIT_EXTERN_C is not defined, c_header_level and
     pending_lang_change are adjusted for maps whose sysp is 2, and a
     warning is issued for badly nested C headers.
   - Afterwards header timing is updated for the new file, and
     in_system_header plus the current input position are refreshed
     from NEW_MAP.

   NOTE(review): braces, #else/#endif lines and some assignments are not
   visible in this listing, so the exact nesting cannot be confirmed.
   NOTE(review): under USE_MAPPED_LOCATION included_at is declared int
   while it is assigned to input_location; confirm the intended type.  */
215 fe_file_change (const struct line_map
*new_map
)
220 if (new_map
->reason
== LC_ENTER
)
222 /* Don't stack the main buffer on the input stack;
   we already did in compile_file.  */
224 if (! MAIN_FILE_P (new_map
))
226 #ifdef USE_MAPPED_LOCATION
227 int included_at
= LAST_SOURCE_LINE_LOCATION (new_map
- 1);
229 input_location
= included_at
;
230 push_srcloc (new_map
->start_location
);
232 int included_at
= LAST_SOURCE_LINE (new_map
- 1);
234 input_line
= included_at
;
235 push_srcloc (new_map
->to_file
, 1);
237 (*debug_hooks
->start_source_file
) (included_at
, new_map
->to_file
);
238 #ifndef NO_IMPLICIT_EXTERN_C
241 else if (new_map
->sysp
== 2)
244 ++pending_lang_change
;
249 else if (new_map
->reason
== LC_LEAVE
)
251 #ifndef NO_IMPLICIT_EXTERN_C
252 if (c_header_level
&& --c_header_level
== 0)
254 if (new_map
->sysp
== 2)
255 warning ("badly nested C headers from preprocessor");
256 --pending_lang_change
;
261 (*debug_hooks
->end_source_file
) (new_map
->to_line
);
264 update_header_times (new_map
->to_file
);
265 in_system_header
= new_map
->sysp
!= 0;
266 #ifdef USE_MAPPED_LOCATION
267 input_location
= new_map
->start_location
;
269 input_filename
= new_map
->to_file
;
270 input_line
= new_map
->to_line
;
/* Callback for a #pragma at location LOC that reaches this handler.
   When warn_unknown_pragmas is greater than in_system_header, reads up
   to two further tokens from PFILE (the first becomes SPACE; the
   second becomes NAME only if it is a CPP_NAME), sets the current
   input position from LOC and warns "ignoring #pragma SPACE NAME".
   Both strings default to "".

   NOTE(review): braces, the declaration of S and the #else/#endif
   lines are not visible in this listing.  */
275 cb_def_pragma (cpp_reader
*pfile
, source_location loc
)
277 /* Issue a warning message if we have been asked to do so.  Ignore
   unknown pragmas in system headers unless an explicit
   -Wunknown-pragmas has been given.  */
280 if (warn_unknown_pragmas
> in_system_header
)
282 #ifndef USE_MAPPED_LOCATION
283 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
285 const unsigned char *space
, *name
;
288 space
= name
= (const unsigned char *) "";
289 s
= cpp_get_token (pfile
);
290 if (s
->type
!= CPP_EOF
)
292 space
= cpp_token_as_text (pfile
, s
);
293 s
= cpp_get_token (pfile
);
294 if (s
->type
== CPP_NAME
)
295 name
= cpp_token_as_text (pfile
, s
);
298 #ifdef USE_MAPPED_LOCATION
299 input_location
= loc
;
301 input_line
= SOURCE_LINE (map
, loc
);
303 warning ("ignoring #pragma %s %s", space
, name
);
307 /* #define callback for DWARF and DWARF2 debug info. */
309 cb_define (cpp_reader
*pfile
, source_location loc
, cpp_hashnode
*node
)
311 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
312 (*debug_hooks
->define
) (SOURCE_LINE (map
, loc
),
313 (const char *) cpp_macro_definition (pfile
, node
));
316 /* #undef callback for DWARF and DWARF2 debug info. */
318 cb_undef (cpp_reader
* ARG_UNUSED (pfile
), source_location loc
,
321 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
322 (*debug_hooks
->undef
) (SOURCE_LINE (map
, loc
),
323 (const char *) NODE_NAME (node
));
326 static inline const cpp_token
*
327 get_nonpadding_token (void)
329 const cpp_token
*tok
;
330 timevar_push (TV_CPP
);
332 tok
= cpp_get_token (parse_in
);
333 while (tok
->type
== CPP_PADDING
);
334 timevar_pop (TV_CPP
);
/* Read the next non-padding preprocessor token and store its value
   through VALUE.

   Visible behaviour:
   - identifier tokens become GCC identifier nodes;
   - numbers are classified with cpp_classify_number and converted by
     interpret_integer or interpret_float, or become error_mark_node
     when cpplib has already reported an error;
   - after an at-sign the following token is fetched and, for
     Objective-C, checked against the reserved words or lexed as an
     Objective-C string; otherwise a stray at-sign error is reported;
   - an unlexable character is diagnosed either as a missing string
     terminator or as a stray character;
   - character constants go through lex_charconst and strings through
     lex_string;
   - the flags of the token are stored through CPP_FLAGS.

   NOTE(review): the return type, nearly all case labels, the braces
   and the final return are not visible in this listing, so the exact
   dispatch cannot be confirmed from here.  Presumably the store
   through CPP_FLAGS is guarded against a null pointer; confirm.  */
340 c_lex_with_flags (tree
*value
, unsigned char *cpp_flags
)
342 const cpp_token
*tok
;
344 static bool no_more_pch
;
347 tok
= get_nonpadding_token ();
353 *value
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
358 unsigned int flags
= cpp_classify_number (parse_in
, tok
);
360 switch (flags
& CPP_N_CATEGORY
)
363 /* cpplib has issued an error. */
364 *value
= error_mark_node
;
368 *value
= interpret_integer (tok
, flags
);
372 *value
= interpret_float (tok
, flags
);
382 /* An @ may give the next token special significance in Objective-C. */
383 atloc
= input_location
;
384 tok
= get_nonpadding_token ();
385 if (c_dialect_objc ())
391 val
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
392 if (objc_is_reserved_word (val
))
401 return lex_string (tok
, value
, true);
408 error ("%Hstray '@' in program", &atloc
);
413 cppchar_t c
= tok
->val
.str
.text
[0];
415 if (c
== '"' || c
== '\'')
416 error ("missing terminating %c character", (int) c
);
417 else if (ISGRAPH (c
))
418 error ("stray '%c' in program", (int) c
);
420 error ("stray '\\%o' in program", (int) c
);
426 *value
= lex_charconst (tok
);
431 return lex_string (tok
, value
, false);
434 /* These tokens should not be visible outside cpplib. */
435 case CPP_HEADER_NAME
:
448 c_common_no_more_pch ();
452 *cpp_flags
= tok
->flags
;
/* Tail of a wrapper that lexes one token into VALUE by calling
   c_lex_with_flags with a null flags pointer.  NOTE(review): the
   function header is not visible in this listing; presumably c_lex.  */
459 return c_lex_with_flags (value
, NULL
);
462 /* Returns the narrowest C-visible unsigned type, starting with the
463 minimum specified by FLAGS, that can fit HIGH:LOW, or itk_none if
466 static enum integer_type_kind
467 narrowest_unsigned_type (unsigned HOST_WIDE_INT low
,
468 unsigned HOST_WIDE_INT high
,
471 enum integer_type_kind itk
;
473 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
474 itk
= itk_unsigned_int
;
475 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
476 itk
= itk_unsigned_long
;
478 itk
= itk_unsigned_long_long
;
480 for (; itk
< itk_none
; itk
+= 2 /* skip unsigned types */)
482 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
484 if ((unsigned HOST_WIDE_INT
)TREE_INT_CST_HIGH (upper
) > high
485 || ((unsigned HOST_WIDE_INT
)TREE_INT_CST_HIGH (upper
) == high
486 && TREE_INT_CST_LOW (upper
) >= low
))
493 /* Ditto, but narrowest signed type. */
494 static enum integer_type_kind
495 narrowest_signed_type (unsigned HOST_WIDE_INT low
,
496 unsigned HOST_WIDE_INT high
, unsigned int flags
)
498 enum integer_type_kind itk
;
500 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
502 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
508 for (; itk
< itk_none
; itk
+= 2 /* skip signed types */)
510 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
512 if ((unsigned HOST_WIDE_INT
)TREE_INT_CST_HIGH (upper
) > high
513 || ((unsigned HOST_WIDE_INT
)TREE_INT_CST_HIGH (upper
) == high
514 && TREE_INT_CST_LOW (upper
) >= low
))
521 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib.

   Visible steps: the token is converted with cpp_interpret_integer and
   sign-extended to the precision taken from the cpplib options.  With
   CPP_N_UNSIGNED set, the kind comes from narrowest_unsigned_type.
   Otherwise both the narrowest unsigned and narrowest signed kinds are
   computed; for a non-decimal radix the smaller of the two is used,
   and for decimal constants the choice is adjusted with warnings about
   ISO C90 behaviour.  When cpplib has already warned about overflow,
   the widest literal type is used instead of integer_types[itk].  A
   pedwarn is issued when the kind exceeds unsigned long although the
   width bits of FLAGS are not CPP_N_LARGE, outside system headers and
   without flag_isoc99.  The constant is built with build_int_cst_wide
   and wrapped in a complex node when CPP_N_IMAGINARY is set.

   NOTE(review): the return type, several declarations, the else
   branches, the condition selecting the overflow path and the final
   return are not visible in this listing, and one comment further
   down is truncated.  */
523 interpret_integer (const cpp_token
*token
, unsigned int flags
)
526 enum integer_type_kind itk
;
528 cpp_options
*options
= cpp_get_options (parse_in
);
530 integer
= cpp_interpret_integer (parse_in
, token
, flags
);
531 integer
= cpp_num_sign_extend (integer
, options
->precision
);
533 /* The type of a constant with a U suffix is straightforward. */
534 if (flags
& CPP_N_UNSIGNED
)
535 itk
= narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
538 /* The type of a potentially-signed integer constant varies
539 depending on the base it's in, the standard in use, and the
541 enum integer_type_kind itk_u
542 = narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
543 enum integer_type_kind itk_s
544 = narrowest_signed_type (integer
.low
, integer
.high
, flags
);
546 /* In both C89 and C99, octal and hex constants may be signed or
547 unsigned, whichever fits tighter. We do not warn about this
548 choice differing from the traditional choice, as the constant
549 is probably a bit pattern and either way will work. */
550 if ((flags
& CPP_N_RADIX
) != CPP_N_DECIMAL
)
551 itk
= MIN (itk_u
, itk_s
);
554 /* In C99, decimal constants are always signed.
555 In C89, decimal constants that don't fit in long have
556 undefined behavior; we try to make them unsigned long.
557 In GCC's extended C89, that last is true of decimal
558 constants that don't fit in long long, too. */
561 if (itk_s
> itk_u
&& itk_s
> itk_long
)
565 if (itk_u
< itk_unsigned_long
)
566 itk_u
= itk_unsigned_long
;
568 warning ("this decimal constant is unsigned only in ISO C90");
570 else if (warn_traditional
)
571 warning ("this decimal constant would be unsigned in ISO C90");
577 /* cpplib has already issued a warning for overflow. */
578 type
= ((flags
& CPP_N_UNSIGNED
)
579 ? widest_unsigned_literal_type_node
580 : widest_integer_literal_type_node
);
582 type
= integer_types
[itk
];
584 if (itk
> itk_unsigned_long
585 && (flags
& CPP_N_WIDTH
) != CPP_N_LARGE
586 && ! in_system_header
&& ! flag_isoc99
)
587 pedwarn ("integer constant is too large for %qs type",
588 (flags
& CPP_N_UNSIGNED
) ? "unsigned long" : "long");
590 value
= build_int_cst_wide (type
, integer
.low
, integer
.high
);
592 /* Convert imaginary to a complex type. */
593 if (flags
& CPP_N_IMAGINARY
)
594 value
= build_complex (NULL_TREE
, build_int_cst (type
, 0), value
);
/* Convert the floating constant TOKEN, classified by cpplib as FLAGS,
   into a tree.

   Visible steps: the width bits of FLAGS select the type, namely long
   double for CPP_N_LARGE, float for CPP_N_SMALL or when
   flag_single_precision_constant is set, and double otherwise.  The
   spelling of the token is copied into a buffer obtained from alloca
   and terminated with a NUL, parsed with real_from_string and
   converted to the mode of the chosen type with real_convert.  A
   warning naming the type is issued when the result is infinite and
   pedantic is set.  The value is built with build_real and wrapped in
   a complex node when CPP_N_IMAGINARY is set.

   NOTE(review): the return type, several declarations, the statements
   that shorten the copy length for suffixes and the final return are
   not visible in this listing; the original header comment below is
   truncated.  */
599 /* Interpret TOKEN, a floating point number with FLAGS as classified
602 interpret_float (const cpp_token
*token
, unsigned int flags
)
606 REAL_VALUE_TYPE real
;
609 const char *type_name
;
611 /* FIXME: make %T work in error/warning, then we don't need type_name. */
612 if ((flags
& CPP_N_WIDTH
) == CPP_N_LARGE
)
614 type
= long_double_type_node
;
615 type_name
= "long double";
617 else if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
618 || flag_single_precision_constant
)
620 type
= float_type_node
;
625 type
= double_type_node
;
626 type_name
= "double";
629 /* Copy the constant to a nul-terminated buffer. If the constant
630 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
631 can't handle them. */
632 copylen
= token
->val
.str
.len
;
633 if ((flags
& CPP_N_WIDTH
) != CPP_N_MEDIUM
)
634 /* Must be an F or L suffix. */
636 if (flags
& CPP_N_IMAGINARY
)
640 copy
= (char *) alloca (copylen
+ 1);
641 memcpy (copy
, token
->val
.str
.text
, copylen
);
642 copy
[copylen
] = '\0';
644 real_from_string (&real
, copy
);
645 real_convert (&real
, TYPE_MODE (type
), &real
);
647 /* A diagnostic is required for "soft" overflow by some ISO C
648 testsuites. This is not pedwarn, because some people don't want
650 ??? That's a dubious reason... is this a mandatory diagnostic or
651 isn't it? -- zw, 2001-08-21. */
652 if (REAL_VALUE_ISINF (real
) && pedantic
)
653 warning ("floating constant exceeds range of %<%s%>", type_name
);
655 /* Create a node with determined type and value. */
656 value
= build_real (type
, real
);
657 if (flags
& CPP_N_IMAGINARY
)
658 value
= build_complex (NULL_TREE
, convert (type
, integer_zero_node
), value
);
663 /* Convert a series of STRING and/or WSTRING tokens into a tree,
   performing string constant concatenation.  TOK is the first of
   these.  VALP is the location to write the string into.  OBJC_STRING
   indicates whether an '@' token preceded the incoming token.
   Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
   or CPP_OBJC_STRING).

   This is unfortunately more work than it should be.  If any of the
   strings in the series has an L prefix, the result is a wide string
   (6.4.5p4).  Whether or not the result is a wide string affects the
   meaning of octal and hexadecimal escapes (6.4.4.4p6,9).  But escape
   sequences do not continue across the boundary between two strings in
   a series (6.4.5p7), so we must not lose the boundaries.  Therefore
   cpp_interpret_string takes a vector of cpp_string structures, which
   we must arrange to provide.

   Visible steps: further string tokens are collected on an obstack
   (an intervening at-sign token is stepped over for Objective-C), the
   one token read too many is pushed back, and the vector is handed to
   cpp_interpret_string or cpp_interpret_string_notranslate depending
   on c_lex_string_translate.  When that variable is -1 the
   untranslated form is computed as well and, if it differs, chained
   in front of the translated one.

   NOTE(review): several declarations, braces, else branches and
   comment tails further down are not visible in this listing.  */
679 static enum cpp_ttype
680 lex_string (const cpp_token
*tok
, tree
*valp
, bool objc_string
)
685 struct obstack str_ob
;
688 /* Try to avoid the overhead of creating and destroying an obstack
689 for the common case of just one string. */
690 cpp_string str
= tok
->val
.str
;
691 cpp_string
*strs
= &str
;
693 if (tok
->type
== CPP_WSTRING
)
696 tok
= get_nonpadding_token ();
697 if (c_dialect_objc () && tok
->type
== CPP_ATSIGN
)
700 tok
= get_nonpadding_token ();
702 if (tok
->type
== CPP_STRING
|| tok
->type
== CPP_WSTRING
)
704 gcc_obstack_init (&str_ob
);
705 obstack_grow (&str_ob
, &str
, sizeof (cpp_string
));
710 if (tok
->type
== CPP_WSTRING
)
712 obstack_grow (&str_ob
, &tok
->val
.str
, sizeof (cpp_string
));
714 tok
= get_nonpadding_token ();
715 if (c_dialect_objc () && tok
->type
== CPP_ATSIGN
)
718 tok
= get_nonpadding_token ();
721 while (tok
->type
== CPP_STRING
|| tok
->type
== CPP_WSTRING
);
722 strs
= (cpp_string
*) obstack_finish (&str_ob
);
725 /* We have read one more token than we want. */
726 _cpp_backup_tokens (parse_in
, 1);
728 if (count
> 1 && !objc_string
&& warn_traditional
&& !in_system_header
)
729 warning ("traditional C rejects string constant concatenation");
731 if ((c_lex_string_translate
732 ? cpp_interpret_string
: cpp_interpret_string_notranslate
)
733 (parse_in
, strs
, count
, &istr
, wide
))
735 value
= build_string (istr
.len
, (char *)istr
.text
);
736 free ((void *)istr
.text
);
738 if (c_lex_string_translate
== -1)
740 int xlated
= cpp_interpret_string_notranslate (parse_in
, strs
, count
,
742 /* Assume that, if we managed to translate the string above,
743 then the untranslated parsing will always succeed. */
746 if (TREE_STRING_LENGTH (value
) != (int)istr
.len
747 || 0 != strncmp (TREE_STRING_POINTER (value
), (char *)istr
.text
,
750 /* Arrange for us to return the untranslated string in
751 *valp, but to set up the C type of the translated
753 *valp
= build_string (istr
.len
, (char *)istr
.text
);
754 valp
= &TREE_CHAIN (*valp
);
756 free ((void *)istr
.text
);
761 /* Callers cannot generally handle error_mark_node in this context,
762 so return the empty string instead. cpp_interpret_string has
765 value
= build_string (TYPE_PRECISION (wchar_type_node
)
766 / TYPE_PRECISION (char_type_node
),
767 "\0\0\0"); /* widest supported wchar_t
770 value
= build_string (1, "");
773 TREE_TYPE (value
) = wide
? wchar_array_type_node
: char_array_type_node
;
774 *valp
= fix_string_type (value
);
777 obstack_free (&str_ob
, 0);
779 return objc_string
? CPP_OBJC_STRING
: wide
? CPP_WSTRING
: CPP_STRING
;
782 /* Converts a (possibly wide) character constant token into a tree.

   The numeric value, the number of characters seen and the signedness
   come from cpp_interpret_charconst.  The type is wchar_type_node for
   a CPP_WCHAR token, integer_type_node outside C++ or for constants
   of more than one character, and char_type_node otherwise.  The
   constant is built with build_int_cst_wide, using -1 as the high
   word when the value is signed and negative.

   NOTE(review): the return type, some declarations, the else keywords
   and the final return are not visible in this listing.  */
784 lex_charconst (const cpp_token
*token
)
788 unsigned int chars_seen
;
791 result
= cpp_interpret_charconst (parse_in
, token
,
792 &chars_seen
, &unsignedp
);
794 if (token
->type
== CPP_WCHAR
)
795 type
= wchar_type_node
;
796 /* In C, a character constant has type 'int'.
797 In C++ 'char', but multi-char charconsts have type 'int'. */
798 else if (!c_dialect_cxx () || chars_seen
> 1)
799 type
= integer_type_node
;
801 type
= char_type_node
;
803 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
804 before possibly widening to HOST_WIDE_INT for build_int_cst. */
805 if (unsignedp
|| (cppchar_signed_t
) result
>= 0)
806 value
= build_int_cst_wide (type
, result
, 0);
808 value
= build_int_cst_wide (type
, (cppchar_signed_t
) result
, -1);