1 /* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
25 #include "coretypes.h"
42 #include "splay-tree.h"
45 /* We may keep statistics about how long which files took to compile.
   header_time accumulates the run time charged to header files and
   body_time is the run-time stamp from which the next interval is
   measured; both are updated in update_header_times when
   flag_detailed_statistics is set.  */
46 static int header_time
, body_time
;
/* Per-file records (struct c_fileinfo) keyed by file name; created and
   filled in get_fileinfo.  */
47 static splay_tree file_info_tree
;
/* Replace any earlier WCHAR_TYPE_SIZE with the precision of
   wchar_type_node.  */
49 #undef WCHAR_TYPE_SIZE
50 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
52 /* Number of bytes in a wide character. */
53 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
55 int pending_lang_change
; /* If we need to switch languages - C++ only */
56 int c_header_level
; /* depth in C headers - C++ only */
58 /* If we need to translate characters received.  This is tri-state:
   0 means use only the untranslated string; 1 means use only
   the translated string; -1 means chain the translated string
   to the untranslated one.  */
62 int c_lex_string_translate
= 1;
64 /* True if strings should be passed to the caller of c_lex completely
   unmolested (no concatenation, no translation).  */
66 bool c_lex_return_raw_strings
= false;
/* Forward declarations of the file-local helpers defined later in this
   file: literal interpretation (interpret_integer, interpret_float,
   lex_string, lex_charconst and the narrowest_*_type helpers), timing
   statistics (update_header_times, dump_one_header) and the cb_*
   handlers that are stored into the cpp_callbacks table.  */
68 static tree
interpret_integer (const cpp_token
*, unsigned int);
69 static tree
interpret_float (const cpp_token
*, unsigned int);
70 static enum integer_type_kind narrowest_unsigned_type
71 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
72 static enum integer_type_kind narrowest_signed_type
73 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
74 static enum cpp_ttype
lex_string (const cpp_token
*, tree
*, bool);
75 static tree
lex_charconst (const cpp_token
*);
76 static void update_header_times (const char *);
77 static int dump_one_header (splay_tree_node
, void *);
78 static void cb_line_change (cpp_reader
*, const cpp_token
*, int);
79 static void cb_ident (cpp_reader
*, unsigned int, const cpp_string
*);
80 static void cb_def_pragma (cpp_reader
*, unsigned int);
81 static void cb_define (cpp_reader
*, unsigned int, cpp_hashnode
*);
82 static void cb_undef (cpp_reader
*, unsigned int, cpp_hashnode
*);
/* Body of the lexer set-up routine.
   NOTE(review): the function header, the braces and some statements are
   missing from this excerpt (presumably this is init_c_lex -- confirm
   against the complete file).

   Visible steps:
   - create the "<top level>" file record via get_fileinfo;
   - when flag_detailed_statistics is set, stamp body_time and the
     top-level record's time with get_run_time ();
   - fetch the callback table of parse_in and install the line_change,
     ident, def_pragma, valid_pch and read_pch handlers;
   - install the define/undef handlers only when debug_info_level is
     DINFO_LEVEL_VERBOSE and write_symbols is DWARF2_DEBUG or
     VMS_AND_DWARF2_DEBUG.  */
87 struct cpp_callbacks
*cb
;
88 struct c_fileinfo
*toplevel
;
90 /* The get_fileinfo data structure must be initialized before
91 cpp_read_main_file is called. */
92 toplevel
= get_fileinfo ("<top level>");
93 if (flag_detailed_statistics
)
96 body_time
= get_run_time ();
97 toplevel
->time
= body_time
;
100 cb
= cpp_get_callbacks (parse_in
);
102 cb
->line_change
= cb_line_change
;
103 cb
->ident
= cb_ident
;
104 cb
->def_pragma
= cb_def_pragma
;
105 cb
->valid_pch
= c_common_valid_pch
;
106 cb
->read_pch
= c_common_read_pch
;
108 /* Set the debug callbacks if we can use them. */
109 if (debug_info_level
== DINFO_LEVEL_VERBOSE
110 && (write_symbols
== DWARF2_DEBUG
111 || write_symbols
== VMS_AND_DWARF2_DEBUG
))
113 cb
->define
= cb_define
;
114 cb
->undef
= cb_undef
;
/* Return the struct c_fileinfo record for the file called NAME.

   The record is looked up in file_info_tree, which is built with strcmp
   as the key comparison and free as the value destructor.  If a node is
   found its value is returned; otherwise a new record is allocated with
   XNEW, interface_only is cleared, interface_unknown is set to 1 and
   the record is inserted under NAME.

   The NAME pointer itself is stored as the tree key (it is not copied
   here), so the string must stay valid for as long as the tree is used.

   NOTE(review): the return type, the conditions guarding the tree
   creation and the early return, and the final return are missing from
   this excerpt.  */
119 get_fileinfo (const char *name
)
122 struct c_fileinfo
*fi
;
125 file_info_tree
= splay_tree_new ((splay_tree_compare_fn
) strcmp
,
127 (splay_tree_delete_value_fn
) free
);
129 n
= splay_tree_lookup (file_info_tree
, (splay_tree_key
) name
);
131 return (struct c_fileinfo
*) n
->value
;
133 fi
= XNEW (struct c_fileinfo
);
135 fi
->interface_only
= 0;
136 fi
->interface_unknown
= 1;
137 splay_tree_insert (file_info_tree
, (splay_tree_key
) name
,
138 (splay_tree_value
) fi
);
/* Account for the run time used since the last measurement.

   When flag_detailed_statistics is set, the interval between body_time
   and the current get_run_time () value is added both to the global
   header_time and to the time field of NAME's file record; body_time is
   then moved forward to the current time.  Nothing is recorded when the
   flag is clear.  */
143 update_header_times (const char *name
)
145 /* Changing files again. This means currently collected time
146 is charged against header time, and body time starts back at 0. */
147 if (flag_detailed_statistics
)
149 int this_time
= get_run_time ();
150 struct c_fileinfo
*file
= get_fileinfo (name
);
151 header_time
+= this_time
- body_time
;
152 file
->time
+= this_time
- body_time
;
153 body_time
= this_time
;
/* Traversal callback handed to splay_tree_foreach by
   dump_time_statistics.  Prints one entry with print_time: the node key
   is used as the label (a const char *) and the time field of the
   node's struct c_fileinfo value as the amount.  The second argument is
   unused.  */
158 dump_one_header (splay_tree_node n
, void * ARG_UNUSED (dummy
))
160 print_time ((const char *) n
->key
,
161 ((struct c_fileinfo
*) n
->value
)->time
);
/* Print the collected timing statistics to stderr.

   The time elapsed since body_time is first added to the record of
   input_filename.  The function then prints the header total, the
   main-file figure (this_time - body_time) and their ratio, and finally
   walks file_info_tree with dump_one_header to print one line per
   recorded file.

   NOTE(review): the ratio divides by (this_time - body_time); nothing
   visible here guards against that difference being zero.  */
166 dump_time_statistics (void)
168 struct c_fileinfo
*file
= get_fileinfo (input_filename
);
169 int this_time
= get_run_time ();
170 file
->time
+= this_time
- body_time
;
172 fprintf (stderr
, "\n******\n");
173 print_time ("header files (total)", header_time
);
174 print_time ("main file (total)", this_time
- body_time
);
175 fprintf (stderr
, "ratio = %g : 1\n",
176 (double) header_time
/ (double) (this_time
- body_time
));
177 fprintf (stderr
, "\n******\n");
179 splay_tree_foreach (file_info_tree
, dump_one_header
, 0);
/* Handler installed as the `ident' callback in the cpp_callbacks table.

   All three parameters are marked ARG_UNUSED because they are only
   referenced inside the ASM_OUTPUT_IDENT conditional section.  In that
   section STR is run through cpp_interpret_string to convert its
   escapes; on success the resulting text is passed to ASM_OUTPUT_IDENT
   for asm_out_file and the buffer returned in cstr.text is freed.

   NOTE(review): the #endif and any further guarding conditions are
   missing from this excerpt.  */
183 cb_ident (cpp_reader
* ARG_UNUSED (pfile
),
184 unsigned int ARG_UNUSED (line
),
185 const cpp_string
* ARG_UNUSED (str
))
187 #ifdef ASM_OUTPUT_IDENT
190 /* Convert escapes in the string. */
191 cpp_string cstr
= { 0, 0 };
192 if (cpp_interpret_string (pfile
, str
, 1, &cstr
, false))
194 ASM_OUTPUT_IDENT (asm_out_file
, (const char *) cstr
.text
);
195 free ((void *) cstr
.text
);
201 /* Called at the start of every non-empty line.  TOKEN is the first
   lexed token on the line.  Used for diagnostic line numbers.

   Nothing is updated for a CPP_EOF token or while parsing_args is
   nonzero.  With USE_MAPPED_LOCATION, input_location is set directly
   from TOKEN's src_loc; otherwise the location is looked up in
   line_table and input_line is set to the resulting source line.

   NOTE(review): the third parameter and the #else/#endif lines are
   missing from this excerpt.  */
204 cb_line_change (cpp_reader
* ARG_UNUSED (pfile
), const cpp_token
*token
,
207 if (token
->type
!= CPP_EOF
&& !parsing_args
)
208 #ifdef USE_MAPPED_LOCATION
209 input_location
= token
->src_loc
;
212 source_location loc
= token
->src_loc
;
213 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
214 input_line
= SOURCE_LINE (map
, loc
);
/* React to a change of line map described by NEW_MAP.

   LC_ENTER: for anything but the main file, the location of the
   including line (taken from the preceding map, new_map - 1) becomes
   the current input position, the new file is pushed with push_srcloc
   and the debug hook start_source_file is called.  Unless
   NO_IMPLICIT_EXTERN_C is defined, a map whose sysp is 2 increments
   pending_lang_change.

   LC_LEAVE: unless NO_IMPLICIT_EXTERN_C is defined, c_header_level is
   decremented when nonzero and, on reaching zero, a warning about badly
   nested C headers is issued if sysp is 2 and pending_lang_change is
   decremented.  The debug hook end_source_file is called with the
   map's to_line.

   In every case visible here the function then calls
   update_header_times for the map's file, sets in_system_header from
   sysp and updates the current input location (input_location, or
   input_filename and input_line).

   NOTE(review): braces, #else/#endif lines and several statements are
   missing from this excerpt, so the exact nesting should be confirmed
   against the complete file.  */
220 fe_file_change (const struct line_map
*new_map
)
225 if (new_map
->reason
== LC_ENTER
)
227 /* Don't stack the main buffer on the input stack;
228 we already did in compile_file. */
229 if (!MAIN_FILE_P (new_map
))
231 #ifdef USE_MAPPED_LOCATION
232 int included_at
= LAST_SOURCE_LINE_LOCATION (new_map
- 1);
234 input_location
= included_at
;
235 push_srcloc (new_map
->start_location
);
237 int included_at
= LAST_SOURCE_LINE (new_map
- 1);
239 input_line
= included_at
;
240 push_srcloc (new_map
->to_file
, 1);
242 (*debug_hooks
->start_source_file
) (included_at
, new_map
->to_file
);
243 #ifndef NO_IMPLICIT_EXTERN_C
246 else if (new_map
->sysp
== 2)
249 ++pending_lang_change
;
254 else if (new_map
->reason
== LC_LEAVE
)
256 #ifndef NO_IMPLICIT_EXTERN_C
257 if (c_header_level
&& --c_header_level
== 0)
259 if (new_map
->sysp
== 2)
260 warning ("badly nested C headers from preprocessor");
261 --pending_lang_change
;
266 (*debug_hooks
->end_source_file
) (new_map
->to_line
);
269 update_header_times (new_map
->to_file
);
270 in_system_header
= new_map
->sysp
!= 0;
271 #ifdef USE_MAPPED_LOCATION
272 input_location
= new_map
->start_location
;
274 input_filename
= new_map
->to_file
;
275 input_line
= new_map
->to_line
;
/* Handler installed as the `def_pragma' callback.  PFILE is the reader
   and LOC the location of the pragma.

   When warn_unknown_pragmas exceeds in_system_header, up to two tokens
   are read from PFILE: the text of the first (if it is not CPP_EOF)
   becomes the pragma namespace and the text of the second (if it is a
   CPP_NAME) the pragma name.  Both default to the empty string.  A
   warning "ignoring #pragma SPACE NAME" is then issued at the location
   derived from LOC (via line_table without USE_MAPPED_LOCATION,
   directly otherwise).

   NOTE(review): the forward declaration spells the second parameter
   `unsigned int' while this definition uses source_location; these are
   presumably the same type -- confirm.  */
280 cb_def_pragma (cpp_reader
*pfile
, source_location loc
)
282 /* Issue a warning message if we have been asked to do so. Ignore
283 unknown pragmas in system headers unless an explicit
284 -Wunknown-pragmas has been given. */
285 if (warn_unknown_pragmas
> in_system_header
)
287 const unsigned char *space
, *name
;
289 #ifndef USE_MAPPED_LOCATION
291 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
292 fe_loc
.file
= map
->to_file
;
293 fe_loc
.line
= SOURCE_LINE (map
, loc
);
295 location_t fe_loc
= loc
;
298 space
= name
= (const unsigned char *) "";
299 s
= cpp_get_token (pfile
);
300 if (s
->type
!= CPP_EOF
)
302 space
= cpp_token_as_text (pfile
, s
);
303 s
= cpp_get_token (pfile
);
304 if (s
->type
== CPP_NAME
)
305 name
= cpp_token_as_text (pfile
, s
);
308 warning ("%Hignoring #pragma %s %s", &fe_loc
, space
, name
);
312 /* #define callback for DWARF and DWARF2 debug info.

   LOC is mapped to a source line through line_table, and the `define'
   debug hook is called with that line and the text returned by
   cpp_macro_definition for NODE.  */
314 cb_define (cpp_reader
*pfile
, source_location loc
, cpp_hashnode
*node
)
316 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
317 (*debug_hooks
->define
) (SOURCE_LINE (map
, loc
),
318 (const char *) cpp_macro_definition (pfile
, node
));
321 /* #undef callback for DWARF and DWARF2 debug info.

   LOC is mapped to a source line through line_table, and the `undef'
   debug hook is called with that line and NODE_NAME (node).  The reader
   argument is unused.

   NOTE(review): the line declaring the NODE parameter is missing from
   this excerpt.  */
323 cb_undef (cpp_reader
* ARG_UNUSED (pfile
), source_location loc
,
326 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
327 (*debug_hooks
->undef
) (SOURCE_LINE (map
, loc
),
328 (const char *) NODE_NAME (node
));
331 /* Read a token and return its type.  Fill *VALUE with its value, if
   applicable.  Fill *CPP_FLAGS with the token's flags, if it is
   non-NULL.  */
/* Implementation notes for c_lex_with_flags (VALUE, LOC, CPP_FLAGS).

   The whole routine runs between timevar_push (TV_CPP) and
   timevar_pop (TV_CPP).  One token is fetched from parse_in and *LOC
   receives input_location.  The visible per-token handling is:
   - names: *VALUE becomes the GCC identifier for the token's hash node;
   - numbers: cpp_classify_number selects between error_mark_node (when
     cpplib has already reported an error), interpret_integer and
     interpret_float;
   - `@' in Objective-C: the next token is read and handled as a string
     (lex_string with objc_string true) or as an identifier checked with
     objc_is_reserved_word; otherwise a "stray @" error is issued at the
     saved location;
   - stray punctuation and other characters: an error naming the
     offending spelling or character code;
   - character constants: lex_charconst;
   - strings: lex_string, unless c_lex_return_raw_strings is set, in
     which case the raw token text is wrapped with build_string.
   Finally the token flags are stored through CPP_FLAGS and
   c_common_no_more_pch is called.

   NOTE(review): the return type, the `case' labels, braces, the
   conditions around the last two steps and the return statement are
   missing from this excerpt.  */
336 c_lex_with_flags (tree
*value
, location_t
*loc
, unsigned char *cpp_flags
)
338 static bool no_more_pch
;
339 const cpp_token
*tok
;
342 timevar_push (TV_CPP
);
344 tok
= cpp_get_token (parse_in
);
348 #ifdef USE_MAPPED_LOCATION
351 *loc
= input_location
;
359 *value
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
364 unsigned int flags
= cpp_classify_number (parse_in
, tok
);
366 switch (flags
& CPP_N_CATEGORY
)
369 /* cpplib has issued an error. */
370 *value
= error_mark_node
;
374 *value
= interpret_integer (tok
, flags
);
378 *value
= interpret_float (tok
, flags
);
388 /* An @ may give the next token special significance in Objective-C. */
389 if (c_dialect_objc ())
391 location_t atloc
= input_location
;
394 tok
= cpp_get_token (parse_in
);
403 type
= lex_string (tok
, value
, true);
407 *value
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
408 if (objc_is_reserved_word (*value
))
417 error ("%Hstray %<@%> in program", &atloc
);
427 unsigned char name
[4];
429 *cpp_spell_token (parse_in
, tok
, name
, true) = 0;
431 error ("stray %qs in program", name
);
438 cppchar_t c
= tok
->val
.str
.text
[0];
440 if (c
== '"' || c
== '\'')
441 error ("missing terminating %c character", (int) c
);
442 else if (ISGRAPH (c
))
443 error ("stray %qc in program", (int) c
);
445 error ("stray %<\\%o%> in program", (int) c
);
451 *value
= lex_charconst (tok
);
456 if (!c_lex_return_raw_strings
)
458 type
= lex_string (tok
, value
, false);
465 *value
= build_string (tok
->val
.str
.len
, (char *) tok
->val
.str
.text
);
468 /* These tokens should not be visible outside cpplib. */
469 case CPP_HEADER_NAME
:
480 *cpp_flags
= tok
->flags
;
485 c_common_no_more_pch ();
488 timevar_pop (TV_CPP
);
/* Tail of a thin wrapper around c_lex_with_flags: VALUE is forwarded,
   the address of a local `loc' is supplied for the location and NULL is
   passed for the flags pointer.
   NOTE(review): the function header and the declaration of `loc' are
   missing from this excerpt (presumably this is c_lex -- confirm).  */
497 return c_lex_with_flags (value
, &loc
, NULL
);
500 /* Returns the narrowest C-visible unsigned type, starting with the
   minimum specified by FLAGS, that can fit HIGH:LOW, or itk_none if
   there isn't one.  */
/* Implementation notes for narrowest_unsigned_type.

   The width bits of FLAGS choose the first candidate: CPP_N_SMALL
   starts at itk_unsigned_int, CPP_N_MEDIUM at itk_unsigned_long and
   anything else at itk_unsigned_long_long.  Candidates are then tried
   in steps of two until itk_none; a candidate fits when the maximum
   value of integer_types[itk], compared as a HIGH:LOW pair of
   unsigned HOST_WIDE_INTs, is at least the given value.

   NOTE(review): the FLAGS parameter line, the braces and the return
   statements are missing from this excerpt.  */
504 static enum integer_type_kind
505 narrowest_unsigned_type (unsigned HOST_WIDE_INT low
,
506 unsigned HOST_WIDE_INT high
,
509 enum integer_type_kind itk
;
511 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
512 itk
= itk_unsigned_int
;
513 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
514 itk
= itk_unsigned_long
;
516 itk
= itk_unsigned_long_long
;
518 for (; itk
< itk_none
; itk
+= 2 /* step over the signed kinds (presumably interleaved with the unsigned ones in integer_type_kind -- confirm) */)
520 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
522 if ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) > high
523 || ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) == high
524 && TREE_INT_CST_LOW (upper
) >= low
))
531 /* Signed counterpart of narrowest_unsigned_type: finds the narrowest
   signed type that can hold HIGH:LOW, with the starting candidate
   chosen from the width bits of FLAGS.

   Candidates are tried in steps of two until itk_none; a candidate fits
   when the maximum value of integer_types[itk], compared as a HIGH:LOW
   pair of unsigned HOST_WIDE_INTs, is at least the given value.

   NOTE(review): the assignments of the starting candidate, the braces
   and the return statements are missing from this excerpt.  */
532 static enum integer_type_kind
533 narrowest_signed_type (unsigned HOST_WIDE_INT low
,
534 unsigned HOST_WIDE_INT high
, unsigned int flags
)
536 enum integer_type_kind itk
;
538 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
540 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
546 for (; itk
< itk_none
; itk
+= 2 /* step over the unsigned kinds (presumably interleaved with the signed ones in integer_type_kind -- confirm) */)
548 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
550 if ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) > high
551 || ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) == high
552 && TREE_INT_CST_LOW (upper
) >= low
))
559 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib.

   The value is obtained with cpp_interpret_integer and sign-extended to
   the precision configured in cpplib's options.  The type is chosen as
   follows:
   - with CPP_N_UNSIGNED, the narrowest unsigned type that fits;
   - for non-decimal constants, the smaller of the narrowest unsigned
     and narrowest signed kinds;
   - for decimal constants, the signed kind, with the C90 adjustments
     and warnings shown in the body.
   When no kind fits, the widest literal type node is used (unsigned or
   signed according to FLAGS).  A pedwarn is issued when the chosen kind
   is beyond itk_unsigned_long although the constant has no
   CPP_N_LARGE width, outside system headers and when flag_isoc99 is
   off.  The result is built with build_int_cst_wide; with
   CPP_N_IMAGINARY it is wrapped by build_complex with a zero real part.

   NOTE(review): the return type, several declarations, braces, `else'
   lines and the return statement are missing from this excerpt, and one
   comment inside the body is cut off mid-sentence.  */
561 interpret_integer (const cpp_token
*token
, unsigned int flags
)
564 enum integer_type_kind itk
;
566 cpp_options
*options
= cpp_get_options (parse_in
);
568 integer
= cpp_interpret_integer (parse_in
, token
, flags
);
569 integer
= cpp_num_sign_extend (integer
, options
->precision
);
571 /* The type of a constant with a U suffix is straightforward. */
572 if (flags
& CPP_N_UNSIGNED
)
573 itk
= narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
576 /* The type of a potentially-signed integer constant varies
577 depending on the base it's in, the standard in use, and the
579 enum integer_type_kind itk_u
580 = narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
581 enum integer_type_kind itk_s
582 = narrowest_signed_type (integer
.low
, integer
.high
, flags
);
584 /* In both C89 and C99, octal and hex constants may be signed or
585 unsigned, whichever fits tighter. We do not warn about this
586 choice differing from the traditional choice, as the constant
587 is probably a bit pattern and either way will work. */
588 if ((flags
& CPP_N_RADIX
) != CPP_N_DECIMAL
)
589 itk
= MIN (itk_u
, itk_s
);
592 /* In C99, decimal constants are always signed.
593 In C89, decimal constants that don't fit in long have
594 undefined behavior; we try to make them unsigned long.
595 In GCC's extended C89, that last is true of decimal
596 constants that don't fit in long long, too. */
599 if (itk_s
> itk_u
&& itk_s
> itk_long
)
603 if (itk_u
< itk_unsigned_long
)
604 itk_u
= itk_unsigned_long
;
606 warning ("this decimal constant is unsigned only in ISO C90");
608 else if (warn_traditional
)
609 warning ("this decimal constant would be unsigned in ISO C90");
615 /* cpplib has already issued a warning for overflow. */
616 type
= ((flags
& CPP_N_UNSIGNED
)
617 ? widest_unsigned_literal_type_node
618 : widest_integer_literal_type_node
);
620 type
= integer_types
[itk
];
622 if (itk
> itk_unsigned_long
623 && (flags
& CPP_N_WIDTH
) != CPP_N_LARGE
624 && !in_system_header
&& !flag_isoc99
)
625 pedwarn ("integer constant is too large for %qs type",
626 (flags
& CPP_N_UNSIGNED
) ? "unsigned long" : "long");
628 value
= build_int_cst_wide (type
, integer
.low
, integer
.high
);
630 /* Convert imaginary to a complex type. */
631 if (flags
& CPP_N_IMAGINARY
)
632 value
= build_complex (NULL_TREE
, build_int_cst (type
, 0), value
);
637 /* Interpret TOKEN, a floating point number with FLAGS as classified
   by cpplib.  */
/* Implementation notes for interpret_float (TOKEN, FLAGS).

   The result type follows the width bits of FLAGS: CPP_N_LARGE gives
   long double; CPP_N_SMALL, or flag_single_precision_constant, gives
   float; anything else gives double.  The token text, minus any width
   or imaginary suffix, is copied into a NUL-terminated alloca buffer,
   parsed with real_from_string and converted to the mode of the chosen
   type.  If the converted value is infinite and `pedantic' is set, a
   pedwarn naming the type is issued.  The constant is built with
   build_real; with CPP_N_IMAGINARY it is wrapped by build_complex with
   a zero real part converted to the same type.

   NOTE(review): the return type, several declarations, the statements
   that shorten copylen, braces and the return statement are missing
   from this excerpt.  */
640 interpret_float (const cpp_token
*token
, unsigned int flags
)
644 REAL_VALUE_TYPE real
;
647 const char *type_name
;
649 /* FIXME: make %T work in error/warning, then we don't need type_name. */
650 if ((flags
& CPP_N_WIDTH
) == CPP_N_LARGE
)
652 type
= long_double_type_node
;
653 type_name
= "long double";
655 else if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
656 || flag_single_precision_constant
)
658 type
= float_type_node
;
663 type
= double_type_node
;
664 type_name
= "double";
667 /* Copy the constant to a nul-terminated buffer. If the constant
668 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
669 can't handle them. */
670 copylen
= token
->val
.str
.len
;
671 if ((flags
& CPP_N_WIDTH
) != CPP_N_MEDIUM
)
672 /* Must be an F or L suffix. */
674 if (flags
& CPP_N_IMAGINARY
)
678 copy
= (char *) alloca (copylen
+ 1);
679 memcpy (copy
, token
->val
.str
.text
, copylen
);
680 copy
[copylen
] = '\0';
682 real_from_string (&real
, copy
);
683 real_convert (&real
, TYPE_MODE (type
), &real
);
685 /* Both C and C++ require a diagnostic for a floating constant
686 outside the range of representable values of its type. Since we
687 have __builtin_inf* to produce an infinity, it might now be
688 appropriate for this to be a mandatory pedwarn rather than
689 conditioned on -pedantic. */
690 if (REAL_VALUE_ISINF (real
) && pedantic
)
691 pedwarn ("floating constant exceeds range of %<%s%>", type_name
);
693 /* Create a node with determined type and value. */
694 value
= build_real (type
, real
);
695 if (flags
& CPP_N_IMAGINARY
)
696 value
= build_complex (NULL_TREE
, convert (type
, integer_zero_node
), value
);
701 /* Convert a series of STRING and/or WSTRING tokens into a tree,
   performing string constant concatenation.  TOK is the first of
   these.  VALP is the location to write the string into.  OBJC_STRING
   indicates whether an '@' token preceded the incoming token.
   Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
   or CPP_OBJC_STRING).

   This is unfortunately more work than it should be.  If any of the
   strings in the series has an L prefix, the result is a wide string
   (6.4.5p4).  Whether or not the result is a wide string affects the
   meaning of octal and hexadecimal escapes (6.4.4.4p6,9).  But escape
   sequences do not continue across the boundary between two strings in
   a series (6.4.5p7), so we must not lose the boundaries.  Therefore
   cpp_interpret_string takes a vector of cpp_string structures, which
   we must arrange to provide.  */
/* Implementation notes for lex_string (TOK, VALP, OBJC_STRING).

   - The first token's cpp_string is kept in the local `str'.  Once
     further string tokens are seen, an obstack is initialised and the
     cpp_string structures are appended to it; `strs' then points at
     the finished obstack data.
   - The token that ends the series is handed back to cpplib with
     _cpp_backup_tokens.
   - With warn_traditional set, a concatenation outside a system header
     that is not an Objective-C string draws a warning.
   - c_lex_string_translate selects cpp_interpret_string (nonzero) or
     cpp_interpret_string_notranslate (zero).  When it is -1 the
     untranslated form is produced as well and chained through
     TREE_CHAIN, as the comments in the body describe.
   - If interpretation fails, an empty string is used as the value.
   - The value's type is set to wchar_array_type_node or
     char_array_type_node, and *VALP receives fix_string_type (value).
   - The return value is CPP_OBJC_STRING when OBJC_STRING is true,
     otherwise CPP_WSTRING or CPP_STRING according to `wide'.

   NOTE(review): many declarations, loop and branch headers, and braces
   are missing from this excerpt, and several comments near the end of
   the body are cut off before their closing delimiter.  */
717 static enum cpp_ttype
718 lex_string (const cpp_token
*tok
, tree
*valp
, bool objc_string
)
723 struct obstack str_ob
;
726 /* Try to avoid the overhead of creating and destroying an obstack
727 for the common case of just one string. */
728 cpp_string str
= tok
->val
.str
;
729 cpp_string
*strs
= &str
;
731 if (tok
->type
== CPP_WSTRING
)
735 tok
= cpp_get_token (parse_in
);
741 if (c_dialect_objc ())
758 gcc_obstack_init (&str_ob
);
759 obstack_grow (&str_ob
, &str
, sizeof (cpp_string
));
763 obstack_grow (&str_ob
, &tok
->val
.str
, sizeof (cpp_string
));
767 /* We have read one more token than we want. */
768 _cpp_backup_tokens (parse_in
, 1);
770 strs
= (cpp_string
*) obstack_finish (&str_ob
);
772 if (concats
&& !objc_string
&& warn_traditional
&& !in_system_header
)
773 warning ("traditional C rejects string constant concatenation");
775 if ((c_lex_string_translate
776 ? cpp_interpret_string
: cpp_interpret_string_notranslate
)
777 (parse_in
, strs
, concats
+ 1, &istr
, wide
))
779 value
= build_string (istr
.len
, (char *) istr
.text
);
780 free ((void *) istr
.text
);
782 if (c_lex_string_translate
== -1)
784 int xlated
= cpp_interpret_string_notranslate (parse_in
, strs
,
787 /* Assume that, if we managed to translate the string above,
788 then the untranslated parsing will always succeed. */
791 if (TREE_STRING_LENGTH (value
) != (int) istr
.len
792 || 0 != strncmp (TREE_STRING_POINTER (value
), (char *) istr
.text
,
795 /* Arrange for us to return the untranslated string in
796 *valp, but to set up the C type of the translated
798 *valp
= build_string (istr
.len
, (char *) istr
.text
);
799 valp
= &TREE_CHAIN (*valp
);
801 free ((void *) istr
.text
);
806 /* Callers cannot generally handle error_mark_node in this context,
807 so return the empty string instead. cpp_interpret_string has
810 value
= build_string (TYPE_PRECISION (wchar_type_node
)
811 / TYPE_PRECISION (char_type_node
),
812 "\0\0\0"); /* widest supported wchar_t
815 value
= build_string (1, "");
818 TREE_TYPE (value
) = wide
? wchar_array_type_node
: char_array_type_node
;
819 *valp
= fix_string_type (value
);
822 obstack_free (&str_ob
, 0);
824 return objc_string
? CPP_OBJC_STRING
: wide
? CPP_WSTRING
: CPP_STRING
;
827 /* Converts a (possibly wide) character constant token into a tree.

   cpp_interpret_charconst yields the numeric value, the number of
   characters seen and whether the value is unsigned.  The type is
   wchar_type_node for a CPP_WCHAR token; integer_type_node outside C++
   or for multi-character constants; char_type_node otherwise.  The
   constant is built with build_int_cst_wide: a value that is unsigned,
   or non-negative when viewed as cppchar_signed_t, gets a zero high
   word, and any other value is passed as cppchar_signed_t with a high
   word of -1.

   NOTE(review): the return type, the local declarations, the `else'
   lines and the return statement are missing from this excerpt.  */
829 lex_charconst (const cpp_token
*token
)
833 unsigned int chars_seen
;
836 result
= cpp_interpret_charconst (parse_in
, token
,
837 &chars_seen
, &unsignedp
);
839 if (token
->type
== CPP_WCHAR
)
840 type
= wchar_type_node
;
841 /* In C, a character constant has type 'int'.
842 In C++ 'char', but multi-char charconsts have type 'int'. */
843 else if (!c_dialect_cxx () || chars_seen
> 1)
844 type
= integer_type_node
;
846 type
= char_type_node
;
848 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
849 before possibly widening to HOST_WIDE_INT for build_int_cst. */
850 if (unsignedp
|| (cppchar_signed_t
) result
>= 0)
851 value
= build_int_cst_wide (type
, result
, 0);
853 value
= build_int_cst_wide (type
, (cppchar_signed_t
) result
, -1);