/* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
41 #include "splay-tree.h"
44 /* We may keep statistics about how long which files took to compile. */
45 static int header_time
, body_time
;
46 static splay_tree file_info_tree
;
48 int pending_lang_change
; /* If we need to switch languages - C++ only */
49 int c_header_level
; /* depth in C headers - C++ only */
51 /* If we need to translate characters received. This is tri-state:
52 0 means use only the untranslated string; 1 means use only
53 the translated string; -1 means chain the translated string
54 to the untranslated one. */
55 int c_lex_string_translate
= 1;
57 /* True if strings should be passed to the caller of c_lex completely
58 unmolested (no concatenation, no translation). */
59 bool c_lex_return_raw_strings
= false;
61 static tree
interpret_integer (const cpp_token
*, unsigned int);
62 static tree
interpret_float (const cpp_token
*, unsigned int);
63 static enum integer_type_kind narrowest_unsigned_type
64 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
65 static enum integer_type_kind narrowest_signed_type
66 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
67 static enum cpp_ttype
lex_string (const cpp_token
*, tree
*, bool);
68 static tree
lex_charconst (const cpp_token
*);
69 static void update_header_times (const char *);
70 static int dump_one_header (splay_tree_node
, void *);
71 static void cb_line_change (cpp_reader
*, const cpp_token
*, int);
72 static void cb_ident (cpp_reader
*, unsigned int, const cpp_string
*);
73 static void cb_def_pragma (cpp_reader
*, unsigned int);
74 static void cb_define (cpp_reader
*, unsigned int, cpp_hashnode
*);
75 static void cb_undef (cpp_reader
*, unsigned int, cpp_hashnode
*);
/* Body of the routine that hooks this file's callbacks into cpplib; the
   line carrying the function's name is not part of this listing.

   It first creates the "<top level>" file record, and when detailed
   statistics are enabled starts the body-time clock and stores that
   stamp in the record.  It then fetches parse_in's callback table and
   installs the line_change, def_pragma, valid_pch and read_pch
   handlers.  The define and undef handlers appear under the test for
   verbose debug info in DWARF2 or VMS-and-DWARF2 format.  */
80 struct cpp_callbacks
*cb
;
81 struct c_fileinfo
*toplevel
;
83 /* The get_fileinfo data structure must be initialized before
84 cpp_read_main_file is called. */
85 toplevel
= get_fileinfo ("<top level>");
86 if (flag_detailed_statistics
)
89 body_time
= get_run_time ();
90 toplevel
->time
= body_time
;
93 cb
= cpp_get_callbacks (parse_in
);
95 cb
->line_change
= cb_line_change
;
97 cb
->def_pragma
= cb_def_pragma
;
98 cb
->valid_pch
= c_common_valid_pch
;
99 cb
->read_pch
= c_common_read_pch
;
101 /* Set the debug callbacks if we can use them. */
102 if (debug_info_level
== DINFO_LEVEL_VERBOSE
103 && (write_symbols
== DWARF2_DEBUG
104 || write_symbols
== VMS_AND_DWARF2_DEBUG
))
106 cb
->define
= cb_define
;
107 cb
->undef
= cb_undef
;
112 get_fileinfo (const char *name
)
115 struct c_fileinfo
*fi
;
118 file_info_tree
= splay_tree_new ((splay_tree_compare_fn
) strcmp
,
120 (splay_tree_delete_value_fn
) free
);
122 n
= splay_tree_lookup (file_info_tree
, (splay_tree_key
) name
);
124 return (struct c_fileinfo
*) n
->value
;
126 fi
= XNEW (struct c_fileinfo
);
128 fi
->interface_only
= 0;
129 fi
->interface_unknown
= 1;
130 splay_tree_insert (file_info_tree
, (splay_tree_key
) name
,
131 (splay_tree_value
) fi
);
136 update_header_times (const char *name
)
138 /* Changing files again. This means currently collected time
139 is charged against header time, and body time starts back at 0. */
140 if (flag_detailed_statistics
)
142 int this_time
= get_run_time ();
143 struct c_fileinfo
*file
= get_fileinfo (name
);
144 header_time
+= this_time
- body_time
;
145 file
->time
+= this_time
- body_time
;
146 body_time
= this_time
;
151 dump_one_header (splay_tree_node n
, void * ARG_UNUSED (dummy
))
153 print_time ((const char *) n
->key
,
154 ((struct c_fileinfo
*) n
->value
)->time
);
159 dump_time_statistics (void)
161 struct c_fileinfo
*file
= get_fileinfo (input_filename
);
162 int this_time
= get_run_time ();
163 file
->time
+= this_time
- body_time
;
165 fprintf (stderr
, "\n******\n");
166 print_time ("header files (total)", header_time
);
167 print_time ("main file (total)", this_time
- body_time
);
168 fprintf (stderr
, "ratio = %g : 1\n",
169 (double) header_time
/ (double) (this_time
- body_time
));
170 fprintf (stderr
, "\n******\n");
172 splay_tree_foreach (file_info_tree
, dump_one_header
, 0);
/* cpplib callback taking the reader, a line number and the string STR.
   Everything the visible body does sits after the ASM_OUTPUT_IDENT
   preprocessor test: STR is run through cpp_interpret_string to
   convert its escapes, the resulting text is handed to
   ASM_OUTPUT_IDENT together with asm_out_file, and the text is then
   freed.  All three parameters are marked ARG_UNUSED.  Some lines of
   this function are absent from this listing.  */
176 cb_ident (cpp_reader
* ARG_UNUSED (pfile
),
177 unsigned int ARG_UNUSED (line
),
178 const cpp_string
* ARG_UNUSED (str
))
180 #ifdef ASM_OUTPUT_IDENT
183 /* Convert escapes in the string. */
184 cpp_string cstr
= { 0, 0 };
185 if (cpp_interpret_string (pfile
, str
, 1, &cstr
, false))
187 ASM_OUTPUT_IDENT (asm_out_file
, (const char *) cstr
.text
);
188 free ((void *) cstr
.text
);
194 /* Called at the start of every non-empty line. TOKEN is the first
195 lexed token on the line. Used for diagnostic line numbers. */
197 cb_line_change (cpp_reader
* ARG_UNUSED (pfile
), const cpp_token
*token
,
200 if (token
->type
!= CPP_EOF
&& !parsing_args
)
201 #ifdef USE_MAPPED_LOCATION
202 input_location
= token
->src_loc
;
205 source_location loc
= token
->src_loc
;
206 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
207 input_line
= SOURCE_LINE (map
, loc
);
/* React to the change of source file described by NEW_MAP.

   The visible lines show this much.  For an LC_ENTER map that is not
   the main file, the last line (or location) of the preceding map is
   recorded as the point of inclusion, the new file is pushed with
   push_srcloc and the start_source_file debug hook is called.  The
   LC_LEAVE part warns about badly nested C headers and calls the
   end_source_file debug hook.  c_header_level and pending_lang_change
   are adjusted only when NO_IMPLICIT_EXTERN_C is not defined.  The
   closing statements charge elapsed time through update_header_times,
   set in_system_header from NEW_MAP->sysp and update the input
   location, or input_filename and input_line when mapped locations
   are not in use.

   Many lines of this function (braces, other branches) are absent
   from this listing, so the exact nesting cannot be read off here.  */
213 fe_file_change (const struct line_map
*new_map
)
218 if (new_map
->reason
== LC_ENTER
)
220 /* Don't stack the main buffer on the input stack;
221 we already did in compile_file. */
222 if (!MAIN_FILE_P (new_map
))
224 #ifdef USE_MAPPED_LOCATION
225 int included_at
= LAST_SOURCE_LINE_LOCATION (new_map
- 1);
227 input_location
= included_at
;
228 push_srcloc (new_map
->start_location
);
230 int included_at
= LAST_SOURCE_LINE (new_map
- 1);
232 input_line
= included_at
;
233 push_srcloc (new_map
->to_file
, 1);
235 (*debug_hooks
->start_source_file
) (included_at
, new_map
->to_file
);
236 #ifndef NO_IMPLICIT_EXTERN_C
239 else if (new_map
->sysp
== 2)
242 ++pending_lang_change
;
247 else if (new_map
->reason
== LC_LEAVE
)
249 #ifndef NO_IMPLICIT_EXTERN_C
250 if (c_header_level
&& --c_header_level
== 0)
252 if (new_map
->sysp
== 2)
253 warning (0, "badly nested C headers from preprocessor");
254 --pending_lang_change
;
259 (*debug_hooks
->end_source_file
) (new_map
->to_line
);
262 update_header_times (new_map
->to_file
);
263 in_system_header
= new_map
->sysp
!= 0;
264 #ifdef USE_MAPPED_LOCATION
265 input_location
= new_map
->start_location
;
267 input_filename
= new_map
->to_file
;
268 input_line
= new_map
->to_line
;
273 cb_def_pragma (cpp_reader
*pfile
, source_location loc
)
275 /* Issue a warning message if we have been asked to do so. Ignore
276 unknown pragmas in system headers unless an explicit
277 -Wunknown-pragmas has been given. */
278 if (warn_unknown_pragmas
> in_system_header
)
280 const unsigned char *space
, *name
;
282 #ifndef USE_MAPPED_LOCATION
284 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
285 fe_loc
.file
= map
->to_file
;
286 fe_loc
.line
= SOURCE_LINE (map
, loc
);
288 location_t fe_loc
= loc
;
291 space
= name
= (const unsigned char *) "";
292 s
= cpp_get_token (pfile
);
293 if (s
->type
!= CPP_EOF
)
295 space
= cpp_token_as_text (pfile
, s
);
296 s
= cpp_get_token (pfile
);
297 if (s
->type
== CPP_NAME
)
298 name
= cpp_token_as_text (pfile
, s
);
301 warning (OPT_Wunknown_pragmas
, "%Hignoring #pragma %s %s",
302 &fe_loc
, space
, name
);
306 /* #define callback for DWARF and DWARF2 debug info. */
308 cb_define (cpp_reader
*pfile
, source_location loc
, cpp_hashnode
*node
)
310 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
311 (*debug_hooks
->define
) (SOURCE_LINE (map
, loc
),
312 (const char *) cpp_macro_definition (pfile
, node
));
315 /* #undef callback for DWARF and DWARF2 debug info. */
317 cb_undef (cpp_reader
* ARG_UNUSED (pfile
), source_location loc
,
320 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
321 (*debug_hooks
->undef
) (SOURCE_LINE (map
, loc
),
322 (const char *) NODE_NAME (node
));
/* Fetch the next token from parse_in, with the TV_CPP timer running,
   and convert it for the front end.

   *VALUE receives the identifier node for names, the result of
   interpret_integer or interpret_float for numbers (error_mark_node
   when cpplib already reported the number as invalid), the result of
   lex_charconst for character constants, a string built by lex_string
   or a raw build_string copy for strings, and an integer constant for
   pragmas.  *LOC is set from input_location.  *CPP_FLAGS is set to the
   token's flags or'ed with PURE_ZERO when the token is the single
   character '0'.  Stray or unterminated characters are diagnosed with
   error, and c_common_no_more_pch is called from the code guarded by
   the static no_more_pch flag.

   NOTE(review): `name' is a 4-byte buffer that cpp_spell_token fills
   and that is then NUL-terminated in place; confirm that the longest
   spelling written there leaves room for the terminator.

   Many lines of this function (case labels, braces, the return) are
   absent from this listing.  */
325 /* Read a token and return its type. Fill *VALUE with its value, if
326 applicable. Fill *CPP_FLAGS with the token's flags, if it is
330 c_lex_with_flags (tree
*value
, location_t
*loc
, unsigned char *cpp_flags
)
332 static bool no_more_pch
;
333 const cpp_token
*tok
;
335 unsigned char add_flags
= 0;
337 timevar_push (TV_CPP
);
339 tok
= cpp_get_token (parse_in
);
343 #ifdef USE_MAPPED_LOCATION
346 *loc
= input_location
;
354 *value
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
359 unsigned int flags
= cpp_classify_number (parse_in
, tok
);
361 switch (flags
& CPP_N_CATEGORY
)
364 /* cpplib has issued an error. */
365 *value
= error_mark_node
;
370 /* C++ uses '0' to mark virtual functions as pure.
371 Set PURE_ZERO to pass this information to the C++ parser. */
372 if (tok
->val
.str
.len
== 1 && *tok
->val
.str
.text
== '0')
373 add_flags
= PURE_ZERO
;
374 *value
= interpret_integer (tok
, flags
);
378 *value
= interpret_float (tok
, flags
);
388 /* An @ may give the next token special significance in Objective-C. */
389 if (c_dialect_objc ())
391 location_t atloc
= input_location
;
394 tok
= cpp_get_token (parse_in
);
403 type
= lex_string (tok
, value
, true);
407 *value
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
408 if (objc_is_reserved_word (*value
))
417 error ("%Hstray %<@%> in program", &atloc
);
427 unsigned char name
[4];
429 *cpp_spell_token (parse_in
, tok
, name
, true) = 0;
431 error ("stray %qs in program", name
);
438 cppchar_t c
= tok
->val
.str
.text
[0];
440 if (c
== '"' || c
== '\'')
441 error ("missing terminating %c character", (int) c
);
442 else if (ISGRAPH (c
))
443 error ("stray %qc in program", (int) c
);
445 error ("stray %<\\%o%> in program", (int) c
);
451 *value
= lex_charconst (tok
);
456 if (!c_lex_return_raw_strings
)
458 type
= lex_string (tok
, value
, false);
461 *value
= build_string (tok
->val
.str
.len
, (char *) tok
->val
.str
.text
);
465 *value
= build_int_cst (NULL
, tok
->val
.pragma
);
468 /* These tokens should not be visible outside cpplib. */
469 case CPP_HEADER_NAME
:
480 *cpp_flags
= tok
->flags
| add_flags
;
485 c_common_no_more_pch ();
488 timevar_pop (TV_CPP
);
493 /* Returns the narrowest C-visible unsigned type, starting with the
494 minimum specified by FLAGS, that can fit HIGH:LOW, or itk_none if
497 static enum integer_type_kind
498 narrowest_unsigned_type (unsigned HOST_WIDE_INT low
,
499 unsigned HOST_WIDE_INT high
,
502 enum integer_type_kind itk
;
504 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
505 itk
= itk_unsigned_int
;
506 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
507 itk
= itk_unsigned_long
;
509 itk
= itk_unsigned_long_long
;
511 for (; itk
< itk_none
; itk
+= 2 /* skip unsigned types */)
513 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
515 if ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) > high
516 || ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) == high
517 && TREE_INT_CST_LOW (upper
) >= low
))
524 /* Ditto, but narrowest signed type. */
525 static enum integer_type_kind
526 narrowest_signed_type (unsigned HOST_WIDE_INT low
,
527 unsigned HOST_WIDE_INT high
, unsigned int flags
)
529 enum integer_type_kind itk
;
531 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
533 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
539 for (; itk
< itk_none
; itk
+= 2 /* skip signed types */)
541 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
543 if ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) > high
544 || ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) == high
545 && TREE_INT_CST_LOW (upper
) >= low
))
/* Turn the integer TOKEN, classified by cpplib as FLAGS, into a tree
   constant.

   The value comes from cpp_interpret_integer and is sign-extended to
   the precision given by the cpp options.  With a U suffix the type is
   the narrowest unsigned type that fits.  Otherwise the narrowest
   unsigned and signed candidates are both computed; non-decimal
   constants take the tighter of the two, and the decimal path emits
   the two "unsigned ... in ISO C90" warnings.  The widest literal
   types are used on the path that follows cpplib's own overflow
   warning.  A pedwarn is issued for a constant too large for
   (unsigned) long when it has no large-width suffix, we are not in a
   system header and flag_isoc99 is clear.  The constant is built with
   build_int_cst_wide, and an imaginary constant is wrapped by
   build_complex with a zero real part.

   Several lines of this function (conditions, else branches, the
   return) are absent from this listing.  */
552 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
554 interpret_integer (const cpp_token
*token
, unsigned int flags
)
557 enum integer_type_kind itk
;
559 cpp_options
*options
= cpp_get_options (parse_in
);
561 integer
= cpp_interpret_integer (parse_in
, token
, flags
);
562 integer
= cpp_num_sign_extend (integer
, options
->precision
);
564 /* The type of a constant with a U suffix is straightforward. */
565 if (flags
& CPP_N_UNSIGNED
)
566 itk
= narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
569 /* The type of a potentially-signed integer constant varies
570 depending on the base it's in, the standard in use, and the
572 enum integer_type_kind itk_u
573 = narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
574 enum integer_type_kind itk_s
575 = narrowest_signed_type (integer
.low
, integer
.high
, flags
);
577 /* In both C89 and C99, octal and hex constants may be signed or
578 unsigned, whichever fits tighter. We do not warn about this
579 choice differing from the traditional choice, as the constant
580 is probably a bit pattern and either way will work. */
581 if ((flags
& CPP_N_RADIX
) != CPP_N_DECIMAL
)
582 itk
= MIN (itk_u
, itk_s
);
585 /* In C99, decimal constants are always signed.
586 In C89, decimal constants that don't fit in long have
587 undefined behavior; we try to make them unsigned long.
588 In GCC's extended C89, that last is true of decimal
589 constants that don't fit in long long, too. */
592 if (itk_s
> itk_u
&& itk_s
> itk_long
)
596 if (itk_u
< itk_unsigned_long
)
597 itk_u
= itk_unsigned_long
;
599 warning (0, "this decimal constant is unsigned only in ISO C90");
602 warning (OPT_Wtraditional
,
603 "this decimal constant would be unsigned in ISO C90");
609 /* cpplib has already issued a warning for overflow. */
610 type
= ((flags
& CPP_N_UNSIGNED
)
611 ? widest_unsigned_literal_type_node
612 : widest_integer_literal_type_node
);
614 type
= integer_types
[itk
];
616 if (itk
> itk_unsigned_long
617 && (flags
& CPP_N_WIDTH
) != CPP_N_LARGE
618 && !in_system_header
&& !flag_isoc99
)
619 pedwarn ("integer constant is too large for %qs type",
620 (flags
& CPP_N_UNSIGNED
) ? "unsigned long" : "long");
622 value
= build_int_cst_wide (type
, integer
.low
, integer
.high
);
624 /* Convert imaginary to a complex type. */
625 if (flags
& CPP_N_IMAGINARY
)
626 value
= build_complex (NULL_TREE
, build_int_cst (type
, 0), value
);
/* Turn the floating-point TOKEN, classified by cpplib as FLAGS, into a
   tree constant.

   The type is picked from the width bits of FLAGS.  Decimal-float
   constants get dfloat128, dfloat32 or dfloat64.  Other constants get
   long double, float (also chosen when
   flag_single_precision_constant is set) or double.  The spelling is
   copied into a NUL-terminated alloca buffer of COPYLEN bytes;
   according to the comment below, any suffix is cut off first because
   the conversion routines cannot handle it.  The buffer is converted
   with real_from_string3 in the mode of the chosen type.  An infinite
   result draws a pedwarn when pedantic.  The constant is built with
   build_real, and an imaginary constant is wrapped by build_complex
   with a zero real part.

   Several lines of this function (declarations, the statements that
   shorten COPYLEN, else branches, the return) are absent from this
   listing.  */
631 /* Interpret TOKEN, a floating point number with FLAGS as classified
634 interpret_float (const cpp_token
*token
, unsigned int flags
)
638 REAL_VALUE_TYPE real
;
642 /* Decode type based on width and properties. */
643 if (flags
& CPP_N_DFLOAT
)
644 if ((flags
& CPP_N_WIDTH
) == CPP_N_LARGE
)
645 type
= dfloat128_type_node
;
646 else if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
647 type
= dfloat32_type_node
;
649 type
= dfloat64_type_node
;
651 if ((flags
& CPP_N_WIDTH
) == CPP_N_LARGE
)
652 type
= long_double_type_node
;
653 else if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
654 || flag_single_precision_constant
)
655 type
= float_type_node
;
657 type
= double_type_node
;
659 /* Copy the constant to a nul-terminated buffer. If the constant
660 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
661 can't handle them. */
662 copylen
= token
->val
.str
.len
;
663 if (flags
& CPP_N_DFLOAT
)
667 if ((flags
& CPP_N_WIDTH
) != CPP_N_MEDIUM
)
668 /* Must be an F or L suffix. */
670 if (flags
& CPP_N_IMAGINARY
)
675 copy
= (char *) alloca (copylen
+ 1);
676 memcpy (copy
, token
->val
.str
.text
, copylen
);
677 copy
[copylen
] = '\0';
679 real_from_string3 (&real
, copy
, TYPE_MODE (type
));
681 /* Both C and C++ require a diagnostic for a floating constant
682 outside the range of representable values of its type. Since we
683 have __builtin_inf* to produce an infinity, it might now be
684 appropriate for this to be a mandatory pedwarn rather than
685 conditioned on -pedantic. */
686 if (REAL_VALUE_ISINF (real
) && pedantic
)
687 pedwarn ("floating constant exceeds range of %qT", type
);
689 /* Create a node with determined type and value. */
690 value
= build_real (type
, real
);
691 if (flags
& CPP_N_IMAGINARY
)
692 value
= build_complex (NULL_TREE
, convert (type
, integer_zero_node
), value
);
/* Summary of what the visible lines of lex_string do.

   The first string is kept in a local cpp_string so that the common
   single-string case needs no obstack.  When further strings follow,
   their cpp_string records are collected in STR_OB and the one token
   read too many is pushed back with _cpp_backup_tokens.  Concatenation
   outside Objective-C strings and system headers draws a -Wtraditional
   warning.  The collected strings are interpreted with
   cpp_interpret_string, or with cpp_interpret_string_notranslate when
   c_lex_string_translate is zero.  When c_lex_string_translate is -1
   the untranslated form is also built and *VALP is advanced to its
   TREE_CHAIN.  The fallback path builds an empty narrow or wide
   string.  The result gets the wide or narrow array type and is
   stored through VALP after fix_string_type.  The return value is
   CPP_OBJC_STRING, CPP_WSTRING or CPP_STRING.

   Many lines of this function (declarations, loops, braces) are
   absent from this listing.  */
697 /* Convert a series of STRING and/or WSTRING tokens into a tree,
698 performing string constant concatenation. TOK is the first of
699 these. VALP is the location to write the string into. OBJC_STRING
700 indicates whether an '@' token preceded the incoming token.
701 Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
704 This is unfortunately more work than it should be. If any of the
705 strings in the series has an L prefix, the result is a wide string
706 (6.4.5p4). Whether or not the result is a wide string affects the
707 meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
708 sequences do not continue across the boundary between two strings in
709 a series (6.4.5p7), so we must not lose the boundaries. Therefore
710 cpp_interpret_string takes a vector of cpp_string structures, which
711 we must arrange to provide. */
713 static enum cpp_ttype
714 lex_string (const cpp_token
*tok
, tree
*valp
, bool objc_string
)
719 struct obstack str_ob
;
722 /* Try to avoid the overhead of creating and destroying an obstack
723 for the common case of just one string. */
724 cpp_string str
= tok
->val
.str
;
725 cpp_string
*strs
= &str
;
727 if (tok
->type
== CPP_WSTRING
)
731 tok
= cpp_get_token (parse_in
);
737 if (c_dialect_objc ())
754 gcc_obstack_init (&str_ob
);
755 obstack_grow (&str_ob
, &str
, sizeof (cpp_string
));
759 obstack_grow (&str_ob
, &tok
->val
.str
, sizeof (cpp_string
));
763 /* We have read one more token than we want. */
764 _cpp_backup_tokens (parse_in
, 1);
766 strs
= XOBFINISH (&str_ob
, cpp_string
*);
768 if (concats
&& !objc_string
&& !in_system_header
)
769 warning (OPT_Wtraditional
,
770 "traditional C rejects string constant concatenation");
772 if ((c_lex_string_translate
773 ? cpp_interpret_string
: cpp_interpret_string_notranslate
)
774 (parse_in
, strs
, concats
+ 1, &istr
, wide
))
776 value
= build_string (istr
.len
, (char *) istr
.text
);
777 free ((void *) istr
.text
);
779 if (c_lex_string_translate
== -1)
781 int xlated
= cpp_interpret_string_notranslate (parse_in
, strs
,
784 /* Assume that, if we managed to translate the string above,
785 then the untranslated parsing will always succeed. */
788 if (TREE_STRING_LENGTH (value
) != (int) istr
.len
789 || 0 != strncmp (TREE_STRING_POINTER (value
), (char *) istr
.text
,
792 /* Arrange for us to return the untranslated string in
793 *valp, but to set up the C type of the translated
795 *valp
= build_string (istr
.len
, (char *) istr
.text
);
796 valp
= &TREE_CHAIN (*valp
);
798 free ((void *) istr
.text
);
803 /* Callers cannot generally handle error_mark_node in this context,
804 so return the empty string instead. cpp_interpret_string has
807 value
= build_string (TYPE_PRECISION (wchar_type_node
)
808 / TYPE_PRECISION (char_type_node
),
809 "\0\0\0"); /* widest supported wchar_t
812 value
= build_string (1, "");
815 TREE_TYPE (value
) = wide
? wchar_array_type_node
: char_array_type_node
;
816 *valp
= fix_string_type (value
);
819 obstack_free (&str_ob
, 0);
821 return objc_string
? CPP_OBJC_STRING
: wide
? CPP_WSTRING
: CPP_STRING
;
824 /* Converts a (possibly wide) character constant token into a tree. */
826 lex_charconst (const cpp_token
*token
)
830 unsigned int chars_seen
;
833 result
= cpp_interpret_charconst (parse_in
, token
,
834 &chars_seen
, &unsignedp
);
836 if (token
->type
== CPP_WCHAR
)
837 type
= wchar_type_node
;
838 /* In C, a character constant has type 'int'.
839 In C++ 'char', but multi-char charconsts have type 'int'. */
840 else if (!c_dialect_cxx () || chars_seen
> 1)
841 type
= integer_type_node
;
843 type
= char_type_node
;
845 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
846 before possibly widening to HOST_WIDE_INT for build_int_cst. */
847 if (unsignedp
|| (cppchar_signed_t
) result
>= 0)
848 value
= build_int_cst_wide (type
, result
, 0);
850 value
= build_int_cst_wide (type
, (cppchar_signed_t
) result
, -1);