1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
41 #include "splay-tree.h"
44 /* We may keep statistics about how long which files took to compile. */
45 static int header_time
, body_time
;
46 static splay_tree file_info_tree
;
48 #undef WCHAR_TYPE_SIZE
49 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
51 /* Number of bytes in a wide character. */
52 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
54 int pending_lang_change
; /* If we need to switch languages - C++ only */
55 int c_header_level
; /* depth in C headers - C++ only */
57 /* If we need to translate characters received. This is tri-state:
58 0 means use only the untranslated string; 1 means use only
59 the translated string; -1 means chain the translated string
60 to the untranslated one. */
61 int c_lex_string_translate
= 1;
63 /* True if strings should be passed to the caller of c_lex completely
64 unmolested (no concatenation, no translation). */
65 bool c_lex_return_raw_strings
= false;
67 static tree
interpret_integer (const cpp_token
*, unsigned int);
68 static tree
interpret_float (const cpp_token
*, unsigned int);
69 static enum integer_type_kind narrowest_unsigned_type
70 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
71 static enum integer_type_kind narrowest_signed_type
72 (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
, unsigned int);
73 static enum cpp_ttype
lex_string (const cpp_token
*, tree
*, bool);
74 static tree
lex_charconst (const cpp_token
*);
75 static void update_header_times (const char *);
76 static int dump_one_header (splay_tree_node
, void *);
77 static void cb_line_change (cpp_reader
*, const cpp_token
*, int);
78 static void cb_ident (cpp_reader
*, unsigned int, const cpp_string
*);
79 static void cb_def_pragma (cpp_reader
*, unsigned int);
80 static void cb_define (cpp_reader
*, unsigned int, cpp_hashnode
*);
81 static void cb_undef (cpp_reader
*, unsigned int, cpp_hashnode
*);
86 struct cpp_callbacks
*cb
;
87 struct c_fileinfo
*toplevel
;
89 /* The get_fileinfo data structure must be initialized before
90 cpp_read_main_file is called. */
91 toplevel
= get_fileinfo ("<top level>");
92 if (flag_detailed_statistics
)
95 body_time
= get_run_time ();
96 toplevel
->time
= body_time
;
99 cb
= cpp_get_callbacks (parse_in
);
101 cb
->line_change
= cb_line_change
;
102 cb
->ident
= cb_ident
;
103 cb
->def_pragma
= cb_def_pragma
;
104 cb
->valid_pch
= c_common_valid_pch
;
105 cb
->read_pch
= c_common_read_pch
;
107 /* Set the debug callbacks if we can use them. */
108 if (debug_info_level
== DINFO_LEVEL_VERBOSE
109 && (write_symbols
== DWARF2_DEBUG
110 || write_symbols
== VMS_AND_DWARF2_DEBUG
))
112 cb
->define
= cb_define
;
113 cb
->undef
= cb_undef
;
/* Find the c_fileinfo record stored under NAME in file_info_tree, or
   allocate, initialise and insert a new one.
   NOTE(review): this listing is missing source lines (the embedded
   numbers jump, e.g. 121 -> 124 and 128 -> 130), so the tests guarding
   the tree creation and the early return, the declaration of n, and
   the final return are not visible here.  */
118 get_fileinfo (const char *name
)
121 struct c_fileinfo
*fi
;
/* The tree orders its keys with strcmp and releases stored values
   with free.  */
124 file_info_tree
= splay_tree_new ((splay_tree_compare_fn
) strcmp
,
126 (splay_tree_delete_value_fn
) free
);
/* NAME itself, cast to a key, is what gets looked up.  */
128 n
= splay_tree_lookup (file_info_tree
, (splay_tree_key
) name
);
/* Hand back the record held by the node that was found.  */
130 return (struct c_fileinfo
*) n
->value
;
/* Otherwise build a new record.  Defaults set here: interface_only is
   0 and interface_unknown is 1.  */
132 fi
= XNEW (struct c_fileinfo
);
134 fi
->interface_only
= 0;
135 fi
->interface_unknown
= 1;
/* The NAME pointer is stored as the key; no copy is made in the lines
   shown.  NOTE(review): presumably callers must keep NAME alive for
   the lifetime of the tree -- confirm.  */
136 splay_tree_insert (file_info_tree
, (splay_tree_key
) name
,
137 (splay_tree_value
) fi
);
/* Charge the run time elapsed since body_time to the file called NAME
   and to the running header total.  All of the work is conditional on
   flag_detailed_statistics.  */
142 update_header_times (const char *name
)
144 /* Changing files again. This means currently collected time
145 is charged against header time, and body time starts back at 0. */
146 if (flag_detailed_statistics
)
148 int this_time
= get_run_time ();
/* The record for NAME is obtained through get_fileinfo.  */
149 struct c_fileinfo
*file
= get_fileinfo (name
);
/* The same interval is added to the global header total and to this
   file's own counter.  */
150 header_time
+= this_time
- body_time
;
151 file
->time
+= this_time
- body_time
;
/* Restart the measurement interval from the current run time.  */
152 body_time
= this_time
;
/* Per-node callback passed to splay_tree_foreach by
   dump_time_statistics.  Prints one line through print_time: the
   node's key is used as the label and the time field of the stored
   c_fileinfo as the amount.  The second argument is unused.  */
157 dump_one_header (splay_tree_node n
, void * ARG_UNUSED (dummy
))
159 print_time ((const char *) n
->key
,
160 ((struct c_fileinfo
*) n
->value
)->time
);
/* Print the collected timing figures on stderr: the header total, the
   time attributed to the main file, their ratio, and then one line per
   record in file_info_tree.  */
165 dump_time_statistics (void)
/* Charge the time elapsed since body_time to the file named by
   input_filename.  */
167 struct c_fileinfo
*file
= get_fileinfo (input_filename
);
168 int this_time
= get_run_time ();
169 file
->time
+= this_time
- body_time
;
171 fprintf (stderr
, "\n******\n");
172 print_time ("header files (total)", header_time
);
/* The main-file figure is the interval since body_time was last set.  */
173 print_time ("main file (total)", this_time
- body_time
);
/* NOTE(review): the divisor is this_time - body_time, which is zero if
   no run time has elapsed since body_time was set; nothing in the
   lines shown guards against that -- confirm this is acceptable.  */
174 fprintf (stderr
, "ratio = %g : 1\n",
175 (double) header_time
/ (double) (this_time
- body_time
));
176 fprintf (stderr
, "\n******\n");
/* Visit every stored record with dump_one_header.  */
178 splay_tree_foreach (file_info_tree
, dump_one_header
, 0);
182 cb_ident (cpp_reader
* ARG_UNUSED (pfile
),
183 unsigned int ARG_UNUSED (line
),
184 const cpp_string
* ARG_UNUSED (str
))
186 #ifdef ASM_OUTPUT_IDENT
189 /* Convert escapes in the string. */
190 cpp_string cstr
= { 0, 0 };
191 if (cpp_interpret_string (pfile
, str
, 1, &cstr
, false))
193 ASM_OUTPUT_IDENT (asm_out_file
, (const char *) cstr
.text
);
194 free ((void *) cstr
.text
);
200 /* Called at the start of every non-empty line. TOKEN is the first
201 lexed token on the line. Used for diagnostic line numbers. */
203 cb_line_change (cpp_reader
* ARG_UNUSED (pfile
), const cpp_token
*token
,
206 if (token
->type
!= CPP_EOF
&& !parsing_args
)
207 #ifdef USE_MAPPED_LOCATION
208 input_location
= token
->src_loc
;
211 source_location loc
= token
->src_loc
;
212 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
213 input_line
= SOURCE_LINE (map
, loc
);
219 fe_file_change (const struct line_map
*new_map
)
224 if (new_map
->reason
== LC_ENTER
)
226 /* Don't stack the main buffer on the input stack;
227 we already did in compile_file. */
228 if (!MAIN_FILE_P (new_map
))
230 #ifdef USE_MAPPED_LOCATION
231 int included_at
= LAST_SOURCE_LINE_LOCATION (new_map
- 1);
233 input_location
= included_at
;
234 push_srcloc (new_map
->start_location
);
236 int included_at
= LAST_SOURCE_LINE (new_map
- 1);
238 input_line
= included_at
;
239 push_srcloc (new_map
->to_file
, 1);
241 (*debug_hooks
->start_source_file
) (included_at
, new_map
->to_file
);
242 #ifndef NO_IMPLICIT_EXTERN_C
245 else if (new_map
->sysp
== 2)
248 ++pending_lang_change
;
253 else if (new_map
->reason
== LC_LEAVE
)
255 #ifndef NO_IMPLICIT_EXTERN_C
256 if (c_header_level
&& --c_header_level
== 0)
258 if (new_map
->sysp
== 2)
259 warning ("badly nested C headers from preprocessor");
260 --pending_lang_change
;
265 (*debug_hooks
->end_source_file
) (new_map
->to_line
);
268 update_header_times (new_map
->to_file
);
269 in_system_header
= new_map
->sysp
!= 0;
270 #ifdef USE_MAPPED_LOCATION
271 input_location
= new_map
->start_location
;
273 input_filename
= new_map
->to_file
;
274 input_line
= new_map
->to_line
;
279 cb_def_pragma (cpp_reader
*pfile
, source_location loc
)
281 /* Issue a warning message if we have been asked to do so. Ignore
282 unknown pragmas in system headers unless an explicit
283 -Wunknown-pragmas has been given. */
284 if (warn_unknown_pragmas
> in_system_header
)
286 #ifndef USE_MAPPED_LOCATION
287 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
289 const unsigned char *space
, *name
;
292 space
= name
= (const unsigned char *) "";
293 s
= cpp_get_token (pfile
);
294 if (s
->type
!= CPP_EOF
)
296 space
= cpp_token_as_text (pfile
, s
);
297 s
= cpp_get_token (pfile
);
298 if (s
->type
== CPP_NAME
)
299 name
= cpp_token_as_text (pfile
, s
);
302 #ifdef USE_MAPPED_LOCATION
303 input_location
= loc
;
305 input_line
= SOURCE_LINE (map
, loc
);
307 warning ("ignoring #pragma %s %s", space
, name
);
311 /* #define callback for DWARF and DWARF2 debug info. */
/* Forward a macro definition to the debug hooks.  LOC is mapped to a
   source line through line_table, and the text handed to the hook is
   whatever cpp_macro_definition returns for NODE.  */
313 cb_define (cpp_reader
*pfile
, source_location loc
, cpp_hashnode
*node
)
/* Find the line map covering LOC so SOURCE_LINE can translate it.  */
315 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
316 (*debug_hooks
->define
) (SOURCE_LINE (map
, loc
),
317 (const char *) cpp_macro_definition (pfile
, node
));
320 /* #undef callback for DWARF and DWARF2 debug info. */
/* Forward a macro undefinition to the debug hooks: the source line
   derived from LOC plus the name of NODE.  PFILE is unused.
   NOTE(review): the parameter line declaring node is missing from this
   listing (the embedded numbers jump 322 -> 325).  */
322 cb_undef (cpp_reader
* ARG_UNUSED (pfile
), source_location loc
,
/* Find the line map covering LOC so SOURCE_LINE can translate it.  */
325 const struct line_map
*map
= linemap_lookup (&line_table
, loc
);
326 (*debug_hooks
->undef
) (SOURCE_LINE (map
, loc
),
327 (const char *) NODE_NAME (node
));
/* Fetch tokens from parse_in with cpp_get_token, repeating while the
   token type is CPP_PADDING.  The fetching is bracketed by
   timevar_push / timevar_pop on TV_CPP.
   NOTE(review): the listing jumps 334 -> 336 and stops at 338, so the
   line that opens the loop and the return statement are not visible;
   as printed, the while line only looks like an empty-bodied loop.  */
330 static inline const cpp_token
*
331 get_nonpadding_token (void)
333 const cpp_token
*tok
;
334 timevar_push (TV_CPP
);
336 tok
= cpp_get_token (parse_in
);
337 while (tok
->type
== CPP_PADDING
);
338 timevar_pop (TV_CPP
);
344 c_lex_with_flags (tree
*value
, unsigned char *cpp_flags
)
346 const cpp_token
*tok
;
348 static bool no_more_pch
;
351 tok
= get_nonpadding_token ();
357 *value
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
362 unsigned int flags
= cpp_classify_number (parse_in
, tok
);
364 switch (flags
& CPP_N_CATEGORY
)
367 /* cpplib has issued an error. */
368 *value
= error_mark_node
;
372 *value
= interpret_integer (tok
, flags
);
376 *value
= interpret_float (tok
, flags
);
386 /* An @ may give the next token special significance in Objective-C. */
387 atloc
= input_location
;
388 tok
= get_nonpadding_token ();
389 if (c_dialect_objc ())
395 val
= HT_IDENT_TO_GCC_IDENT (HT_NODE (tok
->val
.node
));
396 if (objc_is_reserved_word (val
))
405 return lex_string (tok
, value
, true);
412 error ("%Hstray '@' in program", &atloc
);
417 cppchar_t c
= tok
->val
.str
.text
[0];
419 if (c
== '"' || c
== '\'')
420 error ("missing terminating %c character", (int) c
);
421 else if (ISGRAPH (c
))
422 error ("stray '%c' in program", (int) c
);
424 error ("stray '\\%o' in program", (int) c
);
430 *value
= lex_charconst (tok
);
435 if (!c_lex_return_raw_strings
)
436 return lex_string (tok
, value
, false);
437 /* else fall through */
440 *value
= build_string (tok
->val
.str
.len
, (char *) tok
->val
.str
.text
);
443 /* These tokens should not be visible outside cpplib. */
444 case CPP_HEADER_NAME
:
457 c_common_no_more_pch ();
461 *cpp_flags
= tok
->flags
;
468 return c_lex_with_flags (value
, NULL
);
471 /* Returns the narrowest C-visible unsigned type, starting with the
472 minimum specified by FLAGS, that can fit HIGH:LOW, or itk_none if
there isn't one. */
/* Walk the unsigned integer kinds upwards from a starting point chosen
   by FLAGS, looking for one whose TYPE_MAX_VALUE is at least the
   two-word value HIGH:LOW.
   NOTE(review): this listing is missing source lines (the embedded
   numbers jump 477 -> 480, 485 -> 487 and stop at 495), so the third
   parameter, the final else and the return statements are not visible
   here.  */
475 static enum integer_type_kind
476 narrowest_unsigned_type (unsigned HOST_WIDE_INT low
,
477 unsigned HOST_WIDE_INT high
,
480 enum integer_type_kind itk
;
/* The width bits of FLAGS pick the first kind to try: CPP_N_SMALL
   starts at unsigned int, CPP_N_MEDIUM at unsigned long, and the
   remaining assignment starts at unsigned long long.  */
482 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
483 itk
= itk_unsigned_int
;
484 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
485 itk
= itk_unsigned_long
;
487 itk
= itk_unsigned_long_long
;
/* Stepping by 2 moves from one unsigned kind to the next.
   NOTE(review): this presumably relies on integer_type_kind
   alternating signed and unsigned entries -- confirm against the enum
   definition.  */
489 for (; itk
< itk_none
; itk
+= 2 /* skip signed types */)
491 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
/* Two-word unsigned comparison upper >= HIGH:LOW: the high words
   decide, and the low words break a tie.  */
493 if ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) > high
494 || ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) == high
495 && TREE_INT_CST_LOW (upper
) >= low
))
502 /* Ditto, but narrowest signed type. */
/* Signed counterpart of narrowest_unsigned_type: walk integer kinds
   upwards, looking for one whose TYPE_MAX_VALUE is at least the
   two-word value HIGH:LOW.
   NOTE(review): this listing is missing source lines (the embedded
   numbers jump 509 -> 511 -> 517 and stop at 523), so the assignments
   that choose the starting kind and the return statements are not
   visible here.  */
503 static enum integer_type_kind
504 narrowest_signed_type (unsigned HOST_WIDE_INT low
,
505 unsigned HOST_WIDE_INT high
, unsigned int flags
)
507 enum integer_type_kind itk
;
/* The width bits of FLAGS select the starting kind; only the two
   tests survive in this listing.  */
509 if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
)
511 else if ((flags
& CPP_N_WIDTH
) == CPP_N_MEDIUM
)
/* Stepping by 2 keeps the walk on kinds of one signedness.
   NOTE(review): this presumably relies on integer_type_kind
   alternating signed and unsigned entries -- confirm against the enum
   definition.  */
517 for (; itk
< itk_none
; itk
+= 2 /* skip unsigned types */)
519 tree upper
= TYPE_MAX_VALUE (integer_types
[itk
]);
/* Two-word unsigned comparison upper >= HIGH:LOW: the high words
   decide, and the low words break a tie.  */
521 if ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) > high
522 || ((unsigned HOST_WIDE_INT
) TREE_INT_CST_HIGH (upper
) == high
523 && TREE_INT_CST_LOW (upper
) >= low
))
530 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
532 interpret_integer (const cpp_token
*token
, unsigned int flags
)
535 enum integer_type_kind itk
;
537 cpp_options
*options
= cpp_get_options (parse_in
);
539 integer
= cpp_interpret_integer (parse_in
, token
, flags
);
540 integer
= cpp_num_sign_extend (integer
, options
->precision
);
542 /* The type of a constant with a U suffix is straightforward. */
543 if (flags
& CPP_N_UNSIGNED
)
544 itk
= narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
547 /* The type of a potentially-signed integer constant varies
548 depending on the base it's in, the standard in use, and the
length suffixes. */
550 enum integer_type_kind itk_u
551 = narrowest_unsigned_type (integer
.low
, integer
.high
, flags
);
552 enum integer_type_kind itk_s
553 = narrowest_signed_type (integer
.low
, integer
.high
, flags
);
555 /* In both C89 and C99, octal and hex constants may be signed or
556 unsigned, whichever fits tighter. We do not warn about this
557 choice differing from the traditional choice, as the constant
558 is probably a bit pattern and either way will work. */
559 if ((flags
& CPP_N_RADIX
) != CPP_N_DECIMAL
)
560 itk
= MIN (itk_u
, itk_s
);
563 /* In C99, decimal constants are always signed.
564 In C89, decimal constants that don't fit in long have
565 undefined behavior; we try to make them unsigned long.
566 In GCC's extended C89, that last is true of decimal
567 constants that don't fit in long long, too. */
570 if (itk_s
> itk_u
&& itk_s
> itk_long
)
574 if (itk_u
< itk_unsigned_long
)
575 itk_u
= itk_unsigned_long
;
577 warning ("this decimal constant is unsigned only in ISO C90");
579 else if (warn_traditional
)
580 warning ("this decimal constant would be unsigned in ISO C90");
586 /* cpplib has already issued a warning for overflow. */
587 type
= ((flags
& CPP_N_UNSIGNED
)
588 ? widest_unsigned_literal_type_node
589 : widest_integer_literal_type_node
);
591 type
= integer_types
[itk
];
593 if (itk
> itk_unsigned_long
594 && (flags
& CPP_N_WIDTH
) != CPP_N_LARGE
595 && !in_system_header
&& !flag_isoc99
)
596 pedwarn ("integer constant is too large for %qs type",
597 (flags
& CPP_N_UNSIGNED
) ? "unsigned long" : "long");
599 value
= build_int_cst_wide (type
, integer
.low
, integer
.high
);
601 /* Convert imaginary to a complex type. */
602 if (flags
& CPP_N_IMAGINARY
)
603 value
= build_complex (NULL_TREE
, build_int_cst (type
, 0), value
);
608 /* Interpret TOKEN, a floating point number with FLAGS as classified
by cpplib. */
611 interpret_float (const cpp_token
*token
, unsigned int flags
)
615 REAL_VALUE_TYPE real
;
618 const char *type_name
;
620 /* FIXME: make %T work in error/warning, then we don't need type_name. */
621 if ((flags
& CPP_N_WIDTH
) == CPP_N_LARGE
)
623 type
= long_double_type_node
;
624 type_name
= "long double";
626 else if ((flags
& CPP_N_WIDTH
) == CPP_N_SMALL
627 || flag_single_precision_constant
)
629 type
= float_type_node
;
634 type
= double_type_node
;
635 type_name
= "double";
638 /* Copy the constant to a nul-terminated buffer. If the constant
639 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
640 can't handle them. */
641 copylen
= token
->val
.str
.len
;
642 if ((flags
& CPP_N_WIDTH
) != CPP_N_MEDIUM
)
643 /* Must be an F or L suffix. */
645 if (flags
& CPP_N_IMAGINARY
)
649 copy
= (char *) alloca (copylen
+ 1);
650 memcpy (copy
, token
->val
.str
.text
, copylen
);
651 copy
[copylen
] = '\0';
653 real_from_string (&real
, copy
);
654 real_convert (&real
, TYPE_MODE (type
), &real
);
656 /* A diagnostic is required for "soft" overflow by some ISO C
657 testsuites. This is not pedwarn, because some people don't want
an error for this.
659 ??? That's a dubious reason... is this a mandatory diagnostic or
660 isn't it? -- zw, 2001-08-21. */
661 if (REAL_VALUE_ISINF (real
) && pedantic
)
662 warning ("floating constant exceeds range of %<%s%>", type_name
);
664 /* Create a node with determined type and value. */
665 value
= build_real (type
, real
);
666 if (flags
& CPP_N_IMAGINARY
)
667 value
= build_complex (NULL_TREE
, convert (type
, integer_zero_node
), value
);
672 /* Convert a series of STRING and/or WSTRING tokens into a tree,
673 performing string constant concatenation. TOK is the first of
674 these. VALP is the location to write the string into. OBJC_STRING
675 indicates whether an '@' token preceded the incoming token.
676 Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
or CPP_OBJC_STRING).
679 This is unfortunately more work than it should be. If any of the
680 strings in the series has an L prefix, the result is a wide string
681 (6.4.5p4). Whether or not the result is a wide string affects the
682 meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
683 sequences do not continue across the boundary between two strings in
684 a series (6.4.5p7), so we must not lose the boundaries. Therefore
685 cpp_interpret_string takes a vector of cpp_string structures, which
686 we must arrange to provide. */
688 static enum cpp_ttype
689 lex_string (const cpp_token
*tok
, tree
*valp
, bool objc_string
)
694 struct obstack str_ob
;
697 /* Try to avoid the overhead of creating and destroying an obstack
698 for the common case of just one string. */
699 cpp_string str
= tok
->val
.str
;
700 cpp_string
*strs
= &str
;
702 if (tok
->type
== CPP_WSTRING
)
705 tok
= get_nonpadding_token ();
706 if (c_dialect_objc () && tok
->type
== CPP_ATSIGN
)
709 tok
= get_nonpadding_token ();
711 if (tok
->type
== CPP_STRING
|| tok
->type
== CPP_WSTRING
)
713 gcc_obstack_init (&str_ob
);
714 obstack_grow (&str_ob
, &str
, sizeof (cpp_string
));
719 if (tok
->type
== CPP_WSTRING
)
721 obstack_grow (&str_ob
, &tok
->val
.str
, sizeof (cpp_string
));
723 tok
= get_nonpadding_token ();
724 if (c_dialect_objc () && tok
->type
== CPP_ATSIGN
)
727 tok
= get_nonpadding_token ();
730 while (tok
->type
== CPP_STRING
|| tok
->type
== CPP_WSTRING
);
731 strs
= (cpp_string
*) obstack_finish (&str_ob
);
734 /* We have read one more token than we want. */
735 _cpp_backup_tokens (parse_in
, 1);
737 if (count
> 1 && !objc_string
&& warn_traditional
&& !in_system_header
)
738 warning ("traditional C rejects string constant concatenation");
740 if ((c_lex_string_translate
741 ? cpp_interpret_string
: cpp_interpret_string_notranslate
)
742 (parse_in
, strs
, count
, &istr
, wide
))
744 value
= build_string (istr
.len
, (char *) istr
.text
);
745 free ((void *) istr
.text
);
747 if (c_lex_string_translate
== -1)
749 int xlated
= cpp_interpret_string_notranslate (parse_in
, strs
, count
,
751 /* Assume that, if we managed to translate the string above,
752 then the untranslated parsing will always succeed. */
755 if (TREE_STRING_LENGTH (value
) != (int) istr
.len
756 || 0 != strncmp (TREE_STRING_POINTER (value
), (char *) istr
.text
,
759 /* Arrange for us to return the untranslated string in
760 *valp, but to set up the C type of the translated
one. */
762 *valp
= build_string (istr
.len
, (char *) istr
.text
);
763 valp
= &TREE_CHAIN (*valp
);
765 free ((void *) istr
.text
);
770 /* Callers cannot generally handle error_mark_node in this context,
771 so return the empty string instead. cpp_interpret_string has
issued an error. */
774 value
= build_string (TYPE_PRECISION (wchar_type_node
)
775 / TYPE_PRECISION (char_type_node
),
776 "\0\0\0"); /* widest supported wchar_t
779 value
= build_string (1, "");
782 TREE_TYPE (value
) = wide
? wchar_array_type_node
: char_array_type_node
;
783 *valp
= fix_string_type (value
);
786 obstack_free (&str_ob
, 0);
788 return objc_string
? CPP_OBJC_STRING
: wide
? CPP_WSTRING
: CPP_STRING
;
791 /* Converts a (possibly wide) character constant token into a tree. */
/* Build an integer constant tree for the character constant TOKEN,
   choosing its type from the token kind, the language dialect and the
   number of characters seen.
   NOTE(review): this listing is missing source lines (the embedded
   numbers jump 793 -> 797 -> 800, 808 -> 810, 815 -> 817 and stop at
   817), so several declarations, the else lines and the return
   statement are not visible here.  */
793 lex_charconst (const cpp_token
*token
)
797 unsigned int chars_seen
;
/* cpplib computes the numeric value; chars_seen and unsignedp are
   filled in through the pointers passed here and are consulted
   below.  */
800 result
= cpp_interpret_charconst (parse_in
, token
,
801 &chars_seen
, &unsignedp
);
/* A CPP_WCHAR token always gets the wchar_t type.  */
803 if (token
->type
== CPP_WCHAR
)
804 type
= wchar_type_node
;
805 /* In C, a character constant has type 'int'.
806 In C++ 'char', but multi-char charconsts have type 'int'. */
807 else if (!c_dialect_cxx () || chars_seen
> 1)
808 type
= integer_type_node
;
810 type
= char_type_node
;
812 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
813 before possibly widening to HOST_WIDE_INT for build_int_cst. */
/* Unsigned or non-negative values are built with a high word of 0;
   the other call passes -1 as the high word, so a negative value is
   sign-extended across both words.  */
814 if (unsignedp
|| (cppchar_signed_t
) result
>= 0)
815 value
= build_int_cst_wide (type
, result
, 0);
817 value
= build_int_cst_wide (type
, (cppchar_signed_t
) result
, -1);