include/ChangeLog:
[official-gcc.git] / gcc / c-lex.c
blob2cca2313c2fe0640c86ac6c6829c7c1378aa7952
/* Mainly the interface between cpplib and the C front ends.
   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
   1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 /* The current line map. */
46 static const struct line_map *map;
48 /* The line used to refresh the lineno global variable after each token. */
49 static unsigned int src_lineno;
51 /* We may keep statistics about how long which files took to compile. */
52 static int header_time, body_time;
53 static splay_tree file_info_tree;
55 #undef WCHAR_TYPE_SIZE
56 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
58 /* Number of bytes in a wide character. */
59 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
61 int pending_lang_change; /* If we need to switch languages - C++ only */
62 int c_header_level; /* depth in C headers - C++ only */
64 /* Nonzero tells yylex to ignore \ in string constants. */
65 static int ignore_escape_flag;
67 static tree interpret_integer (const cpp_token *, unsigned int);
68 static tree interpret_float (const cpp_token *, unsigned int);
69 static enum integer_type_kind
70 narrowest_unsigned_type (tree, unsigned int);
71 static enum integer_type_kind
72 narrowest_signed_type (tree, unsigned int);
73 static tree lex_string (const cpp_string *);
74 static tree lex_charconst (const cpp_token *);
75 static void update_header_times (const char *);
76 static int dump_one_header (splay_tree_node, void *);
77 static void cb_line_change (cpp_reader *, const cpp_token *, int);
78 static void cb_ident (cpp_reader *, unsigned int, const cpp_string *);
79 static void cb_def_pragma (cpp_reader *, unsigned int);
80 static void cb_define (cpp_reader *, unsigned int, cpp_hashnode *);
81 static void cb_undef (cpp_reader *, unsigned int, cpp_hashnode *);
83 void
84 init_c_lex (void)
86 struct cpp_callbacks *cb;
87 struct c_fileinfo *toplevel;
89 /* Set up filename timing. Must happen before cpp_read_main_file. */
90 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
92 (splay_tree_delete_value_fn)free);
93 toplevel = get_fileinfo ("<top level>");
94 if (flag_detailed_statistics)
96 header_time = 0;
97 body_time = get_run_time ();
98 toplevel->time = body_time;
101 cb = cpp_get_callbacks (parse_in);
103 cb->line_change = cb_line_change;
104 cb->ident = cb_ident;
105 cb->def_pragma = cb_def_pragma;
106 cb->valid_pch = c_common_valid_pch;
107 cb->read_pch = c_common_read_pch;
109 /* Set the debug callbacks if we can use them. */
110 if (debug_info_level == DINFO_LEVEL_VERBOSE
111 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
112 || write_symbols == VMS_AND_DWARF2_DEBUG))
114 cb->define = cb_define;
115 cb->undef = cb_undef;
119 struct c_fileinfo *
120 get_fileinfo (const char *name)
122 splay_tree_node n;
123 struct c_fileinfo *fi;
125 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
126 if (n)
127 return (struct c_fileinfo *) n->value;
129 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
130 fi->time = 0;
131 fi->interface_only = 0;
132 fi->interface_unknown = 1;
133 splay_tree_insert (file_info_tree, (splay_tree_key) name,
134 (splay_tree_value) fi);
135 return fi;
138 static void
139 update_header_times (const char *name)
141 /* Changing files again. This means currently collected time
142 is charged against header time, and body time starts back at 0. */
143 if (flag_detailed_statistics)
145 int this_time = get_run_time ();
146 struct c_fileinfo *file = get_fileinfo (name);
147 header_time += this_time - body_time;
148 file->time += this_time - body_time;
149 body_time = this_time;
153 static int
154 dump_one_header (splay_tree_node n, void *dummy ATTRIBUTE_UNUSED)
156 print_time ((const char *) n->key,
157 ((struct c_fileinfo *) n->value)->time);
158 return 0;
161 void
162 dump_time_statistics (void)
164 struct c_fileinfo *file = get_fileinfo (input_filename);
165 int this_time = get_run_time ();
166 file->time += this_time - body_time;
168 fprintf (stderr, "\n******\n");
169 print_time ("header files (total)", header_time);
170 print_time ("main file (total)", this_time - body_time);
171 fprintf (stderr, "ratio = %g : 1\n",
172 (double)header_time / (double)(this_time - body_time));
173 fprintf (stderr, "\n******\n");
175 splay_tree_foreach (file_info_tree, dump_one_header, 0);
178 static void
179 cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
180 unsigned int line ATTRIBUTE_UNUSED,
181 const cpp_string *str ATTRIBUTE_UNUSED)
183 #ifdef ASM_OUTPUT_IDENT
184 if (! flag_no_ident)
186 /* Convert escapes in the string. */
187 tree value ATTRIBUTE_UNUSED = lex_string (str);
188 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
190 #endif
193 /* Called at the start of every non-empty line. TOKEN is the first
194 lexed token on the line. Used for diagnostic line numbers. */
195 static void
196 cb_line_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const cpp_token *token,
197 int parsing_args ATTRIBUTE_UNUSED)
199 src_lineno = SOURCE_LINE (map, token->line);
202 void
203 fe_file_change (const struct line_map *new_map)
205 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
207 if (new_map->reason == LC_ENTER)
209 /* Don't stack the main buffer on the input stack;
210 we already did in compile_file. */
211 if (map == NULL)
212 main_input_filename = new_map->to_file;
213 else
215 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
217 input_line = included_at;
218 push_srcloc (new_map->to_file, 1);
219 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
220 #ifndef NO_IMPLICIT_EXTERN_C
221 if (c_header_level)
222 ++c_header_level;
223 else if (new_map->sysp == 2)
225 c_header_level = 1;
226 ++pending_lang_change;
228 #endif
231 else if (new_map->reason == LC_LEAVE)
233 #ifndef NO_IMPLICIT_EXTERN_C
234 if (c_header_level && --c_header_level == 0)
236 if (new_map->sysp == 2)
237 warning ("badly nested C headers from preprocessor");
238 --pending_lang_change;
240 #endif
241 pop_srcloc ();
243 (*debug_hooks->end_source_file) (to_line);
246 update_header_times (new_map->to_file);
247 in_system_header = new_map->sysp != 0;
248 input_filename = new_map->to_file;
249 input_line = to_line;
250 map = new_map;
252 /* Hook for C++. */
253 extract_interface_info ();
256 static void
257 cb_def_pragma (cpp_reader *pfile, unsigned int line)
259 /* Issue a warning message if we have been asked to do so. Ignore
260 unknown pragmas in system headers unless an explicit
261 -Wunknown-pragmas has been given. */
262 if (warn_unknown_pragmas > in_system_header)
264 const unsigned char *space, *name;
265 const cpp_token *s;
267 space = name = (const unsigned char *) "";
268 s = cpp_get_token (pfile);
269 if (s->type != CPP_EOF)
271 space = cpp_token_as_text (pfile, s);
272 s = cpp_get_token (pfile);
273 if (s->type == CPP_NAME)
274 name = cpp_token_as_text (pfile, s);
277 input_line = SOURCE_LINE (map, line);
278 warning ("ignoring #pragma %s %s", space, name);
282 /* #define callback for DWARF and DWARF2 debug info. */
283 static void
284 cb_define (cpp_reader *pfile, unsigned int line, cpp_hashnode *node)
286 (*debug_hooks->define) (SOURCE_LINE (map, line),
287 (const char *) cpp_macro_definition (pfile, node));
290 /* #undef callback for DWARF and DWARF2 debug info. */
291 static void
292 cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line,
293 cpp_hashnode *node)
295 (*debug_hooks->undef) (SOURCE_LINE (map, line),
296 (const char *) NODE_NAME (node));
300 c_lex (tree *value)
302 const cpp_token *tok;
304 retry:
305 timevar_push (TV_CPP);
307 tok = cpp_get_token (parse_in);
308 while (tok->type == CPP_PADDING);
309 timevar_pop (TV_CPP);
311 /* The C++ front end does horrible things with the current line
312 number. To ensure an accurate line number, we must reset it
313 every time we return a token. */
314 input_line = src_lineno;
316 *value = NULL_TREE;
317 switch (tok->type)
319 case CPP_NAME:
320 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
321 break;
323 case CPP_NUMBER:
325 unsigned int flags = cpp_classify_number (parse_in, tok);
327 switch (flags & CPP_N_CATEGORY)
329 case CPP_N_INVALID:
330 /* cpplib has issued an error. */
331 *value = error_mark_node;
332 break;
334 case CPP_N_INTEGER:
335 *value = interpret_integer (tok, flags);
336 break;
338 case CPP_N_FLOATING:
339 *value = interpret_float (tok, flags);
340 break;
342 default:
343 abort ();
346 break;
348 case CPP_OTHER:
350 cppchar_t c = tok->val.str.text[0];
352 if (c == '"' || c == '\'')
353 error ("missing terminating %c character", (int) c);
354 else if (ISGRAPH (c))
355 error ("stray '%c' in program", (int) c);
356 else
357 error ("stray '\\%o' in program", (int) c);
359 goto retry;
361 case CPP_CHAR:
362 case CPP_WCHAR:
363 *value = lex_charconst (tok);
364 break;
366 case CPP_STRING:
367 case CPP_WSTRING:
368 *value = lex_string (&tok->val.str);
369 break;
371 /* These tokens should not be visible outside cpplib. */
372 case CPP_HEADER_NAME:
373 case CPP_COMMENT:
374 case CPP_MACRO_ARG:
375 abort ();
377 default: break;
380 return tok->type;
383 /* Returns the narrowest C-visible unsigned type, starting with the
384 minimum specified by FLAGS, that can fit VALUE, or itk_none if
385 there isn't one. */
386 static enum integer_type_kind
387 narrowest_unsigned_type (tree value, unsigned int flags)
389 enum integer_type_kind itk;
391 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
392 itk = itk_unsigned_int;
393 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
394 itk = itk_unsigned_long;
395 else
396 itk = itk_unsigned_long_long;
398 /* int_fits_type_p must think the type of its first argument is
399 wider than its second argument, or it won't do the proper check. */
400 TREE_TYPE (value) = widest_unsigned_literal_type_node;
402 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
403 if (int_fits_type_p (value, integer_types[itk]))
404 return itk;
406 return itk_none;
409 /* Ditto, but narrowest signed type. */
410 static enum integer_type_kind
411 narrowest_signed_type (tree value, unsigned int flags)
413 enum integer_type_kind itk;
415 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
416 itk = itk_int;
417 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
418 itk = itk_long;
419 else
420 itk = itk_long_long;
422 /* int_fits_type_p must think the type of its first argument is
423 wider than its second argument, or it won't do the proper check. */
424 TREE_TYPE (value) = widest_unsigned_literal_type_node;
426 for (; itk < itk_none; itk += 2 /* skip signed types */)
427 if (int_fits_type_p (value, integer_types[itk]))
428 return itk;
430 return itk_none;
433 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
434 static tree
435 interpret_integer (const cpp_token *token, unsigned int flags)
437 tree value, type;
438 enum integer_type_kind itk;
439 cpp_num integer;
440 cpp_options *options = cpp_get_options (parse_in);
442 integer = cpp_interpret_integer (parse_in, token, flags);
443 integer = cpp_num_sign_extend (integer, options->precision);
444 value = build_int_2_wide (integer.low, integer.high);
446 /* The type of a constant with a U suffix is straightforward. */
447 if (flags & CPP_N_UNSIGNED)
448 itk = narrowest_unsigned_type (value, flags);
449 else
451 /* The type of a potentially-signed integer constant varies
452 depending on the base it's in, the standard in use, and the
453 length suffixes. */
454 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
455 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
457 /* In both C89 and C99, octal and hex constants may be signed or
458 unsigned, whichever fits tighter. We do not warn about this
459 choice differing from the traditional choice, as the constant
460 is probably a bit pattern and either way will work. */
461 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
462 itk = MIN (itk_u, itk_s);
463 else
465 /* In C99, decimal constants are always signed.
466 In C89, decimal constants that don't fit in long have
467 undefined behavior; we try to make them unsigned long.
468 In GCC's extended C89, that last is true of decimal
469 constants that don't fit in long long, too. */
471 itk = itk_s;
472 if (itk_s > itk_u && itk_s > itk_long)
474 if (!flag_isoc99)
476 if (itk_u < itk_unsigned_long)
477 itk_u = itk_unsigned_long;
478 itk = itk_u;
479 warning ("this decimal constant is unsigned only in ISO C90");
481 else if (warn_traditional)
482 warning ("this decimal constant would be unsigned in ISO C90");
487 if (itk == itk_none)
488 /* cpplib has already issued a warning for overflow. */
489 type = ((flags & CPP_N_UNSIGNED)
490 ? widest_unsigned_literal_type_node
491 : widest_integer_literal_type_node);
492 else
493 type = integer_types[itk];
495 if (itk > itk_unsigned_long
496 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
497 && ! in_system_header && ! flag_isoc99)
498 pedwarn ("integer constant is too large for \"%s\" type",
499 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
501 TREE_TYPE (value) = type;
503 /* Convert imaginary to a complex type. */
504 if (flags & CPP_N_IMAGINARY)
505 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
507 return value;
510 /* Interpret TOKEN, a floating point number with FLAGS as classified
511 by cpplib. */
512 static tree
513 interpret_float (const cpp_token *token, unsigned int flags)
515 tree type;
516 tree value;
517 REAL_VALUE_TYPE real;
518 char *copy;
519 size_t copylen;
520 const char *typename;
522 /* FIXME: make %T work in error/warning, then we don't need typename. */
523 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
525 type = long_double_type_node;
526 typename = "long double";
528 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
529 || flag_single_precision_constant)
531 type = float_type_node;
532 typename = "float";
534 else
536 type = double_type_node;
537 typename = "double";
540 /* Copy the constant to a nul-terminated buffer. If the constant
541 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
542 can't handle them. */
543 copylen = token->val.str.len;
544 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
545 /* Must be an F or L suffix. */
546 copylen--;
547 if (flags & CPP_N_IMAGINARY)
548 /* I or J suffix. */
549 copylen--;
551 copy = alloca (copylen + 1);
552 memcpy (copy, token->val.str.text, copylen);
553 copy[copylen] = '\0';
555 real_from_string (&real, copy);
556 real_convert (&real, TYPE_MODE (type), &real);
558 /* A diagnostic is required for "soft" overflow by some ISO C
559 testsuites. This is not pedwarn, because some people don't want
560 an error for this.
561 ??? That's a dubious reason... is this a mandatory diagnostic or
562 isn't it? -- zw, 2001-08-21. */
563 if (REAL_VALUE_ISINF (real) && pedantic)
564 warning ("floating constant exceeds range of \"%s\"", typename);
566 /* Create a node with determined type and value. */
567 value = build_real (type, real);
568 if (flags & CPP_N_IMAGINARY)
569 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
571 return value;
574 static tree
575 lex_string (const cpp_string *str)
577 bool wide;
578 tree value;
579 char *buf, *q;
580 cppchar_t c;
581 const unsigned char *p, *limit;
583 wide = str->text[0] == 'L';
584 p = str->text + 1 + wide;
585 limit = str->text + str->len - 1;
586 q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1));
588 while (p < limit)
590 c = *p++;
592 if (c == '\\' && !ignore_escape_flag)
593 c = cpp_parse_escape (parse_in, &p, limit, wide);
595 /* Add this single character into the buffer either as a wchar_t,
596 a multibyte sequence, or as a single byte. */
597 if (wide)
599 unsigned charwidth = TYPE_PRECISION (char_type_node);
600 unsigned bytemask = (1 << charwidth) - 1;
601 int byte;
603 for (byte = 0; byte < WCHAR_BYTES; ++byte)
605 int n;
606 if (byte >= (int) sizeof (c))
607 n = 0;
608 else
609 n = (c >> (byte * charwidth)) & bytemask;
610 if (BYTES_BIG_ENDIAN)
611 q[WCHAR_BYTES - byte - 1] = n;
612 else
613 q[byte] = n;
615 q += WCHAR_BYTES;
617 else
619 *q++ = c;
623 /* Terminate the string value, either with a single byte zero
624 or with a wide zero. */
626 if (wide)
628 memset (q, 0, WCHAR_BYTES);
629 q += WCHAR_BYTES;
631 else
633 *q++ = '\0';
636 value = build_string (q - buf, buf);
638 if (wide)
639 TREE_TYPE (value) = wchar_array_type_node;
640 else
641 TREE_TYPE (value) = char_array_type_node;
642 return value;
645 /* Converts a (possibly wide) character constant token into a tree. */
646 static tree
647 lex_charconst (const cpp_token *token)
649 cppchar_t result;
650 tree type, value;
651 unsigned int chars_seen;
652 int unsignedp;
654 result = cpp_interpret_charconst (parse_in, token,
655 &chars_seen, &unsignedp);
657 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
658 before possibly widening to HOST_WIDE_INT for build_int_2. */
659 if (unsignedp || (cppchar_signed_t) result >= 0)
660 value = build_int_2 (result, 0);
661 else
662 value = build_int_2 ((cppchar_signed_t) result, -1);
664 if (token->type == CPP_WCHAR)
665 type = wchar_type_node;
666 /* In C, a character constant has type 'int'.
667 In C++ 'char', but multi-char charconsts have type 'int'. */
668 else if (!c_dialect_cxx () || chars_seen > 1)
669 type = integer_type_node;
670 else
671 type = char_type_node;
673 TREE_TYPE (value) = type;
674 return value;