2003-06-16 Richard Henderson <rth@redhat.com>
[official-gcc.git] / gcc / c-lex.c
blobea0f80c7b83263987807a4b4d9e7852d3db771b9
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 #include "real.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "expr.h"
31 #include "input.h"
32 #include "output.h"
33 #include "c-tree.h"
34 #include "c-common.h"
35 #include "flags.h"
36 #include "timevar.h"
37 #include "cpplib.h"
38 #include "c-pragma.h"
39 #include "toplev.h"
40 #include "intl.h"
41 #include "tm_p.h"
42 #include "splay-tree.h"
43 #include "debug.h"
45 /* The current line map. */
46 static const struct line_map *map;
48 /* The line used to refresh the lineno global variable after each token. */
49 static unsigned int src_lineno;
51 /* We may keep statistics about how long which files took to compile. */
52 static int header_time, body_time;
53 static splay_tree file_info_tree;
55 /* File used for outputting assembler code. */
56 extern FILE *asm_out_file;
58 #undef WCHAR_TYPE_SIZE
59 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
61 /* Number of bytes in a wide character. */
62 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
64 int pending_lang_change; /* If we need to switch languages - C++ only */
65 int c_header_level; /* depth in C headers - C++ only */
67 /* Nonzero tells yylex to ignore \ in string constants. */
68 static int ignore_escape_flag;
70 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
71 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
72 static enum integer_type_kind
73 narrowest_unsigned_type PARAMS ((tree, unsigned int));
74 static enum integer_type_kind
75 narrowest_signed_type PARAMS ((tree, unsigned int));
76 static tree lex_string PARAMS ((const cpp_string *));
77 static tree lex_charconst PARAMS ((const cpp_token *));
78 static void update_header_times PARAMS ((const char *));
79 static int dump_one_header PARAMS ((splay_tree_node, void *));
80 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
81 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
82 const cpp_string *));
83 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
84 static void cb_define PARAMS ((cpp_reader *, unsigned int,
85 cpp_hashnode *));
86 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
87 cpp_hashnode *));
89 void
90 init_c_lex ()
92 struct cpp_callbacks *cb;
93 struct c_fileinfo *toplevel;
95 /* Set up filename timing. Must happen before cpp_read_main_file. */
96 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
98 (splay_tree_delete_value_fn)free);
99 toplevel = get_fileinfo ("<top level>");
100 if (flag_detailed_statistics)
102 header_time = 0;
103 body_time = get_run_time ();
104 toplevel->time = body_time;
107 cb = cpp_get_callbacks (parse_in);
109 cb->line_change = cb_line_change;
110 cb->ident = cb_ident;
111 cb->def_pragma = cb_def_pragma;
112 cb->valid_pch = c_common_valid_pch;
113 cb->read_pch = c_common_read_pch;
115 /* Set the debug callbacks if we can use them. */
116 if (debug_info_level == DINFO_LEVEL_VERBOSE
117 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
118 || write_symbols == VMS_AND_DWARF2_DEBUG))
120 cb->define = cb_define;
121 cb->undef = cb_undef;
125 struct c_fileinfo *
126 get_fileinfo (name)
127 const char *name;
129 splay_tree_node n;
130 struct c_fileinfo *fi;
132 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
133 if (n)
134 return (struct c_fileinfo *) n->value;
136 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
137 fi->time = 0;
138 fi->interface_only = 0;
139 fi->interface_unknown = 1;
140 splay_tree_insert (file_info_tree, (splay_tree_key) name,
141 (splay_tree_value) fi);
142 return fi;
145 static void
146 update_header_times (name)
147 const char *name;
149 /* Changing files again. This means currently collected time
150 is charged against header time, and body time starts back at 0. */
151 if (flag_detailed_statistics)
153 int this_time = get_run_time ();
154 struct c_fileinfo *file = get_fileinfo (name);
155 header_time += this_time - body_time;
156 file->time += this_time - body_time;
157 body_time = this_time;
161 static int
162 dump_one_header (n, dummy)
163 splay_tree_node n;
164 void *dummy ATTRIBUTE_UNUSED;
166 print_time ((const char *) n->key,
167 ((struct c_fileinfo *) n->value)->time);
168 return 0;
171 void
172 dump_time_statistics ()
174 struct c_fileinfo *file = get_fileinfo (input_filename);
175 int this_time = get_run_time ();
176 file->time += this_time - body_time;
178 fprintf (stderr, "\n******\n");
179 print_time ("header files (total)", header_time);
180 print_time ("main file (total)", this_time - body_time);
181 fprintf (stderr, "ratio = %g : 1\n",
182 (double)header_time / (double)(this_time - body_time));
183 fprintf (stderr, "\n******\n");
185 splay_tree_foreach (file_info_tree, dump_one_header, 0);
188 static void
189 cb_ident (pfile, line, str)
190 cpp_reader *pfile ATTRIBUTE_UNUSED;
191 unsigned int line ATTRIBUTE_UNUSED;
192 const cpp_string *str ATTRIBUTE_UNUSED;
194 #ifdef ASM_OUTPUT_IDENT
195 if (! flag_no_ident)
197 /* Convert escapes in the string. */
198 tree value ATTRIBUTE_UNUSED = lex_string (str);
199 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
201 #endif
204 /* Called at the start of every non-empty line. TOKEN is the first
205 lexed token on the line. Used for diagnostic line numbers. */
206 static void
207 cb_line_change (pfile, token, parsing_args)
208 cpp_reader *pfile ATTRIBUTE_UNUSED;
209 const cpp_token *token;
210 int parsing_args ATTRIBUTE_UNUSED;
212 src_lineno = SOURCE_LINE (map, token->line);
215 void
216 fe_file_change (new_map)
217 const struct line_map *new_map;
219 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
221 if (new_map->reason == LC_ENTER)
223 /* Don't stack the main buffer on the input stack;
224 we already did in compile_file. */
225 if (map == NULL)
226 main_input_filename = new_map->to_file;
227 else
229 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
231 input_line = included_at;
232 push_srcloc (new_map->to_file, 1);
233 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
234 #ifndef NO_IMPLICIT_EXTERN_C
235 if (c_header_level)
236 ++c_header_level;
237 else if (new_map->sysp == 2)
239 c_header_level = 1;
240 ++pending_lang_change;
242 #endif
245 else if (new_map->reason == LC_LEAVE)
247 #ifndef NO_IMPLICIT_EXTERN_C
248 if (c_header_level && --c_header_level == 0)
250 if (new_map->sysp == 2)
251 warning ("badly nested C headers from preprocessor");
252 --pending_lang_change;
254 #endif
255 pop_srcloc ();
257 (*debug_hooks->end_source_file) (to_line);
260 update_header_times (new_map->to_file);
261 in_system_header = new_map->sysp != 0;
262 input_filename = new_map->to_file;
263 input_line = to_line;
264 map = new_map;
266 /* Hook for C++. */
267 extract_interface_info ();
270 static void
271 cb_def_pragma (pfile, line)
272 cpp_reader *pfile;
273 unsigned int line;
275 /* Issue a warning message if we have been asked to do so. Ignore
276 unknown pragmas in system headers unless an explicit
277 -Wunknown-pragmas has been given. */
278 if (warn_unknown_pragmas > in_system_header)
280 const unsigned char *space, *name;
281 const cpp_token *s;
283 space = name = (const unsigned char *) "";
284 s = cpp_get_token (pfile);
285 if (s->type != CPP_EOF)
287 space = cpp_token_as_text (pfile, s);
288 s = cpp_get_token (pfile);
289 if (s->type == CPP_NAME)
290 name = cpp_token_as_text (pfile, s);
293 input_line = SOURCE_LINE (map, line);
294 warning ("ignoring #pragma %s %s", space, name);
298 /* #define callback for DWARF and DWARF2 debug info. */
299 static void
300 cb_define (pfile, line, node)
301 cpp_reader *pfile;
302 unsigned int line;
303 cpp_hashnode *node;
305 (*debug_hooks->define) (SOURCE_LINE (map, line),
306 (const char *) cpp_macro_definition (pfile, node));
309 /* #undef callback for DWARF and DWARF2 debug info. */
310 static void
311 cb_undef (pfile, line, node)
312 cpp_reader *pfile ATTRIBUTE_UNUSED;
313 unsigned int line;
314 cpp_hashnode *node;
316 (*debug_hooks->undef) (SOURCE_LINE (map, line),
317 (const char *) NODE_NAME (node));
321 c_lex (value)
322 tree *value;
324 const cpp_token *tok;
326 retry:
327 timevar_push (TV_CPP);
329 tok = cpp_get_token (parse_in);
330 while (tok->type == CPP_PADDING);
331 timevar_pop (TV_CPP);
333 /* The C++ front end does horrible things with the current line
334 number. To ensure an accurate line number, we must reset it
335 every time we return a token. */
336 input_line = src_lineno;
338 *value = NULL_TREE;
339 switch (tok->type)
341 case CPP_NAME:
342 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
343 break;
345 case CPP_NUMBER:
347 unsigned int flags = cpp_classify_number (parse_in, tok);
349 switch (flags & CPP_N_CATEGORY)
351 case CPP_N_INVALID:
352 /* cpplib has issued an error. */
353 *value = error_mark_node;
354 break;
356 case CPP_N_INTEGER:
357 *value = interpret_integer (tok, flags);
358 break;
360 case CPP_N_FLOATING:
361 *value = interpret_float (tok, flags);
362 break;
364 default:
365 abort ();
368 break;
370 case CPP_OTHER:
372 cppchar_t c = tok->val.str.text[0];
374 if (c == '"' || c == '\'')
375 error ("missing terminating %c character", (int) c);
376 else if (ISGRAPH (c))
377 error ("stray '%c' in program", (int) c);
378 else
379 error ("stray '\\%o' in program", (int) c);
381 goto retry;
383 case CPP_CHAR:
384 case CPP_WCHAR:
385 *value = lex_charconst (tok);
386 break;
388 case CPP_STRING:
389 case CPP_WSTRING:
390 *value = lex_string (&tok->val.str);
391 break;
393 /* These tokens should not be visible outside cpplib. */
394 case CPP_HEADER_NAME:
395 case CPP_COMMENT:
396 case CPP_MACRO_ARG:
397 abort ();
399 default: break;
402 return tok->type;
405 /* Returns the narrowest C-visible unsigned type, starting with the
406 minimum specified by FLAGS, that can fit VALUE, or itk_none if
407 there isn't one. */
408 static enum integer_type_kind
409 narrowest_unsigned_type (value, flags)
410 tree value;
411 unsigned int flags;
413 enum integer_type_kind itk;
415 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
416 itk = itk_unsigned_int;
417 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
418 itk = itk_unsigned_long;
419 else
420 itk = itk_unsigned_long_long;
422 /* int_fits_type_p must think the type of its first argument is
423 wider than its second argument, or it won't do the proper check. */
424 TREE_TYPE (value) = widest_unsigned_literal_type_node;
426 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
427 if (int_fits_type_p (value, integer_types[itk]))
428 return itk;
430 return itk_none;
433 /* Ditto, but narrowest signed type. */
434 static enum integer_type_kind
435 narrowest_signed_type (value, flags)
436 tree value;
437 unsigned int flags;
439 enum integer_type_kind itk;
441 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
442 itk = itk_int;
443 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
444 itk = itk_long;
445 else
446 itk = itk_long_long;
448 /* int_fits_type_p must think the type of its first argument is
449 wider than its second argument, or it won't do the proper check. */
450 TREE_TYPE (value) = widest_unsigned_literal_type_node;
452 for (; itk < itk_none; itk += 2 /* skip signed types */)
453 if (int_fits_type_p (value, integer_types[itk]))
454 return itk;
456 return itk_none;
459 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
460 static tree
461 interpret_integer (token, flags)
462 const cpp_token *token;
463 unsigned int flags;
465 tree value, type;
466 enum integer_type_kind itk;
467 cpp_num integer;
468 cpp_options *options = cpp_get_options (parse_in);
470 integer = cpp_interpret_integer (parse_in, token, flags);
471 integer = cpp_num_sign_extend (integer, options->precision);
472 value = build_int_2_wide (integer.low, integer.high);
474 /* The type of a constant with a U suffix is straightforward. */
475 if (flags & CPP_N_UNSIGNED)
476 itk = narrowest_unsigned_type (value, flags);
477 else
479 /* The type of a potentially-signed integer constant varies
480 depending on the base it's in, the standard in use, and the
481 length suffixes. */
482 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
483 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
485 /* In both C89 and C99, octal and hex constants may be signed or
486 unsigned, whichever fits tighter. We do not warn about this
487 choice differing from the traditional choice, as the constant
488 is probably a bit pattern and either way will work. */
489 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
490 itk = MIN (itk_u, itk_s);
491 else
493 /* In C99, decimal constants are always signed.
494 In C89, decimal constants that don't fit in long have
495 undefined behavior; we try to make them unsigned long.
496 In GCC's extended C89, that last is true of decimal
497 constants that don't fit in long long, too. */
499 itk = itk_s;
500 if (itk_s > itk_u && itk_s > itk_long)
502 if (!flag_isoc99)
504 if (itk_u < itk_unsigned_long)
505 itk_u = itk_unsigned_long;
506 itk = itk_u;
507 warning ("this decimal constant is unsigned only in ISO C90");
509 else if (warn_traditional)
510 warning ("this decimal constant would be unsigned in ISO C90");
515 if (itk == itk_none)
516 /* cpplib has already issued a warning for overflow. */
517 type = ((flags & CPP_N_UNSIGNED)
518 ? widest_unsigned_literal_type_node
519 : widest_integer_literal_type_node);
520 else
521 type = integer_types[itk];
523 if (itk > itk_unsigned_long
524 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
525 && ! in_system_header && ! flag_isoc99)
526 pedwarn ("integer constant is too large for \"%s\" type",
527 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
529 TREE_TYPE (value) = type;
531 /* Convert imaginary to a complex type. */
532 if (flags & CPP_N_IMAGINARY)
533 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
535 return value;
538 /* Interpret TOKEN, a floating point number with FLAGS as classified
539 by cpplib. */
540 static tree
541 interpret_float (token, flags)
542 const cpp_token *token;
543 unsigned int flags;
545 tree type;
546 tree value;
547 REAL_VALUE_TYPE real;
548 char *copy;
549 size_t copylen;
550 const char *typename;
552 /* FIXME: make %T work in error/warning, then we don't need typename. */
553 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
555 type = long_double_type_node;
556 typename = "long double";
558 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
559 || flag_single_precision_constant)
561 type = float_type_node;
562 typename = "float";
564 else
566 type = double_type_node;
567 typename = "double";
570 /* Copy the constant to a nul-terminated buffer. If the constant
571 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
572 can't handle them. */
573 copylen = token->val.str.len;
574 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
575 /* Must be an F or L suffix. */
576 copylen--;
577 if (flags & CPP_N_IMAGINARY)
578 /* I or J suffix. */
579 copylen--;
581 copy = alloca (copylen + 1);
582 memcpy (copy, token->val.str.text, copylen);
583 copy[copylen] = '\0';
585 real_from_string (&real, copy);
586 real_convert (&real, TYPE_MODE (type), &real);
588 /* A diagnostic is required for "soft" overflow by some ISO C
589 testsuites. This is not pedwarn, because some people don't want
590 an error for this.
591 ??? That's a dubious reason... is this a mandatory diagnostic or
592 isn't it? -- zw, 2001-08-21. */
593 if (REAL_VALUE_ISINF (real) && pedantic)
594 warning ("floating constant exceeds range of \"%s\"", typename);
596 /* Create a node with determined type and value. */
597 value = build_real (type, real);
598 if (flags & CPP_N_IMAGINARY)
599 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
601 return value;
604 static tree
605 lex_string (str)
606 const cpp_string *str;
608 bool wide;
609 tree value;
610 char *buf, *q;
611 cppchar_t c;
612 const unsigned char *p, *limit;
614 wide = str->text[0] == 'L';
615 p = str->text + 1 + wide;
616 limit = str->text + str->len - 1;
617 q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1));
619 while (p < limit)
621 c = *p++;
623 if (c == '\\' && !ignore_escape_flag)
624 c = cpp_parse_escape (parse_in, &p, limit, wide);
626 /* Add this single character into the buffer either as a wchar_t,
627 a multibyte sequence, or as a single byte. */
628 if (wide)
630 unsigned charwidth = TYPE_PRECISION (char_type_node);
631 unsigned bytemask = (1 << charwidth) - 1;
632 int byte;
634 for (byte = 0; byte < WCHAR_BYTES; ++byte)
636 int n;
637 if (byte >= (int) sizeof (c))
638 n = 0;
639 else
640 n = (c >> (byte * charwidth)) & bytemask;
641 if (BYTES_BIG_ENDIAN)
642 q[WCHAR_BYTES - byte - 1] = n;
643 else
644 q[byte] = n;
646 q += WCHAR_BYTES;
648 else
650 *q++ = c;
654 /* Terminate the string value, either with a single byte zero
655 or with a wide zero. */
657 if (wide)
659 memset (q, 0, WCHAR_BYTES);
660 q += WCHAR_BYTES;
662 else
664 *q++ = '\0';
667 value = build_string (q - buf, buf);
669 if (wide)
670 TREE_TYPE (value) = wchar_array_type_node;
671 else
672 TREE_TYPE (value) = char_array_type_node;
673 return value;
676 /* Converts a (possibly wide) character constant token into a tree. */
677 static tree
678 lex_charconst (token)
679 const cpp_token *token;
681 cppchar_t result;
682 tree type, value;
683 unsigned int chars_seen;
684 int unsignedp;
686 result = cpp_interpret_charconst (parse_in, token,
687 &chars_seen, &unsignedp);
689 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
690 before possibly widening to HOST_WIDE_INT for build_int_2. */
691 if (unsignedp || (cppchar_signed_t) result >= 0)
692 value = build_int_2 (result, 0);
693 else
694 value = build_int_2 ((cppchar_signed_t) result, -1);
696 if (token->type == CPP_WCHAR)
697 type = wchar_type_node;
698 /* In C, a character constant has type 'int'.
699 In C++ 'char', but multi-char charconsts have type 'int'. */
700 else if ((c_language == clk_c) || chars_seen > 1)
701 type = integer_type_node;
702 else
703 type = char_type_node;
705 TREE_TYPE (value) = type;
706 return value;