* config/i386/netbsd-elf.h (LINK_SPEC): Define as
[official-gcc.git] / gcc / c-lex.c
blob4cb70c8f88636b947fec14e24c2a40dccc64f1b5
1 /* Mainly the interface between cpplib and the C front ends.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
25 #include "real.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "expr.h"
29 #include "input.h"
30 #include "output.h"
31 #include "c-tree.h"
32 #include "c-common.h"
33 #include "flags.h"
34 #include "timevar.h"
35 #include "cpplib.h"
36 #include "c-pragma.h"
37 #include "toplev.h"
38 #include "intl.h"
39 #include "tm_p.h"
40 #include "splay-tree.h"
41 #include "debug.h"
43 #ifdef MULTIBYTE_CHARS
44 #include "mbchar.h"
45 #include <locale.h>
46 #endif /* MULTIBYTE_CHARS */
47 #ifndef GET_ENVIRONMENT
48 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
49 #endif
51 /* The current line map. */
52 static const struct line_map *map;
54 /* The line used to refresh the lineno global variable after each token. */
55 static unsigned int src_lineno;
57 /* We may keep statistics about how long which files took to compile. */
58 static int header_time, body_time;
59 static splay_tree file_info_tree;
61 /* File used for outputting assembler code. */
62 extern FILE *asm_out_file;
64 #undef WCHAR_TYPE_SIZE
65 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
67 /* Number of bytes in a wide character. */
68 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
70 int pending_lang_change; /* If we need to switch languages - C++ only */
71 int c_header_level; /* depth in C headers - C++ only */
73 /* Nonzero tells yylex to ignore \ in string constants. */
74 static int ignore_escape_flag;
76 static tree interpret_integer PARAMS ((const cpp_token *, unsigned int));
77 static tree interpret_float PARAMS ((const cpp_token *, unsigned int));
78 static enum integer_type_kind
79 narrowest_unsigned_type PARAMS ((tree, unsigned int));
80 static enum integer_type_kind
81 narrowest_signed_type PARAMS ((tree, unsigned int));
82 static tree lex_string PARAMS ((const unsigned char *, unsigned int,
83 int));
84 static tree lex_charconst PARAMS ((const cpp_token *));
85 static void update_header_times PARAMS ((const char *));
86 static int dump_one_header PARAMS ((splay_tree_node, void *));
87 static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int));
88 static void cb_ident PARAMS ((cpp_reader *, unsigned int,
89 const cpp_string *));
90 static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *));
91 static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int));
92 static void cb_define PARAMS ((cpp_reader *, unsigned int,
93 cpp_hashnode *));
94 static void cb_undef PARAMS ((cpp_reader *, unsigned int,
95 cpp_hashnode *));
97 const char *
98 init_c_lex (filename)
99 const char *filename;
101 struct cpp_callbacks *cb;
102 struct c_fileinfo *toplevel;
104 /* Set up filename timing. Must happen before cpp_read_main_file. */
105 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
107 (splay_tree_delete_value_fn)free);
108 toplevel = get_fileinfo ("<top level>");
109 if (flag_detailed_statistics)
111 header_time = 0;
112 body_time = get_run_time ();
113 toplevel->time = body_time;
116 #ifdef MULTIBYTE_CHARS
117 /* Change to the native locale for multibyte conversions. */
118 setlocale (LC_CTYPE, "");
119 GET_ENVIRONMENT (literal_codeset, "LANG");
120 #endif
122 cb = cpp_get_callbacks (parse_in);
124 cb->line_change = cb_line_change;
125 cb->ident = cb_ident;
126 cb->file_change = cb_file_change;
127 cb->def_pragma = cb_def_pragma;
129 /* Set the debug callbacks if we can use them. */
130 if (debug_info_level == DINFO_LEVEL_VERBOSE
131 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
132 || write_symbols == VMS_AND_DWARF2_DEBUG))
134 cb->define = cb_define;
135 cb->undef = cb_undef;
138 /* Start it at 0. */
139 lineno = 0;
141 if (filename == NULL || !strcmp (filename, "-"))
142 filename = "";
144 return cpp_read_main_file (parse_in, filename, ident_hash);
147 /* A thin wrapper around the real parser that initializes the
148 integrated preprocessor after debug output has been initialized.
149 Also, make sure the start_source_file debug hook gets called for
150 the primary source file. */
152 void
153 c_common_parse_file (set_yydebug)
154 int set_yydebug ATTRIBUTE_UNUSED;
156 #if YYDEBUG != 0
157 yydebug = set_yydebug;
158 #else
159 warning ("YYDEBUG not defined");
160 #endif
162 (*debug_hooks->start_source_file) (lineno, input_filename);
163 cpp_finish_options (parse_in);
165 yyparse ();
166 free_parser_stacks ();
169 struct c_fileinfo *
170 get_fileinfo (name)
171 const char *name;
173 splay_tree_node n;
174 struct c_fileinfo *fi;
176 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
177 if (n)
178 return (struct c_fileinfo *) n->value;
180 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
181 fi->time = 0;
182 fi->interface_only = 0;
183 fi->interface_unknown = 1;
184 splay_tree_insert (file_info_tree, (splay_tree_key) name,
185 (splay_tree_value) fi);
186 return fi;
189 static void
190 update_header_times (name)
191 const char *name;
193 /* Changing files again. This means currently collected time
194 is charged against header time, and body time starts back at 0. */
195 if (flag_detailed_statistics)
197 int this_time = get_run_time ();
198 struct c_fileinfo *file = get_fileinfo (name);
199 header_time += this_time - body_time;
200 file->time += this_time - body_time;
201 body_time = this_time;
205 static int
206 dump_one_header (n, dummy)
207 splay_tree_node n;
208 void *dummy ATTRIBUTE_UNUSED;
210 print_time ((const char *) n->key,
211 ((struct c_fileinfo *) n->value)->time);
212 return 0;
215 void
216 dump_time_statistics ()
218 struct c_fileinfo *file = get_fileinfo (input_filename);
219 int this_time = get_run_time ();
220 file->time += this_time - body_time;
222 fprintf (stderr, "\n******\n");
223 print_time ("header files (total)", header_time);
224 print_time ("main file (total)", this_time - body_time);
225 fprintf (stderr, "ratio = %g : 1\n",
226 (double)header_time / (double)(this_time - body_time));
227 fprintf (stderr, "\n******\n");
229 splay_tree_foreach (file_info_tree, dump_one_header, 0);
232 static void
233 cb_ident (pfile, line, str)
234 cpp_reader *pfile ATTRIBUTE_UNUSED;
235 unsigned int line ATTRIBUTE_UNUSED;
236 const cpp_string *str ATTRIBUTE_UNUSED;
238 #ifdef ASM_OUTPUT_IDENT
239 if (! flag_no_ident)
241 /* Convert escapes in the string. */
242 tree value = lex_string (str->text, str->len, 0);
243 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
245 #endif
248 /* Called at the start of every non-empty line. TOKEN is the first
249 lexed token on the line. Used for diagnostic line numbers. */
250 static void
251 cb_line_change (pfile, token, parsing_args)
252 cpp_reader *pfile ATTRIBUTE_UNUSED;
253 const cpp_token *token;
254 int parsing_args ATTRIBUTE_UNUSED;
256 src_lineno = SOURCE_LINE (map, token->line);
259 static void
260 cb_file_change (pfile, new_map)
261 cpp_reader *pfile ATTRIBUTE_UNUSED;
262 const struct line_map *new_map;
264 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
266 if (new_map->reason == LC_ENTER)
268 /* Don't stack the main buffer on the input stack;
269 we already did in compile_file. */
270 if (map == NULL)
271 main_input_filename = new_map->to_file;
272 else
274 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
276 lineno = included_at;
277 push_srcloc (new_map->to_file, 1);
278 (*debug_hooks->start_source_file) (included_at, new_map->to_file);
279 #ifndef NO_IMPLICIT_EXTERN_C
280 if (c_header_level)
281 ++c_header_level;
282 else if (new_map->sysp == 2)
284 c_header_level = 1;
285 ++pending_lang_change;
287 #endif
290 else if (new_map->reason == LC_LEAVE)
292 #ifndef NO_IMPLICIT_EXTERN_C
293 if (c_header_level && --c_header_level == 0)
295 if (new_map->sysp == 2)
296 warning ("badly nested C headers from preprocessor");
297 --pending_lang_change;
299 #endif
300 pop_srcloc ();
302 (*debug_hooks->end_source_file) (to_line);
305 update_header_times (new_map->to_file);
306 in_system_header = new_map->sysp != 0;
307 input_filename = new_map->to_file;
308 lineno = to_line;
309 map = new_map;
311 /* Hook for C++. */
312 extract_interface_info ();
315 static void
316 cb_def_pragma (pfile, line)
317 cpp_reader *pfile;
318 unsigned int line;
320 /* Issue a warning message if we have been asked to do so. Ignore
321 unknown pragmas in system headers unless an explicit
322 -Wunknown-pragmas has been given. */
323 if (warn_unknown_pragmas > in_system_header)
325 const unsigned char *space, *name = 0;
326 const cpp_token *s;
328 s = cpp_get_token (pfile);
329 space = cpp_token_as_text (pfile, s);
330 s = cpp_get_token (pfile);
331 if (s->type == CPP_NAME)
332 name = cpp_token_as_text (pfile, s);
334 lineno = SOURCE_LINE (map, line);
335 if (name)
336 warning ("ignoring #pragma %s %s", space, name);
337 else
338 warning ("ignoring #pragma %s", space);
342 /* #define callback for DWARF and DWARF2 debug info. */
343 static void
344 cb_define (pfile, line, node)
345 cpp_reader *pfile;
346 unsigned int line;
347 cpp_hashnode *node;
349 (*debug_hooks->define) (SOURCE_LINE (map, line),
350 (const char *) cpp_macro_definition (pfile, node));
353 /* #undef callback for DWARF and DWARF2 debug info. */
354 static void
355 cb_undef (pfile, line, node)
356 cpp_reader *pfile ATTRIBUTE_UNUSED;
357 unsigned int line;
358 cpp_hashnode *node;
360 (*debug_hooks->undef) (SOURCE_LINE (map, line),
361 (const char *) NODE_NAME (node));
364 #if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Give an error if it
   is not one which may appear in an identifier, as per [extendid].

   Concretely: returns 0 for C below 0x7f (and always on EBCDIC
   targets), and 1 otherwise; an error is reported when C lies above
   the Basic Multilingual Plane or in none of the ranges listed below.

   Note that extended character support in identifiers has not yet been
   implemented.  It is my personal opinion that this is not a desirable
   feature.  Portable code cannot count on support for more than the basic
   identifier character set.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* Inclusive [first, last] ranges of code points permitted in
     identifiers, grouped by script.  A single code point is written
     as a range whose ends coincide.  */
  static const struct
  {
    unsigned short first;
    unsigned short last;
  } valid_ranges[] = {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  0x0e8d was previously written as 0x0e0d, a Thai code
       point that the Thai ranges above already accept.  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof valid_ranges / sizeof valid_ranges[0]; i++)
    if (c >= valid_ranges[i].first && c <= valid_ranges[i].last)
      return 1;

  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer.

   Values up to 0x7f are emitted as a single byte.  Larger values get
   a lead byte carrying the length marker and the topmost bits,
   followed by one continuation byte per remaining 6-bit group, each
   of the form 10xxxxxx.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the bits above the first 6-bit
     group.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Keep only the six payload bits of this group.  Without the
         mask, bit 6 of the shifted value would survive the narrowing
         cast and corrupt the 10xxxxxx continuation marker.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
671 #endif
674 c_lex (value)
675 tree *value;
677 const cpp_token *tok;
679 retry:
680 timevar_push (TV_CPP);
682 tok = cpp_get_token (parse_in);
683 while (tok->type == CPP_PADDING);
684 timevar_pop (TV_CPP);
686 /* The C++ front end does horrible things with the current line
687 number. To ensure an accurate line number, we must reset it
688 every time we return a token. */
689 lineno = src_lineno;
691 *value = NULL_TREE;
692 switch (tok->type)
694 /* Issue this error here, where we can get at tok->val.c. */
695 case CPP_OTHER:
696 if (ISGRAPH (tok->val.c))
697 error ("stray '%c' in program", tok->val.c);
698 else
699 error ("stray '\\%o' in program", tok->val.c);
700 goto retry;
702 case CPP_NAME:
703 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
704 break;
706 case CPP_NUMBER:
708 unsigned int flags = cpp_classify_number (parse_in, tok);
710 switch (flags & CPP_N_CATEGORY)
712 case CPP_N_INVALID:
713 /* cpplib has issued an error. */
714 break;
716 case CPP_N_INTEGER:
717 *value = interpret_integer (tok, flags);
718 break;
720 case CPP_N_FLOATING:
721 *value = interpret_float (tok, flags);
722 break;
724 default:
725 abort ();
728 break;
730 case CPP_CHAR:
731 case CPP_WCHAR:
732 *value = lex_charconst (tok);
733 break;
735 case CPP_STRING:
736 case CPP_WSTRING:
737 *value = lex_string (tok->val.str.text, tok->val.str.len,
738 tok->type == CPP_WSTRING);
739 break;
741 /* These tokens should not be visible outside cpplib. */
742 case CPP_HEADER_NAME:
743 case CPP_COMMENT:
744 case CPP_MACRO_ARG:
745 abort ();
747 default: break;
750 return tok->type;
753 /* Returns the narrowest C-visible unsigned type, starting with the
754 minimum specified by FLAGS, that can fit VALUE, or itk_none if
755 there isn't one. */
756 static enum integer_type_kind
757 narrowest_unsigned_type (value, flags)
758 tree value;
759 unsigned int flags;
761 enum integer_type_kind itk;
763 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
764 itk = itk_unsigned_int;
765 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
766 itk = itk_unsigned_long;
767 else
768 itk = itk_unsigned_long_long;
770 /* int_fits_type_p must think the type of its first argument is
771 wider than its second argument, or it won't do the proper check. */
772 TREE_TYPE (value) = widest_unsigned_literal_type_node;
774 for (; itk < itk_none; itk += 2 /* skip unsigned types */)
775 if (int_fits_type_p (value, integer_types[itk]))
776 return itk;
778 return itk_none;
781 /* Ditto, but narrowest signed type. */
782 static enum integer_type_kind
783 narrowest_signed_type (value, flags)
784 tree value;
785 unsigned int flags;
787 enum integer_type_kind itk;
789 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
790 itk = itk_int;
791 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
792 itk = itk_long;
793 else
794 itk = itk_long_long;
796 /* int_fits_type_p must think the type of its first argument is
797 wider than its second argument, or it won't do the proper check. */
798 TREE_TYPE (value) = widest_unsigned_literal_type_node;
800 for (; itk < itk_none; itk += 2 /* skip signed types */)
801 if (int_fits_type_p (value, integer_types[itk]))
802 return itk;
804 return itk_none;
807 /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */
808 static tree
809 interpret_integer (token, flags)
810 const cpp_token *token;
811 unsigned int flags;
813 tree value, type;
814 enum integer_type_kind itk;
815 cpp_num integer;
816 cpp_options *options = cpp_get_options (parse_in);
818 integer = cpp_interpret_integer (parse_in, token, flags);
819 integer = cpp_num_sign_extend (integer, options->precision);
820 value = build_int_2_wide (integer.low, integer.high);
822 /* The type of a constant with a U suffix is straightforward. */
823 if (flags & CPP_N_UNSIGNED)
824 itk = narrowest_unsigned_type (value, flags);
825 else
827 /* The type of a potentially-signed integer constant varies
828 depending on the base it's in, the standard in use, and the
829 length suffixes. */
830 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
831 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
833 /* In both C89 and C99, octal and hex constants may be signed or
834 unsigned, whichever fits tighter. We do not warn about this
835 choice differing from the traditional choice, as the constant
836 is probably a bit pattern and either way will work. */
837 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
838 itk = MIN (itk_u, itk_s);
839 else
841 /* In C99, decimal constants are always signed.
842 In C89, decimal constants that don't fit in long have
843 undefined behaviour; we try to make them unsigned long.
844 In GCC's extended C89, that last is true of decimal
845 constants that don't fit in long long, too. */
847 itk = itk_s;
848 if (itk_s > itk_u && itk_s > itk_long)
850 if (!flag_isoc99)
852 if (itk_u < itk_unsigned_long)
853 itk_u = itk_unsigned_long;
854 itk = itk_u;
855 warning ("this decimal constant is unsigned only in ISO C89");
857 else if (warn_traditional)
858 warning ("this decimal constant would be unsigned in ISO C89");
863 if (itk == itk_none)
864 /* cpplib has already issued a warning for overflow. */
865 type = ((flags & CPP_N_UNSIGNED)
866 ? widest_unsigned_literal_type_node
867 : widest_integer_literal_type_node);
868 else
869 type = integer_types[itk];
871 if (itk > itk_unsigned_long
872 && (flags & CPP_N_WIDTH) != CPP_N_LARGE
873 && ! in_system_header && ! flag_isoc99)
874 pedwarn ("integer constant is too large for \"%s\" type",
875 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
877 TREE_TYPE (value) = type;
879 /* Convert imaginary to a complex type. */
880 if (flags & CPP_N_IMAGINARY)
881 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
883 return value;
886 /* Interpret TOKEN, a floating point number with FLAGS as classified
887 by cpplib. */
888 static tree
889 interpret_float (token, flags)
890 const cpp_token *token;
891 unsigned int flags;
893 tree type;
894 tree value;
895 REAL_VALUE_TYPE real;
896 char *copy;
897 size_t copylen;
898 const char *typename;
900 /* FIXME: make %T work in error/warning, then we don't need typename. */
901 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
903 type = long_double_type_node;
904 typename = "long double";
906 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
907 || flag_single_precision_constant)
909 type = float_type_node;
910 typename = "float";
912 else
914 type = double_type_node;
915 typename = "double";
918 /* Copy the constant to a nul-terminated buffer. If the constant
919 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
920 can't handle them. */
921 copylen = token->val.str.len;
922 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
923 /* Must be an F or L suffix. */
924 copylen--;
925 if (flags & CPP_N_IMAGINARY)
926 /* I or J suffix. */
927 copylen--;
929 copy = alloca (copylen + 1);
930 memcpy (copy, token->val.str.text, copylen);
931 copy[copylen] = '\0';
933 /* The second argument, machine_mode, of REAL_VALUE_ATOF tells the
934 desired precision of the binary result of decimal-to-binary
935 conversion. */
936 if (flags & CPP_N_HEX)
937 real = REAL_VALUE_HTOF (copy, TYPE_MODE (type));
938 else
939 real = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
941 /* A diagnostic is required for "soft" overflow by some ISO C
942 testsuites. This is not pedwarn, because some people don't want
943 an error for this.
944 ??? That's a dubious reason... is this a mandatory diagnostic or
945 isn't it? -- zw, 2001-08-21. */
946 if (REAL_VALUE_ISINF (real) && pedantic)
947 warning ("floating constant exceeds range of \"%s\"", typename);
949 /* Create a node with determined type and value. */
950 value = build_real (type, real);
951 if (flags & CPP_N_IMAGINARY)
952 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
954 return value;
957 static tree
958 lex_string (str, len, wide)
959 const unsigned char *str;
960 unsigned int len;
961 int wide;
963 tree value;
964 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
965 char *q = buf;
966 const unsigned char *p = str, *limit = str + len;
967 cppchar_t c;
969 #ifdef MULTIBYTE_CHARS
970 /* Reset multibyte conversion state. */
971 (void) local_mbtowc (NULL, NULL, 0);
972 #endif
974 while (p < limit)
976 #ifdef MULTIBYTE_CHARS
977 wchar_t wc;
978 int char_len;
980 char_len = local_mbtowc (&wc, (const char *) p, limit - p);
981 if (char_len == -1)
983 warning ("ignoring invalid multibyte character");
984 char_len = 1;
985 c = *p++;
987 else
989 p += char_len;
990 c = wc;
992 #else
993 c = *p++;
994 #endif
996 if (c == '\\' && !ignore_escape_flag)
997 c = cpp_parse_escape (parse_in, &p, limit, wide);
999 /* Add this single character into the buffer either as a wchar_t,
1000 a multibyte sequence, or as a single byte. */
1001 if (wide)
1003 unsigned charwidth = TYPE_PRECISION (char_type_node);
1004 unsigned bytemask = (1 << charwidth) - 1;
1005 int byte;
1007 for (byte = 0; byte < WCHAR_BYTES; ++byte)
1009 int n;
1010 if (byte >= (int) sizeof (c))
1011 n = 0;
1012 else
1013 n = (c >> (byte * charwidth)) & bytemask;
1014 if (BYTES_BIG_ENDIAN)
1015 q[WCHAR_BYTES - byte - 1] = n;
1016 else
1017 q[byte] = n;
1019 q += WCHAR_BYTES;
1021 #ifdef MULTIBYTE_CHARS
1022 else if (char_len > 1)
1024 /* We're dealing with a multibyte character. */
1025 for ( ; char_len >0; --char_len)
1027 *q++ = *(p - char_len);
1030 #endif
1031 else
1033 *q++ = c;
1037 /* Terminate the string value, either with a single byte zero
1038 or with a wide zero. */
1040 if (wide)
1042 memset (q, 0, WCHAR_BYTES);
1043 q += WCHAR_BYTES;
1045 else
1047 *q++ = '\0';
1050 value = build_string (q - buf, buf);
1052 if (wide)
1053 TREE_TYPE (value) = wchar_array_type_node;
1054 else
1055 TREE_TYPE (value) = char_array_type_node;
1056 return value;
1059 /* Converts a (possibly wide) character constant token into a tree. */
1060 static tree
1061 lex_charconst (token)
1062 const cpp_token *token;
1064 cppchar_t result;
1065 tree type, value;
1066 unsigned int chars_seen;
1067 int unsignedp;
1069 result = cpp_interpret_charconst (parse_in, token,
1070 &chars_seen, &unsignedp);
1072 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1073 before possibly widening to HOST_WIDE_INT for build_int_2. */
1074 if (unsignedp || (cppchar_signed_t) result >= 0)
1075 value = build_int_2 (result, 0);
1076 else
1077 value = build_int_2 ((cppchar_signed_t) result, -1);
1079 if (token->type == CPP_WCHAR)
1080 type = wchar_type_node;
1081 /* In C, a character constant has type 'int'.
1082 In C++ 'char', but multi-char charconsts have type 'int'. */
1083 else if ((c_language == clk_c || c_language == clk_objective_c)
1084 || chars_seen > 1)
1085 type = integer_type_node;
1086 else
1087 type = char_type_node;
1089 TREE_TYPE (value) = type;
1090 return value;