/* Copyright (C) 1986, 1987, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
27 #include "localedir.h"
29 static void init_library (void);
30 static void mark_named_operators (cpp_reader
*);
31 static void read_original_filename (cpp_reader
*);
32 static void read_original_directory (cpp_reader
*);
33 static void post_options (cpp_reader
*);
35 /* If we have designated initializers (GCC >2.7) these tables can be
36 initialized, constant data. Otherwise, they have to be filled in at
38 #if HAVE_DESIGNATED_INITIALIZERS
40 #define init_trigraph_map() /* Nothing. */
41 #define TRIGRAPH_MAP \
42 __extension__ const uchar _cpp_trigraph_map[UCHAR_MAX + 1] = {
45 #define s(p, v) [p] = v,
49 #define TRIGRAPH_MAP uchar _cpp_trigraph_map[UCHAR_MAX + 1] = { 0 }; \
50 static void init_trigraph_map (void) { \
51 unsigned char *x = _cpp_trigraph_map;
54 #define s(p, v) x[p] = v;
59 s('=', '#') s(')', ']') s('!', '|')
60 s('(', '[') s('\'', '^') s('>', '}')
61 s('/', '\\') s('<', '{') s('-', '~')
/* A set of booleans indicating what CPP features each source language
   requires.  The members appear in the same order as the columns of
   lang_defaults below.  */
struct lang_flags
{
  char c99;
  char cplusplus;
  char extended_numbers;
  char extended_identifiers;
  char std;
  char cplusplus_comments;
  char digraphs;
  char uliterals;
};

/* Per-language defaults, one row per language; cpp_set_lang indexes
   this table with the language value it is given.

   xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX0X, and
   CXX0X when no longer experimental (when all uses of identifiers
   in the compiler have been audited for correct handling of
   extended identifiers).  */
static const struct lang_flags lang_defaults[] =
{
  /*               c99 c++ xnum xid std  //  digr ulit */
  /* GNUC89   */ {  0,  0,   1,  0,  0,  1,   1,   0 },
  /* GNUC99   */ {  1,  0,   1,  0,  0,  1,   1,   1 },
  /* STDC89   */ {  0,  0,   0,  0,  1,  0,   0,   0 },
  /* STDC94   */ {  0,  0,   0,  0,  1,  0,   1,   0 },
  /* STDC99   */ {  1,  0,   1,  0,  1,  1,   1,   0 },
  /* GNUCXX   */ {  0,  1,   1,  0,  0,  1,   1,   0 },
  /* CXX98    */ {  0,  1,   1,  0,  1,  1,   1,   0 },
  /* GNUCXX0X */ {  1,  1,   1,  0,  0,  1,   1,   1 },
  /* CXX0X    */ {  1,  1,   1,  0,  1,  1,   1,   1 },
  /* ASM      */ {  0,  0,   1,  0,  0,  1,   0,   0 }
};
100 /* Sets internal flags correctly for a given language. */
102 cpp_set_lang (cpp_reader
*pfile
, enum c_lang lang
)
104 const struct lang_flags
*l
= &lang_defaults
[(int) lang
];
106 CPP_OPTION (pfile
, lang
) = lang
;
108 CPP_OPTION (pfile
, c99
) = l
->c99
;
109 CPP_OPTION (pfile
, cplusplus
) = l
->cplusplus
;
110 CPP_OPTION (pfile
, extended_numbers
) = l
->extended_numbers
;
111 CPP_OPTION (pfile
, extended_identifiers
) = l
->extended_identifiers
;
112 CPP_OPTION (pfile
, std
) = l
->std
;
113 CPP_OPTION (pfile
, trigraphs
) = l
->std
;
114 CPP_OPTION (pfile
, cplusplus_comments
) = l
->cplusplus_comments
;
115 CPP_OPTION (pfile
, digraphs
) = l
->digraphs
;
116 CPP_OPTION (pfile
, uliterals
) = l
->uliterals
;
/* Initialize library global state.  Only the first call does any work;
   every later call returns immediately.

   NOTE(review): the function header, the "initialized" test and the
   ENABLE_NLS guard are absent from the extracted text and have been
   restored -- confirm against the pristine file.  */
static void
init_library (void)
{
  static int initialized = 0;

  if (initialized)
    return;
  initialized = 1;

  /* Set up the trigraph map.  This doesn't need to do anything if
     we were compiled with a compiler that supports C99 designated
     initializers.  */
  init_trigraph_map ();

#ifdef ENABLE_NLS
  (void) bindtextdomain (PACKAGE, LOCALEDIR);
#endif
}
140 /* Initialize a cpp_reader structure. */
142 cpp_create_reader (enum c_lang lang
, hash_table
*table
,
143 struct line_maps
*line_table
)
147 /* Initialize this instance of the library if it hasn't been already. */
150 pfile
= XCNEW (cpp_reader
);
152 cpp_set_lang (pfile
, lang
);
153 CPP_OPTION (pfile
, warn_multichar
) = 1;
154 CPP_OPTION (pfile
, discard_comments
) = 1;
155 CPP_OPTION (pfile
, discard_comments_in_macro_exp
) = 1;
156 CPP_OPTION (pfile
, show_column
) = 1;
157 CPP_OPTION (pfile
, tabstop
) = 8;
158 CPP_OPTION (pfile
, operator_names
) = 1;
159 CPP_OPTION (pfile
, warn_trigraphs
) = 2;
160 CPP_OPTION (pfile
, warn_endif_labels
) = 1;
161 CPP_OPTION (pfile
, warn_deprecated
) = 1;
162 CPP_OPTION (pfile
, warn_long_long
) = !CPP_OPTION (pfile
, c99
);
163 CPP_OPTION (pfile
, dollars_in_ident
) = 1;
164 CPP_OPTION (pfile
, warn_dollars
) = 1;
165 CPP_OPTION (pfile
, warn_variadic_macros
) = 1;
166 CPP_OPTION (pfile
, warn_normalize
) = normalized_C
;
168 /* Default CPP arithmetic to something sensible for the host for the
169 benefit of dumb users like fix-header. */
170 CPP_OPTION (pfile
, precision
) = CHAR_BIT
* sizeof (long);
171 CPP_OPTION (pfile
, char_precision
) = CHAR_BIT
;
172 CPP_OPTION (pfile
, wchar_precision
) = CHAR_BIT
* sizeof (int);
173 CPP_OPTION (pfile
, int_precision
) = CHAR_BIT
* sizeof (int);
174 CPP_OPTION (pfile
, unsigned_char
) = 0;
175 CPP_OPTION (pfile
, unsigned_wchar
) = 1;
176 CPP_OPTION (pfile
, bytes_big_endian
) = 1; /* does not matter */
178 /* Default to no charset conversion. */
179 CPP_OPTION (pfile
, narrow_charset
) = _cpp_default_encoding ();
180 CPP_OPTION (pfile
, wide_charset
) = 0;
182 /* Default the input character set to UTF-8. */
183 CPP_OPTION (pfile
, input_charset
) = _cpp_default_encoding ();
185 /* A fake empty "directory" used as the starting point for files
186 looked up without a search path. Name cannot be '/' because we
187 don't want to prepend anything at all to filenames using it. All
188 other entries are correct zero-initialized. */
189 pfile
->no_search_path
.name
= (char *) "";
191 /* Initialize the line map. */
192 pfile
->line_table
= line_table
;
194 /* Initialize lexer state. */
195 pfile
->state
.save_comments
= ! CPP_OPTION (pfile
, discard_comments
);
197 /* Set up static tokens. */
198 pfile
->avoid_paste
.type
= CPP_PADDING
;
199 pfile
->avoid_paste
.val
.source
= NULL
;
200 pfile
->eof
.type
= CPP_EOF
;
201 pfile
->eof
.flags
= 0;
203 /* Create a token buffer for the lexer. */
204 _cpp_init_tokenrun (&pfile
->base_run
, 250);
205 pfile
->cur_run
= &pfile
->base_run
;
206 pfile
->cur_token
= pfile
->base_run
.base
;
208 /* Initialize the base context. */
209 pfile
->context
= &pfile
->base_context
;
210 pfile
->base_context
.macro
= 0;
211 pfile
->base_context
.prev
= pfile
->base_context
.next
= 0;
213 /* Aligned and unaligned storage. */
214 pfile
->a_buff
= _cpp_get_buff (pfile
, 0);
215 pfile
->u_buff
= _cpp_get_buff (pfile
, 0);
217 /* The expression parser stack. */
218 _cpp_expand_op_stack (pfile
);
220 /* Initialize the buffer obstack. */
221 _obstack_begin (&pfile
->buffer_ob
, 0, 0,
222 (void *(*) (long)) xmalloc
,
223 (void (*) (void *)) free
);
225 _cpp_init_files (pfile
);
227 _cpp_init_hashtable (pfile
, table
);
232 /* Set the line_table entry in PFILE. This is called after reading a
233 PCH file, as the old line_table will be incorrect. */
235 cpp_set_line_map (cpp_reader
*pfile
, struct line_maps
*line_table
)
237 pfile
->line_table
= line_table
;
240 /* Free resources used by PFILE. Accessing PFILE after this function
241 returns leads to undefined behavior. Returns the error count. */
243 cpp_destroy (cpp_reader
*pfile
)
245 cpp_context
*context
, *contextn
;
246 tokenrun
*run
, *runn
;
248 free (pfile
->op_stack
);
250 while (CPP_BUFFER (pfile
) != NULL
)
251 _cpp_pop_buffer (pfile
);
254 free (pfile
->out
.base
);
256 if (pfile
->macro_buffer
)
258 free (pfile
->macro_buffer
);
259 pfile
->macro_buffer
= NULL
;
260 pfile
->macro_buffer_len
= 0;
264 deps_free (pfile
->deps
);
265 obstack_free (&pfile
->buffer_ob
, 0);
267 _cpp_destroy_hashtable (pfile
);
268 _cpp_cleanup_files (pfile
);
269 _cpp_destroy_iconv (pfile
);
271 _cpp_free_buff (pfile
->a_buff
);
272 _cpp_free_buff (pfile
->u_buff
);
273 _cpp_free_buff (pfile
->free_buffs
);
275 for (run
= &pfile
->base_run
; run
; run
= runn
)
279 if (run
!= &pfile
->base_run
)
283 for (context
= pfile
->base_context
.next
; context
; context
= contextn
)
285 contextn
= context
->next
;
292 /* This structure defines one built-in identifier. A node will be
293 entered in the hash table under the name NAME, with value VALUE.
295 There are two tables of these. builtin_array holds all the
296 "builtin" macros: these are handled by builtin_macro() in
297 macro.c. Builtin is somewhat of a misnomer -- the property of
298 interest is that these macros require special code to compute their
299 expansions. The value is a "builtin_type" enumerator.
301 operator_array holds the C++ named operators. These are keywords
302 which act as aliases for punctuators. In C++, they cannot be
303 altered through #define, and #if recognizes them as operators. In
304 C, these are not entered into the hash table at all (but see
305 <iso646.h>). The value is a token-type enumerator. */
310 unsigned short value
;
313 #define B(n, t) { DSC(n), t }
314 static const struct builtin builtin_array
[] =
316 B("__TIMESTAMP__", BT_TIMESTAMP
),
317 B("__TIME__", BT_TIME
),
318 B("__DATE__", BT_DATE
),
319 B("__FILE__", BT_FILE
),
320 B("__BASE_FILE__", BT_BASE_FILE
),
321 B("__LINE__", BT_SPECLINE
),
322 B("__INCLUDE_LEVEL__", BT_INCLUDE_LEVEL
),
323 B("__COUNTER__", BT_COUNTER
),
324 /* Keep builtins not used for -traditional-cpp at the end, and
325 update init_builtins() if any more are added. */
326 B("_Pragma", BT_PRAGMA
),
327 B("__STDC__", BT_STDC
),
330 static const struct builtin operator_array
[] =
332 B("and", CPP_AND_AND
),
333 B("and_eq", CPP_AND_EQ
),
334 B("bitand", CPP_AND
),
336 B("compl", CPP_COMPL
),
338 B("not_eq", CPP_NOT_EQ
),
340 B("or_eq", CPP_OR_EQ
),
342 B("xor_eq", CPP_XOR_EQ
)
346 /* Mark the C++ named operators in the hash table. */
348 mark_named_operators (cpp_reader
*pfile
)
350 const struct builtin
*b
;
352 for (b
= operator_array
;
353 b
< (operator_array
+ ARRAY_SIZE (operator_array
));
356 cpp_hashnode
*hp
= cpp_lookup (pfile
, b
->name
, b
->len
);
357 hp
->flags
|= NODE_OPERATOR
;
358 hp
->is_directive
= 0;
359 hp
->directive_index
= b
->value
;
364 cpp_init_special_builtins (cpp_reader
*pfile
)
366 const struct builtin
*b
;
367 size_t n
= ARRAY_SIZE (builtin_array
);
369 if (CPP_OPTION (pfile
, traditional
))
371 else if (! CPP_OPTION (pfile
, stdc_0_in_system_headers
)
372 || CPP_OPTION (pfile
, std
))
375 for (b
= builtin_array
; b
< builtin_array
+ n
; b
++)
377 cpp_hashnode
*hp
= cpp_lookup (pfile
, b
->name
, b
->len
);
379 hp
->flags
|= NODE_BUILTIN
| NODE_WARN
;
380 hp
->value
.builtin
= (enum builtin_type
) b
->value
;
384 /* Read the builtins table above and enter them, and language-specific
385 macros, into the hash table. HOSTED is true if this is a hosted
388 cpp_init_builtins (cpp_reader
*pfile
, int hosted
)
390 cpp_init_special_builtins (pfile
);
392 if (!CPP_OPTION (pfile
, traditional
)
393 && (! CPP_OPTION (pfile
, stdc_0_in_system_headers
)
394 || CPP_OPTION (pfile
, std
)))
395 _cpp_define_builtin (pfile
, "__STDC__ 1");
397 if (CPP_OPTION (pfile
, cplusplus
))
398 _cpp_define_builtin (pfile
, "__cplusplus 1");
399 else if (CPP_OPTION (pfile
, lang
) == CLK_ASM
)
400 _cpp_define_builtin (pfile
, "__ASSEMBLER__ 1");
401 else if (CPP_OPTION (pfile
, lang
) == CLK_STDC94
)
402 _cpp_define_builtin (pfile
, "__STDC_VERSION__ 199409L");
403 else if (CPP_OPTION (pfile
, c99
))
404 _cpp_define_builtin (pfile
, "__STDC_VERSION__ 199901L");
407 _cpp_define_builtin (pfile
, "__STDC_HOSTED__ 1");
409 _cpp_define_builtin (pfile
, "__STDC_HOSTED__ 0");
411 if (CPP_OPTION (pfile
, objc
))
412 _cpp_define_builtin (pfile
, "__OBJC__ 1");
/* Sanity-checks are dependent on command-line options, so it is
   called as a subroutine of cpp_read_main_file ().  Each failed
   check is reported through cpp_error with CPP_DL_ICE.

   NOTE(review): the #if/#else/#endif lines around the function and the
   "test" variable with its decrement and comparison are absent from
   the extracted text; they are restored here (the trailing empty
   sanity_checks macro implies a conditional) -- confirm the guard
   macro and the test against the pristine file.  */
#if ENABLE_CHECKING
static void sanity_checks (cpp_reader *);
static void sanity_checks (cpp_reader *pfile)
{
  cppchar_t test = 0;
  size_t host_max_bits = 2 * CHAR_BIT * sizeof (cpp_num_part);

  /* Sanity checks for assumptions about CPP arithmetic and target
     type precisions made by cpplib.  */
  test--;
  if (test < 1)
    cpp_error (pfile, CPP_DL_ICE, "cppchar_t must be an unsigned type");

  if (CPP_OPTION (pfile, precision) > host_max_bits)
    cpp_error (pfile, CPP_DL_ICE,
               "preprocessor arithmetic has maximum precision of %lu bits;"
               " target requires %lu bits",
               (unsigned long) host_max_bits,
               (unsigned long) CPP_OPTION (pfile, precision));

  if (CPP_OPTION (pfile, precision) < CPP_OPTION (pfile, int_precision))
    cpp_error (pfile, CPP_DL_ICE,
               "CPP arithmetic must be at least as precise as a target int");

  if (CPP_OPTION (pfile, char_precision) < 8)
    cpp_error (pfile, CPP_DL_ICE, "target char is less than 8 bits wide");

  if (CPP_OPTION (pfile, wchar_precision) < CPP_OPTION (pfile, char_precision))
    cpp_error (pfile, CPP_DL_ICE,
               "target wchar_t is narrower than target char");

  if (CPP_OPTION (pfile, int_precision) < CPP_OPTION (pfile, char_precision))
    cpp_error (pfile, CPP_DL_ICE,
               "target int is narrower than target char");

  /* This is assumed in eval_token() and could be fixed if necessary.  */
  if (sizeof (cppchar_t) > sizeof (cpp_num_part))
    cpp_error (pfile, CPP_DL_ICE,
               "CPP half-integer narrower than CPP character");

  if (CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
    cpp_error (pfile, CPP_DL_ICE,
               "CPP on this host cannot handle wide character constants over"
               " %lu bits, but the target requires %lu bits",
               (unsigned long) BITS_PER_CPPCHAR_T,
               (unsigned long) CPP_OPTION (pfile, wchar_precision));
}
#else
# define sanity_checks(PFILE)
#endif
468 /* This is called after options have been parsed, and partially
471 cpp_post_options (cpp_reader
*pfile
)
473 sanity_checks (pfile
);
475 post_options (pfile
);
477 /* Mark named operators before handling command line macros. */
478 if (CPP_OPTION (pfile
, cplusplus
) && CPP_OPTION (pfile
, operator_names
))
479 mark_named_operators (pfile
);
482 /* Setup for processing input from the file named FNAME, or stdin if
483 it is the empty string. Return the original filename
484 on success (e.g. foo.i->foo.c), or NULL on failure. */
486 cpp_read_main_file (cpp_reader
*pfile
, const char *fname
)
488 if (CPP_OPTION (pfile
, deps
.style
) != DEPS_NONE
)
491 pfile
->deps
= deps_init ();
493 /* Set the default target (if there is none already). */
494 deps_add_default_target (pfile
->deps
, fname
);
498 = _cpp_find_file (pfile
, fname
, &pfile
->no_search_path
, false, 0);
499 if (_cpp_find_failed (pfile
->main_file
))
502 _cpp_stack_file (pfile
, pfile
->main_file
, false);
504 /* For foo.i, read the original filename foo.c now, for the benefit
505 of the front ends. */
506 if (CPP_OPTION (pfile
, preprocessed
))
508 read_original_filename (pfile
);
509 fname
= pfile
->line_table
->maps
[pfile
->line_table
->used
-1].to_file
;
514 /* For preprocessed files, if the first tokens are of the form # NUM.
515 handle the directive so we know the original file name. This will
516 generate file_change callbacks, which the front ends must handle
517 appropriately given their state of initialization. */
519 read_original_filename (cpp_reader
*pfile
)
521 const cpp_token
*token
, *token1
;
523 /* Lex ahead; if the first tokens are of the form # NUM, then
524 process the directive, otherwise back up. */
525 token
= _cpp_lex_direct (pfile
);
526 if (token
->type
== CPP_HASH
)
528 pfile
->state
.in_directive
= 1;
529 token1
= _cpp_lex_direct (pfile
);
530 _cpp_backup_tokens (pfile
, 1);
531 pfile
->state
.in_directive
= 0;
533 /* If it's a #line directive, handle it. */
534 if (token1
->type
== CPP_NUMBER
)
536 _cpp_handle_directive (pfile
, token
->flags
& PREV_WHITE
);
537 read_original_directory (pfile
);
542 /* Backup as if nothing happened. */
543 _cpp_backup_tokens (pfile
, 1);
546 /* For preprocessed files, if the tokens following the first filename
547 line is of the form # <line> "/path/name//", handle the
548 directive so we know the original current directory. */
550 read_original_directory (cpp_reader
*pfile
)
552 const cpp_token
*hash
, *token
;
554 /* Lex ahead; if the first tokens are of the form # NUM, then
555 process the directive, otherwise back up. */
556 hash
= _cpp_lex_direct (pfile
);
557 if (hash
->type
!= CPP_HASH
)
559 _cpp_backup_tokens (pfile
, 1);
563 token
= _cpp_lex_direct (pfile
);
565 if (token
->type
!= CPP_NUMBER
)
567 _cpp_backup_tokens (pfile
, 2);
571 token
= _cpp_lex_direct (pfile
);
573 if (token
->type
!= CPP_STRING
574 || ! (token
->val
.str
.len
>= 5
575 && token
->val
.str
.text
[token
->val
.str
.len
-2] == '/'
576 && token
->val
.str
.text
[token
->val
.str
.len
-3] == '/'))
578 _cpp_backup_tokens (pfile
, 3);
582 if (pfile
->cb
.dir_change
)
584 char *debugdir
= (char *) alloca (token
->val
.str
.len
- 3);
586 memcpy (debugdir
, (const char *) token
->val
.str
.text
+ 1,
587 token
->val
.str
.len
- 4);
588 debugdir
[token
->val
.str
.len
- 4] = '\0';
590 pfile
->cb
.dir_change (pfile
, debugdir
);
594 /* This is called at the end of preprocessing. It pops the last
595 buffer and writes dependency output, and returns the number of
598 Maybe it should also reset state, such that you could call
599 cpp_start_read with a new filename to restart processing. */
601 cpp_finish (cpp_reader
*pfile
, FILE *deps_stream
)
603 /* Warn about unused macros before popping the final buffer. */
604 if (CPP_OPTION (pfile
, warn_unused_macros
))
605 cpp_forall_identifiers (pfile
, _cpp_warn_if_unused_macro
, NULL
);
607 /* lex.c leaves the final buffer on the stack. This it so that
608 it returns an unending stream of CPP_EOFs to the client. If we
609 popped the buffer, we'd dereference a NULL buffer pointer and
610 segfault. It's nice to allow the client to do worry-free excess
611 cpp_get_token calls. */
612 while (pfile
->buffer
)
613 _cpp_pop_buffer (pfile
);
615 /* Don't write the deps file if there are errors. */
616 if (CPP_OPTION (pfile
, deps
.style
) != DEPS_NONE
617 && deps_stream
&& pfile
->errors
== 0)
619 deps_write (pfile
->deps
, deps_stream
, 72);
621 if (CPP_OPTION (pfile
, deps
.phony_targets
))
622 deps_phony_targets (pfile
->deps
, deps_stream
);
625 /* Report on headers that could use multiple include guards. */
626 if (CPP_OPTION (pfile
, print_include_names
))
627 _cpp_report_missing_guards (pfile
);
629 return pfile
->errors
;
633 post_options (cpp_reader
*pfile
)
635 /* -Wtraditional is not useful in C++ mode. */
636 if (CPP_OPTION (pfile
, cplusplus
))
637 CPP_OPTION (pfile
, warn_traditional
) = 0;
/* Permanently disable macro expansion if we are rescanning
   preprocessed text.  Read preprocessed source in ISO mode.  */
641 if (CPP_OPTION (pfile
, preprocessed
))
643 if (!CPP_OPTION (pfile
, directives_only
))
644 pfile
->state
.prevent_expansion
= 1;
645 CPP_OPTION (pfile
, traditional
) = 0;
648 if (CPP_OPTION (pfile
, warn_trigraphs
) == 2)
649 CPP_OPTION (pfile
, warn_trigraphs
) = !CPP_OPTION (pfile
, trigraphs
);
651 if (CPP_OPTION (pfile
, traditional
))
653 CPP_OPTION (pfile
, cplusplus_comments
) = 0;
655 /* Traditional CPP does not accurately track column information. */
656 CPP_OPTION (pfile
, show_column
) = 0;
657 CPP_OPTION (pfile
, trigraphs
) = 0;
658 CPP_OPTION (pfile
, warn_trigraphs
) = 0;