preliminary patch to fix http://lists.nongnu.org/archive/html/tinycc-devel/2015-04...
[tinycc.git] / tccpp.c
blob92f3368fe4279faa1fb5c73244e630db5430f4f0
1 /*
2 * TCC - Tiny C Compiler
3 *
4 * Copyright (c) 2001-2004 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
26 ST_DATA int tok_flags;
27 ST_DATA int parse_flags;
29 ST_DATA struct BufferedFile *file;
30 ST_DATA int ch, tok;
31 ST_DATA CValue tokc;
32 ST_DATA const int *macro_ptr;
33 ST_DATA CString tokcstr; /* current parsed string, if any */
35 /* display benchmark infos */
36 ST_DATA int total_lines;
37 ST_DATA int total_bytes;
38 ST_DATA int tok_ident;
39 ST_DATA TokenSym **table_ident;
41 /* ------------------------------------------------------------------------- */
43 static int *macro_ptr_allocated;
44 static const int *unget_saved_macro_ptr;
45 static int unget_saved_buffer[TOK_MAX_SIZE + 1];
46 static int unget_buffer_enabled;
47 static TokenSym *hash_ident[TOK_HASH_SIZE];
48 static char token_buf[STRING_MAX_SIZE + 1];
49 /* true if isid(c) || isnum(c) */
50 static unsigned char isidnum_table[256-CH_EOF];
52 static const char tcc_keywords[] =
53 #define DEF(id, str) str "\0"
54 #include "tcctok.h"
55 #undef DEF
58 /* WARNING: the content of this string encodes token numbers */
59 static const unsigned char tok_two_chars[] =
60 /* outdated -- gr
61 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
62 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
63 */{
64 '<','=', TOK_LE,
65 '>','=', TOK_GE,
66 '!','=', TOK_NE,
67 '&','&', TOK_LAND,
68 '|','|', TOK_LOR,
69 '+','+', TOK_INC,
70 '-','-', TOK_DEC,
71 '=','=', TOK_EQ,
72 '<','<', TOK_SHL,
73 '>','>', TOK_SAR,
74 '+','=', TOK_A_ADD,
75 '-','=', TOK_A_SUB,
76 '*','=', TOK_A_MUL,
77 '/','=', TOK_A_DIV,
78 '%','=', TOK_A_MOD,
79 '&','=', TOK_A_AND,
80 '^','=', TOK_A_XOR,
81 '|','=', TOK_A_OR,
82 '-','>', TOK_ARROW,
83 '.','.', 0xa8, // C++ token ?
84 '#','#', TOK_TWOSHARPS,
85 '\\','#', TOK_QHASH,
89 struct macro_level {
90 struct macro_level *prev;
91 const int *p;
94 static void next_nomacro_spc(void);
95 static void macro_subst(
96 TokenString *tok_str,
97 Sym **nested_list,
98 const int *macro_str,
99 struct macro_level **can_read_stream
102 ST_FUNC void skip(int c)
104 if (tok != c)
105 tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
106 next();
109 ST_FUNC void expect(const char *msg)
111 tcc_error("%s expected", msg);
114 /* ------------------------------------------------------------------------- */
115 /* CString handling */
116 static void cstr_realloc(CString *cstr, int new_size)
118 int size;
119 void *data;
121 size = cstr->size_allocated;
122 if (size == 0)
123 size = 8; /* no need to allocate a too small first string */
124 while (size < new_size)
125 size = size * 2;
126 data = tcc_realloc(cstr->data_allocated, size);
127 cstr->data_allocated = data;
128 cstr->size_allocated = size;
129 cstr->data = data;
132 /* add a byte */
133 ST_FUNC void cstr_ccat(CString *cstr, int ch)
135 int size;
136 size = cstr->size + 1;
137 if (size > cstr->size_allocated)
138 cstr_realloc(cstr, size);
139 ((unsigned char *)cstr->data)[size - 1] = ch;
140 cstr->size = size;
143 ST_FUNC void cstr_cat(CString *cstr, const char *str)
145 int c;
146 for(;;) {
147 c = *str;
148 if (c == '\0')
149 break;
150 cstr_ccat(cstr, c);
151 str++;
155 /* add a wide char */
156 ST_FUNC void cstr_wccat(CString *cstr, int ch)
158 int size;
159 size = cstr->size + sizeof(nwchar_t);
160 if (size > cstr->size_allocated)
161 cstr_realloc(cstr, size);
162 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
163 cstr->size = size;
166 ST_FUNC void cstr_new(CString *cstr)
168 memset(cstr, 0, sizeof(CString));
171 /* free string and reset it to NULL */
172 ST_FUNC void cstr_free(CString *cstr)
174 tcc_free(cstr->data_allocated);
175 cstr_new(cstr);
178 /* reset string to empty */
179 ST_FUNC void cstr_reset(CString *cstr)
181 cstr->size = 0;
184 /* XXX: unicode ? */
185 static void add_char(CString *cstr, int c)
187 if (c == '\'' || c == '\"' || c == '\\') {
188 /* XXX: could be more precise if char or string */
189 cstr_ccat(cstr, '\\');
191 if (c >= 32 && c <= 126) {
192 cstr_ccat(cstr, c);
193 } else {
194 cstr_ccat(cstr, '\\');
195 if (c == '\n') {
196 cstr_ccat(cstr, 'n');
197 } else {
198 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
199 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
200 cstr_ccat(cstr, '0' + (c & 7));
205 /* ------------------------------------------------------------------------- */
206 /* allocate a new token */
207 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
209 TokenSym *ts, **ptable;
210 int i;
212 if (tok_ident >= SYM_FIRST_ANOM)
213 tcc_error("memory full (symbols)");
215 /* expand token table if needed */
216 i = tok_ident - TOK_IDENT;
217 if ((i % TOK_ALLOC_INCR) == 0) {
218 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
219 table_ident = ptable;
222 ts = tcc_malloc(sizeof(TokenSym) + len);
223 table_ident[i] = ts;
224 ts->tok = tok_ident++;
225 ts->sym_define = NULL;
226 ts->sym_label = NULL;
227 ts->sym_struct = NULL;
228 ts->sym_identifier = NULL;
229 ts->len = len;
230 ts->hash_next = NULL;
231 memcpy(ts->str, str, len);
232 ts->str[len] = '\0';
233 *pts = ts;
234 return ts;
/* string hash shared by the identifier table and the include cache:
   start from TOK_HASH_INIT and fold in each byte with h = h * 263 + c */
#define TOK_HASH_INIT 1
#define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
240 /* find a token and add it if not found */
241 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
243 TokenSym *ts, **pts;
244 int i;
245 unsigned int h;
247 h = TOK_HASH_INIT;
248 for(i=0;i<len;i++)
249 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
250 h &= (TOK_HASH_SIZE - 1);
252 pts = &hash_ident[h];
253 for(;;) {
254 ts = *pts;
255 if (!ts)
256 break;
257 if (ts->len == len && !memcmp(ts->str, str, len))
258 return ts;
259 pts = &(ts->hash_next);
261 return tok_alloc_new(pts, str, len);
264 /* XXX: buffer overflow */
265 /* XXX: float tokens */
266 ST_FUNC char *get_tok_str(int v, CValue *cv)
268 static char buf[STRING_MAX_SIZE + 1];
269 static CString cstr_buf;
270 CString *cstr;
271 char *p;
272 int i, len;
274 /* NOTE: to go faster, we give a fixed buffer for small strings */
275 cstr_reset(&cstr_buf);
276 cstr_buf.data = buf;
277 cstr_buf.size_allocated = sizeof(buf);
278 p = buf;
280 /* just an explanation, should never happen:
281 if (v <= TOK_LINENUM && v >= TOK_CINT && cv == NULL)
282 tcc_error("internal error: get_tok_str"); */
284 switch(v) {
285 case TOK_CINT:
286 case TOK_CUINT:
287 /* XXX: not quite exact, but only useful for testing */
288 sprintf(p, "%u", cv->ui);
289 break;
290 case TOK_CLLONG:
291 case TOK_CULLONG:
292 /* XXX: not quite exact, but only useful for testing */
293 #ifdef _WIN32
294 sprintf(p, "%u", (unsigned)cv->ull);
295 #else
296 sprintf(p, "%llu", cv->ull);
297 #endif
298 break;
299 case TOK_LCHAR:
300 cstr_ccat(&cstr_buf, 'L');
301 case TOK_CCHAR:
302 cstr_ccat(&cstr_buf, '\'');
303 add_char(&cstr_buf, cv->i);
304 cstr_ccat(&cstr_buf, '\'');
305 cstr_ccat(&cstr_buf, '\0');
306 break;
307 case TOK_PPNUM:
308 cstr = cv->cstr;
309 len = cstr->size - 1;
310 for(i=0;i<len;i++)
311 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
312 cstr_ccat(&cstr_buf, '\0');
313 break;
314 case TOK_LSTR:
315 cstr_ccat(&cstr_buf, 'L');
316 case TOK_STR:
317 cstr = cv->cstr;
318 cstr_ccat(&cstr_buf, '\"');
319 if (v == TOK_STR) {
320 len = cstr->size - 1;
321 for(i=0;i<len;i++)
322 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
323 } else {
324 len = (cstr->size / sizeof(nwchar_t)) - 1;
325 for(i=0;i<len;i++)
326 add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
328 cstr_ccat(&cstr_buf, '\"');
329 cstr_ccat(&cstr_buf, '\0');
330 break;
332 case TOK_CFLOAT:
333 case TOK_CDOUBLE:
334 case TOK_CLDOUBLE:
335 case TOK_LINENUM:
336 return NULL; /* should not happen */
338 /* above tokens have value, the ones below don't */
340 case TOK_LT:
341 v = '<';
342 goto addv;
343 case TOK_GT:
344 v = '>';
345 goto addv;
346 case TOK_DOTS:
347 return strcpy(p, "...");
348 case TOK_A_SHL:
349 return strcpy(p, "<<=");
350 case TOK_A_SAR:
351 return strcpy(p, ">>=");
352 default:
353 if (v < TOK_IDENT) {
354 /* search in two bytes table */
355 const unsigned char *q = tok_two_chars;
356 while (*q) {
357 if (q[2] == v) {
358 *p++ = q[0];
359 *p++ = q[1];
360 *p = '\0';
361 return buf;
363 q += 3;
365 addv:
366 *p++ = v;
367 *p = '\0';
368 } else if (v < tok_ident) {
369 return table_ident[v - TOK_IDENT]->str;
370 } else if (v >= SYM_FIRST_ANOM) {
371 /* special name for anonymous symbol */
372 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
373 } else {
374 /* should never happen */
375 return NULL;
377 break;
379 return cstr_buf.data;
382 /* fill input buffer and peek next char */
383 static int tcc_peekc_slow(BufferedFile *bf)
385 int len;
386 /* only tries to read if really end of buffer */
387 if (bf->buf_ptr >= bf->buf_end) {
388 if (bf->fd != -1) {
389 #if defined(PARSE_DEBUG)
390 len = 1;
391 #else
392 len = IO_BUF_SIZE;
393 #endif
394 len = read(bf->fd, bf->buffer, len);
395 if (len < 0)
396 len = 0;
397 } else {
398 len = 0;
400 total_bytes += len;
401 bf->buf_ptr = bf->buffer;
402 bf->buf_end = bf->buffer + len;
403 *bf->buf_end = CH_EOB;
405 if (bf->buf_ptr < bf->buf_end) {
406 return bf->buf_ptr[0];
407 } else {
408 bf->buf_ptr = bf->buf_end;
409 return CH_EOF;
413 /* return the current character, handling end of block if necessary
414 (but not stray) */
415 ST_FUNC int handle_eob(void)
417 return tcc_peekc_slow(file);
420 /* read next char from current input file and handle end of input buffer */
421 ST_INLN void inp(void)
423 ch = *(++(file->buf_ptr));
424 /* end of buffer/file handling */
425 if (ch == CH_EOB)
426 ch = handle_eob();
429 /* handle '\[\r]\n' */
430 static int handle_stray_noerror(void)
432 while (ch == '\\') {
433 inp();
434 if (ch == '\n') {
435 file->line_num++;
436 inp();
437 } else if (ch == '\r') {
438 inp();
439 if (ch != '\n')
440 goto fail;
441 file->line_num++;
442 inp();
443 } else if (ch == '#') {
444 ch = TOK_QHASH;
445 } else {
446 fail:
447 return 1;
450 return 0;
/* same as handle_stray_noerror() but an unexpected char after the
   backslash is reported as an error */
static void handle_stray(void)
{
    if (handle_stray_noerror() != 0)
        tcc_error("stray '\\' in program");
}
459 /* skip the stray and handle the \\n case. Output an error if
460 incorrect char after the stray */
461 static int handle_stray1(uint8_t *p)
463 int c;
465 if (p >= file->buf_end) {
466 file->buf_ptr = p;
467 c = handle_eob();
468 p = file->buf_ptr;
469 if (c == '\\')
470 goto parse_stray;
471 } else {
472 parse_stray:
473 file->buf_ptr = p;
474 ch = *p;
475 handle_stray();
476 p = file->buf_ptr;
477 c = *p;
479 return c;
/* advance 'p' and load the next char into 'c', handling just the EOB
   case, but not stray: a backslash goes through handle_eob(), which
   leaves it unchanged when it is a real backslash */
#define PEEKC_EOB(c, p)            \
{                                  \
    p++;                           \
    c = *p;                        \
    if (c == '\\') {               \
        file->buf_ptr = p;         \
        c = handle_eob();          \
        p = file->buf_ptr;         \
    }                              \
}
/* advance 'p' and load the next char into 'c', handling the complicated
   stray case through handle_stray1() */
#define PEEKC(c, p)                \
{                                  \
    p++;                           \
    c = *p;                        \
    if (c == '\\') {               \
        c = handle_stray1(p);      \
        p = file->buf_ptr;         \
    }                              \
}
505 /* input with '\[\r]\n' handling. Note that this function cannot
506 handle other characters after '\', so you cannot call it inside
507 strings or comments */
508 ST_FUNC void minp(void)
510 inp();
511 if (ch == '\\')
512 handle_stray();
516 /* single line C++ comments */
517 static uint8_t *parse_line_comment(uint8_t *p)
519 int c;
521 p++;
522 for(;;) {
523 c = *p;
524 redo:
525 if (c == '\n' || c == CH_EOF) {
526 break;
527 } else if (c == '\\') {
528 file->buf_ptr = p;
529 c = handle_eob();
530 p = file->buf_ptr;
531 if (c == '\\') {
532 PEEKC_EOB(c, p);
533 if (c == '\n') {
534 file->line_num++;
535 PEEKC_EOB(c, p);
536 } else if (c == '\r') {
537 PEEKC_EOB(c, p);
538 if (c == '\n') {
539 file->line_num++;
540 PEEKC_EOB(c, p);
543 } else {
544 goto redo;
546 } else {
547 p++;
550 return p;
553 /* C comments */
554 ST_FUNC uint8_t *parse_comment(uint8_t *p)
556 int c;
558 p++;
559 for(;;) {
560 /* fast skip loop */
561 for(;;) {
562 c = *p;
563 if (c == '\n' || c == '*' || c == '\\')
564 break;
565 p++;
566 c = *p;
567 if (c == '\n' || c == '*' || c == '\\')
568 break;
569 p++;
571 /* now we can handle all the cases */
572 if (c == '\n') {
573 file->line_num++;
574 p++;
575 } else if (c == '*') {
576 p++;
577 for(;;) {
578 c = *p;
579 if (c == '*') {
580 p++;
581 } else if (c == '/') {
582 goto end_of_comment;
583 } else if (c == '\\') {
584 file->buf_ptr = p;
585 c = handle_eob();
586 p = file->buf_ptr;
587 if (c == '\\') {
588 /* skip '\[\r]\n', otherwise just skip the stray */
589 while (c == '\\') {
590 PEEKC_EOB(c, p);
591 if (c == '\n') {
592 file->line_num++;
593 PEEKC_EOB(c, p);
594 } else if (c == '\r') {
595 PEEKC_EOB(c, p);
596 if (c == '\n') {
597 file->line_num++;
598 PEEKC_EOB(c, p);
600 } else {
601 goto after_star;
605 } else {
606 break;
609 after_star: ;
610 } else {
611 /* stray, eob or eof */
612 file->buf_ptr = p;
613 c = handle_eob();
614 p = file->buf_ptr;
615 if (c == CH_EOF) {
616 tcc_error("unexpected end of file in comment");
617 } else if (c == '\\') {
618 p++;
622 end_of_comment:
623 p++;
624 return p;
627 #define cinp minp
629 static inline void skip_spaces(void)
631 while (is_space(ch))
632 cinp();
/* track runs of space tokens through '*spc': returns 1 when 't' is a
   space and '*spc' was already non-zero, else 0. '*spc' is set to 1 by
   the first space of a run and cleared by any non-space token */
static inline int check_space(int t, int *spc)
{
    if (!is_space(t)) {
        *spc = 0;
        return 0;
    }
    if (*spc)
        return 1;
    *spc = 1;
    return 0;
}
646 /* parse a string without interpreting escapes */
647 static uint8_t *parse_pp_string(uint8_t *p,
648 int sep, CString *str)
650 int c;
651 p++;
652 for(;;) {
653 c = *p;
654 if (c == sep) {
655 break;
656 } else if (c == '\\') {
657 file->buf_ptr = p;
658 c = handle_eob();
659 p = file->buf_ptr;
660 if (c == CH_EOF) {
661 unterminated_string:
662 /* XXX: indicate line number of start of string */
663 tcc_error("missing terminating %c character", sep);
664 } else if (c == '\\') {
665 /* escape : just skip \[\r]\n */
666 PEEKC_EOB(c, p);
667 if (c == '\n') {
668 file->line_num++;
669 p++;
670 } else if (c == '\r') {
671 PEEKC_EOB(c, p);
672 if (c != '\n')
673 expect("'\n' after '\r'");
674 file->line_num++;
675 p++;
676 } else if (c == CH_EOF) {
677 goto unterminated_string;
678 } else {
679 if (str) {
680 cstr_ccat(str, '\\');
681 cstr_ccat(str, c);
683 p++;
686 } else if (c == '\n') {
687 file->line_num++;
688 goto add_char;
689 } else if (c == '\r') {
690 PEEKC_EOB(c, p);
691 if (c != '\n') {
692 if (str)
693 cstr_ccat(str, '\r');
694 } else {
695 file->line_num++;
696 goto add_char;
698 } else {
699 add_char:
700 if (str)
701 cstr_ccat(str, c);
702 p++;
705 p++;
706 return p;
709 /* skip block of text until #else, #elif or #endif. skip also pairs of
710 #if/#endif */
711 static void preprocess_skip(void)
713 int a, start_of_line, c, in_warn_or_error;
714 uint8_t *p;
716 p = file->buf_ptr;
717 a = 0;
718 redo_start:
719 start_of_line = 1;
720 in_warn_or_error = 0;
721 for(;;) {
722 redo_no_start:
723 c = *p;
724 switch(c) {
725 case ' ':
726 case '\t':
727 case '\f':
728 case '\v':
729 case '\r':
730 p++;
731 goto redo_no_start;
732 case '\n':
733 file->line_num++;
734 p++;
735 goto redo_start;
736 case '\\':
737 file->buf_ptr = p;
738 c = handle_eob();
739 if (c == CH_EOF) {
740 expect("#endif");
741 } else if (c == '\\') {
742 ch = file->buf_ptr[0];
743 handle_stray_noerror();
745 p = file->buf_ptr;
746 goto redo_no_start;
747 /* skip strings */
748 case '\"':
749 case '\'':
750 if (in_warn_or_error)
751 goto _default;
752 p = parse_pp_string(p, c, NULL);
753 break;
754 /* skip comments */
755 case '/':
756 if (in_warn_or_error)
757 goto _default;
758 file->buf_ptr = p;
759 ch = *p;
760 minp();
761 p = file->buf_ptr;
762 if (ch == '*') {
763 p = parse_comment(p);
764 } else if (ch == '/') {
765 p = parse_line_comment(p);
767 break;
768 case '#':
769 p++;
770 if (start_of_line) {
771 file->buf_ptr = p;
772 next_nomacro();
773 p = file->buf_ptr;
774 if (a == 0 &&
775 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
776 goto the_end;
777 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
778 a++;
779 else if (tok == TOK_ENDIF)
780 a--;
781 else if( tok == TOK_ERROR || tok == TOK_WARNING)
782 in_warn_or_error = 1;
783 else if (tok == TOK_LINEFEED)
784 goto redo_start;
785 else if (parse_flags & PARSE_FLAG_ASM_FILE)
786 p = parse_line_comment(p);
788 else if (parse_flags & PARSE_FLAG_ASM_FILE)
789 p = parse_line_comment(p);
790 break;
791 _default:
792 default:
793 p++;
794 break;
796 start_of_line = 0;
798 the_end: ;
799 file->buf_ptr = p;
802 /* ParseState handling */
804 /* XXX: currently, no include file info is stored. Thus, we cannot display
805 accurate messages if the function or data definition spans multiple
806 files */
808 /* save current parse state in 's' */
809 ST_FUNC void save_parse_state(ParseState *s)
811 s->line_num = file->line_num;
812 s->macro_ptr = macro_ptr;
813 s->tok = tok;
814 s->tokc = tokc;
817 /* restore parse state from 's' */
818 ST_FUNC void restore_parse_state(ParseState *s)
820 file->line_num = s->line_num;
821 macro_ptr = s->macro_ptr;
822 tok = s->tok;
823 tokc = s->tokc;
826 /* return the number of additional 'ints' necessary to store the
827 token */
828 static inline int tok_ext_size(int t)
830 switch(t) {
831 /* 4 bytes */
832 case TOK_CINT:
833 case TOK_CUINT:
834 case TOK_CCHAR:
835 case TOK_LCHAR:
836 case TOK_CFLOAT:
837 case TOK_LINENUM:
838 return 1;
839 case TOK_STR:
840 case TOK_LSTR:
841 case TOK_PPNUM:
842 tcc_error("unsupported token");
843 return 1;
844 case TOK_CDOUBLE:
845 case TOK_CLLONG:
846 case TOK_CULLONG:
847 return 2;
848 case TOK_CLDOUBLE:
849 return LDOUBLE_SIZE / 4;
850 default:
851 return 0;
855 /* token string handling */
857 ST_INLN void tok_str_new(TokenString *s)
859 s->str = NULL;
860 s->len = 0;
861 s->allocated_len = 0;
862 s->last_line_num = -1;
865 ST_FUNC void tok_str_free(int *str)
867 tcc_free(str);
870 static int *tok_str_realloc(TokenString *s)
872 int *str, len;
874 if (s->allocated_len == 0) {
875 len = 8;
876 } else {
877 len = s->allocated_len * 2;
879 str = tcc_realloc(s->str, len * sizeof(int));
880 s->allocated_len = len;
881 s->str = str;
882 return str;
885 ST_FUNC void tok_str_add(TokenString *s, int t)
887 int len, *str;
889 len = s->len;
890 str = s->str;
891 if (len >= s->allocated_len)
892 str = tok_str_realloc(s);
893 str[len++] = t;
894 s->len = len;
897 static void tok_str_add2(TokenString *s, int t, CValue *cv)
899 int len, *str;
901 len = s->len;
902 str = s->str;
904 /* allocate space for worst case */
905 if (len + TOK_MAX_SIZE > s->allocated_len)
906 str = tok_str_realloc(s);
907 str[len++] = t;
908 switch(t) {
909 case TOK_CINT:
910 case TOK_CUINT:
911 case TOK_CCHAR:
912 case TOK_LCHAR:
913 case TOK_CFLOAT:
914 case TOK_LINENUM:
915 str[len++] = cv->tab[0];
916 break;
917 case TOK_PPNUM:
918 case TOK_STR:
919 case TOK_LSTR:
921 int nb_words;
922 CString *cstr;
924 nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
925 while ((len + nb_words) > s->allocated_len)
926 str = tok_str_realloc(s);
927 cstr = (CString *)(str + len);
928 cstr->data = NULL;
929 cstr->size = cv->cstr->size;
930 cstr->data_allocated = NULL;
931 cstr->size_allocated = cstr->size;
932 memcpy((char *)cstr + sizeof(CString),
933 cv->cstr->data, cstr->size);
934 len += nb_words;
936 break;
937 case TOK_CDOUBLE:
938 case TOK_CLLONG:
939 case TOK_CULLONG:
940 #if LDOUBLE_SIZE == 8
941 case TOK_CLDOUBLE:
942 #endif
943 str[len++] = cv->tab[0];
944 str[len++] = cv->tab[1];
945 break;
946 #if LDOUBLE_SIZE == 12
947 case TOK_CLDOUBLE:
948 str[len++] = cv->tab[0];
949 str[len++] = cv->tab[1];
950 str[len++] = cv->tab[2];
951 #elif LDOUBLE_SIZE == 16
952 case TOK_CLDOUBLE:
953 str[len++] = cv->tab[0];
954 str[len++] = cv->tab[1];
955 str[len++] = cv->tab[2];
956 str[len++] = cv->tab[3];
957 #elif LDOUBLE_SIZE != 8
958 #error add long double size support
959 #endif
960 break;
961 default:
962 break;
964 s->len = len;
967 /* add the current parse token in token string 's' */
968 ST_FUNC void tok_str_add_tok(TokenString *s)
970 CValue cval;
972 /* save line number info */
973 if (file->line_num != s->last_line_num) {
974 s->last_line_num = file->line_num;
975 cval.i = s->last_line_num;
976 tok_str_add2(s, TOK_LINENUM, &cval);
978 tok_str_add2(s, tok, &tokc);
981 /* get a token from an integer array and increment pointer
982 accordingly. we code it as a macro to avoid pointer aliasing. */
983 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
985 const int *p = *pp;
986 int n, *tab;
988 tab = cv->tab;
989 switch(*t = *p++) {
990 case TOK_CINT:
991 case TOK_CUINT:
992 case TOK_CCHAR:
993 case TOK_LCHAR:
994 case TOK_CFLOAT:
995 case TOK_LINENUM:
996 tab[0] = *p++;
997 break;
998 case TOK_STR:
999 case TOK_LSTR:
1000 case TOK_PPNUM:
1001 cv->cstr = (CString *)p;
1002 cv->cstr->data = (char *)p + sizeof(CString);
1003 p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
1004 break;
1005 case TOK_CDOUBLE:
1006 case TOK_CLLONG:
1007 case TOK_CULLONG:
1008 n = 2;
1009 goto copy;
1010 case TOK_CLDOUBLE:
1011 #if LDOUBLE_SIZE == 16
1012 n = 4;
1013 #elif LDOUBLE_SIZE == 12
1014 n = 3;
1015 #elif LDOUBLE_SIZE == 8
1016 n = 2;
1017 #else
1018 # error add long double size support
1019 #endif
1020 copy:
1022 *tab++ = *p++;
1023 while (--n);
1024 break;
1025 default:
1026 break;
1028 *pp = p;
1031 static int macro_is_equal(const int *a, const int *b)
1033 char buf[STRING_MAX_SIZE + 1];
1034 CValue cv;
1035 int t;
1036 while (*a && *b) {
1037 TOK_GET(&t, &a, &cv);
1038 pstrcpy(buf, sizeof buf, get_tok_str(t, &cv));
1039 TOK_GET(&t, &b, &cv);
1040 if (strcmp(buf, get_tok_str(t, &cv)))
1041 return 0;
1043 return !(*a || *b);
1046 static void define_print(Sym *s, int is_undef)
1048 int c, t;
1049 CValue cval;
1050 const int *str;
1051 Sym *arg;
1053 if (tcc_state->dflag == 0 || !s || !tcc_state->ppfp)
1054 return;
1056 if (file) {
1057 c = file->line_num - file->line_ref - 1;
1058 if (c > 0) {
1059 while (c--)
1060 fputs("\n", tcc_state->ppfp);
1061 file->line_ref = file->line_num;
1065 if (is_undef) {
1066 fprintf(tcc_state->ppfp, "// #undef %s", get_tok_str(s->v, NULL));
1067 return;
1070 fprintf(tcc_state->ppfp, "// #define %s", get_tok_str(s->v, NULL));
1071 arg = s->next;
1072 if (arg) {
1073 char *sep = "(";
1074 while (arg) {
1075 fprintf(tcc_state->ppfp, "%s%s", sep, get_tok_str(arg->v & ~SYM_FIELD, NULL));
1076 sep = ",";
1077 arg = arg->next;
1079 fprintf(tcc_state->ppfp, ")");
1082 str = s->d;
1083 if (str)
1084 fprintf(tcc_state->ppfp, " ");
1086 while (str) {
1087 TOK_GET(&t, &str, &cval);
1088 if (!t)
1089 break;
1090 fprintf(tcc_state->ppfp, "%s", get_tok_str(t, &cval));
1094 /* defines handling */
1095 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1097 Sym *s;
1099 s = define_find(v);
1100 if (s && !macro_is_equal(s->d, str))
1101 tcc_warning("%s redefined", get_tok_str(v, NULL));
1103 s = sym_push2(&define_stack, v, macro_type, 0);
1104 s->d = str;
1105 s->next = first_arg;
1106 table_ident[v - TOK_IDENT]->sym_define = s;
1107 define_print(s, 0);
1110 /* undefined a define symbol. Its name is just set to zero */
1111 ST_FUNC void define_undef(Sym *s)
1113 int v;
1114 v = s->v;
1115 if (v >= TOK_IDENT && v < tok_ident) {
1116 define_print(s, 1);
1117 table_ident[v - TOK_IDENT]->sym_define = NULL;
1121 ST_INLN Sym *define_find(int v)
1123 v -= TOK_IDENT;
1124 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1125 return NULL;
1126 return table_ident[v]->sym_define;
1129 /* free define stack until top reaches 'b' */
1130 ST_FUNC void free_defines(Sym *b)
1132 Sym *top, *top1;
1133 int v;
1135 top = define_stack;
1136 while (top != b) {
1137 top1 = top->prev;
1138 /* do not free args or predefined defines */
1139 if (top->d)
1140 tok_str_free(top->d);
1141 v = top->v;
1142 if (v >= TOK_IDENT && v < tok_ident)
1143 table_ident[v - TOK_IDENT]->sym_define = NULL;
1144 sym_free(top);
1145 top = top1;
1147 define_stack = b;
1150 ST_FUNC void print_defines(void)
1152 Sym *top, *s;
1153 int v;
1155 top = define_stack;
1156 while (top) {
1157 v = top->v;
1158 if (v >= TOK_IDENT && v < tok_ident) {
1159 s = table_ident[v - TOK_IDENT]->sym_define;
1160 define_print(s, 0);
1162 top = top->prev;
1166 /* label lookup */
1167 ST_FUNC Sym *label_find(int v)
1169 v -= TOK_IDENT;
1170 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1171 return NULL;
1172 return table_ident[v]->sym_label;
1175 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1177 Sym *s, **ps;
1178 s = sym_push2(ptop, v, 0, 0);
1179 s->r = flags;
1180 ps = &table_ident[v - TOK_IDENT]->sym_label;
1181 if (ptop == &global_label_stack) {
1182 /* modify the top most local identifier, so that
1183 sym_identifier will point to 's' when popped */
1184 while (*ps != NULL)
1185 ps = &(*ps)->prev_tok;
1187 s->prev_tok = *ps;
1188 *ps = s;
1189 return s;
1192 /* pop labels until element last is reached. Look if any labels are
1193 undefined. Define symbols if '&&label' was used. */
1194 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1196 Sym *s, *s1;
1197 for(s = *ptop; s != slast; s = s1) {
1198 s1 = s->prev;
1199 if (s->r == LABEL_DECLARED) {
1200 tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1201 } else if (s->r == LABEL_FORWARD) {
1202 tcc_error("label '%s' used but not defined",
1203 get_tok_str(s->v, NULL));
1204 } else {
1205 if (s->c) {
1206 /* define corresponding symbol. A size of
1207 1 is put. */
1208 put_extern_sym(s, cur_text_section, s->jnext, 1);
1211 /* remove label */
1212 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1213 sym_free(s);
1215 *ptop = slast;
1218 /* eval an expression for #if/#elif */
1219 static int expr_preprocess(void)
1221 int c, t;
1222 TokenString str;
1224 tok_str_new(&str);
1225 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1226 next(); /* do macro subst */
1227 if (tok == TOK_DEFINED) {
1228 next_nomacro();
1229 t = tok;
1230 if (t == '(')
1231 next_nomacro();
1232 c = define_find(tok) != 0;
1233 if (t == '(')
1234 next_nomacro();
1235 tok = TOK_CINT;
1236 tokc.i = c;
1237 } else if (tok >= TOK_IDENT) {
1238 /* if undefined macro */
1239 tok = TOK_CINT;
1240 tokc.i = 0;
1242 tok_str_add_tok(&str);
1244 tok_str_add(&str, -1); /* simulate end of file */
1245 tok_str_add(&str, 0);
1246 /* now evaluate C constant expression */
1247 macro_ptr = str.str;
1248 next();
1249 c = expr_const();
1250 macro_ptr = NULL;
1251 tok_str_free(str.str);
1252 return c != 0;
1255 /* parse after #define */
1256 ST_FUNC void parse_define(void)
1258 Sym *s, *first, **ps;
1259 int v, t, varg, is_vaargs, spc, ptok, macro_list_start;
1260 TokenString str;
1262 v = tok;
1263 if (v < TOK_IDENT)
1264 tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1265 /* XXX: should check if same macro (ANSI) */
1266 first = NULL;
1267 t = MACRO_OBJ;
1268 /* '(' must be just after macro definition for MACRO_FUNC */
1269 next_nomacro_spc();
1270 if (tok == '(') {
1271 next_nomacro();
1272 ps = &first;
1273 while (tok != ')') {
1274 varg = tok;
1275 next_nomacro();
1276 is_vaargs = 0;
1277 if (varg == TOK_DOTS) {
1278 varg = TOK___VA_ARGS__;
1279 is_vaargs = 1;
1280 } else if (tok == TOK_DOTS && gnu_ext) {
1281 is_vaargs = 1;
1282 next_nomacro();
1284 if (varg < TOK_IDENT)
1285 tcc_error( "\'%s\' may not appear in parameter list", get_tok_str(varg, NULL));
1286 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1287 *ps = s;
1288 ps = &s->next;
1289 if (tok != ',')
1290 continue;
1291 next_nomacro();
1293 next_nomacro_spc();
1294 t = MACRO_FUNC;
1296 tok_str_new(&str);
1297 spc = 2;
1298 /* EOF testing necessary for '-D' handling */
1299 ptok = 0;
1300 macro_list_start = 1;
1301 int old_parse_flags = parse_flags;
1302 parse_flags |= PARSE_FLAG_ACCEPT_STRAYS;
1303 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1304 if (!macro_list_start && spc == 2 && tok == TOK_TWOSHARPS)
1305 tcc_error("'##' invalid at start of macro");
1306 ptok = tok;
1307 /* remove spaces around ## and after '#' */
1308 if (TOK_TWOSHARPS == tok) {
1309 if (1 == spc)
1310 --str.len;
1311 spc = 2;
1312 } else if ('#' == tok) {
1313 spc = 3;
1314 } else if (check_space(tok, &spc)) {
1315 goto skip;
1317 tok_str_add2(&str, tok, &tokc);
1318 skip: {
1319 next_nomacro_spc();
1320 macro_list_start = 0;
1323 parse_flags = old_parse_flags;
1324 if (ptok == TOK_TWOSHARPS)
1325 tcc_error("'##' invalid at end of macro");
1326 if (spc == 1)
1327 --str.len; /* remove trailing space */
1328 tok_str_add(&str, 0);
1329 define_push(v, t, str.str, first);
1332 static inline int hash_cached_include(const char *filename)
1334 const unsigned char *s;
1335 unsigned int h;
1337 h = TOK_HASH_INIT;
1338 s = (unsigned char *) filename;
1339 while (*s) {
1340 h = TOK_HASH_FUNC(h, *s);
1341 s++;
1343 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1344 return h;
1347 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1349 CachedInclude *e;
1350 int i, h;
1351 h = hash_cached_include(filename);
1352 i = s1->cached_includes_hash[h];
1353 for(;;) {
1354 if (i == 0)
1355 break;
1356 e = s1->cached_includes[i - 1];
1357 if (0 == PATHCMP(e->filename, filename))
1358 return e;
1359 i = e->hash_next;
1361 return NULL;
1364 static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
1366 CachedInclude *e;
1367 int h;
1369 if (search_cached_include(s1, filename))
1370 return;
1371 #ifdef INC_DEBUG
1372 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1373 #endif
1374 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1375 strcpy(e->filename, filename);
1376 e->ifndef_macro = ifndef_macro;
1377 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1378 /* add in hash table */
1379 h = hash_cached_include(filename);
1380 e->hash_next = s1->cached_includes_hash[h];
1381 s1->cached_includes_hash[h] = s1->nb_cached_includes;
1384 static void pragma_parse(TCCState *s1)
1386 next_nomacro();
1387 if (tok == TOK_pack) {
1389 This may be:
1390 #pragma pack(1) // set
1391 #pragma pack() // reset to default
1392 #pragma pack(push,1) // push & set
1393 #pragma pack(pop) // restore previous
1395 next();
1396 skip('(');
1397 if (tok == TOK_ASM_pop) {
1398 next();
1399 if (s1->pack_stack_ptr <= s1->pack_stack) {
1400 stk_error:
1401 tcc_error("out of pack stack");
1403 s1->pack_stack_ptr--;
1404 } else {
1405 int val = 0;
1406 if (tok != ')') {
1407 if (tok == TOK_ASM_push) {
1408 next();
1409 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
1410 goto stk_error;
1411 s1->pack_stack_ptr++;
1412 skip(',');
1414 if (tok != TOK_CINT)
1415 goto pragma_err;
1416 val = tokc.i;
1417 if (val < 1 || val > 16 || (val & (val - 1)) != 0)
1418 goto pragma_err;
1419 next();
1421 *s1->pack_stack_ptr = val;
1423 if (tok != ')')
1424 goto pragma_err;
1425 } else if (tok == TOK_comment) {
1426 if (s1->ms_extensions) {
1427 next();
1428 skip('(');
1429 if (tok == TOK_lib) {
1430 next();
1431 skip(',');
1432 if (tok != TOK_STR) {
1433 tcc_error("invalid library specification");
1434 } else {
1435 int len = strlen((char *)tokc.cstr->data);
1436 char *file = tcc_malloc(len + 4); /* filetype, "-l", and \0 at the end */
1437 file[0] = TCC_FILETYPE_BINARY;
1438 file[1] = '-';
1439 file[2] = 'l';
1440 strcpy(&file[3],(char *)tokc.cstr->data);
1441 dynarray_add((void ***)&s1->files, &s1->nb_files, file);
1443 next();
1444 tok = TOK_LINEFEED;
1445 } else {
1446 tcc_warning("unknown specifier '%s' in #pragma comment", get_tok_str(tok, &tokc));
1448 } else {
1449 tcc_warning("#pragma comment(lib) is ignored");
1451 } else if (tok == TOK_push_macro || tok == TOK_pop_macro) {
1452 int t = tok, v;
1453 Sym *s;
1455 if (next_nomacro(), tok != '(')
1456 goto pragma_err;
1457 if (next_nomacro(), tok != TOK_STR)
1458 goto pragma_err;
1459 v = tok_alloc(tokc.cstr->data, tokc.cstr->size - 1)->tok;
1460 if (next_nomacro(), tok != ')')
1461 goto pragma_err;
1462 if (t == TOK_push_macro) {
1463 while (NULL == (s = define_find(v)))
1464 define_push(v, 0, NULL, NULL);
1465 s->type.ref = s; /* set push boundary */
1466 } else {
1467 for (s = define_stack; s; s = s->prev)
1468 if (s->v == v && s->type.ref == s) {
1469 s->type.ref = NULL;
1470 break;
1473 if (s)
1474 table_ident[v - TOK_IDENT]->sym_define = s->d ? s : NULL;
1475 else
1476 tcc_warning("unbalanced #pragma pop_macro");
1478 /* print info when tcc is called with "-E -dD" switches */
1479 if (s1->dflag && s1->ppfp) {
1480 if (file) {
1481 int c = file->line_num - file->line_ref - 1;
1482 if (c > 0) {
1483 while (c--)
1484 fputs("\n", tcc_state->ppfp);
1485 file->line_ref = file->line_num;
1488 fprintf(s1->ppfp, "// #pragma %s_macro(\"%s\")",
1489 (t == TOK_push_macro) ? "push" : "pop",
1490 get_tok_str(v, NULL));
1492 } else if (tok == TOK_once) {
1493 add_cached_include(s1, file->filename, TOK_once);
1494 } else {
1495 tcc_warning("unknown #pragma %s", get_tok_str(tok, &tokc));
1497 return;
1498 pragma_err:
1499 tcc_error("malformed #pragma directive");
/* Handle one preprocessor directive.
   next_nomacro1() calls this after it has read a '#' at the beginning of
   a line while PARSE_FLAG_PREPROCESS is set.  parse_flags is switched to
   a directive-specific value for the duration of the call and restored
   at 'the_end'.  Unless a case jumps straight to 'the_end', the rest of
   the directive line is discarded up to TOK_LINEFEED. */
1502 /* is_bof is true if first non space token at beginning of file */
1503 ST_FUNC void preprocess(int is_bof)
1505 TCCState *s1 = tcc_state;
1506 int i, c, n, saved_parse_flags;
1507 char buf[1024], *q;
1508 Sym *s;
/* directive mode: line feeds are returned as tokens; only the caller's
   PARSE_FLAG_ASM_FILE bit is carried over */
1510 saved_parse_flags = parse_flags;
1511 parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM |
1512 PARSE_FLAG_LINEFEED;
1513 parse_flags |= (saved_parse_flags & PARSE_FLAG_ASM_FILE);
1514 next_nomacro();
/* also reached again from 'skip' below, after preprocess_skip(), to
   dispatch on the token that is current at that point */
1515 redo:
1516 switch(tok) {
1517 case TOK_DEFINE:
1518 next_nomacro();
1519 parse_define();
1520 break;
1521 case TOK_UNDEF:
1522 next_nomacro();
1523 s = define_find(tok);
1524 /* undefine symbol by putting an invalid name */
1525 if (s)
1526 define_undef(s);
1527 break;
1528 case TOK_INCLUDE:
1529 case TOK_INCLUDE_NEXT:
/* the <name> and "name" forms are read character by character from the
   file buffer instead of going through the tokenizer */
1530 ch = file->buf_ptr[0];
1531 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1532 skip_spaces();
1533 if (ch == '<') {
1534 c = '>';
1535 goto read_name;
1536 } else if (ch == '\"') {
1537 c = ch;
1538 read_name:
1539 inp();
1540 q = buf;
/* copy up to the closing delimiter c; characters that do not fit in
   buf are silently dropped */
1541 while (ch != c && ch != '\n' && ch != CH_EOF) {
1542 if ((q - buf) < sizeof(buf) - 1)
1543 *q++ = ch;
1544 if (ch == '\\') {
1545 if (handle_stray_noerror() == 0)
1546 --q;
1547 } else
1548 inp();
1550 *q = '\0';
1551 minp();
1552 #if 0
1553 /* eat all spaces and comments after include */
1554 /* XXX: slightly incorrect */
1555 while (ch1 != '\n' && ch1 != CH_EOF)
1556 inp();
1557 #endif
1558 } else {
1559 /* computed #include : either we have only strings or
1560 we have anything enclosed in '<>' */
1561 next();
1562 buf[0] = '\0';
1563 if (tok == TOK_STR) {
1564 while (tok != TOK_LINEFEED) {
1565 if (tok != TOK_STR) {
1566 include_syntax:
1567 tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1569 pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
1570 next();
1572 c = '\"';
1573 } else {
1574 int len;
1575 while (tok != TOK_LINEFEED) {
1576 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1577 next();
1579 len = strlen(buf);
1580 /* check syntax and remove '<>' */
1581 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1582 goto include_syntax;
1583 memmove(buf, buf + 1, len - 2);
1584 buf[len - 2] = '\0';
1585 c = '>';
/* from here on buf holds the requested name and c the closing delimiter
   of the form that was used */
1589 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1590 tcc_error("#include recursion too deep");
1591 /* store current file in stack, but increment stack later below */
1592 *s1->include_stack_ptr = file;
/* candidate locations, tried in this order:
   i == -2  the name itself, when it is an absolute path (the only try)
   i == -1  the directory of the including file (quoted form only)
   i >= 0   the include paths followed by the sysinclude paths */
1594 n = s1->nb_include_paths + s1->nb_sysinclude_paths;
1595 for (i = -2; i < n; ++i) {
1596 char buf1[sizeof file->filename];
1597 CachedInclude *e;
1598 BufferedFile **f;
1599 const char *path;
1601 if (i == -2) {
1602 /* check absolute include path */
1603 if (!IS_ABSPATH(buf))
1604 continue;
1605 buf1[0] = 0;
1606 i = n; /* force end loop */
1608 } else if (i == -1) {
1609 /* search in current dir if "header.h" */
1610 if (c != '\"')
1611 continue;
1612 path = file->filename;
1613 pstrncpy(buf1, path, tcc_basename(path) - path);
1615 } else {
1616 /* search in all the include paths */
1617 if (i < s1->nb_include_paths)
1618 path = s1->include_paths[i];
1619 else
1620 path = s1->sysinclude_paths[i - s1->nb_include_paths];
1621 pstrcpy(buf1, sizeof(buf1), path);
1622 pstrcat(buf1, sizeof(buf1), "/");
1625 pstrcat(buf1, sizeof(buf1), buf);
/* #include_next: a candidate whose name matches a file already on the
   include stack is skipped */
1627 if (tok == TOK_INCLUDE_NEXT)
1628 for (f = s1->include_stack_ptr; f >= s1->include_stack; --f)
1629 if (0 == PATHCMP((*f)->filename, buf1)) {
1630 #ifdef INC_DEBUG
1631 printf("%s: #include_next skipping %s\n", file->filename, buf1);
1632 #endif
1633 goto include_trynext;
1636 e = search_cached_include(s1, buf1);
1637 if (e && (define_find(e->ifndef_macro) || e->ifndef_macro == TOK_once)) {
1638 /* no need to parse the include because the 'ifndef macro'
1639 is defined */
1640 #ifdef INC_DEBUG
1641 printf("%s: skipping cached %s\n", file->filename, buf1);
1642 #endif
1643 goto include_done;
1646 if (tcc_open(s1, buf1) < 0)
1647 include_trynext:
1648 continue;
1650 #ifdef INC_DEBUG
1651 printf("%s: including %s\n", file->prev->filename, file->filename);
1652 #endif
1653 /* update target deps */
1654 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps,
1655 tcc_strdup(buf1));
1656 /* push current file in stack */
1657 ++s1->include_stack_ptr;
1658 /* add include file debug info */
1659 if (s1->do_debug)
1660 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1661 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1662 ch = file->buf_ptr[0];
1663 goto the_end;
/* the loop ended without opening any candidate */
1665 tcc_error("include file '%s' not found", buf);
1666 include_done:
1667 break;
1668 case TOK_IFNDEF:
1669 c = 1;
1670 goto do_ifdef;
1671 case TOK_IF:
1672 c = expr_preprocess();
1673 goto do_if;
1674 case TOK_IFDEF:
1675 c = 0;
/* here c is 1 for #ifndef and 0 for #ifdef */
1676 do_ifdef:
1677 next_nomacro();
1678 if (tok < TOK_IDENT)
1679 tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
1680 if (is_bof) {
1681 if (c) {
1682 #ifdef INC_DEBUG
1683 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1684 #endif
/* an #ifndef that is the first token of the file: remember its macro as
   a possible include guard */
1685 file->ifndef_macro = tok;
1688 c = (define_find(tok) != 0) ^ c;
/* ifdef_stack entries: bit 0 set means the current branch is compiled;
   bit 1 is set once #else has been seen (see the '^= 3' below) */
1689 do_if:
1690 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1691 tcc_error("memory full (ifdef)");
1692 *s1->ifdef_stack_ptr++ = c;
1693 goto test_skip;
1694 case TOK_ELSE:
1695 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1696 tcc_error("#else without matching #if");
1697 if (s1->ifdef_stack_ptr[-1] & 2)
1698 tcc_error("#else after #else");
/* for an entry of 0 or 1 this inverts bit 0 and sets bit 1 */
1699 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1700 goto test_else;
1701 case TOK_ELIF:
1702 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1703 tcc_error("#elif without matching #if");
1704 c = s1->ifdef_stack_ptr[-1];
1705 if (c > 1)
1706 tcc_error("#elif after #else");
1707 /* last #if/#elif expression was true: we skip */
1708 if (c == 1)
1709 goto skip;
1710 c = expr_preprocess();
1711 s1->ifdef_stack_ptr[-1] = c;
1712 test_else:
/* an #else or #elif belonging to the outermost conditional of this file
   drops the include guard candidate */
1713 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1714 file->ifndef_macro = 0;
1715 test_skip:
1716 if (!(c & 1)) {
1717 skip:
1718 preprocess_skip();
1719 is_bof = 0;
1720 goto redo;
1722 break;
1723 case TOK_ENDIF:
1724 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1725 tcc_error("#endif without matching #if");
1726 s1->ifdef_stack_ptr--;
1727 /* '#ifndef macro' was at the start of file. Now we check if
1728 an '#endif' is exactly at the end of file */
1729 if (file->ifndef_macro &&
1730 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1731 file->ifndef_macro_saved = file->ifndef_macro;
1732 /* need to set to zero to avoid false matches if another
1733 #ifndef at middle of file */
1734 file->ifndef_macro = 0;
1735 while (tok != TOK_LINEFEED)
1736 next_nomacro();
/* TOK_FLAG_ENDIF is tested by next_nomacro1() when the end of the file
   is reached */
1737 tok_flags |= TOK_FLAG_ENDIF;
1738 goto the_end;
1740 break;
1741 case TOK_LINE:
1742 next();
1743 if (tok != TOK_CINT)
1744 tcc_error("A #line format is wrong");
/* no break above: #line continues into the line number handling of the
   TOK_PPNUM case */
1745 case TOK_PPNUM:
1746 if (tok != TOK_CINT) {
1747 char *p = tokc.cstr->data;
1748 tokc.i = strtoul(p, (char **)&p, 10);
/* i keeps the old line number so that it can be put back on error */
1750 i = file->line_num;
1751 file->line_num = tokc.i - 1;
1752 next();
1753 if (tok != TOK_LINEFEED) {
1754 if (tok != TOK_STR) {
1755 if ((parse_flags & PARSE_FLAG_ASM_FILE) == 0) {
1756 file->line_num = i;
1757 tcc_error("#line format is wrong");
1759 break;
1761 pstrcpy(file->filename, sizeof(file->filename),
1762 (char *)tokc.cstr->data);
1764 if (s1->do_debug)
1765 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1766 break;
1767 case TOK_ERROR:
1768 case TOK_WARNING:
/* the message is the raw rest of the line, read from the file buffer */
1769 c = tok;
1770 ch = file->buf_ptr[0];
1771 skip_spaces();
1772 q = buf;
1773 while (ch != '\n' && ch != CH_EOF) {
1774 if ((q - buf) < sizeof(buf) - 1)
1775 *q++ = ch;
1776 if (ch == '\\') {
1777 if (handle_stray_noerror() == 0)
1778 --q;
1779 } else
1780 inp();
1782 *q = '\0';
1783 if (c == TOK_ERROR)
1784 tcc_error("#error %s", buf);
1785 else
1786 tcc_warning("#warning %s", buf);
1787 break;
1788 case TOK_PRAGMA:
1789 pragma_parse(s1);
1790 break;
1791 default:
1792 if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) {
1793 /* '!' is ignored to allow C scripts. numbers are ignored
1794 to emulate cpp behaviour */
1795 } else {
1796 if (!(parse_flags & PARSE_FLAG_ASM_FILE))
1797 tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1798 else {
1799 /* this is a gas line comment in an 'S' file. */
1800 file->buf_ptr = parse_line_comment(file->buf_ptr);
1801 goto the_end;
1804 break;
1806 /* ignore other preprocess commands or #! for C scripts */
1807 while (tok != TOK_LINEFEED)
1808 next_nomacro();
1809 the_end:
1810 parse_flags = saved_parse_flags;
1813 /* evaluate escape codes in a string. */
1814 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1816 int c, n;
1817 const uint8_t *p;
1819 p = buf;
1820 for(;;) {
1821 c = *p;
1822 if (c == '\0')
1823 break;
1824 if (c == '\\') {
1825 p++;
1826 /* escape */
1827 c = *p;
1828 switch(c) {
1829 case '0': case '1': case '2': case '3':
1830 case '4': case '5': case '6': case '7':
1831 /* at most three octal digits */
1832 n = c - '0';
1833 p++;
1834 c = *p;
1835 if (isoct(c)) {
1836 n = n * 8 + c - '0';
1837 p++;
1838 c = *p;
1839 if (isoct(c)) {
1840 n = n * 8 + c - '0';
1841 p++;
1844 c = n;
1845 goto add_char_nonext;
1846 case 'x':
1847 case 'u':
1848 case 'U':
1849 p++;
1850 n = 0;
1851 for(;;) {
1852 c = *p;
1853 if (c >= 'a' && c <= 'f')
1854 c = c - 'a' + 10;
1855 else if (c >= 'A' && c <= 'F')
1856 c = c - 'A' + 10;
1857 else if (isnum(c))
1858 c = c - '0';
1859 else
1860 break;
1861 n = n * 16 + c;
1862 p++;
1864 c = n;
1865 goto add_char_nonext;
1866 case 'a':
1867 c = '\a';
1868 break;
1869 case 'b':
1870 c = '\b';
1871 break;
1872 case 'f':
1873 c = '\f';
1874 break;
1875 case 'n':
1876 c = '\n';
1877 break;
1878 case 'r':
1879 c = '\r';
1880 break;
1881 case 't':
1882 c = '\t';
1883 break;
1884 case 'v':
1885 c = '\v';
1886 break;
1887 case 'e':
1888 if (!gnu_ext)
1889 goto invalid_escape;
1890 c = 27;
1891 break;
1892 case '\'':
1893 case '\"':
1894 case '\\':
1895 case '?':
1896 break;
1897 default:
1898 invalid_escape:
1899 if (c >= '!' && c <= '~')
1900 tcc_warning("unknown escape sequence: \'\\%c\'", c);
1901 else
1902 tcc_warning("unknown escape sequence: \'\\x%x\'", c);
1903 break;
1906 p++;
1907 add_char_nonext:
1908 if (!is_long)
1909 cstr_ccat(outstr, c);
1910 else
1911 cstr_wccat(outstr, c);
1913 /* add a trailing '\0' */
1914 if (!is_long)
1915 cstr_ccat(outstr, '\0');
1916 else
1917 cstr_wccat(outstr, '\0');
/* we use 64 bit numbers */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val

   'bn' is a little-endian array of BN_SIZE words (bn[0] is the least
   significant), each treated as 32 bits wide.  The bits shifted out of
   one word are carried into the next; the bits shifted out of the last
   word are lost.  'or_val' is OR-ed into the lowest word.

   'shift' must be in the range 0..31.  A shift of 0 is handled
   separately because the carry expression 'v >> (32 - shift)' would
   otherwise shift by the full word width, which is undefined. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    unsigned int carry = (unsigned int)or_val;

    if (shift == 0) {
        /* nothing moves between words */
        bn[0] |= carry;
        return;
    }
    for (i = 0; i < BN_SIZE; i++) {
        v = bn[i];
        bn[i] = (v << shift) | carry;
        carry = v >> (32 - shift);
    }
}
1935 static void bn_zero(unsigned int *bn)
1937 int i;
1938 for(i=0;i<BN_SIZE;i++) {
1939 bn[i] = 0;
/* Convert the text of a preprocessing number into a C constant.
   On return 'tok' is TOK_CINT, TOK_CUINT, TOK_CLLONG, TOK_CULLONG,
   TOK_CFLOAT, TOK_CDOUBLE or TOK_CLDOUBLE and the value is stored in
   the matching member of 'tokc'.  The digits are first copied into the
   static buffer token_buf.  Malformed input is reported through
   tcc_error() / expect(). */
1943 /* parse number in null terminated string 'p' and return it in the
1944 current token */
1945 static void parse_number(const char *p)
1947 int b, t, shift, frac_bits, s, exp_val, ch;
1948 char *q;
1949 unsigned int bn[BN_SIZE];
1950 double d;
1952 /* number */
/* t is the first character, ch always holds the next character to be
   examined and p points just after ch */
1953 q = token_buf;
1954 ch = *p++;
1955 t = ch;
1956 ch = *p++;
1957 *q++ = t;
/* b is the radix: 10 unless a 0x/0X prefix (16) or, with tcc_ext, a
   0b/0B prefix (2) follows; the leading '0' of a prefix is removed from
   token_buf again */
1958 b = 10;
1959 if (t == '.') {
1960 goto float_frac_parse;
1961 } else if (t == '0') {
1962 if (ch == 'x' || ch == 'X') {
1963 q--;
1964 ch = *p++;
1965 b = 16;
1966 } else if (tcc_ext && (ch == 'b' || ch == 'B')) {
1967 q--;
1968 ch = *p++;
1969 b = 2;
1972 /* parse all digits. cannot check octal numbers at this stage
1973 because of floating point constants */
1974 while (1) {
1975 if (ch >= 'a' && ch <= 'f')
1976 t = ch - 'a' + 10;
1977 else if (ch >= 'A' && ch <= 'F')
1978 t = ch - 'A' + 10;
1979 else if (isnum(ch))
1980 t = ch - '0';
1981 else
1982 break;
1983 if (t >= b)
1984 break;
1985 if (q >= token_buf + STRING_MAX_SIZE) {
1986 num_too_long:
1987 tcc_error("number too long");
1989 *q++ = ch;
1990 ch = *p++;
/* a '.', or an exponent letter that is valid for the radix, makes this
   a floating point constant */
1992 if (ch == '.' ||
1993 ((ch == 'e' || ch == 'E') && b == 10) ||
1994 ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
1995 if (b != 10) {
1996 /* NOTE: strtox should support that for hexa numbers, but
1997 non ISOC99 libcs do not support it, so we prefer to do
1998 it by hand */
1999 /* hexadecimal or binary floats */
2000 /* XXX: handle overflows */
2001 *q = '\0';
/* shift is the number of bits contributed by one digit */
2002 if (b == 16)
2003 shift = 4;
2004 else
2005 shift = 1;
2006 bn_zero(bn);
2007 q = token_buf;
/* accumulate the integer part digits collected in token_buf into bn */
2008 while (1) {
2009 t = *q++;
2010 if (t == '\0') {
2011 break;
2012 } else if (t >= 'a') {
2013 t = t - 'a' + 10;
2014 } else if (t >= 'A') {
2015 t = t - 'A' + 10;
2016 } else {
2017 t = t - '0';
2019 bn_lshift(bn, shift, t);
/* fraction digits are shifted into bn as well; frac_bits counts how
   many bits of bn lie after the point */
2021 frac_bits = 0;
2022 if (ch == '.') {
2023 ch = *p++;
2024 while (1) {
2025 t = ch;
2026 if (t >= 'a' && t <= 'f') {
2027 t = t - 'a' + 10;
2028 } else if (t >= 'A' && t <= 'F') {
2029 t = t - 'A' + 10;
2030 } else if (t >= '0' && t <= '9') {
2031 t = t - '0';
2032 } else {
2033 break;
2035 if (t >= b)
2036 tcc_error("invalid digit");
2037 bn_lshift(bn, shift, t);
2038 frac_bits += shift;
2039 ch = *p++;
/* the binary exponent is mandatory: 'p' or 'P', optional sign, decimal
   digits */
2042 if (ch != 'p' && ch != 'P')
2043 expect("exponent");
2044 ch = *p++;
2045 s = 1;
2046 exp_val = 0;
2047 if (ch == '+') {
2048 ch = *p++;
2049 } else if (ch == '-') {
2050 s = -1;
2051 ch = *p++;
2053 if (ch < '0' || ch > '9')
2054 expect("exponent digits");
2055 while (ch >= '0' && ch <= '9') {
2056 exp_val = exp_val * 10 + ch - '0';
2057 ch = *p++;
2059 exp_val = exp_val * s;
2061 /* now we can generate the number */
2062 /* XXX: should patch directly float number */
/* bn[1] is the high 32-bit word and bn[0] the low one; the value is
   then scaled by 2 to the power (exp_val - frac_bits) */
2063 d = (double)bn[1] * 4294967296.0 + (double)bn[0];
2064 d = ldexp(d, exp_val - frac_bits);
2065 t = toup(ch);
2066 if (t == 'F') {
2067 ch = *p++;
2068 tok = TOK_CFLOAT;
2069 /* float : should handle overflow */
2070 tokc.f = (float)d;
2071 } else if (t == 'L') {
2072 ch = *p++;
2073 #ifdef TCC_TARGET_PE
2074 tok = TOK_CDOUBLE;
2075 tokc.d = d;
2076 #else
2077 tok = TOK_CLDOUBLE;
2078 /* XXX: not large enough */
2079 tokc.ld = (long double)d;
2080 #endif
2081 } else {
2082 tok = TOK_CDOUBLE;
2083 tokc.d = d;
2085 } else {
2086 /* decimal floats */
/* the whole constant is collected in token_buf and converted with
   strtof / strtod / strtold according to the suffix */
2087 if (ch == '.') {
2088 if (q >= token_buf + STRING_MAX_SIZE)
2089 goto num_too_long;
2090 *q++ = ch;
2091 ch = *p++;
2092 float_frac_parse:
2093 while (ch >= '0' && ch <= '9') {
2094 if (q >= token_buf + STRING_MAX_SIZE)
2095 goto num_too_long;
2096 *q++ = ch;
2097 ch = *p++;
2100 if (ch == 'e' || ch == 'E') {
2101 if (q >= token_buf + STRING_MAX_SIZE)
2102 goto num_too_long;
2103 *q++ = ch;
2104 ch = *p++;
2105 if (ch == '-' || ch == '+') {
2106 if (q >= token_buf + STRING_MAX_SIZE)
2107 goto num_too_long;
2108 *q++ = ch;
2109 ch = *p++;
2111 if (ch < '0' || ch > '9')
2112 expect("exponent digits");
2113 while (ch >= '0' && ch <= '9') {
2114 if (q >= token_buf + STRING_MAX_SIZE)
2115 goto num_too_long;
2116 *q++ = ch;
2117 ch = *p++;
2120 *q = '\0';
2121 t = toup(ch);
2122 errno = 0;
2123 if (t == 'F') {
2124 ch = *p++;
2125 tok = TOK_CFLOAT;
2126 tokc.f = strtof(token_buf, NULL);
2127 } else if (t == 'L') {
2128 ch = *p++;
2129 #ifdef TCC_TARGET_PE
2130 tok = TOK_CDOUBLE;
2131 tokc.d = strtod(token_buf, NULL);
2132 #else
2133 tok = TOK_CLDOUBLE;
2134 tokc.ld = strtold(token_buf, NULL);
2135 #endif
2136 } else {
2137 tok = TOK_CDOUBLE;
2138 tokc.d = strtod(token_buf, NULL);
2141 } else {
2142 unsigned long long n, n1;
2143 int lcount, ucount, must_64bit;
2144 const char *p1;
2146 /* integer number */
2147 *q = '\0';
2148 q = token_buf;
/* a leading '0' without 0x / 0b prefix selects octal */
2149 if (b == 10 && *q == '0') {
2150 b = 8;
2151 q++;
2153 n = 0;
2154 while(1) {
2155 t = *q++;
2156 /* no need for checks except for base 10 / 8 errors */
2157 if (t == '\0')
2158 break;
2159 else if (t >= 'a')
2160 t = t - 'a' + 10;
2161 else if (t >= 'A')
2162 t = t - 'A' + 10;
2163 else
2164 t = t - '0';
2165 if (t >= b)
2166 tcc_error("invalid digit");
2167 n1 = n;
2168 n = n * b + t;
2169 /* detect overflow */
2170 /* XXX: this test is not reliable */
2171 if (n < n1)
2172 tcc_error("integer constant overflow");
2175 /* Determine the characteristics (unsigned and/or 64bit) the type of
2176 the constant must have according to the constant suffix(es) */
2177 lcount = ucount = must_64bit = 0;
2178 p1 = p;
2179 for(;;) {
2180 t = toup(ch);
2181 if (t == 'L') {
2182 if (lcount >= 2)
2183 tcc_error("three 'l's in integer constant");
/* NOTE(review): ch was loaded with 'ch = *p++', so *(p - 1) is ch itself
   and this comparison cannot be true as written; the previous suffix
   character is at p[-2].  Confirm the intended check. */
2184 if (lcount && *(p - 1) != ch)
2185 tcc_error("incorrect integer suffix: %s", p1);
2186 lcount++;
/* the #if makes 'lcount == 2' a condition only on targets other than
   non-PE x86-64; there, a single 'l' already sets must_64bit */
2187 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2188 if (lcount == 2)
2189 #endif
2190 must_64bit = 1;
2191 ch = *p++;
2192 } else if (t == 'U') {
2193 if (ucount >= 1)
2194 tcc_error("two 'u's in integer constant");
2195 ucount++;
2196 ch = *p++;
2197 } else {
2198 break;
2202 /* Whether 64 bits are needed to hold the constant's value */
/* n1 receives the most significant 32 bits of the chosen width, so that
   'n1 >> 31' below tests the sign bit position of that width */
2203 if (n & 0xffffffff00000000LL || must_64bit) {
2204 tok = TOK_CLLONG;
2205 n1 = n >> 32;
2206 } else {
2207 tok = TOK_CINT;
2208 n1 = n;
2211 /* Whether type must be unsigned to hold the constant's value */
2212 if (ucount || ((n1 >> 31) && (b != 10))) {
2213 if (tok == TOK_CLLONG)
2214 tok = TOK_CULLONG;
2215 else
2216 tok = TOK_CUINT;
2217 /* If decimal and no unsigned suffix, bump to 64 bits or throw error */
2218 } else if (n1 >> 31) {
2219 if (tok == TOK_CINT)
2220 tok = TOK_CLLONG;
2221 else
2222 tcc_error("integer constant overflow");
2225 if (tok == TOK_CINT || tok == TOK_CUINT)
2226 tokc.ui = n;
2227 else
2228 tokc.ull = n;
/* anything left after the digits and the accepted suffixes is an error */
2230 if (ch)
2231 tcc_error("invalid number\n");
/* Expands to one complete 'case' of the tokenizer switch for an operator
   that is either the single character c1 (token tok1) or c1 immediately
   followed by c2 (token tok2).  Relies on the variables 'c', 'p' and
   'tok' of the enclosing function and on PEEKC() to fetch the character
   after c1. */
#define PARSE2(c1, tok1, c2, tok2)      \
    case c1:                            \
        PEEKC(c, p);                    \
        if (c != c2) {                  \
            tok = tok1;                 \
        } else {                        \
            p++;                        \
            tok = tok2;                 \
        }                               \
        break;
/* Tokenizer core: read one token from the current file buffer.
   Scanning starts at file->buf_ptr; the token is stored in 'tok' (with
   its value in 'tokc' where one is set below) and the new position is
   written back to file->buf_ptr.  Blanks and comments are returned as
   the tokens ' ' or '\t'.  Paths that leave through 'keep_tok_flags'
   (blanks, comments, line feeds) keep tok_flags unchanged; every other
   token resets tok_flags to 0. */
2246 /* return next token without macro substitution */
2247 static inline void next_nomacro1(void)
2249 int t, c, is_long;
2250 TokenSym *ts;
2251 uint8_t *p, *p1;
2252 unsigned int h;
2254 p = file->buf_ptr;
2255 redo_no_start:
2256 c = *p;
2257 redo_no_start_2:
2258 switch(c) {
2259 case ' ':
2260 case '\t':
2261 tok = c;
2262 p++;
2263 goto keep_tok_flags;
2264 case '\f':
2265 case '\v':
2266 case '\r':
2267 p++;
2268 goto redo_no_start;
2269 case '\\':
2270 /* first look if it is in fact an end of buffer */
2271 if (p >= file->buf_end) {
2272 file->buf_ptr = p;
2273 handle_eob();
2274 p = file->buf_ptr;
2275 if (p >= file->buf_end)
2276 goto parse_eof;
2277 else
2278 goto redo_no_start;
2279 } else {
2280 file->buf_ptr = p;
2281 ch = *p;
/* with PARSE_FLAG_ACCEPT_STRAYS a backslash that handle_stray_noerror()
   does not consume is returned as a plain token */
2282 if (parse_flags & PARSE_FLAG_ACCEPT_STRAYS) {
2283 if (handle_stray_noerror() != 0) {
2284 goto parse_simple;
2286 } else {
2287 handle_stray();
2289 p = file->buf_ptr;
2290 c = ch;
2291 goto redo_no_start_2;
/* end of the current file.  In order: deliver one synthetic
   TOK_LINEFEED when line feeds are tokens (TOK_FLAG_EOF records that it
   was sent), return TOK_EOF when not preprocessing or when no including
   file is left, otherwise close the file and continue in the file that
   included it */
2293 parse_eof:
2295 TCCState *s1 = tcc_state;
2296 if ((parse_flags & PARSE_FLAG_LINEFEED)
2297 && !(tok_flags & TOK_FLAG_EOF)) {
2298 tok_flags |= TOK_FLAG_EOF;
2299 tok = TOK_LINEFEED;
2300 goto keep_tok_flags;
2301 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2302 tok = TOK_EOF;
2303 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2304 tcc_error("missing #endif");
2305 } else if (s1->include_stack_ptr == s1->include_stack) {
2306 /* no include left : end of file. */
2307 tok = TOK_EOF;
2308 } else {
2309 tok_flags &= ~TOK_FLAG_EOF;
2310 /* pop include file */
2312 /* test if previous '#endif' was after a #ifdef at
2313 start of file */
2314 if (tok_flags & TOK_FLAG_ENDIF) {
2315 #ifdef INC_DEBUG
2316 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2317 #endif
2318 add_cached_include(s1, file->filename, file->ifndef_macro_saved);
2319 tok_flags &= ~TOK_FLAG_ENDIF;
2322 /* add end of include file debug info */
2323 if (tcc_state->do_debug) {
2324 put_stabd(N_EINCL, 0, 0);
2326 /* pop include stack */
2327 tcc_close();
2328 s1->include_stack_ptr--;
2329 p = file->buf_ptr;
2330 goto redo_no_start;
2333 break;
2335 case '\n':
2336 file->line_num++;
2337 tok_flags |= TOK_FLAG_BOL;
2338 p++;
/* also the continuation point after a directive has been processed */
2339 maybe_newline:
2340 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2341 goto redo_no_start;
2342 tok = TOK_LINEFEED;
2343 goto keep_tok_flags;
2345 case '#':
2346 /* XXX: simplify */
2347 PEEKC(c, p);
/* in assembler files '#' followed by a blank starts a line comment */
2348 if (is_space(c) && (parse_flags & PARSE_FLAG_ASM_FILE)) {
2349 p = parse_line_comment(p);
2350 goto redo_no_start;
2352 else
/* '#' at the beginning of a line while preprocessing: a directive */
2353 if ((tok_flags & TOK_FLAG_BOL) &&
2354 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2355 file->buf_ptr = p;
2356 preprocess(tok_flags & TOK_FLAG_BOF);
2357 p = file->buf_ptr;
2358 goto maybe_newline;
2359 } else {
2360 if (c == '#') {
2361 p++;
2362 tok = TOK_TWOSHARPS;
2363 } else {
2364 if (parse_flags & PARSE_FLAG_ASM_FILE) {
2365 p = parse_line_comment(p - 1);
2366 goto redo_no_start;
2367 } else {
2368 tok = '#';
2372 break;
2374 /* dollar is allowed to start identifiers when not parsing asm */
2375 case '$':
2376 if (!tcc_state->dollars_in_identifiers
2377 || (parse_flags & PARSE_FLAG_ASM_FILE)) goto parse_simple;
/* identifiers; 'L' is missing from the list below because it has its
   own case further down (wide character and string constants) */
2379 case 'a': case 'b': case 'c': case 'd':
2380 case 'e': case 'f': case 'g': case 'h':
2381 case 'i': case 'j': case 'k': case 'l':
2382 case 'm': case 'n': case 'o': case 'p':
2383 case 'q': case 'r': case 's': case 't':
2384 case 'u': case 'v': case 'w': case 'x':
2385 case 'y': case 'z':
2386 case 'A': case 'B': case 'C': case 'D':
2387 case 'E': case 'F': case 'G': case 'H':
2388 case 'I': case 'J': case 'K':
2389 case 'M': case 'N': case 'O': case 'P':
2390 case 'Q': case 'R': case 'S': case 'T':
2391 case 'U': case 'V': case 'W': case 'X':
2392 case 'Y': case 'Z':
2393 case '_':
2394 parse_ident_fast:
/* the identifier is hashed in place while it is scanned; p1 marks its
   first character */
2395 p1 = p;
2396 h = TOK_HASH_INIT;
2397 h = TOK_HASH_FUNC(h, c);
2398 p++;
2399 for(;;) {
2400 c = *p;
2401 if (!isidnum_table[c-CH_EOF]
2402 && (tcc_state->dollars_in_identifiers ? (c != '$') : 1))
2403 break;
2404 h = TOK_HASH_FUNC(h, c);
2405 p++;
2407 if (c != '\\') {
2408 TokenSym **pts;
2409 int len;
2411 /* fast case : no stray found, so we have the full token
2412 and we have already hashed it */
2413 len = p - p1;
2414 h &= (TOK_HASH_SIZE - 1);
2415 pts = &hash_ident[h];
2416 for(;;) {
2417 ts = *pts;
2418 if (!ts)
2419 break;
2420 if (ts->len == len && !memcmp(ts->str, p1, len))
2421 goto token_found;
2422 pts = &(ts->hash_next);
2424 ts = tok_alloc_new(pts, (char *) p1, len);
2425 token_found: ;
2426 } else {
2427 /* slower case */
/* a backslash interrupted the scan: copy what was read so far into
   tokcstr and continue with PEEKC() */
2428 cstr_reset(&tokcstr);
2430 while (p1 < p) {
2431 cstr_ccat(&tokcstr, *p1);
2432 p1++;
2434 p--;
2435 PEEKC(c, p);
2436 parse_ident_slow:
2437 while (isidnum_table[c-CH_EOF]
2438 || (tcc_state->dollars_in_identifiers ? (c == '$') : 0)) {
2439 cstr_ccat(&tokcstr, c);
2440 PEEKC(c, p);
2442 ts = tok_alloc(tokcstr.data, tokcstr.size);
2444 tok = ts->tok;
2445 break;
2446 case 'L':
/* 'L' is an ordinary identifier start unless it is followed by a quote
   (wide constant) or by a backslash (slow identifier path) */
2447 t = p[1];
2448 if (t != '\\' && t != '\'' && t != '\"') {
2449 /* fast case */
2450 goto parse_ident_fast;
2451 } else {
2452 PEEKC(c, p);
2453 if (c == '\'' || c == '\"') {
2454 is_long = 1;
2455 goto str_const;
2456 } else {
2457 cstr_reset(&tokcstr);
2458 cstr_ccat(&tokcstr, 'L');
2459 goto parse_ident_slow;
2462 break;
2463 case '0': case '1': case '2': case '3':
2464 case '4': case '5': case '6': case '7':
2465 case '8': case '9':
/* numbers are only collected as text here (TOK_PPNUM) */
2467 cstr_reset(&tokcstr);
2468 /* after the first digit, accept digits, alpha, '.' or sign if
2469 prefixed by 'eEpP' */
2470 parse_num:
2471 for(;;) {
2472 t = c;
2473 cstr_ccat(&tokcstr, c);
2474 PEEKC(c, p);
2475 if (!(isnum(c) || isid(c) || c == '.' ||
2476 ((c == '+' || c == '-') &&
2477 (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
2478 break;
2480 /* We add a trailing '\0' to ease parsing */
2481 cstr_ccat(&tokcstr, '\0');
2482 tokc.cstr = &tokcstr;
2483 tok = TOK_PPNUM;
2484 break;
2485 case '.':
2486 /* special dot handling because it can also start a number */
2487 PEEKC(c, p);
2488 if (isnum(c)) {
2489 cstr_reset(&tokcstr);
2490 cstr_ccat(&tokcstr, '.');
2491 goto parse_num;
2492 } else if (c == '.') {
2493 PEEKC(c, p);
2494 if (c != '.') {
2495 if ((parse_flags & PARSE_FLAG_ASM_FILE) == 0)
2496 expect("'.'");
2497 tok = '.';
2498 break;
2500 PEEKC(c, p);
2501 tok = TOK_DOTS;
2502 } else {
2503 tok = '.';
2505 break;
2506 case '\'':
2507 case '\"':
2508 is_long = 0;
/* character and string constants; is_long is 1 when an 'L' prefix was
   seen */
2509 str_const:
2511 CString str;
2512 int sep;
2514 sep = c;
2516 /* parse the string */
2517 cstr_new(&str);
2518 p = parse_pp_string(p, sep, &str);
2519 cstr_ccat(&str, '\0');
2521 /* eval the escape (should be done as TOK_PPNUM) */
2522 cstr_reset(&tokcstr);
2523 parse_escape_string(&tokcstr, str.data, is_long);
2524 cstr_free(&str);
2526 if (sep == '\'') {
2527 int char_size;
2528 /* XXX: make it portable */
2529 if (!is_long)
2530 char_size = 1;
2531 else
2532 char_size = sizeof(nwchar_t);
/* tokcstr.size includes the terminating zero character added by
   parse_escape_string() */
2533 if (tokcstr.size <= char_size)
2534 tcc_error("empty character constant");
2535 if (tokcstr.size > 2 * char_size)
2536 tcc_warning("multi-character character constant");
2537 if (!is_long) {
2538 tokc.i = *(int8_t *)tokcstr.data;
2539 tok = TOK_CCHAR;
2540 } else {
2541 tokc.i = *(nwchar_t *)tokcstr.data;
2542 tok = TOK_LCHAR;
2544 } else {
2545 tokc.cstr = &tokcstr;
2546 if (!is_long)
2547 tok = TOK_STR;
2548 else
2549 tok = TOK_LSTR;
2552 break;
2554 case '<':
2555 PEEKC(c, p);
2556 if (c == '=') {
2557 p++;
2558 tok = TOK_LE;
2559 } else if (c == '<') {
2560 PEEKC(c, p);
2561 if (c == '=') {
2562 p++;
2563 tok = TOK_A_SHL;
2564 } else {
2565 tok = TOK_SHL;
2567 } else {
2568 tok = TOK_LT;
2570 break;
2572 case '>':
2573 PEEKC(c, p);
2574 if (c == '=') {
2575 p++;
2576 tok = TOK_GE;
2577 } else if (c == '>') {
2578 PEEKC(c, p);
2579 if (c == '=') {
2580 p++;
2581 tok = TOK_A_SAR;
2582 } else {
2583 tok = TOK_SAR;
2585 } else {
2586 tok = TOK_GT;
2588 break;
2590 case '&':
2591 PEEKC(c, p);
2592 if (c == '&') {
2593 p++;
2594 tok = TOK_LAND;
2595 } else if (c == '=') {
2596 p++;
2597 tok = TOK_A_AND;
2598 } else {
2599 tok = '&';
2601 break;
2603 case '|':
2604 PEEKC(c, p);
2605 if (c == '|') {
2606 p++;
2607 tok = TOK_LOR;
2608 } else if (c == '=') {
2609 p++;
2610 tok = TOK_A_OR;
2611 } else {
2612 tok = '|';
2614 break;
2616 case '+':
2617 PEEKC(c, p);
2618 if (c == '+') {
2619 p++;
2620 tok = TOK_INC;
2621 } else if (c == '=') {
2622 p++;
2623 tok = TOK_A_ADD;
2624 } else {
2625 tok = '+';
2627 break;
2629 case '-':
2630 PEEKC(c, p);
2631 if (c == '-') {
2632 p++;
2633 tok = TOK_DEC;
2634 } else if (c == '=') {
2635 p++;
2636 tok = TOK_A_SUB;
2637 } else if (c == '>') {
2638 p++;
2639 tok = TOK_ARROW;
2640 } else {
2641 tok = '-';
2643 break;
2645 PARSE2('!', '!', '=', TOK_NE)
2646 PARSE2('=', '=', '=', TOK_EQ)
2647 PARSE2('*', '*', '=', TOK_A_MUL)
2648 PARSE2('%', '%', '=', TOK_A_MOD)
2649 PARSE2('^', '^', '=', TOK_A_XOR)
2651 /* comments or operator */
2652 case '/':
2653 PEEKC(c, p);
2654 if (c == '*') {
2655 p = parse_comment(p);
2656 /* comments replaced by a blank */
2657 tok = ' ';
2658 goto keep_tok_flags;
2659 } else if (c == '/') {
2660 p = parse_line_comment(p);
2661 tok = ' ';
2662 goto keep_tok_flags;
2663 } else if (c == '=') {
2664 p++;
2665 tok = TOK_A_DIV;
2666 } else {
2667 tok = '/';
2669 break;
2671 /* simple tokens */
2672 case '(':
2673 case ')':
2674 case '[':
2675 case ']':
2676 case '{':
2677 case '}':
2678 case ',':
2679 case ';':
2680 case ':':
2681 case '?':
2682 case '~':
2683 case '@': /* only used in assembler */
2684 case TOK_QHASH:
2685 parse_simple:
2686 tok = c;
2687 p++;
2688 break;
2689 default:
/* unknown characters are an error in C and become a blank in assembler
   files */
2690 if ((parse_flags & PARSE_FLAG_ASM_FILE) == 0)
2691 tcc_error("unrecognized character \\x%02x", c);
2692 else {
2693 tok = ' ';
2694 p++;
2696 break;
/* a real token was read: the BOL/BOF/EOF/ENDIF flags no longer apply */
2698 tok_flags = 0;
2699 keep_tok_flags:
2700 file->buf_ptr = p;
2701 #if defined(PARSE_DEBUG)
2702 printf("token = %s\n", get_tok_str(tok, &tokc));
2703 #endif
2706 /* return next token without macro substitution. Can read input from
2707 macro_ptr buffer */
2708 static void next_nomacro_spc(void)
2710 if (macro_ptr) {
2711 redo:
2712 tok = *macro_ptr;
2713 if (tok) {
2714 TOK_GET(&tok, &macro_ptr, &tokc);
2715 if (tok == TOK_LINENUM) {
2716 file->line_num = tokc.i;
2717 goto redo;
2720 } else {
2721 next_nomacro1();
2725 ST_FUNC void next_nomacro(void)
2727 do {
2728 next_nomacro_spc();
2729 } while (is_space(tok));
/* Build the token string of a function-like macro body with its
   parameters replaced.
   nested_list  passed through to macro_subst() for argument expansion
   macro_str    zero-terminated replacement list of the macro
   args         list searched with sym_find2(); the tokens of a matching
                argument are in its field d
   Returns the zero-terminated token string collected in 'str'; an empty
   result is represented by a single TOK_PLCHLDR. */
2732 /* substitute arguments in replacement lists in macro_str by the values in
2733 args (field d) and return allocated string */
2734 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2736 int last_tok, t, spc;
2737 const int *st;
2738 Sym *s;
2739 CValue cval;
2740 TokenString str;
2741 CString cstr;
2742 CString cstr2;
2744 tok_str_new(&str);
2745 last_tok = 0;
2746 while(1) {
2747 TOK_GET(&t, &macro_str, &cval);
2748 if (!t)
2749 break;
2750 if (t == '#') {
2751 /* stringize */
2752 TOK_GET(&t, &macro_str, &cval);
2753 if (!t)
2754 break;
2755 s = sym_find2(args, t);
2756 if (s) {
/* concatenate the spelling of the argument tokens; placeholders are
   left out and check_space() decides which blank tokens are dropped */
2757 cstr_new(&cstr);
2758 st = s->d;
2759 spc = 0;
2760 while (*st) {
2761 TOK_GET(&t, &st, &cval);
2762 if (t != TOK_PLCHLDR && !check_space(t, &spc))
2763 cstr_cat(&cstr, get_tok_str(t, &cval));
/* presumably spc is 1 when the text ends with a blank, which is removed
   here -- confirm against check_space() */
2765 cstr.size -= spc;
2766 cstr_ccat(&cstr, '\0');
2767 /* add string */
2768 cstr_new(&cstr2);
2769 /* emulate GCC behaviour and parse escapes in the token string */
2770 parse_escape_string(&cstr2, cstr.data, 0);
2771 cstr_free(&cstr);
2772 cval.cstr = &cstr2;
2773 tok_str_add2(&str, TOK_STR, &cval);
2774 cstr_free(cval.cstr);
2775 } else {
/* '#' followed by something that is not a parameter: only the following
   token is copied, the '#' itself is not */
2776 tok_str_add2(&str, t, &cval);
2778 } else if (t >= TOK_IDENT) {
2779 s = sym_find2(args, t);
2780 if (s) {
2781 st = s->d;
2782 /* if '##' is present before or after, no arg substitution */
2783 if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) {
2784 /* special case for var arg macros : ## eats the
2785 ',' if empty VA_ARGS variable. */
2786 /* XXX: test of the ',' is not 100%
2787 reliable. should fix it to avoid security
2788 problems */
2789 if (gnu_ext && s->type.t &&
2790 last_tok == TOK_TWOSHARPS &&
2791 str.len >= 2 && str.str[str.len - 2] == ',') {
/* the last two entries (',' and the '##' that followed it) are replaced
   by a single TOK_GNUCOMMA */
2792 str.len -= 2;
2793 tok_str_add(&str, TOK_GNUCOMMA);
2794 str.str[str.len] = 0; // not actually needed.
/* copy the argument tokens as they are, without macro expansion */
2797 for(;;) {
2798 int t1;
2799 TOK_GET(&t1, &st, &cval);
2800 if (!t1)
2801 break;
2802 tok_str_add2(&str, t1, &cval);
2804 } else {
2805 /* NOTE: the stream cannot be read when macro
2806 substituing an argument */
2807 macro_subst(&str, nested_list, st, NULL);
2809 } else {
2810 tok_str_add(&str, t);
2812 } else {
2813 tok_str_add2(&str, t, &cval);
2815 last_tok = t;
2817 if (str.len == 0)
2818 tok_str_add(&str, TOK_PLCHLDR);
2819 tok_str_add(&str, 0);
2820 return str.str;
/* Abbreviated month names: twelve entries of three letters plus the
   terminating zero.  Indexed with tm->tm_mon by macro_subst_tok() when
   it builds the __DATE__ string. */
2823 static char const ab_month_name[12][4] =
2825 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
2826 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
2829 /* do macro substitution of current token with macro 's' and add
2830 result to (tok_str,tok_len). 'nested_list' is the list of all
2831 macros we got inside to avoid recursing. Return non zero if no
2832 substitution needs to be done */
2833 static int macro_subst_tok(TokenString *tok_str,
2834 Sym **nested_list, Sym *s, struct macro_level **can_read_stream)
2836 Sym *args, *sa, *sa1;
2837 int mstr_allocated, parlevel, *mstr, t, t1, spc;
2838 const int *p;
2839 TokenString str;
2840 char *cstrval;
2841 CValue cval;
2842 CString cstr;
2843 char buf[32];
2845 /* if symbol is a macro, prepare substitution */
2846 /* special macros */
2847 if (tok == TOK___LINE__) {
2848 snprintf(buf, sizeof(buf), "%d", file->line_num);
2849 cstrval = buf;
2850 t1 = TOK_PPNUM;
2851 goto add_cstr1;
2852 } else if (tok == TOK___FILE__) {
2853 cstrval = file->filename;
2854 goto add_cstr;
2855 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2856 time_t ti;
2857 struct tm *tm;
2859 time(&ti);
2860 tm = localtime(&ti);
2861 if (tok == TOK___DATE__) {
2862 snprintf(buf, sizeof(buf), "%s %2d %d",
2863 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2864 } else {
2865 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
2866 tm->tm_hour, tm->tm_min, tm->tm_sec);
2868 cstrval = buf;
2869 add_cstr:
2870 t1 = TOK_STR;
2871 add_cstr1:
2872 cstr_new(&cstr);
2873 cstr_cat(&cstr, cstrval);
2874 cstr_ccat(&cstr, '\0');
2875 cval.cstr = &cstr;
2876 tok_str_add2(tok_str, t1, &cval);
2877 cstr_free(&cstr);
2878 } else {
2879 int mtok = tok;
2880 int old_parse_flags = parse_flags;
2881 parse_flags |= PARSE_FLAG_ACCEPT_STRAYS;
2883 mstr = s->d;
2884 mstr_allocated = 0;
2885 if (s->type.t == MACRO_FUNC) {
2886 /* NOTE: we do not use next_nomacro to avoid eating the
2887 next token. XXX: find better solution */
2888 redo:
2889 if (macro_ptr) {
2890 p = macro_ptr;
2891 while (is_space(t = *p) || TOK_LINEFEED == t)
2892 ++p;
2893 if (t == 0 && can_read_stream) {
2894 /* end of macro stream: we must look at the token
2895 after in the file */
2896 struct macro_level *ml = *can_read_stream;
2897 macro_ptr = NULL;
2898 if (ml)
2900 macro_ptr = ml->p;
2901 ml->p = NULL;
2902 *can_read_stream = ml -> prev;
2904 /* also, end of scope for nested defined symbol */
2905 (*nested_list)->v = -1;
2906 goto redo;
2908 } else {
2909 ch = tcc_peekc_slow(file);
2910 while (is_space(ch) || ch == '\n' || ch == '/')
2912 if (ch == '/')
2914 int c;
2915 uint8_t *p = file->buf_ptr;
2916 PEEKC(c, p);
2917 if (c == '*') {
2918 p = parse_comment(p);
2919 file->buf_ptr = p - 1;
2920 } else if (c == '/') {
2921 p = parse_line_comment(p);
2922 file->buf_ptr = p - 1;
2923 } else
2924 break;
2926 cinp();
2928 t = ch;
2930 if (t != '(') /* no macro subst */
2931 return -1;
2933 /* argument macro */
2934 next_nomacro();
2935 next_nomacro();
2936 args = NULL;
2937 sa = s->next;
2938 /* NOTE: empty args are allowed, except if no args */
2939 for(;;) {
2940 /* handle '()' case */
2941 if (!args && !sa && tok == ')')
2942 break;
2943 if (!sa)
2944 tcc_error("macro '%s' used with too many args",
2945 get_tok_str(s->v, 0));
2946 tok_str_new(&str);
2947 parlevel = spc = 0;
2948 /* NOTE: non zero sa->t indicates VA_ARGS */
2949 while ((parlevel > 0 ||
2950 (tok != ')' &&
2951 (tok != ',' || sa->type.t))) &&
2952 tok != -1 &&
2953 tok != 0) {
2954 if (tok == '(')
2955 parlevel++;
2956 else if (tok == ')')
2957 parlevel--;
2958 if (tok == TOK_LINEFEED)
2959 tok = ' ';
2960 if (tok == TOK_EOF || tok == 0)
2961 break;
2962 if (!check_space(tok, &spc))
2963 tok_str_add2(&str, tok, &tokc);
2964 next_nomacro_spc();
2966 if (parlevel)
2967 return -1;
2968 str.len -= spc;
2969 if (str.len == 0)
2970 tok_str_add(&str, TOK_PLCHLDR);
2971 tok_str_add(&str, 0);
2972 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
2973 sa1->d = str.str;
2974 sa = sa->next;
2975 if (tok == ')') {
2976 /* special case for gcc var args: add an empty
2977 var arg argument if it is omitted */
2978 if (sa && sa->type.t && gnu_ext)
2979 continue;
2980 else
2981 break;
2984 * #define f(a) f(a)
2985 * #define h f(r
2986 * h 5)
2988 if (tok != ',') {
2989 /* Argh! Not a macro invocation after all, at this
2990 * point, so put everything back onto mstr that's
2991 * been skipped since we saw the '(' )*/
2992 tok_str_new(&str);
2993 tok_str_add(&str, mtok);
2994 tok_str_add(&str, '(');
2995 for (sa = s->next; sa; sa = sa->next) {
2996 int *p = sa->d;
2997 while (p && *p) {
2998 tok_str_add(&str, *p);
2999 p++;
3001 mstr = str.str;
3002 /* leak memory */;
3003 mstr_allocated = 0;
3004 goto free_memory;
3007 next_nomacro();
3009 if (sa) {
3010 tcc_error("macro '%s' used with too few args",
3011 get_tok_str(s->v, 0));
3014 /* now subst each arg */
3015 mstr = macro_arg_subst(nested_list, mstr, args);
3016 free_memory:
3017 /* free memory */
3018 sa = args;
3019 while (sa) {
3020 sa1 = sa->prev;
3021 tok_str_free(sa->d);
3022 sym_free(sa);
3023 sa = sa1;
3025 mstr_allocated = 1;
3027 sym_push2(nested_list, s->v, 0, 0);
3028 parse_flags = old_parse_flags;
3029 macro_subst(tok_str, nested_list, mstr, can_read_stream);
3030 /* pop nested defined symbol */
3031 sa1 = *nested_list;
3032 *nested_list = sa1->prev;
3033 sym_free(sa1);
3034 if (mstr_allocated)
3035 tok_str_free(mstr);
3037 return 0;
3040 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
3041 return the resulting string (which must be freed). */
3042 static inline int *macro_twosharps(const int *macro_str)
3044 const int *ptr;
3045 int t;
3046 int last_tok = -1;
3047 TokenString macro_str1;
3048 CString cstr;
3049 int n, start_of_nosubsts;
3051 /* we search the first '##' */
3052 for(ptr = macro_str;;) {
3053 CValue cval;
3054 TOK_GET(&t, &ptr, &cval);
3055 if (t == TOK_TWOSHARPS)
3056 break;
3057 /* nothing more to do if end of string */
3058 if (t == 0)
3059 return NULL;
3062 /* we saw '##', so we need more processing to handle it */
3063 start_of_nosubsts = -1;
3064 tok_str_new(&macro_str1);
3065 for(ptr = macro_str;;) {
3066 TOK_GET(&tok, &ptr, &tokc);
3067 if (tok == 0)
3068 break;
3069 if (tok == TOK_NOSUBST && start_of_nosubsts < 0)
3070 start_of_nosubsts = macro_str1.len;
3071 while (*ptr == TOK_TWOSHARPS) {
3072 /* given 'a##b', remove nosubsts preceding 'a' */
3073 if (start_of_nosubsts >= 0)
3074 macro_str1.len = start_of_nosubsts;
3075 /* given 'a##b', skip '##' */
3076 t = *++ptr;
3077 /* given 'a##b', remove nosubsts preceding 'b' */
3078 while (t == TOK_NOSUBST)
3079 t = *++ptr;
3080 if (t && t != TOK_TWOSHARPS) {
3081 CValue cval;
3082 TOK_GET(&t, &ptr, &cval);
3083 /* We concatenate the two tokens */
3084 cstr_new(&cstr);
3085 if (tok != TOK_PLCHLDR)
3086 cstr_cat(&cstr, get_tok_str(tok, &tokc));
3087 n = cstr.size;
3088 if (t != TOK_PLCHLDR)
3089 cstr_cat(&cstr, get_tok_str(t, &cval));
3090 cstr_ccat(&cstr, '\0');
3092 tcc_open_bf(tcc_state, ":paste:", cstr.size);
3093 memcpy(file->buffer, cstr.data, cstr.size);
3094 for (;;) {
3095 if (0 == *file->buf_ptr)
3096 break;
3097 next_nomacro1();
3098 if (0 == *file->buf_ptr)
3099 break;
3100 tok_str_add2(&macro_str1, tok, &tokc);
3101 tcc_warning("pasting \"%.*s\" and \"%s\" does not give a valid preprocessing token",
3102 n, cstr.data, (char*)cstr.data + n);
3104 tcc_close();
3105 cstr_free(&cstr);
3107 if (tok == TOK_TWOSHARPS) {
3108 /* two sharps twosharped together tokenize to two
3109 * sharp tokens, not a twosharp token. */
3110 /* That's fun to say, but is it actually true? GCC
3111 * stringifies #define a # ## # ## # to "## #" (and a
3112 * warning), while we produce "###" (no warning) */
3113 tok_str_add(&macro_str1, '#');
3114 tok = '#';
3117 if (tok != TOK_NOSUBST) {
3118 tok_str_add2(&macro_str1, tok, &tokc);
3119 tok = ' ';
3120 start_of_nosubsts = -1;
3121 } else
3122 tok_str_add2(&macro_str1, tok, &tokc);
3124 tok_str_add(&macro_str1, 0);
3125 return macro_str1.str;
3129 /* do macro substitution of macro_str and add result to
3130 (tok_str,tok_len). 'nested_list' is the list of all macros we got
3131 inside to avoid recursing. */
3132 static void macro_subst(TokenString *tok_str, Sym **nested_list,
3133 const int *macro_str, struct macro_level ** can_read_stream)
3135 Sym *s;
3136 int *macro_str1;
3137 const int *ptr;
3138 int t, ret, spc;
3139 CValue cval;
3140 struct macro_level ml;
3141 int force_blank;
3142 int gnucomma_index = -1;
3144 /* first scan for '##' operator handling */
3145 ptr = macro_str;
3146 macro_str1 = macro_twosharps(ptr);
3148 if (macro_str1)
3149 ptr = macro_str1;
3150 spc = 0;
3151 force_blank = 0;
3153 while (1) {
3154 /* NOTE: ptr == NULL can only happen if tokens are read from
3155 file stream due to a macro function call */
3156 if (ptr == NULL)
3157 break;
3158 TOK_GET(&t, &ptr, &cval);
3159 if (t == 0)
3160 break;
3161 if (t == '\\' && !(parse_flags & PARSE_FLAG_ACCEPT_STRAYS)) {
3162 tcc_error("stray '\\' in program");
3164 if (t == TOK_NOSUBST) {
3165 /* following token has already been subst'd. just copy it on */
3166 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
3167 TOK_GET(&t, &ptr, &cval);
3168 goto no_subst;
3170 if (t == TOK_GNUCOMMA) {
3171 if (gnucomma_index != -1)
3172 tcc_error("two GNU commas in the same macro");
3173 gnucomma_index = tok_str->len;
3174 tok_str_add(tok_str, ',');
3175 TOK_GET(&t, &ptr, &cval);
3177 s = define_find(t);
3178 if (s != NULL) {
3179 int old_length = tok_str->len;
3180 /* if nested substitution, do nothing */
3181 if (sym_find2(*nested_list, t)) {
3182 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
3183 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
3184 goto no_subst;
3186 ml.p = macro_ptr;
3187 if (can_read_stream)
3188 ml.prev = *can_read_stream, *can_read_stream = &ml;
3189 macro_ptr = (int *)ptr;
3190 tok = t;
3191 ret = macro_subst_tok(tok_str, nested_list, s, can_read_stream);
3192 spc = 0;
3193 if (tok_str->len && tok_str->str[tok_str->len-1] == ' ')
3194 spc = 1;
3195 ptr = (int *)macro_ptr;
3196 macro_ptr = ml.p;
3197 if (can_read_stream && *can_read_stream == &ml)
3198 *can_read_stream = ml.prev;
3199 if (ret != 0) {
3200 if (!check_space(t, &spc))
3201 tok_str_add2(tok_str, t, &cval);
3202 tok_str_add(tok_str, ' ');
3203 spc = 1;
3204 force_blank = 0;
3206 if (ret == 0 && old_length == tok_str->len)
3207 tok_str_add(tok_str, TOK_PLCHLDR);
3208 if (parse_flags & PARSE_FLAG_SPACES)
3209 force_blank = 0; //1;
3210 } else {
3211 no_subst:
3212 if (force_blank) {
3213 tok_str_add(tok_str, ' ');
3214 spc = 1;
3215 force_blank = 0;
3217 if (!check_space(t, &spc))
3218 tok_str_add2(tok_str, t, &cval);
3220 if (gnucomma_index != -1 && tok_str->len >= gnucomma_index+2) {
3221 if (tok_str->str[gnucomma_index+1] == TOK_PLCHLDR)
3222 tok_str->len -= 2;
3223 gnucomma_index = -1;
3225 if (tok_str->len && tok_str->str[tok_str->len-1] == TOK_PLCHLDR)
3226 tok_str->len--;
3228 if (macro_str1)
3229 tok_str_free(macro_str1);
3232 /* return next token with macro substitution */
3233 ST_FUNC void next(void)
3235 Sym *nested_list, *s;
3236 TokenString str;
3237 struct macro_level *ml;
3239 redo:
3240 if (parse_flags & PARSE_FLAG_SPACES)
3241 next_nomacro_spc();
3242 else
3243 next_nomacro();
3244 if (!macro_ptr) {
3245 /* if not reading from macro substituted string, then try
3246 to substitute macros */
3247 if (tok >= TOK_IDENT &&
3248 (parse_flags & PARSE_FLAG_PREPROCESS)) {
3249 s = define_find(tok);
3250 if (s) {
3251 /* we have a macro: we try to substitute */
3252 tok_str_new(&str);
3253 nested_list = NULL;
3254 ml = NULL;
3255 if (macro_subst_tok(&str, &nested_list, s, &ml) == 0) {
3256 /* substitution done, NOTE: maybe empty */
3257 tok_str_add(&str, 0);
3258 macro_ptr = str.str;
3259 macro_ptr_allocated = str.str;
3260 goto redo;
3262 if (str.len && str.str[str.len-1] == TOK_PLCHLDR)
3263 str.len--;
3266 } else {
3267 if (tok == 0) {
3268 /* end of macro or end of unget buffer */
3269 if (unget_buffer_enabled) {
3270 macro_ptr = unget_saved_macro_ptr;
3271 unget_buffer_enabled = 0;
3272 } else {
3273 /* end of macro string: free it */
3274 tok_str_free(macro_ptr_allocated);
3275 macro_ptr_allocated = NULL;
3276 macro_ptr = NULL;
3278 goto redo;
3279 } else if (tok == TOK_NOSUBST) {
3280 /* discard preprocessor's nosubst markers */
3281 goto redo;
3285 /* convert preprocessor tokens into C tokens */
3286 if (tok == TOK_PPNUM &&
3287 (parse_flags & PARSE_FLAG_TOK_NUM)) {
3288 parse_number((char *)tokc.cstr->data);
3292 /* push back current token and set current token to 'last_tok'. Only
3293 identifier case handled for labels. */
3294 ST_INLN void unget_tok(int last_tok)
3296 int i, n;
3297 int *q;
3298 if (unget_buffer_enabled)
3300 /* assert(macro_ptr == unget_saved_buffer + 1);
3301 assert(*macro_ptr == 0); */
3303 else
3305 unget_saved_macro_ptr = macro_ptr;
3306 unget_buffer_enabled = 1;
3308 q = unget_saved_buffer;
3309 macro_ptr = q;
3310 *q++ = tok;
3311 n = tok_ext_size(tok) - 1;
3312 for(i=0;i<n;i++)
3313 *q++ = tokc.tab[i];
3314 *q = 0; /* end of token string */
3315 tok = last_tok;
3319 /* better than nothing, but needs extension to handle '-E' option
3320 correctly too */
3321 ST_FUNC void preprocess_init(TCCState *s1)
3323 s1->include_stack_ptr = s1->include_stack;
3324 /* XXX: move that before to avoid having to initialize
3325 file->ifdef_stack_ptr ? */
3326 s1->ifdef_stack_ptr = s1->ifdef_stack;
3327 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3329 vtop = vstack - 1;
3330 s1->pack_stack[0] = 0;
3331 s1->pack_stack_ptr = s1->pack_stack;
3334 ST_FUNC void preprocess_new(void)
3336 int i, c;
3337 const char *p, *r;
3339 /* init isid table */
3341 for(i=CH_EOF;i<256;i++)
3342 isidnum_table[i-CH_EOF] = isid(i) || isnum(i);
3344 /* add all tokens */
3345 if (table_ident) {
3346 tcc_free (table_ident);
3347 table_ident = NULL;
3349 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3351 tok_ident = TOK_IDENT;
3352 p = tcc_keywords;
3353 while (*p) {
3354 r = p;
3355 for(;;) {
3356 c = *r++;
3357 if (c == '\0')
3358 break;
3360 tok_alloc(p, r - p - 1);
3361 p = r;
3365 static void line_macro_output(BufferedFile *f, const char *s, TCCState *s1)
3367 switch (s1->Pflag) {
3368 case LINE_MACRO_OUTPUT_FORMAT_STD:
3369 /* "tcc -E -P1" case */
3370 fprintf(s1->ppfp, "# line %d \"%s\"\n", f->line_num, f->filename);
3371 break;
3373 case LINE_MACRO_OUTPUT_FORMAT_NONE:
3374 /* "tcc -E -P" case: don't output a line directive */
3375 break;
3377 case LINE_MACRO_OUTPUT_FORMAT_GCC:
3378 default:
3379 /* "tcc -E" case: a gcc standard by default */
3380 fprintf(s1->ppfp, "# %d \"%s\"%s\n", f->line_num, f->filename, s);
3381 break;
3385 /* Preprocess the current file */
3386 ST_FUNC int tcc_preprocess(TCCState *s1)
3388 BufferedFile *file_ref, **iptr, **iptr_new;
3389 int token_seen, d;
3390 const char *s;
3392 preprocess_init(s1);
3393 ch = file->buf_ptr[0];
3394 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3395 parse_flags &= PARSE_FLAG_ASM_FILE;
3396 parse_flags |= PARSE_FLAG_PREPROCESS | PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES;
3397 token_seen = 0;
3398 file->line_ref = 0;
3399 file_ref = NULL;
3400 iptr = s1->include_stack_ptr;
3402 for (;;) {
3403 next();
3404 if (tok == TOK_EOF) {
3405 break;
3406 } else if (file != file_ref) {
3407 if (file_ref)
3408 line_macro_output(file_ref, "", s1);
3409 goto print_line;
3410 } else if (tok == TOK_LINEFEED) {
3411 if (!token_seen)
3412 continue;
3413 file->line_ref++;
3414 token_seen = 0;
3415 } else if (!token_seen) {
3416 d = file->line_num - file->line_ref;
3417 if (file != file_ref || d >= 8) {
3418 print_line:
3419 s = "";
3420 if (tcc_state->Pflag == LINE_MACRO_OUTPUT_FORMAT_GCC) {
3421 iptr_new = s1->include_stack_ptr;
3422 s = iptr_new > iptr ? " 1"
3423 : iptr_new < iptr ? " 2"
3424 : iptr_new > s1->include_stack ? " 3"
3425 : ""
3428 line_macro_output(file, s, s1);
3429 } else {
3430 while (d > 0)
3431 fputs("\n", s1->ppfp), --d;
3433 file->line_ref = (file_ref = file)->line_num;
3434 token_seen = tok != TOK_LINEFEED;
3435 if (!token_seen)
3436 continue;
3438 fputs(get_tok_str(tok, &tokc), s1->ppfp);
3440 return 0;