x86-64: fix shared libs
[tinycc.git] / tccpp.c
blob0fd90a50a5c83a93a82418fcbd1a0624a04faf04
/*
 *  TCC - Tiny C Compiler
 *
 *  Copyright (c) 2001-2004 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
26 ST_DATA int tok_flags;
27 ST_DATA int parse_flags;
29 ST_DATA struct BufferedFile *file;
30 ST_DATA int ch, tok;
31 ST_DATA CValue tokc;
32 ST_DATA const int *macro_ptr;
33 ST_DATA CString tokcstr; /* current parsed string, if any */
35 /* display benchmark infos */
36 ST_DATA int total_lines;
37 ST_DATA int total_bytes;
38 ST_DATA int tok_ident;
39 ST_DATA TokenSym **table_ident;
41 /* ------------------------------------------------------------------------- */
43 static TokenSym *hash_ident[TOK_HASH_SIZE];
44 static char token_buf[STRING_MAX_SIZE + 1];
45 static unsigned char isidnum_table[256 - CH_EOF];
46 /* isidnum_table flags: */
47 #define IS_SPC 1
48 #define IS_ID 2
49 #define IS_NUM 4
51 static TokenString *macro_stack;
53 static const char tcc_keywords[] =
54 #define DEF(id, str) str "\0"
55 #include "tcctok.h"
56 #undef DEF
59 /* WARNING: the content of this string encodes token numbers */
60 static const unsigned char tok_two_chars[] =
61 /* outdated -- gr
62 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
63 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
64 */{
65 '<','=', TOK_LE,
66 '>','=', TOK_GE,
67 '!','=', TOK_NE,
68 '&','&', TOK_LAND,
69 '|','|', TOK_LOR,
70 '+','+', TOK_INC,
71 '-','-', TOK_DEC,
72 '=','=', TOK_EQ,
73 '<','<', TOK_SHL,
74 '>','>', TOK_SAR,
75 '+','=', TOK_A_ADD,
76 '-','=', TOK_A_SUB,
77 '*','=', TOK_A_MUL,
78 '/','=', TOK_A_DIV,
79 '%','=', TOK_A_MOD,
80 '&','=', TOK_A_AND,
81 '^','=', TOK_A_XOR,
82 '|','=', TOK_A_OR,
83 '-','>', TOK_ARROW,
84 '.','.', 0xa8, // C++ token ?
85 '#','#', TOK_TWOSHARPS,
89 static void next_nomacro_spc(void);
91 ST_FUNC void skip(int c)
93 if (tok != c)
94 tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
95 next();
98 ST_FUNC void expect(const char *msg)
100 tcc_error("%s expected", msg);
103 ST_FUNC void begin_macro(TokenString *str, int alloc)
105 str->alloc = alloc;
106 str->prev = macro_stack;
107 str->prev_ptr = macro_ptr;
108 macro_ptr = str->str;
109 macro_stack = str;
112 ST_FUNC void end_macro(void)
114 TokenString *str = macro_stack;
115 macro_stack = str->prev;
116 macro_ptr = str->prev_ptr;
117 if (str->alloc == 2) {
118 str->alloc = 3; /* just mark as finished */
119 } else {
120 tok_str_free(str->str);
121 if (str->alloc == 1)
122 tcc_free(str);
126 /* ------------------------------------------------------------------------- */
127 /* CString handling */
128 static void cstr_realloc(CString *cstr, int new_size)
130 int size;
131 void *data;
133 size = cstr->size_allocated;
134 if (size == 0)
135 size = 8; /* no need to allocate a too small first string */
136 while (size < new_size)
137 size = size * 2;
138 data = tcc_realloc(cstr->data_allocated, size);
139 cstr->data_allocated = data;
140 cstr->size_allocated = size;
141 cstr->data = data;
144 /* add a byte */
145 ST_FUNC void cstr_ccat(CString *cstr, int ch)
147 int size;
148 size = cstr->size + 1;
149 if (size > cstr->size_allocated)
150 cstr_realloc(cstr, size);
151 ((unsigned char *)cstr->data)[size - 1] = ch;
152 cstr->size = size;
155 ST_FUNC void cstr_cat(CString *cstr, const char *str)
157 int c;
158 for(;;) {
159 c = *str;
160 if (c == '\0')
161 break;
162 cstr_ccat(cstr, c);
163 str++;
167 /* add a wide char */
168 ST_FUNC void cstr_wccat(CString *cstr, int ch)
170 int size;
171 size = cstr->size + sizeof(nwchar_t);
172 if (size > cstr->size_allocated)
173 cstr_realloc(cstr, size);
174 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
175 cstr->size = size;
178 ST_FUNC void cstr_new(CString *cstr)
180 memset(cstr, 0, sizeof(CString));
183 /* free string and reset it to NULL */
184 ST_FUNC void cstr_free(CString *cstr)
186 tcc_free(cstr->data_allocated);
187 cstr_new(cstr);
190 /* reset string to empty */
191 ST_FUNC void cstr_reset(CString *cstr)
193 cstr->size = 0;
196 /* XXX: unicode ? */
197 static void add_char(CString *cstr, int c)
199 if (c == '\'' || c == '\"' || c == '\\') {
200 /* XXX: could be more precise if char or string */
201 cstr_ccat(cstr, '\\');
203 if (c >= 32 && c <= 126) {
204 cstr_ccat(cstr, c);
205 } else {
206 cstr_ccat(cstr, '\\');
207 if (c == '\n') {
208 cstr_ccat(cstr, 'n');
209 } else {
210 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
211 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
212 cstr_ccat(cstr, '0' + (c & 7));
217 /* ------------------------------------------------------------------------- */
218 /* allocate a new token */
219 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
221 TokenSym *ts, **ptable;
222 int i;
224 if (tok_ident >= SYM_FIRST_ANOM)
225 tcc_error("memory full (symbols)");
227 /* expand token table if needed */
228 i = tok_ident - TOK_IDENT;
229 if ((i % TOK_ALLOC_INCR) == 0) {
230 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
231 table_ident = ptable;
234 ts = tcc_malloc(sizeof(TokenSym) + len);
235 table_ident[i] = ts;
236 ts->tok = tok_ident++;
237 ts->sym_define = NULL;
238 ts->sym_label = NULL;
239 ts->sym_struct = NULL;
240 ts->sym_identifier = NULL;
241 ts->len = len;
242 ts->hash_next = NULL;
243 memcpy(ts->str, str, len);
244 ts->str[len] = '\0';
245 *pts = ts;
246 return ts;
249 #define TOK_HASH_INIT 1
250 #define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
252 /* find a token and add it if not found */
253 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
255 TokenSym *ts, **pts;
256 int i;
257 unsigned int h;
259 h = TOK_HASH_INIT;
260 for(i=0;i<len;i++)
261 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
262 h &= (TOK_HASH_SIZE - 1);
264 pts = &hash_ident[h];
265 for(;;) {
266 ts = *pts;
267 if (!ts)
268 break;
269 if (ts->len == len && !memcmp(ts->str, str, len))
270 return ts;
271 pts = &(ts->hash_next);
273 return tok_alloc_new(pts, str, len);
276 /* XXX: buffer overflow */
277 /* XXX: float tokens */
278 ST_FUNC const char *get_tok_str(int v, CValue *cv)
280 static char buf[STRING_MAX_SIZE + 1];
281 static CString cstr_buf;
282 char *p;
283 int i, len;
285 /* NOTE: to go faster, we give a fixed buffer for small strings */
286 cstr_reset(&cstr_buf);
287 cstr_buf.data = buf;
288 cstr_buf.size_allocated = sizeof(buf);
289 p = buf;
291 switch(v) {
292 case TOK_CINT:
293 case TOK_CUINT:
294 /* XXX: not quite exact, but only useful for testing */
295 sprintf(p, "%llu", (unsigned long long)cv->i);
296 break;
297 case TOK_CLLONG:
298 case TOK_CULLONG:
299 /* XXX: not quite exact, but only useful for testing */
300 #ifdef _WIN32
301 sprintf(p, "%u", (unsigned)cv->i);
302 #else
303 sprintf(p, "%llu", (unsigned long long)cv->i);
304 #endif
305 break;
306 case TOK_LCHAR:
307 cstr_ccat(&cstr_buf, 'L');
308 case TOK_CCHAR:
309 cstr_ccat(&cstr_buf, '\'');
310 add_char(&cstr_buf, cv->i);
311 cstr_ccat(&cstr_buf, '\'');
312 cstr_ccat(&cstr_buf, '\0');
313 break;
314 case TOK_PPNUM:
315 case TOK_PPSTR:
316 return (char*)cv->str.data;
317 case TOK_LSTR:
318 cstr_ccat(&cstr_buf, 'L');
319 case TOK_STR:
320 cstr_ccat(&cstr_buf, '\"');
321 if (v == TOK_STR) {
322 len = cv->str.size - 1;
323 for(i=0;i<len;i++)
324 add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]);
325 } else {
326 len = (cv->str.size / sizeof(nwchar_t)) - 1;
327 for(i=0;i<len;i++)
328 add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]);
330 cstr_ccat(&cstr_buf, '\"');
331 cstr_ccat(&cstr_buf, '\0');
332 break;
334 case TOK_CFLOAT:
335 cstr_cat(&cstr_buf, "<float>");
336 break;
337 case TOK_CDOUBLE:
338 cstr_cat(&cstr_buf, "<double>");
339 break;
340 case TOK_CLDOUBLE:
341 cstr_cat(&cstr_buf, "<long double>");
342 break;
343 case TOK_LINENUM:
344 cstr_cat(&cstr_buf, "<linenumber>");
345 break;
346 //return NULL; /* should not happen */
348 /* above tokens have value, the ones below don't */
350 case TOK_LT:
351 v = '<';
352 goto addv;
353 case TOK_GT:
354 v = '>';
355 goto addv;
356 case TOK_DOTS:
357 return strcpy(p, "...");
358 case TOK_A_SHL:
359 return strcpy(p, "<<=");
360 case TOK_A_SAR:
361 return strcpy(p, ">>=");
362 default:
363 if (v < TOK_IDENT) {
364 /* search in two bytes table */
365 const unsigned char *q = tok_two_chars;
366 while (*q) {
367 if (q[2] == v) {
368 *p++ = q[0];
369 *p++ = q[1];
370 *p = '\0';
371 return buf;
373 q += 3;
375 if (v >= 127) {
376 sprintf(buf, "<%02x>", v);
377 return buf;
379 addv:
380 *p++ = v;
381 *p = '\0';
382 } else if (v < tok_ident) {
383 return table_ident[v - TOK_IDENT]->str;
384 } else if (v >= SYM_FIRST_ANOM) {
385 /* special name for anonymous symbol */
386 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
387 } else {
388 /* should never happen */
389 return NULL;
391 break;
393 return cstr_buf.data;
396 /* return the current character, handling end of block if necessary
397 (but not stray) */
398 ST_FUNC int handle_eob(void)
400 BufferedFile *bf = file;
401 int len;
403 /* only tries to read if really end of buffer */
404 if (bf->buf_ptr >= bf->buf_end) {
405 if (bf->fd != -1) {
406 #if defined(PARSE_DEBUG)
407 len = 1;
408 #else
409 len = IO_BUF_SIZE;
410 #endif
411 len = read(bf->fd, bf->buffer, len);
412 if (len < 0)
413 len = 0;
414 } else {
415 len = 0;
417 total_bytes += len;
418 bf->buf_ptr = bf->buffer;
419 bf->buf_end = bf->buffer + len;
420 *bf->buf_end = CH_EOB;
422 if (bf->buf_ptr < bf->buf_end) {
423 return bf->buf_ptr[0];
424 } else {
425 bf->buf_ptr = bf->buf_end;
426 return CH_EOF;
430 /* read next char from current input file and handle end of input buffer */
431 ST_INLN void inp(void)
433 ch = *(++(file->buf_ptr));
434 /* end of buffer/file handling */
435 if (ch == CH_EOB)
436 ch = handle_eob();
439 /* handle '\[\r]\n' */
440 static int handle_stray_noerror(void)
442 while (ch == '\\') {
443 inp();
444 if (ch == '\n') {
445 file->line_num++;
446 inp();
447 } else if (ch == '\r') {
448 inp();
449 if (ch != '\n')
450 goto fail;
451 file->line_num++;
452 inp();
453 } else {
454 fail:
455 return 1;
458 return 0;
/* like handle_stray_noerror(), but a stray backslash is an error */
static void handle_stray(void)
{
    int stray = handle_stray_noerror();

    if (stray)
        tcc_error("stray '\\' in program");
}
467 /* skip the stray and handle the \\n case. Output an error if
468 incorrect char after the stray */
469 static int handle_stray1(uint8_t *p)
471 int c;
473 file->buf_ptr = p;
474 if (p >= file->buf_end) {
475 c = handle_eob();
476 if (c != '\\')
477 return c;
478 p = file->buf_ptr;
480 ch = *p;
481 if (handle_stray_noerror()) {
482 if (!(parse_flags & PARSE_FLAG_ACCEPT_STRAYS))
483 tcc_error("stray '\\' in program");
484 *--file->buf_ptr = '\\';
486 p = file->buf_ptr;
487 c = *p;
488 return c;
/* Advance 'p' and load the character it points to into 'c'.
   Handles just the EOB case, but not stray: a '\\' is passed to
   handle_eob() and 'p' is reloaded from file->buf_ptr. */
#define PEEKC_EOB(c, p)             \
{                                   \
    p++;                            \
    c = *p;                         \
    if (c == '\\') {                \
        file->buf_ptr = p;          \
        c = handle_eob();           \
        p = file->buf_ptr;          \
    }                               \
}
/* Advance 'p' and load the character it points to into 'c'.
   Handles the complicated stray case: a '\\' is passed to
   handle_stray1() and 'p' is reloaded from file->buf_ptr. */
#define PEEKC(c, p)                 \
{                                   \
    p++;                            \
    c = *p;                         \
    if (c == '\\') {                \
        c = handle_stray1(p);       \
        p = file->buf_ptr;          \
    }                               \
}
514 /* input with '\[\r]\n' handling. Note that this function cannot
515 handle other characters after '\', so you cannot call it inside
516 strings or comments */
517 ST_FUNC void minp(void)
519 inp();
520 if (ch == '\\')
521 handle_stray();
525 /* single line C++ comments */
526 static uint8_t *parse_line_comment(uint8_t *p)
528 int c;
530 p++;
531 for(;;) {
532 c = *p;
533 redo:
534 if (c == '\n' || c == CH_EOF) {
535 break;
536 } else if (c == '\\') {
537 file->buf_ptr = p;
538 c = handle_eob();
539 p = file->buf_ptr;
540 if (c == '\\') {
541 PEEKC_EOB(c, p);
542 if (c == '\n') {
543 file->line_num++;
544 PEEKC_EOB(c, p);
545 } else if (c == '\r') {
546 PEEKC_EOB(c, p);
547 if (c == '\n') {
548 file->line_num++;
549 PEEKC_EOB(c, p);
552 } else {
553 goto redo;
555 } else {
556 p++;
559 return p;
562 /* C comments */
563 ST_FUNC uint8_t *parse_comment(uint8_t *p)
565 int c;
567 p++;
568 for(;;) {
569 /* fast skip loop */
570 for(;;) {
571 c = *p;
572 if (c == '\n' || c == '*' || c == '\\')
573 break;
574 p++;
575 c = *p;
576 if (c == '\n' || c == '*' || c == '\\')
577 break;
578 p++;
580 /* now we can handle all the cases */
581 if (c == '\n') {
582 file->line_num++;
583 p++;
584 } else if (c == '*') {
585 p++;
586 for(;;) {
587 c = *p;
588 if (c == '*') {
589 p++;
590 } else if (c == '/') {
591 goto end_of_comment;
592 } else if (c == '\\') {
593 file->buf_ptr = p;
594 c = handle_eob();
595 p = file->buf_ptr;
596 if (c == CH_EOF)
597 tcc_error("unexpected end of file in comment");
598 if (c == '\\') {
599 /* skip '\[\r]\n', otherwise just skip the stray */
600 while (c == '\\') {
601 PEEKC_EOB(c, p);
602 if (c == '\n') {
603 file->line_num++;
604 PEEKC_EOB(c, p);
605 } else if (c == '\r') {
606 PEEKC_EOB(c, p);
607 if (c == '\n') {
608 file->line_num++;
609 PEEKC_EOB(c, p);
611 } else {
612 goto after_star;
616 } else {
617 break;
620 after_star: ;
621 } else {
622 /* stray, eob or eof */
623 file->buf_ptr = p;
624 c = handle_eob();
625 p = file->buf_ptr;
626 if (c == CH_EOF) {
627 tcc_error("unexpected end of file in comment");
628 } else if (c == '\\') {
629 p++;
633 end_of_comment:
634 p++;
635 return p;
638 #define cinp minp
640 static inline void skip_spaces(void)
642 while (isidnum_table[ch - CH_EOF] & IS_SPC)
643 cinp();
646 static inline int check_space(int t, int *spc)
648 if (t < 256 && (isidnum_table[t - CH_EOF] & IS_SPC)) {
649 if (*spc)
650 return 1;
651 *spc = 1;
652 } else
653 *spc = 0;
654 return 0;
657 /* parse a string without interpreting escapes */
658 static uint8_t *parse_pp_string(uint8_t *p,
659 int sep, CString *str)
661 int c;
662 p++;
663 for(;;) {
664 c = *p;
665 if (c == sep) {
666 break;
667 } else if (c == '\\') {
668 file->buf_ptr = p;
669 c = handle_eob();
670 p = file->buf_ptr;
671 if (c == CH_EOF) {
672 unterminated_string:
673 /* XXX: indicate line number of start of string */
674 tcc_error("missing terminating %c character", sep);
675 } else if (c == '\\') {
676 /* escape : just skip \[\r]\n */
677 PEEKC_EOB(c, p);
678 if (c == '\n') {
679 file->line_num++;
680 p++;
681 } else if (c == '\r') {
682 PEEKC_EOB(c, p);
683 if (c != '\n')
684 expect("'\n' after '\r'");
685 file->line_num++;
686 p++;
687 } else if (c == CH_EOF) {
688 goto unterminated_string;
689 } else {
690 if (str) {
691 cstr_ccat(str, '\\');
692 cstr_ccat(str, c);
694 p++;
697 } else if (c == '\n') {
698 file->line_num++;
699 goto add_char;
700 } else if (c == '\r') {
701 PEEKC_EOB(c, p);
702 if (c != '\n') {
703 if (str)
704 cstr_ccat(str, '\r');
705 } else {
706 file->line_num++;
707 goto add_char;
709 } else {
710 add_char:
711 if (str)
712 cstr_ccat(str, c);
713 p++;
716 p++;
717 return p;
720 /* skip block of text until #else, #elif or #endif. skip also pairs of
721 #if/#endif */
722 static void preprocess_skip(void)
724 int a, start_of_line, c, in_warn_or_error;
725 uint8_t *p;
727 p = file->buf_ptr;
728 a = 0;
729 redo_start:
730 start_of_line = 1;
731 in_warn_or_error = 0;
732 for(;;) {
733 redo_no_start:
734 c = *p;
735 switch(c) {
736 case ' ':
737 case '\t':
738 case '\f':
739 case '\v':
740 case '\r':
741 p++;
742 goto redo_no_start;
743 case '\n':
744 file->line_num++;
745 p++;
746 goto redo_start;
747 case '\\':
748 file->buf_ptr = p;
749 c = handle_eob();
750 if (c == CH_EOF) {
751 expect("#endif");
752 } else if (c == '\\') {
753 ch = file->buf_ptr[0];
754 handle_stray_noerror();
756 p = file->buf_ptr;
757 goto redo_no_start;
758 /* skip strings */
759 case '\"':
760 case '\'':
761 if (in_warn_or_error)
762 goto _default;
763 p = parse_pp_string(p, c, NULL);
764 break;
765 /* skip comments */
766 case '/':
767 if (in_warn_or_error)
768 goto _default;
769 file->buf_ptr = p;
770 ch = *p;
771 minp();
772 p = file->buf_ptr;
773 if (ch == '*') {
774 p = parse_comment(p);
775 } else if (ch == '/') {
776 p = parse_line_comment(p);
778 break;
779 case '#':
780 p++;
781 if (start_of_line) {
782 file->buf_ptr = p;
783 next_nomacro();
784 p = file->buf_ptr;
785 if (a == 0 &&
786 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
787 goto the_end;
788 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
789 a++;
790 else if (tok == TOK_ENDIF)
791 a--;
792 else if( tok == TOK_ERROR || tok == TOK_WARNING)
793 in_warn_or_error = 1;
794 else if (tok == TOK_LINEFEED)
795 goto redo_start;
796 } else if (parse_flags & PARSE_FLAG_ASM_FILE)
797 p = parse_line_comment(p);
798 break;
799 _default:
800 default:
801 p++;
802 break;
804 start_of_line = 0;
806 the_end: ;
807 file->buf_ptr = p;
810 /* ParseState handling */
812 /* XXX: currently, no include file info is stored. Thus, we cannot display
813 accurate messages if the function or data definition spans multiple
814 files */
816 /* save current parse state in 's' */
817 ST_FUNC void save_parse_state(ParseState *s)
819 s->line_num = file->line_num;
820 s->macro_ptr = macro_ptr;
821 s->tok = tok;
822 s->tokc = tokc;
825 /* restore parse state from 's' */
826 ST_FUNC void restore_parse_state(ParseState *s)
828 file->line_num = s->line_num;
829 macro_ptr = s->macro_ptr;
830 tok = s->tok;
831 tokc = s->tokc;
834 /* return the number of additional 'ints' necessary to store the
835 token */
836 static inline int tok_size(const int *p)
838 switch(*p) {
839 /* 4 bytes */
840 case TOK_CINT:
841 case TOK_CUINT:
842 case TOK_CCHAR:
843 case TOK_LCHAR:
844 case TOK_CFLOAT:
845 case TOK_LINENUM:
846 return 1 + 1;
847 case TOK_STR:
848 case TOK_LSTR:
849 case TOK_PPNUM:
850 case TOK_PPSTR:
851 return 1 + ((sizeof(CString) + ((CString *)(p+1))->size + 3) >> 2);
852 case TOK_CDOUBLE:
853 case TOK_CLLONG:
854 case TOK_CULLONG:
855 return 1 + 2;
856 case TOK_CLDOUBLE:
857 return 1 + LDOUBLE_SIZE / 4;
858 default:
859 return 1 + 0;
863 /* token string handling */
865 ST_INLN void tok_str_new(TokenString *s)
867 s->str = NULL;
868 s->len = 0;
869 s->allocated_len = 0;
870 s->last_line_num = -1;
873 ST_FUNC void tok_str_free(int *str)
875 tcc_free(str);
878 static int *tok_str_realloc(TokenString *s)
880 int *str, len;
882 if (s->allocated_len == 0) {
883 len = 8;
884 } else {
885 len = s->allocated_len * 2;
887 str = tcc_realloc(s->str, len * sizeof(int));
888 s->allocated_len = len;
889 s->str = str;
890 return str;
893 ST_FUNC void tok_str_add(TokenString *s, int t)
895 int len, *str;
897 len = s->len;
898 str = s->str;
899 if (len >= s->allocated_len)
900 str = tok_str_realloc(s);
901 str[len++] = t;
902 s->len = len;
905 static void tok_str_add2(TokenString *s, int t, CValue *cv)
907 int len, *str;
909 len = s->len;
910 str = s->str;
912 /* allocate space for worst case */
913 if (len + TOK_MAX_SIZE > s->allocated_len)
914 str = tok_str_realloc(s);
915 str[len++] = t;
916 switch(t) {
917 case TOK_CINT:
918 case TOK_CUINT:
919 case TOK_CCHAR:
920 case TOK_LCHAR:
921 case TOK_CFLOAT:
922 case TOK_LINENUM:
923 str[len++] = cv->tab[0];
924 break;
925 case TOK_PPNUM:
926 case TOK_PPSTR:
927 case TOK_STR:
928 case TOK_LSTR:
930 /* Insert the string into the int array. */
931 size_t nb_words =
932 1 + (cv->str.size + sizeof(int) - 1) / sizeof(int);
933 while ((len + nb_words) > s->allocated_len)
934 str = tok_str_realloc(s);
935 str[len] = cv->str.size;
936 memcpy(&str[len + 1], cv->str.data, cv->str.size);
937 len += nb_words;
939 break;
940 case TOK_CDOUBLE:
941 case TOK_CLLONG:
942 case TOK_CULLONG:
943 #if LDOUBLE_SIZE == 8
944 case TOK_CLDOUBLE:
945 #endif
946 str[len++] = cv->tab[0];
947 str[len++] = cv->tab[1];
948 break;
949 #if LDOUBLE_SIZE == 12
950 case TOK_CLDOUBLE:
951 str[len++] = cv->tab[0];
952 str[len++] = cv->tab[1];
953 str[len++] = cv->tab[2];
954 #elif LDOUBLE_SIZE == 16
955 case TOK_CLDOUBLE:
956 str[len++] = cv->tab[0];
957 str[len++] = cv->tab[1];
958 str[len++] = cv->tab[2];
959 str[len++] = cv->tab[3];
960 #elif LDOUBLE_SIZE != 8
961 #error add long double size support
962 #endif
963 break;
964 default:
965 break;
967 s->len = len;
970 /* add the current parse token in token string 's' */
971 ST_FUNC void tok_str_add_tok(TokenString *s)
973 CValue cval;
975 /* save line number info */
976 if (file->line_num != s->last_line_num) {
977 s->last_line_num = file->line_num;
978 cval.i = s->last_line_num;
979 tok_str_add2(s, TOK_LINENUM, &cval);
981 tok_str_add2(s, tok, &tokc);
984 /* get a token from an integer array and increment pointer
985 accordingly. we code it as a macro to avoid pointer aliasing. */
986 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
988 const int *p = *pp;
989 int n, *tab;
991 tab = cv->tab;
992 switch(*t = *p++) {
993 case TOK_CINT:
994 case TOK_CUINT:
995 case TOK_CCHAR:
996 case TOK_LCHAR:
997 case TOK_CFLOAT:
998 case TOK_LINENUM:
999 tab[0] = *p++;
1000 break;
1001 case TOK_STR:
1002 case TOK_LSTR:
1003 case TOK_PPNUM:
1004 case TOK_PPSTR:
1005 cv->str.size = *p++;
1006 cv->str.data = p;
1007 cv->str.data_allocated = 0;
1008 p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
1009 break;
1010 case TOK_CDOUBLE:
1011 case TOK_CLLONG:
1012 case TOK_CULLONG:
1013 n = 2;
1014 goto copy;
1015 case TOK_CLDOUBLE:
1016 #if LDOUBLE_SIZE == 16
1017 n = 4;
1018 #elif LDOUBLE_SIZE == 12
1019 n = 3;
1020 #elif LDOUBLE_SIZE == 8
1021 n = 2;
1022 #else
1023 # error add long double size support
1024 #endif
1025 copy:
1027 *tab++ = *p++;
1028 while (--n);
1029 break;
1030 default:
1031 break;
1033 *pp = p;
1036 /* Calling this function is expensive, but it is not possible
1037 to read a token string backwards. */
1038 static int tok_last(const int *str0, const int *str1)
1040 const int *str = str0;
1041 int tok = 0;
1042 CValue cval;
1044 while (str < str1)
1045 TOK_GET(&tok, &str, &cval);
1046 return tok;
1049 static int macro_is_equal(const int *a, const int *b)
1051 char buf[STRING_MAX_SIZE + 1];
1052 CValue cv;
1053 int t;
1054 while (*a && *b) {
1055 TOK_GET(&t, &a, &cv);
1056 pstrcpy(buf, sizeof buf, get_tok_str(t, &cv));
1057 TOK_GET(&t, &b, &cv);
1058 if (strcmp(buf, get_tok_str(t, &cv)))
1059 return 0;
1061 return !(*a || *b);
1064 static void pp_line(TCCState *s1, BufferedFile *f, int level)
1066 int d = f->line_num - f->line_ref;
1067 if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_NONE
1068 || (level == 0 && f->line_ref && d < 8))
1070 while (d > 0)
1071 fputs("\n", s1->ppfp), --d;
1073 else
1074 if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_STD) {
1075 fprintf(s1->ppfp, "#line %d \"%s\"\n", f->line_num, f->filename);
1077 else {
1078 fprintf(s1->ppfp, "# %d \"%s\"%s\n", f->line_num, f->filename,
1079 level > 0 ? " 1" : level < 0 ? " 2" : "");
1081 f->line_ref = f->line_num;
1084 static void tok_print(const char *msg, const int *str)
1086 FILE *pr = tcc_state->ppfp;
1087 int t;
1088 CValue cval;
1090 fprintf(pr, "%s ", msg);
1091 while (str) {
1092 TOK_GET(&t, &str, &cval);
1093 if (!t)
1094 break;
1095 fprintf(pr,"%s", get_tok_str(t, &cval));
1097 fprintf(pr, "\n");
1100 static int define_print_prepared(Sym *s)
1102 if (!s || !tcc_state->ppfp || tcc_state->dflag == 0)
1103 return 0;
1105 if (s->v < TOK_IDENT || s->v >= tok_ident)
1106 return 0;
1108 if (file) {
1109 file->line_num--;
1110 pp_line(tcc_state, file, 0);
1111 file->line_ref = ++file->line_num;
1113 return 1;
1116 static void define_print(int v)
1118 FILE *pr = tcc_state->ppfp;
1119 Sym *s, *a;
1121 s = define_find(v);
1122 if (define_print_prepared(s) == 0)
1123 return;
1125 fprintf(pr, "// #define %s", get_tok_str(v, NULL));
1126 if (s->type.t == MACRO_FUNC) {
1127 a = s->next;
1128 fprintf(pr,"(");
1129 if (a)
1130 for (;;) {
1131 fprintf(pr,"%s", get_tok_str(a->v & ~SYM_FIELD, NULL));
1132 if (!(a = a->next))
1133 break;
1134 fprintf(pr,",");
1136 fprintf(pr,")");
1138 tok_print("", s->d);
1141 static void undef_print(int v)
1143 FILE *pr = tcc_state->ppfp;
1144 Sym *s;
1146 s = define_find(v);
1147 if (define_print_prepared(s) == 0)
1148 return;
1150 fprintf(pr, "// #undef %s\n", get_tok_str(s->v, NULL));
1153 ST_FUNC void print_defines(void)
1155 Sym *top = define_stack;
1156 while (top) {
1157 define_print(top->v);
1158 top = top->prev;
1162 /* defines handling */
1163 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1165 Sym *s;
1167 s = define_find(v);
1168 if (s && !macro_is_equal(s->d, str))
1169 tcc_warning("%s redefined", get_tok_str(v, NULL));
1171 s = sym_push2(&define_stack, v, macro_type, 0);
1172 s->d = str;
1173 s->next = first_arg;
1174 table_ident[v - TOK_IDENT]->sym_define = s;
1177 /* undefined a define symbol. Its name is just set to zero */
1178 ST_FUNC void define_undef(Sym *s)
1180 int v = s->v;
1181 undef_print(v);
1182 if (v >= TOK_IDENT && v < tok_ident)
1183 table_ident[v - TOK_IDENT]->sym_define = NULL;
1186 ST_INLN Sym *define_find(int v)
1188 v -= TOK_IDENT;
1189 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1190 return NULL;
1191 return table_ident[v]->sym_define;
1194 /* free define stack until top reaches 'b' */
1195 ST_FUNC void free_defines(Sym *b)
1197 Sym *top, *top1;
1198 int v;
1200 top = define_stack;
1201 while (top != b) {
1202 top1 = top->prev;
1203 /* do not free args or predefined defines */
1204 if (top->d)
1205 tok_str_free(top->d);
1206 v = top->v;
1207 if (v >= TOK_IDENT && v < tok_ident)
1208 table_ident[v - TOK_IDENT]->sym_define = NULL;
1209 sym_free(top);
1210 top = top1;
1212 define_stack = b;
1215 /* label lookup */
1216 ST_FUNC Sym *label_find(int v)
1218 v -= TOK_IDENT;
1219 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1220 return NULL;
1221 return table_ident[v]->sym_label;
1224 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1226 Sym *s, **ps;
1227 s = sym_push2(ptop, v, 0, 0);
1228 s->r = flags;
1229 ps = &table_ident[v - TOK_IDENT]->sym_label;
1230 if (ptop == &global_label_stack) {
1231 /* modify the top most local identifier, so that
1232 sym_identifier will point to 's' when popped */
1233 while (*ps != NULL)
1234 ps = &(*ps)->prev_tok;
1236 s->prev_tok = *ps;
1237 *ps = s;
1238 return s;
1241 /* pop labels until element last is reached. Look if any labels are
1242 undefined. Define symbols if '&&label' was used. */
1243 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1245 Sym *s, *s1;
1246 for(s = *ptop; s != slast; s = s1) {
1247 s1 = s->prev;
1248 if (s->r == LABEL_DECLARED) {
1249 tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1250 } else if (s->r == LABEL_FORWARD) {
1251 tcc_error("label '%s' used but not defined",
1252 get_tok_str(s->v, NULL));
1253 } else {
1254 if (s->c) {
1255 /* define corresponding symbol. A size of
1256 1 is put. */
1257 put_extern_sym(s, cur_text_section, s->jnext, 1);
1260 /* remove label */
1261 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1262 sym_free(s);
1264 *ptop = slast;
1267 /* eval an expression for #if/#elif */
1268 static int expr_preprocess(void)
1270 int c, t;
1271 TokenString str;
1273 tok_str_new(&str);
1274 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1275 next(); /* do macro subst */
1276 if (tok == TOK_DEFINED) {
1277 next_nomacro();
1278 t = tok;
1279 if (t == '(')
1280 next_nomacro();
1281 c = define_find(tok) != 0;
1282 if (t == '(')
1283 next_nomacro();
1284 tok = TOK_CINT;
1285 tokc.i = c;
1286 } else if (tok >= TOK_IDENT) {
1287 /* if undefined macro */
1288 tok = TOK_CINT;
1289 tokc.i = 0;
1291 tok_str_add_tok(&str);
1293 tok_str_add(&str, -1); /* simulate end of file */
1294 tok_str_add(&str, 0);
1295 /* now evaluate C constant expression */
1296 begin_macro(&str, 0);
1297 next();
1298 c = expr_const();
1299 end_macro();
1300 return c != 0;
1304 /* parse after #define */
1305 ST_FUNC void parse_define(void)
1307 Sym *s, *first, **ps;
1308 int v, t, varg, is_vaargs, spc;
1309 int saved_parse_flags = parse_flags;
1310 TokenString str;
1312 v = tok;
1313 if (v < TOK_IDENT)
1314 tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1315 /* XXX: should check if same macro (ANSI) */
1316 first = NULL;
1317 t = MACRO_OBJ;
1318 /* '(' must be just after macro definition for MACRO_FUNC */
1319 parse_flags |= PARSE_FLAG_SPACES;
1320 next_nomacro_spc();
1321 if (tok == '(') {
1322 next_nomacro();
1323 ps = &first;
1324 if (tok != ')') for (;;) {
1325 varg = tok;
1326 next_nomacro();
1327 is_vaargs = 0;
1328 if (varg == TOK_DOTS) {
1329 varg = TOK___VA_ARGS__;
1330 is_vaargs = 1;
1331 } else if (tok == TOK_DOTS && gnu_ext) {
1332 is_vaargs = 1;
1333 next_nomacro();
1335 if (varg < TOK_IDENT)
1336 bad_list:
1337 tcc_error("bad macro parameter list");
1338 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1339 *ps = s;
1340 ps = &s->next;
1341 if (tok == ')')
1342 break;
1343 if (tok != ',' || is_vaargs)
1344 goto bad_list;
1345 next_nomacro();
1347 next_nomacro_spc();
1348 t = MACRO_FUNC;
1350 tok_str_new(&str);
1351 spc = 2;
1352 parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED;
1353 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1354 /* remove spaces around ## and after '#' */
1355 if (TOK_TWOSHARPS == tok) {
1356 if (2 == spc)
1357 goto bad_twosharp;
1358 if (1 == spc)
1359 --str.len;
1360 spc = 3;
1361 } else if ('#' == tok) {
1362 spc = 4;
1363 } else if (check_space(tok, &spc)) {
1364 goto skip;
1366 tok_str_add2(&str, tok, &tokc);
1367 skip:
1368 next_nomacro_spc();
1371 parse_flags = saved_parse_flags;
1372 if (spc == 1)
1373 --str.len; /* remove trailing space */
1374 tok_str_add(&str, 0);
1375 if (3 == spc)
1376 bad_twosharp:
1377 tcc_error("'##' cannot appear at either end of macro");
1378 define_push(v, t, str.str, first);
1379 define_print(v);
1382 static inline int hash_cached_include(const char *filename)
1384 const unsigned char *s;
1385 unsigned int h;
1387 h = TOK_HASH_INIT;
1388 s = (unsigned char *) filename;
1389 while (*s) {
1390 h = TOK_HASH_FUNC(h, *s);
1391 s++;
1393 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1394 return h;
1397 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1399 CachedInclude *e;
1400 int i, h;
1401 h = hash_cached_include(filename);
1402 i = s1->cached_includes_hash[h];
1403 for(;;) {
1404 if (i == 0)
1405 break;
1406 e = s1->cached_includes[i - 1];
1407 if (0 == PATHCMP(e->filename, filename))
1408 return e;
1409 i = e->hash_next;
1411 return NULL;
1414 static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
1416 CachedInclude *e;
1417 int h;
1419 if (search_cached_include(s1, filename))
1420 return;
1421 #ifdef INC_DEBUG
1422 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1423 #endif
1424 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1425 strcpy(e->filename, filename);
1426 e->ifndef_macro = ifndef_macro;
1427 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1428 /* add in hash table */
1429 h = hash_cached_include(filename);
1430 e->hash_next = s1->cached_includes_hash[h];
1431 s1->cached_includes_hash[h] = s1->nb_cached_includes;
1434 static void pragma_parse(TCCState *s1)
1436 next_nomacro();
1437 if (tok == TOK_push_macro || tok == TOK_pop_macro) {
1438 int t = tok, v;
1439 Sym *s;
1441 if (next(), tok != '(')
1442 goto pragma_err;
1443 if (next(), tok != TOK_STR)
1444 goto pragma_err;
1445 v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok;
1446 if (next(), tok != ')')
1447 goto pragma_err;
1448 if (t == TOK_push_macro) {
1449 while (NULL == (s = define_find(v)))
1450 define_push(v, 0, NULL, NULL);
1451 s->type.ref = s; /* set push boundary */
1452 } else {
1453 for (s = define_stack; s; s = s->prev)
1454 if (s->v == v && s->type.ref == s) {
1455 s->type.ref = NULL;
1456 break;
1459 if (s)
1460 table_ident[v - TOK_IDENT]->sym_define = s->d ? s : NULL;
1461 else
1462 tcc_warning("unbalanced #pragma pop_macro");
1464 } else if (tok == TOK_once) {
1465 add_cached_include(s1, file->filename, TOK_once);
1467 } else if (s1->ppfp) {
1468 /* tcc -E: keep pragmas below unchanged */
1469 unget_tok(' ');
1470 unget_tok(TOK_PRAGMA);
1471 unget_tok('#');
1472 unget_tok(TOK_LINEFEED);
1474 } else if (tok == TOK_pack) {
1475 /* This may be:
1476 #pragma pack(1) // set
1477 #pragma pack() // reset to default
1478 #pragma pack(push,1) // push & set
1479 #pragma pack(pop) // restore previous */
1480 next();
1481 skip('(');
1482 if (tok == TOK_ASM_pop) {
1483 next();
1484 if (s1->pack_stack_ptr <= s1->pack_stack) {
1485 stk_error:
1486 tcc_error("out of pack stack");
1488 s1->pack_stack_ptr--;
1489 } else {
1490 int val = 0;
1491 if (tok != ')') {
1492 if (tok == TOK_ASM_push) {
1493 next();
1494 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
1495 goto stk_error;
1496 s1->pack_stack_ptr++;
1497 skip(',');
1499 if (tok != TOK_CINT)
1500 goto pragma_err;
1501 val = tokc.i;
1502 if (val < 1 || val > 16 || (val & (val - 1)) != 0)
1503 goto pragma_err;
1504 next();
1506 *s1->pack_stack_ptr = val;
1508 if (tok != ')')
1509 goto pragma_err;
1511 } else if (tok == TOK_comment) {
1512 char *file;
1513 next();
1514 skip('(');
1515 if (tok != TOK_lib)
1516 goto pragma_warn;
1517 next();
1518 skip(',');
1519 if (tok != TOK_STR)
1520 goto pragma_err;
1521 file = tcc_strdup((char *)tokc.str.data);
1522 dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file);
1523 next();
1524 if (tok != ')')
1525 goto pragma_err;
1526 } else {
1527 pragma_warn:
1528 if (s1->warn_unsupported)
1529 tcc_warning("#pragma %s is ignored", get_tok_str(tok, &tokc));
1531 return;
1533 pragma_err:
1534 tcc_error("malformed #pragma directive");
1535 return;
/* Process one preprocessor directive. It is called from the '#' case of
   next_nomacro1 when '#' is seen at the beginning of a line while
   PARSE_FLAG_PREPROCESS is set. parse_flags is replaced by a
   preprocessing set for the duration of the directive and restored at
   'the_end'. The directive keyword is read with next_nomacro() and
   dispatched by the switch below. Cases that 'break' out of the switch
   have the rest of the line skipped up to TOK_LINEFEED. */
1538 /* is_bof is true if first non space token at beginning of file */
1539 ST_FUNC void preprocess(int is_bof)
1541 TCCState *s1 = tcc_state;
1542 int i, c, n, saved_parse_flags;
1543 char buf[1024], *q;
1544 Sym *s;
1546 saved_parse_flags = parse_flags;
1547 parse_flags = PARSE_FLAG_PREPROCESS
1548 | PARSE_FLAG_TOK_NUM
1549 | PARSE_FLAG_TOK_STR
1550 | PARSE_FLAG_LINEFEED
1551 | (parse_flags & PARSE_FLAG_ASM_FILE)
1554 next_nomacro();
1555 redo:
1556 switch(tok) {
1557 case TOK_DEFINE:
1558 next_nomacro();
1559 parse_define();
1560 break;
1561 case TOK_UNDEF:
1562 next_nomacro();
1563 s = define_find(tok);
1564 /* undefine symbol by putting an invalid name */
1565 if (s)
1566 define_undef(s);
1567 break;
1568 case TOK_INCLUDE:
1569 case TOK_INCLUDE_NEXT:
/* the header name is read character by character from the file buffer,
   not through the tokenizer, unless it is a computed include */
1570 ch = file->buf_ptr[0];
1571 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1572 skip_spaces();
1573 if (ch == '<') {
1574 c = '>';
1575 goto read_name;
1576 } else if (ch == '\"') {
1577 c = ch;
1578 read_name:
1579 inp();
1580 q = buf;
/* copy up to the closing delimiter; characters beyond sizeof(buf) - 1
   are read but not stored */
1581 while (ch != c && ch != '\n' && ch != CH_EOF) {
1582 if ((q - buf) < sizeof(buf) - 1)
1583 *q++ = ch;
1584 if (ch == '\\') {
1585 if (handle_stray_noerror() == 0)
1586 --q;
1587 } else
1588 inp();
1590 *q = '\0';
1591 minp();
1592 #if 0
1593 /* eat all spaces and comments after include */
1594 /* XXX: slightly incorrect */
1595 while (ch1 != '\n' && ch1 != CH_EOF)
1596 inp();
1597 #endif
1598 } else {
1599 /* computed #include : either we have only strings or
1600 we have anything enclosed in '<>' */
1601 next();
1602 buf[0] = '\0';
1603 if (tok == TOK_STR) {
1604 while (tok != TOK_LINEFEED) {
1605 if (tok != TOK_STR) {
1606 include_syntax:
1607 tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1609 pstrcat(buf, sizeof(buf), (char *)tokc.str.data);
1610 next();
1612 c = '\"';
1613 } else {
1614 int len;
1615 while (tok != TOK_LINEFEED) {
1616 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1617 next();
1619 len = strlen(buf);
1620 /* check syntax and remove '<>' */
1621 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1622 goto include_syntax;
1623 memmove(buf, buf + 1, len - 2);
1624 buf[len - 2] = '\0';
1625 c = '>';
/* here buf holds the header name and c is '\"' or '>' depending on
   which form of #include was used */
1629 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1630 tcc_error("#include recursion too deep");
1631 /* store current file in stack, but increment stack later below */
1632 *s1->include_stack_ptr = file;
/* search order: index 0 = absolute path, index 1 = directory of the
   including file (quoted form only), then include_paths followed by
   sysinclude_paths; #include_next resumes at file->include_next_index */
1633 i = tok == TOK_INCLUDE_NEXT ? file->include_next_index : 0;
1634 n = 2 + s1->nb_include_paths + s1->nb_sysinclude_paths;
1635 for (; i < n; ++i) {
1636 char buf1[sizeof file->filename];
1637 CachedInclude *e;
1638 const char *path;
1640 if (i == 0) {
1641 /* check absolute include path */
1642 if (!IS_ABSPATH(buf))
1643 continue;
1644 buf1[0] = 0;
1646 } else if (i == 1) {
1647 /* search in current dir if "header.h" */
1648 if (c != '\"')
1649 continue;
1650 path = file->filename;
1651 pstrncpy(buf1, path, tcc_basename(path) - path);
1653 } else {
1654 /* search in all the include paths */
1655 int j = i - 2, k = j - s1->nb_include_paths;
1656 path = k < 0 ? s1->include_paths[j] : s1->sysinclude_paths[k];
1657 pstrcpy(buf1, sizeof(buf1), path);
1658 pstrcat(buf1, sizeof(buf1), "/");
1661 pstrcat(buf1, sizeof(buf1), buf);
1662 e = search_cached_include(s1, buf1);
1663 if (e && (define_find(e->ifndef_macro) || e->ifndef_macro == TOK_once)) {
1664 /* no need to parse the include because the 'ifndef macro'
1665 is defined */
1666 #ifdef INC_DEBUG
1667 printf("%s: skipping cached %s\n", file->filename, buf1);
1668 #endif
1669 goto include_done;
1672 if (tcc_open(s1, buf1) < 0)
1673 continue;
/* from here on 'file' is used as the newly opened include file */
1675 file->include_next_index = i + 1;
1676 #ifdef INC_DEBUG
1677 printf("%s: including %s\n", file->prev->filename, file->filename);
1678 #endif
1679 /* update target deps */
1680 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps,
1681 tcc_strdup(buf1));
1682 /* push current file in stack */
1683 ++s1->include_stack_ptr;
1684 /* add include file debug info */
1685 if (s1->do_debug)
1686 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1687 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1688 ch = file->buf_ptr[0];
1689 goto the_end;
1691 tcc_error("include file '%s' not found", buf);
1692 include_done:
1693 break;
/* conditional directives: each ifdef_stack entry uses bit 0 for
   "current branch is taken" and bit 1 for "#else already seen"
   (see the '& 2', '^= 3' and '& 1' tests below) */
1694 case TOK_IFNDEF:
1695 c = 1;
1696 goto do_ifdef;
1697 case TOK_IF:
1698 c = expr_preprocess();
1699 goto do_if;
1700 case TOK_IFDEF:
1701 c = 0;
1702 do_ifdef:
1703 next_nomacro();
1704 if (tok < TOK_IDENT)
1705 tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
/* remember an #ifndef that is the first directive of the file as a
   candidate include guard */
1706 if (is_bof) {
1707 if (c) {
1708 #ifdef INC_DEBUG
1709 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1710 #endif
1711 file->ifndef_macro = tok;
1714 c = (define_find(tok) != 0) ^ c;
1715 do_if:
1716 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1717 tcc_error("memory full (ifdef)");
1718 *s1->ifdef_stack_ptr++ = c;
1719 goto test_skip;
1720 case TOK_ELSE:
1721 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1722 tcc_error("#else without matching #if");
1723 if (s1->ifdef_stack_ptr[-1] & 2)
1724 tcc_error("#else after #else");
/* flip the "taken" bit and set the "#else seen" bit */
1725 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1726 goto test_else;
1727 case TOK_ELIF:
1728 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1729 tcc_error("#elif without matching #if");
1730 c = s1->ifdef_stack_ptr[-1];
1731 if (c > 1)
1732 tcc_error("#elif after #else");
1733 /* last #if/#elif expression was true: we skip */
1734 if (c == 1)
1735 goto skip;
1736 c = expr_preprocess();
1737 s1->ifdef_stack_ptr[-1] = c;
1738 test_else:
/* an #else/#elif belonging to the file's outermost conditional means
   the #ifndef is not a plain include guard */
1739 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1740 file->ifndef_macro = 0;
1741 test_skip:
1742 if (!(c & 1)) {
1743 skip:
1744 preprocess_skip();
1745 is_bof = 0;
1746 goto redo;
1748 break;
1749 case TOK_ENDIF:
1750 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1751 tcc_error("#endif without matching #if");
1752 s1->ifdef_stack_ptr--;
1753 /* '#ifndef macro' was at the start of file. Now we check if
1754 an '#endif' is exactly at the end of file */
1755 if (file->ifndef_macro &&
1756 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1757 file->ifndef_macro_saved = file->ifndef_macro;
1758 /* need to set to zero to avoid false matches if another
1759 #ifndef at middle of file */
1760 file->ifndef_macro = 0;
1761 while (tok != TOK_LINEFEED)
1762 next_nomacro();
1763 tok_flags |= TOK_FLAG_ENDIF;
1764 goto the_end;
1766 break;
/* '# <number> ["file"]' shares the line-number handling of #line */
1767 case TOK_PPNUM:
1768 n = strtoul((char*)tokc.str.data, &q, 10);
1769 goto _line_num;
1770 case TOK_LINE:
1771 next();
1772 if (tok != TOK_CINT)
1773 _line_err:
1774 tcc_error("wrong #line format");
1775 n = tokc.i;
1776 _line_num:
1777 next();
1778 if (tok != TOK_LINEFEED) {
1779 if (tok == TOK_STR)
1780 pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.str.data);
1781 else if (parse_flags & PARSE_FLAG_ASM_FILE)
1782 break;
1783 else
1784 goto _line_err;
1785 --n;
1787 if (file->fd > 0)
1788 total_lines += file->line_num - n;
1789 file->line_num = n;
1790 if (s1->do_debug)
1791 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1792 break;
1793 case TOK_ERROR:
1794 case TOK_WARNING:
/* the message is the raw rest of the line, truncated to fit buf */
1795 c = tok;
1796 ch = file->buf_ptr[0];
1797 skip_spaces();
1798 q = buf;
1799 while (ch != '\n' && ch != CH_EOF) {
1800 if ((q - buf) < sizeof(buf) - 1)
1801 *q++ = ch;
1802 if (ch == '\\') {
1803 if (handle_stray_noerror() == 0)
1804 --q;
1805 } else
1806 inp();
1808 *q = '\0';
1809 if (c == TOK_ERROR)
1810 tcc_error("#error %s", buf);
1811 else
1812 tcc_warning("#warning %s", buf);
1813 break;
1814 case TOK_PRAGMA:
1815 pragma_parse(s1);
1816 break;
1817 case TOK_LINEFEED:
1818 goto the_end;
1819 default:
1820 /* ignore gas line comment in an 'S' file. */
1821 if (saved_parse_flags & PARSE_FLAG_ASM_FILE)
1822 goto ignore;
1823 if (tok == '!' && is_bof)
1824 /* '!' is ignored at beginning to allow C scripts. */
1825 goto ignore;
1826 tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1827 ignore:
1828 file->buf_ptr = parse_line_comment(file->buf_ptr);
1829 goto the_end;
1831 /* ignore other preprocess commands or #! for C scripts */
1832 while (tok != TOK_LINEFEED)
1833 next_nomacro();
1834 the_end:
1835 parse_flags = saved_parse_flags;
1838 /* evaluate escape codes in a string. */
1839 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1841 int c, n;
1842 const uint8_t *p;
1844 p = buf;
1845 for(;;) {
1846 c = *p;
1847 if (c == '\0')
1848 break;
1849 if (c == '\\') {
1850 p++;
1851 /* escape */
1852 c = *p;
1853 switch(c) {
1854 case '0': case '1': case '2': case '3':
1855 case '4': case '5': case '6': case '7':
1856 /* at most three octal digits */
1857 n = c - '0';
1858 p++;
1859 c = *p;
1860 if (isoct(c)) {
1861 n = n * 8 + c - '0';
1862 p++;
1863 c = *p;
1864 if (isoct(c)) {
1865 n = n * 8 + c - '0';
1866 p++;
1869 c = n;
1870 goto add_char_nonext;
1871 case 'x':
1872 case 'u':
1873 case 'U':
1874 p++;
1875 n = 0;
1876 for(;;) {
1877 c = *p;
1878 if (c >= 'a' && c <= 'f')
1879 c = c - 'a' + 10;
1880 else if (c >= 'A' && c <= 'F')
1881 c = c - 'A' + 10;
1882 else if (isnum(c))
1883 c = c - '0';
1884 else
1885 break;
1886 n = n * 16 + c;
1887 p++;
1889 c = n;
1890 goto add_char_nonext;
1891 case 'a':
1892 c = '\a';
1893 break;
1894 case 'b':
1895 c = '\b';
1896 break;
1897 case 'f':
1898 c = '\f';
1899 break;
1900 case 'n':
1901 c = '\n';
1902 break;
1903 case 'r':
1904 c = '\r';
1905 break;
1906 case 't':
1907 c = '\t';
1908 break;
1909 case 'v':
1910 c = '\v';
1911 break;
1912 case 'e':
1913 if (!gnu_ext)
1914 goto invalid_escape;
1915 c = 27;
1916 break;
1917 case '\'':
1918 case '\"':
1919 case '\\':
1920 case '?':
1921 break;
1922 default:
1923 invalid_escape:
1924 if (c >= '!' && c <= '~')
1925 tcc_warning("unknown escape sequence: \'\\%c\'", c);
1926 else
1927 tcc_warning("unknown escape sequence: \'\\x%x\'", c);
1928 break;
1931 p++;
1932 add_char_nonext:
1933 if (!is_long)
1934 cstr_ccat(outstr, c);
1935 else
1936 cstr_wccat(outstr, c);
1938 /* add a trailing '\0' */
1939 if (!is_long)
1940 cstr_ccat(outstr, '\0');
1941 else
1942 cstr_wccat(outstr, '\0');
1945 void parse_string(const char *s, int len)
1947 uint8_t buf[1000], *p = buf;
1948 int is_long, sep;
1950 if ((is_long = *s == 'L'))
1951 ++s, --len;
1952 sep = *s++;
1953 len -= 2;
1954 if (len >= sizeof buf)
1955 p = tcc_malloc(len + 1);
1956 memcpy(p, s, len);
1957 p[len] = 0;
1959 cstr_reset(&tokcstr);
1960 parse_escape_string(&tokcstr, p, is_long);
1961 if (p != buf)
1962 tcc_free(p);
1964 if (sep == '\'') {
1965 int char_size;
1966 /* XXX: make it portable */
1967 if (!is_long)
1968 char_size = 1;
1969 else
1970 char_size = sizeof(nwchar_t);
1971 if (tokcstr.size <= char_size)
1972 tcc_error("empty character constant");
1973 if (tokcstr.size > 2 * char_size)
1974 tcc_warning("multi-character character constant");
1975 if (!is_long) {
1976 tokc.i = *(int8_t *)tokcstr.data;
1977 tok = TOK_CCHAR;
1978 } else {
1979 tokc.i = *(nwchar_t *)tokcstr.data;
1980 tok = TOK_LCHAR;
1982 } else {
1983 tokc.str.size = tokcstr.size;
1984 tokc.str.data = tokcstr.data;
1985 tokc.str.data_allocated = tokcstr.data_allocated;
1986 if (!is_long)
1987 tok = TOK_STR;
1988 else
1989 tok = TOK_LSTR;
/* we use 64 bit numbers, stored as BN_SIZE 32-bit words, least
   significant word first */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val
   'shift' must be in the range 0..31. The bits shifted out of each word
   are carried into the next one; bits shifted out of the top word are
   lost. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    for(i=0;i<BN_SIZE;i++) {
        v = bn[i];
        bn[i] = (v << shift) | or_val;
        /* a shift by 32 would be undefined, so a zero shift carries nothing */
        or_val = shift ? v >> (32 - shift) : 0;
    }
}

/* set all words of bn to zero */
static void bn_zero(unsigned int *bn)
{
    int i;
    for(i=0;i<BN_SIZE;i++) {
        bn[i] = 0;
    }
}
/* Overview: the leading digits are first copied into token_buf while the
   base is determined (10 by default, 16 after "0x", 2 after "0b" when
   tcc_ext is set). Depending on what follows, the number is then
   handled as a hexadecimal/binary float (accumulated with the bn_*
   helpers and scaled with ldexp), as a decimal float (converted with
   strtof/strtod/strtold) or as an integer with optional U/L suffixes.
   The result is left in 'tok' and 'tokc'. Any character left after the
   number is reported as "invalid number". */
2016 /* parse number in null terminated string 'p' and return it in the
2017 current token */
2018 static void parse_number(const char *p)
2020 int b, t, shift, frac_bits, s, exp_val, ch;
2021 char *q;
2022 unsigned int bn[BN_SIZE];
2023 double d;
2025 /* number */
2026 q = token_buf;
/* t is the first character, ch always holds the lookahead character */
2027 ch = *p++;
2028 t = ch;
2029 ch = *p++;
2030 *q++ = t;
2031 b = 10;
2032 if (t == '.') {
2033 goto float_frac_parse;
2034 } else if (t == '0') {
2035 if (ch == 'x' || ch == 'X') {
/* the "0x" / "0b" prefix is not kept in token_buf */
2036 q--;
2037 ch = *p++;
2038 b = 16;
2039 } else if (tcc_ext && (ch == 'b' || ch == 'B')) {
2040 q--;
2041 ch = *p++;
2042 b = 2;
2045 /* parse all digits. cannot check octal numbers at this stage
2046 because of floating point constants */
2047 while (1) {
2048 if (ch >= 'a' && ch <= 'f')
2049 t = ch - 'a' + 10;
2050 else if (ch >= 'A' && ch <= 'F')
2051 t = ch - 'A' + 10;
2052 else if (isnum(ch))
2053 t = ch - '0';
2054 else
2055 break;
2056 if (t >= b)
2057 break;
2058 if (q >= token_buf + STRING_MAX_SIZE) {
2059 num_too_long:
2060 tcc_error("number too long");
2062 *q++ = ch;
2063 ch = *p++;
/* a '.', or an exponent marker matching the base, makes it a float */
2065 if (ch == '.' ||
2066 ((ch == 'e' || ch == 'E') && b == 10) ||
2067 ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
2068 if (b != 10) {
2069 /* NOTE: strtox should support that for hexa numbers, but
2070 non ISOC99 libcs do not support it, so we prefer to do
2071 it by hand */
2072 /* hexadecimal or binary floats */
2073 /* XXX: handle overflows */
2074 *q = '\0';
2075 if (b == 16)
2076 shift = 4;
2077 else
2078 shift = 1;
2079 bn_zero(bn);
2080 q = token_buf;
/* accumulate the integer part collected in token_buf */
2081 while (1) {
2082 t = *q++;
2083 if (t == '\0') {
2084 break;
2085 } else if (t >= 'a') {
2086 t = t - 'a' + 10;
2087 } else if (t >= 'A') {
2088 t = t - 'A' + 10;
2089 } else {
2090 t = t - '0';
2092 bn_lshift(bn, shift, t);
/* fraction digits are appended the same way; frac_bits counts how many
   bits must be scaled away again */
2094 frac_bits = 0;
2095 if (ch == '.') {
2096 ch = *p++;
2097 while (1) {
2098 t = ch;
2099 if (t >= 'a' && t <= 'f') {
2100 t = t - 'a' + 10;
2101 } else if (t >= 'A' && t <= 'F') {
2102 t = t - 'A' + 10;
2103 } else if (t >= '0' && t <= '9') {
2104 t = t - '0';
2105 } else {
2106 break;
2108 if (t >= b)
2109 tcc_error("invalid digit");
2110 bn_lshift(bn, shift, t);
2111 frac_bits += shift;
2112 ch = *p++;
/* the binary exponent is mandatory for these floats */
2115 if (ch != 'p' && ch != 'P')
2116 expect("exponent");
2117 ch = *p++;
2118 s = 1;
2119 exp_val = 0;
2120 if (ch == '+') {
2121 ch = *p++;
2122 } else if (ch == '-') {
2123 s = -1;
2124 ch = *p++;
2126 if (ch < '0' || ch > '9')
2127 expect("exponent digits");
2128 while (ch >= '0' && ch <= '9') {
2129 exp_val = exp_val * 10 + ch - '0';
2130 ch = *p++;
2132 exp_val = exp_val * s;
2134 /* now we can generate the number */
2135 /* XXX: should patch directly float number */
2136 d = (double)bn[1] * 4294967296.0 + (double)bn[0];
2137 d = ldexp(d, exp_val - frac_bits);
2138 t = toup(ch);
2139 if (t == 'F') {
2140 ch = *p++;
2141 tok = TOK_CFLOAT;
2142 /* float : should handle overflow */
2143 tokc.f = (float)d;
2144 } else if (t == 'L') {
2145 ch = *p++;
2146 #ifdef TCC_TARGET_PE
2147 tok = TOK_CDOUBLE;
2148 tokc.d = d;
2149 #else
2150 tok = TOK_CLDOUBLE;
2151 /* XXX: not large enough */
2152 tokc.ld = (long double)d;
2153 #endif
2154 } else {
2155 tok = TOK_CDOUBLE;
2156 tokc.d = d;
2158 } else {
2159 /* decimal floats */
/* the whole number is copied into token_buf and converted by the
   C library below */
2160 if (ch == '.') {
2161 if (q >= token_buf + STRING_MAX_SIZE)
2162 goto num_too_long;
2163 *q++ = ch;
2164 ch = *p++;
2165 float_frac_parse:
2166 while (ch >= '0' && ch <= '9') {
2167 if (q >= token_buf + STRING_MAX_SIZE)
2168 goto num_too_long;
2169 *q++ = ch;
2170 ch = *p++;
2173 if (ch == 'e' || ch == 'E') {
2174 if (q >= token_buf + STRING_MAX_SIZE)
2175 goto num_too_long;
2176 *q++ = ch;
2177 ch = *p++;
2178 if (ch == '-' || ch == '+') {
2179 if (q >= token_buf + STRING_MAX_SIZE)
2180 goto num_too_long;
2181 *q++ = ch;
2182 ch = *p++;
2184 if (ch < '0' || ch > '9')
2185 expect("exponent digits");
2186 while (ch >= '0' && ch <= '9') {
2187 if (q >= token_buf + STRING_MAX_SIZE)
2188 goto num_too_long;
2189 *q++ = ch;
2190 ch = *p++;
2193 *q = '\0';
2194 t = toup(ch);
/* NOTE(review): errno is cleared here but not examined in this block */
2195 errno = 0;
2196 if (t == 'F') {
2197 ch = *p++;
2198 tok = TOK_CFLOAT;
2199 tokc.f = strtof(token_buf, NULL);
2200 } else if (t == 'L') {
2201 ch = *p++;
2202 #ifdef TCC_TARGET_PE
2203 tok = TOK_CDOUBLE;
2204 tokc.d = strtod(token_buf, NULL);
2205 #else
2206 tok = TOK_CLDOUBLE;
2207 tokc.ld = strtold(token_buf, NULL);
2208 #endif
2209 } else {
2210 tok = TOK_CDOUBLE;
2211 tokc.d = strtod(token_buf, NULL);
2214 } else {
2215 unsigned long long n, n1;
2216 int lcount, ucount, must_64bit;
2217 const char *p1;
2219 /* integer number */
2220 *q = '\0';
2221 q = token_buf;
/* a leading '0' without "x"/"b" prefix selects octal */
2222 if (b == 10 && *q == '0') {
2223 b = 8;
2224 q++;
2226 n = 0;
2227 while(1) {
2228 t = *q++;
2229 /* no need for checks except for base 10 / 8 errors */
2230 if (t == '\0')
2231 break;
2232 else if (t >= 'a')
2233 t = t - 'a' + 10;
2234 else if (t >= 'A')
2235 t = t - 'A' + 10;
2236 else
2237 t = t - '0';
2238 if (t >= b)
2239 tcc_error("invalid digit");
2240 n1 = n;
2241 n = n * b + t;
2242 /* detect overflow */
2243 /* XXX: this test is not reliable */
2244 if (n < n1)
2245 tcc_error("integer constant overflow");
2248 /* Determine the characteristics (unsigned and/or 64bit) the type of
2249 the constant must have according to the constant suffix(es) */
2250 lcount = ucount = must_64bit = 0;
2251 p1 = p;
2252 for(;;) {
2253 t = toup(ch);
2254 if (t == 'L') {
2255 if (lcount >= 2)
2256 tcc_error("three 'l's in integer constant");
/* a second 'l' must directly follow the first one in the same case */
2257 if (lcount && *(p - 1) != ch)
2258 tcc_error("incorrect integer suffix: %s", p1);
2259 lcount++;
/* on x86-64 non-PE targets a single 'L' already forces 64 bits,
   elsewhere only 'LL' does */
2260 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2261 if (lcount == 2)
2262 #endif
2263 must_64bit = 1;
2264 ch = *p++;
2265 } else if (t == 'U') {
2266 if (ucount >= 1)
2267 tcc_error("two 'u's in integer constant");
2268 ucount++;
2269 ch = *p++;
2270 } else {
2271 break;
2275 /* Whether 64 bits are needed to hold the constant's value */
/* n1 becomes the most significant 32 bits of the chosen width, so
   that (n1 >> 31) below tests its top bit */
2276 if (n & 0xffffffff00000000LL || must_64bit) {
2277 tok = TOK_CLLONG;
2278 n1 = n >> 32;
2279 } else {
2280 tok = TOK_CINT;
2281 n1 = n;
2284 /* Whether type must be unsigned to hold the constant's value */
2285 if (ucount || ((n1 >> 31) && (b != 10))) {
2286 if (tok == TOK_CLLONG)
2287 tok = TOK_CULLONG;
2288 else
2289 tok = TOK_CUINT;
2290 /* If decimal and no unsigned suffix, bump to 64 bits or throw error */
2291 } else if (n1 >> 31) {
2292 if (tok == TOK_CINT)
2293 tok = TOK_CLLONG;
2294 else
2295 tcc_error("integer constant overflow");
/* NOTE(review): both branches below perform the same assignment */
2298 if (tok == TOK_CINT || tok == TOK_CUINT)
2299 tokc.i = n;
2300 else
2301 tokc.i = n;
/* anything left after the number and its suffix is an error */
2303 if (ch)
2304 tcc_error("invalid number\n");
/* Expands to a complete 'case' of the tokenizer switch for an operator
   that is either the single character c1 (token tok1) or c1 followed by
   c2 (token tok2). Relies on the locals 'c' and 'p' and on PEEKC being
   available at the point of use. */
#define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c == c2) {                          \
            p++;                                \
            tok = tok2;                         \
        } else {                                \
            tok = tok1;                         \
        }                                       \
        break;
/* Tokenizer core: reads one token starting at file->buf_ptr, stores its
   code in 'tok' (and its value in 'tokc' for numbers and strings) and
   writes the advanced position back to file->buf_ptr. tok_flags is
   reset to 0 after an ordinary token; the paths that jump to
   keep_tok_flags leave it untouched. Numbers and string/character
   literals are returned unconverted as TOK_PPNUM / TOK_PPSTR. */
2319 /* return next token without macro substitution */
2320 static inline void next_nomacro1(void)
2322 int t, c, is_long;
2323 TokenSym *ts;
2324 uint8_t *p, *p1;
2325 unsigned int h;
2327 p = file->buf_ptr;
2328 redo_no_start:
2329 c = *p;
2330 switch(c) {
2331 case ' ':
2332 case '\t':
2333 tok = c;
2334 p++;
/* white space is returned as a token only when PARSE_FLAG_SPACES is
   set; otherwise the whole run is skipped */
2335 if (parse_flags & PARSE_FLAG_SPACES)
2336 goto keep_tok_flags;
2337 while (isidnum_table[*p - CH_EOF] & IS_SPC)
2338 ++p;
2339 goto redo_no_start;
2340 case '\f':
2341 case '\v':
2342 case '\r':
2343 p++;
2344 goto redo_no_start;
2345 case '\\':
2346 /* first look if it is in fact an end of buffer */
2347 c = handle_stray1(p);
2348 p = file->buf_ptr;
2349 if (c == '\\')
2350 goto parse_simple;
2351 if (c != CH_EOF)
2352 goto redo_no_start;
/* real end of the current file */
2354 TCCState *s1 = tcc_state;
/* when linefeeds are requested, a TOK_LINEFEED is delivered once
   before the end of file is reported */
2355 if ((parse_flags & PARSE_FLAG_LINEFEED)
2356 && !(tok_flags & TOK_FLAG_EOF)) {
2357 tok_flags |= TOK_FLAG_EOF;
2358 tok = TOK_LINEFEED;
2359 goto keep_tok_flags;
2360 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2361 tok = TOK_EOF;
2362 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2363 tcc_error("missing #endif");
2364 } else if (s1->include_stack_ptr == s1->include_stack) {
2365 /* no include left : end of file. */
2366 tok = TOK_EOF;
2367 } else {
2368 tok_flags &= ~TOK_FLAG_EOF;
2369 /* pop include file */
2371 /* test if previous '#endif' was after a #ifdef at
2372 start of file */
2373 if (tok_flags & TOK_FLAG_ENDIF) {
2374 #ifdef INC_DEBUG
2375 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2376 #endif
2377 add_cached_include(s1, file->filename, file->ifndef_macro_saved);
2378 tok_flags &= ~TOK_FLAG_ENDIF;
2381 /* add end of include file debug info */
2382 if (tcc_state->do_debug) {
2383 put_stabd(N_EINCL, 0, 0);
2385 /* pop include stack */
2386 tcc_close();
2387 s1->include_stack_ptr--;
2388 p = file->buf_ptr;
2389 goto redo_no_start;
2392 break;
2394 case '\n':
2395 file->line_num++;
2396 tok_flags |= TOK_FLAG_BOL;
2397 p++;
2398 maybe_newline:
2399 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2400 goto redo_no_start;
2401 tok = TOK_LINEFEED;
2402 goto keep_tok_flags;
2404 case '#':
2405 /* XXX: simplify */
2406 PEEKC(c, p);
/* a '#' at the beginning of a line starts a directive when
   preprocessing is enabled */
2407 if ((tok_flags & TOK_FLAG_BOL) &&
2408 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2409 file->buf_ptr = p;
2410 preprocess(tok_flags & TOK_FLAG_BOF);
2411 p = file->buf_ptr;
2412 goto maybe_newline;
2413 } else {
2414 if (c == '#') {
2415 p++;
2416 tok = TOK_TWOSHARPS;
2417 } else {
2418 if (parse_flags & PARSE_FLAG_ASM_FILE) {
/* in assembler files the rest of the line is skipped as a comment */
2419 p = parse_line_comment(p - 1);
2420 goto redo_no_start;
2421 } else {
2422 tok = '#';
2426 break;
2428 /* dollar is allowed to start identifiers when not parsing asm */
2429 case '$':
2430 if (!(isidnum_table[c - CH_EOF] & IS_ID)
2431 || (parse_flags & PARSE_FLAG_ASM_FILE))
2432 goto parse_simple;
/* NOTE(review): no break or goto follows here -- presumably an
   intentional fall through into the identifier cases */
2434 case 'a': case 'b': case 'c': case 'd':
2435 case 'e': case 'f': case 'g': case 'h':
2436 case 'i': case 'j': case 'k': case 'l':
2437 case 'm': case 'n': case 'o': case 'p':
2438 case 'q': case 'r': case 's': case 't':
2439 case 'u': case 'v': case 'w': case 'x':
2440 case 'y': case 'z':
2441 case 'A': case 'B': case 'C': case 'D':
2442 case 'E': case 'F': case 'G': case 'H':
/* 'L' is absent from this list: it has its own case further down
   because it may start a wide literal */
2443 case 'I': case 'J': case 'K':
2444 case 'M': case 'N': case 'O': case 'P':
2445 case 'Q': case 'R': case 'S': case 'T':
2446 case 'U': case 'V': case 'W': case 'X':
2447 case 'Y': case 'Z':
2448 case '_':
2449 parse_ident_fast:
/* hash the identifier while scanning it in place */
2450 p1 = p;
2451 h = TOK_HASH_INIT;
2452 h = TOK_HASH_FUNC(h, c);
2453 while (c = *++p, isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2454 h = TOK_HASH_FUNC(h, c);
2455 if (c != '\\') {
2456 TokenSym **pts;
2457 int len;
2459 /* fast case : no stray found, so we have the full token
2460 and we have already hashed it */
2461 len = p - p1;
2462 h &= (TOK_HASH_SIZE - 1);
2463 pts = &hash_ident[h];
2464 for(;;) {
2465 ts = *pts;
2466 if (!ts)
2467 break;
2468 if (ts->len == len && !memcmp(ts->str, p1, len))
2469 goto token_found;
2470 pts = &(ts->hash_next);
2472 ts = tok_alloc_new(pts, (char *) p1, len);
2473 token_found: ;
2474 } else {
2475 /* slower case */
/* a backslash interrupts the identifier: collect it in tokcstr and
   continue through PEEKC */
2476 cstr_reset(&tokcstr);
2478 while (p1 < p) {
2479 cstr_ccat(&tokcstr, *p1);
2480 p1++;
2482 p--;
2483 PEEKC(c, p);
2484 parse_ident_slow:
2485 while (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM)) {
2486 cstr_ccat(&tokcstr, c);
2487 PEEKC(c, p);
2489 ts = tok_alloc(tokcstr.data, tokcstr.size);
2491 tok = ts->tok;
2492 break;
2493 case 'L':
/* 'L' is an ordinary identifier start unless it is followed by a
   quote (wide literal) or a backslash */
2494 t = p[1];
2495 if (t != '\\' && t != '\'' && t != '\"') {
2496 /* fast case */
2497 goto parse_ident_fast;
2498 } else {
2499 PEEKC(c, p);
2500 if (c == '\'' || c == '\"') {
2501 is_long = 1;
2502 goto str_const;
2503 } else {
2504 cstr_reset(&tokcstr);
2505 cstr_ccat(&tokcstr, 'L');
2506 goto parse_ident_slow;
2509 break;
2511 case '0': case '1': case '2': case '3':
2512 case '4': case '5': case '6': case '7':
2513 case '8': case '9':
2514 cstr_reset(&tokcstr);
2515 /* after the first digit, accept digits, alpha, '.' or sign if
2516 prefixed by 'eEpP' */
2517 parse_num:
2518 for(;;) {
2519 t = c;
2520 cstr_ccat(&tokcstr, c);
2521 PEEKC(c, p);
2522 if (!((isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2523 || c == '.'
2524 || ((c == '+' || c == '-')
2525 && (t == 'e' || t == 'E' || t == 'p' || t == 'P')
2527 break;
2529 /* We add a trailing '\0' to ease parsing */
2530 cstr_ccat(&tokcstr, '\0');
2531 tokc.str.size = tokcstr.size;
2532 tokc.str.data = tokcstr.data;
2533 tokc.str.data_allocated = tokcstr.data_allocated;
2534 tok = TOK_PPNUM;
2535 break;
2537 case '.':
2538 /* special dot handling because it can also start a number */
2539 PEEKC(c, p);
2540 if (isnum(c)) {
2541 cstr_reset(&tokcstr);
2542 cstr_ccat(&tokcstr, '.');
2543 goto parse_num;
2544 } else if (c == '.') {
2545 PEEKC(c, p);
2546 if (c == '.') {
2547 p++;
2548 tok = TOK_DOTS;
2549 } else {
/* only two dots: put the second one back and return a single '.' */
2550 *--p = '.'; /* may underflow into file->unget[] */
2551 tok = '.';
2553 } else {
2554 tok = '.';
2556 break;
2557 case '\'':
2558 case '\"':
2559 is_long = 0;
2560 str_const:
/* the literal is stored with its quotes (and 'L' prefix) and a
   trailing '\0' */
2561 cstr_reset(&tokcstr);
2562 if (is_long)
2563 cstr_ccat(&tokcstr, 'L');
2564 cstr_ccat(&tokcstr, c);
2565 p = parse_pp_string(p, c, &tokcstr);
2566 cstr_ccat(&tokcstr, c);
2567 cstr_ccat(&tokcstr, '\0');
2568 tokc.str.size = tokcstr.size;
2569 tokc.str.data = tokcstr.data;
2570 tokc.str.data_allocated = tokcstr.data_allocated;
2571 tok = TOK_PPSTR;
2572 break;
2574 case '<':
2575 PEEKC(c, p);
2576 if (c == '=') {
2577 p++;
2578 tok = TOK_LE;
2579 } else if (c == '<') {
2580 PEEKC(c, p);
2581 if (c == '=') {
2582 p++;
2583 tok = TOK_A_SHL;
2584 } else {
2585 tok = TOK_SHL;
2587 } else {
2588 tok = TOK_LT;
2590 break;
2591 case '>':
2592 PEEKC(c, p);
2593 if (c == '=') {
2594 p++;
2595 tok = TOK_GE;
2596 } else if (c == '>') {
2597 PEEKC(c, p);
2598 if (c == '=') {
2599 p++;
2600 tok = TOK_A_SAR;
2601 } else {
2602 tok = TOK_SAR;
2604 } else {
2605 tok = TOK_GT;
2607 break;
2609 case '&':
2610 PEEKC(c, p);
2611 if (c == '&') {
2612 p++;
2613 tok = TOK_LAND;
2614 } else if (c == '=') {
2615 p++;
2616 tok = TOK_A_AND;
2617 } else {
2618 tok = '&';
2620 break;
2622 case '|':
2623 PEEKC(c, p);
2624 if (c == '|') {
2625 p++;
2626 tok = TOK_LOR;
2627 } else if (c == '=') {
2628 p++;
2629 tok = TOK_A_OR;
2630 } else {
2631 tok = '|';
2633 break;
2635 case '+':
2636 PEEKC(c, p);
2637 if (c == '+') {
2638 p++;
2639 tok = TOK_INC;
2640 } else if (c == '=') {
2641 p++;
2642 tok = TOK_A_ADD;
2643 } else {
2644 tok = '+';
2646 break;
2648 case '-':
2649 PEEKC(c, p);
2650 if (c == '-') {
2651 p++;
2652 tok = TOK_DEC;
2653 } else if (c == '=') {
2654 p++;
2655 tok = TOK_A_SUB;
2656 } else if (c == '>') {
2657 p++;
2658 tok = TOK_ARROW;
2659 } else {
2660 tok = '-';
2662 break;
2664 PARSE2('!', '!', '=', TOK_NE)
2665 PARSE2('=', '=', '=', TOK_EQ)
2666 PARSE2('*', '*', '=', TOK_A_MUL)
2667 PARSE2('%', '%', '=', TOK_A_MOD)
2668 PARSE2('^', '^', '=', TOK_A_XOR)
2670 /* comments or operator */
2671 case '/':
2672 PEEKC(c, p);
2673 if (c == '*') {
2674 p = parse_comment(p);
2675 /* comments replaced by a blank */
2676 tok = ' ';
2677 goto keep_tok_flags;
2678 } else if (c == '/') {
2679 p = parse_line_comment(p);
2680 tok = ' ';
2681 goto keep_tok_flags;
2682 } else if (c == '=') {
2683 p++;
2684 tok = TOK_A_DIV;
2685 } else {
2686 tok = '/';
2688 break;
2690 /* simple tokens */
2691 case '(':
2692 case ')':
2693 case '[':
2694 case ']':
2695 case '{':
2696 case '}':
2697 case ',':
2698 case ';':
2699 case ':':
2700 case '?':
2701 case '~':
2702 case '@': /* only used in assembler */
2703 parse_simple:
2704 tok = c;
2705 p++;
2706 break;
2707 default:
2708 tcc_error("unrecognized character \\x%02x", c);
2709 break;
/* an ordinary token clears the beginning-of-line/file and other
   flags; paths that must preserve them jump to keep_tok_flags */
2711 tok_flags = 0;
2712 keep_tok_flags:
2713 file->buf_ptr = p;
2714 #if defined(PARSE_DEBUG)
2715 printf("token = %s\n", get_tok_str(tok, &tokc));
2716 #endif
2719 /* return next token without macro substitution. Can read input from
2720 macro_ptr buffer */
2721 static void next_nomacro_spc(void)
2723 if (macro_ptr) {
2724 redo:
2725 tok = *macro_ptr;
2726 if (tok) {
2727 TOK_GET(&tok, &macro_ptr, &tokc);
2728 if (tok == TOK_LINENUM) {
2729 file->line_num = tokc.i;
2730 goto redo;
2733 } else {
2734 next_nomacro1();
2738 ST_FUNC void next_nomacro(void)
2740 do {
2741 next_nomacro_spc();
2742 } while (tok < 256 && (isidnum_table[tok - CH_EOF] & IS_SPC));
2746 static void macro_subst(
2747 TokenString *tok_str,
2748 Sym **nested_list,
2749 const int *macro_str,
2750 int can_read_stream
2753 /* substitute arguments in replacement lists in macro_str by the values in
2754 args (field d) and return allocated string */
2755 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2757 int t, t0, t1, spc;
2758 const int *st;
2759 Sym *s;
2760 CValue cval;
2761 TokenString str;
2762 CString cstr;
2764 tok_str_new(&str);
2765 t0 = t1 = 0;
2766 while(1) {
2767 TOK_GET(&t, &macro_str, &cval);
2768 if (!t)
2769 break;
2770 if (t == '#') {
2771 /* stringize */
2772 TOK_GET(&t, &macro_str, &cval);
2773 if (!t)
2774 goto bad_stringy;
2775 s = sym_find2(args, t);
2776 if (s) {
2777 cstr_new(&cstr);
2778 cstr_ccat(&cstr, '\"');
2779 st = s->d;
2780 spc = 0;
2781 while (*st) {
2782 TOK_GET(&t, &st, &cval);
2783 if (t != TOK_PLCHLDR
2784 && t != TOK_NOSUBST
2785 && 0 == check_space(t, &spc)) {
2786 const char *s = get_tok_str(t, &cval);
2787 while (*s) {
2788 if (t == TOK_PPSTR && *s != '\'')
2789 add_char(&cstr, *s);
2790 else
2791 cstr_ccat(&cstr, *s);
2792 ++s;
2796 cstr.size -= spc;
2797 cstr_ccat(&cstr, '\"');
2798 cstr_ccat(&cstr, '\0');
2799 #ifdef PP_DEBUG
2800 printf("\nstringize: <%s>\n", (char *)cstr.data);
2801 #endif
2802 /* add string */
2803 cval.str.size = cstr.size;
2804 cval.str.data = cstr.data;
2805 cval.str.data_allocated = cstr.data_allocated;
2806 tok_str_add2(&str, TOK_PPSTR, &cval);
2807 tcc_free(cval.str.data_allocated);
2808 } else {
2809 bad_stringy:
2810 expect("macro parameter after '#'");
2812 } else if (t >= TOK_IDENT) {
2813 s = sym_find2(args, t);
2814 if (s) {
2815 int l0 = str.len;
2816 st = s->d;
2817 /* if '##' is present before or after, no arg substitution */
2818 if (*macro_str == TOK_TWOSHARPS || t1 == TOK_TWOSHARPS) {
2819 /* special case for var arg macros : ## eats the ','
2820 if empty VA_ARGS variable. */
2821 if (t1 == TOK_TWOSHARPS && t0 == ',' && gnu_ext && s->type.t) {
2822 if (*st == 0) {
2823 /* suppress ',' '##' */
2824 str.len -= 2;
2825 } else {
2826 /* suppress '##' and add variable */
2827 str.len--;
2828 goto add_var;
2830 } else {
2831 for(;;) {
2832 int t1;
2833 TOK_GET(&t1, &st, &cval);
2834 if (!t1)
2835 break;
2836 tok_str_add2(&str, t1, &cval);
2840 } else {
2841 add_var:
2842 /* NOTE: the stream cannot be read when macro
2843 substituing an argument */
2844 macro_subst(&str, nested_list, st, 0);
2846 if (str.len == l0) /* exanded to empty string */
2847 tok_str_add(&str, TOK_PLCHLDR);
2848 } else {
2849 tok_str_add(&str, t);
2851 } else {
2852 tok_str_add2(&str, t, &cval);
2854 t0 = t1, t1 = t;
2856 tok_str_add(&str, 0);
2857 return str.str;
/* three letter English month abbreviations, January first; each entry
   is NUL terminated */
static const char ab_month_name[12][4] = {
    "Jan", "Feb", "Mar", "Apr",
    "May", "Jun", "Jul", "Aug",
    "Sep", "Oct", "Nov", "Dec"
};
2866 /* peek or read [ws_str == NULL] next token from function macro call,
2867 walking up macro levels up to the file if necessary */
2868 static int next_argstream(Sym **nested_list, int can_read_stream, TokenString *ws_str)
2870 int t;
2871 const int *p;
2872 Sym *sa;
2874 for (;;) {
2875 if (macro_ptr) {
2876 p = macro_ptr, t = *p;
2877 if (ws_str) {
2878 while (is_space(t) || TOK_LINEFEED == t)
2879 tok_str_add(ws_str, t), t = *++p;
2881 if (t == 0 && can_read_stream) {
2882 end_macro();
2883 /* also, end of scope for nested defined symbol */
2884 sa = *nested_list;
2885 while (sa && sa->v == -1)
2886 sa = sa->prev;
2887 if (sa)
2888 sa->v = -1;
2889 continue;
2891 } else {
2892 ch = handle_eob();
2893 if (ws_str) {
2894 while (is_space(ch) || ch == '\n' || ch == '/') {
2895 if (ch == '/') {
2896 int c;
2897 uint8_t *p = file->buf_ptr;
2898 PEEKC(c, p);
2899 if (c == '*') {
2900 p = parse_comment(p);
2901 file->buf_ptr = p - 1;
2902 } else if (c == '/') {
2903 p = parse_line_comment(p);
2904 file->buf_ptr = p - 1;
2905 } else
2906 break;
2907 ch = ' ';
2909 tok_str_add(ws_str, ch);
2910 cinp();
2913 t = ch;
2916 if (ws_str)
2917 return t;
2918 next_nomacro_spc();
2919 return tok;
2923 /* do macro substitution of current token with macro 's' and add
2924 result to (tok_str,tok_len). 'nested_list' is the list of all
2925 macros we got inside to avoid recursing. Return non zero if no
2926 substitution needs to be done */
2927 static int macro_subst_tok(
2928 TokenString *tok_str,
2929 Sym **nested_list,
2930 Sym *s,
2931 int can_read_stream)
2933 Sym *args, *sa, *sa1;
2934 int parlevel, *mstr, t, t1, spc;
2935 TokenString str;
2936 char *cstrval;
2937 CValue cval;
2938 CString cstr;
2939 char buf[32];
2941 /* if symbol is a macro, prepare substitution */
2942 /* special macros */
2943 if (tok == TOK___LINE__) {
2944 snprintf(buf, sizeof(buf), "%d", file->line_num);
2945 cstrval = buf;
2946 t1 = TOK_PPNUM;
2947 goto add_cstr1;
2948 } else if (tok == TOK___FILE__) {
2949 cstrval = file->filename;
2950 goto add_cstr;
2951 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2952 time_t ti;
2953 struct tm *tm;
2955 time(&ti);
2956 tm = localtime(&ti);
2957 if (tok == TOK___DATE__) {
2958 snprintf(buf, sizeof(buf), "%s %2d %d",
2959 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2960 } else {
2961 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
2962 tm->tm_hour, tm->tm_min, tm->tm_sec);
2964 cstrval = buf;
2965 add_cstr:
2966 t1 = TOK_STR;
2967 add_cstr1:
2968 cstr_new(&cstr);
2969 cstr_cat(&cstr, cstrval);
2970 cstr_ccat(&cstr, '\0');
2971 cval.str.size = cstr.size;
2972 cval.str.data = cstr.data;
2973 cval.str.data_allocated = cstr.data_allocated;
2974 tok_str_add2(tok_str, t1, &cval);
2975 cstr_free(&cstr);
2976 } else {
2977 int saved_parse_flags = parse_flags;
2979 mstr = s->d;
2980 if (s->type.t == MACRO_FUNC) {
2981 /* whitespace between macro name and argument list */
2982 TokenString ws_str;
2983 tok_str_new(&ws_str);
2985 spc = 0;
2986 parse_flags |= PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED
2987 | PARSE_FLAG_ACCEPT_STRAYS;
2989 /* get next token from argument stream */
2990 t = next_argstream(nested_list, can_read_stream, &ws_str);
2991 if (t != '(') {
2992 /* not a macro substitution after all, restore the
2993 * macro token plus all whitespace we've read.
2994 * whitespace is intentionally not merged to preserve
2995 * newlines. */
2996 parse_flags = saved_parse_flags;
2997 tok_str_add(tok_str, tok);
2998 if (parse_flags & PARSE_FLAG_SPACES) {
2999 int i;
3000 for (i = 0; i < ws_str.len; i++)
3001 tok_str_add(tok_str, ws_str.str[i]);
3003 tok_str_free(ws_str.str);
3004 return 0;
3005 } else {
3006 tok_str_free(ws_str.str);
3008 next_nomacro(); /* eat '(' */
3010 /* argument macro */
3011 args = NULL;
3012 sa = s->next;
3013 /* NOTE: empty args are allowed, except if no args */
3014 for(;;) {
3015 do {
3016 next_argstream(nested_list, can_read_stream, NULL);
3017 } while (is_space(tok) || TOK_LINEFEED == tok);
3018 empty_arg:
3019 /* handle '()' case */
3020 if (!args && !sa && tok == ')')
3021 break;
3022 if (!sa)
3023 tcc_error("macro '%s' used with too many args",
3024 get_tok_str(s->v, 0));
3025 tok_str_new(&str);
3026 parlevel = spc = 0;
3027 /* NOTE: non zero sa->t indicates VA_ARGS */
3028 while ((parlevel > 0 ||
3029 (tok != ')' &&
3030 (tok != ',' || sa->type.t)))) {
3031 if (tok == TOK_EOF || tok == 0)
3032 break;
3033 if (tok == '(')
3034 parlevel++;
3035 else if (tok == ')')
3036 parlevel--;
3037 if (tok == TOK_LINEFEED)
3038 tok = ' ';
3039 if (!check_space(tok, &spc))
3040 tok_str_add2(&str, tok, &tokc);
3041 next_argstream(nested_list, can_read_stream, NULL);
3043 if (parlevel)
3044 expect(")");
3045 str.len -= spc;
3046 tok_str_add(&str, 0);
3047 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
3048 sa1->d = str.str;
3049 sa = sa->next;
3050 if (tok == ')') {
3051 /* special case for gcc var args: add an empty
3052 var arg argument if it is omitted */
3053 if (sa && sa->type.t && gnu_ext)
3054 goto empty_arg;
3055 break;
3057 if (tok != ',')
3058 expect(",");
3060 if (sa) {
3061 tcc_error("macro '%s' used with too few args",
3062 get_tok_str(s->v, 0));
3065 parse_flags = saved_parse_flags;
3067 /* now subst each arg */
3068 mstr = macro_arg_subst(nested_list, mstr, args);
3069 /* free memory */
3070 sa = args;
3071 while (sa) {
3072 sa1 = sa->prev;
3073 tok_str_free(sa->d);
3074 sym_free(sa);
3075 sa = sa1;
3079 sym_push2(nested_list, s->v, 0, 0);
3080 parse_flags = saved_parse_flags;
3081 macro_subst(tok_str, nested_list, mstr, can_read_stream);
3083 /* pop nested defined symbol */
3084 sa1 = *nested_list;
3085 *nested_list = sa1->prev;
3086 sym_free(sa1);
3087 if (mstr != s->d)
3088 tok_str_free(mstr);
3090 return 0;
3093 int paste_tokens(int t1, CValue *v1, int t2, CValue *v2)
3095 CString cstr;
3096 int n;
3098 cstr_new(&cstr);
3099 if (t1 != TOK_PLCHLDR)
3100 cstr_cat(&cstr, get_tok_str(t1, v1));
3101 n = cstr.size;
3102 if (t2 != TOK_PLCHLDR)
3103 cstr_cat(&cstr, get_tok_str(t2, v2));
3104 cstr_ccat(&cstr, '\0');
3106 tcc_open_bf(tcc_state, ":paste:", cstr.size);
3107 memcpy(file->buffer, cstr.data, cstr.size);
3108 for (;;) {
3109 next_nomacro1();
3110 if (0 == *file->buf_ptr)
3111 break;
3112 if (is_space(tok))
3113 continue;
3114 tcc_warning("pasting <%.*s> and <%s> does not give a valid preprocessing token",
3115 n, cstr.data, (char*)cstr.data + n);
3116 break;
3118 tcc_close();
3120 //printf("paste <%s>\n", (char*)cstr.data);
3121 cstr_free(&cstr);
3122 return 0;
3125 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
3126 return the resulting string (which must be freed). */
3127 static inline int *macro_twosharps(const int *ptr0)
3129 int t;
3130 CValue cval;
3131 TokenString macro_str1;
3132 int start_of_nosubsts = -1;
3133 const int *ptr;
3135 /* we search the first '##' */
3136 for (ptr = ptr0;;) {
3137 TOK_GET(&t, &ptr, &cval);
3138 if (t == TOK_TWOSHARPS)
3139 break;
3140 if (t == 0)
3141 return NULL;
3144 tok_str_new(&macro_str1);
3146 //tok_print(" $$$", ptr0);
3147 for (ptr = ptr0;;) {
3148 TOK_GET(&t, &ptr, &cval);
3149 if (t == 0)
3150 break;
3151 if (t == TOK_TWOSHARPS)
3152 continue;
3153 while (*ptr == TOK_TWOSHARPS) {
3154 int t1; CValue cv1;
3155 /* given 'a##b', remove nosubsts preceding 'a' */
3156 if (start_of_nosubsts >= 0)
3157 macro_str1.len = start_of_nosubsts;
3158 /* given 'a##b', remove nosubsts preceding 'b' */
3159 while ((t1 = *++ptr) == TOK_NOSUBST)
3161 if (t1 && t1 != TOK_TWOSHARPS
3162 && t1 != ':') /* 'a##:' don't build a new token */
3164 TOK_GET(&t1, &ptr, &cv1);
3165 if (t != TOK_PLCHLDR || t1 != TOK_PLCHLDR) {
3166 paste_tokens(t, &cval, t1, &cv1);
3167 t = tok, cval = tokc;
3171 if (t == TOK_NOSUBST) {
3172 if (start_of_nosubsts < 0)
3173 start_of_nosubsts = macro_str1.len;
3174 } else {
3175 start_of_nosubsts = -1;
3177 tok_str_add2(&macro_str1, t, &cval);
3179 tok_str_add(&macro_str1, 0);
3180 //tok_print(" ###", macro_str1.str);
3181 return macro_str1.str;
3184 /* do macro substitution of macro_str and add result to
3185 (tok_str,tok_len). 'nested_list' is the list of all macros we got
3186 inside to avoid recursing. */
3187 static void macro_subst(
3188 TokenString *tok_str,
3189 Sym **nested_list,
3190 const int *macro_str,
3191 int can_read_stream
3194 Sym *s;
3195 const int *ptr;
3196 int t, spc, nosubst;
3197 CValue cval;
3198 int *macro_str1 = NULL;
3200 /* first scan for '##' operator handling */
3201 ptr = macro_str;
3202 spc = nosubst = 0;
3204 /* first scan for '##' operator handling */
3205 if (can_read_stream) {
3206 macro_str1 = macro_twosharps(ptr);
3207 if (macro_str1)
3208 ptr = macro_str1;
3211 while (1) {
3212 TOK_GET(&t, &ptr, &cval);
3213 if (t == 0)
3214 break;
3216 if (t >= TOK_IDENT && 0 == nosubst) {
3217 s = define_find(t);
3218 if (s == NULL)
3219 goto no_subst;
3221 /* if nested substitution, do nothing */
3222 if (sym_find2(*nested_list, t)) {
3223 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
3224 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
3225 goto no_subst;
3229 TokenString str;
3230 str.str = (int*)ptr;
3231 begin_macro(&str, 2);
3233 tok = t;
3234 macro_subst_tok(tok_str, nested_list, s, can_read_stream);
3236 if (str.alloc == 3) {
3237 /* already finished by reading function macro arguments */
3238 break;
3241 ptr = macro_ptr;
3242 end_macro ();
3245 spc = (tok_str->len &&
3246 is_space(tok_last(tok_str->str,
3247 tok_str->str + tok_str->len)));
3249 } else {
3251 if (t == '\\' && !(parse_flags & PARSE_FLAG_ACCEPT_STRAYS))
3252 tcc_error("stray '\\' in program");
3254 no_subst:
3255 if (!check_space(t, &spc))
3256 tok_str_add2(tok_str, t, &cval);
3257 nosubst = 0;
3258 if (t == TOK_NOSUBST)
3259 nosubst = 1;
3262 if (macro_str1)
3263 tok_str_free(macro_str1);
3267 /* return next token with macro substitution */
3268 ST_FUNC void next(void)
3270 redo:
3271 if (parse_flags & PARSE_FLAG_SPACES)
3272 next_nomacro_spc();
3273 else
3274 next_nomacro();
3276 if (macro_ptr) {
3277 if (tok == TOK_NOSUBST || tok == TOK_PLCHLDR) {
3278 /* discard preprocessor markers */
3279 goto redo;
3280 } else if (tok == 0) {
3281 /* end of macro or unget token string */
3282 end_macro();
3283 goto redo;
3285 } else if (tok >= TOK_IDENT && (parse_flags & PARSE_FLAG_PREPROCESS)) {
3286 Sym *s;
3287 /* if reading from file, try to substitute macros */
3288 s = define_find(tok);
3289 if (s) {
3290 static TokenString str; /* using static string for speed */
3291 Sym *nested_list = NULL;
3292 tok_str_new(&str);
3293 nested_list = NULL;
3294 macro_subst_tok(&str, &nested_list, s, 1);
3295 tok_str_add(&str, 0);
3296 begin_macro(&str, 0);
3297 goto redo;
3300 /* convert preprocessor tokens into C tokens */
3301 if (tok == TOK_PPNUM) {
3302 if (parse_flags & PARSE_FLAG_TOK_NUM)
3303 parse_number((char *)tokc.str.data);
3304 } else if (tok == TOK_PPSTR) {
3305 if (parse_flags & PARSE_FLAG_TOK_STR)
3306 parse_string((char *)tokc.str.data, tokc.str.size - 1);
3310 /* push back current token and set current token to 'last_tok'. Only
3311 identifier case handled for labels. */
3312 ST_INLN void unget_tok(int last_tok)
3314 TokenString *str = tcc_malloc(sizeof *str);
3315 tok_str_new(str);
3316 tok_str_add2(str, tok, &tokc);
3317 tok_str_add(str, 0);
3318 begin_macro(str, 1);
3319 tok = last_tok;
3322 /* better than nothing, but needs extension to handle '-E' option
3323 correctly too */
3324 ST_FUNC void preprocess_init(TCCState *s1)
3326 s1->include_stack_ptr = s1->include_stack;
3327 /* XXX: move that before to avoid having to initialize
3328 file->ifdef_stack_ptr ? */
3329 s1->ifdef_stack_ptr = s1->ifdef_stack;
3330 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3332 pvtop = vtop = vstack - 1;
3333 s1->pack_stack[0] = 0;
3334 s1->pack_stack_ptr = s1->pack_stack;
3336 isidnum_table['$' - CH_EOF] =
3337 tcc_state->dollars_in_identifiers ? IS_ID : 0;
3340 ST_FUNC void preprocess_new(void)
3342 int i, c;
3343 const char *p, *r;
3345 /* init isid table */
3346 for(i = CH_EOF; i<256; i++)
3347 isidnum_table[i - CH_EOF]
3348 = is_space(i) ? IS_SPC
3349 : isid(i) ? IS_ID
3350 : isnum(i) ? IS_NUM
3351 : 0;
3353 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3355 tok_ident = TOK_IDENT;
3356 p = tcc_keywords;
3357 while (*p) {
3358 r = p;
3359 for(;;) {
3360 c = *r++;
3361 if (c == '\0')
3362 break;
3364 tok_alloc(p, r - p - 1);
3365 p = r;
3369 ST_FUNC void preprocess_delete(void)
3371 int i, n;
3373 /* free -D and compiler defines */
3374 free_defines(NULL);
3376 /* cleanup from error/setjmp */
3377 while (macro_stack)
3378 end_macro();
3379 macro_ptr = NULL;
3381 /* free tokens */
3382 n = tok_ident - TOK_IDENT;
3383 for(i = 0; i < n; i++)
3384 tcc_free(table_ident[i]);
3385 tcc_free(table_ident);
3386 table_ident = NULL;
3389 /* Preprocess the current file */
3390 ST_FUNC int tcc_preprocess(TCCState *s1)
3392 BufferedFile **iptr;
3393 int token_seen, spcs, level;
3395 preprocess_init(s1);
3396 ch = file->buf_ptr[0];
3397 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3398 parse_flags = PARSE_FLAG_PREPROCESS
3399 | (parse_flags & PARSE_FLAG_ASM_FILE)
3400 | PARSE_FLAG_LINEFEED
3401 | PARSE_FLAG_SPACES
3402 | PARSE_FLAG_ACCEPT_STRAYS
3405 #ifdef PP_BENCH
3406 do next(); while (tok != TOK_EOF); return 0;
3407 #endif
3409 token_seen = spcs = 0;
3410 pp_line(s1, file, 0);
3412 for (;;) {
3413 iptr = s1->include_stack_ptr;
3414 next();
3415 if (tok == TOK_EOF)
3416 break;
3417 level = s1->include_stack_ptr - iptr;
3418 if (level) {
3419 if (level > 0)
3420 pp_line(s1, *iptr, 0);
3421 pp_line(s1, file, level);
3424 if (0 == token_seen) {
3425 if (tok == ' ') {
3426 ++spcs;
3427 continue;
3429 if (tok == TOK_LINEFEED) {
3430 spcs = 0;
3431 continue;
3433 pp_line(s1, file, 0);
3434 while (spcs)
3435 fputs(" ", s1->ppfp), --spcs;
3436 token_seen = 1;
3438 } else if (tok == TOK_LINEFEED) {
3439 ++file->line_ref;
3440 token_seen = 0;
3443 fputs(get_tok_str(tok, &tokc), s1->ppfp);
3446 return 0;