fix self-referential token pasting
[tinycc.git] / tccpp.c
blob779aa42220082544f9baefba3827f592bec2207c
1 /*
2 * TCC - Tiny C Compiler
3 *
4 * Copyright (c) 2001-2004 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
26 ST_DATA int tok_flags;
27 /* additional information about a token */
28 #define TOK_FLAG_BOL 0x0001 /* beginning of line before */
29 #define TOK_FLAG_BOF 0x0002 /* beginning of file before */
30 #define TOK_FLAG_ENDIF 0x0004 /* a endif was found matching starting #ifdef */
31 #define TOK_FLAG_EOF 0x0008 /* end of file */
33 ST_DATA int parse_flags;
34 #define PARSE_FLAG_PREPROCESS 0x0001 /* activate preprocessing */
35 #define PARSE_FLAG_TOK_NUM 0x0002 /* return numbers instead of TOK_PPNUM */
36 #define PARSE_FLAG_LINEFEED 0x0004 /* line feed is returned as a
37 token. line feed is also
38 returned at eof */
39 #define PARSE_FLAG_ASM_COMMENTS 0x0008 /* '#' can be used for line comment */
40 #define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */
42 ST_DATA struct BufferedFile *file;
43 ST_DATA int ch, tok;
44 ST_DATA CValue tokc;
45 ST_DATA const int *macro_ptr;
46 ST_DATA CString tokcstr; /* current parsed string, if any */
48 /* display benchmark infos */
49 ST_DATA int total_lines;
50 ST_DATA int total_bytes;
51 ST_DATA int tok_ident;
52 ST_DATA TokenSym **table_ident;
54 /* ------------------------------------------------------------------------- */
/* File-local preprocessor state. */
56 static int *macro_ptr_allocated;
/* NOTE(review): the three unget_* objects are not referenced in the part of
   the file visible here; presumably they back a token push-back facility -
   confirm against the rest of the file. */
57 static const int *unget_saved_macro_ptr;
58 static int unget_saved_buffer[TOK_MAX_SIZE + 1];
59 static int unget_buffer_enabled;
/* bucket heads of the identifier hash table; tok_alloc() walks each chain
   through TokenSym.hash_next */
60 static TokenSym *hash_ident[TOK_HASH_SIZE];
61 static char token_buf[STRING_MAX_SIZE + 1];
62 /* true if isid(c) || isnum(c) */
63 static unsigned char isidnum_table[256-CH_EOF];
65 static const char tcc_keywords[] =
66 #define DEF(id, str) str "\0"
67 #include "tcctok.h"
68 #undef DEF
71 /* WARNING: the content of this string encodes token numbers */
/* Table of two-character operators. get_tok_str() scans it in steps of
   three bytes: the two characters of the operator's spelling followed by
   one byte holding the token number, so the octal escapes below must stay
   in sync with the token definitions. A NUL byte ends the table. */
72 static const unsigned char tok_two_chars[] =
73 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
74 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
76 struct macro_level {
77 struct macro_level *prev;
78 const int *p;
81 ST_FUNC void next_nomacro(void);
82 static void next_nomacro_spc(void);
83 static void macro_subst(
84 TokenString *tok_str,
85 Sym **nested_list,
86 const int *macro_str,
87 struct macro_level **can_read_stream
90 ST_FUNC void skip(int c)
92 if (tok != c)
93 error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
94 next();
97 /* ------------------------------------------------------------------------- */
98 /* CString handling */
99 static void cstr_realloc(CString *cstr, int new_size)
101 int size;
102 void *data;
104 size = cstr->size_allocated;
105 if (size == 0)
106 size = 8; /* no need to allocate a too small first string */
107 while (size < new_size)
108 size = size * 2;
109 data = tcc_realloc(cstr->data_allocated, size);
110 if (!data)
111 error("memory full");
112 cstr->data_allocated = data;
113 cstr->size_allocated = size;
114 cstr->data = data;
117 /* add a byte */
118 ST_INLN void cstr_ccat(CString *cstr, int ch)
120 int size;
121 size = cstr->size + 1;
122 if (size > cstr->size_allocated)
123 cstr_realloc(cstr, size);
124 ((unsigned char *)cstr->data)[size - 1] = ch;
125 cstr->size = size;
128 ST_FUNC void cstr_cat(CString *cstr, const char *str)
130 int c;
131 for(;;) {
132 c = *str;
133 if (c == '\0')
134 break;
135 cstr_ccat(cstr, c);
136 str++;
140 /* add a wide char */
141 ST_FUNC void cstr_wccat(CString *cstr, int ch)
143 int size;
144 size = cstr->size + sizeof(nwchar_t);
145 if (size > cstr->size_allocated)
146 cstr_realloc(cstr, size);
147 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
148 cstr->size = size;
151 ST_FUNC void cstr_new(CString *cstr)
153 memset(cstr, 0, sizeof(CString));
156 /* free string and reset it to NULL */
157 ST_FUNC void cstr_free(CString *cstr)
159 tcc_free(cstr->data_allocated);
160 cstr_new(cstr);
163 /* XXX: unicode ? */
164 ST_FUNC void add_char(CString *cstr, int c)
166 if (c == '\'' || c == '\"' || c == '\\') {
167 /* XXX: could be more precise if char or string */
168 cstr_ccat(cstr, '\\');
170 if (c >= 32 && c <= 126) {
171 cstr_ccat(cstr, c);
172 } else {
173 cstr_ccat(cstr, '\\');
174 if (c == '\n') {
175 cstr_ccat(cstr, 'n');
176 } else {
177 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
178 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
179 cstr_ccat(cstr, '0' + (c & 7));
184 /* ------------------------------------------------------------------------- */
185 /* allocate a new token */
186 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
188 TokenSym *ts, **ptable;
189 int i;
191 if (tok_ident >= SYM_FIRST_ANOM)
192 error("memory full");
194 /* expand token table if needed */
195 i = tok_ident - TOK_IDENT;
196 if ((i % TOK_ALLOC_INCR) == 0) {
197 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
198 if (!ptable)
199 error("memory full");
200 table_ident = ptable;
203 ts = tcc_malloc(sizeof(TokenSym) + len);
204 table_ident[i] = ts;
205 ts->tok = tok_ident++;
206 ts->sym_define = NULL;
207 ts->sym_label = NULL;
208 ts->sym_struct = NULL;
209 ts->sym_identifier = NULL;
210 ts->len = len;
211 ts->hash_next = NULL;
212 memcpy(ts->str, str, len);
213 ts->str[len] = '\0';
214 *pts = ts;
215 return ts;
218 #define TOK_HASH_INIT 1
219 #define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
221 /* find a token and add it if not found */
222 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
224 TokenSym *ts, **pts;
225 int i;
226 unsigned int h;
228 h = TOK_HASH_INIT;
229 for(i=0;i<len;i++)
230 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
231 h &= (TOK_HASH_SIZE - 1);
233 pts = &hash_ident[h];
234 for(;;) {
235 ts = *pts;
236 if (!ts)
237 break;
238 if (ts->len == len && !memcmp(ts->str, str, len))
239 return ts;
240 pts = &(ts->hash_next);
242 return tok_alloc_new(pts, str, len);
245 /* XXX: buffer overflow */
246 /* XXX: float tokens */
247 ST_FUNC char *get_tok_str(int v, CValue *cv)
249 static char buf[STRING_MAX_SIZE + 1];
250 static CString cstr_buf;
251 CString *cstr;
252 char *p;
253 int i, len;
255 /* NOTE: to go faster, we give a fixed buffer for small strings */
256 cstr_reset(&cstr_buf);
257 cstr_buf.data = buf;
258 cstr_buf.size_allocated = sizeof(buf);
259 p = buf;
261 switch(v) {
262 case TOK_CINT:
263 case TOK_CUINT:
264 /* XXX: not quite exact, but only useful for testing */
265 sprintf(p, "%u", cv->ui);
266 break;
267 case TOK_CLLONG:
268 case TOK_CULLONG:
269 /* XXX: not quite exact, but only useful for testing */
270 #ifdef _WIN32
271 sprintf(p, "%u", (unsigned)cv->ull);
272 #else
273 sprintf(p, "%Lu", cv->ull);
274 #endif
275 break;
276 case TOK_LCHAR:
277 cstr_ccat(&cstr_buf, 'L');
278 case TOK_CCHAR:
279 cstr_ccat(&cstr_buf, '\'');
280 add_char(&cstr_buf, cv->i);
281 cstr_ccat(&cstr_buf, '\'');
282 cstr_ccat(&cstr_buf, '\0');
283 break;
284 case TOK_PPNUM:
285 cstr = cv->cstr;
286 len = cstr->size - 1;
287 for(i=0;i<len;i++)
288 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
289 cstr_ccat(&cstr_buf, '\0');
290 break;
291 case TOK_LSTR:
292 cstr_ccat(&cstr_buf, 'L');
293 case TOK_STR:
294 cstr = cv->cstr;
295 cstr_ccat(&cstr_buf, '\"');
296 if (v == TOK_STR) {
297 len = cstr->size - 1;
298 for(i=0;i<len;i++)
299 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
300 } else {
301 len = (cstr->size / sizeof(nwchar_t)) - 1;
302 for(i=0;i<len;i++)
303 add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
305 cstr_ccat(&cstr_buf, '\"');
306 cstr_ccat(&cstr_buf, '\0');
307 break;
308 case TOK_LT:
309 v = '<';
310 goto addv;
311 case TOK_GT:
312 v = '>';
313 goto addv;
314 case TOK_DOTS:
315 return strcpy(p, "...");
316 case TOK_A_SHL:
317 return strcpy(p, "<<=");
318 case TOK_A_SAR:
319 return strcpy(p, ">>=");
320 default:
321 if (v < TOK_IDENT) {
322 /* search in two bytes table */
323 const unsigned char *q = tok_two_chars;
324 while (*q) {
325 if (q[2] == v) {
326 *p++ = q[0];
327 *p++ = q[1];
328 *p = '\0';
329 return buf;
331 q += 3;
333 addv:
334 *p++ = v;
335 *p = '\0';
336 } else if (v < tok_ident) {
337 return table_ident[v - TOK_IDENT]->str;
338 } else if (v >= SYM_FIRST_ANOM) {
339 /* special name for anonymous symbol */
340 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
341 } else {
342 /* should never happen */
343 return NULL;
345 break;
347 return cstr_buf.data;
350 /* fill input buffer and peek next char */
351 static int tcc_peekc_slow(BufferedFile *bf)
353 int len;
354 /* only tries to read if really end of buffer */
355 if (bf->buf_ptr >= bf->buf_end) {
356 if (bf->fd != -1) {
357 #if defined(PARSE_DEBUG)
358 len = 8;
359 #else
360 len = IO_BUF_SIZE;
361 #endif
362 len = read(bf->fd, bf->buffer, len);
363 if (len < 0)
364 len = 0;
365 } else {
366 len = 0;
368 total_bytes += len;
369 bf->buf_ptr = bf->buffer;
370 bf->buf_end = bf->buffer + len;
371 *bf->buf_end = CH_EOB;
373 if (bf->buf_ptr < bf->buf_end) {
374 return bf->buf_ptr[0];
375 } else {
376 bf->buf_ptr = bf->buf_end;
377 return CH_EOF;
381 /* return the current character, handling end of block if necessary
382 (but not stray) */
383 ST_FUNC int handle_eob(void)
385 return tcc_peekc_slow(file);
388 /* read next char from current input file and handle end of input buffer */
389 ST_INLN void inp(void)
391 ch = *(++(file->buf_ptr));
392 /* end of buffer/file handling */
393 if (ch == CH_EOB)
394 ch = handle_eob();
397 /* handle '\[\r]\n' */
398 static int handle_stray_noerror(void)
400 while (ch == '\\') {
401 inp();
402 if (ch == '\n') {
403 file->line_num++;
404 inp();
405 } else if (ch == '\r') {
406 inp();
407 if (ch != '\n')
408 goto fail;
409 file->line_num++;
410 inp();
411 } else {
412 fail:
413 return 1;
416 return 0;
/* Like handle_stray_noerror(), but a backslash that does not introduce a
   line continuation is reported as an error. */
static void handle_stray(void)
{
    const int stray = handle_stray_noerror();

    if (stray)
        error("stray '\\' in program");
}
425 /* skip the stray and handle the \\n case. Output an error if
426 incorrect char after the stray */
427 static int handle_stray1(uint8_t *p)
429 int c;
431 if (p >= file->buf_end) {
432 file->buf_ptr = p;
433 c = handle_eob();
434 p = file->buf_ptr;
435 if (c == '\\')
436 goto parse_stray;
437 } else {
438 parse_stray:
439 file->buf_ptr = p;
440 ch = *p;
441 handle_stray();
442 p = file->buf_ptr;
443 c = *p;
445 return c;
448 /* handle just the EOB case, but not stray */
/* Advance 'p' by one byte and load the byte found there into 'c'. When
   that byte is a backslash, handle_eob() is consulted (it refills the
   buffer if its end was reached) and 'c'/'p' are refreshed from the file
   state. Stray backslashes are NOT interpreted here. 'c' and 'p' are
   evaluated several times: pass plain lvalues only. */
#define PEEKC_EOB(c, p)\
do {\
    (p)++;\
    (c) = *(p);\
    if ((c) == '\\') {\
        file->buf_ptr = (p);\
        (c) = handle_eob();\
        (p) = file->buf_ptr;\
    }\
} while (0)
460 /* handle the complicated stray case */
/* Advance 'p' by one byte and load the byte found there into 'c'. A
   backslash is passed to handle_stray1(), which deals with both the end of
   buffer and the backslash-newline cases. 'c' and 'p' are evaluated
   several times: pass plain lvalues only. */
#define PEEKC(c, p)\
do {\
    (p)++;\
    (c) = *(p);\
    if ((c) == '\\') {\
        (c) = handle_stray1(p);\
        (p) = file->buf_ptr;\
    }\
} while (0)
471 /* input with '\[\r]\n' handling. Note that this function cannot
472 handle other characters after '\', so you cannot call it inside
473 strings or comments */
474 ST_FUNC void minp(void)
476 inp();
477 if (ch == '\\')
478 handle_stray();
482 /* single line C++ comments */
/* Skip the remainder of a '//' comment. 'p' is advanced by one before
   scanning starts. The returned pointer designates the '\n' or CH_EOF
   character that ends the comment; that character is not consumed. A
   backslash followed by a newline (optionally preceded by '\r') continues
   the comment on the next line and bumps file->line_num. */
483 static uint8_t *parse_line_comment(uint8_t *p)
485 int c;
487 p++;
488 for(;;) {
489 c = *p;
490 redo:
491 if (c == '\n' || c == CH_EOF) {
492 break;
493 } else if (c == '\\') {
/* a backslash may also mean the end of the buffer was reached: sync the
   file position and let handle_eob() tell which */
494 file->buf_ptr = p;
495 c = handle_eob();
496 p = file->buf_ptr;
497 if (c == '\\') {
498 PEEKC_EOB(c, p);
499 if (c == '\n') {
500 file->line_num++;
501 PEEKC_EOB(c, p);
502 } else if (c == '\r') {
503 PEEKC_EOB(c, p);
504 if (c == '\n') {
505 file->line_num++;
506 PEEKC_EOB(c, p);
509 } else {
/* not a backslash after all: re-examine the refreshed character */
510 goto redo;
512 } else {
513 p++;
516 return p;
519 /* C comments */
/* Skip a C block comment. 'p' is advanced by one before scanning starts.
   Returns a pointer just past the '/' of the closing star-slash pair.
   Newlines inside the comment increment file->line_num; reaching CH_EOF
   before the end of the comment is an error. */
520 ST_FUNC uint8_t *parse_comment(uint8_t *p)
522 int c;
524 p++;
525 for(;;) {
526 /* fast skip loop */
/* (unrolled twice) stops on the only characters that need attention:
   newline, '*' and backslash */
527 for(;;) {
528 c = *p;
529 if (c == '\n' || c == '*' || c == '\\')
530 break;
531 p++;
532 c = *p;
533 if (c == '\n' || c == '*' || c == '\\')
534 break;
535 p++;
537 /* now we can handle all the cases */
538 if (c == '\n') {
539 file->line_num++;
540 p++;
541 } else if (c == '*') {
542 p++;
/* after a '*': further '*' are skipped, '/' ends the comment, and a line
   continuation between the '*' and the '/' is stepped over */
543 for(;;) {
544 c = *p;
545 if (c == '*') {
546 p++;
547 } else if (c == '/') {
548 goto end_of_comment;
549 } else if (c == '\\') {
550 file->buf_ptr = p;
551 c = handle_eob();
552 p = file->buf_ptr;
553 if (c == '\\') {
554 /* skip '\[\r]\n', otherwise just skip the stray */
555 while (c == '\\') {
556 PEEKC_EOB(c, p);
557 if (c == '\n') {
558 file->line_num++;
559 PEEKC_EOB(c, p);
560 } else if (c == '\r') {
561 PEEKC_EOB(c, p);
562 if (c == '\n') {
563 file->line_num++;
564 PEEKC_EOB(c, p);
566 } else {
567 goto after_star;
571 } else {
572 break;
575 after_star: ;
576 } else {
577 /* stray, eob or eof */
578 file->buf_ptr = p;
579 c = handle_eob();
580 p = file->buf_ptr;
581 if (c == CH_EOF) {
582 error("unexpected end of file in comment");
583 } else if (c == '\\') {
584 p++;
588 end_of_comment:
589 p++;
590 return p;
593 #define cinp minp
595 static inline void skip_spaces(void)
597 while (is_space(ch))
598 cinp();
/* Track runs of space tokens. '*spc' is non-zero while the previous token
   was a space. Returns 1 when 't' is a space following another space (the
   caller can then drop it), 0 otherwise; '*spc' is updated to describe 't'. */
static inline int check_space(int t, int *spc)
{
    if (!is_space(t)) {
        *spc = 0;
        return 0;
    }
    if (*spc)
        return 1;
    *spc = 1;
    return 0;
}
612 /* parse a string without interpreting escapes */
/* Scan a quoted string or character constant delimited by 'sep'. 'p' is
   advanced by one before scanning starts. Escape sequences are copied
   verbatim (backslash included) rather than decoded; only
   backslash-newline continuations are removed. When 'str' is not NULL
   the characters are appended to it; a NULL 'str' just skips the string.
   Returns a pointer just past the closing 'sep'. End of file inside the
   string is an error. */
613 static uint8_t *parse_pp_string(uint8_t *p,
614 int sep, CString *str)
616 int c;
617 p++;
618 for(;;) {
619 c = *p;
620 if (c == sep) {
621 break;
622 } else if (c == '\\') {
/* backslash: first find out whether it is real or the end of the buffer */
623 file->buf_ptr = p;
624 c = handle_eob();
625 p = file->buf_ptr;
626 if (c == CH_EOF) {
627 unterminated_string:
628 /* XXX: indicate line number of start of string */
629 error("missing terminating %c character", sep);
630 } else if (c == '\\') {
631 /* escape : just skip \[\r]\n */
632 PEEKC_EOB(c, p);
633 if (c == '\n') {
634 file->line_num++;
635 p++;
636 } else if (c == '\r') {
637 PEEKC_EOB(c, p);
638 if (c != '\n')
639 expect("'\n' after '\r'");
640 file->line_num++;
641 p++;
642 } else if (c == CH_EOF) {
643 goto unterminated_string;
644 } else {
/* any other escape is stored undecoded: backslash plus the character */
645 if (str) {
646 cstr_ccat(str, '\\');
647 cstr_ccat(str, c);
649 p++;
652 } else if (c == '\n') {
/* an unescaped newline is kept in the string, only the line is counted */
653 file->line_num++;
654 goto add_char;
655 } else if (c == '\r') {
656 PEEKC_EOB(c, p);
657 if (c != '\n') {
658 if (str)
659 cstr_ccat(str, '\r');
660 } else {
661 file->line_num++;
662 goto add_char;
664 } else {
665 add_char:
666 if (str)
667 cstr_ccat(str, c);
668 p++;
671 p++;
672 return p;
675 /* skip block of text until #else, #elif or #endif. skip also pairs of
676 #if/#endif */
/* Skip the text of a conditional group that is not compiled. Scanning
   starts at file->buf_ptr and stops right after the directive name of the
   #else, #elif or #endif belonging to the current nesting level (that
   name is left in 'tok'); file->buf_ptr is updated on exit. Nested
   #if/#ifdef/#ifndef ... #endif pairs are counted in 'a' and skipped as a
   whole. Strings and comments are skipped so that a '#' inside them is
   not taken for a directive, except on #error/#warning lines where quotes
   and slashes are treated as ordinary characters. */
677 static void preprocess_skip(void)
679 int a, start_of_line, c, in_warn_or_error;
680 uint8_t *p;
682 p = file->buf_ptr;
683 a = 0;
684 redo_start:
685 start_of_line = 1;
686 in_warn_or_error = 0;
687 for(;;) {
688 redo_no_start:
689 c = *p;
690 switch(c) {
/* white space does not clear 'start_of_line' */
691 case ' ':
692 case '\t':
693 case '\f':
694 case '\v':
695 case '\r':
696 p++;
697 goto redo_no_start;
698 case '\n':
699 file->line_num++;
700 p++;
701 goto redo_start;
702 case '\\':
/* end of buffer marker or real backslash */
703 file->buf_ptr = p;
704 c = handle_eob();
705 if (c == CH_EOF) {
706 expect("#endif");
707 } else if (c == '\\') {
708 ch = file->buf_ptr[0];
709 handle_stray_noerror();
711 p = file->buf_ptr;
712 goto redo_no_start;
713 /* skip strings */
714 case '\"':
715 case '\'':
716 if (in_warn_or_error)
717 goto _default;
718 p = parse_pp_string(p, c, NULL);
719 break;
720 /* skip comments */
721 case '/':
722 if (in_warn_or_error)
723 goto _default;
724 file->buf_ptr = p;
725 ch = *p;
726 minp();
727 p = file->buf_ptr;
728 if (ch == '*') {
729 p = parse_comment(p);
730 } else if (ch == '/') {
731 p = parse_line_comment(p);
733 break;
734 case '#':
735 p++;
/* only a '#' that is the first non-blank character of a line starts a
   directive */
736 if (start_of_line) {
737 file->buf_ptr = p;
738 next_nomacro();
739 p = file->buf_ptr;
740 if (a == 0 &&
741 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
742 goto the_end;
743 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
744 a++;
745 else if (tok == TOK_ENDIF)
746 a--;
747 else if( tok == TOK_ERROR || tok == TOK_WARNING)
748 in_warn_or_error = 1;
749 else if (tok == TOK_LINEFEED)
750 goto redo_start;
752 break;
753 _default:
754 default:
755 p++;
756 break;
758 start_of_line = 0;
760 the_end: ;
761 file->buf_ptr = p;
764 /* ParseState handling */
766 /* XXX: currently, no include file info is stored. Thus, we cannot display
767 accurate messages if the function or data definition spans multiple
768 files */
770 /* save current parse state in 's' */
771 ST_FUNC void save_parse_state(ParseState *s)
773 s->line_num = file->line_num;
774 s->macro_ptr = macro_ptr;
775 s->tok = tok;
776 s->tokc = tokc;
779 /* restore parse state from 's' */
780 ST_FUNC void restore_parse_state(ParseState *s)
782 file->line_num = s->line_num;
783 macro_ptr = s->macro_ptr;
784 tok = s->tok;
785 tokc = s->tokc;
788 /* return the number of additional 'ints' necessary to store the
789 token */
790 static inline int tok_ext_size(int t)
792 switch(t) {
793 /* 4 bytes */
794 case TOK_CINT:
795 case TOK_CUINT:
796 case TOK_CCHAR:
797 case TOK_LCHAR:
798 case TOK_CFLOAT:
799 case TOK_LINENUM:
800 return 1;
801 case TOK_STR:
802 case TOK_LSTR:
803 case TOK_PPNUM:
804 error("unsupported token");
805 return 1;
806 case TOK_CDOUBLE:
807 case TOK_CLLONG:
808 case TOK_CULLONG:
809 return 2;
810 case TOK_CLDOUBLE:
811 return LDOUBLE_SIZE / 4;
812 default:
813 return 0;
817 /* token string handling */
819 ST_INLN void tok_str_new(TokenString *s)
821 s->str = NULL;
822 s->len = 0;
823 s->allocated_len = 0;
824 s->last_line_num = -1;
827 ST_FUNC void tok_str_free(int *str)
829 tcc_free(str);
832 static int *tok_str_realloc(TokenString *s)
834 int *str, len;
836 if (s->allocated_len == 0) {
837 len = 8;
838 } else {
839 len = s->allocated_len * 2;
841 str = tcc_realloc(s->str, len * sizeof(int));
842 if (!str)
843 error("memory full");
844 s->allocated_len = len;
845 s->str = str;
846 return str;
849 ST_FUNC void tok_str_add(TokenString *s, int t)
851 int len, *str;
853 len = s->len;
854 str = s->str;
855 if (len >= s->allocated_len)
856 str = tok_str_realloc(s);
857 str[len++] = t;
858 s->len = len;
861 static void tok_str_add2(TokenString *s, int t, CValue *cv)
863 int len, *str;
865 len = s->len;
866 str = s->str;
868 /* allocate space for worst case */
869 if (len + TOK_MAX_SIZE > s->allocated_len)
870 str = tok_str_realloc(s);
871 str[len++] = t;
872 switch(t) {
873 case TOK_CINT:
874 case TOK_CUINT:
875 case TOK_CCHAR:
876 case TOK_LCHAR:
877 case TOK_CFLOAT:
878 case TOK_LINENUM:
879 str[len++] = cv->tab[0];
880 break;
881 case TOK_PPNUM:
882 case TOK_STR:
883 case TOK_LSTR:
885 int nb_words;
886 CString *cstr;
888 nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
889 while ((len + nb_words) > s->allocated_len)
890 str = tok_str_realloc(s);
891 cstr = (CString *)(str + len);
892 cstr->data = NULL;
893 cstr->size = cv->cstr->size;
894 cstr->data_allocated = NULL;
895 cstr->size_allocated = cstr->size;
896 memcpy((char *)cstr + sizeof(CString),
897 cv->cstr->data, cstr->size);
898 len += nb_words;
900 break;
901 case TOK_CDOUBLE:
902 case TOK_CLLONG:
903 case TOK_CULLONG:
904 #if LDOUBLE_SIZE == 8
905 case TOK_CLDOUBLE:
906 #endif
907 str[len++] = cv->tab[0];
908 str[len++] = cv->tab[1];
909 break;
910 #if LDOUBLE_SIZE == 12
911 case TOK_CLDOUBLE:
912 str[len++] = cv->tab[0];
913 str[len++] = cv->tab[1];
914 str[len++] = cv->tab[2];
915 #elif LDOUBLE_SIZE == 16
916 case TOK_CLDOUBLE:
917 str[len++] = cv->tab[0];
918 str[len++] = cv->tab[1];
919 str[len++] = cv->tab[2];
920 str[len++] = cv->tab[3];
921 #elif LDOUBLE_SIZE != 8
922 #error add long double size support
923 #endif
924 break;
925 default:
926 break;
928 s->len = len;
931 /* add the current parse token in token string 's' */
932 ST_FUNC void tok_str_add_tok(TokenString *s)
934 CValue cval;
936 /* save line number info */
937 if (file->line_num != s->last_line_num) {
938 s->last_line_num = file->line_num;
939 cval.i = s->last_line_num;
940 tok_str_add2(s, TOK_LINENUM, &cval);
942 tok_str_add2(s, tok, &tokc);
945 /* get a token from an integer array and increment pointer
946 accordingly. we code it as a macro to avoid pointer aliasing. */
947 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
949 const int *p = *pp;
950 int n, *tab;
952 tab = cv->tab;
953 switch(*t = *p++) {
954 case TOK_CINT:
955 case TOK_CUINT:
956 case TOK_CCHAR:
957 case TOK_LCHAR:
958 case TOK_CFLOAT:
959 case TOK_LINENUM:
960 tab[0] = *p++;
961 break;
962 case TOK_STR:
963 case TOK_LSTR:
964 case TOK_PPNUM:
965 cv->cstr = (CString *)p;
966 cv->cstr->data = (char *)p + sizeof(CString);
967 p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
968 break;
969 case TOK_CDOUBLE:
970 case TOK_CLLONG:
971 case TOK_CULLONG:
972 n = 2;
973 goto copy;
974 case TOK_CLDOUBLE:
975 #if LDOUBLE_SIZE == 16
976 n = 4;
977 #elif LDOUBLE_SIZE == 12
978 n = 3;
979 #elif LDOUBLE_SIZE == 8
980 n = 2;
981 #else
982 # error add long double size support
983 #endif
984 copy:
986 *tab++ = *p++;
987 while (--n);
988 break;
989 default:
990 break;
992 *pp = p;
995 static int macro_is_equal(const int *a, const int *b)
997 char buf[STRING_MAX_SIZE + 1];
998 CValue cv;
999 int t;
1000 while (*a && *b) {
1001 TOK_GET(&t, &a, &cv);
1002 pstrcpy(buf, sizeof buf, get_tok_str(t, &cv));
1003 TOK_GET(&t, &b, &cv);
1004 if (strcmp(buf, get_tok_str(t, &cv)))
1005 return 0;
1007 return !(*a || *b);
1010 /* defines handling */
1011 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1013 Sym *s;
1015 s = define_find(v);
1016 if (s && !macro_is_equal(s->d, str))
1017 warning("%s redefined", get_tok_str(v, NULL));
1019 s = sym_push2(&define_stack, v, macro_type, 0);
1020 s->d = str;
1021 s->next = first_arg;
1022 table_ident[v - TOK_IDENT]->sym_define = s;
1025 /* undefine a define symbol. Its name is just set to zero */
1026 ST_FUNC void define_undef(Sym *s)
1028 int v;
1029 v = s->v;
1030 if (v >= TOK_IDENT && v < tok_ident)
1031 table_ident[v - TOK_IDENT]->sym_define = NULL;
1032 s->v = 0;
1035 ST_INLN Sym *define_find(int v)
1037 v -= TOK_IDENT;
1038 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1039 return NULL;
1040 return table_ident[v]->sym_define;
1043 /* free define stack until top reaches 'b' */
1044 ST_FUNC void free_defines(Sym *b)
1046 Sym *top, *top1;
1047 int v;
1049 top = define_stack;
1050 while (top != b) {
1051 top1 = top->prev;
1052 /* do not free args or predefined defines */
1053 if (top->d)
1054 tok_str_free(top->d);
1055 v = top->v;
1056 if (v >= TOK_IDENT && v < tok_ident)
1057 table_ident[v - TOK_IDENT]->sym_define = NULL;
1058 sym_free(top);
1059 top = top1;
1061 define_stack = b;
1064 /* label lookup */
1065 ST_FUNC Sym *label_find(int v)
1067 v -= TOK_IDENT;
1068 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1069 return NULL;
1070 return table_ident[v]->sym_label;
1073 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1075 Sym *s, **ps;
1076 s = sym_push2(ptop, v, 0, 0);
1077 s->r = flags;
1078 ps = &table_ident[v - TOK_IDENT]->sym_label;
1079 if (ptop == &global_label_stack) {
1080 /* modify the top most local identifier, so that
1081 sym_identifier will point to 's' when popped */
1082 while (*ps != NULL)
1083 ps = &(*ps)->prev_tok;
1085 s->prev_tok = *ps;
1086 *ps = s;
1087 return s;
1090 /* pop labels until element last is reached. Look if any labels are
1091 undefined. Define symbols if '&&label' was used. */
1092 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1094 Sym *s, *s1;
1095 for(s = *ptop; s != slast; s = s1) {
1096 s1 = s->prev;
1097 if (s->r == LABEL_DECLARED) {
1098 warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1099 } else if (s->r == LABEL_FORWARD) {
1100 error("label '%s' used but not defined",
1101 get_tok_str(s->v, NULL));
1102 } else {
1103 if (s->c) {
1104 /* define corresponding symbol. A size of
1105 1 is put. */
1106 put_extern_sym(s, cur_text_section, s->jnext, 1);
1109 /* remove label */
1110 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1111 sym_free(s);
1113 *ptop = slast;
1116 /* eval an expression for #if/#elif */
1117 static int expr_preprocess(void)
1119 int c, t;
1120 TokenString str;
1122 tok_str_new(&str);
1123 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1124 next(); /* do macro subst */
1125 if (tok == TOK_DEFINED) {
1126 next_nomacro();
1127 t = tok;
1128 if (t == '(')
1129 next_nomacro();
1130 c = define_find(tok) != 0;
1131 if (t == '(')
1132 next_nomacro();
1133 tok = TOK_CINT;
1134 tokc.i = c;
1135 } else if (tok >= TOK_IDENT) {
1136 /* if undefined macro */
1137 tok = TOK_CINT;
1138 tokc.i = 0;
1140 tok_str_add_tok(&str);
1142 tok_str_add(&str, -1); /* simulate end of file */
1143 tok_str_add(&str, 0);
1144 /* now evaluate C constant expression */
1145 macro_ptr = str.str;
1146 next();
1147 c = expr_const();
1148 macro_ptr = NULL;
1149 tok_str_free(str.str);
1150 return c != 0;
#if defined(PARSE_DEBUG) || defined(PP_DEBUG)
/* Debug helper: print the spelling of every token of the zero-terminated
   token string 'str' between '<' and '>'. */
static void tok_print(int *str)
{
    /* TOK_GET() advances a 'const int *' cursor; use one of the right
       type instead of passing the address of the 'int *' parameter */
    const int *cursor = str;
    int t;
    CValue cval;

    printf("<");
    while (1) {
        TOK_GET(&t, &cursor, &cval);
        if (!t)
            break;
        printf("%s", get_tok_str(t, &cval));
    }
    printf(">\n");
}
#endif
1170 /* parse after #define */
/* Parse the remainder of a #define directive. On entry 'tok' is the macro
   name. A '(' immediately after the name (no space in between) starts a
   parameter list and makes the macro function-like. The replacement list
   is then collected up to the end of the line, or up to end of file,
   and registered with define_push(). */
1171 ST_FUNC void parse_define(void)
1173 Sym *s, *first, **ps;
1174 int v, t, varg, is_vaargs, spc;
1175 TokenString str;
1177 v = tok;
1178 if (v < TOK_IDENT)
1179 error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1180 /* XXX: should check if same macro (ANSI) */
1181 first = NULL;
1182 t = MACRO_OBJ;
1183 /* '(' must be just after macro definition for MACRO_FUNC */
1184 next_nomacro_spc();
1185 if (tok == '(') {
1186 next_nomacro();
1187 ps = &first;
1188 while (tok != ')') {
1189 varg = tok;
1190 next_nomacro();
1191 is_vaargs = 0;
/* a bare '...' is named __VA_ARGS__; 'name...' is accepted when gnu_ext
   is set */
1192 if (varg == TOK_DOTS) {
1193 varg = TOK___VA_ARGS__;
1194 is_vaargs = 1;
1195 } else if (tok == TOK_DOTS && gnu_ext) {
1196 is_vaargs = 1;
1197 next_nomacro();
1199 if (varg < TOK_IDENT)
1200 error("badly punctuated parameter list");
/* parameters are pushed on the define stack and chained through 'next' */
1201 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1202 *ps = s;
1203 ps = &s->next;
1204 if (tok != ',')
1205 break;
1206 next_nomacro();
1208 if (tok == ')')
1209 next_nomacro_spc();
1210 t = MACRO_FUNC;
1212 tok_str_new(&str);
/* 'spc' is the state used by check_space(); it is also set to 2 after '#'
   and '##', so that a space following them is dropped */
1213 spc = 2;
1214 /* EOF testing necessary for '-D' handling */
1215 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1216 /* remove spaces around ## and after '#' */
1217 if (TOK_TWOSHARPS == tok) {
1218 if (1 == spc)
1219 --str.len;
1220 spc = 2;
1221 } else if ('#' == tok) {
1222 spc = 2;
1223 } else if (check_space(tok, &spc)) {
1224 goto skip;
1226 tok_str_add2(&str, tok, &tokc);
1227 skip:
1228 next_nomacro_spc();
1230 if (spc == 1)
1231 --str.len; /* remove trailing space */
1232 tok_str_add(&str, 0);
1233 #ifdef PP_DEBUG
1234 printf("define %s %d: ", get_tok_str(v, NULL), t);
1235 tok_print(str.str);
1236 #endif
1237 define_push(v, t, str.str, first);
1240 static inline int hash_cached_include(int type, const char *filename)
1242 const unsigned char *s;
1243 unsigned int h;
1245 h = TOK_HASH_INIT;
1246 h = TOK_HASH_FUNC(h, type);
1247 s = filename;
1248 while (*s) {
1249 h = TOK_HASH_FUNC(h, *s);
1250 s++;
1252 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1253 return h;
1256 /* XXX: use a token or a hash table to accelerate matching ? */
1257 static CachedInclude *search_cached_include(TCCState *s1,
1258 int type, const char *filename)
1260 CachedInclude *e;
1261 int i, h;
1262 h = hash_cached_include(type, filename);
1263 i = s1->cached_includes_hash[h];
1264 for(;;) {
1265 if (i == 0)
1266 break;
1267 e = s1->cached_includes[i - 1];
1268 if (e->type == type && !PATHCMP(e->filename, filename))
1269 return e;
1270 i = e->hash_next;
1272 return NULL;
1275 static inline void add_cached_include(TCCState *s1, int type,
1276 const char *filename, int ifndef_macro)
1278 CachedInclude *e;
1279 int h;
1281 if (search_cached_include(s1, type, filename))
1282 return;
1283 #ifdef INC_DEBUG
1284 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1285 #endif
1286 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1287 if (!e)
1288 return;
1289 e->type = type;
1290 strcpy(e->filename, filename);
1291 e->ifndef_macro = ifndef_macro;
1292 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1293 /* add in hash table */
1294 h = hash_cached_include(type, filename);
1295 e->hash_next = s1->cached_includes_hash[h];
1296 s1->cached_includes_hash[h] = s1->nb_cached_includes;
/* Handle a #pragma directive. Only '#pragma pack' is interpreted here;
   after any other pragma name the function returns without reading
   further. The pack values live on a stack in 's1': 'pop' moves
   pack_stack_ptr down, 'push' moves it up, and the value (0 when omitted,
   otherwise a power of two from 1 to 16) is stored in the slot the
   pointer designates. */
1299 static void pragma_parse(TCCState *s1)
1301 int val;
1303 next();
1304 if (tok == TOK_pack) {
1306 This may be:
1307 #pragma pack(1) // set
1308 #pragma pack() // reset to default
1309 #pragma pack(push,1) // push & set
1310 #pragma pack(pop) // restore previous
1312 next();
1313 skip('(');
1314 if (tok == TOK_ASM_pop) {
1315 next();
/* popping below the bottom of the stack is an error */
1316 if (s1->pack_stack_ptr <= s1->pack_stack) {
1317 stk_error:
1318 error("out of pack stack");
1320 s1->pack_stack_ptr--;
1321 } else {
1322 val = 0;
1323 if (tok != ')') {
1324 if (tok == TOK_ASM_push) {
1325 next();
1326 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
1327 goto stk_error;
1328 s1->pack_stack_ptr++;
1329 skip(',');
1331 if (tok != TOK_CINT) {
1332 pack_error:
1333 error("invalid pack pragma");
1335 val = tokc.i;
/* only 1, 2, 4, 8 and 16 are accepted */
1336 if (val < 1 || val > 16 || (val & (val - 1)) != 0)
1337 goto pack_error;
1338 next();
1340 *s1->pack_stack_ptr = val;
1341 skip(')');
1346 /* is_bof is true if first non space token at beginning of file */
1347 ST_FUNC void preprocess(int is_bof)
1349 TCCState *s1 = tcc_state;
1350 int i, c, n, saved_parse_flags;
1351 char buf[1024], *q;
1352 Sym *s;
1354 saved_parse_flags = parse_flags;
1355 parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM |
1356 PARSE_FLAG_LINEFEED;
1357 next_nomacro();
1358 redo:
1359 switch(tok) {
1360 case TOK_DEFINE:
1361 next_nomacro();
1362 parse_define();
1363 break;
1364 case TOK_UNDEF:
1365 next_nomacro();
1366 s = define_find(tok);
1367 /* undefine symbol by putting an invalid name */
1368 if (s)
1369 define_undef(s);
1370 break;
1371 case TOK_INCLUDE:
1372 case TOK_INCLUDE_NEXT:
1373 ch = file->buf_ptr[0];
1374 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1375 skip_spaces();
1376 if (ch == '<') {
1377 c = '>';
1378 goto read_name;
1379 } else if (ch == '\"') {
1380 c = ch;
1381 read_name:
1382 inp();
1383 q = buf;
1384 while (ch != c && ch != '\n' && ch != CH_EOF) {
1385 if ((q - buf) < sizeof(buf) - 1)
1386 *q++ = ch;
1387 if (ch == '\\') {
1388 if (handle_stray_noerror() == 0)
1389 --q;
1390 } else
1391 inp();
1393 *q = '\0';
1394 minp();
1395 #if 0
1396 /* eat all spaces and comments after include */
1397 /* XXX: slightly incorrect */
1398 while (ch1 != '\n' && ch1 != CH_EOF)
1399 inp();
1400 #endif
1401 } else {
1402 /* computed #include : either we have only strings or
1403 we have anything enclosed in '<>' */
1404 next();
1405 buf[0] = '\0';
1406 if (tok == TOK_STR) {
1407 while (tok != TOK_LINEFEED) {
1408 if (tok != TOK_STR) {
1409 include_syntax:
1410 error("'#include' expects \"FILENAME\" or <FILENAME>");
1412 pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
1413 next();
1415 c = '\"';
1416 } else {
1417 int len;
1418 while (tok != TOK_LINEFEED) {
1419 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1420 next();
1422 len = strlen(buf);
1423 /* check syntax and remove '<>' */
1424 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1425 goto include_syntax;
1426 memmove(buf, buf + 1, len - 2);
1427 buf[len - 2] = '\0';
1428 c = '>';
1432 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1433 error("#include recursion too deep");
1435 n = s1->nb_include_paths + s1->nb_sysinclude_paths;
1436 for (i = -2; i < n; ++i) {
1437 char buf1[sizeof file->filename];
1438 CachedInclude *e;
1439 const char *path;
1440 int size, fd;
1442 if (i == -2) {
1443 /* check absolute include path */
1444 if (!IS_ABSPATH(buf))
1445 continue;
1446 buf1[0] = 0;
1448 } else if (i == -1) {
1449 /* search in current dir if "header.h" */
1450 if (c != '\"')
1451 continue;
1452 size = tcc_basename(file->filename) - file->filename;
1453 memcpy(buf1, file->filename, size);
1454 buf1[size] = '\0';
1456 } else {
1457 /* search in all the include paths */
1458 if (i < s1->nb_include_paths)
1459 path = s1->include_paths[i];
1460 else
1461 path = s1->sysinclude_paths[i - s1->nb_include_paths];
1462 pstrcpy(buf1, sizeof(buf1), path);
1463 pstrcat(buf1, sizeof(buf1), "/");
1466 pstrcat(buf1, sizeof(buf1), buf);
1468 e = search_cached_include(s1, c, buf1);
1469 if (e && define_find(e->ifndef_macro)) {
1470 /* no need to parse the include because the 'ifndef macro'
1471 is defined */
1472 #ifdef INC_DEBUG
1473 printf("%s: skipping %s\n", file->filename, buf);
1474 #endif
1475 fd = 0;
1476 } else {
1477 fd = tcc_open(s1, buf1);
1478 if (fd < 0)
1479 continue;
1482 if (tok == TOK_INCLUDE_NEXT) {
1483 tok = TOK_INCLUDE;
1484 if (fd)
1485 tcc_close();
1486 continue;
1489 if (0 == fd)
1490 goto include_done;
1492 #ifdef INC_DEBUG
1493 printf("%s: including %s\n", file->filename, buf1);
1494 #endif
1495 /* update target deps */
1496 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps,
1497 tcc_strdup(buf1));
1498 /* XXX: fix current line init */
1499 /* push current file in stack */
1500 *s1->include_stack_ptr++ = file->prev;
1501 file->inc_type = c;
1502 pstrcpy(file->inc_filename, sizeof(file->inc_filename), buf1);
1503 /* add include file debug info */
1504 if (s1->do_debug)
1505 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1506 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1507 ch = file->buf_ptr[0];
1508 goto the_end;
1510 error("include file '%s' not found", buf);
1511 include_done:
1512 break;
1513 case TOK_IFNDEF:
1514 c = 1;
1515 goto do_ifdef;
1516 case TOK_IF:
1517 c = expr_preprocess();
1518 goto do_if;
1519 case TOK_IFDEF:
1520 c = 0;
1521 do_ifdef:
1522 next_nomacro();
1523 if (tok < TOK_IDENT)
1524 error("invalid argument for '#if%sdef'", c ? "n" : "");
1525 if (is_bof) {
1526 if (c) {
1527 #ifdef INC_DEBUG
1528 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1529 #endif
1530 file->ifndef_macro = tok;
1533 c = (define_find(tok) != 0) ^ c;
1534 do_if:
1535 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1536 error("memory full");
1537 *s1->ifdef_stack_ptr++ = c;
1538 goto test_skip;
1539 case TOK_ELSE:
1540 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1541 error("#else without matching #if");
1542 if (s1->ifdef_stack_ptr[-1] & 2)
1543 error("#else after #else");
1544 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1545 goto test_else;
1546 case TOK_ELIF:
1547 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1548 error("#elif without matching #if");
1549 c = s1->ifdef_stack_ptr[-1];
1550 if (c > 1)
1551 error("#elif after #else");
1552 /* last #if/#elif expression was true: we skip */
1553 if (c == 1)
1554 goto skip;
1555 c = expr_preprocess();
1556 s1->ifdef_stack_ptr[-1] = c;
1557 test_else:
1558 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1559 file->ifndef_macro = 0;
1560 test_skip:
1561 if (!(c & 1)) {
1562 skip:
1563 preprocess_skip();
1564 is_bof = 0;
1565 goto redo;
1567 break;
1568 case TOK_ENDIF:
1569 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1570 error("#endif without matching #if");
1571 s1->ifdef_stack_ptr--;
1572 /* '#ifndef macro' was at the start of file. Now we check if
1573 an '#endif' is exactly at the end of file */
1574 if (file->ifndef_macro &&
1575 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1576 file->ifndef_macro_saved = file->ifndef_macro;
1577 /* need to set to zero to avoid false matches if another
1578 #ifndef at middle of file */
1579 file->ifndef_macro = 0;
1580 while (tok != TOK_LINEFEED)
1581 next_nomacro();
1582 tok_flags |= TOK_FLAG_ENDIF;
1583 goto the_end;
1585 break;
1586 case TOK_LINE:
1587 next();
1588 if (tok != TOK_CINT)
1589 error("#line");
1590 file->line_num = tokc.i - 1; /* the line number will be incremented after */
1591 next();
1592 if (tok != TOK_LINEFEED) {
1593 if (tok != TOK_STR)
1594 error("#line");
1595 pstrcpy(file->filename, sizeof(file->filename),
1596 (char *)tokc.cstr->data);
1598 break;
1599 case TOK_ERROR:
1600 case TOK_WARNING:
1601 c = tok;
1602 ch = file->buf_ptr[0];
1603 skip_spaces();
1604 q = buf;
1605 while (ch != '\n' && ch != CH_EOF) {
1606 if ((q - buf) < sizeof(buf) - 1)
1607 *q++ = ch;
1608 if (ch == '\\') {
1609 if (handle_stray_noerror() == 0)
1610 --q;
1611 } else
1612 inp();
1614 *q = '\0';
1615 if (c == TOK_ERROR)
1616 error("#error %s", buf);
1617 else
1618 warning("#warning %s", buf);
1619 break;
1620 case TOK_PRAGMA:
1621 pragma_parse(s1);
1622 break;
1623 default:
1624 if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) {
1625 /* '!' is ignored to allow C scripts. numbers are ignored
1626 to emulate cpp behaviour */
1627 } else {
1628 if (!(saved_parse_flags & PARSE_FLAG_ASM_COMMENTS))
1629 warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1630 else {
1631 /* this is a gas line comment in an 'S' file. */
1632 file->buf_ptr = parse_line_comment(file->buf_ptr);
1633 goto the_end;
1636 break;
1638 /* ignore other preprocess commands or #! for C scripts */
1639 while (tok != TOK_LINEFEED)
1640 next_nomacro();
1641 the_end:
1642 parse_flags = saved_parse_flags;
1645 /* evaluate escape codes in a string. */
1646 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1648 int c, n;
1649 const uint8_t *p;
1651 p = buf;
1652 for(;;) {
1653 c = *p;
1654 if (c == '\0')
1655 break;
1656 if (c == '\\') {
1657 p++;
1658 /* escape */
1659 c = *p;
1660 switch(c) {
1661 case '0': case '1': case '2': case '3':
1662 case '4': case '5': case '6': case '7':
1663 /* at most three octal digits */
1664 n = c - '0';
1665 p++;
1666 c = *p;
1667 if (isoct(c)) {
1668 n = n * 8 + c - '0';
1669 p++;
1670 c = *p;
1671 if (isoct(c)) {
1672 n = n * 8 + c - '0';
1673 p++;
1676 c = n;
1677 goto add_char_nonext;
1678 case 'x':
1679 case 'u':
1680 case 'U':
1681 p++;
1682 n = 0;
1683 for(;;) {
1684 c = *p;
1685 if (c >= 'a' && c <= 'f')
1686 c = c - 'a' + 10;
1687 else if (c >= 'A' && c <= 'F')
1688 c = c - 'A' + 10;
1689 else if (isnum(c))
1690 c = c - '0';
1691 else
1692 break;
1693 n = n * 16 + c;
1694 p++;
1696 c = n;
1697 goto add_char_nonext;
1698 case 'a':
1699 c = '\a';
1700 break;
1701 case 'b':
1702 c = '\b';
1703 break;
1704 case 'f':
1705 c = '\f';
1706 break;
1707 case 'n':
1708 c = '\n';
1709 break;
1710 case 'r':
1711 c = '\r';
1712 break;
1713 case 't':
1714 c = '\t';
1715 break;
1716 case 'v':
1717 c = '\v';
1718 break;
1719 case 'e':
1720 if (!gnu_ext)
1721 goto invalid_escape;
1722 c = 27;
1723 break;
1724 case '\'':
1725 case '\"':
1726 case '\\':
1727 case '?':
1728 break;
1729 default:
1730 invalid_escape:
1731 if (c >= '!' && c <= '~')
1732 warning("unknown escape sequence: \'\\%c\'", c);
1733 else
1734 warning("unknown escape sequence: \'\\x%x\'", c);
1735 break;
1738 p++;
1739 add_char_nonext:
1740 if (!is_long)
1741 cstr_ccat(outstr, c);
1742 else
1743 cstr_wccat(outstr, c);
1745 /* add a trailing '\0' */
1746 if (!is_long)
1747 cstr_ccat(outstr, '\0');
1748 else
1749 cstr_wccat(outstr, '\0');
/* we use 64 bit numbers, stored as BN_SIZE 32 bit words with the
   least significant word first */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val

   'shift' must be in the range 0..31. 'or_val' is or-ed into the
   least significant word; the bits shifted out of each word are
   carried into the next one, and the bits shifted out of the most
   significant word are lost. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    for(i=0;i<BN_SIZE;i++) {
        v = bn[i];
        bn[i] = (v << shift) | or_val;
        /* 'v >> 32' would be undefined, so no carry for shift == 0 */
        or_val = shift ? v >> (32 - shift) : 0;
    }
}

/* bn = 0 */
static void bn_zero(unsigned int *bn)
{
    int i;
    for(i=0;i<BN_SIZE;i++) {
        bn[i] = 0;
    }
}
1775 /* parse number in null terminated string 'p' and return it in the
1776 current token */
1777 static void parse_number(const char *p)
1779 int b, t, shift, frac_bits, s, exp_val, ch;
1780 char *q;
1781 unsigned int bn[BN_SIZE];
1782 double d;
1784 /* number */
1785 q = token_buf;
1786 ch = *p++;
1787 t = ch;
1788 ch = *p++;
1789 *q++ = t;
1790 b = 10;
1791 if (t == '.') {
1792 goto float_frac_parse;
1793 } else if (t == '0') {
1794 if (ch == 'x' || ch == 'X') {
1795 q--;
1796 ch = *p++;
1797 b = 16;
1798 } else if (tcc_ext && (ch == 'b' || ch == 'B')) {
1799 q--;
1800 ch = *p++;
1801 b = 2;
1804 /* parse all digits. cannot check octal numbers at this stage
1805 because of floating point constants */
1806 while (1) {
1807 if (ch >= 'a' && ch <= 'f')
1808 t = ch - 'a' + 10;
1809 else if (ch >= 'A' && ch <= 'F')
1810 t = ch - 'A' + 10;
1811 else if (isnum(ch))
1812 t = ch - '0';
1813 else
1814 break;
1815 if (t >= b)
1816 break;
1817 if (q >= token_buf + STRING_MAX_SIZE) {
1818 num_too_long:
1819 error("number too long");
1821 *q++ = ch;
1822 ch = *p++;
1824 if (ch == '.' ||
1825 ((ch == 'e' || ch == 'E') && b == 10) ||
1826 ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
1827 if (b != 10) {
1828 /* NOTE: strtox should support that for hexa numbers, but
1829 non ISOC99 libcs do not support it, so we prefer to do
1830 it by hand */
1831 /* hexadecimal or binary floats */
1832 /* XXX: handle overflows */
1833 *q = '\0';
1834 if (b == 16)
1835 shift = 4;
1836 else
1837 shift = 2;
1838 bn_zero(bn);
1839 q = token_buf;
1840 while (1) {
1841 t = *q++;
1842 if (t == '\0') {
1843 break;
1844 } else if (t >= 'a') {
1845 t = t - 'a' + 10;
1846 } else if (t >= 'A') {
1847 t = t - 'A' + 10;
1848 } else {
1849 t = t - '0';
1851 bn_lshift(bn, shift, t);
1853 frac_bits = 0;
1854 if (ch == '.') {
1855 ch = *p++;
1856 while (1) {
1857 t = ch;
1858 if (t >= 'a' && t <= 'f') {
1859 t = t - 'a' + 10;
1860 } else if (t >= 'A' && t <= 'F') {
1861 t = t - 'A' + 10;
1862 } else if (t >= '0' && t <= '9') {
1863 t = t - '0';
1864 } else {
1865 break;
1867 if (t >= b)
1868 error("invalid digit");
1869 bn_lshift(bn, shift, t);
1870 frac_bits += shift;
1871 ch = *p++;
1874 if (ch != 'p' && ch != 'P')
1875 expect("exponent");
1876 ch = *p++;
1877 s = 1;
1878 exp_val = 0;
1879 if (ch == '+') {
1880 ch = *p++;
1881 } else if (ch == '-') {
1882 s = -1;
1883 ch = *p++;
1885 if (ch < '0' || ch > '9')
1886 expect("exponent digits");
1887 while (ch >= '0' && ch <= '9') {
1888 exp_val = exp_val * 10 + ch - '0';
1889 ch = *p++;
1891 exp_val = exp_val * s;
1893 /* now we can generate the number */
1894 /* XXX: should patch directly float number */
1895 d = (double)bn[1] * 4294967296.0 + (double)bn[0];
1896 d = ldexp(d, exp_val - frac_bits);
1897 t = toup(ch);
1898 if (t == 'F') {
1899 ch = *p++;
1900 tok = TOK_CFLOAT;
1901 /* float : should handle overflow */
1902 tokc.f = (float)d;
1903 } else if (t == 'L') {
1904 ch = *p++;
1905 #ifdef TCC_TARGET_PE
1906 tok = TOK_CDOUBLE;
1907 tokc.d = d;
1908 #else
1909 tok = TOK_CLDOUBLE;
1910 /* XXX: not large enough */
1911 tokc.ld = (long double)d;
1912 #endif
1913 } else {
1914 tok = TOK_CDOUBLE;
1915 tokc.d = d;
1917 } else {
1918 /* decimal floats */
1919 if (ch == '.') {
1920 if (q >= token_buf + STRING_MAX_SIZE)
1921 goto num_too_long;
1922 *q++ = ch;
1923 ch = *p++;
1924 float_frac_parse:
1925 while (ch >= '0' && ch <= '9') {
1926 if (q >= token_buf + STRING_MAX_SIZE)
1927 goto num_too_long;
1928 *q++ = ch;
1929 ch = *p++;
1932 if (ch == 'e' || ch == 'E') {
1933 if (q >= token_buf + STRING_MAX_SIZE)
1934 goto num_too_long;
1935 *q++ = ch;
1936 ch = *p++;
1937 if (ch == '-' || ch == '+') {
1938 if (q >= token_buf + STRING_MAX_SIZE)
1939 goto num_too_long;
1940 *q++ = ch;
1941 ch = *p++;
1943 if (ch < '0' || ch > '9')
1944 expect("exponent digits");
1945 while (ch >= '0' && ch <= '9') {
1946 if (q >= token_buf + STRING_MAX_SIZE)
1947 goto num_too_long;
1948 *q++ = ch;
1949 ch = *p++;
1952 *q = '\0';
1953 t = toup(ch);
1954 errno = 0;
1955 if (t == 'F') {
1956 ch = *p++;
1957 tok = TOK_CFLOAT;
1958 tokc.f = strtof(token_buf, NULL);
1959 } else if (t == 'L') {
1960 ch = *p++;
1961 #ifdef TCC_TARGET_PE
1962 tok = TOK_CDOUBLE;
1963 tokc.d = strtod(token_buf, NULL);
1964 #else
1965 tok = TOK_CLDOUBLE;
1966 tokc.ld = strtold(token_buf, NULL);
1967 #endif
1968 } else {
1969 tok = TOK_CDOUBLE;
1970 tokc.d = strtod(token_buf, NULL);
1973 } else {
1974 unsigned long long n, n1;
1975 int lcount, ucount;
1977 /* integer number */
1978 *q = '\0';
1979 q = token_buf;
1980 if (b == 10 && *q == '0') {
1981 b = 8;
1982 q++;
1984 n = 0;
1985 while(1) {
1986 t = *q++;
1987 /* no need for checks except for base 10 / 8 errors */
1988 if (t == '\0') {
1989 break;
1990 } else if (t >= 'a') {
1991 t = t - 'a' + 10;
1992 } else if (t >= 'A') {
1993 t = t - 'A' + 10;
1994 } else {
1995 t = t - '0';
1996 if (t >= b)
1997 error("invalid digit");
1999 n1 = n;
2000 n = n * b + t;
2001 /* detect overflow */
2002 /* XXX: this test is not reliable */
2003 if (n < n1)
2004 error("integer constant overflow");
2007 /* XXX: not exactly ANSI compliant */
2008 if ((n & 0xffffffff00000000LL) != 0) {
2009 if ((n >> 63) != 0)
2010 tok = TOK_CULLONG;
2011 else
2012 tok = TOK_CLLONG;
2013 } else if (n > 0x7fffffff) {
2014 tok = TOK_CUINT;
2015 } else {
2016 tok = TOK_CINT;
2018 lcount = 0;
2019 ucount = 0;
2020 for(;;) {
2021 t = toup(ch);
2022 if (t == 'L') {
2023 if (lcount >= 2)
2024 error("three 'l's in integer constant");
2025 lcount++;
2026 if (lcount == 2) {
2027 if (tok == TOK_CINT)
2028 tok = TOK_CLLONG;
2029 else if (tok == TOK_CUINT)
2030 tok = TOK_CULLONG;
2032 ch = *p++;
2033 } else if (t == 'U') {
2034 if (ucount >= 1)
2035 error("two 'u's in integer constant");
2036 ucount++;
2037 if (tok == TOK_CINT)
2038 tok = TOK_CUINT;
2039 else if (tok == TOK_CLLONG)
2040 tok = TOK_CULLONG;
2041 ch = *p++;
2042 } else {
2043 break;
2046 if (tok == TOK_CINT || tok == TOK_CUINT)
2047 tokc.ui = n;
2048 else
2049 tokc.ull = n;
2051 if (ch)
2052 error("invalid number\n");
/* switch case for a one character token 'c1' (token value tok1) that
   becomes the two character token tok2 when followed by 'c2'.
   Only for use inside next_nomacro1(): it relies on the local
   variables 'c' and 'p' and on the PEEKC() macro. */
#define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c == c2) {                          \
            p++;                                \
            tok = tok2;                         \
        } else {                                \
            tok = tok1;                         \
        }                                       \
        break;
2067 /* return next token without macro substitution */
2068 static inline void next_nomacro1(void)
2070 int t, c, is_long;
2071 TokenSym *ts;
2072 uint8_t *p, *p1;
2073 unsigned int h;
2075 p = file->buf_ptr;
2076 redo_no_start:
2077 c = *p;
2078 switch(c) {
2079 case ' ':
2080 case '\t':
2081 tok = c;
2082 p++;
2083 goto keep_tok_flags;
2084 case '\f':
2085 case '\v':
2086 case '\r':
2087 p++;
2088 goto redo_no_start;
2089 case '\\':
2090 /* first look if it is in fact an end of buffer */
2091 if (p >= file->buf_end) {
2092 file->buf_ptr = p;
2093 handle_eob();
2094 p = file->buf_ptr;
2095 if (p >= file->buf_end)
2096 goto parse_eof;
2097 else
2098 goto redo_no_start;
2099 } else {
2100 file->buf_ptr = p;
2101 ch = *p;
2102 handle_stray();
2103 p = file->buf_ptr;
2104 goto redo_no_start;
2106 parse_eof:
2108 TCCState *s1 = tcc_state;
2109 if ((parse_flags & PARSE_FLAG_LINEFEED)
2110 && !(tok_flags & TOK_FLAG_EOF)) {
2111 tok_flags |= TOK_FLAG_EOF;
2112 tok = TOK_LINEFEED;
2113 goto keep_tok_flags;
2114 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2115 tok = TOK_EOF;
2116 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2117 error("missing #endif");
2118 } else if (s1->include_stack_ptr == s1->include_stack) {
2119 /* no include left : end of file. */
2120 tok = TOK_EOF;
2121 } else {
2122 tok_flags &= ~TOK_FLAG_EOF;
2123 /* pop include file */
2125 /* test if previous '#endif' was after a #ifdef at
2126 start of file */
2127 if (tok_flags & TOK_FLAG_ENDIF) {
2128 #ifdef INC_DEBUG
2129 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2130 #endif
2131 add_cached_include(s1, file->inc_type, file->inc_filename,
2132 file->ifndef_macro_saved);
2135 /* add end of include file debug info */
2136 if (tcc_state->do_debug) {
2137 put_stabd(N_EINCL, 0, 0);
2139 /* pop include stack */
2140 tcc_close();
2141 s1->include_stack_ptr--;
2142 p = file->buf_ptr;
2143 goto redo_no_start;
2146 break;
2148 case '\n':
2149 file->line_num++;
2150 tok_flags |= TOK_FLAG_BOL;
2151 p++;
2152 maybe_newline:
2153 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2154 goto redo_no_start;
2155 tok = TOK_LINEFEED;
2156 goto keep_tok_flags;
2158 case '#':
2159 /* XXX: simplify */
2160 PEEKC(c, p);
2161 if ((tok_flags & TOK_FLAG_BOL) &&
2162 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2163 file->buf_ptr = p;
2164 preprocess(tok_flags & TOK_FLAG_BOF);
2165 p = file->buf_ptr;
2166 goto maybe_newline;
2167 } else {
2168 if (c == '#') {
2169 p++;
2170 tok = TOK_TWOSHARPS;
2171 } else {
2172 if (parse_flags & PARSE_FLAG_ASM_COMMENTS) {
2173 p = parse_line_comment(p - 1);
2174 goto redo_no_start;
2175 } else {
2176 tok = '#';
2180 break;
2182 case 'a': case 'b': case 'c': case 'd':
2183 case 'e': case 'f': case 'g': case 'h':
2184 case 'i': case 'j': case 'k': case 'l':
2185 case 'm': case 'n': case 'o': case 'p':
2186 case 'q': case 'r': case 's': case 't':
2187 case 'u': case 'v': case 'w': case 'x':
2188 case 'y': case 'z':
2189 case 'A': case 'B': case 'C': case 'D':
2190 case 'E': case 'F': case 'G': case 'H':
2191 case 'I': case 'J': case 'K':
2192 case 'M': case 'N': case 'O': case 'P':
2193 case 'Q': case 'R': case 'S': case 'T':
2194 case 'U': case 'V': case 'W': case 'X':
2195 case 'Y': case 'Z':
2196 case '_':
2197 parse_ident_fast:
2198 p1 = p;
2199 h = TOK_HASH_INIT;
2200 h = TOK_HASH_FUNC(h, c);
2201 p++;
2202 for(;;) {
2203 c = *p;
2204 if (!isidnum_table[c-CH_EOF])
2205 break;
2206 h = TOK_HASH_FUNC(h, c);
2207 p++;
2209 if (c != '\\') {
2210 TokenSym **pts;
2211 int len;
2213 /* fast case : no stray found, so we have the full token
2214 and we have already hashed it */
2215 len = p - p1;
2216 h &= (TOK_HASH_SIZE - 1);
2217 pts = &hash_ident[h];
2218 for(;;) {
2219 ts = *pts;
2220 if (!ts)
2221 break;
2222 if (ts->len == len && !memcmp(ts->str, p1, len))
2223 goto token_found;
2224 pts = &(ts->hash_next);
2226 ts = tok_alloc_new(pts, p1, len);
2227 token_found: ;
2228 } else {
2229 /* slower case */
2230 cstr_reset(&tokcstr);
2232 while (p1 < p) {
2233 cstr_ccat(&tokcstr, *p1);
2234 p1++;
2236 p--;
2237 PEEKC(c, p);
2238 parse_ident_slow:
2239 while (isidnum_table[c-CH_EOF]) {
2240 cstr_ccat(&tokcstr, c);
2241 PEEKC(c, p);
2243 ts = tok_alloc(tokcstr.data, tokcstr.size);
2245 tok = ts->tok;
2246 break;
2247 case 'L':
2248 t = p[1];
2249 if (t != '\\' && t != '\'' && t != '\"') {
2250 /* fast case */
2251 goto parse_ident_fast;
2252 } else {
2253 PEEKC(c, p);
2254 if (c == '\'' || c == '\"') {
2255 is_long = 1;
2256 goto str_const;
2257 } else {
2258 cstr_reset(&tokcstr);
2259 cstr_ccat(&tokcstr, 'L');
2260 goto parse_ident_slow;
2263 break;
2264 case '0': case '1': case '2': case '3':
2265 case '4': case '5': case '6': case '7':
2266 case '8': case '9':
2268 cstr_reset(&tokcstr);
2269 /* after the first digit, accept digits, alpha, '.' or sign if
2270 prefixed by 'eEpP' */
2271 parse_num:
2272 for(;;) {
2273 t = c;
2274 cstr_ccat(&tokcstr, c);
2275 PEEKC(c, p);
2276 if (!(isnum(c) || isid(c) || c == '.' ||
2277 ((c == '+' || c == '-') &&
2278 (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
2279 break;
2281 /* We add a trailing '\0' to ease parsing */
2282 cstr_ccat(&tokcstr, '\0');
2283 tokc.cstr = &tokcstr;
2284 tok = TOK_PPNUM;
2285 break;
2286 case '.':
2287 /* special dot handling because it can also start a number */
2288 PEEKC(c, p);
2289 if (isnum(c)) {
2290 cstr_reset(&tokcstr);
2291 cstr_ccat(&tokcstr, '.');
2292 goto parse_num;
2293 } else if (c == '.') {
2294 PEEKC(c, p);
2295 if (c != '.')
2296 expect("'.'");
2297 PEEKC(c, p);
2298 tok = TOK_DOTS;
2299 } else {
2300 tok = '.';
2302 break;
2303 case '\'':
2304 case '\"':
2305 is_long = 0;
2306 str_const:
2308 CString str;
2309 int sep;
2311 sep = c;
2313 /* parse the string */
2314 cstr_new(&str);
2315 p = parse_pp_string(p, sep, &str);
2316 cstr_ccat(&str, '\0');
2318 /* eval the escape (should be done as TOK_PPNUM) */
2319 cstr_reset(&tokcstr);
2320 parse_escape_string(&tokcstr, str.data, is_long);
2321 cstr_free(&str);
2323 if (sep == '\'') {
2324 int char_size;
2325 /* XXX: make it portable */
2326 if (!is_long)
2327 char_size = 1;
2328 else
2329 char_size = sizeof(nwchar_t);
2330 if (tokcstr.size <= char_size)
2331 error("empty character constant");
2332 if (tokcstr.size > 2 * char_size)
2333 warning("multi-character character constant");
2334 if (!is_long) {
2335 tokc.i = *(int8_t *)tokcstr.data;
2336 tok = TOK_CCHAR;
2337 } else {
2338 tokc.i = *(nwchar_t *)tokcstr.data;
2339 tok = TOK_LCHAR;
2341 } else {
2342 tokc.cstr = &tokcstr;
2343 if (!is_long)
2344 tok = TOK_STR;
2345 else
2346 tok = TOK_LSTR;
2349 break;
2351 case '<':
2352 PEEKC(c, p);
2353 if (c == '=') {
2354 p++;
2355 tok = TOK_LE;
2356 } else if (c == '<') {
2357 PEEKC(c, p);
2358 if (c == '=') {
2359 p++;
2360 tok = TOK_A_SHL;
2361 } else {
2362 tok = TOK_SHL;
2364 } else {
2365 tok = TOK_LT;
2367 break;
2369 case '>':
2370 PEEKC(c, p);
2371 if (c == '=') {
2372 p++;
2373 tok = TOK_GE;
2374 } else if (c == '>') {
2375 PEEKC(c, p);
2376 if (c == '=') {
2377 p++;
2378 tok = TOK_A_SAR;
2379 } else {
2380 tok = TOK_SAR;
2382 } else {
2383 tok = TOK_GT;
2385 break;
2387 case '&':
2388 PEEKC(c, p);
2389 if (c == '&') {
2390 p++;
2391 tok = TOK_LAND;
2392 } else if (c == '=') {
2393 p++;
2394 tok = TOK_A_AND;
2395 } else {
2396 tok = '&';
2398 break;
2400 case '|':
2401 PEEKC(c, p);
2402 if (c == '|') {
2403 p++;
2404 tok = TOK_LOR;
2405 } else if (c == '=') {
2406 p++;
2407 tok = TOK_A_OR;
2408 } else {
2409 tok = '|';
2411 break;
2413 case '+':
2414 PEEKC(c, p);
2415 if (c == '+') {
2416 p++;
2417 tok = TOK_INC;
2418 } else if (c == '=') {
2419 p++;
2420 tok = TOK_A_ADD;
2421 } else {
2422 tok = '+';
2424 break;
2426 case '-':
2427 PEEKC(c, p);
2428 if (c == '-') {
2429 p++;
2430 tok = TOK_DEC;
2431 } else if (c == '=') {
2432 p++;
2433 tok = TOK_A_SUB;
2434 } else if (c == '>') {
2435 p++;
2436 tok = TOK_ARROW;
2437 } else {
2438 tok = '-';
2440 break;
2442 PARSE2('!', '!', '=', TOK_NE)
2443 PARSE2('=', '=', '=', TOK_EQ)
2444 PARSE2('*', '*', '=', TOK_A_MUL)
2445 PARSE2('%', '%', '=', TOK_A_MOD)
2446 PARSE2('^', '^', '=', TOK_A_XOR)
2448 /* comments or operator */
2449 case '/':
2450 PEEKC(c, p);
2451 if (c == '*') {
2452 p = parse_comment(p);
2453 /* comments replaced by a blank */
2454 tok = ' ';
2455 goto keep_tok_flags;
2456 } else if (c == '/') {
2457 p = parse_line_comment(p);
2458 tok = ' ';
2459 goto keep_tok_flags;
2460 } else if (c == '=') {
2461 p++;
2462 tok = TOK_A_DIV;
2463 } else {
2464 tok = '/';
2466 break;
2468 /* simple tokens */
2469 case '(':
2470 case ')':
2471 case '[':
2472 case ']':
2473 case '{':
2474 case '}':
2475 case ',':
2476 case ';':
2477 case ':':
2478 case '?':
2479 case '~':
2480 case '$': /* only used in assembler */
2481 case '@': /* dito */
2482 tok = c;
2483 p++;
2484 break;
2485 default:
2486 error("unrecognized character \\x%02x", c);
2487 break;
2489 tok_flags = 0;
2490 keep_tok_flags:
2491 file->buf_ptr = p;
2492 #if defined(PARSE_DEBUG)
2493 printf("token = %s\n", get_tok_str(tok, &tokc));
2494 #endif
2497 /* return next token without macro substitution. Can read input from
2498 macro_ptr buffer */
2499 static void next_nomacro_spc(void)
2501 if (macro_ptr) {
2502 redo:
2503 tok = *macro_ptr;
2504 if (tok) {
2505 TOK_GET(&tok, &macro_ptr, &tokc);
2506 if (tok == TOK_LINENUM) {
2507 file->line_num = tokc.i;
2508 goto redo;
2511 } else {
2512 next_nomacro1();
2516 ST_FUNC void next_nomacro(void)
2518 do {
2519 next_nomacro_spc();
2520 } while (is_space(tok));
2523 /* substitute args in macro_str and return allocated string */
2524 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2526 int last_tok, t, spc;
2527 const int *st;
2528 Sym *s;
2529 CValue cval;
2530 TokenString str;
2531 CString cstr;
2533 tok_str_new(&str);
2534 last_tok = 0;
2535 while(1) {
2536 TOK_GET(&t, &macro_str, &cval);
2537 if (!t)
2538 break;
2539 if (t == '#') {
2540 /* stringize */
2541 TOK_GET(&t, &macro_str, &cval);
2542 if (!t)
2543 break;
2544 s = sym_find2(args, t);
2545 if (s) {
2546 cstr_new(&cstr);
2547 st = s->d;
2548 spc = 0;
2549 while (*st) {
2550 TOK_GET(&t, &st, &cval);
2551 if (!check_space(t, &spc))
2552 cstr_cat(&cstr, get_tok_str(t, &cval));
2554 cstr.size -= spc;
2555 cstr_ccat(&cstr, '\0');
2556 #ifdef PP_DEBUG
2557 printf("stringize: %s\n", (char *)cstr.data);
2558 #endif
2559 /* add string */
2560 cval.cstr = &cstr;
2561 tok_str_add2(&str, TOK_STR, &cval);
2562 cstr_free(&cstr);
2563 } else {
2564 tok_str_add2(&str, t, &cval);
2566 } else if (t >= TOK_IDENT) {
2567 s = sym_find2(args, t);
2568 if (s) {
2569 st = s->d;
2570 /* if '##' is present before or after, no arg substitution */
2571 if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) {
2572 /* special case for var arg macros : ## eats the
2573 ',' if empty VA_ARGS variable. */
2574 /* XXX: test of the ',' is not 100%
2575 reliable. should fix it to avoid security
2576 problems */
2577 if (gnu_ext && s->type.t &&
2578 last_tok == TOK_TWOSHARPS &&
2579 str.len >= 2 && str.str[str.len - 2] == ',') {
2580 if (*st == 0) {
2581 /* suppress ',' '##' */
2582 str.len -= 2;
2583 } else {
2584 /* suppress '##' and add variable */
2585 str.len--;
2586 goto add_var;
2588 } else {
2589 int t1;
2590 add_var:
2591 for(;;) {
2592 TOK_GET(&t1, &st, &cval);
2593 if (!t1)
2594 break;
2595 tok_str_add2(&str, t1, &cval);
2598 } else {
2599 /* NOTE: the stream cannot be read when macro
2600 substituing an argument */
2601 macro_subst(&str, nested_list, st, NULL);
2603 } else {
2604 tok_str_add(&str, t);
2606 } else {
2607 tok_str_add2(&str, t, &cval);
2609 last_tok = t;
2611 tok_str_add(&str, 0);
2612 return str.str;
/* abbreviated month names, indexed by tm_mon (0 = January); used to
   build the value of __DATE__ */
static char const ab_month_name[12][4] =
{
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
2621 /* do macro substitution of current token with macro 's' and add
2622 result to (tok_str,tok_len). 'nested_list' is the list of all
2623 macros we got inside to avoid recursing. Return non zero if no
2624 substitution needs to be done */
2625 static int macro_subst_tok(TokenString *tok_str,
2626 Sym **nested_list, Sym *s, struct macro_level **can_read_stream)
2628 Sym *args, *sa, *sa1;
2629 int mstr_allocated, parlevel, *mstr, t, t1, spc;
2630 const int *p;
2631 TokenString str;
2632 char *cstrval;
2633 CValue cval;
2634 CString cstr;
2635 char buf[32];
2637 /* if symbol is a macro, prepare substitution */
2638 /* special macros */
2639 if (tok == TOK___LINE__) {
2640 snprintf(buf, sizeof(buf), "%d", file->line_num);
2641 cstrval = buf;
2642 t1 = TOK_PPNUM;
2643 goto add_cstr1;
2644 } else if (tok == TOK___FILE__) {
2645 cstrval = file->filename;
2646 goto add_cstr;
2647 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2648 time_t ti;
2649 struct tm *tm;
2651 time(&ti);
2652 tm = localtime(&ti);
2653 if (tok == TOK___DATE__) {
2654 snprintf(buf, sizeof(buf), "%s %2d %d",
2655 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2656 } else {
2657 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
2658 tm->tm_hour, tm->tm_min, tm->tm_sec);
2660 cstrval = buf;
2661 add_cstr:
2662 t1 = TOK_STR;
2663 add_cstr1:
2664 cstr_new(&cstr);
2665 cstr_cat(&cstr, cstrval);
2666 cstr_ccat(&cstr, '\0');
2667 cval.cstr = &cstr;
2668 tok_str_add2(tok_str, t1, &cval);
2669 cstr_free(&cstr);
2670 } else {
2671 mstr = s->d;
2672 mstr_allocated = 0;
2673 if (s->type.t == MACRO_FUNC) {
2674 /* NOTE: we do not use next_nomacro to avoid eating the
2675 next token. XXX: find better solution */
2676 redo:
2677 if (macro_ptr) {
2678 p = macro_ptr;
2679 while (is_space(t = *p) || TOK_LINEFEED == t)
2680 ++p;
2681 if (t == 0 && can_read_stream) {
2682 /* end of macro stream: we must look at the token
2683 after in the file */
2684 struct macro_level *ml = *can_read_stream;
2685 macro_ptr = NULL;
2686 if (ml)
2688 macro_ptr = ml->p;
2689 ml->p = NULL;
2690 *can_read_stream = ml -> prev;
2692 /* also, end of scope for nested defined symbol */
2693 (*nested_list)->v = -1;
2694 goto redo;
2696 } else {
2697 /* XXX: incorrect with comments */
2698 ch = file->buf_ptr[0];
2699 while (is_space(ch) || ch == '\n')
2700 cinp();
2701 t = ch;
2703 if (t != '(') /* no macro subst */
2704 return -1;
2706 /* argument macro */
2707 next_nomacro();
2708 next_nomacro();
2709 args = NULL;
2710 sa = s->next;
2711 /* NOTE: empty args are allowed, except if no args */
2712 for(;;) {
2713 /* handle '()' case */
2714 if (!args && !sa && tok == ')')
2715 break;
2716 if (!sa)
2717 error("macro '%s' used with too many args",
2718 get_tok_str(s->v, 0));
2719 tok_str_new(&str);
2720 parlevel = spc = 0;
2721 /* NOTE: non zero sa->t indicates VA_ARGS */
2722 while ((parlevel > 0 ||
2723 (tok != ')' &&
2724 (tok != ',' || sa->type.t))) &&
2725 tok != -1) {
2726 if (tok == '(')
2727 parlevel++;
2728 else if (tok == ')')
2729 parlevel--;
2730 if (tok == TOK_LINEFEED)
2731 tok = ' ';
2732 if (!check_space(tok, &spc))
2733 tok_str_add2(&str, tok, &tokc);
2734 next_nomacro_spc();
2736 str.len -= spc;
2737 tok_str_add(&str, 0);
2738 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
2739 sa1->d = str.str;
2740 sa = sa->next;
2741 if (tok == ')') {
2742 /* special case for gcc var args: add an empty
2743 var arg argument if it is omitted */
2744 if (sa && sa->type.t && gnu_ext)
2745 continue;
2746 else
2747 break;
2749 if (tok != ',')
2750 expect(",");
2751 next_nomacro();
2753 if (sa) {
2754 error("macro '%s' used with too few args",
2755 get_tok_str(s->v, 0));
2758 /* now subst each arg */
2759 mstr = macro_arg_subst(nested_list, mstr, args);
2760 /* free memory */
2761 sa = args;
2762 while (sa) {
2763 sa1 = sa->prev;
2764 tok_str_free(sa->d);
2765 sym_free(sa);
2766 sa = sa1;
2768 mstr_allocated = 1;
2770 sym_push2(nested_list, s->v, 0, 0);
2771 macro_subst(tok_str, nested_list, mstr, can_read_stream);
2772 /* pop nested defined symbol */
2773 sa1 = *nested_list;
2774 *nested_list = sa1->prev;
2775 sym_free(sa1);
2776 if (mstr_allocated)
2777 tok_str_free(mstr);
2779 return 0;
2782 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
2783 return the resulting string (which must be freed). */
2784 static inline int *macro_twosharps(const int *macro_str)
2786 const int *ptr;
2787 int t;
2788 CValue cval;
2789 TokenString macro_str1;
2790 CString cstr;
2791 int n, start_of_nosubsts;
2793 /* we search the first '##' */
2794 for(ptr = macro_str;;) {
2795 TOK_GET(&t, &ptr, &cval);
2796 if (t == TOK_TWOSHARPS)
2797 break;
2798 /* nothing more to do if end of string */
2799 if (t == 0)
2800 return NULL;
2803 /* we saw '##', so we need more processing to handle it */
2804 start_of_nosubsts = -1;
2805 tok_str_new(&macro_str1);
2806 for(ptr = macro_str;;) {
2807 TOK_GET(&tok, &ptr, &tokc);
2808 if (tok == 0)
2809 break;
2810 if (tok == TOK_TWOSHARPS)
2811 continue;
2812 if (tok == TOK_NOSUBST && start_of_nosubsts < 0)
2813 start_of_nosubsts = macro_str1.len;
2814 while (*ptr == TOK_TWOSHARPS) {
2815 /* given 'a##b', remove nosubsts preceding 'a' */
2816 if (start_of_nosubsts >= 0)
2817 macro_str1.len = start_of_nosubsts;
2818 /* given 'a##b', skip '##' */
2819 t = *++ptr;
2820 /* given 'a##b', remove nosubsts preceding 'b' */
2821 while (t == TOK_NOSUBST)
2822 t = *++ptr;
2824 if (t && t != TOK_TWOSHARPS) {
2825 TOK_GET(&t, &ptr, &cval);
2827 /* We concatenate the two tokens */
2828 cstr_new(&cstr);
2829 cstr_cat(&cstr, get_tok_str(tok, &tokc));
2830 n = cstr.size;
2831 cstr_cat(&cstr, get_tok_str(t, &cval));
2832 cstr_ccat(&cstr, '\0');
2834 tcc_open_bf(tcc_state, "<paste>", cstr.size);
2835 memcpy(file->buffer, cstr.data, cstr.size);
2836 for (;;) {
2837 next_nomacro1();
2838 if (0 == *file->buf_ptr)
2839 break;
2840 tok_str_add2(&macro_str1, tok, &tokc);
2841 warning("pasting \"%.*s\" and \"%s\" does not give a valid preprocessing token",
2842 n, cstr.data, (char*)cstr.data + n);
2844 tcc_close();
2845 cstr_reset(&cstr);
2848 if (tok != TOK_NOSUBST)
2849 start_of_nosubsts = -1;
2850 tok_str_add2(&macro_str1, tok, &tokc);
2852 tok_str_add(&macro_str1, 0);
2853 return macro_str1.str;
2857 /* do macro substitution of macro_str and add result to
2858 (tok_str,tok_len). 'nested_list' is the list of all macros we got
2859 inside to avoid recursing. */
2860 static void macro_subst(TokenString *tok_str, Sym **nested_list,
2861 const int *macro_str, struct macro_level ** can_read_stream)
2863 Sym *s;
2864 int *macro_str1;
2865 const int *ptr;
2866 int t, ret, spc;
2867 CValue cval;
2868 struct macro_level ml;
2869 int force_blank;
2871 /* first scan for '##' operator handling */
2872 ptr = macro_str;
2873 macro_str1 = macro_twosharps(ptr);
2875 if (macro_str1)
2876 ptr = macro_str1;
2877 spc = 0;
2878 force_blank = 0;
2880 while (1) {
2881 /* NOTE: ptr == NULL can only happen if tokens are read from
2882 file stream due to a macro function call */
2883 if (ptr == NULL)
2884 break;
2885 TOK_GET(&t, &ptr, &cval);
2886 if (t == 0)
2887 break;
2888 if (t == TOK_NOSUBST) {
2889 /* following token has already been subst'd. just copy it on */
2890 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2891 TOK_GET(&t, &ptr, &cval);
2892 goto no_subst;
2894 s = define_find(t);
2895 if (s != NULL) {
2896 /* if nested substitution, do nothing */
2897 if (sym_find2(*nested_list, t)) {
2898 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
2899 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2900 goto no_subst;
2902 ml.p = macro_ptr;
2903 if (can_read_stream)
2904 ml.prev = *can_read_stream, *can_read_stream = &ml;
2905 macro_ptr = (int *)ptr;
2906 tok = t;
2907 ret = macro_subst_tok(tok_str, nested_list, s, can_read_stream);
2908 ptr = (int *)macro_ptr;
2909 macro_ptr = ml.p;
2910 if (can_read_stream && *can_read_stream == &ml)
2911 *can_read_stream = ml.prev;
2912 if (ret != 0)
2913 goto no_subst;
2914 if (parse_flags & PARSE_FLAG_SPACES)
2915 force_blank = 1;
2916 } else {
2917 no_subst:
2918 if (force_blank) {
2919 tok_str_add(tok_str, ' ');
2920 spc = 1;
2921 force_blank = 0;
2923 if (!check_space(t, &spc))
2924 tok_str_add2(tok_str, t, &cval);
2927 if (macro_str1)
2928 tok_str_free(macro_str1);
2931 /* return next token with macro substitution */
2932 ST_FUNC void next(void)
2934 Sym *nested_list, *s;
2935 TokenString str;
2936 struct macro_level *ml;
2938 redo:
2939 if (parse_flags & PARSE_FLAG_SPACES)
2940 next_nomacro_spc();
2941 else
2942 next_nomacro();
2943 if (!macro_ptr) {
2944 /* if not reading from macro substituted string, then try
2945 to substitute macros */
2946 if (tok >= TOK_IDENT &&
2947 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2948 s = define_find(tok);
2949 if (s) {
2950 /* we have a macro: we try to substitute */
2951 tok_str_new(&str);
2952 nested_list = NULL;
2953 ml = NULL;
2954 if (macro_subst_tok(&str, &nested_list, s, &ml) == 0) {
2955 /* substitution done, NOTE: maybe empty */
2956 tok_str_add(&str, 0);
2957 macro_ptr = str.str;
2958 macro_ptr_allocated = str.str;
2959 goto redo;
2963 } else {
2964 if (tok == 0) {
2965 /* end of macro or end of unget buffer */
2966 if (unget_buffer_enabled) {
2967 macro_ptr = unget_saved_macro_ptr;
2968 unget_buffer_enabled = 0;
2969 } else {
2970 /* end of macro string: free it */
2971 tok_str_free(macro_ptr_allocated);
2972 macro_ptr_allocated = NULL;
2973 macro_ptr = NULL;
2975 goto redo;
2976 } else if (tok == TOK_NOSUBST) {
2977 /* discard preprocessor's nosubst markers */
2978 goto redo;
2982 /* convert preprocessor tokens into C tokens */
2983 if (tok == TOK_PPNUM &&
2984 (parse_flags & PARSE_FLAG_TOK_NUM)) {
2985 parse_number((char *)tokc.cstr->data);
2989 /* push back current token and set current token to 'last_tok'. Only
2990 identifier case handled for labels. */
2991 ST_INLN void unget_tok(int last_tok)
2993 int i, n;
2994 int *q;
2995 unget_saved_macro_ptr = macro_ptr;
2996 unget_buffer_enabled = 1;
2997 q = unget_saved_buffer;
2998 macro_ptr = q;
2999 *q++ = tok;
3000 n = tok_ext_size(tok) - 1;
3001 for(i=0;i<n;i++)
3002 *q++ = tokc.tab[i];
3003 *q = 0; /* end of token string */
3004 tok = last_tok;
3008 /* better than nothing, but needs extension to handle '-E' option
3009 correctly too */
3010 ST_FUNC void preprocess_init(TCCState *s1)
3012 s1->include_stack_ptr = s1->include_stack;
3013 /* XXX: move that before to avoid having to initialize
3014 file->ifdef_stack_ptr ? */
3015 s1->ifdef_stack_ptr = s1->ifdef_stack;
3016 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3018 /* XXX: not ANSI compliant: bound checking says error */
3019 vtop = vstack - 1;
3020 s1->pack_stack[0] = 0;
3021 s1->pack_stack_ptr = s1->pack_stack;
3024 ST_FUNC void preprocess_new()
3026 int i, c;
3027 const char *p, *r;
3029 /* init isid table */
3030 for(i=CH_EOF;i<256;i++)
3031 isidnum_table[i-CH_EOF] = isid(i) || isnum(i);
3033 /* add all tokens */
3034 table_ident = NULL;
3035 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3037 tok_ident = TOK_IDENT;
3038 p = tcc_keywords;
3039 while (*p) {
3040 r = p;
3041 for(;;) {
3042 c = *r++;
3043 if (c == '\0')
3044 break;
3046 tok_alloc(p, r - p - 1);
3047 p = r;
3051 /* Preprocess the current file */
3052 ST_FUNC int tcc_preprocess(TCCState *s1)
3054 Sym *define_start;
3056 BufferedFile *file_ref, **iptr, **iptr_new;
3057 int token_seen, line_ref, d;
3058 const char *s;
3060 preprocess_init(s1);
3061 define_start = define_stack;
3062 ch = file->buf_ptr[0];
3063 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3064 parse_flags = PARSE_FLAG_ASM_COMMENTS | PARSE_FLAG_PREPROCESS |
3065 PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES;
3066 token_seen = 0;
3067 line_ref = 0;
3068 file_ref = NULL;
3069 iptr = s1->include_stack_ptr;
3071 for (;;) {
3072 next();
3073 if (tok == TOK_EOF) {
3074 break;
3075 } else if (file != file_ref) {
3076 goto print_line;
3077 } else if (tok == TOK_LINEFEED) {
3078 if (!token_seen)
3079 continue;
3080 ++line_ref;
3081 token_seen = 0;
3082 } else if (!token_seen) {
3083 d = file->line_num - line_ref;
3084 if (file != file_ref || d < 0 || d >= 8) {
3085 print_line:
3086 iptr_new = s1->include_stack_ptr;
3087 s = iptr_new > iptr ? " 1"
3088 : iptr_new < iptr ? " 2"
3089 : iptr_new > s1->include_stack ? " 3"
3090 : ""
3092 iptr = iptr_new;
3093 fprintf(s1->outfile, "# %d \"%s\"%s\n", file->line_num, file->filename, s);
3094 } else {
3095 while (d)
3096 fputs("\n", s1->outfile), --d;
3098 line_ref = (file_ref = file)->line_num;
3099 token_seen = tok != TOK_LINEFEED;
3100 if (!token_seen)
3101 continue;
3103 fputs(get_tok_str(tok, &tokc), s1->outfile);
3105 free_defines(define_start);
3106 return 0;