The hack to allow valgrind works with tcc compiled programs
[tinycc.git] / tccpp.c
blob2609ad7da67443ea4342081d600db39f60783b8b
1 /*
2 * TCC - Tiny C Compiler
3 *
4 * Copyright (c) 2001-2004 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
26 ST_DATA int tok_flags;
/* additional information about the current token */
28 #define TOK_FLAG_BOL 0x0001 /* beginning of line before */
29 #define TOK_FLAG_BOF 0x0002 /* beginning of file before */
30 #define TOK_FLAG_ENDIF 0x0004 /* a endif was found matching starting #ifdef */
31 #define TOK_FLAG_EOF 0x0008 /* end of file */
33 ST_DATA int parse_flags;
34 #define PARSE_FLAG_PREPROCESS 0x0001 /* activate preprocessing */
35 #define PARSE_FLAG_TOK_NUM 0x0002 /* return numbers instead of TOK_PPNUM */
36 #define PARSE_FLAG_LINEFEED 0x0004 /* line feed is returned as a
37 token. line feed is also
38 returned at eof */
39 #define PARSE_FLAG_ASM_COMMENTS 0x0008 /* '#' can be used for line comment */
40 #define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */
42 ST_DATA struct BufferedFile *file;
43 ST_DATA int ch, tok;
44 ST_DATA CValue tokc;
45 ST_DATA const int *macro_ptr;
46 ST_DATA CString tokcstr; /* current parsed string, if any */
/* benchmark statistics, for display */
49 ST_DATA int total_lines;
50 ST_DATA int total_bytes;
51 ST_DATA int tok_ident;
52 ST_DATA TokenSym **table_ident;
54 /* ------------------------------------------------------------------------- */
56 static int *macro_ptr_allocated;
57 static const int *unget_saved_macro_ptr;
58 static int unget_saved_buffer[TOK_MAX_SIZE + 1];
59 static int unget_buffer_enabled;
60 static TokenSym *hash_ident[TOK_HASH_SIZE];
61 static char token_buf[STRING_MAX_SIZE + 1];
62 /* true if isid(c) || isnum(c) */
63 static unsigned char isidnum_table[256-CH_EOF];
65 static const char tcc_keywords[] =
66 #define DEF(id, str) str "\0"
67 #include "tcctok.h"
68 #undef DEF
71 /* WARNING: the content of this string encodes token numbers */
72 static const unsigned char tok_two_chars[] =
73 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
74 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
76 struct macro_level {
77 struct macro_level *prev;
78 const int *p;
81 static void next_nomacro_spc(void);
82 static void macro_subst(
83 TokenString *tok_str,
84 Sym **nested_list,
85 const int *macro_str,
86 struct macro_level **can_read_stream
89 ST_FUNC void skip(int c)
91 if (tok != c)
92 tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
93 next();
96 ST_FUNC void expect(const char *msg)
98 tcc_error("%s expected", msg);
101 /* ------------------------------------------------------------------------- */
102 /* CString handling */
103 static void cstr_realloc(CString *cstr, int new_size)
105 int size;
106 void *data;
108 size = cstr->size_allocated;
109 if (size == 0)
110 size = 8; /* no need to allocate a too small first string */
111 while (size < new_size)
112 size = size * 2;
113 data = tcc_realloc(cstr->data_allocated, size);
114 cstr->data_allocated = data;
115 cstr->size_allocated = size;
116 cstr->data = data;
119 /* add a byte */
120 ST_FUNC void cstr_ccat(CString *cstr, int ch)
122 int size;
123 size = cstr->size + 1;
124 if (size > cstr->size_allocated)
125 cstr_realloc(cstr, size);
126 ((unsigned char *)cstr->data)[size - 1] = ch;
127 cstr->size = size;
130 ST_FUNC void cstr_cat(CString *cstr, const char *str)
132 int c;
133 for(;;) {
134 c = *str;
135 if (c == '\0')
136 break;
137 cstr_ccat(cstr, c);
138 str++;
142 /* add a wide char */
143 ST_FUNC void cstr_wccat(CString *cstr, int ch)
145 int size;
146 size = cstr->size + sizeof(nwchar_t);
147 if (size > cstr->size_allocated)
148 cstr_realloc(cstr, size);
149 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
150 cstr->size = size;
153 ST_FUNC void cstr_new(CString *cstr)
155 memset(cstr, 0, sizeof(CString));
158 /* free string and reset it to NULL */
159 ST_FUNC void cstr_free(CString *cstr)
161 tcc_free(cstr->data_allocated);
162 cstr_new(cstr);
165 /* reset string to empty */
166 ST_FUNC void cstr_reset(CString *cstr)
168 cstr->size = 0;
171 /* XXX: unicode ? */
172 static void add_char(CString *cstr, int c)
174 if (c == '\'' || c == '\"' || c == '\\') {
175 /* XXX: could be more precise if char or string */
176 cstr_ccat(cstr, '\\');
178 if (c >= 32 && c <= 126) {
179 cstr_ccat(cstr, c);
180 } else {
181 cstr_ccat(cstr, '\\');
182 if (c == '\n') {
183 cstr_ccat(cstr, 'n');
184 } else {
185 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
186 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
187 cstr_ccat(cstr, '0' + (c & 7));
192 /* ------------------------------------------------------------------------- */
193 /* allocate a new token */
194 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
196 TokenSym *ts, **ptable;
197 int i;
199 if (tok_ident >= SYM_FIRST_ANOM)
200 tcc_error("memory full (symbols)");
202 /* expand token table if needed */
203 i = tok_ident - TOK_IDENT;
204 if ((i % TOK_ALLOC_INCR) == 0) {
205 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
206 table_ident = ptable;
209 ts = tcc_malloc(sizeof(TokenSym) + len);
210 table_ident[i] = ts;
211 ts->tok = tok_ident++;
212 ts->sym_define = NULL;
213 ts->sym_label = NULL;
214 ts->sym_struct = NULL;
215 ts->sym_identifier = NULL;
216 ts->len = len;
217 ts->hash_next = NULL;
218 memcpy(ts->str, str, len);
219 ts->str[len] = '\0';
220 *pts = ts;
221 return ts;
224 #define TOK_HASH_INIT 1
225 #define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
227 /* find a token and add it if not found */
228 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
230 TokenSym *ts, **pts;
231 int i;
232 unsigned int h;
234 h = TOK_HASH_INIT;
235 for(i=0;i<len;i++)
236 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
237 h &= (TOK_HASH_SIZE - 1);
239 pts = &hash_ident[h];
240 for(;;) {
241 ts = *pts;
242 if (!ts)
243 break;
244 if (ts->len == len && !memcmp(ts->str, str, len))
245 return ts;
246 pts = &(ts->hash_next);
248 return tok_alloc_new(pts, str, len);
251 /* XXX: buffer overflow */
252 /* XXX: float tokens */
253 ST_FUNC char *get_tok_str(int v, CValue *cv)
255 static char buf[STRING_MAX_SIZE + 1];
256 static CString cstr_buf;
257 CString *cstr;
258 char *p;
259 int i, len;
261 /* NOTE: to go faster, we give a fixed buffer for small strings */
262 cstr_reset(&cstr_buf);
263 cstr_buf.data = buf;
264 cstr_buf.size_allocated = sizeof(buf);
265 p = buf;
267 switch(v) {
268 case TOK_CINT:
269 case TOK_CUINT:
270 /* XXX: not quite exact, but only useful for testing */
271 sprintf(p, "%u", cv->ui);
272 break;
273 case TOK_CLLONG:
274 case TOK_CULLONG:
275 /* XXX: not quite exact, but only useful for testing */
276 #ifdef _WIN32
277 sprintf(p, "%u", (unsigned)cv->ull);
278 #else
279 sprintf(p, "%llu", cv->ull);
280 #endif
281 break;
282 case TOK_LCHAR:
283 cstr_ccat(&cstr_buf, 'L');
284 case TOK_CCHAR:
285 cstr_ccat(&cstr_buf, '\'');
286 add_char(&cstr_buf, cv->i);
287 cstr_ccat(&cstr_buf, '\'');
288 cstr_ccat(&cstr_buf, '\0');
289 break;
290 case TOK_PPNUM:
291 cstr = cv->cstr;
292 len = cstr->size - 1;
293 for(i=0;i<len;i++)
294 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
295 cstr_ccat(&cstr_buf, '\0');
296 break;
297 case TOK_LSTR:
298 cstr_ccat(&cstr_buf, 'L');
299 case TOK_STR:
300 cstr = cv->cstr;
301 cstr_ccat(&cstr_buf, '\"');
302 if (v == TOK_STR) {
303 len = cstr->size - 1;
304 for(i=0;i<len;i++)
305 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
306 } else {
307 len = (cstr->size / sizeof(nwchar_t)) - 1;
308 for(i=0;i<len;i++)
309 add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
311 cstr_ccat(&cstr_buf, '\"');
312 cstr_ccat(&cstr_buf, '\0');
313 break;
314 case TOK_LT:
315 v = '<';
316 goto addv;
317 case TOK_GT:
318 v = '>';
319 goto addv;
320 case TOK_DOTS:
321 return strcpy(p, "...");
322 case TOK_A_SHL:
323 return strcpy(p, "<<=");
324 case TOK_A_SAR:
325 return strcpy(p, ">>=");
326 default:
327 if (v < TOK_IDENT) {
328 /* search in two bytes table */
329 const unsigned char *q = tok_two_chars;
330 while (*q) {
331 if (q[2] == v) {
332 *p++ = q[0];
333 *p++ = q[1];
334 *p = '\0';
335 return buf;
337 q += 3;
339 addv:
340 *p++ = v;
341 *p = '\0';
342 } else if (v < tok_ident) {
343 return table_ident[v - TOK_IDENT]->str;
344 } else if (v >= SYM_FIRST_ANOM) {
345 /* special name for anonymous symbol */
346 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
347 } else {
348 /* should never happen */
349 return NULL;
351 break;
353 return cstr_buf.data;
356 /* fill input buffer and peek next char */
357 static int tcc_peekc_slow(BufferedFile *bf)
359 int len;
360 /* only tries to read if really end of buffer */
361 if (bf->buf_ptr >= bf->buf_end) {
362 if (bf->fd != -1) {
363 #if defined(PARSE_DEBUG)
364 len = 8;
365 #else
366 len = IO_BUF_SIZE;
367 #endif
368 len = read(bf->fd, bf->buffer, len);
369 if (len < 0)
370 len = 0;
371 } else {
372 len = 0;
374 total_bytes += len;
375 bf->buf_ptr = bf->buffer;
376 bf->buf_end = bf->buffer + len;
377 *bf->buf_end = CH_EOB;
379 if (bf->buf_ptr < bf->buf_end) {
380 return bf->buf_ptr[0];
381 } else {
382 bf->buf_ptr = bf->buf_end;
383 return CH_EOF;
387 /* return the current character, handling end of block if necessary
388 (but not stray) */
389 ST_FUNC int handle_eob(void)
391 return tcc_peekc_slow(file);
394 /* read next char from current input file and handle end of input buffer */
395 ST_INLN void inp(void)
397 ch = *(++(file->buf_ptr));
398 /* end of buffer/file handling */
399 if (ch == CH_EOB)
400 ch = handle_eob();
403 /* handle '\[\r]\n' */
404 static int handle_stray_noerror(void)
406 while (ch == '\\') {
407 inp();
408 if (ch == '\n') {
409 file->line_num++;
410 inp();
411 } else if (ch == '\r') {
412 inp();
413 if (ch != '\n')
414 goto fail;
415 file->line_num++;
416 inp();
417 } else {
418 fail:
419 return 1;
422 return 0;
/* Like handle_stray_noerror(), but a backslash that does not start a
   line continuation is reported as an error. */
static void handle_stray(void)
{
    int bad = handle_stray_noerror();

    if (bad)
        tcc_error("stray '\\' in program");
}
431 /* skip the stray and handle the \\n case. Output an error if
432 incorrect char after the stray */
433 static int handle_stray1(uint8_t *p)
435 int c;
437 if (p >= file->buf_end) {
438 file->buf_ptr = p;
439 c = handle_eob();
440 p = file->buf_ptr;
441 if (c == '\\')
442 goto parse_stray;
443 } else {
444 parse_stray:
445 file->buf_ptr = p;
446 ch = *p;
447 handle_stray();
448 p = file->buf_ptr;
449 c = *p;
451 return c;
/* Advance 'p' and load the next character into 'c'. When that byte is
   a backslash, handle_eob() is called to deal with a possible end of
   buffer; stray backslashes are not interpreted. */
#define PEEKC_EOB(c, p)\
{\
    (p)++;\
    (c) = *(p);\
    if ((c) == '\\') {\
        file->buf_ptr = (p);\
        (c) = handle_eob();\
        (p) = file->buf_ptr;\
    }\
}
/* Advance 'p' and load the next character into 'c'. A backslash is
   passed to handle_stray1(), which deals with end of buffer and with
   stray / line continuation handling. */
#define PEEKC(c, p)\
{\
    (p)++;\
    (c) = *(p);\
    if ((c) == '\\') {\
        (c) = handle_stray1(p);\
        (p) = file->buf_ptr;\
    }\
}
477 /* input with '\[\r]\n' handling. Note that this function cannot
478 handle other characters after '\', so you cannot call it inside
479 strings or comments */
480 ST_FUNC void minp(void)
482 inp();
483 if (ch == '\\')
484 handle_stray();
488 /* single line C++ comments */
489 static uint8_t *parse_line_comment(uint8_t *p)
491 int c;
493 p++;
494 for(;;) {
495 c = *p;
496 redo:
497 if (c == '\n' || c == CH_EOF) {
498 break;
499 } else if (c == '\\') {
500 file->buf_ptr = p;
501 c = handle_eob();
502 p = file->buf_ptr;
503 if (c == '\\') {
504 PEEKC_EOB(c, p);
505 if (c == '\n') {
506 file->line_num++;
507 PEEKC_EOB(c, p);
508 } else if (c == '\r') {
509 PEEKC_EOB(c, p);
510 if (c == '\n') {
511 file->line_num++;
512 PEEKC_EOB(c, p);
515 } else {
516 goto redo;
518 } else {
519 p++;
522 return p;
525 /* C comments */
526 ST_FUNC uint8_t *parse_comment(uint8_t *p)
528 int c;
530 p++;
531 for(;;) {
532 /* fast skip loop */
533 for(;;) {
534 c = *p;
535 if (c == '\n' || c == '*' || c == '\\')
536 break;
537 p++;
538 c = *p;
539 if (c == '\n' || c == '*' || c == '\\')
540 break;
541 p++;
543 /* now we can handle all the cases */
544 if (c == '\n') {
545 file->line_num++;
546 p++;
547 } else if (c == '*') {
548 p++;
549 for(;;) {
550 c = *p;
551 if (c == '*') {
552 p++;
553 } else if (c == '/') {
554 goto end_of_comment;
555 } else if (c == '\\') {
556 file->buf_ptr = p;
557 c = handle_eob();
558 p = file->buf_ptr;
559 if (c == '\\') {
560 /* skip '\[\r]\n', otherwise just skip the stray */
561 while (c == '\\') {
562 PEEKC_EOB(c, p);
563 if (c == '\n') {
564 file->line_num++;
565 PEEKC_EOB(c, p);
566 } else if (c == '\r') {
567 PEEKC_EOB(c, p);
568 if (c == '\n') {
569 file->line_num++;
570 PEEKC_EOB(c, p);
572 } else {
573 goto after_star;
577 } else {
578 break;
581 after_star: ;
582 } else {
583 /* stray, eob or eof */
584 file->buf_ptr = p;
585 c = handle_eob();
586 p = file->buf_ptr;
587 if (c == CH_EOF) {
588 tcc_error("unexpected end of file in comment");
589 } else if (c == '\\') {
590 p++;
594 end_of_comment:
595 p++;
596 return p;
599 #define cinp minp
601 static inline void skip_spaces(void)
603 while (is_space(ch))
604 cinp();
/* Track runs of space tokens. '*spc' is non-zero while the previous
   token was a space. Returns 1 when 't' is a space directly following
   another one, 0 otherwise; '*spc' is updated for the next call. */
static inline int check_space(int t, int *spc)
{
    if (!is_space(t)) {
        *spc = 0;
        return 0;
    }
    if (*spc)
        return 1;
    *spc = 1;
    return 0;
}
618 /* parse a string without interpreting escapes */
619 static uint8_t *parse_pp_string(uint8_t *p,
620 int sep, CString *str)
622 int c;
623 p++;
624 for(;;) {
625 c = *p;
626 if (c == sep) {
627 break;
628 } else if (c == '\\') {
629 file->buf_ptr = p;
630 c = handle_eob();
631 p = file->buf_ptr;
632 if (c == CH_EOF) {
633 unterminated_string:
634 /* XXX: indicate line number of start of string */
635 tcc_error("missing terminating %c character", sep);
636 } else if (c == '\\') {
637 /* escape : just skip \[\r]\n */
638 PEEKC_EOB(c, p);
639 if (c == '\n') {
640 file->line_num++;
641 p++;
642 } else if (c == '\r') {
643 PEEKC_EOB(c, p);
644 if (c != '\n')
645 expect("'\n' after '\r'");
646 file->line_num++;
647 p++;
648 } else if (c == CH_EOF) {
649 goto unterminated_string;
650 } else {
651 if (str) {
652 cstr_ccat(str, '\\');
653 cstr_ccat(str, c);
655 p++;
658 } else if (c == '\n') {
659 file->line_num++;
660 goto add_char;
661 } else if (c == '\r') {
662 PEEKC_EOB(c, p);
663 if (c != '\n') {
664 if (str)
665 cstr_ccat(str, '\r');
666 } else {
667 file->line_num++;
668 goto add_char;
670 } else {
671 add_char:
672 if (str)
673 cstr_ccat(str, c);
674 p++;
677 p++;
678 return p;
681 /* skip block of text until #else, #elif or #endif. skip also pairs of
682 #if/#endif */
683 static void preprocess_skip(void)
685 int a, start_of_line, c, in_warn_or_error;
686 uint8_t *p;
688 p = file->buf_ptr;
689 a = 0;
690 redo_start:
691 start_of_line = 1;
692 in_warn_or_error = 0;
693 for(;;) {
694 redo_no_start:
695 c = *p;
696 switch(c) {
697 case ' ':
698 case '\t':
699 case '\f':
700 case '\v':
701 case '\r':
702 p++;
703 goto redo_no_start;
704 case '\n':
705 file->line_num++;
706 p++;
707 goto redo_start;
708 case '\\':
709 file->buf_ptr = p;
710 c = handle_eob();
711 if (c == CH_EOF) {
712 expect("#endif");
713 } else if (c == '\\') {
714 ch = file->buf_ptr[0];
715 handle_stray_noerror();
717 p = file->buf_ptr;
718 goto redo_no_start;
719 /* skip strings */
720 case '\"':
721 case '\'':
722 if (in_warn_or_error)
723 goto _default;
724 p = parse_pp_string(p, c, NULL);
725 break;
726 /* skip comments */
727 case '/':
728 if (in_warn_or_error)
729 goto _default;
730 file->buf_ptr = p;
731 ch = *p;
732 minp();
733 p = file->buf_ptr;
734 if (ch == '*') {
735 p = parse_comment(p);
736 } else if (ch == '/') {
737 p = parse_line_comment(p);
739 break;
740 case '#':
741 p++;
742 if (start_of_line) {
743 file->buf_ptr = p;
744 next_nomacro();
745 p = file->buf_ptr;
746 if (a == 0 &&
747 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
748 goto the_end;
749 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
750 a++;
751 else if (tok == TOK_ENDIF)
752 a--;
753 else if( tok == TOK_ERROR || tok == TOK_WARNING)
754 in_warn_or_error = 1;
755 else if (tok == TOK_LINEFEED)
756 goto redo_start;
758 break;
759 _default:
760 default:
761 p++;
762 break;
764 start_of_line = 0;
766 the_end: ;
767 file->buf_ptr = p;
770 /* ParseState handling */
772 /* XXX: currently, no include file info is stored. Thus, we cannot display
773 accurate messages if the function or data definition spans multiple
774 files */
776 /* save current parse state in 's' */
777 ST_FUNC void save_parse_state(ParseState *s)
779 s->line_num = file->line_num;
780 s->macro_ptr = macro_ptr;
781 s->tok = tok;
782 s->tokc = tokc;
785 /* restore parse state from 's' */
786 ST_FUNC void restore_parse_state(ParseState *s)
788 file->line_num = s->line_num;
789 macro_ptr = s->macro_ptr;
790 tok = s->tok;
791 tokc = s->tokc;
794 /* return the number of additional 'ints' necessary to store the
795 token */
796 static inline int tok_ext_size(int t)
798 switch(t) {
799 /* 4 bytes */
800 case TOK_CINT:
801 case TOK_CUINT:
802 case TOK_CCHAR:
803 case TOK_LCHAR:
804 case TOK_CFLOAT:
805 case TOK_LINENUM:
806 return 1;
807 case TOK_STR:
808 case TOK_LSTR:
809 case TOK_PPNUM:
810 tcc_error("unsupported token");
811 return 1;
812 case TOK_CDOUBLE:
813 case TOK_CLLONG:
814 case TOK_CULLONG:
815 return 2;
816 case TOK_CLDOUBLE:
817 return LDOUBLE_SIZE / 4;
818 default:
819 return 0;
823 /* token string handling */
825 ST_INLN void tok_str_new(TokenString *s)
827 s->str = NULL;
828 s->len = 0;
829 s->allocated_len = 0;
830 s->last_line_num = -1;
833 ST_FUNC void tok_str_free(int *str)
835 tcc_free(str);
838 static int *tok_str_realloc(TokenString *s)
840 int *str, len;
842 if (s->allocated_len == 0) {
843 len = 8;
844 } else {
845 len = s->allocated_len * 2;
847 str = tcc_realloc(s->str, len * sizeof(int));
848 s->allocated_len = len;
849 s->str = str;
850 return str;
853 ST_FUNC void tok_str_add(TokenString *s, int t)
855 int len, *str;
857 len = s->len;
858 str = s->str;
859 if (len >= s->allocated_len)
860 str = tok_str_realloc(s);
861 str[len++] = t;
862 s->len = len;
865 static void tok_str_add2(TokenString *s, int t, CValue *cv)
867 int len, *str;
869 len = s->len;
870 str = s->str;
872 /* allocate space for worst case */
873 if (len + TOK_MAX_SIZE > s->allocated_len)
874 str = tok_str_realloc(s);
875 str[len++] = t;
876 switch(t) {
877 case TOK_CINT:
878 case TOK_CUINT:
879 case TOK_CCHAR:
880 case TOK_LCHAR:
881 case TOK_CFLOAT:
882 case TOK_LINENUM:
883 str[len++] = cv->tab[0];
884 break;
885 case TOK_PPNUM:
886 case TOK_STR:
887 case TOK_LSTR:
889 int nb_words;
890 CString *cstr;
892 nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
893 while ((len + nb_words) > s->allocated_len)
894 str = tok_str_realloc(s);
895 cstr = (CString *)(str + len);
896 cstr->data = NULL;
897 cstr->size = cv->cstr->size;
898 cstr->data_allocated = NULL;
899 cstr->size_allocated = cstr->size;
900 memcpy((char *)cstr + sizeof(CString),
901 cv->cstr->data, cstr->size);
902 len += nb_words;
904 break;
905 case TOK_CDOUBLE:
906 case TOK_CLLONG:
907 case TOK_CULLONG:
908 #if LDOUBLE_SIZE == 8
909 case TOK_CLDOUBLE:
910 #endif
911 str[len++] = cv->tab[0];
912 str[len++] = cv->tab[1];
913 break;
914 #if LDOUBLE_SIZE == 12
915 case TOK_CLDOUBLE:
916 str[len++] = cv->tab[0];
917 str[len++] = cv->tab[1];
918 str[len++] = cv->tab[2];
919 #elif LDOUBLE_SIZE == 16
920 case TOK_CLDOUBLE:
921 str[len++] = cv->tab[0];
922 str[len++] = cv->tab[1];
923 str[len++] = cv->tab[2];
924 str[len++] = cv->tab[3];
925 #elif LDOUBLE_SIZE != 8
926 #error add long double size support
927 #endif
928 break;
929 default:
930 break;
932 s->len = len;
935 /* add the current parse token in token string 's' */
936 ST_FUNC void tok_str_add_tok(TokenString *s)
938 CValue cval;
939 memset(&cval, 0, sizeof(CValue));
941 /* save line number info */
942 if (file->line_num != s->last_line_num) {
943 s->last_line_num = file->line_num;
944 cval.i = s->last_line_num;
945 tok_str_add2(s, TOK_LINENUM, &cval);
947 tok_str_add2(s, tok, &tokc);
950 /* get a token from an integer array and increment pointer
951 accordingly. we code it as a macro to avoid pointer aliasing. */
952 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
954 const int *p = *pp;
955 int n, *tab;
957 tab = cv->tab;
958 switch(*t = *p++) {
959 case TOK_CINT:
960 case TOK_CUINT:
961 case TOK_CCHAR:
962 case TOK_LCHAR:
963 case TOK_CFLOAT:
964 case TOK_LINENUM:
965 tab[0] = *p++;
966 break;
967 case TOK_STR:
968 case TOK_LSTR:
969 case TOK_PPNUM:
970 cv->cstr = (CString *)p;
971 cv->cstr->data = (char *)p + sizeof(CString);
972 p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
973 break;
974 case TOK_CDOUBLE:
975 case TOK_CLLONG:
976 case TOK_CULLONG:
977 n = 2;
978 goto copy;
979 case TOK_CLDOUBLE:
980 #if LDOUBLE_SIZE == 16
981 n = 4;
982 #elif LDOUBLE_SIZE == 12
983 n = 3;
984 #elif LDOUBLE_SIZE == 8
985 n = 2;
986 #else
987 # error add long double size support
988 #endif
989 copy:
991 *tab++ = *p++;
992 while (--n);
993 break;
994 default:
995 break;
997 *pp = p;
1000 static int macro_is_equal(const int *a, const int *b)
1002 char buf[STRING_MAX_SIZE + 1];
1003 int t;
1004 CValue cv;
1005 memset(&cv, 0, sizeof(CValue));
1006 while (*a && *b) {
1007 TOK_GET(&t, &a, &cv);
1008 pstrcpy(buf, sizeof buf, get_tok_str(t, &cv));
1009 TOK_GET(&t, &b, &cv);
1010 if (strcmp(buf, get_tok_str(t, &cv)))
1011 return 0;
1013 return !(*a || *b);
1016 /* defines handling */
1017 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1019 Sym *s;
1021 s = define_find(v);
1022 if (s && !macro_is_equal(s->d, str))
1023 tcc_warning("%s redefined", get_tok_str(v, NULL));
1025 s = sym_push2(&define_stack, v, macro_type, 0);
1026 s->d = str;
1027 s->next = first_arg;
1028 table_ident[v - TOK_IDENT]->sym_define = s;
1031 /* undefined a define symbol. Its name is just set to zero */
1032 ST_FUNC void define_undef(Sym *s)
1034 int v;
1035 v = s->v;
1036 if (v >= TOK_IDENT && v < tok_ident)
1037 table_ident[v - TOK_IDENT]->sym_define = NULL;
1038 s->v = 0;
1041 ST_INLN Sym *define_find(int v)
1043 v -= TOK_IDENT;
1044 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1045 return NULL;
1046 return table_ident[v]->sym_define;
1049 /* free define stack until top reaches 'b' */
1050 ST_FUNC void free_defines(Sym *b)
1052 Sym *top, *top1;
1053 int v;
1055 top = define_stack;
1056 while (top != b) {
1057 top1 = top->prev;
1058 /* do not free args or predefined defines */
1059 if (top->d)
1060 tok_str_free(top->d);
1061 v = top->v;
1062 if (v >= TOK_IDENT && v < tok_ident)
1063 table_ident[v - TOK_IDENT]->sym_define = NULL;
1064 sym_free(top);
1065 top = top1;
1067 define_stack = b;
1070 /* label lookup */
1071 ST_FUNC Sym *label_find(int v)
1073 v -= TOK_IDENT;
1074 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1075 return NULL;
1076 return table_ident[v]->sym_label;
1079 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1081 Sym *s, **ps;
1082 s = sym_push2(ptop, v, 0, 0);
1083 s->r = flags;
1084 ps = &table_ident[v - TOK_IDENT]->sym_label;
1085 if (ptop == &global_label_stack) {
1086 /* modify the top most local identifier, so that
1087 sym_identifier will point to 's' when popped */
1088 while (*ps != NULL)
1089 ps = &(*ps)->prev_tok;
1091 s->prev_tok = *ps;
1092 *ps = s;
1093 return s;
1096 /* pop labels until element last is reached. Look if any labels are
1097 undefined. Define symbols if '&&label' was used. */
1098 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1100 Sym *s, *s1;
1101 for(s = *ptop; s != slast; s = s1) {
1102 s1 = s->prev;
1103 if (s->r == LABEL_DECLARED) {
1104 tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1105 } else if (s->r == LABEL_FORWARD) {
1106 tcc_error("label '%s' used but not defined",
1107 get_tok_str(s->v, NULL));
1108 } else {
1109 if (s->c) {
1110 /* define corresponding symbol. A size of
1111 1 is put. */
1112 put_extern_sym(s, cur_text_section, s->jnext, 1);
1115 /* remove label */
1116 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1117 sym_free(s);
1119 *ptop = slast;
1122 /* eval an expression for #if/#elif */
1123 static int expr_preprocess(void)
1125 int c, t;
1126 TokenString str;
1128 tok_str_new(&str);
1129 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1130 next(); /* do macro subst */
1131 if (tok == TOK_DEFINED) {
1132 next_nomacro();
1133 t = tok;
1134 if (t == '(')
1135 next_nomacro();
1136 c = define_find(tok) != 0;
1137 if (t == '(')
1138 next_nomacro();
1139 tok = TOK_CINT;
1140 tokc.i = c;
1141 } else if (tok >= TOK_IDENT) {
1142 /* if undefined macro */
1143 tok = TOK_CINT;
1144 tokc.i = 0;
1146 tok_str_add_tok(&str);
1148 tok_str_add(&str, -1); /* simulate end of file */
1149 tok_str_add(&str, 0);
1150 /* now evaluate C constant expression */
1151 macro_ptr = str.str;
1152 next();
1153 c = expr_const();
1154 macro_ptr = NULL;
1155 tok_str_free(str.str);
1156 return c != 0;
#if defined(PARSE_DEBUG) || defined(PP_DEBUG)
/* Debug helper: print the spelling of every token of the
   zero-terminated token string 'str' between '<' and '>'. */
static void tok_print(int *str)
{
    /* TOK_GET() takes a 'const int **'; passing '&str' (an 'int **')
       is not a valid conversion, so walk the string with a const
       cursor instead. */
    const int *cursor = str;
    int t;
    CValue cval;

    memset(&cval, 0, sizeof(CValue));

    printf("<");
    for (;;) {
        TOK_GET(&t, &cursor, &cval);
        if (!t)
            break;
        printf("%s", get_tok_str(t, &cval));
    }
    printf(">\n");
}
#endif
1177 /* parse after #define */
1178 ST_FUNC void parse_define(void)
1180 Sym *s, *first, **ps;
1181 int v, t, varg, is_vaargs, spc;
1182 TokenString str;
1184 v = tok;
1185 if (v < TOK_IDENT)
1186 tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1187 /* XXX: should check if same macro (ANSI) */
1188 first = NULL;
1189 t = MACRO_OBJ;
1190 /* '(' must be just after macro definition for MACRO_FUNC */
1191 next_nomacro_spc();
1192 if (tok == '(') {
1193 next_nomacro();
1194 ps = &first;
1195 while (tok != ')') {
1196 varg = tok;
1197 next_nomacro();
1198 is_vaargs = 0;
1199 if (varg == TOK_DOTS) {
1200 varg = TOK___VA_ARGS__;
1201 is_vaargs = 1;
1202 } else if (tok == TOK_DOTS && gnu_ext) {
1203 is_vaargs = 1;
1204 next_nomacro();
1206 if (varg < TOK_IDENT)
1207 tcc_error("badly punctuated parameter list");
1208 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1209 *ps = s;
1210 ps = &s->next;
1211 if (tok != ',')
1212 break;
1213 next_nomacro();
1215 if (tok == ')')
1216 next_nomacro_spc();
1217 t = MACRO_FUNC;
1219 tok_str_new(&str);
1220 spc = 2;
1221 /* EOF testing necessary for '-D' handling */
1222 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1223 /* remove spaces around ## and after '#' */
1224 if (TOK_TWOSHARPS == tok) {
1225 if (1 == spc)
1226 --str.len;
1227 spc = 2;
1228 } else if ('#' == tok) {
1229 spc = 2;
1230 } else if (check_space(tok, &spc)) {
1231 goto skip;
1233 tok_str_add2(&str, tok, &tokc);
1234 skip:
1235 next_nomacro_spc();
1237 if (spc == 1)
1238 --str.len; /* remove trailing space */
1239 tok_str_add(&str, 0);
1240 #ifdef PP_DEBUG
1241 printf("define %s %d: ", get_tok_str(v, NULL), t);
1242 tok_print(str.str);
1243 #endif
1244 define_push(v, t, str.str, first);
1247 static inline int hash_cached_include(const char *filename)
1249 const unsigned char *s;
1250 unsigned int h;
1252 h = TOK_HASH_INIT;
1253 s = (unsigned char *) filename;
1254 while (*s) {
1255 h = TOK_HASH_FUNC(h, *s);
1256 s++;
1258 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1259 return h;
1262 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1264 CachedInclude *e;
1265 int i, h;
1266 h = hash_cached_include(filename);
1267 i = s1->cached_includes_hash[h];
1268 for(;;) {
1269 if (i == 0)
1270 break;
1271 e = s1->cached_includes[i - 1];
1272 if (0 == PATHCMP(e->filename, filename))
1273 return e;
1274 i = e->hash_next;
1276 return NULL;
1279 static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
1281 CachedInclude *e;
1282 int h;
1284 if (search_cached_include(s1, filename))
1285 return;
1286 #ifdef INC_DEBUG
1287 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1288 #endif
1289 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1290 strcpy(e->filename, filename);
1291 e->ifndef_macro = ifndef_macro;
1292 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1293 /* add in hash table */
1294 h = hash_cached_include(filename);
1295 e->hash_next = s1->cached_includes_hash[h];
1296 s1->cached_includes_hash[h] = s1->nb_cached_includes;
1299 static void pragma_parse(TCCState *s1)
1301 int val;
1303 next();
1304 if (tok == TOK_pack) {
1306 This may be:
1307 #pragma pack(1) // set
1308 #pragma pack() // reset to default
1309 #pragma pack(push,1) // push & set
1310 #pragma pack(pop) // restore previous
1312 next();
1313 skip('(');
1314 if (tok == TOK_ASM_pop) {
1315 next();
1316 if (s1->pack_stack_ptr <= s1->pack_stack) {
1317 stk_error:
1318 tcc_error("out of pack stack");
1320 s1->pack_stack_ptr--;
1321 } else {
1322 val = 0;
1323 if (tok != ')') {
1324 if (tok == TOK_ASM_push) {
1325 next();
1326 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
1327 goto stk_error;
1328 s1->pack_stack_ptr++;
1329 skip(',');
1331 if (tok != TOK_CINT) {
1332 pack_error:
1333 tcc_error("invalid pack pragma");
1335 val = tokc.i;
1336 if (val < 1 || val > 16 || (val & (val - 1)) != 0)
1337 goto pack_error;
1338 next();
1340 *s1->pack_stack_ptr = val;
1341 skip(')');
/* Handle one preprocessor directive. Called from next_nomacro1() when a
   '#' is seen at the beginning of a line while PARSE_FLAG_PREPROCESS is
   set. The directive name is read here with next_nomacro() and
   dispatched by the switch below. parse_flags is saved on entry and
   restored at 'the_end'. */
1346 /* is_bof is true if first non space token at beginning of file; it is
        only used to record the macro of a leading '#ifndef' in
        file->ifndef_macro */
1347 ST_FUNC void preprocess(int is_bof)
1349 TCCState *s1 = tcc_state;
1350 int i, c, n, saved_parse_flags;
1351 char buf[1024], *q;
1352 Sym *s;
1354 saved_parse_flags = parse_flags;
1355 parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM |
1356 PARSE_FLAG_LINEFEED;
1357 next_nomacro();
1358 redo:
1359 switch(tok) {
1360 case TOK_DEFINE:
1361 next_nomacro();
1362 parse_define();
1363 break;
1364 case TOK_UNDEF:
1365 next_nomacro();
1366 s = define_find(tok);
1367 /* undefine symbol by putting an invalid name */
1368 if (s)
1369 define_undef(s);
1370 break;
1371 case TOK_INCLUDE:
1372 case TOK_INCLUDE_NEXT:
1373 ch = file->buf_ptr[0];
1374 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1375 skip_spaces();
1376 if (ch == '<') {
1377 c = '>';
1378 goto read_name;
1379 } else if (ch == '\"') {
1380 c = ch;
1381 read_name:
1382 inp();
1383 q = buf;
/* copy the name up to the closing delimiter 'c'; characters that do
   not fit in buf are dropped */
1384 while (ch != c && ch != '\n' && ch != CH_EOF) {
1385 if ((q - buf) < sizeof(buf) - 1)
1386 *q++ = ch;
1387 if (ch == '\\') {
1388 if (handle_stray_noerror() == 0)
1389 --q;
1390 } else
1391 inp();
1393 *q = '\0';
1394 minp();
1395 #if 0
1396 /* eat all spaces and comments after include */
1397 /* XXX: slightly incorrect */
1398 while (ch1 != '\n' && ch1 != CH_EOF)
1399 inp();
1400 #endif
1401 } else {
1402 /* computed #include : either we have only strings or
1403 we have anything enclosed in '<>' */
1404 next();
1405 buf[0] = '\0';
1406 if (tok == TOK_STR) {
1407 while (tok != TOK_LINEFEED) {
1408 if (tok != TOK_STR) {
1409 include_syntax:
1410 tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1412 pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
1413 next();
1415 c = '\"';
1416 } else {
1417 int len;
1418 while (tok != TOK_LINEFEED) {
1419 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1420 next();
1422 len = strlen(buf);
1423 /* check syntax and remove '<>' */
1424 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1425 goto include_syntax;
1426 memmove(buf, buf + 1, len - 2);
1427 buf[len - 2] = '\0';
1428 c = '>';
1432 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1433 tcc_error("#include recursion too deep");
1434 /* store current file in stack, but increment stack later below */
1435 *s1->include_stack_ptr = file;
1437 n = s1->nb_include_paths + s1->nb_sysinclude_paths;
/* candidate locations, in order: i == -2 absolute path,
   i == -1 directory of the including file (only for "name"),
   i >= 0 the include paths followed by the sysinclude paths */
1438 for (i = -2; i < n; ++i) {
1439 char buf1[sizeof file->filename];
1440 CachedInclude *e;
1441 BufferedFile **f;
1442 const char *path;
1444 if (i == -2) {
1445 /* check absolute include path */
1446 if (!IS_ABSPATH(buf))
1447 continue;
1448 buf1[0] = 0;
1449 i = n; /* force end loop */
1451 } else if (i == -1) {
1452 /* search in current dir if "header.h" */
1453 if (c != '\"')
1454 continue;
1455 path = file->filename;
1456 pstrncpy(buf1, path, tcc_basename(path) - path);
1458 } else {
1459 /* search in all the include paths */
1460 if (i < s1->nb_include_paths)
1461 path = s1->include_paths[i];
1462 else
1463 path = s1->sysinclude_paths[i - s1->nb_include_paths];
1464 pstrcpy(buf1, sizeof(buf1), path);
1465 pstrcat(buf1, sizeof(buf1), "/");
1468 pstrcat(buf1, sizeof(buf1), buf);
/* #include_next: skip any candidate whose name matches a file that is
   already on the include stack */
1470 if (tok == TOK_INCLUDE_NEXT)
1471 for (f = s1->include_stack_ptr; f >= s1->include_stack; --f)
1472 if (0 == PATHCMP((*f)->filename, buf1)) {
1473 #ifdef INC_DEBUG
1474 printf("%s: #include_next skipping %s\n", file->filename, buf1);
1475 #endif
1476 goto include_trynext;
1479 e = search_cached_include(s1, buf1);
1480 if (e && define_find(e->ifndef_macro)) {
1481 /* no need to parse the include because the 'ifndef macro'
1482 is defined */
1483 #ifdef INC_DEBUG
1484 printf("%s: skipping cached %s\n", file->filename, buf1);
1485 #endif
1486 goto include_done;
1489 if (tcc_open(s1, buf1) < 0)
1490 include_trynext:
1491 continue;
1493 #ifdef INC_DEBUG
1494 printf("%s: including %s\n", file->prev->filename, file->filename);
1495 #endif
1496 /* update target deps */
1497 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps,
1498 tcc_strdup(buf1));
1499 /* push current file in stack */
1500 ++s1->include_stack_ptr;
1501 /* add include file debug info */
1502 if (s1->do_debug)
1503 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1504 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1505 ch = file->buf_ptr[0];
1506 goto the_end;
1508 tcc_error("include file '%s' not found", buf);
1509 include_done:
1510 break;
/* for do_ifdef, c tells which directive is handled: 1 for #ifndef,
   0 for #ifdef */
1511 case TOK_IFNDEF:
1512 c = 1;
1513 goto do_ifdef;
1514 case TOK_IF:
1515 c = expr_preprocess();
1516 goto do_if;
1517 case TOK_IFDEF:
1518 c = 0;
1519 do_ifdef:
1520 next_nomacro();
1521 if (tok < TOK_IDENT)
1522 tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
1523 if (is_bof) {
1524 if (c) {
1525 #ifdef INC_DEBUG
1526 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1527 #endif
1528 file->ifndef_macro = tok;
1531 c = (define_find(tok) != 0) ^ c;
1532 do_if:
1533 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1534 tcc_error("memory full (ifdef)");
/* ifdef stack entries are tested below with '& 1' (branch is taken)
   and '& 2' (#else already seen) */
1535 *s1->ifdef_stack_ptr++ = c;
1536 goto test_skip;
1537 case TOK_ELSE:
1538 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1539 tcc_error("#else without matching #if");
1540 if (s1->ifdef_stack_ptr[-1] & 2)
1541 tcc_error("#else after #else");
/* flip the 'taken' bit and set the '#else seen' bit */
1542 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1543 goto test_else;
1544 case TOK_ELIF:
1545 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1546 tcc_error("#elif without matching #if");
1547 c = s1->ifdef_stack_ptr[-1];
1548 if (c > 1)
1549 tcc_error("#elif after #else");
1550 /* last #if/#elif expression was true: we skip */
1551 if (c == 1)
1552 goto skip;
1553 c = expr_preprocess();
1554 s1->ifdef_stack_ptr[-1] = c;
1555 test_else:
/* an #else/#elif belonging to the outermost conditional of this file
   invalidates the recorded '#ifndef' macro */
1556 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1557 file->ifndef_macro = 0;
1558 test_skip:
1559 if (!(c & 1)) {
1560 skip:
1561 preprocess_skip();
1562 is_bof = 0;
1563 goto redo;
1565 break;
1566 case TOK_ENDIF:
1567 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1568 tcc_error("#endif without matching #if");
1569 s1->ifdef_stack_ptr--;
1570 /* '#ifndef macro' was at the start of file. Now we check if
1571 an '#endif' is exactly at the end of file */
1572 if (file->ifndef_macro &&
1573 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1574 file->ifndef_macro_saved = file->ifndef_macro;
1575 /* need to set to zero to avoid false matches if another
1576 #ifndef at middle of file */
1577 file->ifndef_macro = 0;
1578 while (tok != TOK_LINEFEED)
1579 next_nomacro();
1580 tok_flags |= TOK_FLAG_ENDIF;
1581 goto the_end;
1583 break;
1584 case TOK_LINE:
1585 next();
1586 if (tok != TOK_CINT)
1587 tcc_error("#line");
1588 file->line_num = tokc.i - 1; /* the line number will be incremented after */
1589 next();
1590 if (tok != TOK_LINEFEED) {
1591 if (tok != TOK_STR)
1592 tcc_error("#line");
1593 pstrcpy(file->filename, sizeof(file->filename),
1594 (char *)tokc.cstr->data);
1596 break;
1597 case TOK_ERROR:
1598 case TOK_WARNING:
1599 c = tok;
1600 ch = file->buf_ptr[0];
1601 skip_spaces();
1602 q = buf;
/* collect the rest of the line as the message text */
1603 while (ch != '\n' && ch != CH_EOF) {
1604 if ((q - buf) < sizeof(buf) - 1)
1605 *q++ = ch;
1606 if (ch == '\\') {
1607 if (handle_stray_noerror() == 0)
1608 --q;
1609 } else
1610 inp();
1612 *q = '\0';
1613 if (c == TOK_ERROR)
1614 tcc_error("#error %s", buf);
1615 else
1616 tcc_warning("#warning %s", buf);
1617 break;
1618 case TOK_PRAGMA:
1619 pragma_parse(s1);
1620 break;
1621 default:
1622 if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) {
1623 /* '!' is ignored to allow C scripts. numbers are ignored
1624 to emulate cpp behaviour */
1625 } else {
1626 if (!(saved_parse_flags & PARSE_FLAG_ASM_COMMENTS))
1627 tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1628 else {
1629 /* this is a gas line comment in an 'S' file. */
1630 file->buf_ptr = parse_line_comment(file->buf_ptr);
1631 goto the_end;
1634 break;
1636 /* ignore other preprocess commands or #! for C scripts */
1637 while (tok != TOK_LINEFEED)
1638 next_nomacro();
1639 the_end:
1640 parse_flags = saved_parse_flags;
1643 /* evaluate escape codes in a string. */
1644 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1646 int c, n;
1647 const uint8_t *p;
1649 p = buf;
1650 for(;;) {
1651 c = *p;
1652 if (c == '\0')
1653 break;
1654 if (c == '\\') {
1655 p++;
1656 /* escape */
1657 c = *p;
1658 switch(c) {
1659 case '0': case '1': case '2': case '3':
1660 case '4': case '5': case '6': case '7':
1661 /* at most three octal digits */
1662 n = c - '0';
1663 p++;
1664 c = *p;
1665 if (isoct(c)) {
1666 n = n * 8 + c - '0';
1667 p++;
1668 c = *p;
1669 if (isoct(c)) {
1670 n = n * 8 + c - '0';
1671 p++;
1674 c = n;
1675 goto add_char_nonext;
1676 case 'x':
1677 case 'u':
1678 case 'U':
1679 p++;
1680 n = 0;
1681 for(;;) {
1682 c = *p;
1683 if (c >= 'a' && c <= 'f')
1684 c = c - 'a' + 10;
1685 else if (c >= 'A' && c <= 'F')
1686 c = c - 'A' + 10;
1687 else if (isnum(c))
1688 c = c - '0';
1689 else
1690 break;
1691 n = n * 16 + c;
1692 p++;
1694 c = n;
1695 goto add_char_nonext;
1696 case 'a':
1697 c = '\a';
1698 break;
1699 case 'b':
1700 c = '\b';
1701 break;
1702 case 'f':
1703 c = '\f';
1704 break;
1705 case 'n':
1706 c = '\n';
1707 break;
1708 case 'r':
1709 c = '\r';
1710 break;
1711 case 't':
1712 c = '\t';
1713 break;
1714 case 'v':
1715 c = '\v';
1716 break;
1717 case 'e':
1718 if (!gnu_ext)
1719 goto invalid_escape;
1720 c = 27;
1721 break;
1722 case '\'':
1723 case '\"':
1724 case '\\':
1725 case '?':
1726 break;
1727 default:
1728 invalid_escape:
1729 if (c >= '!' && c <= '~')
1730 tcc_warning("unknown escape sequence: \'\\%c\'", c);
1731 else
1732 tcc_warning("unknown escape sequence: \'\\x%x\'", c);
1733 break;
1736 p++;
1737 add_char_nonext:
1738 if (!is_long)
1739 cstr_ccat(outstr, c);
1740 else
1741 cstr_wccat(outstr, c);
1743 /* add a trailing '\0' */
1744 if (!is_long)
1745 cstr_ccat(outstr, '\0');
1746 else
1747 cstr_wccat(outstr, '\0');
/* we use 64 bit numbers */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val

   bn is an array of BN_SIZE 32 bit words, least significant word first.
   The bits shifted out of one word are or'ed into the bottom of the
   next one; or_val is or'ed into the bottom of the first word.
   shift must be in the range 0..31. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    unsigned int carry = or_val;

    for(i=0;i<BN_SIZE;i++) {
        v = bn[i];
        bn[i] = (v << shift) | carry;
        /* 'v >> 32' is undefined behaviour, so a zero shift must be
           handled apart: nothing is carried in that case */
        carry = shift ? v >> (32 - shift) : 0;
    }
}
1765 static void bn_zero(unsigned int *bn)
1767 int i;
1768 for(i=0;i<BN_SIZE;i++) {
1769 bn[i] = 0;
1773 /* parse number in null terminated string 'p' and return it in the
1774 current token */
1775 static void parse_number(const char *p)
1777 int b, t, shift, frac_bits, s, exp_val, ch;
1778 char *q;
1779 unsigned int bn[BN_SIZE];
1780 double d;
1782 /* number */
1783 q = token_buf;
1784 ch = *p++;
1785 t = ch;
1786 ch = *p++;
1787 *q++ = t;
1788 b = 10;
1789 if (t == '.') {
1790 goto float_frac_parse;
1791 } else if (t == '0') {
1792 if (ch == 'x' || ch == 'X') {
1793 q--;
1794 ch = *p++;
1795 b = 16;
1796 } else if (tcc_ext && (ch == 'b' || ch == 'B')) {
1797 q--;
1798 ch = *p++;
1799 b = 2;
1802 /* parse all digits. cannot check octal numbers at this stage
1803 because of floating point constants */
1804 while (1) {
1805 if (ch >= 'a' && ch <= 'f')
1806 t = ch - 'a' + 10;
1807 else if (ch >= 'A' && ch <= 'F')
1808 t = ch - 'A' + 10;
1809 else if (isnum(ch))
1810 t = ch - '0';
1811 else
1812 break;
1813 if (t >= b)
1814 break;
1815 if (q >= token_buf + STRING_MAX_SIZE) {
1816 num_too_long:
1817 tcc_error("number too long");
1819 *q++ = ch;
1820 ch = *p++;
1822 if (ch == '.' ||
1823 ((ch == 'e' || ch == 'E') && b == 10) ||
1824 ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
1825 if (b != 10) {
1826 /* NOTE: strtox should support that for hexa numbers, but
1827 non ISOC99 libcs do not support it, so we prefer to do
1828 it by hand */
1829 /* hexadecimal or binary floats */
1830 /* XXX: handle overflows */
1831 *q = '\0';
1832 if (b == 16)
1833 shift = 4;
1834 else
1835 shift = 2;
1836 bn_zero(bn);
1837 q = token_buf;
1838 while (1) {
1839 t = *q++;
1840 if (t == '\0') {
1841 break;
1842 } else if (t >= 'a') {
1843 t = t - 'a' + 10;
1844 } else if (t >= 'A') {
1845 t = t - 'A' + 10;
1846 } else {
1847 t = t - '0';
1849 bn_lshift(bn, shift, t);
1851 frac_bits = 0;
1852 if (ch == '.') {
1853 ch = *p++;
1854 while (1) {
1855 t = ch;
1856 if (t >= 'a' && t <= 'f') {
1857 t = t - 'a' + 10;
1858 } else if (t >= 'A' && t <= 'F') {
1859 t = t - 'A' + 10;
1860 } else if (t >= '0' && t <= '9') {
1861 t = t - '0';
1862 } else {
1863 break;
1865 if (t >= b)
1866 tcc_error("invalid digit");
1867 bn_lshift(bn, shift, t);
1868 frac_bits += shift;
1869 ch = *p++;
1872 if (ch != 'p' && ch != 'P')
1873 expect("exponent");
1874 ch = *p++;
1875 s = 1;
1876 exp_val = 0;
1877 if (ch == '+') {
1878 ch = *p++;
1879 } else if (ch == '-') {
1880 s = -1;
1881 ch = *p++;
1883 if (ch < '0' || ch > '9')
1884 expect("exponent digits");
1885 while (ch >= '0' && ch <= '9') {
1886 exp_val = exp_val * 10 + ch - '0';
1887 ch = *p++;
1889 exp_val = exp_val * s;
1891 /* now we can generate the number */
1892 /* XXX: should patch directly float number */
1893 d = (double)bn[1] * 4294967296.0 + (double)bn[0];
1894 d = ldexp(d, exp_val - frac_bits);
1895 t = toup(ch);
1896 if (t == 'F') {
1897 ch = *p++;
1898 tok = TOK_CFLOAT;
1899 /* float : should handle overflow */
1900 tokc.f = (float)d;
1901 } else if (t == 'L') {
1902 ch = *p++;
1903 #ifdef TCC_TARGET_PE
1904 tok = TOK_CDOUBLE;
1905 tokc.d = d;
1906 #else
1907 tok = TOK_CLDOUBLE;
1908 /* XXX: not large enough */
1909 tokc.ld = (long double)d;
1910 #endif
1911 } else {
1912 tok = TOK_CDOUBLE;
1913 tokc.d = d;
1915 } else {
1916 /* decimal floats */
1917 if (ch == '.') {
1918 if (q >= token_buf + STRING_MAX_SIZE)
1919 goto num_too_long;
1920 *q++ = ch;
1921 ch = *p++;
1922 float_frac_parse:
1923 while (ch >= '0' && ch <= '9') {
1924 if (q >= token_buf + STRING_MAX_SIZE)
1925 goto num_too_long;
1926 *q++ = ch;
1927 ch = *p++;
1930 if (ch == 'e' || ch == 'E') {
1931 if (q >= token_buf + STRING_MAX_SIZE)
1932 goto num_too_long;
1933 *q++ = ch;
1934 ch = *p++;
1935 if (ch == '-' || ch == '+') {
1936 if (q >= token_buf + STRING_MAX_SIZE)
1937 goto num_too_long;
1938 *q++ = ch;
1939 ch = *p++;
1941 if (ch < '0' || ch > '9')
1942 expect("exponent digits");
1943 while (ch >= '0' && ch <= '9') {
1944 if (q >= token_buf + STRING_MAX_SIZE)
1945 goto num_too_long;
1946 *q++ = ch;
1947 ch = *p++;
1950 *q = '\0';
1951 t = toup(ch);
1952 errno = 0;
1953 if (t == 'F') {
1954 ch = *p++;
1955 tok = TOK_CFLOAT;
1956 tokc.f = strtof(token_buf, NULL);
1957 } else if (t == 'L') {
1958 ch = *p++;
1959 #ifdef TCC_TARGET_PE
1960 tok = TOK_CDOUBLE;
1961 tokc.d = strtod(token_buf, NULL);
1962 #else
1963 tok = TOK_CLDOUBLE;
1964 tokc.ld = strtold(token_buf, NULL);
1965 #endif
1966 } else {
1967 tok = TOK_CDOUBLE;
1968 tokc.d = strtod(token_buf, NULL);
1971 } else {
1972 unsigned long long n, n1;
1973 int lcount, ucount;
1975 /* integer number */
1976 *q = '\0';
1977 q = token_buf;
1978 if (b == 10 && *q == '0') {
1979 b = 8;
1980 q++;
1982 n = 0;
1983 while(1) {
1984 t = *q++;
1985 /* no need for checks except for base 10 / 8 errors */
1986 if (t == '\0') {
1987 break;
1988 } else if (t >= 'a') {
1989 t = t - 'a' + 10;
1990 } else if (t >= 'A') {
1991 t = t - 'A' + 10;
1992 } else {
1993 t = t - '0';
1994 if (t >= b)
1995 tcc_error("invalid digit");
1997 n1 = n;
1998 n = n * b + t;
1999 /* detect overflow */
2000 /* XXX: this test is not reliable */
2001 if (n < n1)
2002 tcc_error("integer constant overflow");
2005 /* XXX: not exactly ANSI compliant */
2006 if ((n & 0xffffffff00000000LL) != 0) {
2007 if ((n >> 63) != 0)
2008 tok = TOK_CULLONG;
2009 else
2010 tok = TOK_CLLONG;
2011 } else if (n > 0x7fffffff) {
2012 tok = TOK_CUINT;
2013 } else {
2014 tok = TOK_CINT;
2016 lcount = 0;
2017 ucount = 0;
2018 for(;;) {
2019 t = toup(ch);
2020 if (t == 'L') {
2021 if (lcount >= 2)
2022 tcc_error("three 'l's in integer constant");
2023 lcount++;
2024 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2025 if (lcount == 2) {
2026 #endif
2027 if (tok == TOK_CINT)
2028 tok = TOK_CLLONG;
2029 else if (tok == TOK_CUINT)
2030 tok = TOK_CULLONG;
2031 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2033 #endif
2034 ch = *p++;
2035 } else if (t == 'U') {
2036 if (ucount >= 1)
2037 tcc_error("two 'u's in integer constant");
2038 ucount++;
2039 if (tok == TOK_CINT)
2040 tok = TOK_CUINT;
2041 else if (tok == TOK_CLLONG)
2042 tok = TOK_CULLONG;
2043 ch = *p++;
2044 } else {
2045 break;
2048 if (tok == TOK_CINT || tok == TOK_CUINT)
2049 tokc.ui = n;
2050 else
2051 tokc.ull = n;
2053 if (ch)
2054 tcc_error("invalid number\n");
/* Expands to one 'case' of the switch in next_nomacro1(): on character
   c1, the token is tok2 if the following character is c2 (which is then
   consumed), else tok1. Uses the locals 'c' and 'p' of the caller. */
2058 #define PARSE2(c1, tok1, c2, tok2) \
2059 case c1: \
2060 PEEKC(c, p); \
2061 if (c == c2) { \
2062 p++; \
2063 tok = tok2; \
2064 } else { \
2065 tok = tok1; \
2067 break;
2069 /* return next token without macro substitution. The token is read
        from file->buf_ptr and stored in 'tok' (with its value in 'tokc'
        for numbers, characters and strings). tok_flags is reset to 0
        after an ordinary token; spaces, comments and line feeds leave
        through 'keep_tok_flags' and do not reset it. */
2070 static inline void next_nomacro1(void)
2072 int t, c, is_long;
2073 TokenSym *ts;
2074 uint8_t *p, *p1;
2075 unsigned int h;
2077 p = file->buf_ptr;
2078 redo_no_start:
2079 c = *p;
2080 switch(c) {
2081 case ' ':
2082 case '\t':
2083 tok = c;
2084 p++;
2085 goto keep_tok_flags;
2086 case '\f':
2087 case '\v':
2088 case '\r':
2089 p++;
2090 goto redo_no_start;
2091 case '\\':
2092 /* first look if it is in fact an end of buffer */
2093 if (p >= file->buf_end) {
2094 file->buf_ptr = p;
2095 handle_eob();
2096 p = file->buf_ptr;
2097 if (p >= file->buf_end)
2098 goto parse_eof;
2099 else
2100 goto redo_no_start;
2101 } else {
2102 file->buf_ptr = p;
2103 ch = *p;
2104 handle_stray();
2105 p = file->buf_ptr;
2106 goto redo_no_start;
2108 parse_eof:
2110 TCCState *s1 = tcc_state;
/* with PARSE_FLAG_LINEFEED a line feed token is returned once before
   the end of file is processed; TOK_FLAG_EOF remembers that it was
   already done */
2111 if ((parse_flags & PARSE_FLAG_LINEFEED)
2112 && !(tok_flags & TOK_FLAG_EOF)) {
2113 tok_flags |= TOK_FLAG_EOF;
2114 tok = TOK_LINEFEED;
2115 goto keep_tok_flags;
2116 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2117 tok = TOK_EOF;
2118 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2119 tcc_error("missing #endif");
2120 } else if (s1->include_stack_ptr == s1->include_stack) {
2121 /* no include left : end of file. */
2122 tok = TOK_EOF;
2123 } else {
2124 tok_flags &= ~TOK_FLAG_EOF;
2125 /* pop include file */
2127 /* test if previous '#endif' was after a #ifdef at
2128 start of file */
2129 if (tok_flags & TOK_FLAG_ENDIF) {
2130 #ifdef INC_DEBUG
2131 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2132 #endif
2133 add_cached_include(s1, file->filename, file->ifndef_macro_saved);
2134 tok_flags &= ~TOK_FLAG_ENDIF;
2137 /* add end of include file debug info */
2138 if (tcc_state->do_debug) {
2139 put_stabd(N_EINCL, 0, 0);
2141 /* pop include stack */
2142 tcc_close();
2143 s1->include_stack_ptr--;
2144 p = file->buf_ptr;
2145 goto redo_no_start;
2148 break;
2150 case '\n':
2151 file->line_num++;
2152 tok_flags |= TOK_FLAG_BOL;
2153 p++;
2154 maybe_newline:
2155 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2156 goto redo_no_start;
2157 tok = TOK_LINEFEED;
2158 goto keep_tok_flags;
2160 case '#':
2161 /* XXX: simplify */
2162 PEEKC(c, p);
/* a '#' at the beginning of a line starts a preprocessor directive */
2163 if ((tok_flags & TOK_FLAG_BOL) &&
2164 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2165 file->buf_ptr = p;
2166 preprocess(tok_flags & TOK_FLAG_BOF);
2167 p = file->buf_ptr;
2168 goto maybe_newline;
2169 } else {
2170 if (c == '#') {
2171 p++;
2172 tok = TOK_TWOSHARPS;
2173 } else {
2174 if (parse_flags & PARSE_FLAG_ASM_COMMENTS) {
2175 p = parse_line_comment(p - 1);
2176 goto redo_no_start;
2177 } else {
2178 tok = '#';
2182 break;
2184 case 'a': case 'b': case 'c': case 'd':
2185 case 'e': case 'f': case 'g': case 'h':
2186 case 'i': case 'j': case 'k': case 'l':
2187 case 'm': case 'n': case 'o': case 'p':
2188 case 'q': case 'r': case 's': case 't':
2189 case 'u': case 'v': case 'w': case 'x':
2190 case 'y': case 'z':
2191 case 'A': case 'B': case 'C': case 'D':
2192 case 'E': case 'F': case 'G': case 'H':
2193 case 'I': case 'J': case 'K':
2194 case 'M': case 'N': case 'O': case 'P':
2195 case 'Q': case 'R': case 'S': case 'T':
2196 case 'U': case 'V': case 'W': case 'X':
2197 case 'Y': case 'Z':
2198 case '_':
2199 parse_ident_fast:
/* hash the identifier while scanning it directly in the file buffer */
2200 p1 = p;
2201 h = TOK_HASH_INIT;
2202 h = TOK_HASH_FUNC(h, c);
2203 p++;
2204 for(;;) {
2205 c = *p;
2206 if (!isidnum_table[c-CH_EOF])
2207 break;
2208 h = TOK_HASH_FUNC(h, c);
2209 p++;
2211 if (c != '\\') {
2212 TokenSym **pts;
2213 int len;
2215 /* fast case : no stray found, so we have the full token
2216 and we have already hashed it */
2217 len = p - p1;
2218 h &= (TOK_HASH_SIZE - 1);
2219 pts = &hash_ident[h];
2220 for(;;) {
2221 ts = *pts;
2222 if (!ts)
2223 break;
2224 if (ts->len == len && !memcmp(ts->str, p1, len))
2225 goto token_found;
2226 pts = &(ts->hash_next);
2228 ts = tok_alloc_new(pts, (char *) p1, len);
2229 token_found: ;
2230 } else {
2231 /* slower case */
2232 cstr_reset(&tokcstr);
2234 while (p1 < p) {
2235 cstr_ccat(&tokcstr, *p1);
2236 p1++;
2238 p--;
2239 PEEKC(c, p);
2240 parse_ident_slow:
2241 while (isidnum_table[c-CH_EOF]) {
2242 cstr_ccat(&tokcstr, c);
2243 PEEKC(c, p);
2245 ts = tok_alloc(tokcstr.data, tokcstr.size);
2247 tok = ts->tok;
2248 break;
/* 'L' starts either an identifier or a wide character / string
   constant */
2249 case 'L':
2250 t = p[1];
2251 if (t != '\\' && t != '\'' && t != '\"') {
2252 /* fast case */
2253 goto parse_ident_fast;
2254 } else {
2255 PEEKC(c, p);
2256 if (c == '\'' || c == '\"') {
2257 is_long = 1;
2258 goto str_const;
2259 } else {
2260 cstr_reset(&tokcstr);
2261 cstr_ccat(&tokcstr, 'L');
2262 goto parse_ident_slow;
2265 break;
/* numbers are not evaluated here: the text is collected in tokcstr
   and returned as TOK_PPNUM */
2266 case '0': case '1': case '2': case '3':
2267 case '4': case '5': case '6': case '7':
2268 case '8': case '9':
2270 cstr_reset(&tokcstr);
2271 /* after the first digit, accept digits, alpha, '.' or sign if
2272 prefixed by 'eEpP' */
2273 parse_num:
2274 for(;;) {
2275 t = c;
2276 cstr_ccat(&tokcstr, c);
2277 PEEKC(c, p);
2278 if (!(isnum(c) || isid(c) || c == '.' ||
2279 ((c == '+' || c == '-') &&
2280 (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
2281 break;
2283 /* We add a trailing '\0' to ease parsing */
2284 cstr_ccat(&tokcstr, '\0');
2285 tokc.cstr = &tokcstr;
2286 tok = TOK_PPNUM;
2287 break;
2288 case '.':
2289 /* special dot handling because it can also start a number */
2290 PEEKC(c, p);
2291 if (isnum(c)) {
2292 cstr_reset(&tokcstr);
2293 cstr_ccat(&tokcstr, '.');
2294 goto parse_num;
2295 } else if (c == '.') {
2296 PEEKC(c, p);
2297 if (c != '.')
2298 expect("'.'");
2299 PEEKC(c, p);
2300 tok = TOK_DOTS;
2301 } else {
2302 tok = '.';
2304 break;
2305 case '\'':
2306 case '\"':
2307 is_long = 0;
2308 str_const:
2310 CString str;
2311 int sep;
2313 sep = c;
2315 /* parse the string */
2316 cstr_new(&str);
2317 p = parse_pp_string(p, sep, &str);
2318 cstr_ccat(&str, '\0');
2320 /* eval the escape (should be done as TOK_PPNUM) */
2321 cstr_reset(&tokcstr);
2322 parse_escape_string(&tokcstr, str.data, is_long);
2323 cstr_free(&str);
2325 if (sep == '\'') {
2326 int char_size;
2327 /* XXX: make it portable */
2328 if (!is_long)
2329 char_size = 1;
2330 else
2331 char_size = sizeof(nwchar_t);
/* tokcstr always holds the trailing '\0' added by
   parse_escape_string(), hence the comparisons with char_size and
   2 * char_size */
2332 if (tokcstr.size <= char_size)
2333 tcc_error("empty character constant");
2334 if (tokcstr.size > 2 * char_size)
2335 tcc_warning("multi-character character constant");
2336 if (!is_long) {
2337 tokc.i = *(int8_t *)tokcstr.data;
2338 tok = TOK_CCHAR;
2339 } else {
2340 tokc.i = *(nwchar_t *)tokcstr.data;
2341 tok = TOK_LCHAR;
2343 } else {
2344 tokc.cstr = &tokcstr;
2345 if (!is_long)
2346 tok = TOK_STR;
2347 else
2348 tok = TOK_LSTR;
2351 break;
2353 case '<':
2354 PEEKC(c, p);
2355 if (c == '=') {
2356 p++;
2357 tok = TOK_LE;
2358 } else if (c == '<') {
2359 PEEKC(c, p);
2360 if (c == '=') {
2361 p++;
2362 tok = TOK_A_SHL;
2363 } else {
2364 tok = TOK_SHL;
2366 } else {
2367 tok = TOK_LT;
2369 break;
2371 case '>':
2372 PEEKC(c, p);
2373 if (c == '=') {
2374 p++;
2375 tok = TOK_GE;
2376 } else if (c == '>') {
2377 PEEKC(c, p);
2378 if (c == '=') {
2379 p++;
2380 tok = TOK_A_SAR;
2381 } else {
2382 tok = TOK_SAR;
2384 } else {
2385 tok = TOK_GT;
2387 break;
2389 case '&':
2390 PEEKC(c, p);
2391 if (c == '&') {
2392 p++;
2393 tok = TOK_LAND;
2394 } else if (c == '=') {
2395 p++;
2396 tok = TOK_A_AND;
2397 } else {
2398 tok = '&';
2400 break;
2402 case '|':
2403 PEEKC(c, p);
2404 if (c == '|') {
2405 p++;
2406 tok = TOK_LOR;
2407 } else if (c == '=') {
2408 p++;
2409 tok = TOK_A_OR;
2410 } else {
2411 tok = '|';
2413 break;
2415 case '+':
2416 PEEKC(c, p);
2417 if (c == '+') {
2418 p++;
2419 tok = TOK_INC;
2420 } else if (c == '=') {
2421 p++;
2422 tok = TOK_A_ADD;
2423 } else {
2424 tok = '+';
2426 break;
2428 case '-':
2429 PEEKC(c, p);
2430 if (c == '-') {
2431 p++;
2432 tok = TOK_DEC;
2433 } else if (c == '=') {
2434 p++;
2435 tok = TOK_A_SUB;
2436 } else if (c == '>') {
2437 p++;
2438 tok = TOK_ARROW;
2439 } else {
2440 tok = '-';
2442 break;
2444 PARSE2('!', '!', '=', TOK_NE)
2445 PARSE2('=', '=', '=', TOK_EQ)
2446 PARSE2('*', '*', '=', TOK_A_MUL)
2447 PARSE2('%', '%', '=', TOK_A_MOD)
2448 PARSE2('^', '^', '=', TOK_A_XOR)
2450 /* comments or operator */
2451 case '/':
2452 PEEKC(c, p);
2453 if (c == '*') {
2454 p = parse_comment(p);
2455 /* comments replaced by a blank */
2456 tok = ' ';
2457 goto keep_tok_flags;
2458 } else if (c == '/') {
2459 p = parse_line_comment(p);
2460 tok = ' ';
2461 goto keep_tok_flags;
2462 } else if (c == '=') {
2463 p++;
2464 tok = TOK_A_DIV;
2465 } else {
2466 tok = '/';
2468 break;
2470 /* simple tokens */
2471 case '(':
2472 case ')':
2473 case '[':
2474 case ']':
2475 case '{':
2476 case '}':
2477 case ',':
2478 case ';':
2479 case ':':
2480 case '?':
2481 case '~':
2482 case '$': /* only used in assembler */
2483 case '@': /* ditto */
2484 tok = c;
2485 p++;
2486 break;
2487 default:
2488 tcc_error("unrecognized character \\x%02x", c);
2489 break;
2491 tok_flags = 0;
2492 keep_tok_flags:
2493 file->buf_ptr = p;
2494 #if defined(PARSE_DEBUG)
2495 printf("token = %s\n", get_tok_str(tok, &tokc));
2496 #endif
2499 /* return next token without macro substitution. Can read input from
2500 macro_ptr buffer */
2501 static void next_nomacro_spc(void)
2503 if (macro_ptr) {
2504 redo:
2505 tok = *macro_ptr;
2506 if (tok) {
2507 TOK_GET(&tok, &macro_ptr, &tokc);
2508 if (tok == TOK_LINENUM) {
2509 file->line_num = tokc.i;
2510 goto redo;
2513 } else {
2514 next_nomacro1();
2518 ST_FUNC void next_nomacro(void)
2520 do {
2521 next_nomacro_spc();
2522 } while (is_space(tok));
2525 /* substitute args in macro_str and return allocated string (a token
        string terminated by 0). 'args' is the list of the actual
        arguments, each one with its tokens in 'd':
        - '#arg' is replaced by a TOK_STR holding the text of the argument
        - an argument next to '##' is copied without macro expansion
        - any other argument is macro expanded with macro_subst() first
        Tokens which are not arguments are copied unchanged. */
2526 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2528 int last_tok, t, spc;
2529 const int *st;
2530 Sym *s;
2531 TokenString str;
2532 CString cstr;
2533 CValue cval;
2534 memset(&cval, 0, sizeof(CValue));
2536 tok_str_new(&str);
2537 last_tok = 0;
2538 while(1) {
2539 TOK_GET(&t, &macro_str, &cval);
2540 if (!t)
2541 break;
2542 if (t == '#') {
2543 /* stringize */
2544 TOK_GET(&t, &macro_str, &cval);
2545 if (!t)
2546 break;
2547 s = sym_find2(args, t);
2548 if (s) {
2549 cstr_new(&cstr);
2550 st = s->d;
2551 spc = 0;
2552 while (*st) {
2553 TOK_GET(&t, &st, &cval);
2554 if (!check_space(t, &spc))
2555 cstr_cat(&cstr, get_tok_str(t, &cval));
/* drop what 'spc' still counts at the end of the text */
2557 cstr.size -= spc;
2558 cstr_ccat(&cstr, '\0');
2559 #ifdef PP_DEBUG
2560 printf("stringize: %s\n", (char *)cstr.data);
2561 #endif
2562 /* add string */
2563 cval.cstr = &cstr;
2564 tok_str_add2(&str, TOK_STR, &cval);
2565 cstr_free(&cstr);
2566 } else {
2567 tok_str_add2(&str, t, &cval);
2569 } else if (t >= TOK_IDENT) {
2570 s = sym_find2(args, t);
2571 if (s) {
2572 st = s->d;
2573 /* if '##' is present before or after, no arg substitution */
2574 if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) {
2575 /* special case for var arg macros : ## eats the
2576 ',' if empty VA_ARGS variable. */
2577 /* XXX: test of the ',' is not 100%
2578 reliable. should fix it to avoid security
2579 problems */
2580 if (gnu_ext && s->type.t &&
2581 last_tok == TOK_TWOSHARPS &&
2582 str.len >= 2 && str.str[str.len - 2] == ',') {
2583 if (*st == 0) {
2584 /* suppress ',' '##' */
2585 str.len -= 2;
2586 } else {
2587 /* suppress '##' and add variable */
2588 str.len--;
2589 goto add_var;
2591 } else {
2592 int t1;
2593 add_var:
2594 for(;;) {
2595 TOK_GET(&t1, &st, &cval);
2596 if (!t1)
2597 break;
2598 tok_str_add2(&str, t1, &cval);
2601 } else {
2602 /* NOTE: the stream cannot be read when macro
2603 substituting an argument */
2604 macro_subst(&str, nested_list, st, NULL);
2606 } else {
2607 tok_str_add(&str, t);
2609 } else {
2610 tok_str_add2(&str, t, &cval);
2612 last_tok = t;
2614 tok_str_add(&str, 0);
2615 return str.str;
/* abbreviated month names, indexed by tm_mon, used to build the
   __DATE__ string */
2618 static char const ab_month_name[12][4] =
2620 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
2621 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
2624 /* do macro substitution of current token with macro 's' and add
2625 result to (tok_str,tok_len). 'nested_list' is the list of all
2626 macros we got inside to avoid recursing. Return non zero if no
2627 substitution needs to be done.
        __LINE__, __FILE__, __DATE__ and __TIME__ are handled directly
        here. For a function-like macro (MACRO_FUNC) the arguments are
        collected first; -1 is returned if the macro name is not
        followed by '('. 'can_read_stream' gives the enclosing macro
        levels to continue with when the current macro buffer is
        exhausted while looking for that '('. */
2628 static int macro_subst_tok(TokenString *tok_str,
2629 Sym **nested_list, Sym *s, struct macro_level **can_read_stream)
2631 Sym *args, *sa, *sa1;
2632 int mstr_allocated, parlevel, *mstr, t, t1, spc;
2633 const int *p;
2634 TokenString str;
2635 char *cstrval;
2636 CString cstr;
2637 char buf[32];
2638 CValue cval;
2639 memset(&cval, 0, sizeof(CValue));
2641 /* if symbol is a macro, prepare substitution */
2642 /* special macros */
2643 if (tok == TOK___LINE__) {
2644 snprintf(buf, sizeof(buf), "%d", file->line_num);
2645 cstrval = buf;
2646 t1 = TOK_PPNUM;
2647 goto add_cstr1;
2648 } else if (tok == TOK___FILE__) {
2649 cstrval = file->filename;
2650 goto add_cstr;
2651 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2652 time_t ti;
2653 struct tm *tm;
2655 time(&ti);
/* NOTE(review): the result of localtime() is used without a NULL
   check */
2656 tm = localtime(&ti);
2657 if (tok == TOK___DATE__) {
2658 snprintf(buf, sizeof(buf), "%s %2d %d",
2659 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2660 } else {
2661 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
2662 tm->tm_hour, tm->tm_min, tm->tm_sec);
2664 cstrval = buf;
2665 add_cstr:
2666 t1 = TOK_STR;
2667 add_cstr1:
/* emit cstrval as a single token of kind t1 */
2668 cstr_new(&cstr);
2669 cstr_cat(&cstr, cstrval);
2670 cstr_ccat(&cstr, '\0');
2671 cval.cstr = &cstr;
2672 tok_str_add2(tok_str, t1, &cval);
2673 cstr_free(&cstr);
2674 } else {
2675 mstr = s->d;
2676 mstr_allocated = 0;
2677 if (s->type.t == MACRO_FUNC) {
2678 /* NOTE: we do not use next_nomacro to avoid eating the
2679 next token. XXX: find better solution */
2680 redo:
2681 if (macro_ptr) {
2682 p = macro_ptr;
2683 while (is_space(t = *p) || TOK_LINEFEED == t)
2684 ++p;
2685 if (t == 0 && can_read_stream) {
2686 /* end of macro stream: we must look at the token
2687 after in the file */
2688 struct macro_level *ml = *can_read_stream;
2689 macro_ptr = NULL;
2690 if (ml)
2692 macro_ptr = ml->p;
2693 ml->p = NULL;
2694 *can_read_stream = ml -> prev;
2696 /* also, end of scope for nested defined symbol */
2697 (*nested_list)->v = -1;
2698 goto redo;
2700 } else {
/* look ahead in the file for the '(', skipping spaces, newlines and
   comments */
2701 ch = file->buf_ptr[0];
2702 while (is_space(ch) || ch == '\n' || ch == '/')
2704 if (ch == '/')
2706 int c;
2707 uint8_t *p = file->buf_ptr;
2708 PEEKC(c, p);
2709 if (c == '*') {
2710 p = parse_comment(p);
2711 file->buf_ptr = p - 1;
2712 } else if (c == '/') {
2713 p = parse_line_comment(p);
2714 file->buf_ptr = p - 1;
2715 } else
2716 break;
2718 cinp();
2720 t = ch;
2722 if (t != '(') /* no macro subst */
2723 return -1;
2725 /* argument macro */
2726 next_nomacro();
2727 next_nomacro();
2728 args = NULL;
2729 sa = s->next;
2730 /* NOTE: empty args are allowed, except if no args */
2731 for(;;) {
2732 /* handle '()' case */
2733 if (!args && !sa && tok == ')')
2734 break;
2735 if (!sa)
2736 tcc_error("macro '%s' used with too many args",
2737 get_tok_str(s->v, 0));
2738 tok_str_new(&str);
2739 parlevel = spc = 0;
2740 /* NOTE: non zero sa->t indicates VA_ARGS */
/* collect the tokens of one argument: up to a ',' or ')' outside of
   nested parentheses; for VA_ARGS the ',' does not end the argument */
2741 while ((parlevel > 0 ||
2742 (tok != ')' &&
2743 (tok != ',' || sa->type.t))) &&
2744 tok != -1) {
2745 if (tok == '(')
2746 parlevel++;
2747 else if (tok == ')')
2748 parlevel--;
2749 if (tok == TOK_LINEFEED)
2750 tok = ' ';
2751 if (!check_space(tok, &spc))
2752 tok_str_add2(&str, tok, &tokc);
2753 next_nomacro_spc();
2755 str.len -= spc;
2756 tok_str_add(&str, 0);
2757 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
2758 sa1->d = str.str;
2759 sa = sa->next;
2760 if (tok == ')') {
2761 /* special case for gcc var args: add an empty
2762 var arg argument if it is omitted */
2763 if (sa && sa->type.t && gnu_ext)
2764 continue;
2765 else
2766 break;
2768 if (tok != ',')
2769 expect(",");
2770 next_nomacro();
2772 if (sa) {
2773 tcc_error("macro '%s' used with too few args",
2774 get_tok_str(s->v, 0));
2777 /* now subst each arg */
2778 mstr = macro_arg_subst(nested_list, mstr, args);
2779 /* free memory */
2780 sa = args;
2781 while (sa) {
2782 sa1 = sa->prev;
2783 tok_str_free(sa->d);
2784 sym_free(sa);
2785 sa = sa1;
2787 mstr_allocated = 1;
/* push the macro on nested_list while its body is substituted */
2789 sym_push2(nested_list, s->v, 0, 0);
2790 macro_subst(tok_str, nested_list, mstr, can_read_stream);
2791 /* pop nested defined symbol */
2792 sa1 = *nested_list;
2793 *nested_list = sa1->prev;
2794 sym_free(sa1);
2795 if (mstr_allocated)
2796 tok_str_free(mstr);
2798 return 0;
2801 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
2802 return the resulting string (which must be freed).
        Two tokens are pasted by writing their text to a temporary
        ":paste:" buffered file and lexing it again with next_nomacro1().
        The globals 'tok' and 'tokc' are used as working storage. */
2803 static inline int *macro_twosharps(const int *macro_str)
2805 const int *ptr;
2806 int t;
2807 TokenString macro_str1;
2808 CString cstr;
2809 int n, start_of_nosubsts;
2811 /* we search the first '##' */
2812 for(ptr = macro_str;;) {
2813 CValue cval;
2814 memset(&cval, 0, sizeof(CValue));
2815 TOK_GET(&t, &ptr, &cval);
2816 if (t == TOK_TWOSHARPS)
2817 break;
2818 /* nothing more to do if end of string */
2819 if (t == 0)
2820 return NULL;
2823 /* we saw '##', so we need more processing to handle it */
/* start_of_nosubsts: index in macro_str1 of the first TOK_NOSUBST of the
   run being copied, or -1 when there is none */
2824 start_of_nosubsts = -1;
2825 tok_str_new(&macro_str1);
2826 for(ptr = macro_str;;) {
2827 TOK_GET(&tok, &ptr, &tokc);
2828 if (tok == 0)
2829 break;
2830 if (tok == TOK_TWOSHARPS)
2831 continue;
2832 if (tok == TOK_NOSUBST && start_of_nosubsts < 0)
2833 start_of_nosubsts = macro_str1.len;
2834 while (*ptr == TOK_TWOSHARPS) {
2835 /* given 'a##b', remove nosubsts preceding 'a' */
2836 if (start_of_nosubsts >= 0)
2837 macro_str1.len = start_of_nosubsts;
2838 /* given 'a##b', skip '##' */
2839 t = *++ptr;
2840 /* given 'a##b', remove nosubsts preceding 'b' */
2841 while (t == TOK_NOSUBST)
2842 t = *++ptr;
2843 if (t && t != TOK_TWOSHARPS) {
2844 CValue cval;
2845 memset(&cval, 0, sizeof(CValue));
2846 TOK_GET(&t, &ptr, &cval);
2847 /* We concatenate the two tokens */
2848 cstr_new(&cstr);
2849 cstr_cat(&cstr, get_tok_str(tok, &tokc));
/* n: length of the text of the left operand, used by the warning
   below */
2850 n = cstr.size;
2851 cstr_cat(&cstr, get_tok_str(t, &cval));
2852 cstr_ccat(&cstr, '\0');
2854 tcc_open_bf(tcc_state, ":paste:", cstr.size);
2855 memcpy(file->buffer, cstr.data, cstr.size);
/* lex the pasted text again: if one token does not consume the whole
   buffer, the paste is not a single token; warn and keep the pieces */
2856 for (;;) {
2857 next_nomacro1();
2858 if (0 == *file->buf_ptr)
2859 break;
2860 tok_str_add2(&macro_str1, tok, &tokc);
2861 tcc_warning("pasting \"%.*s\" and \"%s\" does not give a valid preprocessing token",
2862 n, cstr.data, (char*)cstr.data + n);
2864 tcc_close();
2865 cstr_free(&cstr);
2868 if (tok != TOK_NOSUBST)
2869 start_of_nosubsts = -1;
2870 tok_str_add2(&macro_str1, tok, &tokc);
2872 tok_str_add(&macro_str1, 0);
2873 return macro_str1.str;
2877 /* do macro substitution of macro_str and add result to
2878 (tok_str,tok_len). 'nested_list' is the list of all macros we got
2879 inside to avoid recursing. */
2880 static void macro_subst(TokenString *tok_str, Sym **nested_list,
2881 const int *macro_str, struct macro_level ** can_read_stream)
2883 Sym *s;
2884 int *macro_str1;
2885 const int *ptr;
2886 int t, ret, spc;
2887 struct macro_level ml;
2888 int force_blank;
2889 CValue cval;
2890 memset(&cval, 0, sizeof(CValue));
2892 /* first scan for '##' operator handling */
2893 ptr = macro_str;
2894 macro_str1 = macro_twosharps(ptr);
2896 if (macro_str1)
2897 ptr = macro_str1;
2898 spc = 0;
2899 force_blank = 0;
2901 while (1) {
2902 /* NOTE: ptr == NULL can only happen if tokens are read from
2903 file stream due to a macro function call */
2904 if (ptr == NULL)
2905 break;
2906 TOK_GET(&t, &ptr, &cval);
2907 if (t == 0)
2908 break;
2909 if (t == TOK_NOSUBST) {
2910 /* following token has already been subst'd. just copy it on */
2911 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2912 TOK_GET(&t, &ptr, &cval);
2913 goto no_subst;
2915 s = define_find(t);
2916 if (s != NULL) {
2917 /* if nested substitution, do nothing */
2918 if (sym_find2(*nested_list, t)) {
2919 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
2920 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2921 goto no_subst;
2923 ml.p = macro_ptr;
2924 if (can_read_stream)
2925 ml.prev = *can_read_stream, *can_read_stream = &ml;
2926 macro_ptr = (int *)ptr;
2927 tok = t;
2928 ret = macro_subst_tok(tok_str, nested_list, s, can_read_stream);
2929 ptr = (int *)macro_ptr;
2930 macro_ptr = ml.p;
2931 if (can_read_stream && *can_read_stream == &ml)
2932 *can_read_stream = ml.prev;
2933 if (ret != 0)
2934 goto no_subst;
2935 if (parse_flags & PARSE_FLAG_SPACES)
2936 force_blank = 1;
2937 } else {
2938 no_subst:
2939 if (force_blank) {
2940 tok_str_add(tok_str, ' ');
2941 spc = 1;
2942 force_blank = 0;
2944 if (!check_space(t, &spc))
2945 tok_str_add2(tok_str, t, &cval);
2948 if (macro_str1)
2949 tok_str_free(macro_str1);
2952 /* return next token with macro substitution */
2953 ST_FUNC void next(void)
2955 Sym *nested_list, *s;
2956 TokenString str;
2957 struct macro_level *ml;
2959 redo:
2960 if (parse_flags & PARSE_FLAG_SPACES)
2961 next_nomacro_spc();
2962 else
2963 next_nomacro();
2964 if (!macro_ptr) {
2965 /* if not reading from macro substituted string, then try
2966 to substitute macros */
2967 if (tok >= TOK_IDENT &&
2968 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2969 s = define_find(tok);
2970 if (s) {
2971 /* we have a macro: we try to substitute */
2972 tok_str_new(&str);
2973 nested_list = NULL;
2974 ml = NULL;
2975 if (macro_subst_tok(&str, &nested_list, s, &ml) == 0) {
2976 /* substitution done, NOTE: maybe empty */
2977 tok_str_add(&str, 0);
2978 macro_ptr = str.str;
2979 macro_ptr_allocated = str.str;
2980 goto redo;
2984 } else {
2985 if (tok == 0) {
2986 /* end of macro or end of unget buffer */
2987 if (unget_buffer_enabled) {
2988 macro_ptr = unget_saved_macro_ptr;
2989 unget_buffer_enabled = 0;
2990 } else {
2991 /* end of macro string: free it */
2992 tok_str_free(macro_ptr_allocated);
2993 macro_ptr_allocated = NULL;
2994 macro_ptr = NULL;
2996 goto redo;
2997 } else if (tok == TOK_NOSUBST) {
2998 /* discard preprocessor's nosubst markers */
2999 goto redo;
3003 /* convert preprocessor tokens into C tokens */
3004 if (tok == TOK_PPNUM &&
3005 (parse_flags & PARSE_FLAG_TOK_NUM)) {
3006 parse_number((char *)tokc.cstr->data);
3010 /* push back current token and set current token to 'last_tok'. Only
3011 identifier case handled for labels. */
3012 ST_INLN void unget_tok(int last_tok)
3014 int i, n;
3015 int *q;
3016 if (unget_buffer_enabled)
3018 /* assert(macro_ptr == unget_saved_buffer + 1);
3019 assert(*macro_ptr == 0); */
3021 else
3023 unget_saved_macro_ptr = macro_ptr;
3024 unget_buffer_enabled = 1;
3026 q = unget_saved_buffer;
3027 macro_ptr = q;
3028 *q++ = tok;
3029 n = tok_ext_size(tok) - 1;
3030 for(i=0;i<n;i++)
3031 *q++ = tokc.tab[i];
3032 *q = 0; /* end of token string */
3033 tok = last_tok;
3037 /* better than nothing, but needs extension to handle '-E' option
3038 correctly too */
3039 ST_FUNC void preprocess_init(TCCState *s1)
3041 s1->include_stack_ptr = s1->include_stack;
3042 /* XXX: move that before to avoid having to initialize
3043 file->ifdef_stack_ptr ? */
3044 s1->ifdef_stack_ptr = s1->ifdef_stack;
3045 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3047 vtop = vstack - 1;
3048 s1->pack_stack[0] = 0;
3049 s1->pack_stack_ptr = s1->pack_stack;
3052 ST_FUNC void preprocess_new(void)
3054 int i, c;
3055 const char *p, *r;
3057 /* init isid table */
3058 for(i=CH_EOF;i<256;i++)
3059 isidnum_table[i-CH_EOF] = isid(i) || isnum(i);
3061 /* add all tokens */
3062 table_ident = NULL;
3063 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3065 tok_ident = TOK_IDENT;
3066 p = tcc_keywords;
3067 while (*p) {
3068 r = p;
3069 for(;;) {
3070 c = *r++;
3071 if (c == '\0')
3072 break;
3074 tok_alloc(p, r - p - 1);
3075 p = r;
3079 /* Preprocess the current file */
3080 ST_FUNC int tcc_preprocess(TCCState *s1)
3082 Sym *define_start;
3084 BufferedFile *file_ref, **iptr, **iptr_new;
3085 int token_seen, line_ref, d;
3086 const char *s;
3088 preprocess_init(s1);
3089 define_start = define_stack;
3090 ch = file->buf_ptr[0];
3091 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3092 parse_flags = PARSE_FLAG_ASM_COMMENTS | PARSE_FLAG_PREPROCESS |
3093 PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES;
3094 token_seen = 0;
3095 line_ref = 0;
3096 file_ref = NULL;
3097 iptr = s1->include_stack_ptr;
3099 for (;;) {
3100 next();
3101 if (tok == TOK_EOF) {
3102 break;
3103 } else if (file != file_ref) {
3104 goto print_line;
3105 } else if (tok == TOK_LINEFEED) {
3106 if (!token_seen)
3107 continue;
3108 ++line_ref;
3109 token_seen = 0;
3110 } else if (!token_seen) {
3111 d = file->line_num - line_ref;
3112 if (file != file_ref || d < 0 || d >= 8) {
3113 print_line:
3114 iptr_new = s1->include_stack_ptr;
3115 s = iptr_new > iptr ? " 1"
3116 : iptr_new < iptr ? " 2"
3117 : iptr_new > s1->include_stack ? " 3"
3118 : ""
3120 iptr = iptr_new;
3121 fprintf(s1->ppfp, "# %d \"%s\"%s\n", file->line_num, file->filename, s);
3122 } else {
3123 while (d)
3124 fputs("\n", s1->ppfp), --d;
3126 line_ref = (file_ref = file)->line_num;
3127 token_seen = tok != TOK_LINEFEED;
3128 if (!token_seen)
3129 continue;
3131 fputs(get_tok_str(tok, &tokc), s1->ppfp);
3133 free_defines(define_start);
3134 return 0;