Fix x86-64 vla
[tinycc.git] / tccpp.c
blob538f671be3ce8cf5413eeff2924c722177a0eb73
/*
 *  TCC - Tiny C Compiler
 *
 *  Copyright (c) 2001-2004 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
/* bit set of TOK_FLAG_xxx values */
26 ST_DATA int tok_flags;
27 /* additional information about the token */
28 #define TOK_FLAG_BOL 0x0001 /* beginning of line before */
29 #define TOK_FLAG_BOF 0x0002 /* beginning of file before */
30 #define TOK_FLAG_ENDIF 0x0004 /* a endif was found matching starting #ifdef */
31 #define TOK_FLAG_EOF 0x0008 /* end of file */
/* bit set of PARSE_FLAG_xxx values; preprocess() saves it and installs its own set */
33 ST_DATA int parse_flags;
34 #define PARSE_FLAG_PREPROCESS 0x0001 /* activate preprocessing */
35 #define PARSE_FLAG_TOK_NUM 0x0002 /* return numbers instead of TOK_PPNUM */
36 #define PARSE_FLAG_LINEFEED 0x0004 /* line feed is returned as a
37 token. line feed is also
38 returned at eof */
39 #define PARSE_FLAG_ASM_COMMENTS 0x0008 /* '#' can be used for line comment */
40 #define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */
/* input file currently being read (buf_ptr, buf_end, line_num are used below) */
42 ST_DATA struct BufferedFile *file;
/* ch: character last read by inp(); tok: current token */
43 ST_DATA int ch, tok;
/* value attached to the current token, passed to get_tok_str() together with tok */
44 ST_DATA CValue tokc;
/* token array to read from instead of the file; expr_preprocess() points it
   at its own token string and resets it to NULL afterwards */
45 ST_DATA const int *macro_ptr;
46 ST_DATA CString tokcstr; /* current parsed string, if any */
48 /* display benchmark info */
49 ST_DATA int total_lines;
/* incremented by the number of bytes read in tcc_peekc_slow() */
50 ST_DATA int total_bytes;
/* next token number to hand out; tok_alloc_new() post-increments it */
51 ST_DATA int tok_ident;
/* TokenSym for each identifier, indexed by (token - TOK_IDENT) */
52 ST_DATA TokenSym **table_ident;
54 /* ------------------------------------------------------------------------- */
56 static int *macro_ptr_allocated;
57 static const int *unget_saved_macro_ptr;
58 static int unget_saved_buffer[TOK_MAX_SIZE + 1];
59 static int unget_buffer_enabled;
/* hash buckets of identifiers, chained through TokenSym.hash_next (see tok_alloc) */
60 static TokenSym *hash_ident[TOK_HASH_SIZE];
61 /* true if isid(c) || isnum(c) */
62 static unsigned char isidnum_table[256-CH_EOF];
/* All strings listed with DEF() in tcctok.h, concatenated into one array;
   the DEF macro appends "\0" after each string so they are stored back to back. */
64 static const char tcc_keywords[] =
65 #define DEF(id, str) str "\0"
66 #include "tcctok.h"
67 #undef DEF
/* Table of two-character operators, stored as triples:
   first character, second character, token value.
   get_tok_str() walks it three bytes at a time and compares the third byte
   with the token it has to print. */
70 /* WARNING: the content of this string encodes token numbers */
71 static const unsigned char tok_two_chars[] =
72 /* outdated -- gr
73 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
74 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
75 */{
76 '<','=', TOK_LE,
77 '>','=', TOK_GE,
78 '!','=', TOK_NE,
79 '&','&', TOK_LAND,
80 '|','|', TOK_LOR,
81 '+','+', TOK_INC,
82 '-','-', TOK_DEC,
83 '=','=', TOK_EQ,
84 '<','<', TOK_SHL,
85 '>','>', TOK_SAR,
86 '+','=', TOK_A_ADD,
87 '-','=', TOK_A_SUB,
88 '*','=', TOK_A_MUL,
89 '/','=', TOK_A_DIV,
90 '%','=', TOK_A_MOD,
91 '&','=', TOK_A_AND,
92 '^','=', TOK_A_XOR,
93 '|','=', TOK_A_OR,
94 '-','>', TOK_ARROW,
95 '.','.', 0xa8, // C++ token ?
96 '#','#', TOK_TWOSHARPS,
/* Singly linked record ('prev' link) holding a token pointer 'p'.
   NOTE(review): it is only seen here as the 'can_read_stream' parameter type
   of macro_subst(); its exact role is not visible in this part of the file. */
100 struct macro_level {
101 struct macro_level *prev;
102 const int *p;
/* forward declarations of functions defined later in the file */
105 static void next_nomacro_spc(void);
106 static void macro_subst(
107 TokenString *tok_str,
108 Sym **nested_list,
109 const int *macro_str,
110 struct macro_level **can_read_stream
113 ST_FUNC void skip(int c)
115 if (tok != c)
116 tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
117 next();
120 ST_FUNC void expect(const char *msg)
122 tcc_error("%s expected", msg);
125 /* ------------------------------------------------------------------------- */
126 /* CString handling */
127 static void cstr_realloc(CString *cstr, int new_size)
129 int size;
130 void *data;
132 size = cstr->size_allocated;
133 if (size == 0)
134 size = 8; /* no need to allocate a too small first string */
135 while (size < new_size)
136 size = size * 2;
137 data = tcc_realloc(cstr->data_allocated, size);
138 cstr->data_allocated = data;
139 cstr->size_allocated = size;
140 cstr->data = data;
143 /* add a byte */
144 ST_FUNC void cstr_ccat(CString *cstr, int ch)
146 int size;
147 size = cstr->size + 1;
148 if (size > cstr->size_allocated)
149 cstr_realloc(cstr, size);
150 ((unsigned char *)cstr->data)[size - 1] = ch;
151 cstr->size = size;
154 ST_FUNC void cstr_cat(CString *cstr, const char *str)
156 int c;
157 for(;;) {
158 c = *str;
159 if (c == '\0')
160 break;
161 cstr_ccat(cstr, c);
162 str++;
166 /* add a wide char */
167 ST_FUNC void cstr_wccat(CString *cstr, int ch)
169 int size;
170 size = cstr->size + sizeof(nwchar_t);
171 if (size > cstr->size_allocated)
172 cstr_realloc(cstr, size);
173 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
174 cstr->size = size;
177 ST_FUNC void cstr_new(CString *cstr)
179 memset(cstr, 0, sizeof(CString));
182 /* free string and reset it to NULL */
183 ST_FUNC void cstr_free(CString *cstr)
185 tcc_free(cstr->data_allocated);
186 cstr_new(cstr);
189 /* reset string to empty */
190 ST_FUNC void cstr_reset(CString *cstr)
192 cstr->size = 0;
195 /* XXX: unicode ? */
196 static void add_char(CString *cstr, int c)
198 if (c == '\'' || c == '\"' || c == '\\') {
199 /* XXX: could be more precise if char or string */
200 cstr_ccat(cstr, '\\');
202 if (c >= 32 && c <= 126) {
203 cstr_ccat(cstr, c);
204 } else {
205 cstr_ccat(cstr, '\\');
206 if (c == '\n') {
207 cstr_ccat(cstr, 'n');
208 } else {
209 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
210 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
211 cstr_ccat(cstr, '0' + (c & 7));
216 /* ------------------------------------------------------------------------- */
217 /* allocate a new token */
218 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
220 TokenSym *ts, **ptable;
221 int i;
223 if (tok_ident >= SYM_FIRST_ANOM)
224 tcc_error("memory full (symbols)");
226 /* expand token table if needed */
227 i = tok_ident - TOK_IDENT;
228 if ((i % TOK_ALLOC_INCR) == 0) {
229 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
230 table_ident = ptable;
233 ts = tcc_malloc(sizeof(TokenSym) + len);
234 table_ident[i] = ts;
235 ts->tok = tok_ident++;
236 ts->sym_define.data = tcc_malloc(sizeof(Sym**));
237 ts->sym_define.off = 0;
238 ts->sym_define.data[0] = NULL;
239 ts->sym_define.size = 1;
240 ts->sym_label = NULL;
241 ts->sym_struct = NULL;
242 ts->sym_identifier = NULL;
243 ts->len = len;
244 ts->hash_next = NULL;
245 memcpy(ts->str, str, len);
246 ts->str[len] = '\0';
247 *pts = ts;
248 return ts;
251 #define TOK_HASH_INIT 1
252 #define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
254 /* find a token and add it if not found */
255 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
257 TokenSym *ts, **pts;
258 int i;
259 unsigned int h;
261 h = TOK_HASH_INIT;
262 for(i=0;i<len;i++)
263 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
264 h &= (TOK_HASH_SIZE - 1);
266 pts = &hash_ident[h];
267 for(;;) {
268 ts = *pts;
269 if (!ts)
270 break;
271 if (ts->len == len && !memcmp(ts->str, str, len))
272 return ts;
273 pts = &(ts->hash_next);
275 return tok_alloc_new(pts, str, len);
/* Return a printable, NUL-terminated spelling of token 'v'.
   'cv' supplies the value of tokens that carry one (integer and character
   constants, preprocessing numbers, strings); the other cases do not read it.
   The result is either the static buffer of this function (so it is
   overwritten by the next call) or the name stored in table_ident.
   NULL is returned for floating point constants, TOK_LINENUM and for token
   values between tok_ident and SYM_FIRST_ANOM. */
278 /* XXX: buffer overflow */
279 /* XXX: float tokens */
280 ST_FUNC char *get_tok_str(int v, CValue *cv)
282 static char buf[STRING_MAX_SIZE + 1];
283 static CString cstr_buf;
284 CString *cstr;
285 char *p;
286 int i, len;
288 /* NOTE: to go faster, we give a fixed buffer for small strings */
/* NOTE(review): once cstr_ccat() needs more than sizeof(buf), cstr_realloc()
   reallocates cstr_buf.data_allocated, not buf, so the bytes already written
   to buf do not appear to be carried over -- confirm */
289 cstr_reset(&cstr_buf);
290 cstr_buf.data = buf;
291 cstr_buf.size_allocated = sizeof(buf);
292 p = buf;
294 /* just an explanation, should never happen:
295 if (v <= TOK_LINENUM && v >= TOK_CINT && cv == NULL)
296 tcc_error("internal error: get_tok_str"); */
298 switch(v) {
299 case TOK_CINT:
300 case TOK_CUINT:
301 /* XXX: not quite exact, but only useful for testing */
302 sprintf(p, "%u", cv->ui);
303 break;
304 case TOK_CLLONG:
305 case TOK_CULLONG:
306 /* XXX: not quite exact, but only useful for testing */
307 #ifdef _WIN32
308 sprintf(p, "%u", (unsigned)cv->ull);
309 #else
310 sprintf(p, "%llu", cv->ull);
311 #endif
312 break;
313 case TOK_LCHAR:
314 cstr_ccat(&cstr_buf, 'L');
/* no break: the wide variant continues with the plain character code */
315 case TOK_CCHAR:
316 cstr_ccat(&cstr_buf, '\'');
317 add_char(&cstr_buf, cv->i);
318 cstr_ccat(&cstr_buf, '\'');
319 cstr_ccat(&cstr_buf, '\0');
320 break;
321 case TOK_PPNUM:
/* size - 1: the last stored byte is not printed */
322 cstr = cv->cstr;
323 len = cstr->size - 1;
324 for(i=0;i<len;i++)
325 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
326 cstr_ccat(&cstr_buf, '\0');
327 break;
328 case TOK_LSTR:
329 cstr_ccat(&cstr_buf, 'L');
/* no break: the wide variant continues with the plain string code */
330 case TOK_STR:
331 cstr = cv->cstr;
332 cstr_ccat(&cstr_buf, '\"');
333 if (v == TOK_STR) {
334 len = cstr->size - 1;
335 for(i=0;i<len;i++)
336 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
337 } else {
/* wide string: elements are nwchar_t, the last one is not printed */
338 len = (cstr->size / sizeof(nwchar_t)) - 1;
339 for(i=0;i<len;i++)
340 add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
342 cstr_ccat(&cstr_buf, '\"');
343 cstr_ccat(&cstr_buf, '\0');
344 break;
346 case TOK_CFLOAT:
347 case TOK_CDOUBLE:
348 case TOK_CLDOUBLE:
349 case TOK_LINENUM:
350 return NULL; /* should not happen */
352 /* above tokens have value, the ones below don't */
354 case TOK_LT:
355 v = '<';
356 goto addv;
357 case TOK_GT:
358 v = '>';
359 goto addv;
360 case TOK_DOTS:
361 return strcpy(p, "...");
362 case TOK_A_SHL:
363 return strcpy(p, "<<=");
364 case TOK_A_SAR:
365 return strcpy(p, ">>=");
366 default:
367 if (v < TOK_IDENT) {
368 /* search in two bytes table */
369 const unsigned char *q = tok_two_chars;
/* entries are (char, char, token) triples; the scan stops at a zero first byte */
370 while (*q) {
371 if (q[2] == v) {
372 *p++ = q[0];
373 *p++ = q[1];
374 *p = '\0';
375 return buf;
377 q += 3;
/* not a two-character operator: the token value is the character itself */
379 addv:
380 *p++ = v;
381 *p = '\0';
382 } else if (v < tok_ident) {
/* identifier: return the stored name directly, not a copy */
383 return table_ident[v - TOK_IDENT]->str;
384 } else if (v >= SYM_FIRST_ANOM) {
385 /* special name for anonymous symbol */
386 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
387 } else {
388 /* should never happen */
389 return NULL;
391 break;
393 return cstr_buf.data;
396 /* fill input buffer and peek next char */
397 static int tcc_peekc_slow(BufferedFile *bf)
399 int len;
400 /* only tries to read if really end of buffer */
401 if (bf->buf_ptr >= bf->buf_end) {
402 if (bf->fd != -1) {
403 #if defined(PARSE_DEBUG)
404 len = 8;
405 #else
406 len = IO_BUF_SIZE;
407 #endif
408 len = read(bf->fd, bf->buffer, len);
409 if (len < 0)
410 len = 0;
411 } else {
412 len = 0;
414 total_bytes += len;
415 bf->buf_ptr = bf->buffer;
416 bf->buf_end = bf->buffer + len;
417 *bf->buf_end = CH_EOB;
419 if (bf->buf_ptr < bf->buf_end) {
420 return bf->buf_ptr[0];
421 } else {
422 bf->buf_ptr = bf->buf_end;
423 return CH_EOF;
427 /* return the current character, handling end of block if necessary
428 (but not stray) */
429 ST_FUNC int handle_eob(void)
431 return tcc_peekc_slow(file);
434 /* read next char from current input file and handle end of input buffer */
435 ST_INLN void inp(void)
437 ch = *(++(file->buf_ptr));
438 /* end of buffer/file handling */
439 if (ch == CH_EOB)
440 ch = handle_eob();
443 /* handle '\[\r]\n' */
444 static int handle_stray_noerror(void)
446 while (ch == '\\') {
447 inp();
448 if (ch == '\n') {
449 file->line_num++;
450 inp();
451 } else if (ch == '\r') {
452 inp();
453 if (ch != '\n')
454 goto fail;
455 file->line_num++;
456 inp();
457 } else {
458 fail:
459 return 1;
462 return 0;
/* like handle_stray_noerror(), but a backslash that does not start a line
   continuation is reported as an error */
static void handle_stray(void)
{
    const int stray_found = handle_stray_noerror();

    if (stray_found)
        tcc_error("stray '\\' in program");
}
471 /* skip the stray and handle the \\n case. Output an error if
472 incorrect char after the stray */
473 static int handle_stray1(uint8_t *p)
475 int c;
477 if (p >= file->buf_end) {
478 file->buf_ptr = p;
479 c = handle_eob();
480 p = file->buf_ptr;
481 if (c == '\\')
482 goto parse_stray;
483 } else {
484 parse_stray:
485 file->buf_ptr = p;
486 ch = *p;
487 handle_stray();
488 p = file->buf_ptr;
489 c = *p;
491 return c;
494 /* handle just the EOB case, but not stray:
   advance p and load the next character into c; when that character is a
   backslash, handle_eob() is called so that a buffer end gets refilled
   (presumably CH_EOB is '\\' -- confirm in tcc.h). p is resynchronised with
   file->buf_ptr afterwards. Both arguments are evaluated several times. */
495 #define PEEKC_EOB(c, p)\
497 p++;\
498 c = *p;\
499 if (c == '\\') {\
500 file->buf_ptr = p;\
501 c = handle_eob();\
502 p = file->buf_ptr;\
506 /* handle the complicated stray case:
   same as PEEKC_EOB, but a backslash is passed to handle_stray1(), which
   also skips line continuations and reports other strays as errors */
507 #define PEEKC(c, p)\
509 p++;\
510 c = *p;\
511 if (c == '\\') {\
512 c = handle_stray1(p);\
513 p = file->buf_ptr;\
517 /* input with '\[\r]\n' handling. Note that this function cannot
518 handle other characters after '\', so you cannot call it inside
519 strings or comments */
520 ST_FUNC void minp(void)
522 inp();
523 if (ch == '\\')
524 handle_stray();
528 /* single line C++ comments.
   The character at 'p' is skipped first (preprocess_skip() calls this with p
   on the second '/'). Returns a pointer to the '\n' ending the comment or to
   the end of file position; that character is not consumed.
   A backslash followed by [\r]\n continues the comment on the next line and
   increments file->line_num. */
529 static uint8_t *parse_line_comment(uint8_t *p)
531 int c;
533 p++;
534 for(;;) {
535 c = *p;
536 redo:
537 if (c == '\n' || c == CH_EOF) {
538 break;
539 } else if (c == '\\') {
/* a backslash may be the end of the buffer: let handle_eob() decide */
540 file->buf_ptr = p;
541 c = handle_eob();
542 p = file->buf_ptr;
543 if (c == '\\') {
/* genuine backslash: look at what follows it */
544 PEEKC_EOB(c, p);
545 if (c == '\n') {
546 file->line_num++;
547 PEEKC_EOB(c, p);
548 } else if (c == '\r') {
549 PEEKC_EOB(c, p);
550 if (c == '\n') {
551 file->line_num++;
552 PEEKC_EOB(c, p);
555 } else {
/* it was the buffer end: re-examine the character returned by handle_eob() */
556 goto redo;
558 } else {
559 p++;
562 return p;
565 /* C comments.
   The character at 'p' is skipped first (preprocess_skip() calls this with p
   on the '*' of the opening delimiter). Returns a pointer just past the '/'
   of the closing delimiter. Every '\n' increments file->line_num; reaching
   the end of file inside the comment is reported with tcc_error(). */
566 ST_FUNC uint8_t *parse_comment(uint8_t *p)
568 int c;
570 p++;
571 for(;;) {
572 /* fast skip loop (two characters per iteration) */
573 for(;;) {
574 c = *p;
575 if (c == '\n' || c == '*' || c == '\\')
576 break;
577 p++;
578 c = *p;
579 if (c == '\n' || c == '*' || c == '\\')
580 break;
581 p++;
583 /* now we can handle all the cases */
584 if (c == '\n') {
585 file->line_num++;
586 p++;
587 } else if (c == '*') {
/* possible end of comment: skip further '*' and look for '/' */
588 p++;
589 for(;;) {
590 c = *p;
591 if (c == '*') {
592 p++;
593 } else if (c == '/') {
594 goto end_of_comment;
595 } else if (c == '\\') {
/* a backslash between '*' and '/': buffer end or line continuation */
596 file->buf_ptr = p;
597 c = handle_eob();
598 p = file->buf_ptr;
599 if (c == '\\') {
600 /* skip '\[\r]\n', otherwise just skip the stray */
601 while (c == '\\') {
602 PEEKC_EOB(c, p);
603 if (c == '\n') {
604 file->line_num++;
605 PEEKC_EOB(c, p);
606 } else if (c == '\r') {
607 PEEKC_EOB(c, p);
608 if (c == '\n') {
609 file->line_num++;
610 PEEKC_EOB(c, p);
612 } else {
613 goto after_star;
617 } else {
618 break;
621 after_star: ;
622 } else {
623 /* stray, eob or eof */
624 file->buf_ptr = p;
625 c = handle_eob();
626 p = file->buf_ptr;
627 if (c == CH_EOF) {
628 tcc_error("unexpected end of file in comment");
629 } else if (c == '\\') {
/* a backslash has no meaning inside a comment body: step over it */
630 p++;
634 end_of_comment:
635 p++;
636 return p;
639 #define cinp minp
641 static inline void skip_spaces(void)
643 while (is_space(ch))
644 cinp();
/* Track runs of space tokens through the state '*spc'.
   Returns 1 when 't' is a space and the state is already non-zero (the
   state is left untouched); otherwise the state becomes 1 for a space,
   0 for anything else, and 0 is returned. */
static inline int check_space(int t, int *spc)
{
    if (!is_space(t)) {
        *spc = 0;
        return 0;
    }
    if (*spc != 0)
        return 1;
    *spc = 1;
    return 0;
}
658 /* parse a string without interpreting escapes */
659 static uint8_t *parse_pp_string(uint8_t *p,
660 int sep, CString *str)
662 int c;
663 p++;
664 for(;;) {
665 c = *p;
666 if (c == sep) {
667 break;
668 } else if (c == '\\') {
669 file->buf_ptr = p;
670 c = handle_eob();
671 p = file->buf_ptr;
672 if (c == CH_EOF) {
673 unterminated_string:
674 /* XXX: indicate line number of start of string */
675 tcc_error("missing terminating %c character", sep);
676 } else if (c == '\\') {
677 /* escape : just skip \[\r]\n */
678 PEEKC_EOB(c, p);
679 if (c == '\n') {
680 file->line_num++;
681 p++;
682 } else if (c == '\r') {
683 PEEKC_EOB(c, p);
684 if (c != '\n')
685 expect("'\n' after '\r'");
686 file->line_num++;
687 p++;
688 } else if (c == CH_EOF) {
689 goto unterminated_string;
690 } else {
691 if (str) {
692 cstr_ccat(str, '\\');
693 cstr_ccat(str, c);
695 p++;
698 } else if (c == '\n') {
699 file->line_num++;
700 goto add_char;
701 } else if (c == '\r') {
702 PEEKC_EOB(c, p);
703 if (c != '\n') {
704 if (str)
705 cstr_ccat(str, '\r');
706 } else {
707 file->line_num++;
708 goto add_char;
710 } else {
711 add_char:
712 if (str)
713 cstr_ccat(str, c);
714 p++;
717 p++;
718 return p;
721 /* skip block of text until #else, #elif or #endif. skip also pairs of
722 #if/#endif.
   Locals: 'a' is the nesting depth of #if/#ifdef/#ifndef seen while
   skipping, 'start_of_line' is true until a non-blank character was seen on
   the current line, 'in_warn_or_error' is set after #error or #warning so
   that quotes and '/' on the rest of that line are skipped as plain
   characters. On return file->buf_ptr is updated and 'tok' holds the
   directive that ended the skipped block. */
723 static void preprocess_skip(void)
725 int a, start_of_line, c, in_warn_or_error;
726 uint8_t *p;
728 p = file->buf_ptr;
729 a = 0;
730 redo_start:
731 start_of_line = 1;
732 in_warn_or_error = 0;
733 for(;;) {
734 redo_no_start:
735 c = *p;
736 switch(c) {
737 case ' ':
738 case '\t':
739 case '\f':
740 case '\v':
741 case '\r':
/* blanks do not end the "start of line" state */
742 p++;
743 goto redo_no_start;
744 case '\n':
745 file->line_num++;
746 p++;
747 goto redo_start;
748 case '\\':
/* buffer end, end of file or a backslash in the text */
749 file->buf_ptr = p;
750 c = handle_eob();
751 if (c == CH_EOF) {
752 expect("#endif");
753 } else if (c == '\\') {
/* skip a line continuation; the error return is deliberately not checked */
754 ch = file->buf_ptr[0];
755 handle_stray_noerror();
757 p = file->buf_ptr;
758 goto redo_no_start;
759 /* skip strings */
760 case '\"':
761 case '\'':
762 if (in_warn_or_error)
763 goto _default;
764 p = parse_pp_string(p, c, NULL);
765 break;
766 /* skip comments */
767 case '/':
768 if (in_warn_or_error)
769 goto _default;
770 file->buf_ptr = p;
771 ch = *p;
772 minp();
773 p = file->buf_ptr;
774 if (ch == '*') {
775 p = parse_comment(p);
776 } else if (ch == '/') {
777 p = parse_line_comment(p);
779 break;
780 case '#':
781 p++;
782 if (start_of_line) {
/* a directive: read its name with the tokenizer */
783 file->buf_ptr = p;
784 next_nomacro();
785 p = file->buf_ptr;
786 if (a == 0 &&
787 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
788 goto the_end;
789 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
790 a++;
791 else if (tok == TOK_ENDIF)
792 a--;
793 else if( tok == TOK_ERROR || tok == TOK_WARNING)
794 in_warn_or_error = 1;
795 else if (tok == TOK_LINEFEED)
796 goto redo_start;
798 break;
799 _default:
800 default:
801 p++;
802 break;
804 start_of_line = 0;
806 the_end: ;
807 file->buf_ptr = p;
810 /* ParseState handling */
812 /* XXX: currently, no include file info is stored. Thus, we cannot display
813 accurate messages if the function or data definition spans multiple
814 files */
816 /* save current parse state in 's' */
817 ST_FUNC void save_parse_state(ParseState *s)
819 s->line_num = file->line_num;
820 s->macro_ptr = macro_ptr;
821 s->tok = tok;
822 s->tokc = tokc;
825 /* restore parse state from 's' */
826 ST_FUNC void restore_parse_state(ParseState *s)
828 file->line_num = s->line_num;
829 macro_ptr = s->macro_ptr;
830 tok = s->tok;
831 tokc = s->tokc;
834 /* return the number of additional 'ints' necessary to store the
835 token */
836 static inline int tok_ext_size(int t)
838 switch(t) {
839 /* 4 bytes */
840 case TOK_CINT:
841 case TOK_CUINT:
842 case TOK_CCHAR:
843 case TOK_LCHAR:
844 case TOK_CFLOAT:
845 case TOK_LINENUM:
846 return 1;
847 case TOK_STR:
848 case TOK_LSTR:
849 case TOK_PPNUM:
850 tcc_error("unsupported token");
851 return 1;
852 case TOK_CDOUBLE:
853 case TOK_CLLONG:
854 case TOK_CULLONG:
855 return 2;
856 case TOK_CLDOUBLE:
857 return LDOUBLE_SIZE / 4;
858 default:
859 return 0;
863 /* token string handling */
865 ST_INLN void tok_str_new(TokenString *s)
867 s->str = NULL;
868 s->len = 0;
869 s->allocated_len = 0;
870 s->last_line_num = -1;
873 ST_FUNC void tok_str_free(int *str)
875 tcc_free(str);
878 static int *tok_str_realloc(TokenString *s)
880 int *str, len;
882 if (s->allocated_len == 0) {
883 len = 8;
884 } else {
885 len = s->allocated_len * 2;
887 str = tcc_realloc(s->str, len * sizeof(int));
888 s->allocated_len = len;
889 s->str = str;
890 return str;
893 ST_FUNC void tok_str_add(TokenString *s, int t)
895 int len, *str;
897 len = s->len;
898 str = s->str;
899 if (len >= s->allocated_len)
900 str = tok_str_realloc(s);
901 str[len++] = t;
902 s->len = len;
/* Append token 't' together with its value '*cv' to the token string 's'.
   The token number is followed by a number of ints that depends on the
   token kind: one for 4 byte constants and TOK_LINENUM, two for doubles and
   long longs, LDOUBLE_SIZE dependent for long doubles, and an embedded
   CString (header followed by the bytes) for strings and preprocessing
   numbers. Other tokens carry no value. */
905 static void tok_str_add2(TokenString *s, int t, CValue *cv)
907 int len, *str;
909 len = s->len;
910 str = s->str;
912 /* allocate space for worst case */
913 if (len + TOK_MAX_SIZE > s->allocated_len)
914 str = tok_str_realloc(s);
915 str[len++] = t;
916 switch(t) {
917 case TOK_CINT:
918 case TOK_CUINT:
919 case TOK_CCHAR:
920 case TOK_LCHAR:
921 case TOK_CFLOAT:
922 case TOK_LINENUM:
923 str[len++] = cv->tab[0];
924 break;
925 case TOK_PPNUM:
926 case TOK_STR:
927 case TOK_LSTR:
929 int nb_words;
930 CString *cstr;
/* space for the header plus the bytes, rounded up to a multiple of 4 bytes */
932 nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
933 while ((len + nb_words) > s->allocated_len)
934 str = tok_str_realloc(s);
/* the CString header is written into the int array itself; its data
   pointers are left NULL here and TOK_GET() fills in 'data' when the token
   is read back */
935 cstr = (CString *)(str + len);
936 cstr->data = NULL;
937 cstr->size = cv->cstr->size;
938 cstr->data_allocated = NULL;
939 cstr->size_allocated = cstr->size;
940 memcpy((char *)cstr + sizeof(CString),
941 cv->cstr->data, cstr->size);
942 len += nb_words;
944 break;
945 case TOK_CDOUBLE:
946 case TOK_CLLONG:
947 case TOK_CULLONG:
948 #if LDOUBLE_SIZE == 8
949 case TOK_CLDOUBLE:
950 #endif
951 str[len++] = cv->tab[0];
952 str[len++] = cv->tab[1];
953 break;
954 #if LDOUBLE_SIZE == 12
955 case TOK_CLDOUBLE:
956 str[len++] = cv->tab[0];
957 str[len++] = cv->tab[1];
958 str[len++] = cv->tab[2];
959 #elif LDOUBLE_SIZE == 16
960 case TOK_CLDOUBLE:
961 str[len++] = cv->tab[0];
962 str[len++] = cv->tab[1];
963 str[len++] = cv->tab[2];
964 str[len++] = cv->tab[3];
965 #elif LDOUBLE_SIZE != 8
966 #error add long double size support
967 #endif
968 break;
969 default:
970 break;
972 s->len = len;
975 /* add the current parse token in token string 's' */
976 ST_FUNC void tok_str_add_tok(TokenString *s)
978 CValue cval;
980 /* save line number info */
981 if (file->line_num != s->last_line_num) {
982 s->last_line_num = file->line_num;
983 cval.i = s->last_line_num;
984 tok_str_add2(s, TOK_LINENUM, &cval);
986 tok_str_add2(s, tok, &tokc);
989 /* get a token from an integer array and increment pointer
990 accordingly (counterpart of tok_str_add2; written as an inline function).
   '*t' receives the token number, '*cv' its value when the token has one,
   and '*pp' is advanced past the token and its value.
   For string-like tokens cv->cstr points into the token array itself and
   the 'data' field of that embedded CString is set to the bytes that
   follow it, i.e. the array is written to even though 'pp' is const. */
991 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
993 const int *p = *pp;
994 int n, *tab;
996 tab = cv->tab;
997 switch(*t = *p++) {
998 case TOK_CINT:
999 case TOK_CUINT:
1000 case TOK_CCHAR:
1001 case TOK_LCHAR:
1002 case TOK_CFLOAT:
1003 case TOK_LINENUM:
1004 tab[0] = *p++;
1005 break;
1006 case TOK_STR:
1007 case TOK_LSTR:
1008 case TOK_PPNUM:
1009 cv->cstr = (CString *)p;
1010 cv->cstr->data = (char *)p + sizeof(CString);
/* same rounding as used when the token was stored */
1011 p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
1012 break;
1013 case TOK_CDOUBLE:
1014 case TOK_CLLONG:
1015 case TOK_CULLONG:
1016 n = 2;
1017 goto copy;
1018 case TOK_CLDOUBLE:
1019 #if LDOUBLE_SIZE == 16
1020 n = 4;
1021 #elif LDOUBLE_SIZE == 12
1022 n = 3;
1023 #elif LDOUBLE_SIZE == 8
1024 n = 2;
1025 #else
1026 # error add long double size support
1027 #endif
/* copy n ints of value data */
1028 copy:
1030 *tab++ = *p++;
1031 while (--n);
1032 break;
1033 default:
1034 break;
1036 *pp = p;
1039 static int macro_is_equal(const int *a, const int *b)
1041 char buf[STRING_MAX_SIZE + 1];
1042 CValue cv;
1043 int t;
1044 while (*a && *b) {
1045 TOK_GET(&t, &a, &cv);
1046 pstrcpy(buf, sizeof buf, get_tok_str(t, &cv));
1047 TOK_GET(&t, &b, &cv);
1048 if (strcmp(buf, get_tok_str(t, &cv)))
1049 return 0;
1051 return !(*a || *b);
1054 /* defines handling */
1055 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1057 Sym *s;
1058 CSym *def;
1059 s = define_find(v);
1060 if (s && !macro_is_equal(s->d, str))
1061 tcc_warning("%s redefined", get_tok_str(v, NULL));
1062 s = sym_push2(&define_stack, v, macro_type, 0);
1063 s->d = str;
1064 s->next = first_arg;
1065 def = &table_ident[v - TOK_IDENT]->sym_define;
1066 def->data[def->off] = s;
1069 /* undefined a define symbol. Its name is just set to zero */
1070 ST_FUNC void define_undef(Sym *s)
1072 int v;
1073 CSym *def;
1074 v = s->v - TOK_IDENT;
1075 if ((unsigned)v < (unsigned)(tok_ident - TOK_IDENT)){
1076 def = &table_ident[v]->sym_define;
1077 def->data[def->off] = NULL;
1081 ST_INLN Sym *define_find(int v)
1083 CSym *def;
1084 v -= TOK_IDENT;
1085 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1086 return NULL;
1087 def = &table_ident[v]->sym_define;
1088 return def->data[def->off];
1091 /* free define stack until top reaches 'b' */
1092 ST_FUNC void free_defines(Sym *b)
1094 Sym *top, *tmp;
1095 int v;
1096 CSym *def;
1098 top = define_stack;
1099 while (top != b) {
1100 tmp = top->prev;
1101 /* do not free args or predefined defines */
1102 if (top->d)
1103 tok_str_free(top->d);
1104 v = top->v - TOK_IDENT;
1105 if ((unsigned)v < (unsigned)(tok_ident - TOK_IDENT)){
1106 def = &table_ident[v]->sym_define;
1107 if(def->off)
1108 def->off = 0;
1109 if(def->data[0])
1110 def->data[0] = NULL;
1112 sym_free(top);
1113 top = tmp;
1115 define_stack = b;
1118 /* label lookup */
1119 ST_FUNC Sym *label_find(int v)
1121 v -= TOK_IDENT;
1122 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1123 return NULL;
1124 return table_ident[v]->sym_label;
1127 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1129 Sym *s, **ps;
1130 s = sym_push2(ptop, v, 0, 0);
1131 s->r = flags;
1132 ps = &table_ident[v - TOK_IDENT]->sym_label;
1133 if (ptop == &global_label_stack) {
1134 /* modify the top most local identifier, so that
1135 sym_identifier will point to 's' when popped */
1136 while (*ps != NULL)
1137 ps = &(*ps)->prev_tok;
1139 s->prev_tok = *ps;
1140 *ps = s;
1141 return s;
1144 /* pop labels until element last is reached. Look if any labels are
1145 undefined. Define symbols if '&&label' was used. */
1146 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1148 Sym *s, *s1;
1149 for(s = *ptop; s != slast; s = s1) {
1150 s1 = s->prev;
1151 if (s->r == LABEL_DECLARED) {
1152 tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1153 } else if (s->r == LABEL_FORWARD) {
1154 tcc_error("label '%s' used but not defined",
1155 get_tok_str(s->v, NULL));
1156 } else {
1157 if (s->c) {
1158 /* define corresponding symbol. A size of
1159 1 is put. */
1160 put_extern_sym(s, cur_text_section, s->jnext, 1);
1163 /* remove label */
1164 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1165 sym_free(s);
1167 *ptop = slast;
1170 /* eval an expression for #if/#elif */
1171 static int expr_preprocess(void)
1173 int c, t;
1174 TokenString str;
1176 tok_str_new(&str);
1177 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1178 next(); /* do macro subst */
1179 if (tok == TOK_DEFINED) {
1180 next_nomacro();
1181 t = tok;
1182 if (t == '(')
1183 next_nomacro();
1184 c = define_find(tok) != 0;
1185 if (t == '(')
1186 next_nomacro();
1187 tok = TOK_CINT;
1188 tokc.i = c;
1189 } else if (tok >= TOK_IDENT) {
1190 /* if undefined macro */
1191 tok = TOK_CINT;
1192 tokc.i = 0;
1194 tok_str_add_tok(&str);
1196 tok_str_add(&str, -1); /* simulate end of file */
1197 tok_str_add(&str, 0);
1198 /* now evaluate C constant expression */
1199 macro_ptr = str.str;
1200 next();
1201 c = expr_const();
1202 macro_ptr = NULL;
1203 tok_str_free(str.str);
1204 return c != 0;
#if defined(PARSE_DEBUG) || defined(PP_DEBUG)
/* debug helper: print the tokens of the 0-terminated token array 'str'
   between '<' and '>' followed by a newline */
static void tok_print(int *str)
{
    /* TOK_GET() takes a 'const int **'; passing the address of the
       'int *' parameter would be an incompatible pointer type, so walk the
       array through a correctly qualified cursor */
    const int *cur = str;
    CValue cval;
    int t;

    printf("<");
    for (;;) {
        TOK_GET(&t, &cur, &cval);
        if (!t)
            break;
        printf("%s", get_tok_str(t, &cval));
    }
    printf(">\n");
}
#endif
1224 /* parse after #define.
   On entry 'tok' is the macro name. An optional parameter list (only when
   '(' follows the name directly) is parsed into a chain of Syms, then the
   body is collected into a token string up to the end of the line and the
   macro is registered with define_push().
   'spc' tracks blanks in the body: 1 after one space token was stored,
   0 after a non-space token, 2 at the start and after '#' / '##' so that
   following spaces are dropped. */
1225 ST_FUNC void parse_define(void)
1227 Sym *s, *first, **ps;
1228 int v, t, varg, is_vaargs, spc, ptok, macro_list_start;
1229 TokenString str;
1231 v = tok;
1232 if (v < TOK_IDENT)
1233 tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1234 /* XXX: should check if same macro (ANSI) */
1235 first = NULL;
1236 t = MACRO_OBJ;
1237 /* '(' must be just after macro definition for MACRO_FUNC */
1238 next_nomacro_spc();
1239 if (tok == '(') {
1240 next_nomacro();
1241 ps = &first;
1242 while (tok != ')') {
1243 varg = tok;
1244 next_nomacro();
1245 is_vaargs = 0;
1246 if (varg == TOK_DOTS) {
/* '...' alone: the parameter is named __VA_ARGS__ */
1247 varg = TOK___VA_ARGS__;
1248 is_vaargs = 1;
1249 } else if (tok == TOK_DOTS && gnu_ext) {
/* 'name...' : named variadic parameter, accepted when gnu_ext is set */
1250 is_vaargs = 1;
1251 next_nomacro();
1253 if (varg < TOK_IDENT)
1254 tcc_error("badly punctuated parameter list");
/* parameters are pushed with SYM_FIELD or'ed into the token value */
1255 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1256 *ps = s;
1257 ps = &s->next;
1258 if (tok != ',')
1259 break;
1260 next_nomacro();
1262 if (tok == ')')
1263 next_nomacro_spc();
1264 t = MACRO_FUNC;
1266 tok_str_new(&str);
1267 spc = 2;
1268 /* EOF testing necessary for '-D' handling */
1269 ptok = 0;
1270 macro_list_start = 1;
1271 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
/* NOTE(review): this test can only fire when macro_list_start is 0, i.e.
   never for the first body token, although the message says "at start" --
   confirm the intended condition */
1272 if (!macro_list_start && spc == 2 && tok == TOK_TWOSHARPS)
1273 tcc_error("'##' invalid at start of macro");
1274 ptok = tok;
1275 /* remove spaces around ## and after '#' */
1276 if (TOK_TWOSHARPS == tok) {
1277 if (1 == spc)
1278 --str.len;
1279 spc = 2;
1280 } else if ('#' == tok) {
1281 spc = 2;
1282 } else if (check_space(tok, &spc)) {
1283 goto skip;
1285 tok_str_add2(&str, tok, &tokc);
1286 skip:
1287 next_nomacro_spc();
1288 macro_list_start = 0;
1290 if (ptok == TOK_TWOSHARPS)
1291 tcc_error("'##' invalid at end of macro");
1292 if (spc == 1)
1293 --str.len; /* remove trailing space */
1294 tok_str_add(&str, 0);
1295 #ifdef PP_DEBUG
1296 printf("define %s %d: ", get_tok_str(v, NULL), t);
1297 tok_print(str.str);
1298 #endif
1299 define_push(v, t, str.str, first);
1302 static inline int hash_cached_include(const char *filename)
1304 const unsigned char *s;
1305 unsigned int h;
1307 h = TOK_HASH_INIT;
1308 s = (unsigned char *) filename;
1309 while (*s) {
1310 h = TOK_HASH_FUNC(h, *s);
1311 s++;
1313 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1314 return h;
1317 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1319 CachedInclude *e;
1320 int i, h;
1321 h = hash_cached_include(filename);
1322 i = s1->cached_includes_hash[h];
1323 for(;;) {
1324 if (i == 0)
1325 break;
1326 e = s1->cached_includes[i - 1];
1327 if (0 == PATHCMP(e->filename, filename))
1328 return e;
1329 i = e->hash_next;
1331 return NULL;
1334 static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
1336 CachedInclude *e;
1337 int h;
1339 if (search_cached_include(s1, filename))
1340 return;
1341 #ifdef INC_DEBUG
1342 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1343 #endif
1344 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1345 strcpy(e->filename, filename);
1346 e->ifndef_macro = ifndef_macro;
1347 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1348 /* add in hash table */
1349 h = hash_cached_include(filename);
1350 e->hash_next = s1->cached_includes_hash[h];
1351 s1->cached_includes_hash[h] = s1->nb_cached_includes;
1354 /* is_bof is true if first non space token at beginning of file */
1355 ST_FUNC void preprocess(int is_bof)
1357 TCCState *s1 = tcc_state;
1358 int i, c, n, saved_parse_flags;
1359 uint8_t buf[1024], *p;
1360 Sym *s;
1362 saved_parse_flags = parse_flags;
1363 parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM | PARSE_FLAG_LINEFEED;
1364 next_nomacro();
1365 redo:
1366 switch(tok) {
1367 case TOK_DEFINE:
1368 next_nomacro();
1369 parse_define();
1370 break;
1371 case TOK_UNDEF:
1372 next_nomacro();
1373 s = define_find(tok);
1374 /* undefine symbol by putting an invalid name */
1375 if (s)
1376 define_undef(s);
1377 break;
1378 case TOK_INCLUDE:
1379 case TOK_INCLUDE_NEXT:
1380 ch = file->buf_ptr[0];
1381 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1382 skip_spaces();
1383 if (ch == '<') {
1384 c = '>';
1385 goto read_name;
1386 } else if (ch == '\"') {
1387 c = ch;
1388 read_name:
1389 inp();
1390 p = buf;
1391 while (ch != c && ch != '\n' && ch != CH_EOF) {
1392 if ((p - buf) < sizeof(buf) - 1)
1393 *p++ = ch;
1394 if (ch == '\\') {
1395 if (handle_stray_noerror() == 0)
1396 --p;
1397 } else
1398 inp();
1400 if (ch != c)
1401 goto include_syntax;
1402 *p = '\0';
1403 minp();
1404 #if 0
1405 /* eat all spaces and comments after include */
1406 /* XXX: slightly incorrect */
1407 while (ch1 != '\n' && ch1 != CH_EOF)
1408 inp();
1409 #endif
1410 } else {
1411 /* computed #include : either we have only strings or
1412 we have anything enclosed in '<>' */
1413 next();
1414 buf[0] = '\0';
1415 if (tok == TOK_STR) {
1416 while (tok != TOK_LINEFEED) {
1417 if (tok != TOK_STR) {
1418 include_syntax:
1419 tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1421 pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
1422 next();
1424 c = '\"';
1425 } else {
1426 int len;
1427 while (tok != TOK_LINEFEED) {
1428 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1429 next();
1431 len = strlen(buf);
1432 /* check syntax and remove '<>' */
1433 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1434 goto include_syntax;
1435 memmove(buf, buf + 1, len - 2);
1436 buf[len - 2] = '\0';
1437 c = '>';
1440 if(!buf[0])
1441 tcc_error(" empty filename in #include");
1443 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1444 tcc_error("#include recursion too deep");
1445 /* store current file in stack, but increment stack later below */
1446 *s1->include_stack_ptr = file;
1448 n = s1->nb_include_paths + s1->nb_sysinclude_paths;
1449 for (i = -2; i < n; ++i) {
1450 char buf1[sizeof file->filename];
1451 CachedInclude *e;
1452 BufferedFile **f;
1453 const char *path;
1455 if (i == -2) {
1456 /* check absolute include path */
1457 if (!IS_ABSPATH(buf))
1458 continue;
1459 buf1[0] = 0;
1460 i = n; /* force end loop */
1462 } else if (i == -1) {
1463 /* search in current dir if "header.h" */
1464 if (c != '\"')
1465 continue;
1466 path = file->filename;
1467 pstrncpy(buf1, path, tcc_basename(path) - path);
1469 } else {
1470 /* search in all the include paths */
1471 if (i < s1->nb_include_paths)
1472 path = s1->include_paths[i];
1473 else
1474 path = s1->sysinclude_paths[i - s1->nb_include_paths];
1475 pstrcpy(buf1, sizeof(buf1), path);
1476 pstrcat(buf1, sizeof(buf1), "/");
1479 pstrcat(buf1, sizeof(buf1), buf);
1481 if (tok == TOK_INCLUDE_NEXT)
1482 for (f = s1->include_stack_ptr; f >= s1->include_stack; --f)
1483 if (0 == PATHCMP((*f)->filename, buf1)) {
1484 #ifdef INC_DEBUG
1485 printf("%s: #include_next skipping %s\n", file->filename, buf1);
1486 #endif
1487 goto include_trynext;
1490 e = search_cached_include(s1, buf1);
1491 if (e && define_find(e->ifndef_macro)) {
1492 /* no need to parse the include because the 'ifndef macro'
1493 is defined */
1494 #ifdef INC_DEBUG
1495 printf("%s: skipping cached %s\n", file->filename, buf1);
1496 #endif
1497 goto include_done;
1500 if (tcc_open(s1, buf1) < 0)
1501 include_trynext:
1502 continue;
1504 #ifdef INC_DEBUG
1505 printf("%s: including %s\n", file->prev->filename, file->filename);
1506 #endif
1507 /* update target deps */
1508 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps, tcc_strdup(buf1));
1509 /* push current file in stack */
1510 ++s1->include_stack_ptr;
1511 /* add include file debug info */
1512 if (s1->do_debug)
1513 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1514 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1515 ch = file->buf_ptr[0];
1516 goto the_end;
1518 tcc_error("include file '%s' not found", buf);
1519 include_done:
1520 break;
1521 case TOK_IFNDEF:
1522 c = 1;
1523 goto do_ifdef;
1524 case TOK_IF:
1525 c = expr_preprocess();
1526 goto do_if;
1527 case TOK_IFDEF:
1528 c = 0;
1529 do_ifdef:
1530 next_nomacro();
1531 if (tok < TOK_IDENT)
1532 tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
1533 if (is_bof) {
1534 if (c) {
1535 #ifdef INC_DEBUG
1536 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1537 #endif
1538 file->ifndef_macro = tok;
1541 c = !!define_find(tok) ^ c;
1542 do_if:
1543 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1544 tcc_error("memory full (ifdef)");
1545 *s1->ifdef_stack_ptr++ = c;
1546 goto test_skip;
1547 case TOK_ELSE:
1548 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1549 tcc_error("#else without matching #if");
1550 if (s1->ifdef_stack_ptr[-1] & 2)
1551 tcc_error("#else after #else");
1552 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1553 goto test_else;
1554 case TOK_ELIF:
1555 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1556 tcc_error("#elif without matching #if");
1557 c = s1->ifdef_stack_ptr[-1];
1558 if (c > 1)
1559 tcc_error("#elif after #else");
1560 /* last #if/#elif expression was true: we skip */
1561 if (c == 1)
1562 goto skip;
1563 c = expr_preprocess();
1564 s1->ifdef_stack_ptr[-1] = c;
1565 test_else:
1566 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1567 file->ifndef_macro = 0;
1568 test_skip:
1569 if (!(c & 1)) {
1570 skip:
1571 preprocess_skip();
1572 is_bof = 0;
1573 goto redo;
1575 break;
1576 case TOK_ENDIF:
1577 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1578 tcc_error("#endif without matching #if");
1579 s1->ifdef_stack_ptr--;
1580 /* '#ifndef macro' was at the start of file. Now we check if
1581 an '#endif' is exactly at the end of file */
1582 if (file->ifndef_macro &&
1583 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1584 file->ifndef_macro_saved = file->ifndef_macro;
1585 /* need to set to zero to avoid false matches if another
1586 #ifndef at middle of file */
1587 file->ifndef_macro = 0;
1588 tok_flags |= TOK_FLAG_ENDIF;
1590 next_nomacro();
1591 if (tok != TOK_LINEFEED)
1592 tcc_warning("Ignoring: %s", get_tok_str(tok, &tokc));
1593 break;
1594 case TOK_LINE:
1595 next();
1596 if (tok != TOK_CINT)
1597 tcc_error("#line");
1598 file->line_num = tokc.i - 1; /* the line number will be incremented after */
1599 next();
1600 if (tok != TOK_LINEFEED) {
1601 if (tok != TOK_STR)
1602 tcc_error("#line");
1603 pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.cstr->data);
1605 break;
1606 case TOK_ERROR:
1607 case TOK_WARNING:
1608 c = tok;
1609 ch = file->buf_ptr[0];
1610 skip_spaces();
1611 p = buf;
1612 while (ch != '\n' && ch != CH_EOF) {
1613 if ((p - buf) < sizeof(buf) - 1)
1614 *p++ = ch;
1615 if (ch == '\\') {
1616 if (handle_stray_noerror() == 0)
1617 --p;
1618 } else
1619 inp();
1621 *p = '\0';
1622 if (c == TOK_ERROR)
1623 tcc_error("#error %s", buf);
1624 else
1625 tcc_warning("#warning %s", buf);
1626 break;
1627 case TOK_PRAGMA:
1628 next();
1629 if (tok == TOK_pack && parse_flags & PARSE_FLAG_PACK) {
1631 This may be:
1632 #pragma pack(1) // set
1633 #pragma pack() // reset to default
1634 #pragma pack(push,1) // push & set
1635 #pragma pack(pop) // restore previous
1637 next();
1638 skip('(');
1639 if (tok == TOK_ASM_pop) {
1640 next();
1641 if (s1->pack_stack_ptr <= s1->pack_stack) {
1642 stk_error:
1643 tcc_error("out of pack stack");
1645 s1->pack_stack_ptr--;
1646 } else {
1647 int val = 0;
1648 if (tok != ')') {
1649 if (tok == TOK_ASM_push) {
1650 next();
1651 s1->pack_stack_ptr++;
1652 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE)
1653 goto stk_error;
1654 skip(',');
1656 if (tok != TOK_CINT) {
1657 pack_error:
1658 tcc_error("invalid pack pragma");
1660 val = tokc.i;
1661 if (val < 1 || val > 16)
1662 goto pack_error;
1663 if (val < 1 || val > 16)
1664 tcc_error("Value must be greater than 1 is less than or equal to 16");
1665 if ((val & (val - 1)) != 0)
1666 tcc_error("Value must be a power of 2 curtain");
1667 next();
1669 *s1->pack_stack_ptr = val;
1670 skip(')');
1672 }else if (tok == TOK_PUSH_MACRO || tok == TOK_POP_MACRO) {
1673 TokenSym *ts;
1674 CSym *def;
1675 uint8_t *p1;
1676 int len, t;
1677 t = tok;
1678 ch = file->buf_ptr[0];
1679 skip_spaces();
1680 if (ch != '(')
1681 goto macro_xxx_syntax;
1682 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1683 inp();
1684 skip_spaces();
1685 if (ch == '\"'){
1686 inp();
1687 p = buf;
1688 while (ch != '\"' && ch != '\n' && ch != CH_EOF) {
1689 if ((p - buf) < sizeof(buf) - 1)
1690 *p++ = ch;
1691 if (ch == CH_EOB) {
1692 --p;
1693 handle_stray();
1694 }else
1695 inp();
1697 if(ch != '\"')
1698 goto macro_xxx_syntax;
1699 *p = '\0';
1700 minp();
1701 next();
1702 }else{
1703 /* computed #pragma macro_xxx for #define xxx */
1704 next();
1705 buf[0] = '\0';
1706 while (tok != ')') {
1707 if (tok != TOK_STR) {
1708 macro_xxx_syntax:
1709 tcc_error("'macro_xxx' expects (\"NAME\")");
1711 pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
1712 next();
1715 skip (')');
1716 if(!buf[0])
1717 tcc_error(" empty string in #pragma");
1718 /* find TokenSym */
1719 p = buf;
1720 while (is_space(*p))
1721 p++;
1722 p1 = p;
1723 for(;;){
1724 if (!isidnum_table[p[0] - CH_EOF])
1725 break;
1726 ++p;
1728 len = p - p1;
1729 while (is_space(*p))
1730 p++;
1731 if(!p) //'\0'
1732 tcc_error("unrecognized string: %s", buf);
1733 ts = tok_alloc(p1, len);
1734 if(ts){
1735 def = &ts->sym_define;
1736 if(t == TOK_PUSH_MACRO){
1737 void *tmp = def->data[def->off];
1738 if(tmp){
1739 def->off++;
1740 if(def->off >= def->size){
1741 int size = def->size;
1742 size *= 2;
1743 if (size >= MACRO_STACK_SIZE)
1744 tcc_error("stack full");
1745 def->data = tcc_realloc(def->data, size*sizeof(Sym**));
1746 def->size = size;
1748 def->data[def->off] = tmp;
1750 }else{
1751 if(def->off){
1752 --def->off;
1753 }else{
1754 tcc_warning("stack empty");
1758 }else{
1759 fputs("#pragma ", s1->ppfp);
1760 while (tok != TOK_LINEFEED){
1761 fputs(get_tok_str(tok, &tokc), s1->ppfp);
1762 next();
1764 goto the_end;
1766 break;
1767 default:
1768 if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) {
1769 /* '!' is ignored to allow C scripts. numbers are ignored
1770 to emulate cpp behaviour */
1771 } else {
1772 if (!(saved_parse_flags & PARSE_FLAG_ASM_COMMENTS))
1773 tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1774 else {
1775 /* this is a gas line comment in an 'S' file. */
1776 file->buf_ptr = parse_line_comment(file->buf_ptr);
1777 goto the_end;
1780 break;
1782 /* ignore other preprocess commands or #! for C scripts */
1783 while (tok != TOK_LINEFEED)
1784 next_nomacro();
1785 the_end:
1786 parse_flags = saved_parse_flags;
1789 /* evaluate escape codes in a string. */
1790 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1792 int c, n;
1793 const uint8_t *p;
1795 p = buf;
1796 for(;;) {
1797 c = *p;
1798 if (c == '\0')
1799 break;
1800 if (c == '\\') {
1801 p++;
1802 /* escape */
1803 c = *p;
1804 switch(c) {
1805 case '0': case '1': case '2': case '3':
1806 case '4': case '5': case '6': case '7':
1807 /* at most three octal digits */
1808 n = c - '0';
1809 p++;
1810 c = *p;
1811 if (isoct(c)) {
1812 n = n * 8 + c - '0';
1813 p++;
1814 c = *p;
1815 if (isoct(c)) {
1816 n = n * 8 + c - '0';
1817 p++;
1820 c = n;
1821 goto add_char_nonext;
1822 case 'x':
1823 case 'u':
1824 case 'U':
1825 p++;
1826 n = 0;
1827 for(;;) {
1828 c = *p;
1829 if (c >= 'a' && c <= 'f')
1830 c = c - 'a' + 10;
1831 else if (c >= 'A' && c <= 'F')
1832 c = c - 'A' + 10;
1833 else if (isnum(c))
1834 c = c - '0';
1835 else
1836 break;
1837 n = n * 16 + c;
1838 p++;
1840 c = n;
1841 goto add_char_nonext;
1842 case 'a':
1843 c = '\a';
1844 break;
1845 case 'b':
1846 c = '\b';
1847 break;
1848 case 'f':
1849 c = '\f';
1850 break;
1851 case 'n':
1852 c = '\n';
1853 break;
1854 case 'r':
1855 c = '\r';
1856 break;
1857 case 't':
1858 c = '\t';
1859 break;
1860 case 'v':
1861 c = '\v';
1862 break;
1863 case 'e':
1864 if (!gnu_ext)
1865 goto invalid_escape;
1866 c = 27;
1867 break;
1868 case '\'':
1869 case '\"':
1870 case '\\':
1871 case '?':
1872 break;
1873 default:
1874 invalid_escape:
1875 if (c >= '!' && c <= '~')
1876 tcc_warning("unknown escape sequence: \'\\%c\'", c);
1877 else
1878 tcc_warning("unknown escape sequence: \'\\x%x\'", c);
1879 break;
1882 p++;
1883 add_char_nonext:
1884 if (!is_long)
1885 cstr_ccat(outstr, c);
1886 else
1887 cstr_wccat(outstr, c);
1889 /* add a trailing '\0' */
1890 if (!is_long)
1891 cstr_ccat(outstr, '\0');
1892 else
1893 cstr_wccat(outstr, '\0');
1896 /* parse number in null terminated string 'p' and return it in the
1897 current token */
1898 static void parse_number(const char *p)
1900 int b, t, c;
1902 c = *p++;
1903 t = *p++;
1904 b = 10;
1905 if(c=='.'){
1906 --p;
1907 goto float_frac_parse;
1909 if(c == '0'){
1910 if (t == 'x' || t == 'X') {
1911 b = 16;
1912 c = *p++;
1913 } else if (tcc_ext && (t == 'b' || t == 'B')) {
1914 b = 2;
1915 c = *p++;
1916 }else{
1917 --p;
1919 }else
1920 --p;
1921 if(strchr(p , '.') || (b == 10 && (strchr(p,'e') || strchr(p,'E'))) ||
1922 ((b == 2 || b == 16)&& (strchr(p,'p') || strchr(p,'P')))){
1923 long double ld, sh, fb;
1924 int exp;
1925 /* NOTE: strtox should support that for hexa numbers, but
1926 non ISOC99 libcs do not support it, so we prefer to do
1927 it by hand */
1928 /* hexadecimal or binary floats */
1929 /* XXX: handle overflows */
1930 float_frac_parse:
1931 fb = 1.0L/b;
1932 sh = b;
1933 ld = 0.0;
1935 while(1){
1936 if (c == '\0')
1937 break;
1938 if (c >= 'a' && c <= 'f')
1939 t = c - 'a' + 10;
1940 else if (c >= 'A' && c <= 'F')
1941 t = c - 'A' + 10;
1942 else if(isnum(c))
1943 t = c - '0';
1944 else
1945 break;
1946 if (t >= b)
1947 tcc_error("invalid digit");
1948 ld = ld * b + t;
1949 c = *p++;
1951 if (c == '.'){
1952 c = *p++;
1953 sh = fb;
1954 while (1){
1955 if (c == '\0')
1956 break;
1957 if (c >= 'a' && c <= 'f')
1958 t = c - 'a' + 10;
1959 else if (c >= 'A' && c <= 'F')
1960 t = c - 'A' + 10;
1961 else if (isnum(c))
1962 t =c - '0';
1963 else
1964 break;
1965 if (t >= b){
1966 if(b == 10 && (c == 'e' || c == 'E' || c == 'f' || c == 'F'))
1967 break;
1968 tcc_error("invalid digit");
1970 ld += sh*t;
1971 sh*=fb;
1972 c = *p++;
1975 if ((b == 16 || b == 2) && c != 'p' && c != 'P')
1976 expect("exponent");
1977 if(((c == 'e' || c == 'E') && b == 10) ||
1978 ((c == 'p' || c == 'P') && (b == 16 || b == 2))){
1979 c = *p++;
1980 if(c == '+' || c == '-'){
1981 if (c == '-')
1982 sh = fb;
1983 c = *p++;
1984 }else
1985 sh = b;
1986 if (!isnum(c))
1987 expect("exponent digits");
1988 exp = 0;
1990 exp = exp * 10 + c - '0';
1991 c = *p++;
1992 }while(isnum(c));
1993 while (exp != 0){
1994 if (exp & 1)
1995 ld *= sh;
1996 exp >>= 1;
1997 sh *= sh;
2000 t = toup(c);
2001 if (t == 'F') {
2002 c = *p++;
2003 tok = TOK_CFLOAT;
2004 tokc.f = (float)ld;
2005 } else if (t == 'L') {
2006 c = *p++;
2007 #ifdef TCC_TARGET_PE
2008 tok = TOK_CDOUBLE;
2009 tokc.d = (double)ld;
2010 #else
2011 tok = TOK_CLDOUBLE;
2012 tokc.ld = ld;
2013 #endif
2014 } else {
2015 tok = TOK_CDOUBLE;
2016 tokc.d = (double)ld;
2018 } else {
2019 uint64_t n = 0, n1;
2020 int warn = 1;
2021 int lcount, ucount;
2022 if (b == 10 && c == '0') {
2023 b = 8;
2025 while(1){
2026 if (c == '\0')
2027 break;
2028 if (c >= 'a' && c <= 'f')
2029 t = c - 'a' + 10;
2030 else if (c >= 'A' && c <= 'F')
2031 t = c - 'A' + 10;
2032 else if(isnum(c))
2033 t = c - '0';
2034 else
2035 break;
2036 if (t >= b)
2037 tcc_error("invalid digit");
2038 n1 = n;
2039 n = n * b + t;
2040 if (n < n1 && warn){
2041 tcc_warning("integer constant overflow");
2042 warn = 0;
2044 c = *p++;
2046 /* XXX: not exactly ANSI compliant */
2047 if ((n & 0xffffffff00000000LL) != 0) {
2048 if ((n >> 63) != 0)
2049 tok = TOK_CULLONG;
2050 else
2051 tok = TOK_CLLONG;
2052 } else if (n > 0x7fffffff) {
2053 tok = TOK_CUINT;
2054 } else {
2055 tok = TOK_CINT;
2057 lcount = 0;
2058 ucount = 0;
2059 for(;;) {
2060 t = toup(c);
2061 if (t == 'L') {
2062 if (lcount >= 2)
2063 tcc_error("three 'l's in integer constant");
2064 lcount++;
2065 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2066 if (lcount == 2) {
2067 #endif
2068 if (tok == TOK_CINT)
2069 tok = TOK_CLLONG;
2070 else if (tok == TOK_CUINT)
2071 tok = TOK_CULLONG;
2072 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2074 #endif
2075 c = *p++;
2076 } else if (t == 'U') {
2077 if (ucount >= 1)
2078 tcc_error("two 'u's in integer constant");
2079 ucount++;
2080 if (tok == TOK_CINT)
2081 tok = TOK_CUINT;
2082 else if (tok == TOK_CLLONG)
2083 tok = TOK_CULLONG;
2084 c = *p++;
2085 } else {
2086 break;
2089 if (tok == TOK_CINT || tok == TOK_CUINT)
2090 tokc.ui = n;
2091 else
2092 tokc.ull = n;
2094 if (c)
2095 tcc_error("invalid number\n");
/* Emit a 'case' for a one or two character operator: 'c1' alone gives
   token 'tok1', 'c1' immediately followed by 'c2' gives token 'tok2'.
   Expects 'c', 'p' and 'tok' to be in scope and uses PEEKC() to look at
   the following character. */
#define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c == c2) {                          \
            p++;                                \
            tok = tok2;                         \
        } else {                                \
            tok = tok1;                         \
        }                                       \
        break;
2110 /* return next token without macro substitution */
2111 static inline void next_nomacro1(void)
2113 int t, c, is_long;
2114 TokenSym *ts;
2115 uint8_t *p, *p1;
2116 unsigned int h;
2118 p = file->buf_ptr;
2119 redo_no_start:
2120 c = *p;
2121 switch(c) {
2122 case ' ':
2123 case '\t':
2124 tok = c;
2125 p++;
2126 goto keep_tok_flags;
2127 case '\f':
2128 case '\v':
2129 case '\r':
2130 p++;
2131 goto redo_no_start;
2132 case '\\':
2133 /* first look if it is in fact an end of buffer */
2134 if (p >= file->buf_end) {
2135 file->buf_ptr = p;
2136 handle_eob();
2137 p = file->buf_ptr;
2138 if (p >= file->buf_end)
2139 goto parse_eof;
2140 else
2141 goto redo_no_start;
2142 } else {
2143 file->buf_ptr = p;
2144 ch = *p;
2145 handle_stray();
2146 p = file->buf_ptr;
2147 goto redo_no_start;
2149 parse_eof:
2151 TCCState *s1 = tcc_state;
2152 if ((parse_flags & PARSE_FLAG_LINEFEED)
2153 && !(tok_flags & TOK_FLAG_EOF)) {
2154 tok_flags |= TOK_FLAG_EOF;
2155 tok = TOK_LINEFEED;
2156 goto keep_tok_flags;
2157 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2158 tok = TOK_EOF;
2159 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2160 tcc_error("missing #endif");
2161 } else if (s1->include_stack_ptr == s1->include_stack) {
2162 /* no include left : end of file. */
2163 tok = TOK_EOF;
2164 } else {
2165 tok_flags &= ~TOK_FLAG_EOF;
2166 /* pop include file */
2168 /* test if previous '#endif' was after a #ifdef at
2169 start of file */
2170 if (tok_flags & TOK_FLAG_ENDIF) {
2171 #ifdef INC_DEBUG
2172 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2173 #endif
2174 add_cached_include(s1, file->filename, file->ifndef_macro_saved);
2175 tok_flags &= ~TOK_FLAG_ENDIF;
2178 /* add end of include file debug info */
2179 if (tcc_state->do_debug) {
2180 put_stabd(N_EINCL, 0, 0);
2182 /* pop include stack */
2183 tcc_close();
2184 s1->include_stack_ptr--;
2185 p = file->buf_ptr;
2186 goto redo_no_start;
2189 break;
2191 case '\n':
2192 file->line_num++;
2193 tok_flags |= TOK_FLAG_BOL;
2194 p++;
2195 maybe_newline:
2196 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2197 goto redo_no_start;
2198 tok = TOK_LINEFEED;
2199 goto keep_tok_flags;
2201 case '#':
2202 /* XXX: simplify */
2203 PEEKC(c, p);
2204 if ((tok_flags & TOK_FLAG_BOL) &&
2205 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2206 file->buf_ptr = p;
2207 preprocess(tok_flags & TOK_FLAG_BOF);
2208 p = file->buf_ptr;
2209 goto maybe_newline;
2210 } else {
2211 if (c == '#') {
2212 p++;
2213 tok = TOK_TWOSHARPS;
2214 } else {
2215 if (parse_flags & PARSE_FLAG_ASM_COMMENTS) {
2216 p = parse_line_comment(p - 1);
2217 goto redo_no_start;
2218 } else {
2219 tok = '#';
2223 break;
2225 case 'a': case 'b': case 'c': case 'd':
2226 case 'e': case 'f': case 'g': case 'h':
2227 case 'i': case 'j': case 'k': case 'l':
2228 case 'm': case 'n': case 'o': case 'p':
2229 case 'q': case 'r': case 's': case 't':
2230 case 'u': case 'v': case 'w': case 'x':
2231 case 'y': case 'z':
2232 case 'A': case 'B': case 'C': case 'D':
2233 case 'E': case 'F': case 'G': case 'H':
2234 case 'I': case 'J': case 'K':
2235 case 'M': case 'N': case 'O': case 'P':
2236 case 'Q': case 'R': case 'S': case 'T':
2237 case 'U': case 'V': case 'W': case 'X':
2238 case 'Y': case 'Z':
2239 case '_':
2240 parse_ident_fast:
2241 p1 = p;
2242 h = TOK_HASH_INIT;
2243 h = TOK_HASH_FUNC(h, c);
2244 p++;
2245 for(;;) {
2246 c = *p;
2247 if (!isidnum_table[c-CH_EOF])
2248 break;
2249 h = TOK_HASH_FUNC(h, c);
2250 p++;
2252 if (c != '\\') {
2253 TokenSym **pts;
2254 int len;
2256 /* fast case : no stray found, so we have the full token
2257 and we have already hashed it */
2258 len = p - p1;
2259 h &= (TOK_HASH_SIZE - 1);
2260 pts = &hash_ident[h];
2261 for(;;) {
2262 ts = *pts;
2263 if (!ts)
2264 break;
2265 if (ts->len == len && !memcmp(ts->str, p1, len))
2266 goto token_found;
2267 pts = &(ts->hash_next);
2269 ts = tok_alloc_new(pts, (char *) p1, len);
2270 token_found: ;
2271 } else {
2272 /* slower case */
2273 cstr_reset(&tokcstr);
2275 while (p1 < p) {
2276 cstr_ccat(&tokcstr, *p1);
2277 p1++;
2279 p--;
2280 PEEKC(c, p);
2281 parse_ident_slow:
2282 while (isidnum_table[c-CH_EOF]) {
2283 cstr_ccat(&tokcstr, c);
2284 PEEKC(c, p);
2286 ts = tok_alloc(tokcstr.data, tokcstr.size);
2288 tok = ts->tok;
2289 break;
2290 case 'L':
2291 t = p[1];
2292 if (t != '\\' && t != '\'' && t != '\"') {
2293 /* fast case */
2294 goto parse_ident_fast;
2295 } else {
2296 PEEKC(c, p);
2297 if (c == '\'' || c == '\"') {
2298 is_long = 1;
2299 goto str_const;
2300 } else {
2301 cstr_reset(&tokcstr);
2302 cstr_ccat(&tokcstr, 'L');
2303 goto parse_ident_slow;
2306 break;
2307 case '0': case '1': case '2': case '3':
2308 case '4': case '5': case '6': case '7':
2309 case '8': case '9':
2311 cstr_reset(&tokcstr);
2312 /* after the first digit, accept digits, alpha, '.' or sign if
2313 prefixed by 'eEpP' */
2314 parse_num:
2315 for(;;) {
2316 t = c;
2317 cstr_ccat(&tokcstr, c);
2318 PEEKC(c, p);
2319 if (!(isnum(c) || isid(c) || c == '.' ||
2320 ((c == '+' || c == '-') &&
2321 (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
2322 break;
2324 /* We add a trailing '\0' to ease parsing */
2325 cstr_ccat(&tokcstr, '\0');
2326 tokc.cstr = &tokcstr;
2327 tok = TOK_PPNUM;
2328 break;
2329 case '.':
2330 /* special dot handling because it can also start a number */
2331 PEEKC(c, p);
2332 if (isnum(c)) {
2333 cstr_reset(&tokcstr);
2334 cstr_ccat(&tokcstr, '.');
2335 goto parse_num;
2336 } else if (c == '.') {
2337 PEEKC(c, p);
2338 if (c != '.')
2339 expect("'.'");
2340 PEEKC(c, p);
2341 tok = TOK_DOTS;
2342 } else {
2343 tok = '.';
2345 break;
2346 case '\'':
2347 case '\"':
2348 is_long = 0;
2349 str_const:
2351 CString str;
2352 int sep;
2354 sep = c;
2356 /* parse the string */
2357 cstr_new(&str);
2358 p = parse_pp_string(p, sep, &str);
2359 cstr_ccat(&str, '\0');
2361 /* eval the escape (should be done as TOK_PPNUM) */
2362 cstr_reset(&tokcstr);
2363 parse_escape_string(&tokcstr, str.data, is_long);
2364 cstr_free(&str);
2366 if (sep == '\'') {
2367 int char_size;
2368 /* XXX: make it portable */
2369 if (!is_long)
2370 char_size = 1;
2371 else
2372 char_size = sizeof(nwchar_t);
2373 if (tokcstr.size <= char_size)
2374 tcc_error("empty character constant");
2375 if (tokcstr.size > 2 * char_size)
2376 tcc_warning("multi-character character constant");
2377 if (!is_long) {
2378 tokc.i = *(int8_t *)tokcstr.data;
2379 tok = TOK_CCHAR;
2380 } else {
2381 tokc.i = *(nwchar_t *)tokcstr.data;
2382 tok = TOK_LCHAR;
2384 } else {
2385 tokc.cstr = &tokcstr;
2386 if (!is_long)
2387 tok = TOK_STR;
2388 else
2389 tok = TOK_LSTR;
2392 break;
2394 case '<':
2395 PEEKC(c, p);
2396 if (c == '=') {
2397 p++;
2398 tok = TOK_LE;
2399 } else if (c == '<') {
2400 PEEKC(c, p);
2401 if (c == '=') {
2402 p++;
2403 tok = TOK_A_SHL;
2404 } else {
2405 tok = TOK_SHL;
2407 } else {
2408 tok = TOK_LT;
2410 break;
2412 case '>':
2413 PEEKC(c, p);
2414 if (c == '=') {
2415 p++;
2416 tok = TOK_GE;
2417 } else if (c == '>') {
2418 PEEKC(c, p);
2419 if (c == '=') {
2420 p++;
2421 tok = TOK_A_SAR;
2422 } else {
2423 tok = TOK_SAR;
2425 } else {
2426 tok = TOK_GT;
2428 break;
2430 case '&':
2431 PEEKC(c, p);
2432 if (c == '&') {
2433 p++;
2434 tok = TOK_LAND;
2435 } else if (c == '=') {
2436 p++;
2437 tok = TOK_A_AND;
2438 } else {
2439 tok = '&';
2441 break;
2443 case '|':
2444 PEEKC(c, p);
2445 if (c == '|') {
2446 p++;
2447 tok = TOK_LOR;
2448 } else if (c == '=') {
2449 p++;
2450 tok = TOK_A_OR;
2451 } else {
2452 tok = '|';
2454 break;
2456 case '+':
2457 PEEKC(c, p);
2458 if (c == '+') {
2459 p++;
2460 tok = TOK_INC;
2461 } else if (c == '=') {
2462 p++;
2463 tok = TOK_A_ADD;
2464 } else {
2465 tok = '+';
2467 break;
2469 case '-':
2470 PEEKC(c, p);
2471 if (c == '-') {
2472 p++;
2473 tok = TOK_DEC;
2474 } else if (c == '=') {
2475 p++;
2476 tok = TOK_A_SUB;
2477 } else if (c == '>') {
2478 p++;
2479 tok = TOK_ARROW;
2480 } else {
2481 tok = '-';
2483 break;
2485 PARSE2('!', '!', '=', TOK_NE)
2486 PARSE2('=', '=', '=', TOK_EQ)
2487 PARSE2('*', '*', '=', TOK_A_MUL)
2488 PARSE2('%', '%', '=', TOK_A_MOD)
2489 PARSE2('^', '^', '=', TOK_A_XOR)
2491 /* comments or operator */
2492 case '/':
2493 PEEKC(c, p);
2494 if (c == '*') {
2495 p = parse_comment(p);
2496 /* comments replaced by a blank */
2497 tok = ' ';
2498 goto keep_tok_flags;
2499 } else if (c == '/') {
2500 p = parse_line_comment(p);
2501 tok = ' ';
2502 goto keep_tok_flags;
2503 } else if (c == '=') {
2504 p++;
2505 tok = TOK_A_DIV;
2506 } else {
2507 tok = '/';
2509 break;
2511 /* simple tokens */
2512 case '(':
2513 case ')':
2514 case '[':
2515 case ']':
2516 case '{':
2517 case '}':
2518 case ',':
2519 case ';':
2520 case ':':
2521 case '?':
2522 case '~':
2523 case '$': /* only used in assembler */
2524 case '@': /* dito */
2525 tok = c;
2526 p++;
2527 break;
2528 default:
2529 tcc_error("unrecognized character \\x%02x", c);
2530 break;
2532 tok_flags = 0;
2533 keep_tok_flags:
2534 file->buf_ptr = p;
2535 #if defined(PARSE_DEBUG)
2536 printf("token = %s\n", get_tok_str(tok, &tokc));
2537 #endif
2540 /* return next token without macro substitution. Can read input from
2541 macro_ptr buffer */
2542 static void next_nomacro_spc(void)
2544 if (macro_ptr) {
2545 redo:
2546 tok = *macro_ptr;
2547 if (tok) {
2548 TOK_GET(&tok, &macro_ptr, &tokc);
2549 if (tok == TOK_LINENUM) {
2550 file->line_num = tokc.i;
2551 goto redo;
2554 } else {
2555 next_nomacro1();
2559 ST_FUNC void next_nomacro(void)
2561 do {
2562 next_nomacro_spc();
2563 } while (is_space(tok));
2566 /* substitute arguments in replacement lists in macro_str by the values in
2567 args (field d) and return allocated string */
2568 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2570 int last_tok, t, spc;
2571 const int *st;
2572 Sym *s;
2573 CValue cval;
2574 TokenString str;
2575 CString cstr;
2577 tok_str_new(&str);
2578 last_tok = 0;
2579 while(1) {
2580 TOK_GET(&t, &macro_str, &cval);
2581 if (!t)
2582 break;
2583 if (t == '#') {
2584 /* stringize */
2585 TOK_GET(&t, &macro_str, &cval);
2586 if (!t)
2587 break;
2588 s = sym_find2(args, t);
2589 if (s) {
2590 cstr_new(&cstr);
2591 st = s->d;
2592 spc = 0;
2593 while (*st) {
2594 TOK_GET(&t, &st, &cval);
2595 if (!check_space(t, &spc))
2596 cstr_cat(&cstr, get_tok_str(t, &cval));
2598 cstr.size -= spc;
2599 cstr_ccat(&cstr, '\0');
2600 #ifdef PP_DEBUG
2601 printf("stringize: %s\n", (char *)cstr.data);
2602 #endif
2603 /* add string */
2604 cval.cstr = &cstr;
2605 tok_str_add2(&str, TOK_STR, &cval);
2606 cstr_free(&cstr);
2607 } else {
2608 tok_str_add2(&str, t, &cval);
2610 } else if (t >= TOK_IDENT) {
2611 s = sym_find2(args, t);
2612 if (s) {
2613 st = s->d;
2614 /* if '##' is present before or after, no arg substitution */
2615 if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) {
2616 /* special case for var arg macros : ## eats the
2617 ',' if empty VA_ARGS variable. */
2618 /* XXX: test of the ',' is not 100%
2619 reliable. should fix it to avoid security
2620 problems */
2621 if (gnu_ext && s->type.t &&
2622 last_tok == TOK_TWOSHARPS &&
2623 str.len >= 2 && str.str[str.len - 2] == ',') {
2624 if (*st == TOK_PLCHLDR) {
2625 /* suppress ',' '##' */
2626 str.len -= 2;
2627 } else {
2628 /* suppress '##' and add variable */
2629 str.len--;
2630 goto add_var;
2632 } else {
2633 int t1;
2634 add_var:
2635 for(;;) {
2636 TOK_GET(&t1, &st, &cval);
2637 if (!t1)
2638 break;
2639 tok_str_add2(&str, t1, &cval);
2642 } else {
2643 /* NOTE: the stream cannot be read when macro
2644 substituing an argument */
2645 macro_subst(&str, nested_list, st, NULL);
2647 } else {
2648 tok_str_add(&str, t);
2650 } else {
2651 tok_str_add2(&str, t, &cval);
2653 last_tok = t;
2655 tok_str_add(&str, 0);
2656 return str.str;
/* abbreviated month names, indexed by tm_mon (used for __DATE__) */
static char const ab_month_name[12][4] =
{
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec"
};
2665 /* do macro substitution of current token with macro 's' and add
2666 result to (tok_str,tok_len). 'nested_list' is the list of all
2667 macros we got inside to avoid recursing. Return non zero if no
2668 substitution needs to be done */
2669 static int macro_subst_tok(TokenString *tok_str,
2670 Sym **nested_list, Sym *s, struct macro_level **can_read_stream)
2672 Sym *args, *sa, *sa1;
2673 int mstr_allocated, parlevel, *mstr, t, t1, spc;
2674 const int *p;
2675 TokenString str;
2676 char *cstrval;
2677 CValue cval;
2678 CString cstr;
2679 char buf[32];
2681 /* if symbol is a macro, prepare substitution */
2682 /* special macros */
2683 if (tok == TOK___LINE__) {
2684 snprintf(buf, sizeof(buf), "%d", file->line_num);
2685 cstrval = buf;
2686 t1 = TOK_PPNUM;
2687 goto add_cstr1;
2688 } else if (tok == TOK___FILE__) {
2689 cstrval = file->filename;
2690 goto add_cstr;
2691 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2692 time_t ti;
2693 struct tm *tm;
2695 time(&ti);
2696 tm = localtime(&ti);
2697 if (tok == TOK___DATE__) {
2698 snprintf(buf, sizeof(buf), "%s %2d %d",
2699 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2700 } else {
2701 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
2702 tm->tm_hour, tm->tm_min, tm->tm_sec);
2704 cstrval = buf;
2705 add_cstr:
2706 t1 = TOK_STR;
2707 add_cstr1:
2708 cstr_new(&cstr);
2709 cstr_cat(&cstr, cstrval);
2710 cstr_ccat(&cstr, '\0');
2711 cval.cstr = &cstr;
2712 tok_str_add2(tok_str, t1, &cval);
2713 cstr_free(&cstr);
2714 } else {
2715 mstr = s->d;
2716 mstr_allocated = 0;
2717 if (s->type.t == MACRO_FUNC) {
2718 /* NOTE: we do not use next_nomacro to avoid eating the
2719 next token. XXX: find better solution */
2720 redo:
2721 if (macro_ptr) {
2722 p = macro_ptr;
2723 while (is_space(t = *p) || TOK_LINEFEED == t)
2724 ++p;
2725 if (t == 0 && can_read_stream) {
2726 /* end of macro stream: we must look at the token
2727 after in the file */
2728 struct macro_level *ml = *can_read_stream;
2729 macro_ptr = NULL;
2730 if (ml)
2732 macro_ptr = ml->p;
2733 ml->p = NULL;
2734 *can_read_stream = ml -> prev;
2736 /* also, end of scope for nested defined symbol */
2737 (*nested_list)->v = -1;
2738 goto redo;
2740 } else {
2741 ch = file->buf_ptr[0];
2742 while (is_space(ch) || ch == '\n' || ch == '/')
2744 if (ch == '/')
2746 int c;
2747 uint8_t *p = file->buf_ptr;
2748 PEEKC(c, p);
2749 if (c == '*') {
2750 p = parse_comment(p);
2751 file->buf_ptr = p - 1;
2752 } else if (c == '/') {
2753 p = parse_line_comment(p);
2754 file->buf_ptr = p - 1;
2755 } else
2756 break;
2758 cinp();
2760 t = ch;
2762 if (t != '(') /* no macro subst */
2763 return -1;
2765 /* argument macro */
2766 next_nomacro();
2767 next_nomacro();
2768 args = NULL;
2769 sa = s->next;
2770 /* NOTE: empty args are allowed, except if no args */
2771 for(;;) {
2772 /* handle '()' case */
2773 if (!args && !sa && tok == ')')
2774 break;
2775 if (!sa)
2776 tcc_error("macro '%s' used with too many args",
2777 get_tok_str(s->v, 0));
2778 tok_str_new(&str);
2779 parlevel = spc = 0;
2780 /* NOTE: non zero sa->t indicates VA_ARGS */
2781 while ((parlevel > 0 ||
2782 (tok != ')' &&
2783 (tok != ',' || sa->type.t))) &&
2784 tok != -1) {
2785 if (tok == '(')
2786 parlevel++;
2787 else if (tok == ')')
2788 parlevel--;
2789 if (tok == TOK_LINEFEED)
2790 tok = ' ';
2791 if (!check_space(tok, &spc))
2792 tok_str_add2(&str, tok, &tokc);
2793 next_nomacro_spc();
2795 if (!str.len)
2796 tok_str_add(&str, TOK_PLCHLDR);
2797 str.len -= spc;
2798 tok_str_add(&str, 0);
2799 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
2800 sa1->d = str.str;
2801 sa = sa->next;
2802 if (tok == ')') {
2803 /* special case for gcc var args: add an empty
2804 var arg argument if it is omitted */
2805 if (sa && sa->type.t && gnu_ext)
2806 continue;
2807 else
2808 break;
2810 if (tok != ',')
2811 expect(",");
2812 next_nomacro();
2814 if (sa) {
2815 tcc_error("macro '%s' used with too few args",
2816 get_tok_str(s->v, 0));
2819 /* now subst each arg */
2820 mstr = macro_arg_subst(nested_list, mstr, args);
2821 /* free memory */
2822 sa = args;
2823 while (sa) {
2824 sa1 = sa->prev;
2825 tok_str_free(sa->d);
2826 sym_free(sa);
2827 sa = sa1;
2829 mstr_allocated = 1;
2831 sym_push2(nested_list, s->v, 0, 0);
2832 macro_subst(tok_str, nested_list, mstr, can_read_stream);
2833 /* pop nested defined symbol */
2834 sa1 = *nested_list;
2835 *nested_list = sa1->prev;
2836 sym_free(sa1);
2837 if (mstr_allocated)
2838 tok_str_free(mstr);
2840 return 0;
2843 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
2844 return the resulting string (which must be freed). */
2845 static inline int *macro_twosharps(const int *macro_str)
2847 const int *ptr;
2848 int t;
2849 TokenString macro_str1;
2850 CString cstr;
2851 int n, start_of_nosubsts;
2853 /* we search the first '##' */
2854 for(ptr = macro_str;;) {
2855 CValue cval;
2856 TOK_GET(&t, &ptr, &cval);
2857 if (t == TOK_TWOSHARPS)
2858 break;
2859 /* nothing more to do if end of string */
2860 if (t == 0)
2861 return NULL;
2864 /* we saw '##', so we need more processing to handle it */
2865 start_of_nosubsts = -1;
2866 tok_str_new(&macro_str1);
2867 for(ptr = macro_str;;) {
2868 TOK_GET(&tok, &ptr, &tokc);
2869 if (tok == 0)
2870 break;
2871 if (tok == TOK_TWOSHARPS)
2872 continue;
2873 if (tok == TOK_NOSUBST && start_of_nosubsts < 0)
2874 start_of_nosubsts = macro_str1.len;
2875 while (*ptr == TOK_TWOSHARPS) {
2876 /* given 'a##b', remove nosubsts preceding 'a' */
2877 if (start_of_nosubsts >= 0)
2878 macro_str1.len = start_of_nosubsts;
2879 /* given 'a##b', skip '##' */
2880 t = *++ptr;
2881 /* given 'a##b', remove nosubsts preceding 'b' */
2882 while (t == TOK_NOSUBST)
2883 t = *++ptr;
2884 if (t && t != TOK_TWOSHARPS) {
2885 CValue cval;
2886 TOK_GET(&t, &ptr, &cval);
2887 /* We concatenate the two tokens */
2888 cstr_new(&cstr);
2889 if (tok != TOK_PLCHLDR)
2890 cstr_cat(&cstr, get_tok_str(tok, &tokc));
2891 n = cstr.size;
2892 if (t != TOK_PLCHLDR || tok == TOK_PLCHLDR)
2893 cstr_cat(&cstr, get_tok_str(t, &cval));
2894 cstr_ccat(&cstr, '\0');
2896 tcc_open_bf(tcc_state, ":paste:", cstr.size);
2897 memcpy(file->buffer, cstr.data, cstr.size);
2898 for (;;) {
2899 next_nomacro1();
2900 if (0 == *file->buf_ptr)
2901 break;
2902 tok_str_add2(&macro_str1, tok, &tokc);
2903 tcc_warning("pasting \"%.*s\" and \"%s\" does not give a valid preprocessing token",
2904 n, cstr.data, (char*)cstr.data + n);
2906 tcc_close();
2907 cstr_free(&cstr);
2910 if (tok != TOK_NOSUBST) {
2911 tok_str_add2(&macro_str1, tok, &tokc);
2912 tok = ' ';
2913 start_of_nosubsts = -1;
2915 tok_str_add2(&macro_str1, tok, &tokc);
2917 tok_str_add(&macro_str1, 0);
2918 return macro_str1.str;
2922 /* do macro substitution of macro_str and add result to
2923 (tok_str,tok_len). 'nested_list' is the list of all macros we got
2924 inside to avoid recursing. */
2925 static void macro_subst(TokenString *tok_str, Sym **nested_list,
2926 const int *macro_str, struct macro_level ** can_read_stream)
2928 Sym *s;
2929 int *macro_str1;
2930 const int *ptr;
2931 int t, ret, spc;
2932 CValue cval;
2933 struct macro_level ml;
2934 int force_blank;
2936 /* first scan for '##' operator handling */
2937 ptr = macro_str;
2938 macro_str1 = macro_twosharps(ptr);
2940 if (macro_str1)
2941 ptr = macro_str1;
2942 spc = 0;
2943 force_blank = 0;
2945 while (1) {
2946 /* NOTE: ptr == NULL can only happen if tokens are read from
2947 file stream due to a macro function call */
2948 if (ptr == NULL)
2949 break;
2950 TOK_GET(&t, &ptr, &cval);
2951 if (t == 0)
2952 break;
2953 if (t == TOK_NOSUBST) {
2954 /* following token has already been subst'd. just copy it on */
2955 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2956 TOK_GET(&t, &ptr, &cval);
2957 goto no_subst;
2959 s = define_find(t);
2960 if (s != NULL) {
2961 /* if nested substitution, do nothing */
2962 if (sym_find2(*nested_list, t)) {
2963 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
2964 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2965 goto no_subst;
2967 ml.p = macro_ptr;
2968 if (can_read_stream)
2969 ml.prev = *can_read_stream, *can_read_stream = &ml;
2970 macro_ptr = (int *)ptr;
2971 tok = t;
2972 ret = macro_subst_tok(tok_str, nested_list, s, can_read_stream);
2973 ptr = (int *)macro_ptr;
2974 macro_ptr = ml.p;
2975 if (can_read_stream && *can_read_stream == &ml)
2976 *can_read_stream = ml.prev;
2977 if (ret != 0)
2978 goto no_subst;
2979 if (parse_flags & PARSE_FLAG_SPACES)
2980 force_blank = 1;
2981 } else {
2982 no_subst:
2983 if (force_blank) {
2984 tok_str_add(tok_str, ' ');
2985 spc = 1;
2986 force_blank = 0;
2988 if (!check_space(t, &spc))
2989 tok_str_add2(tok_str, t, &cval);
2992 if (macro_str1)
2993 tok_str_free(macro_str1);
2996 /* return next token with macro substitution */
2997 ST_FUNC void next(void)
2999 Sym *nested_list, *s;
3000 TokenString str;
3001 struct macro_level *ml;
3003 redo:
3004 if (parse_flags & PARSE_FLAG_SPACES)
3005 next_nomacro_spc();
3006 else
3007 next_nomacro();
3008 if (!macro_ptr) {
3009 /* if not reading from macro substituted string, then try
3010 to substitute macros */
3011 if (tok >= TOK_IDENT &&
3012 (parse_flags & PARSE_FLAG_PREPROCESS)) {
3013 s = define_find(tok);
3014 if (s) {
3015 /* we have a macro: we try to substitute */
3016 tok_str_new(&str);
3017 nested_list = NULL;
3018 ml = NULL;
3019 if (macro_subst_tok(&str, &nested_list, s, &ml) == 0) {
3020 /* substitution done, NOTE: maybe empty */
3021 tok_str_add(&str, 0);
3022 macro_ptr = str.str;
3023 macro_ptr_allocated = str.str;
3024 goto redo;
3028 } else {
3029 if (tok == 0) {
3030 /* end of macro or end of unget buffer */
3031 if (unget_buffer_enabled) {
3032 macro_ptr = unget_saved_macro_ptr;
3033 unget_buffer_enabled = 0;
3034 } else {
3035 /* end of macro string: free it */
3036 tok_str_free(macro_ptr_allocated);
3037 macro_ptr_allocated = NULL;
3038 macro_ptr = NULL;
3040 goto redo;
3041 } else if (tok == TOK_NOSUBST) {
3042 /* discard preprocessor's nosubst markers */
3043 goto redo;
3047 /* convert preprocessor tokens into C tokens */
3048 if (tok == TOK_PPNUM &&
3049 (parse_flags & PARSE_FLAG_TOK_NUM)) {
3050 parse_number((char *)tokc.cstr->data);
3054 /* push back current token and set current token to 'last_tok'. Only
3055 identifier case handled for labels. */
3056 ST_INLN void unget_tok(int last_tok)
3058 int i, n;
3059 int *q;
3060 if (unget_buffer_enabled)
3062 /* assert(macro_ptr == unget_saved_buffer + 1);
3063 assert(*macro_ptr == 0); */
3065 else
3067 unget_saved_macro_ptr = macro_ptr;
3068 unget_buffer_enabled = 1;
3070 q = unget_saved_buffer;
3071 macro_ptr = q;
3072 *q++ = tok;
3073 n = tok_ext_size(tok) - 1;
3074 for(i=0;i<n;i++)
3075 *q++ = tokc.tab[i];
3076 *q = 0; /* end of token string */
3077 tok = last_tok;
3081 /* better than nothing, but needs extension to handle '-E' option
3082 correctly too */
3083 ST_FUNC void preprocess_init(TCCState *s1)
3085 s1->include_stack_ptr = s1->include_stack;
3086 /* XXX: move that before to avoid having to initialize
3087 file->ifdef_stack_ptr ? */
3088 s1->ifdef_stack_ptr = s1->ifdef_stack;
3089 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3091 vtop = vstack - 1;
3092 s1->pack_stack[0] = 0;
3093 s1->pack_stack_ptr = s1->pack_stack;
3096 ST_FUNC void preprocess_new(void)
3098 int i, c;
3099 const char *p, *r;
3101 /* init isid table */
3102 for(i=CH_EOF;i<256;i++)
3103 isidnum_table[i-CH_EOF] = isid(i) || isnum(i);
3105 /* add all tokens */
3106 table_ident = NULL;
3107 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3109 tok_ident = TOK_IDENT;
3110 p = tcc_keywords;
3111 while (*p) {
3112 r = p;
3113 for(;;) {
3114 c = *r++;
3115 if (c == '\0')
3116 break;
3118 tok_alloc(p, r - p - 1);
3119 p = r;
3123 /* Preprocess the current file */
3124 ST_FUNC int tcc_preprocess(TCCState *s1)
3126 Sym *define_start;
3128 BufferedFile *file_ref, **iptr, **iptr_new;
3129 int token_seen, line_ref, d;
3130 const char *s;
3132 preprocess_init(s1);
3133 define_start = define_stack;
3134 ch = file->buf_ptr[0];
3135 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3136 parse_flags = PARSE_FLAG_ASM_COMMENTS | PARSE_FLAG_PREPROCESS |
3137 PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES;
3138 token_seen = 0;
3139 line_ref = 0;
3140 file_ref = NULL;
3141 iptr = s1->include_stack_ptr;
3142 tok = TOK_LINEFEED; /* print line */
3143 goto print_line;
3144 for (;;) {
3145 next();
3146 if (tok == TOK_EOF) {
3147 break;
3148 } else if (file != file_ref) {
3149 goto print_line;
3150 } else if (tok == TOK_LINEFEED) {
3151 if (token_seen)
3152 continue;
3153 ++line_ref;
3154 token_seen = 1;
3155 } else if (token_seen) {
3156 d = file->line_num - line_ref;
3157 if (file != file_ref || d < 0 || d >= 8) {
3158 print_line:
3159 iptr_new = s1->include_stack_ptr;
3160 s = iptr_new > iptr ? " 1"
3161 : iptr_new < iptr ? " 2"
3162 : iptr_new > s1->include_stack ? " 3"
3163 : "";
3164 iptr = iptr_new;
3165 fprintf(s1->ppfp, "# %d \"%s\"%s\n", file->line_num, file->filename, s);
3166 } else {
3167 while (d)
3168 fputs("\n", s1->ppfp), --d;
3170 line_ref = (file_ref = file)->line_num;
3171 token_seen = tok == TOK_LINEFEED;
3172 if (token_seen)
3173 continue;
3175 fputs(get_tok_str(tok, &tokc), s1->ppfp);
3177 free_defines(define_start);
3178 return 0;