Redo "fix line number in macro redefined message"
[tinycc.git] / tccpp.c
blob024f9de9b3ba5c5a35c350170482611b19d26f3b
1 /*
2 * TCC - Tiny C Compiler
3 *
4 * Copyright (c) 2001-2004 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
/* Preprocessor/lexer state shared with the rest of the compiler, followed
   by the file-private state of this translation unit. */
26 ST_DATA int tok_flags;
27 ST_DATA int parse_flags;
29 ST_DATA struct BufferedFile *file;
/* ch: current input character (see inp()); tok: current token */
30 ST_DATA int ch, tok;
/* value attached to the current token 'tok' */
31 ST_DATA CValue tokc;
/* read position in the token string being replayed; saved and restored
   by begin_macro()/end_macro() */
32 ST_DATA const int *macro_ptr;
33 ST_DATA CString tokcstr; /* current parsed string, if any */
35 /* display benchmark infos */
36 ST_DATA int total_lines;
37 ST_DATA int total_bytes;
/* next token number to hand out (tok_alloc_new() post-increments it) */
38 ST_DATA int tok_ident;
/* identifier table, indexed by (token - TOK_IDENT) */
39 ST_DATA TokenSym **table_ident;
/* tiny-object allocators used for TokenSym, token strings and CString data */
41 ST_DATA TinyAlloc *toksym_alloc;
42 ST_DATA TinyAlloc *tokstr_alloc;
43 ST_DATA TinyAlloc *cstr_alloc;
45 /* ------------------------------------------------------------------------- */
/* hash buckets for identifiers; entries are chained through hash_next */
47 static TokenSym *hash_ident[TOK_HASH_SIZE];
48 static char token_buf[STRING_MAX_SIZE + 1];
/* scratch buffer that get_tok_str() formats into and returns */
49 static CString cstr_buf;
/* scratch token string that parse_define() collects a macro body into */
50 static TokenString tokstr_buf;
/* character class flags, indexed by (character - CH_EOF) */
51 static unsigned char isidnum_table[256 - CH_EOF];
52 static int pp_debug_tok, pp_debug_symv;
53 static void tok_print(const char *msg, const int *str);
55 /* isidnum_table flags: */
56 #define IS_SPC 1
57 #define IS_ID 2
58 #define IS_NUM 4
/* stack of token strings being replayed, linked through ->prev */
60 static TokenString *macro_stack;
/* Every DEF(id, str) entry of tcctok.h expands to its string followed by a
   NUL byte, so the included list concatenates into one NUL-separated
   string of keyword spellings. */
62 static const char tcc_keywords[] =
63 #define DEF(id, str) str "\0"
64 #include "tcctok.h"
65 #undef DEF
68 /* WARNING: the content of this string encodes token numbers */
/* Two-character operators, stored as consecutive triples:
   first character, second character, token value.
   get_tok_str() walks the table three bytes at a time until it reads a
   zero first byte. */
69 static const unsigned char tok_two_chars[] =
70 /* outdated -- gr
71 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
72 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
73 */{
74 '<','=', TOK_LE,
75 '>','=', TOK_GE,
76 '!','=', TOK_NE,
77 '&','&', TOK_LAND,
78 '|','|', TOK_LOR,
79 '+','+', TOK_INC,
80 '-','-', TOK_DEC,
81 '=','=', TOK_EQ,
82 '<','<', TOK_SHL,
83 '>','>', TOK_SAR,
84 '+','=', TOK_A_ADD,
85 '-','=', TOK_A_SUB,
86 '*','=', TOK_A_MUL,
87 '/','=', TOK_A_DIV,
88 '%','=', TOK_A_MOD,
89 '&','=', TOK_A_AND,
90 '^','=', TOK_A_XOR,
91 '|','=', TOK_A_OR,
92 '-','>', TOK_ARROW,
93 '.','.', 0xa8, // C++ token ?
94 '#','#', TOK_TWOSHARPS,
98 static void next_nomacro_spc(void);
100 ST_FUNC void skip(int c)
102 if (tok != c)
103 tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
104 next();
107 ST_FUNC void expect(const char *msg)
109 tcc_error("%s expected", msg);
112 ST_FUNC void begin_macro(TokenString *str, int alloc)
114 str->alloc = alloc;
115 str->prev = macro_stack;
116 str->prev_ptr = macro_ptr;
117 macro_ptr = str->str;
118 macro_stack = str;
121 ST_FUNC void end_macro(void)
123 TokenString *str = macro_stack;
124 macro_stack = str->prev;
125 macro_ptr = str->prev_ptr;
126 if (str->alloc == 2) {
127 str->alloc = 3; /* just mark as finished */
128 } else {
129 tok_str_free(str->str);
130 if (str->alloc == 1)
131 tcc_free(str);
/* Return a pointer to the first character of 'p' whose value is above
   ' ' (or to the terminating NUL if there is none). */
ST_FUNC char *trimfront(char *p)
{
    for (; *p != '\0'; ++p) {
        if ((unsigned char)*p > ' ')
            break;
    }
    return p;
}
/* Cut trailing characters with value <= ' ' from the range [a, e) by
   writing a NUL after the last remaining character. Returns 'a'. */
ST_FUNC char *trimback(char *a, char *e)
{
    char *end = e;

    while (end > a) {
        if ((unsigned char)end[-1] > ' ')
            break;
        --end;
    }
    *end = 0;
    return a;
}
150 /* ------------------------------------------------------------------------- */
151 /* Custom allocator for tiny objects */
153 ST_FUNC TinyAlloc *tal_new(TinyAlloc **pal, size_t limit, size_t size)
155 TinyAlloc *al = tcc_mallocz(sizeof(TinyAlloc));
156 al->p = al->buffer = tcc_malloc(size);
157 al->limit = limit;
158 al->size = size;
159 if (pal) *pal = al;
160 return al;
/* Free the arena 'al' and every arena reachable through ->next.
   With TAL_INFO, usage statistics are printed for each arena; with
   TAL_DEBUG, chunks still allocated are listed as leaks. */
163 ST_FUNC void tal_delete(TinyAlloc *al)
165 TinyAlloc *next;
/* loop over the chain without recursion */
167 tail_call:
168 if (!al)
169 return;
170 #ifdef TAL_INFO
171 fprintf(stderr, "limit=%5d, size=%5g MB, nb_peak=%6d, nb_total=%8d, nb_missed=%6d, usage=%5.1f%%\n",
172 al->limit, al->size / 1024.0 / 1024.0, al->nb_peak, al->nb_total, al->nb_missed,
173 (al->peak_p - al->buffer) * 100.0 / al->size);
174 #endif
175 #ifdef TAL_DEBUG
176 if (al->nb_allocs > 0) {
177 fprintf(stderr, "TAL_DEBUG: mem leak %d chunks (limit= %d)\n",
178 al->nb_allocs, al->limit);
/* walk the chunk headers from the buffer start up to the bump pointer;
   a positive line_num marks a chunk that is still live (the free and
   realloc paths negate it) */
179 uint8_t *p = al->buffer;
180 while (p < al->p) {
181 tal_header_t *header = (tal_header_t *)p;
182 if (header->line_num > 0) {
183 fprintf(stderr, " file %s, line %u: %u bytes\n",
184 header->file_name, header->line_num, header->size);
186 p += header->size + sizeof(tal_header_t);
189 #endif
/* read the link before the arena itself is released */
190 next = al->next;
191 tcc_free(al->buffer);
192 tcc_free(al);
193 al = next;
194 goto tail_call;
/* Release chunk 'p'. The arena chain starting at 'al' is searched for the
   arena whose buffer contains 'p'; if none does, 'p' is handed to
   tcc_free(). A NULL 'p' is ignored.
   Memory inside an arena is only reclaimed when its allocation count
   drops to zero, at which point the bump pointer is reset to the start. */
197 ST_FUNC void tal_free_impl(TinyAlloc *al, void *p TAL_DEBUG_PARAMS)
199 if (!p)
200 return;
201 tail_call:
202 if (al->buffer <= (uint8_t *)p && (uint8_t *)p < al->buffer + al->size) {
203 #ifdef TAL_DEBUG
/* the chunk header sits directly in front of the user pointer; a negative
   line_num means the chunk was already released */
204 tal_header_t *header = (((tal_header_t *)p) - 1);
205 if (header->line_num < 0) {
206 fprintf(stderr, "TAL_DEBUG: file %s, line %u double frees chunk from\n",
207 file, line);
208 fprintf(stderr, " file %s, line %u: %u bytes\n",
209 header->file_name, -header->line_num, header->size);
210 } else
211 header->line_num = -header->line_num;
212 #endif
213 al->nb_allocs--;
214 if (!al->nb_allocs)
215 al->p = al->buffer;
216 } else if (al->next) {
217 al = al->next;
218 goto tail_call;
220 else
221 tcc_free(p);
/* Allocate (p == NULL) or resize (p != NULL) a chunk of 'size' bytes using
   the arena chain that starts at *pal.
   - Requests of at most al->limit bytes are carved from the arena buffer
     by advancing al->p; each chunk is preceded by a tal_header_t.
   - If the arena is full, the next arena is tried; when there is none a
     new arena of twice the size is created by tal_new(), which also
     stores it in *pal.
   - Requests above the limit fall back to tcc_malloc()/tcc_realloc().
   Returns the new chunk.
   NOTE(review): when a chunk is moved, header->size bytes (the OLD size)
   are copied; this looks safe only if callers never shrink a chunk --
   confirm. */
224 ST_FUNC void *tal_realloc_impl(TinyAlloc **pal, void *p, size_t size TAL_DEBUG_PARAMS)
226 tal_header_t *header;
227 void *ret;
228 int is_own;
/* round the request up to a multiple of 4 bytes */
229 size_t adj_size = (size + 3) & -4;
230 TinyAlloc *al = *pal;
232 tail_call:
/* is_own: 'p' lies inside this arena's buffer */
233 is_own = (al->buffer <= (uint8_t *)p && (uint8_t *)p < al->buffer + al->size);
234 if ((!p || is_own) && size <= al->limit) {
235 if (al->p + adj_size + sizeof(tal_header_t) < al->buffer + al->size) {
/* enough room: place a new header + chunk at the bump pointer */
236 header = (tal_header_t *)al->p;
237 header->size = adj_size;
238 #ifdef TAL_DEBUG
/* keep only the tail of the file name if it is too long */
239 int ofs = strlen(file) - TAL_DEBUG_FILE_LEN;
240 strncpy(header->file_name, file + (ofs > 0 ? ofs : 0), TAL_DEBUG_FILE_LEN);
241 header->file_name[TAL_DEBUG_FILE_LEN] = 0;
242 header->line_num = line;
243 #endif
244 ret = al->p + sizeof(tal_header_t);
245 al->p += adj_size + sizeof(tal_header_t);
246 if (is_own) {
/* resize within the same arena: copy the old contents; the allocation
   count is unchanged because the new chunk replaces the old one */
247 header = (((tal_header_t *)p) - 1);
248 memcpy(ret, p, header->size);
249 #ifdef TAL_DEBUG
250 header->line_num = -header->line_num;
251 #endif
252 } else {
253 al->nb_allocs++;
255 #ifdef TAL_INFO
256 if (al->nb_peak < al->nb_allocs)
257 al->nb_peak = al->nb_allocs;
258 if (al->peak_p < al->p)
259 al->peak_p = al->p;
260 al->nb_total++;
261 #endif
262 return ret;
263 } else if (is_own) {
/* arena is full but owns 'p': give up the old chunk, allocate a fresh
   one starting again from the head of the chain, then copy */
264 al->nb_allocs--;
265 ret = tal_realloc(*pal, 0, size);
266 header = (((tal_header_t *)p) - 1);
267 memcpy(ret, p, header->size);
268 #ifdef TAL_DEBUG
269 header->line_num = -header->line_num;
270 #endif
271 return ret;
/* arena is full and does not own 'p': try the next arena, or grow the
   chain with an arena of twice the size */
273 if (al->next) {
274 al = al->next;
275 } else {
276 TinyAlloc *bottom = al, *next = al->top ? al->top : al;
278 al = tal_new(pal, next->limit, next->size * 2);
279 al->next = next;
280 bottom->top = al;
282 goto tail_call;
/* from here on the request does not qualify for this arena (too large,
   or 'p' belongs elsewhere) */
284 if (is_own) {
/* chunk outgrew the arena limit: move it to the general heap */
285 al->nb_allocs--;
286 ret = tcc_malloc(size);
287 header = (((tal_header_t *)p) - 1);
288 memcpy(ret, p, header->size);
289 #ifdef TAL_DEBUG
290 header->line_num = -header->line_num;
291 #endif
292 } else if (al->next) {
293 al = al->next;
294 goto tail_call;
295 } else
296 ret = tcc_realloc(p, size);
297 #ifdef TAL_INFO
298 al->nb_missed++;
299 #endif
300 return ret;
303 /* ------------------------------------------------------------------------- */
304 /* CString handling */
305 static void cstr_realloc(CString *cstr, int new_size)
307 int size;
308 void *data;
310 size = cstr->size_allocated;
311 if (size < 8)
312 size = 8; /* no need to allocate a too small first string */
313 while (size < new_size)
314 size = size * 2;
315 data = tal_realloc(cstr_alloc, cstr->data_allocated, size);
316 cstr->data_allocated = data;
317 cstr->size_allocated = size;
318 cstr->data = data;
321 /* add a byte */
322 ST_FUNC void cstr_ccat(CString *cstr, int ch)
324 int size;
325 size = cstr->size + 1;
326 if (size > cstr->size_allocated)
327 cstr_realloc(cstr, size);
328 ((unsigned char *)cstr->data)[size - 1] = ch;
329 cstr->size = size;
332 ST_FUNC void cstr_cat(CString *cstr, const char *str, int len)
334 int size;
335 if (len <= 0)
336 len = strlen(str) + 1 + len;
337 size = cstr->size + len;
338 if (size > cstr->size_allocated)
339 cstr_realloc(cstr, size);
340 memmove(((unsigned char *)cstr->data) + cstr->size, str, len);
341 cstr->size = size;
344 /* add a wide char */
345 ST_FUNC void cstr_wccat(CString *cstr, int ch)
347 int size;
348 size = cstr->size + sizeof(nwchar_t);
349 if (size > cstr->size_allocated)
350 cstr_realloc(cstr, size);
351 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
352 cstr->size = size;
355 ST_FUNC void cstr_new(CString *cstr)
357 memset(cstr, 0, sizeof(CString));
360 /* free string and reset it to NULL */
361 ST_FUNC void cstr_free(CString *cstr)
363 tal_free(cstr_alloc, cstr->data_allocated);
364 cstr_new(cstr);
367 /* reset string to empty */
368 ST_FUNC void cstr_reset(CString *cstr)
370 cstr->size = 0;
373 /* XXX: unicode ? */
374 static void add_char(CString *cstr, int c)
376 if (c == '\'' || c == '\"' || c == '\\') {
377 /* XXX: could be more precise if char or string */
378 cstr_ccat(cstr, '\\');
380 if (c >= 32 && c <= 126) {
381 cstr_ccat(cstr, c);
382 } else {
383 cstr_ccat(cstr, '\\');
384 if (c == '\n') {
385 cstr_ccat(cstr, 'n');
386 } else {
387 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
388 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
389 cstr_ccat(cstr, '0' + (c & 7));
394 /* ------------------------------------------------------------------------- */
395 /* allocate a new token */
396 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
398 TokenSym *ts, **ptable;
399 int i;
401 if (tok_ident >= SYM_FIRST_ANOM)
402 tcc_error("memory full (symbols)");
404 /* expand token table if needed */
405 i = tok_ident - TOK_IDENT;
406 if ((i % TOK_ALLOC_INCR) == 0) {
407 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
408 table_ident = ptable;
411 ts = tal_realloc(toksym_alloc, 0, sizeof(TokenSym) + len);
412 table_ident[i] = ts;
413 ts->tok = tok_ident++;
414 ts->sym_define = NULL;
415 ts->sym_label = NULL;
416 ts->sym_struct = NULL;
417 ts->sym_identifier = NULL;
418 ts->len = len;
419 ts->hash_next = NULL;
420 memcpy(ts->str, str, len);
421 ts->str[len] = '\0';
422 *pts = ts;
423 return ts;
426 #define TOK_HASH_INIT 1
427 #define TOK_HASH_FUNC(h, c) ((h) + ((h) << 5) + ((h) >> 27) + (c))
430 /* find a token and add it if not found */
431 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
433 TokenSym *ts, **pts;
434 int i;
435 unsigned int h;
437 h = TOK_HASH_INIT;
438 for(i=0;i<len;i++)
439 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
440 h &= (TOK_HASH_SIZE - 1);
442 pts = &hash_ident[h];
443 for(;;) {
444 ts = *pts;
445 if (!ts)
446 break;
447 if (ts->len == len && !memcmp(ts->str, str, len))
448 return ts;
449 pts = &(ts->hash_next);
451 return tok_alloc_new(pts, str, len);
454 /* XXX: buffer overflow */
455 /* XXX: float tokens */
/* Return a printable spelling of token 'v'; 'cv' supplies the value for
   tokens that carry one.
   The result is the identifier text stored in table_ident, the data of
   a preprocessor number/string, or the static cstr_buf -- in the last
   case it is overwritten by the next call. NULL is returned for a token
   number that matches none of the known ranges.
   NOTE(review): several branches write through 'p' with sprintf/strcpy
   without growing cstr_buf first; this presumably relies on the buffer
   having been preallocated elsewhere -- confirm. */
456 ST_FUNC const char *get_tok_str(int v, CValue *cv)
458 char *p;
459 int i, len;
461 cstr_reset(&cstr_buf);
462 p = cstr_buf.data;
464 switch(v) {
465 case TOK_CINT:
466 case TOK_CUINT:
467 /* XXX: not quite exact, but only useful for testing */
468 sprintf(p, "%llu", (unsigned long long)cv->i);
469 break;
470 case TOK_CLLONG:
471 case TOK_CULLONG:
472 /* XXX: not quite exact, but only useful for testing */
473 #ifdef _WIN32
474 sprintf(p, "%u", (unsigned)cv->i);
475 #else
476 sprintf(p, "%llu", (unsigned long long)cv->i);
477 #endif
478 break;
479 case TOK_LCHAR:
480 cstr_ccat(&cstr_buf, 'L');
/* intentional fall through: a wide char is an 'L' prefix + char literal */
481 case TOK_CCHAR:
482 cstr_ccat(&cstr_buf, '\'');
483 add_char(&cstr_buf, cv->i);
484 cstr_ccat(&cstr_buf, '\'');
485 cstr_ccat(&cstr_buf, '\0');
486 break;
487 case TOK_PPNUM:
488 case TOK_PPSTR:
489 return (char*)cv->str.data;
490 case TOK_LSTR:
491 cstr_ccat(&cstr_buf, 'L');
/* intentional fall through: a wide string is an 'L' prefix + string */
492 case TOK_STR:
493 cstr_ccat(&cstr_buf, '\"');
494 if (v == TOK_STR) {
/* the stored size includes the terminator, which is not printed */
495 len = cv->str.size - 1;
496 for(i=0;i<len;i++)
497 add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]);
498 } else {
499 len = (cv->str.size / sizeof(nwchar_t)) - 1;
500 for(i=0;i<len;i++)
501 add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]);
503 cstr_ccat(&cstr_buf, '\"');
504 cstr_ccat(&cstr_buf, '\0');
505 break;
507 case TOK_CFLOAT:
508 cstr_cat(&cstr_buf, "<float>", 0);
509 break;
510 case TOK_CDOUBLE:
511 cstr_cat(&cstr_buf, "<double>", 0);
512 break;
513 case TOK_CLDOUBLE:
514 cstr_cat(&cstr_buf, "<long double>", 0);
515 break;
516 case TOK_LINENUM:
517 cstr_cat(&cstr_buf, "<linenumber>", 0);
518 break;
520 /* above tokens have value, the ones below don't */
522 case TOK_LT:
523 v = '<';
524 goto addv;
525 case TOK_GT:
526 v = '>';
527 goto addv;
528 case TOK_DOTS:
529 return strcpy(p, "...");
530 case TOK_A_SHL:
531 return strcpy(p, "<<=");
532 case TOK_A_SAR:
533 return strcpy(p, ">>=");
534 default:
535 if (v < TOK_IDENT) {
536 /* search in two bytes table */
537 const unsigned char *q = tok_two_chars;
538 while (*q) {
539 if (q[2] == v) {
540 *p++ = q[0];
541 *p++ = q[1];
542 *p = '\0';
543 return cstr_buf.data;
545 q += 3;
/* not a two-character operator: values from 127 up are shown in hex,
   smaller ones are emitted as the character itself */
547 if (v >= 127) {
548 sprintf(cstr_buf.data, "<%02x>", v);
549 return cstr_buf.data;
551 addv:
552 *p++ = v;
553 *p = '\0';
554 } else if (v < tok_ident) {
555 return table_ident[v - TOK_IDENT]->str;
556 } else if (v >= SYM_FIRST_ANOM) {
557 /* special name for anonymous symbol */
558 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
559 } else {
560 /* should never happen */
561 return NULL;
563 break;
565 return cstr_buf.data;
568 /* return the current character, handling end of block if necessary
569 (but not stray) */
570 ST_FUNC int handle_eob(void)
572 BufferedFile *bf = file;
573 int len;
575 /* only tries to read if really end of buffer */
576 if (bf->buf_ptr >= bf->buf_end) {
577 if (bf->fd != -1) {
578 #if defined(PARSE_DEBUG)
579 len = 1;
580 #else
581 len = IO_BUF_SIZE;
582 #endif
583 len = read(bf->fd, bf->buffer, len);
584 if (len < 0)
585 len = 0;
586 } else {
587 len = 0;
589 total_bytes += len;
590 bf->buf_ptr = bf->buffer;
591 bf->buf_end = bf->buffer + len;
592 *bf->buf_end = CH_EOB;
594 if (bf->buf_ptr < bf->buf_end) {
595 return bf->buf_ptr[0];
596 } else {
597 bf->buf_ptr = bf->buf_end;
598 return CH_EOF;
602 /* read next char from current input file and handle end of input buffer */
603 ST_INLN void inp(void)
605 ch = *(++(file->buf_ptr));
606 /* end of buffer/file handling */
607 if (ch == CH_EOB)
608 ch = handle_eob();
611 /* handle '\[\r]\n' */
612 static int handle_stray_noerror(void)
614 while (ch == '\\') {
615 inp();
616 if (ch == '\n') {
617 file->line_num++;
618 inp();
619 } else if (ch == '\r') {
620 inp();
621 if (ch != '\n')
622 goto fail;
623 file->line_num++;
624 inp();
625 } else {
626 fail:
627 return 1;
630 return 0;
/* Skip line continuations at 'ch'; a backslash that does not start one
   is reported as an error. */
static void handle_stray(void)
{
    const int stray = handle_stray_noerror();

    if (stray)
        tcc_error("stray '\\' in program");
}
639 /* skip the stray and handle the \\n case. Output an error if
640 incorrect char after the stray */
641 static int handle_stray1(uint8_t *p)
643 int c;
645 file->buf_ptr = p;
646 if (p >= file->buf_end) {
647 c = handle_eob();
648 if (c != '\\')
649 return c;
650 p = file->buf_ptr;
652 ch = *p;
653 if (handle_stray_noerror()) {
654 if (!(parse_flags & PARSE_FLAG_ACCEPT_STRAYS))
655 tcc_error("stray '\\' in program");
656 *--file->buf_ptr = '\\';
658 p = file->buf_ptr;
659 c = *p;
660 return c;
663 /* handle just the EOB case, but not stray */
/* Advances p and loads the next character into c. When that character is
   '\' (which is also what the end-of-buffer sentinel looks like to the
   scanners), handle_eob() is called to refill the buffer and p is
   re-read from file->buf_ptr. */
664 #define PEEKC_EOB(c, p)\
666 p++;\
667 c = *p;\
668 if (c == '\\') {\
669 file->buf_ptr = p;\
670 c = handle_eob();\
671 p = file->buf_ptr;\
675 /* handle the complicated stray case */
/* Advances p and loads the next character into c. A '\' is passed to
   handle_stray1(), which deals with buffer refill and line
   continuations; p is then re-read from file->buf_ptr. */
676 #define PEEKC(c, p)\
678 p++;\
679 c = *p;\
680 if (c == '\\') {\
681 c = handle_stray1(p);\
682 p = file->buf_ptr;\
686 /* input with '\[\r]\n' handling. Note that this function cannot
687 handle other characters after '\', so you cannot call it inside
688 strings or comments */
689 ST_FUNC void minp(void)
691 inp();
692 if (ch == '\\')
693 handle_stray();
696 /* single line C++ comments */
/* Skip a '//' comment. Judging from the initial p++, 'p' is expected to
   point at the second '/' -- TODO confirm against callers.
   Returns a pointer to the terminating '\n' (not consumed) or to the
   position where end of file was detected. A backslash-newline inside
   the comment continues it onto the next line. */
697 static uint8_t *parse_line_comment(uint8_t *p)
699 int c;
701 p++;
702 for(;;) {
703 c = *p;
704 redo:
705 if (c == '\n' || c == CH_EOF) {
706 break;
707 } else if (c == '\\') {
/* either a real backslash or the end-of-buffer sentinel: let
   handle_eob() refill the buffer and tell which one it is */
708 file->buf_ptr = p;
709 c = handle_eob();
710 p = file->buf_ptr;
711 if (c == '\\') {
712 PEEKC_EOB(c, p);
713 if (c == '\n') {
714 file->line_num++;
715 PEEKC_EOB(c, p);
716 } else if (c == '\r') {
717 PEEKC_EOB(c, p);
718 if (c == '\n') {
719 file->line_num++;
720 PEEKC_EOB(c, p);
723 } else {
/* it was only the end of the buffer: re-examine the refilled character */
724 goto redo;
726 } else {
727 p++;
730 return p;
733 /* C comments */
/* Skip a C comment. Judging from the initial p++, 'p' is expected to
   point at the '*' of the opening -- TODO confirm against callers.
   Returns a pointer just past the closing '/'. Newlines inside the
   comment are counted in file->line_num; reaching end of file before the
   comment closes is an error. */
734 ST_FUNC uint8_t *parse_comment(uint8_t *p)
736 int c;
738 p++;
739 for(;;) {
740 /* fast skip loop */
/* (manually unrolled twice; stops at any character needing attention) */
741 for(;;) {
742 c = *p;
743 if (c == '\n' || c == '*' || c == '\\')
744 break;
745 p++;
746 c = *p;
747 if (c == '\n' || c == '*' || c == '\\')
748 break;
749 p++;
751 /* now we can handle all the cases */
752 if (c == '\n') {
753 file->line_num++;
754 p++;
755 } else if (c == '*') {
756 p++;
/* after one or more '*': a '/' ends the comment, but line continuations
   between the '*' and the '/' have to be skipped first */
757 for(;;) {
758 c = *p;
759 if (c == '*') {
760 p++;
761 } else if (c == '/') {
762 goto end_of_comment;
763 } else if (c == '\\') {
764 file->buf_ptr = p;
765 c = handle_eob();
766 p = file->buf_ptr;
767 if (c == CH_EOF)
768 tcc_error("unexpected end of file in comment");
769 if (c == '\\') {
770 /* skip '\[\r]\n', otherwise just skip the stray */
771 while (c == '\\') {
772 PEEKC_EOB(c, p);
773 if (c == '\n') {
774 file->line_num++;
775 PEEKC_EOB(c, p);
776 } else if (c == '\r') {
777 PEEKC_EOB(c, p);
778 if (c == '\n') {
779 file->line_num++;
780 PEEKC_EOB(c, p);
782 } else {
783 goto after_star;
787 } else {
788 break;
791 after_star: ;
792 } else {
793 /* stray, eob or eof */
794 file->buf_ptr = p;
795 c = handle_eob();
796 p = file->buf_ptr;
797 if (c == CH_EOF) {
798 tcc_error("unexpected end of file in comment");
799 } else if (c == '\\') {
800 p++;
804 end_of_comment:
805 p++;
806 return p;
809 #define cinp minp
811 static inline void skip_spaces(void)
813 while (isidnum_table[ch - CH_EOF] & IS_SPC)
814 cinp();
817 static inline int check_space(int t, int *spc)
819 if (t < 256 && (isidnum_table[t - CH_EOF] & IS_SPC)) {
820 if (*spc)
821 return 1;
822 *spc = 1;
823 } else
824 *spc = 0;
825 return 0;
828 /* parse a string without interpreting escapes */
/* Scan a string or character literal without interpreting its escapes.
   p:   points at the opening delimiter
   sep: the delimiter character that ends the literal
   str: receives the raw contents (escapes kept as written); may be NULL
        to just skip the literal
   Returns a pointer just past the closing delimiter. Backslash-newline
   sequences are removed; end of file inside the literal is an error. */
829 static uint8_t *parse_pp_string(uint8_t *p,
830 int sep, CString *str)
832 int c;
833 p++;
834 for(;;) {
835 c = *p;
836 if (c == sep) {
837 break;
838 } else if (c == '\\') {
/* real backslash or end-of-buffer sentinel: handle_eob() tells which */
839 file->buf_ptr = p;
840 c = handle_eob();
841 p = file->buf_ptr;
842 if (c == CH_EOF) {
843 unterminated_string:
844 /* XXX: indicate line number of start of string */
845 tcc_error("missing terminating %c character", sep);
846 } else if (c == '\\') {
847 /* escape : just skip \[\r]\n */
848 PEEKC_EOB(c, p);
849 if (c == '\n') {
850 file->line_num++;
851 p++;
852 } else if (c == '\r') {
853 PEEKC_EOB(c, p);
/* NOTE(review): this message embeds a raw newline and carriage return;
   the escapes were presumably meant to be printed literally -- confirm */
854 if (c != '\n')
855 expect("'\n' after '\r'");
856 file->line_num++;
857 p++;
858 } else if (c == CH_EOF) {
859 goto unterminated_string;
860 } else {
/* any other escape: keep both the backslash and the escaped character */
861 if (str) {
862 cstr_ccat(str, '\\');
863 cstr_ccat(str, c);
865 p++;
868 } else if (c == '\n') {
869 file->line_num++;
870 goto add_char;
871 } else if (c == '\r') {
872 PEEKC_EOB(c, p);
873 if (c != '\n') {
874 if (str)
875 cstr_ccat(str, '\r');
876 } else {
877 file->line_num++;
878 goto add_char;
880 } else {
881 add_char:
882 if (str)
883 cstr_ccat(str, c);
884 p++;
887 p++;
888 return p;
891 /* skip block of text until #else, #elif or #endif. skip also pairs of
892 #if/#endif */
/* Skip the text of an inactive conditional group: scan forward from
   file->buf_ptr until the #else, #elif or #endif that belongs to the
   current nesting level, leaving 'tok' set to that directive.
   Locals:
     a                 nesting depth of #if/#ifdef/#ifndef seen while skipping
     start_of_line     only white space seen so far on the current line
     in_warn_or_error  rest of the line follows #error/#warning, so quotes
                       and '/' are treated as ordinary characters */
893 static void preprocess_skip(void)
895 int a, start_of_line, c, in_warn_or_error;
896 uint8_t *p;
898 p = file->buf_ptr;
899 a = 0;
900 redo_start:
901 start_of_line = 1;
902 in_warn_or_error = 0;
903 for(;;) {
904 redo_no_start:
905 c = *p;
906 switch(c) {
907 case ' ':
908 case '\t':
909 case '\f':
910 case '\v':
911 case '\r':
912 p++;
913 goto redo_no_start;
914 case '\n':
915 file->line_num++;
916 p++;
917 goto redo_start;
918 case '\\':
/* real backslash or end-of-buffer sentinel */
919 file->buf_ptr = p;
920 c = handle_eob();
921 if (c == CH_EOF) {
922 expect("#endif");
923 } else if (c == '\\') {
924 ch = file->buf_ptr[0];
925 handle_stray_noerror();
927 p = file->buf_ptr;
928 goto redo_no_start;
929 /* skip strings */
930 case '\"':
931 case '\'':
932 if (in_warn_or_error)
933 goto _default;
934 p = parse_pp_string(p, c, NULL);
935 break;
936 /* skip comments */
937 case '/':
938 if (in_warn_or_error)
939 goto _default;
940 file->buf_ptr = p;
941 ch = *p;
942 minp();
943 p = file->buf_ptr;
944 if (ch == '*') {
945 p = parse_comment(p);
946 } else if (ch == '/') {
947 p = parse_line_comment(p);
949 break;
950 case '#':
951 p++;
952 if (start_of_line) {
/* a directive: read its name as a token */
953 file->buf_ptr = p;
954 next_nomacro();
955 p = file->buf_ptr;
956 if (a == 0 &&
957 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
958 goto the_end;
959 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
960 a++;
961 else if (tok == TOK_ENDIF)
962 a--;
963 else if( tok == TOK_ERROR || tok == TOK_WARNING)
964 in_warn_or_error = 1;
965 else if (tok == TOK_LINEFEED)
966 goto redo_start;
/* in assembler files any other '#' starts a line comment */
967 else if (parse_flags & PARSE_FLAG_ASM_FILE)
968 p = parse_line_comment(p);
969 } else if (parse_flags & PARSE_FLAG_ASM_FILE)
970 p = parse_line_comment(p);
971 break;
972 _default:
973 default:
974 p++;
975 break;
977 start_of_line = 0;
979 the_end: ;
980 file->buf_ptr = p;
983 /* ParseState handling */
985 /* XXX: currently, no include file info is stored. Thus, we cannot display
986 accurate messages if the function or data definition spans multiple
987 files */
989 /* save current parse state in 's' */
990 ST_FUNC void save_parse_state(ParseState *s)
992 s->line_num = file->line_num;
993 s->macro_ptr = macro_ptr;
994 s->tok = tok;
995 s->tokc = tokc;
998 /* restore parse state from 's' */
999 ST_FUNC void restore_parse_state(ParseState *s)
1001 file->line_num = s->line_num;
1002 macro_ptr = s->macro_ptr;
1003 tok = s->tok;
1004 tokc = s->tokc;
1007 /* return the number of additional 'ints' necessary to store the
1008 token */
1009 static inline int tok_size(const int *p)
1011 switch(*p) {
1012 /* 4 bytes */
1013 case TOK_CINT:
1014 case TOK_CUINT:
1015 case TOK_CCHAR:
1016 case TOK_LCHAR:
1017 case TOK_CFLOAT:
1018 case TOK_LINENUM:
1019 return 1 + 1;
1020 case TOK_STR:
1021 case TOK_LSTR:
1022 case TOK_PPNUM:
1023 case TOK_PPSTR:
1024 return 1 + ((sizeof(CString) + ((CString *)(p+1))->size + 3) >> 2);
1025 case TOK_CDOUBLE:
1026 case TOK_CLLONG:
1027 case TOK_CULLONG:
1028 return 1 + 2;
1029 case TOK_CLDOUBLE:
1030 return 1 + LDOUBLE_SIZE / 4;
1031 default:
1032 return 1 + 0;
1036 /* token string handling */
1038 ST_INLN void tok_str_new(TokenString *s)
1040 s->str = NULL;
1041 s->len = 0;
1042 s->allocated_len = 0;
1043 s->last_line_num = -1;
1046 ST_FUNC int *tok_str_dup(TokenString *s)
1048 int *str;
1050 str = tal_realloc(tokstr_alloc, 0, s->len * sizeof(int));
1051 memcpy(str, s->str, s->len * sizeof(int));
1052 return str;
1055 ST_FUNC void tok_str_free(int *str)
1057 tal_free(tokstr_alloc, str);
1060 ST_FUNC int *tok_str_realloc(TokenString *s, int new_size)
1062 int *str, size;
1064 size = s->allocated_len;
1065 if (size < 16)
1066 size = 16;
1067 while (size < new_size)
1068 size = size * 2;
1069 TCC_ASSERT((size & (size -1)) == 0);
1070 if (size > s->allocated_len) {
1071 str = tal_realloc(tokstr_alloc, s->str, size * sizeof(int));
1072 s->allocated_len = size;
1073 s->str = str;
1075 return s->str;
1078 ST_FUNC void tok_str_add(TokenString *s, int t)
1080 int len, *str;
1082 len = s->len;
1083 str = s->str;
1084 if (len >= s->allocated_len)
1085 str = tok_str_realloc(s, len + 1);
1086 str[len++] = t;
1087 s->len = len;
/* Append token 't' together with its value 'cv' to token string 's'.
   Layout written after the token word:
     - int/char/float/line-number tokens: one word (cv->tab[0])
     - string-like tokens: one word with the byte count, then the bytes
       rounded up to whole ints
     - double / long long: two words; long double: LDOUBLE_SIZE/4 words
     - anything else: no value words */
1090 static void tok_str_add2(TokenString *s, int t, CValue *cv)
1092 int len, *str;
1094 len = s->len;
1095 str = s->str;
1097 /* allocate space for worst case */
1098 if (len + TOK_MAX_SIZE >= s->allocated_len)
1099 str = tok_str_realloc(s, len + TOK_MAX_SIZE + 1);
1100 str[len++] = t;
1101 switch(t) {
1102 case TOK_CINT:
1103 case TOK_CUINT:
1104 case TOK_CCHAR:
1105 case TOK_LCHAR:
1106 case TOK_CFLOAT:
1107 case TOK_LINENUM:
1108 str[len++] = cv->tab[0];
1109 break;
1110 case TOK_PPNUM:
1111 case TOK_PPSTR:
1112 case TOK_STR:
1113 case TOK_LSTR:
1115 /* Insert the string into the int array. */
/* nb_words counts the length word plus the data words; strings can
   exceed the worst case reserved above, so the buffer is grown again */
1116 size_t nb_words =
1117 1 + (cv->str.size + sizeof(int) - 1) / sizeof(int);
1118 if (len + nb_words >= s->allocated_len)
1119 str = tok_str_realloc(s, len + nb_words + 1);
1120 str[len] = cv->str.size;
1121 memcpy(&str[len + 1], cv->str.data, cv->str.size);
1122 len += nb_words;
1124 break;
1125 case TOK_CDOUBLE:
1126 case TOK_CLLONG:
1127 case TOK_CULLONG:
1128 #if LDOUBLE_SIZE == 8
1129 case TOK_CLDOUBLE:
1130 #endif
1131 str[len++] = cv->tab[0];
1132 str[len++] = cv->tab[1];
1133 break;
1134 #if LDOUBLE_SIZE == 12
1135 case TOK_CLDOUBLE:
1136 str[len++] = cv->tab[0];
1137 str[len++] = cv->tab[1];
1138 str[len++] = cv->tab[2];
1139 #elif LDOUBLE_SIZE == 16
1140 case TOK_CLDOUBLE:
1141 str[len++] = cv->tab[0];
1142 str[len++] = cv->tab[1];
1143 str[len++] = cv->tab[2];
1144 str[len++] = cv->tab[3];
1145 #elif LDOUBLE_SIZE != 8
1146 #error add long double size support
1147 #endif
1148 break;
1149 default:
1150 break;
1152 s->len = len;
1155 /* add the current parse token in token string 's' */
1156 ST_FUNC void tok_str_add_tok(TokenString *s)
1158 CValue cval;
1160 /* save line number info */
1161 if (file->line_num != s->last_line_num) {
1162 s->last_line_num = file->line_num;
1163 cval.i = s->last_line_num;
1164 tok_str_add2(s, TOK_LINENUM, &cval);
1166 tok_str_add2(s, tok, &tokc);
1169 /* get a token from an integer array and increment pointer
1170 accordingly. we code it as a macro to avoid pointer aliasing. */
/* Decode one token from the token string at *pp.
   t:  receives the token number
   pp: read position; advanced past the token and its value words
   cv: receives the value for tokens that carry one. For string-like
       tokens cv->str.data points into the token string itself (nothing
       is copied) and data_allocated is cleared.
   This is the reader for the layout produced by tok_str_add2(). */
1171 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
1173 const int *p = *pp;
1174 int n, *tab;
1176 tab = cv->tab;
1177 switch(*t = *p++) {
1178 case TOK_CINT:
1179 case TOK_CUINT:
1180 case TOK_CCHAR:
1181 case TOK_LCHAR:
1182 case TOK_CFLOAT:
1183 case TOK_LINENUM:
1184 tab[0] = *p++;
1185 break;
1186 case TOK_STR:
1187 case TOK_LSTR:
1188 case TOK_PPNUM:
1189 case TOK_PPSTR:
/* length word, then the bytes padded to whole ints */
1190 cv->str.size = *p++;
1191 cv->str.data = p;
1192 cv->str.data_allocated = 0;
1193 p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
1194 break;
1195 case TOK_CDOUBLE:
1196 case TOK_CLLONG:
1197 case TOK_CULLONG:
1198 n = 2;
1199 goto copy;
1200 case TOK_CLDOUBLE:
1201 #if LDOUBLE_SIZE == 16
1202 n = 4;
1203 #elif LDOUBLE_SIZE == 12
1204 n = 3;
1205 #elif LDOUBLE_SIZE == 8
1206 n = 2;
1207 #else
1208 # error add long double size support
1209 #endif
/* copy n value words into cv->tab */
1210 copy:
1212 *tab++ = *p++;
1213 while (--n);
1214 break;
1215 default:
1216 break;
1218 *pp = p;
1221 /* Calling this function is expensive, but it is not possible
1222 to read a token string backwards. */
1223 static int tok_last(const int *str0, const int *str1)
1225 const int *str = str0;
1226 int tok = 0;
1227 CValue cval;
1229 while (str < str1)
1230 TOK_GET(&tok, &str, &cval);
1231 return tok;
1234 static int macro_is_equal(const int *a, const int *b)
1236 CValue cv;
1237 int t;
1239 if (!a || !b)
1240 return 1;
1242 while (*a && *b) {
1243 /* first time preallocate static cstr_buf, next time only reset position to start */
1244 cstr_reset(&cstr_buf);
1245 TOK_GET(&t, &a, &cv);
1246 cstr_cat(&cstr_buf, get_tok_str(t, &cv), 0);
1247 TOK_GET(&t, &b, &cv);
1248 if (strcmp(cstr_buf.data, get_tok_str(t, &cv)))
1249 return 0;
1251 return !(*a || *b);
1254 /* defines handling */
1255 ST_INLN void define_push(int v, int macro_type, TokenString *str, Sym *first_arg)
1257 Sym *s, *o;
1259 o = define_find(v);
1260 s = sym_push2(&define_stack, v, macro_type, 0);
1261 s->d = str ? tok_str_dup(str) : NULL;
1262 s->next = first_arg;
1263 table_ident[v - TOK_IDENT]->sym_define = s;
1265 if (o && !macro_is_equal(o->d, s->d))
1266 tcc_warning("%s redefined", get_tok_str(v, NULL));
1269 /* undefined a define symbol. Its name is just set to zero */
1270 ST_FUNC void define_undef(Sym *s)
1272 int v = s->v;
1273 if (v >= TOK_IDENT && v < tok_ident)
1274 table_ident[v - TOK_IDENT]->sym_define = NULL;
1277 ST_INLN Sym *define_find(int v)
1279 v -= TOK_IDENT;
1280 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1281 return NULL;
1282 return table_ident[v]->sym_define;
1285 /* free define stack until top reaches 'b' */
1286 ST_FUNC void free_defines(Sym *b)
1288 Sym *top, *top1;
1289 int v;
1291 top = define_stack;
1292 while (top != b) {
1293 top1 = top->prev;
1294 /* do not free args or predefined defines */
1295 if (top->d)
1296 tok_str_free(top->d);
1297 v = top->v;
1298 if (v >= TOK_IDENT && v < tok_ident)
1299 table_ident[v - TOK_IDENT]->sym_define = NULL;
1300 sym_free(top);
1301 top = top1;
1303 define_stack = b;
1306 /* label lookup */
1307 ST_FUNC Sym *label_find(int v)
1309 v -= TOK_IDENT;
1310 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1311 return NULL;
1312 return table_ident[v]->sym_label;
1315 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1317 Sym *s, **ps;
1318 s = sym_push2(ptop, v, 0, 0);
1319 s->r = flags;
1320 ps = &table_ident[v - TOK_IDENT]->sym_label;
1321 if (ptop == &global_label_stack) {
1322 /* modify the top most local identifier, so that
1323 sym_identifier will point to 's' when popped */
1324 while (*ps != NULL)
1325 ps = &(*ps)->prev_tok;
1327 s->prev_tok = *ps;
1328 *ps = s;
1329 return s;
1332 /* pop labels until element last is reached. Look if any labels are
1333 undefined. Define symbols if '&&label' was used. */
1334 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1336 Sym *s, *s1;
1337 for(s = *ptop; s != slast; s = s1) {
1338 s1 = s->prev;
1339 if (s->r == LABEL_DECLARED) {
1340 tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1341 } else if (s->r == LABEL_FORWARD) {
1342 tcc_error("label '%s' used but not defined",
1343 get_tok_str(s->v, NULL));
1344 } else {
1345 if (s->c) {
1346 /* define corresponding symbol. A size of
1347 1 is put. */
1348 put_extern_sym(s, cur_text_section, s->jnext, 1);
1351 /* remove label */
1352 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1353 sym_free(s);
1355 *ptop = slast;
1358 /* eval an expression for #if/#elif */
1359 static int expr_preprocess(void)
1361 int c, t;
1362 TokenString str;
1364 tok_str_new(&str);
1365 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1366 next(); /* do macro subst */
1367 if (tok == TOK_DEFINED) {
1368 next_nomacro();
1369 t = tok;
1370 if (t == '(')
1371 next_nomacro();
1372 c = define_find(tok) != 0;
1373 if (t == '(')
1374 next_nomacro();
1375 tok = TOK_CINT;
1376 tokc.i = c;
1377 } else if (tok >= TOK_IDENT) {
1378 /* if undefined macro */
1379 tok = TOK_CINT;
1380 tokc.i = 0;
1382 tok_str_add_tok(&str);
1384 tok_str_add(&str, -1); /* simulate end of file */
1385 tok_str_add(&str, 0);
1386 /* now evaluate C constant expression */
1387 begin_macro(&str, 0);
1388 next();
1389 c = expr_const();
1390 end_macro();
1391 return c != 0;
1395 /* parse after #define */
/* Parse the remainder of a #define directive; on entry 'tok' is the
   macro name. The parameter list (if a '(' directly follows the name)
   and the replacement list are read, then the macro is registered with
   define_push().
   While collecting the body, 'spc' tracks white space:
     2  nothing collected yet
     1  the last collected token was a space
     3  the last token was '##'
     4  the last token was '#'
     0  otherwise
   Any non-zero state makes check_space() drop a following space, which
   removes leading spaces, repeated spaces and spaces after '#'/'##'. */
1396 ST_FUNC void parse_define(void)
1398 Sym *s, *first, **ps;
1399 int v, t, varg, is_vaargs, spc;
1400 int saved_parse_flags = parse_flags;
1402 v = tok;
1403 if (v < TOK_IDENT)
1404 tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1405 /* XXX: should check if same macro (ANSI) */
1406 first = NULL;
1407 t = MACRO_OBJ;
1408 /* '(' must be just after macro definition for MACRO_FUNC */
1409 parse_flags |= PARSE_FLAG_SPACES;
1410 next_nomacro_spc();
1411 if (tok == '(') {
1412 /* must be able to parse TOK_DOTS (in asm mode '.' can be part of identifier) */
1413 parse_flags &= ~PARSE_FLAG_ASM_FILE;
1414 isidnum_table['.' - CH_EOF] = 0;
1415 next_nomacro();
1416 ps = &first;
1417 if (tok != ')') for (;;) {
1418 varg = tok;
1419 next_nomacro();
1420 is_vaargs = 0;
/* a bare '...' names the parameter __VA_ARGS__; 'name...' is accepted
   when gnu_ext is set */
1421 if (varg == TOK_DOTS) {
1422 varg = TOK___VA_ARGS__;
1423 is_vaargs = 1;
1424 } else if (tok == TOK_DOTS && gnu_ext) {
1425 is_vaargs = 1;
1426 next_nomacro();
1428 if (varg < TOK_IDENT)
1429 bad_list:
1430 tcc_error("bad macro parameter list");
/* parameters are pushed on the define stack with SYM_FIELD set and
   chained through ->next starting at 'first' */
1431 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1432 *ps = s;
1433 ps = &s->next;
1434 if (tok == ')')
1435 break;
/* a variadic parameter must be the last one */
1436 if (tok != ',' || is_vaargs)
1437 goto bad_list;
1438 next_nomacro();
1440 next_nomacro_spc();
1441 t = MACRO_FUNC;
/* restore the asm-file handling of '.' that was switched off above */
1442 parse_flags |= (saved_parse_flags & PARSE_FLAG_ASM_FILE);
1443 isidnum_table['.' - CH_EOF] =
1444 (parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0;
1447 tokstr_buf.len = 0;
1448 spc = 2;
1449 parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED;
1450 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1451 /* remove spaces around ## and after '#' */
1452 if (TOK_TWOSHARPS == tok) {
1453 if (2 == spc)
1454 goto bad_twosharp;
/* drop the space that was collected just before the '##' */
1455 if (1 == spc)
1456 --tokstr_buf.len;
1457 spc = 3;
1458 } else if ('#' == tok) {
1459 spc = 4;
1460 } else if (check_space(tok, &spc)) {
1461 goto skip;
1463 tok_str_add2(&tokstr_buf, tok, &tokc);
1464 skip:
1465 next_nomacro_spc();
1468 parse_flags = saved_parse_flags;
1469 if (spc == 1)
1470 --tokstr_buf.len; /* remove trailing space */
1471 tok_str_add(&tokstr_buf, 0);
/* spc == 3 here means the body ended with '##' */
1472 if (3 == spc)
1473 bad_twosharp:
1474 tcc_error("'##' cannot appear at either end of macro");
1475 define_push(v, t, &tokstr_buf, first);
1478 static inline int hash_cached_include(const char *filename)
1480 const unsigned char *s;
1481 unsigned int h;
1483 h = TOK_HASH_INIT;
1484 s = (unsigned char *) filename;
1485 while (*s) {
1486 h = TOK_HASH_FUNC(h, *s);
1487 s++;
1489 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1490 return h;
1493 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1495 CachedInclude *e;
1496 int i, h;
1497 h = hash_cached_include(filename);
1498 i = s1->cached_includes_hash[h];
1499 for(;;) {
1500 if (i == 0)
1501 break;
1502 e = s1->cached_includes[i - 1];
1503 if (0 == PATHCMP(e->filename, filename))
1504 return e;
1505 i = e->hash_next;
1507 return NULL;
1510 static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
1512 CachedInclude *e;
1513 int h;
1515 if (search_cached_include(s1, filename))
1516 return;
1517 #ifdef INC_DEBUG
1518 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1519 #endif
1520 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1521 strcpy(e->filename, filename);
1522 e->ifndef_macro = ifndef_macro;
1523 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1524 /* add in hash table */
1525 h = hash_cached_include(filename);
1526 e->hash_next = s1->cached_includes_hash[h];
1527 s1->cached_includes_hash[h] = s1->nb_cached_includes;
1530 #define ONCE_PREFIX "#ONCE#"
1532 static void pragma_parse(TCCState *s1)
1534 next_nomacro();
1535 if (tok == TOK_push_macro || tok == TOK_pop_macro) {
1536 int t = tok, v;
1537 Sym *s;
1539 if (next(), tok != '(')
1540 goto pragma_err;
1541 if (next(), tok != TOK_STR)
1542 goto pragma_err;
1543 v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok;
1544 if (next(), tok != ')')
1545 goto pragma_err;
1546 if (t == TOK_push_macro) {
1547 while (NULL == (s = define_find(v)))
1548 define_push(v, 0, NULL, NULL);
1549 s->type.ref = s; /* set push boundary */
1550 } else {
1551 for (s = define_stack; s; s = s->prev)
1552 if (s->v == v && s->type.ref == s) {
1553 s->type.ref = NULL;
1554 break;
1557 if (s)
1558 table_ident[v - TOK_IDENT]->sym_define = s->d ? s : NULL;
1559 else
1560 tcc_warning("unbalanced #pragma pop_macro");
1561 pp_debug_tok = t, pp_debug_symv = v;
1563 } else if (tok == TOK_once) {
1564 char buf1[sizeof(file->filename) + sizeof(ONCE_PREFIX)];
1565 strcpy(buf1, ONCE_PREFIX);
1566 strcat(buf1, file->filename);
1567 #ifdef PATH_NOCASE
1568 strupr(buf1);
1569 #endif
1570 add_cached_include(s1, file->filename, tok_alloc(buf1, strlen(buf1))->tok);
1571 } else if (s1->ppfp) {
1572 /* tcc -E: keep pragmas below unchanged */
1573 unget_tok(' ');
1574 unget_tok(TOK_PRAGMA);
1575 unget_tok('#');
1576 unget_tok(TOK_LINEFEED);
1578 } else if (tok == TOK_pack) {
1579 /* This may be:
1580 #pragma pack(1) // set
1581 #pragma pack() // reset to default
1582 #pragma pack(push,1) // push & set
1583 #pragma pack(pop) // restore previous */
1584 next();
1585 skip('(');
1586 if (tok == TOK_ASM_pop) {
1587 next();
1588 if (s1->pack_stack_ptr <= s1->pack_stack) {
1589 stk_error:
1590 tcc_error("out of pack stack");
1592 s1->pack_stack_ptr--;
1593 } else {
1594 int val = 0;
1595 if (tok != ')') {
1596 if (tok == TOK_ASM_push) {
1597 next();
1598 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
1599 goto stk_error;
1600 s1->pack_stack_ptr++;
1601 skip(',');
1603 if (tok != TOK_CINT)
1604 goto pragma_err;
1605 val = tokc.i;
1606 if (val < 1 || val > 16 || (val & (val - 1)) != 0)
1607 goto pragma_err;
1608 next();
1610 *s1->pack_stack_ptr = val;
1612 if (tok != ')')
1613 goto pragma_err;
1615 } else if (tok == TOK_comment) {
1616 char *file;
1617 next();
1618 skip('(');
1619 if (tok != TOK_lib)
1620 goto pragma_warn;
1621 next();
1622 skip(',');
1623 if (tok != TOK_STR)
1624 goto pragma_err;
1625 file = tcc_strdup((char *)tokc.str.data);
1626 dynarray_add((void ***)&s1->pragma_libs, &s1->nb_pragma_libs, file);
1627 next();
1628 if (tok != ')')
1629 goto pragma_err;
1630 } else {
1631 pragma_warn:
1632 if (s1->warn_unsupported)
1633 tcc_warning("#pragma %s is ignored", get_tok_str(tok, &tokc));
1635 return;
1637 pragma_err:
1638 tcc_error("malformed #pragma directive");
1639 return;
1642 /* is_bof is true if first non space token at beginning of file */
1643 ST_FUNC void preprocess(int is_bof)
1645 TCCState *s1 = tcc_state;
1646 int i, c, n, saved_parse_flags;
1647 char buf[1024], *q;
1648 Sym *s;
1650 saved_parse_flags = parse_flags;
1651 parse_flags = PARSE_FLAG_PREPROCESS
1652 | PARSE_FLAG_TOK_NUM
1653 | PARSE_FLAG_TOK_STR
1654 | PARSE_FLAG_LINEFEED
1655 | (parse_flags & PARSE_FLAG_ASM_FILE)
1658 next_nomacro();
1659 redo:
1660 switch(tok) {
1661 case TOK_DEFINE:
1662 pp_debug_tok = tok;
1663 next_nomacro();
1664 pp_debug_symv = tok;
1665 parse_define();
1666 break;
1667 case TOK_UNDEF:
1668 pp_debug_tok = tok;
1669 next_nomacro();
1670 pp_debug_symv = tok;
1671 s = define_find(tok);
1672 /* undefine symbol by putting an invalid name */
1673 if (s)
1674 define_undef(s);
1675 break;
1676 case TOK_INCLUDE:
1677 case TOK_INCLUDE_NEXT:
1678 ch = file->buf_ptr[0];
1679 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1680 skip_spaces();
1681 if (ch == '<') {
1682 c = '>';
1683 goto read_name;
1684 } else if (ch == '\"') {
1685 c = ch;
1686 read_name:
1687 inp();
1688 q = buf;
1689 while (ch != c && ch != '\n' && ch != CH_EOF) {
1690 if ((q - buf) < sizeof(buf) - 1)
1691 *q++ = ch;
1692 if (ch == '\\') {
1693 if (handle_stray_noerror() == 0)
1694 --q;
1695 } else
1696 inp();
1698 *q = '\0';
1699 minp();
1700 #if 0
1701 /* eat all spaces and comments after include */
1702 /* XXX: slightly incorrect */
1703 while (ch1 != '\n' && ch1 != CH_EOF)
1704 inp();
1705 #endif
1706 } else {
1707 /* computed #include : either we have only strings or
1708 we have anything enclosed in '<>' */
1709 next();
1710 buf[0] = '\0';
1711 if (tok == TOK_STR) {
1712 while (tok != TOK_LINEFEED) {
1713 if (tok != TOK_STR) {
1714 include_syntax:
1715 tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1717 pstrcat(buf, sizeof(buf), (char *)tokc.str.data);
1718 next();
1720 c = '\"';
1721 } else {
1722 int len;
1723 while (tok != TOK_LINEFEED) {
1724 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1725 next();
1727 len = strlen(buf);
1728 /* check syntax and remove '<>' */
1729 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1730 goto include_syntax;
1731 memmove(buf, buf + 1, len - 2);
1732 buf[len - 2] = '\0';
1733 c = '>';
1737 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1738 tcc_error("#include recursion too deep");
1739 /* store current file in stack, but increment stack later below */
1740 *s1->include_stack_ptr = file;
1741 i = tok == TOK_INCLUDE_NEXT ? file->include_next_index : 0;
1742 n = 2 + s1->nb_include_paths + s1->nb_sysinclude_paths;
1743 for (; i < n; ++i) {
1744 char buf1[sizeof file->filename];
1745 CachedInclude *e;
1746 const char *path;
1748 if (i == 0) {
1749 /* check absolute include path */
1750 if (!IS_ABSPATH(buf))
1751 continue;
1752 buf1[0] = 0;
1754 } else if (i == 1) {
1755 /* search in current dir if "header.h" */
1756 if (c != '\"')
1757 continue;
1758 path = file->filename;
1759 pstrncpy(buf1, path, tcc_basename(path) - path);
1761 } else {
1762 /* search in all the include paths */
1763 int j = i - 2, k = j - s1->nb_include_paths;
1764 path = k < 0 ? s1->include_paths[j] : s1->sysinclude_paths[k];
1765 if (path == 0) continue;
1766 pstrcpy(buf1, sizeof(buf1), path);
1767 pstrcat(buf1, sizeof(buf1), "/");
1770 pstrcat(buf1, sizeof(buf1), buf);
1771 e = search_cached_include(s1, buf1);
1772 if (e && define_find(e->ifndef_macro)) {
1773 /* no need to parse the include because the 'ifndef macro'
1774 is defined */
1775 #ifdef INC_DEBUG
1776 printf("%s: skipping cached %s\n", file->filename, buf1);
1777 #endif
1778 goto include_done;
1781 if (tcc_open(s1, buf1) < 0)
1782 continue;
1784 file->include_next_index = i + 1;
1785 #ifdef INC_DEBUG
1786 printf("%s: including %s\n", file->prev->filename, file->filename);
1787 #endif
1788 /* update target deps */
1789 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps,
1790 tcc_strdup(buf1));
1791 /* push current file in stack */
1792 ++s1->include_stack_ptr;
1793 /* add include file debug info */
1794 if (s1->do_debug)
1795 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1796 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1797 ch = file->buf_ptr[0];
1798 goto the_end;
1800 tcc_error("include file '%s' not found", buf);
1801 include_done:
1802 break;
1803 case TOK_IFNDEF:
1804 c = 1;
1805 goto do_ifdef;
1806 case TOK_IF:
1807 c = expr_preprocess();
1808 goto do_if;
1809 case TOK_IFDEF:
1810 c = 0;
1811 do_ifdef:
1812 next_nomacro();
1813 if (tok < TOK_IDENT)
1814 tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
1815 if (is_bof) {
1816 if (c) {
1817 #ifdef INC_DEBUG
1818 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1819 #endif
1820 file->ifndef_macro = tok;
1823 c = (define_find(tok) != 0) ^ c;
1824 do_if:
1825 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1826 tcc_error("memory full (ifdef)");
1827 *s1->ifdef_stack_ptr++ = c;
1828 goto test_skip;
1829 case TOK_ELSE:
1830 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1831 tcc_error("#else without matching #if");
1832 if (s1->ifdef_stack_ptr[-1] & 2)
1833 tcc_error("#else after #else");
1834 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1835 goto test_else;
1836 case TOK_ELIF:
1837 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1838 tcc_error("#elif without matching #if");
1839 c = s1->ifdef_stack_ptr[-1];
1840 if (c > 1)
1841 tcc_error("#elif after #else");
1842 /* last #if/#elif expression was true: we skip */
1843 if (c == 1)
1844 goto skip;
1845 c = expr_preprocess();
1846 s1->ifdef_stack_ptr[-1] = c;
1847 test_else:
1848 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1849 file->ifndef_macro = 0;
1850 test_skip:
1851 if (!(c & 1)) {
1852 skip:
1853 preprocess_skip();
1854 is_bof = 0;
1855 goto redo;
1857 break;
1858 case TOK_ENDIF:
1859 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1860 tcc_error("#endif without matching #if");
1861 s1->ifdef_stack_ptr--;
1862 /* '#ifndef macro' was at the start of file. Now we check if
1863 an '#endif' is exactly at the end of file */
1864 if (file->ifndef_macro &&
1865 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1866 file->ifndef_macro_saved = file->ifndef_macro;
1867 /* need to set to zero to avoid false matches if another
1868 #ifndef at middle of file */
1869 file->ifndef_macro = 0;
1870 while (tok != TOK_LINEFEED)
1871 next_nomacro();
1872 tok_flags |= TOK_FLAG_ENDIF;
1873 goto the_end;
1875 break;
1876 case TOK_PPNUM:
1877 n = strtoul((char*)tokc.str.data, &q, 10);
1878 goto _line_num;
1879 case TOK_LINE:
1880 next();
1881 if (tok != TOK_CINT)
1882 _line_err:
1883 tcc_error("wrong #line format");
1884 n = tokc.i;
1885 _line_num:
1886 next();
1887 if (tok != TOK_LINEFEED) {
1888 if (tok == TOK_STR)
1889 pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.str.data);
1890 else if (parse_flags & PARSE_FLAG_ASM_FILE)
1891 break;
1892 else
1893 goto _line_err;
1894 --n;
1896 if (file->fd > 0)
1897 total_lines += file->line_num - n;
1898 file->line_num = n;
1899 if (s1->do_debug)
1900 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1901 break;
1902 case TOK_ERROR:
1903 case TOK_WARNING:
1904 c = tok;
1905 ch = file->buf_ptr[0];
1906 skip_spaces();
1907 q = buf;
1908 while (ch != '\n' && ch != CH_EOF) {
1909 if ((q - buf) < sizeof(buf) - 1)
1910 *q++ = ch;
1911 if (ch == '\\') {
1912 if (handle_stray_noerror() == 0)
1913 --q;
1914 } else
1915 inp();
1917 *q = '\0';
1918 if (c == TOK_ERROR)
1919 tcc_error("#error %s", buf);
1920 else
1921 tcc_warning("#warning %s", buf);
1922 break;
1923 case TOK_PRAGMA:
1924 pragma_parse(s1);
1925 break;
1926 case TOK_LINEFEED:
1927 goto the_end;
1928 default:
1929 /* ignore gas line comment in an 'S' file. */
1930 if (saved_parse_flags & PARSE_FLAG_ASM_FILE)
1931 goto ignore;
1932 if (tok == '!' && is_bof)
1933 /* '!' is ignored at beginning to allow C scripts. */
1934 goto ignore;
1935 tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1936 ignore:
1937 file->buf_ptr = parse_line_comment(file->buf_ptr);
1938 goto the_end;
1940 /* ignore other preprocess commands or #! for C scripts */
1941 while (tok != TOK_LINEFEED)
1942 next_nomacro();
1943 the_end:
1944 parse_flags = saved_parse_flags;
1947 /* evaluate escape codes in a string. */
1948 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1950 int c, n;
1951 const uint8_t *p;
1953 p = buf;
1954 for(;;) {
1955 c = *p;
1956 if (c == '\0')
1957 break;
1958 if (c == '\\') {
1959 p++;
1960 /* escape */
1961 c = *p;
1962 switch(c) {
1963 case '0': case '1': case '2': case '3':
1964 case '4': case '5': case '6': case '7':
1965 /* at most three octal digits */
1966 n = c - '0';
1967 p++;
1968 c = *p;
1969 if (isoct(c)) {
1970 n = n * 8 + c - '0';
1971 p++;
1972 c = *p;
1973 if (isoct(c)) {
1974 n = n * 8 + c - '0';
1975 p++;
1978 c = n;
1979 goto add_char_nonext;
1980 case 'x':
1981 case 'u':
1982 case 'U':
1983 p++;
1984 n = 0;
1985 for(;;) {
1986 c = *p;
1987 if (c >= 'a' && c <= 'f')
1988 c = c - 'a' + 10;
1989 else if (c >= 'A' && c <= 'F')
1990 c = c - 'A' + 10;
1991 else if (isnum(c))
1992 c = c - '0';
1993 else
1994 break;
1995 n = n * 16 + c;
1996 p++;
1998 c = n;
1999 goto add_char_nonext;
2000 case 'a':
2001 c = '\a';
2002 break;
2003 case 'b':
2004 c = '\b';
2005 break;
2006 case 'f':
2007 c = '\f';
2008 break;
2009 case 'n':
2010 c = '\n';
2011 break;
2012 case 'r':
2013 c = '\r';
2014 break;
2015 case 't':
2016 c = '\t';
2017 break;
2018 case 'v':
2019 c = '\v';
2020 break;
2021 case 'e':
2022 if (!gnu_ext)
2023 goto invalid_escape;
2024 c = 27;
2025 break;
2026 case '\'':
2027 case '\"':
2028 case '\\':
2029 case '?':
2030 break;
2031 default:
2032 invalid_escape:
2033 if (c >= '!' && c <= '~')
2034 tcc_warning("unknown escape sequence: \'\\%c\'", c);
2035 else
2036 tcc_warning("unknown escape sequence: \'\\x%x\'", c);
2037 break;
2040 p++;
2041 add_char_nonext:
2042 if (!is_long)
2043 cstr_ccat(outstr, c);
2044 else
2045 cstr_wccat(outstr, c);
2047 /* add a trailing '\0' */
2048 if (!is_long)
2049 cstr_ccat(outstr, '\0');
2050 else
2051 cstr_wccat(outstr, '\0');
2054 void parse_string(const char *s, int len)
2056 uint8_t buf[1000], *p = buf;
2057 int is_long, sep;
2059 if ((is_long = *s == 'L'))
2060 ++s, --len;
2061 sep = *s++;
2062 len -= 2;
2063 if (len >= sizeof buf)
2064 p = tcc_malloc(len + 1);
2065 memcpy(p, s, len);
2066 p[len] = 0;
2068 cstr_reset(&tokcstr);
2069 parse_escape_string(&tokcstr, p, is_long);
2070 if (p != buf)
2071 tcc_free(p);
2073 if (sep == '\'') {
2074 int char_size;
2075 /* XXX: make it portable */
2076 if (!is_long)
2077 char_size = 1;
2078 else
2079 char_size = sizeof(nwchar_t);
2080 if (tokcstr.size <= char_size)
2081 tcc_error("empty character constant");
2082 if (tokcstr.size > 2 * char_size)
2083 tcc_warning("multi-character character constant");
2084 if (!is_long) {
2085 tokc.i = *(int8_t *)tokcstr.data;
2086 tok = TOK_CCHAR;
2087 } else {
2088 tokc.i = *(nwchar_t *)tokcstr.data;
2089 tok = TOK_LCHAR;
2091 } else {
2092 tokc.str.size = tokcstr.size;
2093 tokc.str.data = tokcstr.data;
2094 tokc.str.data_allocated = tokcstr.data_allocated;
2095 if (!is_long)
2096 tok = TOK_STR;
2097 else
2098 tok = TOK_LSTR;
/* we use 64 bit numbers */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val

   'bn' is an array of BN_SIZE words, least significant word first;
   each word is treated as 32 bits wide.  'shift' must be in 0..31.
   'or_val' is merged into the low bits of the lowest word, and the
   bits shifted out of each word are carried into the next one. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    for(i=0;i<BN_SIZE;i++) {
        v = bn[i];
        bn[i] = (v << shift) | or_val;
        /* a shift count of 32 is undefined, so a zero shift must not
           reach 'v >> 32': nothing is carried in that case */
        or_val = shift ? v >> (32 - shift) : 0;
    }
}

/* bn = 0 */
static void bn_zero(unsigned int *bn)
{
    int i;
    for(i=0;i<BN_SIZE;i++) {
        bn[i] = 0;
    }
}
2125 /* parse number in null terminated string 'p' and return it in the
2126 current token */
2127 static void parse_number(const char *p)
2129 int b, t, shift, frac_bits, s, exp_val, ch;
2130 char *q;
2131 unsigned int bn[BN_SIZE];
2132 double d;
2134 /* number */
2135 q = token_buf;
2136 ch = *p++;
2137 t = ch;
2138 ch = *p++;
2139 *q++ = t;
2140 b = 10;
2141 if (t == '.') {
2142 goto float_frac_parse;
2143 } else if (t == '0') {
2144 if (ch == 'x' || ch == 'X') {
2145 q--;
2146 ch = *p++;
2147 b = 16;
2148 } else if (tcc_ext && (ch == 'b' || ch == 'B')) {
2149 q--;
2150 ch = *p++;
2151 b = 2;
2154 /* parse all digits. cannot check octal numbers at this stage
2155 because of floating point constants */
2156 while (1) {
2157 if (ch >= 'a' && ch <= 'f')
2158 t = ch - 'a' + 10;
2159 else if (ch >= 'A' && ch <= 'F')
2160 t = ch - 'A' + 10;
2161 else if (isnum(ch))
2162 t = ch - '0';
2163 else
2164 break;
2165 if (t >= b)
2166 break;
2167 if (q >= token_buf + STRING_MAX_SIZE) {
2168 num_too_long:
2169 tcc_error("number too long");
2171 *q++ = ch;
2172 ch = *p++;
2174 if (ch == '.' ||
2175 ((ch == 'e' || ch == 'E') && b == 10) ||
2176 ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
2177 if (b != 10) {
2178 /* NOTE: strtox should support that for hexa numbers, but
2179 non ISOC99 libcs do not support it, so we prefer to do
2180 it by hand */
2181 /* hexadecimal or binary floats */
2182 /* XXX: handle overflows */
2183 *q = '\0';
2184 if (b == 16)
2185 shift = 4;
2186 else
2187 shift = 1;
2188 bn_zero(bn);
2189 q = token_buf;
2190 while (1) {
2191 t = *q++;
2192 if (t == '\0') {
2193 break;
2194 } else if (t >= 'a') {
2195 t = t - 'a' + 10;
2196 } else if (t >= 'A') {
2197 t = t - 'A' + 10;
2198 } else {
2199 t = t - '0';
2201 bn_lshift(bn, shift, t);
2203 frac_bits = 0;
2204 if (ch == '.') {
2205 ch = *p++;
2206 while (1) {
2207 t = ch;
2208 if (t >= 'a' && t <= 'f') {
2209 t = t - 'a' + 10;
2210 } else if (t >= 'A' && t <= 'F') {
2211 t = t - 'A' + 10;
2212 } else if (t >= '0' && t <= '9') {
2213 t = t - '0';
2214 } else {
2215 break;
2217 if (t >= b)
2218 tcc_error("invalid digit");
2219 bn_lshift(bn, shift, t);
2220 frac_bits += shift;
2221 ch = *p++;
2224 if (ch != 'p' && ch != 'P')
2225 expect("exponent");
2226 ch = *p++;
2227 s = 1;
2228 exp_val = 0;
2229 if (ch == '+') {
2230 ch = *p++;
2231 } else if (ch == '-') {
2232 s = -1;
2233 ch = *p++;
2235 if (ch < '0' || ch > '9')
2236 expect("exponent digits");
2237 while (ch >= '0' && ch <= '9') {
2238 exp_val = exp_val * 10 + ch - '0';
2239 ch = *p++;
2241 exp_val = exp_val * s;
2243 /* now we can generate the number */
2244 /* XXX: should patch directly float number */
2245 d = (double)bn[1] * 4294967296.0 + (double)bn[0];
2246 d = ldexp(d, exp_val - frac_bits);
2247 t = toup(ch);
2248 if (t == 'F') {
2249 ch = *p++;
2250 tok = TOK_CFLOAT;
2251 /* float : should handle overflow */
2252 tokc.f = (float)d;
2253 } else if (t == 'L') {
2254 ch = *p++;
2255 #ifdef TCC_TARGET_PE
2256 tok = TOK_CDOUBLE;
2257 tokc.d = d;
2258 #else
2259 tok = TOK_CLDOUBLE;
2260 /* XXX: not large enough */
2261 tokc.ld = (long double)d;
2262 #endif
2263 } else {
2264 tok = TOK_CDOUBLE;
2265 tokc.d = d;
2267 } else {
2268 /* decimal floats */
2269 if (ch == '.') {
2270 if (q >= token_buf + STRING_MAX_SIZE)
2271 goto num_too_long;
2272 *q++ = ch;
2273 ch = *p++;
2274 float_frac_parse:
2275 while (ch >= '0' && ch <= '9') {
2276 if (q >= token_buf + STRING_MAX_SIZE)
2277 goto num_too_long;
2278 *q++ = ch;
2279 ch = *p++;
2282 if (ch == 'e' || ch == 'E') {
2283 if (q >= token_buf + STRING_MAX_SIZE)
2284 goto num_too_long;
2285 *q++ = ch;
2286 ch = *p++;
2287 if (ch == '-' || ch == '+') {
2288 if (q >= token_buf + STRING_MAX_SIZE)
2289 goto num_too_long;
2290 *q++ = ch;
2291 ch = *p++;
2293 if (ch < '0' || ch > '9')
2294 expect("exponent digits");
2295 while (ch >= '0' && ch <= '9') {
2296 if (q >= token_buf + STRING_MAX_SIZE)
2297 goto num_too_long;
2298 *q++ = ch;
2299 ch = *p++;
2302 *q = '\0';
2303 t = toup(ch);
2304 errno = 0;
2305 if (t == 'F') {
2306 ch = *p++;
2307 tok = TOK_CFLOAT;
2308 tokc.f = strtof(token_buf, NULL);
2309 } else if (t == 'L') {
2310 ch = *p++;
2311 #ifdef TCC_TARGET_PE
2312 tok = TOK_CDOUBLE;
2313 tokc.d = strtod(token_buf, NULL);
2314 #else
2315 tok = TOK_CLDOUBLE;
2316 tokc.ld = strtold(token_buf, NULL);
2317 #endif
2318 } else {
2319 tok = TOK_CDOUBLE;
2320 tokc.d = strtod(token_buf, NULL);
2323 } else {
2324 unsigned long long n, n1;
2325 int lcount, ucount, must_64bit;
2326 const char *p1;
2328 /* integer number */
2329 *q = '\0';
2330 q = token_buf;
2331 if (b == 10 && *q == '0') {
2332 b = 8;
2333 q++;
2335 n = 0;
2336 while(1) {
2337 t = *q++;
2338 /* no need for checks except for base 10 / 8 errors */
2339 if (t == '\0')
2340 break;
2341 else if (t >= 'a')
2342 t = t - 'a' + 10;
2343 else if (t >= 'A')
2344 t = t - 'A' + 10;
2345 else
2346 t = t - '0';
2347 if (t >= b)
2348 tcc_error("invalid digit");
2349 n1 = n;
2350 n = n * b + t;
2351 /* detect overflow */
2352 /* XXX: this test is not reliable */
2353 if (n < n1)
2354 tcc_error("integer constant overflow");
2357 /* Determine the characteristics (unsigned and/or 64bit) the type of
2358 the constant must have according to the constant suffix(es) */
2359 lcount = ucount = must_64bit = 0;
2360 p1 = p;
2361 for(;;) {
2362 t = toup(ch);
2363 if (t == 'L') {
2364 if (lcount >= 2)
2365 tcc_error("three 'l's in integer constant");
2366 if (lcount && *(p - 1) != ch)
2367 tcc_error("incorrect integer suffix: %s", p1);
2368 lcount++;
2369 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2370 if (lcount == 2)
2371 #endif
2372 must_64bit = 1;
2373 ch = *p++;
2374 } else if (t == 'U') {
2375 if (ucount >= 1)
2376 tcc_error("two 'u's in integer constant");
2377 ucount++;
2378 ch = *p++;
2379 } else {
2380 break;
2384 /* Whether 64 bits are needed to hold the constant's value */
2385 if (n & 0xffffffff00000000LL || must_64bit) {
2386 tok = TOK_CLLONG;
2387 n1 = n >> 32;
2388 } else {
2389 tok = TOK_CINT;
2390 n1 = n;
2393 /* Whether type must be unsigned to hold the constant's value */
2394 if (ucount || ((n1 >> 31) && (b != 10))) {
2395 if (tok == TOK_CLLONG)
2396 tok = TOK_CULLONG;
2397 else
2398 tok = TOK_CUINT;
2399 /* If decimal and no unsigned suffix, bump to 64 bits or throw error */
2400 } else if (n1 >> 31) {
2401 if (tok == TOK_CINT)
2402 tok = TOK_CLLONG;
2403 else
2404 tcc_error("integer constant overflow");
2407 if (tok == TOK_CINT || tok == TOK_CUINT)
2408 tokc.i = n;
2409 else
2410 tokc.i = n;
2412 if (ch)
2413 tcc_error("invalid number\n");
/* Emit a 'case' for a token that is either the single character 'c1'
   (token 'tok1') or 'c1' followed by 'c2' (token 'tok2').  Relies on
   the locals 'c' and 'p' of the enclosing function. */
#define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c == c2) {                          \
            p++;                                \
            tok = tok2;                         \
        } else {                                \
            tok = tok1;                         \
        }                                       \
        break;
2428 /* return next token without macro substitution */
2429 static inline void next_nomacro1(void)
2431 int t, c, is_long, len;
2432 TokenSym *ts;
2433 uint8_t *p, *p1;
2434 unsigned int h;
2436 p = file->buf_ptr;
2437 redo_no_start:
2438 c = *p;
2439 #if (__TINYC__ || __GNUC__)
2440 #else
2441 if (c & 0x80)
2442 goto parse_ident_fast;
2443 #endif
2444 switch(c) {
2445 case ' ':
2446 case '\t':
2447 tok = c;
2448 p++;
2449 if (parse_flags & PARSE_FLAG_SPACES)
2450 goto keep_tok_flags;
2451 while (isidnum_table[*p - CH_EOF] & IS_SPC)
2452 ++p;
2453 goto redo_no_start;
2454 case '\f':
2455 case '\v':
2456 case '\r':
2457 p++;
2458 goto redo_no_start;
2459 case '\\':
2460 /* first look if it is in fact an end of buffer */
2461 c = handle_stray1(p);
2462 p = file->buf_ptr;
2463 if (c == '\\')
2464 goto parse_simple;
2465 if (c != CH_EOF)
2466 goto redo_no_start;
2468 TCCState *s1 = tcc_state;
2469 if ((parse_flags & PARSE_FLAG_LINEFEED)
2470 && !(tok_flags & TOK_FLAG_EOF)) {
2471 tok_flags |= TOK_FLAG_EOF;
2472 tok = TOK_LINEFEED;
2473 goto keep_tok_flags;
2474 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2475 tok = TOK_EOF;
2476 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2477 tcc_error("missing #endif");
2478 } else if (s1->include_stack_ptr == s1->include_stack) {
2479 /* no include left : end of file. */
2480 tok = TOK_EOF;
2481 } else {
2482 tok_flags &= ~TOK_FLAG_EOF;
2483 /* pop include file */
2485 /* test if previous '#endif' was after a #ifdef at
2486 start of file */
2487 if (tok_flags & TOK_FLAG_ENDIF) {
2488 #ifdef INC_DEBUG
2489 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2490 #endif
2491 add_cached_include(s1, file->filename, file->ifndef_macro_saved);
2492 tok_flags &= ~TOK_FLAG_ENDIF;
2495 /* add end of include file debug info */
2496 if (tcc_state->do_debug) {
2497 put_stabd(N_EINCL, 0, 0);
2499 /* pop include stack */
2500 tcc_close();
2501 s1->include_stack_ptr--;
2502 p = file->buf_ptr;
2503 goto redo_no_start;
2506 break;
2508 case '\n':
2509 file->line_num++;
2510 tok_flags |= TOK_FLAG_BOL;
2511 p++;
2512 maybe_newline:
2513 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2514 goto redo_no_start;
2515 tok = TOK_LINEFEED;
2516 goto keep_tok_flags;
2518 case '#':
2519 /* XXX: simplify */
2520 PEEKC(c, p);
2521 if ((tok_flags & TOK_FLAG_BOL) &&
2522 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2523 file->buf_ptr = p;
2524 preprocess(tok_flags & TOK_FLAG_BOF);
2525 p = file->buf_ptr;
2526 goto maybe_newline;
2527 } else {
2528 if (c == '#') {
2529 p++;
2530 tok = TOK_TWOSHARPS;
2531 } else {
2532 if (parse_flags & PARSE_FLAG_ASM_FILE) {
2533 p = parse_line_comment(p - 1);
2534 goto redo_no_start;
2535 } else {
2536 tok = '#';
2540 break;
2542 /* dollar is allowed to start identifiers when not parsing asm */
2543 case '$':
2544 if (!(isidnum_table[c - CH_EOF] & IS_ID)
2545 || (parse_flags & PARSE_FLAG_ASM_FILE))
2546 goto parse_simple;
2548 #if (__TINYC__ || __GNUC__)
2549 case 'a' ... 'z':
2550 case 'A' ... 'K':
2551 case 'M' ... 'Z':
2552 case '_':
2553 case 0x80 ... 0xFF:
2554 #else
2555 case 'a': case 'b': case 'c': case 'd':
2556 case 'e': case 'f': case 'g': case 'h':
2557 case 'i': case 'j': case 'k': case 'l':
2558 case 'm': case 'n': case 'o': case 'p':
2559 case 'q': case 'r': case 's': case 't':
2560 case 'u': case 'v': case 'w': case 'x':
2561 case 'y': case 'z':
2562 case 'A': case 'B': case 'C': case 'D':
2563 case 'E': case 'F': case 'G': case 'H':
2564 case 'I': case 'J': case 'K':
2565 case 'M': case 'N': case 'O': case 'P':
2566 case 'Q': case 'R': case 'S': case 'T':
2567 case 'U': case 'V': case 'W': case 'X':
2568 case 'Y': case 'Z':
2569 case '_':
2570 #endif
2571 parse_ident_fast:
2572 p1 = p;
2573 h = TOK_HASH_INIT;
2574 h = TOK_HASH_FUNC(h, c);
2575 while (c = *++p, isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2576 h = TOK_HASH_FUNC(h, c);
2577 len = p - p1;
2578 if (c != '\\') {
2579 TokenSym **pts;
2581 /* fast case : no stray found, so we have the full token
2582 and we have already hashed it */
2583 h &= (TOK_HASH_SIZE - 1);
2584 pts = &hash_ident[h];
2585 for(;;) {
2586 ts = *pts;
2587 if (!ts)
2588 break;
2589 if (ts->len == len && !memcmp(ts->str, p1, len))
2590 goto token_found;
2591 pts = &(ts->hash_next);
2593 ts = tok_alloc_new(pts, (char *) p1, len);
2594 token_found: ;
2595 } else {
2596 /* slower case */
2597 cstr_reset(&tokcstr);
2598 cstr_cat(&tokcstr, p1, len);
2599 p--;
2600 PEEKC(c, p);
2601 parse_ident_slow:
2602 while (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2604 cstr_ccat(&tokcstr, c);
2605 PEEKC(c, p);
2607 ts = tok_alloc(tokcstr.data, tokcstr.size);
2609 tok = ts->tok;
2610 break;
2611 case 'L':
2612 t = p[1];
2613 if (t != '\\' && t != '\'' && t != '\"') {
2614 /* fast case */
2615 goto parse_ident_fast;
2616 } else {
2617 PEEKC(c, p);
2618 if (c == '\'' || c == '\"') {
2619 is_long = 1;
2620 goto str_const;
2621 } else {
2622 cstr_reset(&tokcstr);
2623 cstr_ccat(&tokcstr, 'L');
2624 goto parse_ident_slow;
2627 break;
2629 case '0': case '1': case '2': case '3':
2630 case '4': case '5': case '6': case '7':
2631 case '8': case '9':
2632 cstr_reset(&tokcstr);
2633 /* after the first digit, accept digits, alpha, '.' or sign if
2634 prefixed by 'eEpP' */
2635 parse_num:
2636 for(;;) {
2637 t = c;
2638 cstr_ccat(&tokcstr, c);
2639 PEEKC(c, p);
2640 if (!((isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2641 || c == '.'
2642 || ((c == '+' || c == '-')
2643 && (t == 'e' || t == 'E' || t == 'p' || t == 'P')
2644 && !(parse_flags & PARSE_FLAG_ASM_FILE)
2646 break;
2648 /* We add a trailing '\0' to ease parsing */
2649 cstr_ccat(&tokcstr, '\0');
2650 tokc.str.size = tokcstr.size;
2651 tokc.str.data = tokcstr.data;
2652 tokc.str.data_allocated = tokcstr.data_allocated;
2653 tok = TOK_PPNUM;
2654 break;
2656 case '.':
2657 /* special dot handling because it can also start a number */
2658 PEEKC(c, p);
2659 if (isnum(c)) {
2660 cstr_reset(&tokcstr);
2661 cstr_ccat(&tokcstr, '.');
2662 goto parse_num;
2663 } else if ((parse_flags & PARSE_FLAG_ASM_FILE)
2664 && (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))) {
2665 *--p = c = '.';
2666 goto parse_ident_fast;
2667 } else if (c == '.') {
2668 PEEKC(c, p);
2669 if (c == '.') {
2670 p++;
2671 tok = TOK_DOTS;
2672 } else {
2673 *--p = '.'; /* may underflow into file->unget[] */
2674 tok = '.';
2676 } else {
2677 tok = '.';
2679 break;
2680 case '\'':
2681 case '\"':
2682 is_long = 0;
2683 str_const:
2684 cstr_reset(&tokcstr);
2685 if (is_long)
2686 cstr_ccat(&tokcstr, 'L');
2687 cstr_ccat(&tokcstr, c);
2688 p = parse_pp_string(p, c, &tokcstr);
2689 cstr_ccat(&tokcstr, c);
2690 cstr_ccat(&tokcstr, '\0');
2691 tokc.str.size = tokcstr.size;
2692 tokc.str.data = tokcstr.data;
2693 tokc.str.data_allocated = tokcstr.data_allocated;
2694 tok = TOK_PPSTR;
2695 break;
2697 case '<':
2698 PEEKC(c, p);
2699 if (c == '=') {
2700 p++;
2701 tok = TOK_LE;
2702 } else if (c == '<') {
2703 PEEKC(c, p);
2704 if (c == '=') {
2705 p++;
2706 tok = TOK_A_SHL;
2707 } else {
2708 tok = TOK_SHL;
2710 } else {
2711 tok = TOK_LT;
2713 break;
2714 case '>':
2715 PEEKC(c, p);
2716 if (c == '=') {
2717 p++;
2718 tok = TOK_GE;
2719 } else if (c == '>') {
2720 PEEKC(c, p);
2721 if (c == '=') {
2722 p++;
2723 tok = TOK_A_SAR;
2724 } else {
2725 tok = TOK_SAR;
2727 } else {
2728 tok = TOK_GT;
2730 break;
2732 case '&':
2733 PEEKC(c, p);
2734 if (c == '&') {
2735 p++;
2736 tok = TOK_LAND;
2737 } else if (c == '=') {
2738 p++;
2739 tok = TOK_A_AND;
2740 } else {
2741 tok = '&';
2743 break;
2745 case '|':
2746 PEEKC(c, p);
2747 if (c == '|') {
2748 p++;
2749 tok = TOK_LOR;
2750 } else if (c == '=') {
2751 p++;
2752 tok = TOK_A_OR;
2753 } else {
2754 tok = '|';
2756 break;
2758 case '+':
2759 PEEKC(c, p);
2760 if (c == '+') {
2761 p++;
2762 tok = TOK_INC;
2763 } else if (c == '=') {
2764 p++;
2765 tok = TOK_A_ADD;
2766 } else {
2767 tok = '+';
2769 break;
2771 case '-':
2772 PEEKC(c, p);
2773 if (c == '-') {
2774 p++;
2775 tok = TOK_DEC;
2776 } else if (c == '=') {
2777 p++;
2778 tok = TOK_A_SUB;
2779 } else if (c == '>') {
2780 p++;
2781 tok = TOK_ARROW;
2782 } else {
2783 tok = '-';
2785 break;
2787 PARSE2('!', '!', '=', TOK_NE)
2788 PARSE2('=', '=', '=', TOK_EQ)
2789 PARSE2('*', '*', '=', TOK_A_MUL)
2790 PARSE2('%', '%', '=', TOK_A_MOD)
2791 PARSE2('^', '^', '=', TOK_A_XOR)
2793 /* comments or operator */
2794 case '/':
2795 PEEKC(c, p);
2796 if (c == '*') {
2797 p = parse_comment(p);
2798 /* comments replaced by a blank */
2799 tok = ' ';
2800 goto keep_tok_flags;
2801 } else if (c == '/') {
2802 p = parse_line_comment(p);
2803 tok = ' ';
2804 goto keep_tok_flags;
2805 } else if (c == '=') {
2806 p++;
2807 tok = TOK_A_DIV;
2808 } else {
2809 tok = '/';
2811 break;
2813 /* simple tokens */
2814 case '(':
2815 case ')':
2816 case '[':
2817 case ']':
2818 case '{':
2819 case '}':
2820 case ',':
2821 case ';':
2822 case ':':
2823 case '?':
2824 case '~':
2825 case '@': /* only used in assembler */
2826 parse_simple:
2827 tok = c;
2828 p++;
2829 break;
2830 default:
2831 if (parse_flags & PARSE_FLAG_ASM_FILE)
2832 goto parse_simple;
2833 tcc_error("unrecognized character \\x%02x", c);
2834 break;
2836 tok_flags = 0;
2837 keep_tok_flags:
2838 file->buf_ptr = p;
2839 #if defined(PARSE_DEBUG)
2840 printf("token = %s\n", get_tok_str(tok, &tokc));
2841 #endif
2844 /* return next token without macro substitution. Can read input from
2845 macro_ptr buffer */
2846 static void next_nomacro_spc(void)
2848 if (macro_ptr) {
2849 redo:
2850 tok = *macro_ptr;
2851 if (tok) {
2852 TOK_GET(&tok, &macro_ptr, &tokc);
2853 if (tok == TOK_LINENUM) {
2854 file->line_num = tokc.i;
2855 goto redo;
2858 } else {
2859 next_nomacro1();
2863 ST_FUNC void next_nomacro(void)
2865 do {
2866 next_nomacro_spc();
2867 } while (tok < 256 && (isidnum_table[tok - CH_EOF] & IS_SPC));
2871 static void macro_subst(
2872 TokenString *tok_str,
2873 Sym **nested_list,
2874 const int *macro_str,
2875 int can_read_stream
2878 /* substitute arguments in replacement lists in macro_str by the values in
2879 args (field d) and return allocated string */
2880 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2882 int t, t0, t1, spc;
2883 const int *st;
2884 Sym *s;
2885 CValue cval;
2886 TokenString str;
2887 CString cstr;
2889 tok_str_new(&str);
2890 t0 = t1 = 0;
2891 while(1) {
2892 TOK_GET(&t, &macro_str, &cval);
2893 if (!t)
2894 break;
2895 if (t == '#') {
2896 /* stringize */
2897 TOK_GET(&t, &macro_str, &cval);
2898 if (!t)
2899 goto bad_stringy;
2900 s = sym_find2(args, t);
2901 if (s) {
2902 cstr_new(&cstr);
2903 cstr_ccat(&cstr, '\"');
2904 st = s->d;
2905 spc = 0;
2906 while (*st) {
2907 TOK_GET(&t, &st, &cval);
2908 if (t != TOK_PLCHLDR
2909 && t != TOK_NOSUBST
2910 && 0 == check_space(t, &spc)) {
2911 const char *s = get_tok_str(t, &cval);
2912 while (*s) {
2913 if (t == TOK_PPSTR && *s != '\'')
2914 add_char(&cstr, *s);
2915 else
2916 cstr_ccat(&cstr, *s);
2917 ++s;
2921 cstr.size -= spc;
2922 cstr_ccat(&cstr, '\"');
2923 cstr_ccat(&cstr, '\0');
2924 #ifdef PP_DEBUG
2925 printf("\nstringize: <%s>\n", (char *)cstr.data);
2926 #endif
2927 /* add string */
2928 cval.str.size = cstr.size;
2929 cval.str.data = cstr.data;
2930 cval.str.data_allocated = cstr.data_allocated;
2931 tok_str_add2(&str, TOK_PPSTR, &cval);
2932 cstr_free(&cstr);
2933 } else {
2934 bad_stringy:
2935 expect("macro parameter after '#'");
2937 } else if (t >= TOK_IDENT) {
2938 s = sym_find2(args, t);
2939 if (s) {
2940 int l0 = str.len;
2941 st = s->d;
2942 /* if '##' is present before or after, no arg substitution */
2943 if (*macro_str == TOK_TWOSHARPS || t1 == TOK_TWOSHARPS) {
2944 /* special case for var arg macros : ## eats the ','
2945 if empty VA_ARGS variable. */
2946 if (t1 == TOK_TWOSHARPS && t0 == ',' && gnu_ext && s->type.t) {
2947 if (*st == 0) {
2948 /* suppress ',' '##' */
2949 str.len -= 2;
2950 } else {
2951 /* suppress '##' and add variable */
2952 str.len--;
2953 goto add_var;
2955 } else {
2956 for(;;) {
2957 int t1;
2958 TOK_GET(&t1, &st, &cval);
2959 if (!t1)
2960 break;
2961 tok_str_add2(&str, t1, &cval);
2965 } else {
2966 add_var:
2967 /* NOTE: the stream cannot be read when macro
2968 substituing an argument */
2969 macro_subst(&str, nested_list, st, 0);
2971 if (str.len == l0) /* exanded to empty string */
2972 tok_str_add(&str, TOK_PLCHLDR);
2973 } else {
2974 tok_str_add(&str, t);
2976 } else {
2977 tok_str_add2(&str, t, &cval);
2979 t0 = t1, t1 = t;
2981 tok_str_add(&str, 0);
2982 return str.str;
/* abbreviated English month names, indexed by tm_mon (0 = January);
   each entry is three letters plus the terminating NUL */
static const char ab_month_name[12][4] = {
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec"
};
2991 /* peek or read [ws_str == NULL] next token from function macro call,
2992 walking up macro levels up to the file if necessary */
2993 static int next_argstream(Sym **nested_list, int can_read_stream, TokenString *ws_str)
2995 int t;
2996 const int *p;
2997 Sym *sa;
2999 for (;;) {
3000 if (macro_ptr) {
3001 p = macro_ptr, t = *p;
3002 if (ws_str) {
3003 while (is_space(t) || TOK_LINEFEED == t)
3004 tok_str_add(ws_str, t), t = *++p;
3006 if (t == 0 && can_read_stream) {
3007 end_macro();
3008 /* also, end of scope for nested defined symbol */
3009 sa = *nested_list;
3010 while (sa && sa->v == 0)
3011 sa = sa->prev;
3012 if (sa)
3013 sa->v = 0;
3014 continue;
3016 } else {
3017 ch = handle_eob();
3018 if (ws_str) {
3019 while (is_space(ch) || ch == '\n' || ch == '/') {
3020 if (ch == '/') {
3021 int c;
3022 uint8_t *p = file->buf_ptr;
3023 PEEKC(c, p);
3024 if (c == '*') {
3025 p = parse_comment(p);
3026 file->buf_ptr = p - 1;
3027 } else if (c == '/') {
3028 p = parse_line_comment(p);
3029 file->buf_ptr = p - 1;
3030 } else
3031 break;
3032 ch = ' ';
3034 tok_str_add(ws_str, ch);
3035 cinp();
3038 t = ch;
3041 if (ws_str)
3042 return t;
3043 next_nomacro_spc();
3044 return tok;
3048 /* do macro substitution of current token with macro 's' and add
3049 result to (tok_str,tok_len). 'nested_list' is the list of all
3050 macros we got inside to avoid recursing. Return non zero if no
3051 substitution needs to be done */
3052 static int macro_subst_tok(
3053 TokenString *tok_str,
3054 Sym **nested_list,
3055 Sym *s,
3056 int can_read_stream)
3058 Sym *args, *sa, *sa1;
3059 int parlevel, *mstr, t, t1, spc;
3060 TokenString str;
3061 char *cstrval;
3062 CValue cval;
3063 CString cstr;
3064 char buf[32];
3066 /* if symbol is a macro, prepare substitution */
3067 /* special macros */
3068 if (tok == TOK___LINE__) {
3069 snprintf(buf, sizeof(buf), "%d", file->line_num);
3070 cstrval = buf;
3071 t1 = TOK_PPNUM;
3072 goto add_cstr1;
3073 } else if (tok == TOK___FILE__) {
3074 cstrval = file->filename;
3075 goto add_cstr;
3076 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
3077 time_t ti;
3078 struct tm *tm;
3080 time(&ti);
3081 tm = localtime(&ti);
3082 if (tok == TOK___DATE__) {
3083 snprintf(buf, sizeof(buf), "%s %2d %d",
3084 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
3085 } else {
3086 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
3087 tm->tm_hour, tm->tm_min, tm->tm_sec);
3089 cstrval = buf;
3090 add_cstr:
3091 t1 = TOK_STR;
3092 add_cstr1:
3093 cstr_new(&cstr);
3094 cstr_cat(&cstr, cstrval, 0);
3095 cval.str.size = cstr.size;
3096 cval.str.data = cstr.data;
3097 cval.str.data_allocated = cstr.data_allocated;
3098 tok_str_add2(tok_str, t1, &cval);
3099 cstr_free(&cstr);
3100 } else {
3101 int saved_parse_flags = parse_flags;
3103 mstr = s->d;
3104 if (s->type.t == MACRO_FUNC) {
3105 /* whitespace between macro name and argument list */
3106 TokenString ws_str;
3107 tok_str_new(&ws_str);
3109 spc = 0;
3110 parse_flags |= PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED
3111 | PARSE_FLAG_ACCEPT_STRAYS;
3113 /* get next token from argument stream */
3114 t = next_argstream(nested_list, can_read_stream, &ws_str);
3115 if (t != '(') {
3116 /* not a macro substitution after all, restore the
3117 * macro token plus all whitespace we've read.
3118 * whitespace is intentionally not merged to preserve
3119 * newlines. */
3120 parse_flags = saved_parse_flags;
3121 tok_str_add(tok_str, tok);
3122 if (parse_flags & PARSE_FLAG_SPACES) {
3123 int i;
3124 for (i = 0; i < ws_str.len; i++)
3125 tok_str_add(tok_str, ws_str.str[i]);
3127 tok_str_free(ws_str.str);
3128 return 0;
3129 } else {
3130 tok_str_free(ws_str.str);
3132 next_nomacro(); /* eat '(' */
3134 /* argument macro */
3135 args = NULL;
3136 sa = s->next;
3137 /* NOTE: empty args are allowed, except if no args */
3138 for(;;) {
3139 do {
3140 next_argstream(nested_list, can_read_stream, NULL);
3141 } while (is_space(tok) || TOK_LINEFEED == tok);
3142 empty_arg:
3143 /* handle '()' case */
3144 if (!args && !sa && tok == ')')
3145 break;
3146 if (!sa)
3147 tcc_error("macro '%s' used with too many args",
3148 get_tok_str(s->v, 0));
3149 tok_str_new(&str);
3150 parlevel = spc = 0;
3151 /* NOTE: non zero sa->t indicates VA_ARGS */
3152 while ((parlevel > 0 ||
3153 (tok != ')' &&
3154 (tok != ',' || sa->type.t)))) {
3155 if (tok == TOK_EOF || tok == 0)
3156 break;
3157 if (tok == '(')
3158 parlevel++;
3159 else if (tok == ')')
3160 parlevel--;
3161 if (tok == TOK_LINEFEED)
3162 tok = ' ';
3163 if (!check_space(tok, &spc))
3164 tok_str_add2(&str, tok, &tokc);
3165 next_argstream(nested_list, can_read_stream, NULL);
3167 if (parlevel)
3168 expect(")");
3169 str.len -= spc;
3170 tok_str_add(&str, 0);
3171 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
3172 sa1->d = str.str;
3173 sa = sa->next;
3174 if (tok == ')') {
3175 /* special case for gcc var args: add an empty
3176 var arg argument if it is omitted */
3177 if (sa && sa->type.t && gnu_ext)
3178 goto empty_arg;
3179 break;
3181 if (tok != ',')
3182 expect(",");
3184 if (sa) {
3185 tcc_error("macro '%s' used with too few args",
3186 get_tok_str(s->v, 0));
3189 parse_flags = saved_parse_flags;
3191 /* now subst each arg */
3192 mstr = macro_arg_subst(nested_list, mstr, args);
3193 /* free memory */
3194 sa = args;
3195 while (sa) {
3196 sa1 = sa->prev;
3197 tok_str_free(sa->d);
3198 sym_free(sa);
3199 sa = sa1;
3203 sym_push2(nested_list, s->v, 0, 0);
3204 parse_flags = saved_parse_flags;
3205 macro_subst(tok_str, nested_list, mstr, can_read_stream | 2);
3207 /* pop nested defined symbol */
3208 sa1 = *nested_list;
3209 *nested_list = sa1->prev;
3210 sym_free(sa1);
3211 if (mstr != s->d)
3212 tok_str_free(mstr);
3214 return 0;
3217 int paste_tokens(int t1, CValue *v1, int t2, CValue *v2)
3219 CString cstr;
3220 int n;
3222 cstr_new(&cstr);
3223 if (t1 != TOK_PLCHLDR)
3224 cstr_cat(&cstr, get_tok_str(t1, v1), -1);
3225 n = cstr.size;
3226 if (t2 != TOK_PLCHLDR)
3227 cstr_cat(&cstr, get_tok_str(t2, v2), -1);
3228 cstr_ccat(&cstr, '\0');
3230 tcc_open_bf(tcc_state, ":paste:", cstr.size);
3231 memcpy(file->buffer, cstr.data, cstr.size);
3232 for (;;) {
3233 next_nomacro1();
3234 if (0 == *file->buf_ptr)
3235 break;
3236 if (is_space(tok))
3237 continue;
3238 tcc_warning("pasting <%.*s> and <%s> does not give a valid preprocessing token",
3239 n, cstr.data, (char*)cstr.data + n);
3240 break;
3242 tcc_close();
3244 //printf("paste <%s>\n", (char*)cstr.data);
3245 cstr_free(&cstr);
3246 return 0;
3249 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
3250 return the resulting string (which must be freed). */
3251 static inline int *macro_twosharps(const int *ptr0)
3253 int t;
3254 CValue cval;
3255 TokenString macro_str1;
3256 int start_of_nosubsts = -1;
3257 const int *ptr;
3259 /* we search the first '##' */
3260 for (ptr = ptr0;;) {
3261 TOK_GET(&t, &ptr, &cval);
3262 if (t == TOK_TWOSHARPS)
3263 break;
3264 if (t == 0)
3265 return NULL;
3268 tok_str_new(&macro_str1);
3270 //tok_print(" $$$", ptr0);
3271 for (ptr = ptr0;;) {
3272 TOK_GET(&t, &ptr, &cval);
3273 if (t == 0)
3274 break;
3275 if (t == TOK_TWOSHARPS)
3276 continue;
3277 while (*ptr == TOK_TWOSHARPS) {
3278 int t1; CValue cv1;
3279 /* given 'a##b', remove nosubsts preceding 'a' */
3280 if (start_of_nosubsts >= 0)
3281 macro_str1.len = start_of_nosubsts;
3282 /* given 'a##b', remove nosubsts preceding 'b' */
3283 while ((t1 = *++ptr) == TOK_NOSUBST)
3285 if (t1 && t1 != TOK_TWOSHARPS
3286 && t1 != ':') /* 'a##:' don't build a new token */
3288 TOK_GET(&t1, &ptr, &cv1);
3289 if (t != TOK_PLCHLDR || t1 != TOK_PLCHLDR) {
3290 paste_tokens(t, &cval, t1, &cv1);
3291 t = tok, cval = tokc;
3295 if (t == TOK_NOSUBST) {
3296 if (start_of_nosubsts < 0)
3297 start_of_nosubsts = macro_str1.len;
3298 } else {
3299 start_of_nosubsts = -1;
3301 tok_str_add2(&macro_str1, t, &cval);
3303 tok_str_add(&macro_str1, 0);
3304 //tok_print(" ###", macro_str1.str);
3305 return macro_str1.str;
3308 /* do macro substitution of macro_str and add result to
3309 (tok_str,tok_len). 'nested_list' is the list of all macros we got
3310 inside to avoid recursing. */
3311 static void macro_subst(
3312 TokenString *tok_str,
3313 Sym **nested_list,
3314 const int *macro_str,
3315 int can_read_stream
3318 Sym *s;
3319 const int *ptr;
3320 int t, spc, nosubst;
3321 CValue cval;
3322 int *macro_str1 = NULL;
3324 /* first scan for '##' operator handling */
3325 ptr = macro_str;
3326 spc = nosubst = 0;
3328 /* first scan for '##' operator handling */
3329 if (can_read_stream & 1) {
3330 macro_str1 = macro_twosharps(ptr);
3331 if (macro_str1)
3332 ptr = macro_str1;
3335 while (1) {
3336 TOK_GET(&t, &ptr, &cval);
3337 if (t == 0)
3338 break;
3340 if (t >= TOK_IDENT && 0 == nosubst) {
3341 s = define_find(t);
3342 if (s == NULL)
3343 goto no_subst;
3345 /* if nested substitution, do nothing */
3346 if (sym_find2(*nested_list, t)) {
3347 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
3348 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
3349 goto no_subst;
3353 TokenString str;
3354 str.str = (int*)ptr;
3355 begin_macro(&str, 2);
3357 tok = t;
3358 macro_subst_tok(tok_str, nested_list, s, can_read_stream);
3360 if (str.alloc == 3) {
3361 /* already finished by reading function macro arguments */
3362 break;
3365 ptr = macro_ptr;
3366 end_macro ();
3369 spc = (tok_str->len &&
3370 is_space(tok_last(tok_str->str,
3371 tok_str->str + tok_str->len)));
3373 } else {
3375 if (t == '\\' && !(parse_flags & PARSE_FLAG_ACCEPT_STRAYS))
3376 tcc_error("stray '\\' in program");
3378 no_subst:
3379 if (!check_space(t, &spc))
3380 tok_str_add2(tok_str, t, &cval);
3381 nosubst = 0;
3382 if (t == TOK_NOSUBST)
3383 nosubst = 1;
3386 if (macro_str1)
3387 tok_str_free(macro_str1);
3391 /* return next token with macro substitution */
3392 ST_FUNC void next(void)
3394 redo:
3395 if (parse_flags & PARSE_FLAG_SPACES)
3396 next_nomacro_spc();
3397 else
3398 next_nomacro();
3400 if (macro_ptr) {
3401 if (tok == TOK_NOSUBST || tok == TOK_PLCHLDR) {
3402 /* discard preprocessor markers */
3403 goto redo;
3404 } else if (tok == 0) {
3405 /* end of macro or unget token string */
3406 end_macro();
3407 goto redo;
3409 } else if (tok >= TOK_IDENT && (parse_flags & PARSE_FLAG_PREPROCESS)) {
3410 Sym *s;
3411 /* if reading from file, try to substitute macros */
3412 s = define_find(tok);
3413 if (s) {
3414 Sym *nested_list = NULL;
3415 tokstr_buf.len = 0;
3416 nested_list = NULL;
3417 macro_subst_tok(&tokstr_buf, &nested_list, s, 1);
3418 tok_str_add(&tokstr_buf, 0);
3419 begin_macro(&tokstr_buf, 2);
3420 goto redo;
3423 /* convert preprocessor tokens into C tokens */
3424 if (tok == TOK_PPNUM) {
3425 if (parse_flags & PARSE_FLAG_TOK_NUM)
3426 parse_number((char *)tokc.str.data);
3427 } else if (tok == TOK_PPSTR) {
3428 if (parse_flags & PARSE_FLAG_TOK_STR)
3429 parse_string((char *)tokc.str.data, tokc.str.size - 1);
3433 /* push back current token and set current token to 'last_tok'. Only
3434 identifier case handled for labels. */
3435 ST_INLN void unget_tok(int last_tok)
3437 TokenString *str = tcc_malloc(sizeof *str);
3438 tok_str_new(str);
3439 tok_str_add2(str, tok, &tokc);
3440 tok_str_add(str, 0);
3441 begin_macro(str, 1);
3442 tok = last_tok;
3445 ST_FUNC void preprocess_init(TCCState *s1)
3447 s1->include_stack_ptr = s1->include_stack;
3448 /* XXX: move that before to avoid having to initialize
3449 file->ifdef_stack_ptr ? */
3450 s1->ifdef_stack_ptr = s1->ifdef_stack;
3451 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3453 pvtop = vtop = vstack - 1;
3454 s1->pack_stack[0] = 0;
3455 s1->pack_stack_ptr = s1->pack_stack;
3457 isidnum_table['$' - CH_EOF] =
3458 s1->dollars_in_identifiers ? IS_ID : 0;
3459 isidnum_table['.' - CH_EOF] =
3460 (parse_flags & PARSE_FLAG_ASM_FILE) ? IS_ID : 0;
3463 ST_FUNC void preprocess_new(void)
3465 int i, c;
3466 const char *p, *r;
3468 /* init isid table */
3469 for(i = CH_EOF; i<128; i++)
3470 isidnum_table[i - CH_EOF]
3471 = is_space(i) ? IS_SPC
3472 : isid(i) ? IS_ID
3473 : isnum(i) ? IS_NUM
3474 : 0;
3476 for(i = 128; i<256; i++)
3477 isidnum_table[i - CH_EOF] = IS_ID;
3479 /* init allocators */
3480 tal_new(&toksym_alloc, TOKSYM_TAL_LIMIT, TOKSYM_TAL_SIZE);
3481 tal_new(&tokstr_alloc, TOKSTR_TAL_LIMIT, TOKSTR_TAL_SIZE);
3482 tal_new(&cstr_alloc, CSTR_TAL_LIMIT, CSTR_TAL_SIZE);
3484 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3485 cstr_new(&cstr_buf);
3486 cstr_realloc(&cstr_buf, STRING_MAX_SIZE);
3487 tok_str_new(&tokstr_buf);
3488 tok_str_realloc(&tokstr_buf, TOKSTR_MAX_SIZE);
3490 tok_ident = TOK_IDENT;
3491 p = tcc_keywords;
3492 while (*p) {
3493 r = p;
3494 for(;;) {
3495 c = *r++;
3496 if (c == '\0')
3497 break;
3499 tok_alloc(p, r - p - 1);
3500 p = r;
3504 ST_FUNC void preprocess_delete(void)
3506 int i, n;
3508 /* free -D and compiler defines */
3509 free_defines(NULL);
3511 /* cleanup from error/setjmp */
3512 while (macro_stack)
3513 end_macro();
3514 macro_ptr = NULL;
3516 /* free tokens */
3517 n = tok_ident - TOK_IDENT;
3518 for(i = 0; i < n; i++)
3519 tal_free(toksym_alloc, table_ident[i]);
3520 tcc_free(table_ident);
3521 table_ident = NULL;
3523 /* free static buffers */
3524 cstr_free(&tokcstr);
3525 cstr_free(&cstr_buf);
3526 tok_str_free(tokstr_buf.str);
3528 /* free allocators */
3529 tal_delete(toksym_alloc);
3530 toksym_alloc = NULL;
3531 tal_delete(tokstr_alloc);
3532 tokstr_alloc = NULL;
3533 tal_delete(cstr_alloc);
3534 cstr_alloc = NULL;
3537 /* ------------------------------------------------------------------------- */
3538 /* tcc -E [-P[1]] [-dD] support */
3540 static void tok_print(const char *msg, const int *str)
3542 FILE *fp;
3543 int t;
3544 CValue cval;
3546 fp = tcc_state->ppfp;
3547 if (!fp || !tcc_state->dflag)
3548 fp = stdout;
3550 fprintf(fp, "%s ", msg);
3551 while (str) {
3552 TOK_GET(&t, &str, &cval);
3553 if (!t)
3554 break;
3555 fprintf(fp,"%s", get_tok_str(t, &cval));
3557 fprintf(fp, "\n");
3560 static void pp_line(TCCState *s1, BufferedFile *f, int level)
3562 int d = f->line_num - f->line_ref;
3564 if (s1->dflag & 4)
3565 return;
3567 if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_NONE) {
3568 if (level == 0 && f->line_ref && d) {
3569 d = 1;
3570 goto simple;
3572 } else if (level == 0 && f->line_ref && d < 8) {
3573 simple:
3574 while (d > 0)
3575 fputs("\n", s1->ppfp), --d;
3576 } else if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_STD) {
3577 fprintf(s1->ppfp, "#line %d \"%s\"\n", f->line_num, f->filename);
3578 } else {
3579 fprintf(s1->ppfp, "# %d \"%s\"%s\n", f->line_num, f->filename,
3580 level > 0 ? " 1" : level < 0 ? " 2" : "");
3582 f->line_ref = f->line_num;
3585 static void define_print(TCCState *s1, int v)
3587 FILE *fp;
3588 Sym *s;
3590 s = define_find(v);
3591 if (NULL == s || NULL == s->d)
3592 return;
3594 fp = s1->ppfp;
3595 fprintf(fp, "#define %s", get_tok_str(v, NULL));
3596 if (s->type.t == MACRO_FUNC) {
3597 Sym *a = s->next;
3598 fprintf(fp,"(");
3599 if (a)
3600 for (;;) {
3601 fprintf(fp,"%s", get_tok_str(a->v & ~SYM_FIELD, NULL));
3602 if (!(a = a->next))
3603 break;
3604 fprintf(fp,",");
3606 fprintf(fp,")");
3608 tok_print("", s->d);
3611 static void pp_debug_defines(TCCState *s1)
3613 int v, t;
3614 const char *vs;
3615 FILE *fp;
3617 t = pp_debug_tok;
3618 if (t == 0)
3619 return;
3621 file->line_num--;
3622 pp_line(s1, file, 0);
3623 file->line_ref = ++file->line_num;
3625 fp = s1->ppfp;
3626 v = pp_debug_symv;
3627 vs = get_tok_str(v, NULL);
3628 if (t == TOK_DEFINE) {
3629 define_print(s1, v);
3630 } else if (t == TOK_UNDEF) {
3631 fprintf(fp, "#undef %s\n", vs);
3632 } else if (t == TOK_push_macro) {
3633 fprintf(fp, "#pragma push_macro(\"%s\")\n", vs);
3634 } else if (t == TOK_pop_macro) {
3635 fprintf(fp, "#pragma pop_macro(\"%s\")\n", vs);
3637 pp_debug_tok = 0;
3640 static void pp_debug_builtins(TCCState *s1)
3642 int v;
3643 for (v = TOK_IDENT; v < tok_ident; ++v)
3644 define_print(s1, v);
3647 static int need_space(int prev_tok, int tok, const char *tokstr)
3649 const char *sp_chars = "";
3650 if ((prev_tok >= TOK_IDENT || prev_tok == TOK_PPNUM) &&
3651 (tok >= TOK_IDENT || tok == TOK_PPNUM))
3652 return 1;
3653 switch (prev_tok) {
3654 case '+':
3655 sp_chars = "+=";
3656 break;
3657 case '-':
3658 sp_chars = "-=>";
3659 break;
3660 case '*':
3661 case '/':
3662 case '%':
3663 case '^':
3664 case '=':
3665 case '!':
3666 case TOK_A_SHL:
3667 case TOK_A_SAR:
3668 sp_chars = "=";
3669 break;
3670 case '&':
3671 sp_chars = "&=";
3672 break;
3673 case '|':
3674 sp_chars = "|=";
3675 break;
3676 case '<':
3677 sp_chars = "<=";
3678 break;
3679 case '>':
3680 sp_chars = ">=";
3681 break;
3682 case '.':
3683 sp_chars = ".";
3684 break;
3685 case '#':
3686 sp_chars = "#";
3687 break;
3688 case TOK_PPNUM:
3689 sp_chars = "+-";
3690 break;
3692 return !!strchr(sp_chars, tokstr[0]);
3695 /* Preprocess the current file */
3696 ST_FUNC int tcc_preprocess(TCCState *s1)
3698 BufferedFile **iptr;
3699 int token_seen, spcs, level;
3700 Sym *define_start;
3701 const char *tokstr;
3703 preprocess_init(s1);
3704 ch = file->buf_ptr[0];
3705 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3706 parse_flags = PARSE_FLAG_PREPROCESS
3707 | (parse_flags & PARSE_FLAG_ASM_FILE)
3708 | PARSE_FLAG_LINEFEED
3709 | PARSE_FLAG_SPACES
3710 | PARSE_FLAG_ACCEPT_STRAYS
3712 define_start = define_stack;
3714 /* Credits to Fabrice Bellard's initial revision to demonstrate its
3715 capability to compile and run itself, provided all numbers are
3716 given as decimals. tcc -E -P10 will do. */
3717 if (s1->Pflag == 1 + 10)
3718 parse_flags |= PARSE_FLAG_TOK_NUM, s1->Pflag = 1;
3720 #ifdef PP_BENCH
3721 /* for PP benchmarks */
3722 do next(); while (tok != TOK_EOF); return 0;
3723 #endif
3725 if (s1->dflag & 1) {
3726 pp_debug_builtins(s1);
3727 s1->dflag &= ~1;
3730 token_seen = TOK_LINEFEED, spcs = 0;
3731 pp_line(s1, file, 0);
3733 for (;;) {
3734 iptr = s1->include_stack_ptr;
3735 next();
3736 if (tok == TOK_EOF)
3737 break;
3738 level = s1->include_stack_ptr - iptr;
3739 if (level) {
3740 if (level > 0)
3741 pp_line(s1, *iptr, 0);
3742 pp_line(s1, file, level);
3745 if (s1->dflag) {
3746 pp_debug_defines(s1);
3747 if (s1->dflag & 4)
3748 continue;
3751 if (token_seen == TOK_LINEFEED) {
3752 if (tok == ' ') {
3753 ++spcs;
3754 continue;
3756 if (tok == TOK_LINEFEED) {
3757 spcs = 0;
3758 continue;
3760 pp_line(s1, file, 0);
3761 } else if (tok == TOK_LINEFEED) {
3762 ++file->line_ref;
3765 tokstr = get_tok_str(tok, &tokc);
3766 if (!spcs && need_space(token_seen, tok, tokstr))
3767 ++spcs;
3768 while (spcs)
3769 fputs(" ", s1->ppfp), --spcs;
3770 fputs(tokstr, s1->ppfp);
3772 token_seen = tok;
3774 /* reset define stack, but keep -D and built-ins */
3775 free_defines(define_start);
3776 return 0;
3779 /* ------------------------------------------------------------------------- */