Make get_tok_str support NULL as second param.
[tinycc.git] / tccpp.c
blobf4b7b1a1bbfa1bb7e07f0d75e7be2ac5854092fe
1 /*
2 * TCC - Tiny C Compiler
3 *
4 * Copyright (c) 2001-2004 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "tcc.h"
23 /********************************************************/
24 /* global variables */
/* flags describing the context of the current token (TOK_FLAG_xxx) */
26 ST_DATA int tok_flags;
27 /* additional information about the token */
28 #define TOK_FLAG_BOL 0x0001 /* beginning of line before */
29 #define TOK_FLAG_BOF 0x0002 /* beginning of file before */
30 #define TOK_FLAG_ENDIF 0x0004 /* an #endif was found matching the starting #ifdef */
31 #define TOK_FLAG_EOF 0x0008 /* end of file */
/* current parsing mode (PARSE_FLAG_xxx) */
33 ST_DATA int parse_flags;
34 #define PARSE_FLAG_PREPROCESS 0x0001 /* activate preprocessing */
35 #define PARSE_FLAG_TOK_NUM 0x0002 /* return numbers instead of TOK_PPNUM */
36 #define PARSE_FLAG_LINEFEED 0x0004 /* line feed is returned as a
37 token. line feed is also
38 returned at eof */
39 #define PARSE_FLAG_ASM_COMMENTS 0x0008 /* '#' can be used for line comment */
40 #define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */
/* input file currently being read */
42 ST_DATA struct BufferedFile *file;
/* ch: character last read by inp(); tok: current token code */
43 ST_DATA int ch, tok;
/* value attached to 'tok' (passed to get_tok_str() together with it) */
44 ST_DATA CValue tokc;
/* NOTE(review): appears to be the token string tokens are read from when
   non-NULL (expr_preprocess() points it at its buffer) - confirm */
45 ST_DATA const int *macro_ptr;
46 ST_DATA CString tokcstr; /* current parsed string, if any */
48 /* benchmark statistics */
49 ST_DATA int total_lines;
50 ST_DATA int total_bytes;
/* next token code to hand out; identifiers occupy [TOK_IDENT, tok_ident) */
51 ST_DATA int tok_ident;
/* identifier table, indexed by (token code - TOK_IDENT) */
52 ST_DATA TokenSym **table_ident;
54 /* ------------------------------------------------------------------------- */
56 static int *macro_ptr_allocated;
57 static const int *unget_saved_macro_ptr;
58 static int unget_saved_buffer[TOK_MAX_SIZE + 1];
59 static int unget_buffer_enabled;
/* hash buckets used by tok_alloc() to look identifiers up by spelling */
60 static TokenSym *hash_ident[TOK_HASH_SIZE];
61 static char token_buf[STRING_MAX_SIZE + 1];
62 /* true if isid(c) || isnum(c) */
63 static unsigned char isidnum_table[256-CH_EOF];
/* spellings of all predefined tokens, each terminated by '\0' */
65 static const char tcc_keywords[] =
66 #define DEF(id, str) str "\0"
67 #include "tcctok.h"
68 #undef DEF
71 /* WARNING: the content of this string encodes token numbers */
/* layout: triples of (first char, second char, token code), as read by
   get_tok_str() */
72 static const unsigned char tok_two_chars[] =
73 "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
74 "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
76 struct macro_level {
77 struct macro_level *prev;
78 const int *p;
/* forward declarations of functions defined later in this file */
81 static void next_nomacro_spc(void);
82 static void macro_subst(
83 TokenString *tok_str,
84 Sym **nested_list,
85 const int *macro_str,
86 struct macro_level **can_read_stream
89 ST_FUNC void skip(int c)
91 if (tok != c)
92 tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
93 next();
96 ST_FUNC void expect(const char *msg)
98 tcc_error("%s expected", msg);
101 /* ------------------------------------------------------------------------- */
102 /* CString handling */
103 static void cstr_realloc(CString *cstr, int new_size)
105 int size;
106 void *data;
108 size = cstr->size_allocated;
109 if (size == 0)
110 size = 8; /* no need to allocate a too small first string */
111 while (size < new_size)
112 size = size * 2;
113 data = tcc_realloc(cstr->data_allocated, size);
114 cstr->data_allocated = data;
115 cstr->size_allocated = size;
116 cstr->data = data;
119 /* add a byte */
120 ST_FUNC void cstr_ccat(CString *cstr, int ch)
122 int size;
123 size = cstr->size + 1;
124 if (size > cstr->size_allocated)
125 cstr_realloc(cstr, size);
126 ((unsigned char *)cstr->data)[size - 1] = ch;
127 cstr->size = size;
130 ST_FUNC void cstr_cat(CString *cstr, const char *str)
132 int c;
133 for(;;) {
134 c = *str;
135 if (c == '\0')
136 break;
137 cstr_ccat(cstr, c);
138 str++;
142 /* add a wide char */
143 ST_FUNC void cstr_wccat(CString *cstr, int ch)
145 int size;
146 size = cstr->size + sizeof(nwchar_t);
147 if (size > cstr->size_allocated)
148 cstr_realloc(cstr, size);
149 *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
150 cstr->size = size;
153 ST_FUNC void cstr_new(CString *cstr)
155 memset(cstr, 0, sizeof(CString));
158 /* free string and reset it to NULL */
159 ST_FUNC void cstr_free(CString *cstr)
161 tcc_free(cstr->data_allocated);
162 cstr_new(cstr);
165 /* reset string to empty */
166 ST_FUNC void cstr_reset(CString *cstr)
168 cstr->size = 0;
171 /* XXX: unicode ? */
172 static void add_char(CString *cstr, int c)
174 if (c == '\'' || c == '\"' || c == '\\') {
175 /* XXX: could be more precise if char or string */
176 cstr_ccat(cstr, '\\');
178 if (c >= 32 && c <= 126) {
179 cstr_ccat(cstr, c);
180 } else {
181 cstr_ccat(cstr, '\\');
182 if (c == '\n') {
183 cstr_ccat(cstr, 'n');
184 } else {
185 cstr_ccat(cstr, '0' + ((c >> 6) & 7));
186 cstr_ccat(cstr, '0' + ((c >> 3) & 7));
187 cstr_ccat(cstr, '0' + (c & 7));
192 /* ------------------------------------------------------------------------- */
193 /* allocate a new token */
194 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
196 TokenSym *ts, **ptable;
197 int i;
199 if (tok_ident >= SYM_FIRST_ANOM)
200 tcc_error("memory full (symbols)");
202 /* expand token table if needed */
203 i = tok_ident - TOK_IDENT;
204 if ((i % TOK_ALLOC_INCR) == 0) {
205 ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
206 table_ident = ptable;
209 ts = tcc_malloc(sizeof(TokenSym) + len);
210 table_ident[i] = ts;
211 ts->tok = tok_ident++;
212 ts->sym_define = NULL;
213 ts->sym_label = NULL;
214 ts->sym_struct = NULL;
215 ts->sym_identifier = NULL;
216 ts->len = len;
217 ts->hash_next = NULL;
218 memcpy(ts->str, str, len);
219 ts->str[len] = '\0';
220 *pts = ts;
221 return ts;
224 #define TOK_HASH_INIT 1
225 #define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
227 /* find a token and add it if not found */
228 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
230 TokenSym *ts, **pts;
231 int i;
232 unsigned int h;
234 h = TOK_HASH_INIT;
235 for(i=0;i<len;i++)
236 h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
237 h &= (TOK_HASH_SIZE - 1);
239 pts = &hash_ident[h];
240 for(;;) {
241 ts = *pts;
242 if (!ts)
243 break;
244 if (ts->len == len && !memcmp(ts->str, str, len))
245 return ts;
246 pts = &(ts->hash_next);
248 return tok_alloc_new(pts, str, len);
251 /* XXX: buffer overflow */
252 /* XXX: float tokens */
253 ST_FUNC char *get_tok_str(int v, CValue *cv)
255 static char buf[STRING_MAX_SIZE + 1];
256 static CString cstr_buf;
257 CString *cstr;
258 CValue cval;
259 char *p;
260 int i, len;
262 if (!cv) {
263 cval.ull = 0;
264 cv = &cval;
267 /* NOTE: to go faster, we give a fixed buffer for small strings */
268 cstr_reset(&cstr_buf);
269 cstr_buf.data = buf;
270 cstr_buf.size_allocated = sizeof(buf);
271 p = buf;
273 switch(v) {
274 case TOK_CINT:
275 case TOK_CUINT:
276 /* XXX: not quite exact, but only useful for testing */
277 sprintf(p, "%u", cv->ui);
278 break;
279 case TOK_CLLONG:
280 case TOK_CULLONG:
281 /* XXX: not quite exact, but only useful for testing */
282 #ifdef _WIN32
283 sprintf(p, "%u", (unsigned)cv->ull);
284 #else
285 sprintf(p, "%llu", cv->ull);
286 #endif
287 break;
288 case TOK_LCHAR:
289 cstr_ccat(&cstr_buf, 'L');
290 case TOK_CCHAR:
291 cstr_ccat(&cstr_buf, '\'');
292 add_char(&cstr_buf, cv->i);
293 cstr_ccat(&cstr_buf, '\'');
294 cstr_ccat(&cstr_buf, '\0');
295 break;
296 case TOK_PPNUM:
297 cstr = cv->cstr;
298 len = cstr->size - 1;
299 for(i=0;i<len;i++)
300 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
301 cstr_ccat(&cstr_buf, '\0');
302 break;
303 case TOK_LSTR:
304 cstr_ccat(&cstr_buf, 'L');
305 case TOK_STR:
306 cstr = cv->cstr;
307 cstr_ccat(&cstr_buf, '\"');
308 if (v == TOK_STR) {
309 len = cstr->size - 1;
310 for(i=0;i<len;i++)
311 add_char(&cstr_buf, ((unsigned char *)cstr->data)[i]);
312 } else {
313 len = (cstr->size / sizeof(nwchar_t)) - 1;
314 for(i=0;i<len;i++)
315 add_char(&cstr_buf, ((nwchar_t *)cstr->data)[i]);
317 cstr_ccat(&cstr_buf, '\"');
318 cstr_ccat(&cstr_buf, '\0');
319 break;
320 case TOK_LT:
321 v = '<';
322 goto addv;
323 case TOK_GT:
324 v = '>';
325 goto addv;
326 case TOK_DOTS:
327 return strcpy(p, "...");
328 case TOK_A_SHL:
329 return strcpy(p, "<<=");
330 case TOK_A_SAR:
331 return strcpy(p, ">>=");
332 default:
333 if (v < TOK_IDENT) {
334 /* search in two bytes table */
335 const unsigned char *q = tok_two_chars;
336 while (*q) {
337 if (q[2] == v) {
338 *p++ = q[0];
339 *p++ = q[1];
340 *p = '\0';
341 return buf;
343 q += 3;
345 addv:
346 *p++ = v;
347 *p = '\0';
348 } else if (v < tok_ident) {
349 return table_ident[v - TOK_IDENT]->str;
350 } else if (v >= SYM_FIRST_ANOM) {
351 /* special name for anonymous symbol */
352 sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
353 } else {
354 /* should never happen */
355 return NULL;
357 break;
359 return cstr_buf.data;
362 /* fill input buffer and peek next char */
363 static int tcc_peekc_slow(BufferedFile *bf)
365 int len;
366 /* only tries to read if really end of buffer */
367 if (bf->buf_ptr >= bf->buf_end) {
368 if (bf->fd != -1) {
369 #if defined(PARSE_DEBUG)
370 len = 8;
371 #else
372 len = IO_BUF_SIZE;
373 #endif
374 len = read(bf->fd, bf->buffer, len);
375 if (len < 0)
376 len = 0;
377 } else {
378 len = 0;
380 total_bytes += len;
381 bf->buf_ptr = bf->buffer;
382 bf->buf_end = bf->buffer + len;
383 *bf->buf_end = CH_EOB;
385 if (bf->buf_ptr < bf->buf_end) {
386 return bf->buf_ptr[0];
387 } else {
388 bf->buf_ptr = bf->buf_end;
389 return CH_EOF;
393 /* return the current character, handling end of block if necessary
394 (but not stray) */
395 ST_FUNC int handle_eob(void)
397 return tcc_peekc_slow(file);
400 /* read next char from current input file and handle end of input buffer */
401 ST_INLN void inp(void)
403 ch = *(++(file->buf_ptr));
404 /* end of buffer/file handling */
405 if (ch == CH_EOB)
406 ch = handle_eob();
409 /* handle '\[\r]\n' */
410 static int handle_stray_noerror(void)
412 while (ch == '\\') {
413 inp();
414 if (ch == '\n') {
415 file->line_num++;
416 inp();
417 } else if (ch == '\r') {
418 inp();
419 if (ch != '\n')
420 goto fail;
421 file->line_num++;
422 inp();
423 } else {
424 fail:
425 return 1;
428 return 0;
/* Skip line continuations at 'ch'; a backslash that is not part of one is
   reported as an error. */
static void handle_stray(void)
{
    int bad = handle_stray_noerror();

    if (bad)
        tcc_error("stray '\\' in program");
}
437 /* skip the stray and handle the \\n case. Output an error if
438 incorrect char after the stray */
439 static int handle_stray1(uint8_t *p)
441 int c;
443 if (p >= file->buf_end) {
444 file->buf_ptr = p;
445 c = handle_eob();
446 p = file->buf_ptr;
447 if (c == '\\')
448 goto parse_stray;
449 } else {
450 parse_stray:
451 file->buf_ptr = p;
452 ch = *p;
453 handle_stray();
454 p = file->buf_ptr;
455 c = *p;
457 return c;
/* Advance 'p' and load the next character into 'c'. A '\' (which is also
   what the end-of-buffer marker looks like) goes through handle_eob() so
   that the buffer is refilled when needed; strays are NOT interpreted. */
#define PEEKC_EOB(c, p)\
{\
    p++;\
    c = *p;\
    if (c == '\\') {\
        file->buf_ptr = p;\
        c = handle_eob();\
        p = file->buf_ptr;\
    }\
}
/* Advance 'p' and load the next character into 'c'; a '\' is handed to
   handle_stray1(), which deals with buffer refill and line continuations. */
#define PEEKC(c, p)\
{\
    p++;\
    c = *p;\
    if (c == '\\') {\
        c = handle_stray1(p);\
        p = file->buf_ptr;\
    }\
}
483 /* input with '\[\r]\n' handling. Note that this function cannot
484 handle other characters after '\', so you cannot call it inside
485 strings or comments */
486 ST_FUNC void minp(void)
488 inp();
489 if (ch == '\\')
490 handle_stray();
494 /* single line C++ comments */
495 static uint8_t *parse_line_comment(uint8_t *p)
497 int c;
499 p++;
500 for(;;) {
501 c = *p;
502 redo:
503 if (c == '\n' || c == CH_EOF) {
504 break;
505 } else if (c == '\\') {
506 file->buf_ptr = p;
507 c = handle_eob();
508 p = file->buf_ptr;
509 if (c == '\\') {
510 PEEKC_EOB(c, p);
511 if (c == '\n') {
512 file->line_num++;
513 PEEKC_EOB(c, p);
514 } else if (c == '\r') {
515 PEEKC_EOB(c, p);
516 if (c == '\n') {
517 file->line_num++;
518 PEEKC_EOB(c, p);
521 } else {
522 goto redo;
524 } else {
525 p++;
528 return p;
531 /* C comments */
532 ST_FUNC uint8_t *parse_comment(uint8_t *p)
534 int c;
536 p++;
537 for(;;) {
538 /* fast skip loop */
539 for(;;) {
540 c = *p;
541 if (c == '\n' || c == '*' || c == '\\')
542 break;
543 p++;
544 c = *p;
545 if (c == '\n' || c == '*' || c == '\\')
546 break;
547 p++;
549 /* now we can handle all the cases */
550 if (c == '\n') {
551 file->line_num++;
552 p++;
553 } else if (c == '*') {
554 p++;
555 for(;;) {
556 c = *p;
557 if (c == '*') {
558 p++;
559 } else if (c == '/') {
560 goto end_of_comment;
561 } else if (c == '\\') {
562 file->buf_ptr = p;
563 c = handle_eob();
564 p = file->buf_ptr;
565 if (c == '\\') {
566 /* skip '\[\r]\n', otherwise just skip the stray */
567 while (c == '\\') {
568 PEEKC_EOB(c, p);
569 if (c == '\n') {
570 file->line_num++;
571 PEEKC_EOB(c, p);
572 } else if (c == '\r') {
573 PEEKC_EOB(c, p);
574 if (c == '\n') {
575 file->line_num++;
576 PEEKC_EOB(c, p);
578 } else {
579 goto after_star;
583 } else {
584 break;
587 after_star: ;
588 } else {
589 /* stray, eob or eof */
590 file->buf_ptr = p;
591 c = handle_eob();
592 p = file->buf_ptr;
593 if (c == CH_EOF) {
594 tcc_error("unexpected end of file in comment");
595 } else if (c == '\\') {
596 p++;
600 end_of_comment:
601 p++;
602 return p;
605 #define cinp minp
607 static inline void skip_spaces(void)
609 while (is_space(ch))
610 cinp();
/* Track runs of space tokens.
   t:   token to examine.
   spc: in/out state; non-zero means the previous token already counted as
        a space. It is cleared for a non-space token and set to 1 for the
        first space of a run.
   Returns 1 when 't' is a space that follows another one (i.e. it can be
   dropped), 0 otherwise. */
static inline int check_space(int t, int *spc)
{
    if (!is_space(t)) {
        *spc = 0;
        return 0;
    }
    if (*spc)
        return 1;
    *spc = 1;
    return 0;
}
624 /* parse a string without interpreting escapes */
625 static uint8_t *parse_pp_string(uint8_t *p,
626 int sep, CString *str)
628 int c;
629 p++;
630 for(;;) {
631 c = *p;
632 if (c == sep) {
633 break;
634 } else if (c == '\\') {
635 file->buf_ptr = p;
636 c = handle_eob();
637 p = file->buf_ptr;
638 if (c == CH_EOF) {
639 unterminated_string:
640 /* XXX: indicate line number of start of string */
641 tcc_error("missing terminating %c character", sep);
642 } else if (c == '\\') {
643 /* escape : just skip \[\r]\n */
644 PEEKC_EOB(c, p);
645 if (c == '\n') {
646 file->line_num++;
647 p++;
648 } else if (c == '\r') {
649 PEEKC_EOB(c, p);
650 if (c != '\n')
651 expect("'\n' after '\r'");
652 file->line_num++;
653 p++;
654 } else if (c == CH_EOF) {
655 goto unterminated_string;
656 } else {
657 if (str) {
658 cstr_ccat(str, '\\');
659 cstr_ccat(str, c);
661 p++;
664 } else if (c == '\n') {
665 file->line_num++;
666 goto add_char;
667 } else if (c == '\r') {
668 PEEKC_EOB(c, p);
669 if (c != '\n') {
670 if (str)
671 cstr_ccat(str, '\r');
672 } else {
673 file->line_num++;
674 goto add_char;
676 } else {
677 add_char:
678 if (str)
679 cstr_ccat(str, c);
680 p++;
683 p++;
684 return p;
687 /* skip block of text until #else, #elif or #endif. skip also pairs of
688 #if/#endif */
689 static void preprocess_skip(void)
691 int a, start_of_line, c, in_warn_or_error;
692 uint8_t *p;
694 p = file->buf_ptr;
695 a = 0;
696 redo_start:
697 start_of_line = 1;
698 in_warn_or_error = 0;
699 for(;;) {
700 redo_no_start:
701 c = *p;
702 switch(c) {
703 case ' ':
704 case '\t':
705 case '\f':
706 case '\v':
707 case '\r':
708 p++;
709 goto redo_no_start;
710 case '\n':
711 file->line_num++;
712 p++;
713 goto redo_start;
714 case '\\':
715 file->buf_ptr = p;
716 c = handle_eob();
717 if (c == CH_EOF) {
718 expect("#endif");
719 } else if (c == '\\') {
720 ch = file->buf_ptr[0];
721 handle_stray_noerror();
723 p = file->buf_ptr;
724 goto redo_no_start;
725 /* skip strings */
726 case '\"':
727 case '\'':
728 if (in_warn_or_error)
729 goto _default;
730 p = parse_pp_string(p, c, NULL);
731 break;
732 /* skip comments */
733 case '/':
734 if (in_warn_or_error)
735 goto _default;
736 file->buf_ptr = p;
737 ch = *p;
738 minp();
739 p = file->buf_ptr;
740 if (ch == '*') {
741 p = parse_comment(p);
742 } else if (ch == '/') {
743 p = parse_line_comment(p);
745 break;
746 case '#':
747 p++;
748 if (start_of_line) {
749 file->buf_ptr = p;
750 next_nomacro();
751 p = file->buf_ptr;
752 if (a == 0 &&
753 (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
754 goto the_end;
755 if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
756 a++;
757 else if (tok == TOK_ENDIF)
758 a--;
759 else if( tok == TOK_ERROR || tok == TOK_WARNING)
760 in_warn_or_error = 1;
761 else if (tok == TOK_LINEFEED)
762 goto redo_start;
764 break;
765 _default:
766 default:
767 p++;
768 break;
770 start_of_line = 0;
772 the_end: ;
773 file->buf_ptr = p;
776 /* ParseState handling */
778 /* XXX: currently, no include file info is stored. Thus, we cannot display
779 accurate messages if the function or data definition spans multiple
780 files */
782 /* save current parse state in 's' */
783 ST_FUNC void save_parse_state(ParseState *s)
785 s->line_num = file->line_num;
786 s->macro_ptr = macro_ptr;
787 s->tok = tok;
788 s->tokc = tokc;
791 /* restore parse state from 's' */
792 ST_FUNC void restore_parse_state(ParseState *s)
794 file->line_num = s->line_num;
795 macro_ptr = s->macro_ptr;
796 tok = s->tok;
797 tokc = s->tokc;
800 /* return the number of additional 'ints' necessary to store the
801 token */
802 static inline int tok_ext_size(int t)
804 switch(t) {
805 /* 4 bytes */
806 case TOK_CINT:
807 case TOK_CUINT:
808 case TOK_CCHAR:
809 case TOK_LCHAR:
810 case TOK_CFLOAT:
811 case TOK_LINENUM:
812 return 1;
813 case TOK_STR:
814 case TOK_LSTR:
815 case TOK_PPNUM:
816 tcc_error("unsupported token");
817 return 1;
818 case TOK_CDOUBLE:
819 case TOK_CLLONG:
820 case TOK_CULLONG:
821 return 2;
822 case TOK_CLDOUBLE:
823 return LDOUBLE_SIZE / 4;
824 default:
825 return 0;
829 /* token string handling */
831 ST_INLN void tok_str_new(TokenString *s)
833 s->str = NULL;
834 s->len = 0;
835 s->allocated_len = 0;
836 s->last_line_num = -1;
839 ST_FUNC void tok_str_free(int *str)
841 tcc_free(str);
844 static int *tok_str_realloc(TokenString *s)
846 int *str, len;
848 if (s->allocated_len == 0) {
849 len = 8;
850 } else {
851 len = s->allocated_len * 2;
853 str = tcc_realloc(s->str, len * sizeof(int));
854 s->allocated_len = len;
855 s->str = str;
856 return str;
859 ST_FUNC void tok_str_add(TokenString *s, int t)
861 int len, *str;
863 len = s->len;
864 str = s->str;
865 if (len >= s->allocated_len)
866 str = tok_str_realloc(s);
867 str[len++] = t;
868 s->len = len;
871 static void tok_str_add2(TokenString *s, int t, CValue *cv)
873 int len, *str;
875 len = s->len;
876 str = s->str;
878 /* allocate space for worst case */
879 if (len + TOK_MAX_SIZE > s->allocated_len)
880 str = tok_str_realloc(s);
881 str[len++] = t;
882 switch(t) {
883 case TOK_CINT:
884 case TOK_CUINT:
885 case TOK_CCHAR:
886 case TOK_LCHAR:
887 case TOK_CFLOAT:
888 case TOK_LINENUM:
889 str[len++] = cv->tab[0];
890 break;
891 case TOK_PPNUM:
892 case TOK_STR:
893 case TOK_LSTR:
895 int nb_words;
896 CString *cstr;
898 nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
899 while ((len + nb_words) > s->allocated_len)
900 str = tok_str_realloc(s);
901 cstr = (CString *)(str + len);
902 cstr->data = NULL;
903 cstr->size = cv->cstr->size;
904 cstr->data_allocated = NULL;
905 cstr->size_allocated = cstr->size;
906 memcpy((char *)cstr + sizeof(CString),
907 cv->cstr->data, cstr->size);
908 len += nb_words;
910 break;
911 case TOK_CDOUBLE:
912 case TOK_CLLONG:
913 case TOK_CULLONG:
914 #if LDOUBLE_SIZE == 8
915 case TOK_CLDOUBLE:
916 #endif
917 str[len++] = cv->tab[0];
918 str[len++] = cv->tab[1];
919 break;
920 #if LDOUBLE_SIZE == 12
921 case TOK_CLDOUBLE:
922 str[len++] = cv->tab[0];
923 str[len++] = cv->tab[1];
924 str[len++] = cv->tab[2];
925 #elif LDOUBLE_SIZE == 16
926 case TOK_CLDOUBLE:
927 str[len++] = cv->tab[0];
928 str[len++] = cv->tab[1];
929 str[len++] = cv->tab[2];
930 str[len++] = cv->tab[3];
931 #elif LDOUBLE_SIZE != 8
932 #error add long double size support
933 #endif
934 break;
935 default:
936 break;
938 s->len = len;
941 /* add the current parse token in token string 's' */
942 ST_FUNC void tok_str_add_tok(TokenString *s)
944 CValue cval;
945 memset(&cval, 0, sizeof(CValue));
947 /* save line number info */
948 if (file->line_num != s->last_line_num) {
949 s->last_line_num = file->line_num;
950 cval.i = s->last_line_num;
951 tok_str_add2(s, TOK_LINENUM, &cval);
953 tok_str_add2(s, tok, &tokc);
956 /* get a token from an integer array and increment pointer
957 accordingly. we code it as a macro to avoid pointer aliasing. */
958 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
960 const int *p = *pp;
961 int n, *tab;
963 tab = cv->tab;
964 switch(*t = *p++) {
965 case TOK_CINT:
966 case TOK_CUINT:
967 case TOK_CCHAR:
968 case TOK_LCHAR:
969 case TOK_CFLOAT:
970 case TOK_LINENUM:
971 tab[0] = *p++;
972 break;
973 case TOK_STR:
974 case TOK_LSTR:
975 case TOK_PPNUM:
976 cv->cstr = (CString *)p;
977 cv->cstr->data = (char *)p + sizeof(CString);
978 p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
979 break;
980 case TOK_CDOUBLE:
981 case TOK_CLLONG:
982 case TOK_CULLONG:
983 n = 2;
984 goto copy;
985 case TOK_CLDOUBLE:
986 #if LDOUBLE_SIZE == 16
987 n = 4;
988 #elif LDOUBLE_SIZE == 12
989 n = 3;
990 #elif LDOUBLE_SIZE == 8
991 n = 2;
992 #else
993 # error add long double size support
994 #endif
995 copy:
997 *tab++ = *p++;
998 while (--n);
999 break;
1000 default:
1001 break;
1003 *pp = p;
1006 static int macro_is_equal(const int *a, const int *b)
1008 char buf[STRING_MAX_SIZE + 1];
1009 int t;
1010 CValue cv;
1011 memset(&cv, 0, sizeof(CValue));
1012 while (*a && *b) {
1013 TOK_GET(&t, &a, &cv);
1014 pstrcpy(buf, sizeof buf, get_tok_str(t, &cv));
1015 TOK_GET(&t, &b, &cv);
1016 if (strcmp(buf, get_tok_str(t, &cv)))
1017 return 0;
1019 return !(*a || *b);
1022 /* defines handling */
1023 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1025 Sym *s;
1027 s = define_find(v);
1028 if (s && !macro_is_equal(s->d, str))
1029 tcc_warning("%s redefined", get_tok_str(v, NULL));
1031 s = sym_push2(&define_stack, v, macro_type, 0);
1032 s->d = str;
1033 s->next = first_arg;
1034 table_ident[v - TOK_IDENT]->sym_define = s;
1037 /* undefined a define symbol. Its name is just set to zero */
1038 ST_FUNC void define_undef(Sym *s)
1040 int v;
1041 v = s->v;
1042 if (v >= TOK_IDENT && v < tok_ident)
1043 table_ident[v - TOK_IDENT]->sym_define = NULL;
1044 s->v = 0;
1047 ST_INLN Sym *define_find(int v)
1049 v -= TOK_IDENT;
1050 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1051 return NULL;
1052 return table_ident[v]->sym_define;
1055 /* free define stack until top reaches 'b' */
1056 ST_FUNC void free_defines(Sym *b)
1058 Sym *top, *top1;
1059 int v;
1061 top = define_stack;
1062 while (top != b) {
1063 top1 = top->prev;
1064 /* do not free args or predefined defines */
1065 if (top->d)
1066 tok_str_free(top->d);
1067 v = top->v;
1068 if (v >= TOK_IDENT && v < tok_ident)
1069 table_ident[v - TOK_IDENT]->sym_define = NULL;
1070 sym_free(top);
1071 top = top1;
1073 define_stack = b;
1076 /* label lookup */
1077 ST_FUNC Sym *label_find(int v)
1079 v -= TOK_IDENT;
1080 if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1081 return NULL;
1082 return table_ident[v]->sym_label;
1085 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1087 Sym *s, **ps;
1088 s = sym_push2(ptop, v, 0, 0);
1089 s->r = flags;
1090 ps = &table_ident[v - TOK_IDENT]->sym_label;
1091 if (ptop == &global_label_stack) {
1092 /* modify the top most local identifier, so that
1093 sym_identifier will point to 's' when popped */
1094 while (*ps != NULL)
1095 ps = &(*ps)->prev_tok;
1097 s->prev_tok = *ps;
1098 *ps = s;
1099 return s;
1102 /* pop labels until element last is reached. Look if any labels are
1103 undefined. Define symbols if '&&label' was used. */
1104 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1106 Sym *s, *s1;
1107 for(s = *ptop; s != slast; s = s1) {
1108 s1 = s->prev;
1109 if (s->r == LABEL_DECLARED) {
1110 tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1111 } else if (s->r == LABEL_FORWARD) {
1112 tcc_error("label '%s' used but not defined",
1113 get_tok_str(s->v, NULL));
1114 } else {
1115 if (s->c) {
1116 /* define corresponding symbol. A size of
1117 1 is put. */
1118 put_extern_sym(s, cur_text_section, s->jnext, 1);
1121 /* remove label */
1122 table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1123 sym_free(s);
1125 *ptop = slast;
1128 /* eval an expression for #if/#elif */
1129 static int expr_preprocess(void)
1131 int c, t;
1132 TokenString str;
1134 tok_str_new(&str);
1135 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1136 next(); /* do macro subst */
1137 if (tok == TOK_DEFINED) {
1138 next_nomacro();
1139 t = tok;
1140 if (t == '(')
1141 next_nomacro();
1142 c = define_find(tok) != 0;
1143 if (t == '(')
1144 next_nomacro();
1145 tok = TOK_CINT;
1146 tokc.i = c;
1147 } else if (tok >= TOK_IDENT) {
1148 /* if undefined macro */
1149 tok = TOK_CINT;
1150 tokc.i = 0;
1152 tok_str_add_tok(&str);
1154 tok_str_add(&str, -1); /* simulate end of file */
1155 tok_str_add(&str, 0);
1156 /* now evaluate C constant expression */
1157 macro_ptr = str.str;
1158 next();
1159 c = expr_const();
1160 macro_ptr = NULL;
1161 tok_str_free(str.str);
1162 return c != 0;
#if defined(PARSE_DEBUG) || defined(PP_DEBUG)
/* Debug helper: print the zero-terminated token string 'str' on stdout,
   enclosed in '<' and '>'. */
static void tok_print(int *str)
{
    CValue cval;
    int t;

    memset(&cval, 0, sizeof(CValue));
    printf("<");
    for (;;) {
        TOK_GET(&t, &str, &cval);
        if (t == 0)
            break;
        printf("%s", get_tok_str(t, &cval));
    }
    printf(">\n");
}
#endif
1183 /* parse after #define */
1184 ST_FUNC void parse_define(void)
1186 Sym *s, *first, **ps;
1187 int v, t, varg, is_vaargs, spc;
1188 TokenString str;
1190 v = tok;
1191 if (v < TOK_IDENT)
1192 tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1193 /* XXX: should check if same macro (ANSI) */
1194 first = NULL;
1195 t = MACRO_OBJ;
1196 /* '(' must be just after macro definition for MACRO_FUNC */
1197 next_nomacro_spc();
1198 if (tok == '(') {
1199 next_nomacro();
1200 ps = &first;
1201 while (tok != ')') {
1202 varg = tok;
1203 next_nomacro();
1204 is_vaargs = 0;
1205 if (varg == TOK_DOTS) {
1206 varg = TOK___VA_ARGS__;
1207 is_vaargs = 1;
1208 } else if (tok == TOK_DOTS && gnu_ext) {
1209 is_vaargs = 1;
1210 next_nomacro();
1212 if (varg < TOK_IDENT)
1213 tcc_error("badly punctuated parameter list");
1214 s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1215 *ps = s;
1216 ps = &s->next;
1217 if (tok != ',')
1218 break;
1219 next_nomacro();
1221 if (tok == ')')
1222 next_nomacro_spc();
1223 t = MACRO_FUNC;
1225 tok_str_new(&str);
1226 spc = 2;
1227 /* EOF testing necessary for '-D' handling */
1228 while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1229 /* remove spaces around ## and after '#' */
1230 if (TOK_TWOSHARPS == tok) {
1231 if (1 == spc)
1232 --str.len;
1233 spc = 2;
1234 } else if ('#' == tok) {
1235 spc = 2;
1236 } else if (check_space(tok, &spc)) {
1237 goto skip;
1239 tok_str_add2(&str, tok, &tokc);
1240 skip:
1241 next_nomacro_spc();
1243 if (spc == 1)
1244 --str.len; /* remove trailing space */
1245 tok_str_add(&str, 0);
1246 #ifdef PP_DEBUG
1247 printf("define %s %d: ", get_tok_str(v, NULL), t);
1248 tok_print(str.str);
1249 #endif
1250 define_push(v, t, str.str, first);
1253 static inline int hash_cached_include(const char *filename)
1255 const unsigned char *s;
1256 unsigned int h;
1258 h = TOK_HASH_INIT;
1259 s = (unsigned char *) filename;
1260 while (*s) {
1261 h = TOK_HASH_FUNC(h, *s);
1262 s++;
1264 h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1265 return h;
1268 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1270 CachedInclude *e;
1271 int i, h;
1272 h = hash_cached_include(filename);
1273 i = s1->cached_includes_hash[h];
1274 for(;;) {
1275 if (i == 0)
1276 break;
1277 e = s1->cached_includes[i - 1];
1278 if (0 == PATHCMP(e->filename, filename))
1279 return e;
1280 i = e->hash_next;
1282 return NULL;
1285 static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
1287 CachedInclude *e;
1288 int h;
1290 if (search_cached_include(s1, filename))
1291 return;
1292 #ifdef INC_DEBUG
1293 printf("adding cached '%s' %s\n", filename, get_tok_str(ifndef_macro, NULL));
1294 #endif
1295 e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1296 strcpy(e->filename, filename);
1297 e->ifndef_macro = ifndef_macro;
1298 dynarray_add((void ***)&s1->cached_includes, &s1->nb_cached_includes, e);
1299 /* add in hash table */
1300 h = hash_cached_include(filename);
1301 e->hash_next = s1->cached_includes_hash[h];
1302 s1->cached_includes_hash[h] = s1->nb_cached_includes;
1305 static void pragma_parse(TCCState *s1)
1307 int val;
1309 next();
1310 if (tok == TOK_pack) {
1312 This may be:
1313 #pragma pack(1) // set
1314 #pragma pack() // reset to default
1315 #pragma pack(push,1) // push & set
1316 #pragma pack(pop) // restore previous
1318 next();
1319 skip('(');
1320 if (tok == TOK_ASM_pop) {
1321 next();
1322 if (s1->pack_stack_ptr <= s1->pack_stack) {
1323 stk_error:
1324 tcc_error("out of pack stack");
1326 s1->pack_stack_ptr--;
1327 } else {
1328 val = 0;
1329 if (tok != ')') {
1330 if (tok == TOK_ASM_push) {
1331 next();
1332 if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
1333 goto stk_error;
1334 s1->pack_stack_ptr++;
1335 skip(',');
1337 if (tok != TOK_CINT) {
1338 pack_error:
1339 tcc_error("invalid pack pragma");
1341 val = tokc.i;
1342 if (val < 1 || val > 16 || (val & (val - 1)) != 0)
1343 goto pack_error;
1344 next();
1346 *s1->pack_stack_ptr = val;
1347 skip(')');
1352 /* is_bof is true if first non space token at beginning of file */
/* Handle one preprocessor directive.
   parse_flags is saved and replaced by PREPROCESS | TOK_NUM | LINEFEED for
   the duration of the directive, the directive keyword is read with
   next_nomacro() and the switch below dispatches on it.  Branches that do
   not jump to 'the_end' fall out of the switch, after which every token up
   to TOK_LINEFEED is discarded.  parse_flags is restored at 'the_end'. */
1353 ST_FUNC void preprocess(int is_bof)
1355 TCCState *s1 = tcc_state;
1356 int i, c, n, saved_parse_flags;
1357 char buf[1024], *q;
1358 Sym *s;
1360 saved_parse_flags = parse_flags;
1361 parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM |
1362 PARSE_FLAG_LINEFEED;
1363 next_nomacro();
1364 redo:
1365 switch(tok) {
1366 case TOK_DEFINE:
1367 next_nomacro();
1368 parse_define();
1369 break;
1370 case TOK_UNDEF:
1371 next_nomacro();
1372 s = define_find(tok);
1373 /* undefine symbol by putting an invalid name */
1374 if (s)
1375 define_undef(s);
1376 break;
1377 case TOK_INCLUDE:
1378 case TOK_INCLUDE_NEXT:
/* the file name is read character by character from the input buffer;
   'c' ends up holding the closing delimiter ('>' or '"') */
1379 ch = file->buf_ptr[0];
1380 /* XXX: incorrect if comments : use next_nomacro with a special mode */
1381 skip_spaces();
1382 if (ch == '<') {
1383 c = '>';
1384 goto read_name;
1385 } else if (ch == '\"') {
1386 c = ch;
1387 read_name:
1388 inp();
1389 q = buf;
/* copy up to the closing delimiter; characters beyond sizeof(buf) - 1
   are consumed but not stored */
1390 while (ch != c && ch != '\n' && ch != CH_EOF) {
1391 if ((q - buf) < sizeof(buf) - 1)
1392 *q++ = ch;
1393 if (ch == '\\') {
1394 if (handle_stray_noerror() == 0)
1395 --q;
1396 } else
1397 inp();
1399 *q = '\0';
1400 minp();
1401 #if 0
1402 /* eat all spaces and comments after include */
1403 /* XXX: slightly incorrect */
1404 while (ch1 != '\n' && ch1 != CH_EOF)
1405 inp();
1406 #endif
1407 } else {
1408 /* computed #include : either we have only strings or
1409 we have anything enclosed in '<>' */
1410 next();
1411 buf[0] = '\0';
1412 if (tok == TOK_STR) {
1413 while (tok != TOK_LINEFEED) {
1414 if (tok != TOK_STR) {
1415 include_syntax:
1416 tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1418 pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data);
1419 next();
1421 c = '\"';
1422 } else {
1423 int len;
/* concatenate the spelling of every token on the line, then require
   the result to be wrapped in '<' ... '>' */
1424 while (tok != TOK_LINEFEED) {
1425 pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1426 next();
1428 len = strlen(buf);
1429 /* check syntax and remove '<>' */
1430 if (len < 2 || buf[0] != '<' || buf[len - 1] != '>')
1431 goto include_syntax;
1432 memmove(buf, buf + 1, len - 2);
1433 buf[len - 2] = '\0';
1434 c = '>';
1438 if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1439 tcc_error("#include recursion too deep");
1440 /* store current file in stack, but increment stack later below */
1441 *s1->include_stack_ptr = file;
/* candidate locations, in order: i == -2 absolute path, i == -1 directory
   of the including file (quoted form only), then include_paths followed
   by sysinclude_paths */
1443 n = s1->nb_include_paths + s1->nb_sysinclude_paths;
1444 for (i = -2; i < n; ++i) {
1445 char buf1[sizeof file->filename];
1446 CachedInclude *e;
1447 BufferedFile **f;
1448 const char *path;
1450 if (i == -2) {
1451 /* check absolute include path */
1452 if (!IS_ABSPATH(buf))
1453 continue;
1454 buf1[0] = 0;
1455 i = n; /* force end loop */
1457 } else if (i == -1) {
1458 /* search in current dir if "header.h" */
1459 if (c != '\"')
1460 continue;
1461 path = file->filename;
1462 pstrncpy(buf1, path, tcc_basename(path) - path);
1464 } else {
1465 /* search in all the include paths */
1466 if (i < s1->nb_include_paths)
1467 path = s1->include_paths[i];
1468 else
1469 path = s1->sysinclude_paths[i - s1->nb_include_paths];
1470 pstrcpy(buf1, sizeof(buf1), path);
1471 pstrcat(buf1, sizeof(buf1), "/");
1474 pstrcat(buf1, sizeof(buf1), buf);
/* #include_next: skip any candidate whose path matches a file that is
   already on the include stack */
1476 if (tok == TOK_INCLUDE_NEXT)
1477 for (f = s1->include_stack_ptr; f >= s1->include_stack; --f)
1478 if (0 == PATHCMP((*f)->filename, buf1)) {
1479 #ifdef INC_DEBUG
1480 printf("%s: #include_next skipping %s\n", file->filename, buf1);
1481 #endif
1482 goto include_trynext;
1485 e = search_cached_include(s1, buf1);
1486 if (e && define_find(e->ifndef_macro)) {
1487 /* no need to parse the include because the 'ifndef macro'
1488 is defined */
1489 #ifdef INC_DEBUG
1490 printf("%s: skipping cached %s\n", file->filename, buf1);
1491 #endif
1492 goto include_done;
1495 if (tcc_open(s1, buf1) < 0)
1496 include_trynext:
1497 continue;
1499 #ifdef INC_DEBUG
1500 printf("%s: including %s\n", file->prev->filename, file->filename);
1501 #endif
1502 /* update target deps */
1503 dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps,
1504 tcc_strdup(buf1));
1505 /* push current file in stack */
1506 ++s1->include_stack_ptr;
1507 /* add include file debug info */
1508 if (s1->do_debug)
1509 put_stabs(file->filename, N_BINCL, 0, 0, 0);
1510 tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1511 ch = file->buf_ptr[0];
1512 goto the_end;
1514 tcc_error("include file '%s' not found", buf);
1515 include_done:
1516 break;
/* conditional directives: each ifdef_stack entry uses bit 0 for "this
   branch is active" (tested at test_skip) and bit 1 for "#else already
   seen" (tested by #else and #elif) */
1517 case TOK_IFNDEF:
1518 c = 1;
1519 goto do_ifdef;
1520 case TOK_IF:
1521 c = expr_preprocess();
1522 goto do_if;
1523 case TOK_IFDEF:
1524 c = 0;
1525 do_ifdef:
1526 next_nomacro();
1527 if (tok < TOK_IDENT)
1528 tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
/* remember the macro of an #ifndef that is the first token of the file */
1529 if (is_bof) {
1530 if (c) {
1531 #ifdef INC_DEBUG
1532 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1533 #endif
1534 file->ifndef_macro = tok;
/* c is 1 for #ifndef and 0 for #ifdef, so the xor inverts the test for
   #ifndef */
1537 c = (define_find(tok) != 0) ^ c;
1538 do_if:
1539 if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1540 tcc_error("memory full (ifdef)");
1541 *s1->ifdef_stack_ptr++ = c;
1542 goto test_skip;
1543 case TOK_ELSE:
1544 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1545 tcc_error("#else without matching #if");
1546 if (s1->ifdef_stack_ptr[-1] & 2)
1547 tcc_error("#else after #else");
/* xor with 3 sets the "#else seen" bit and flips the active bit */
1548 c = (s1->ifdef_stack_ptr[-1] ^= 3);
1549 goto test_else;
1550 case TOK_ELIF:
1551 if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1552 tcc_error("#elif without matching #if");
1553 c = s1->ifdef_stack_ptr[-1];
1554 if (c > 1)
1555 tcc_error("#elif after #else");
1556 /* last #if/#elif expression was true: we skip */
1557 if (c == 1)
1558 goto skip;
1559 c = expr_preprocess();
1560 s1->ifdef_stack_ptr[-1] = c;
1561 test_else:
/* an #else/#elif at the file's outermost conditional level cancels the
   recorded #ifndef macro */
1562 if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1563 file->ifndef_macro = 0;
1564 test_skip:
1565 if (!(c & 1)) {
1566 skip:
/* inactive branch: skip it, then dispatch again on the token that
   preprocess_skip() left in tok */
1567 preprocess_skip();
1568 is_bof = 0;
1569 goto redo;
1571 break;
1572 case TOK_ENDIF:
1573 if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1574 tcc_error("#endif without matching #if");
1575 s1->ifdef_stack_ptr--;
1576 /* '#ifndef macro' was at the start of file. Now we check if
1577 an '#endif' is exactly at the end of file */
1578 if (file->ifndef_macro &&
1579 s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1580 file->ifndef_macro_saved = file->ifndef_macro;
1581 /* need to set to zero to avoid false matches if another
1582 #ifndef at middle of file */
1583 file->ifndef_macro = 0;
1584 while (tok != TOK_LINEFEED)
1585 next_nomacro();
1586 tok_flags |= TOK_FLAG_ENDIF;
1587 goto the_end;
1589 break;
1590 case TOK_LINE:
/* #line NUMBER ["FILENAME"] */
1591 next();
1592 if (tok != TOK_CINT)
1593 tcc_error("#line");
1594 file->line_num = tokc.i - 1; /* the line number will be incremented after */
1595 next();
1596 if (tok != TOK_LINEFEED) {
1597 if (tok != TOK_STR)
1598 tcc_error("#line");
1599 pstrcpy(file->filename, sizeof(file->filename),
1600 (char *)tokc.cstr->data);
1602 break;
1603 case TOK_ERROR:
1604 case TOK_WARNING:
/* the rest of the line is copied raw into buf (truncated to fit) and
   used as the message text */
1605 c = tok;
1606 ch = file->buf_ptr[0];
1607 skip_spaces();
1608 q = buf;
1609 while (ch != '\n' && ch != CH_EOF) {
1610 if ((q - buf) < sizeof(buf) - 1)
1611 *q++ = ch;
1612 if (ch == '\\') {
1613 if (handle_stray_noerror() == 0)
1614 --q;
1615 } else
1616 inp();
1618 *q = '\0';
1619 if (c == TOK_ERROR)
1620 tcc_error("#error %s", buf);
1621 else
1622 tcc_warning("#warning %s", buf);
1623 break;
1624 case TOK_PRAGMA:
1625 pragma_parse(s1);
1626 break;
1627 default:
1628 if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) {
1629 /* '!' is ignored to allow C scripts. numbers are ignored
1630 to emulate cpp behaviour */
1631 } else {
/* the caller's flags (saved_parse_flags), not the temporary directive
   flags, decide whether '#' starts an assembler line comment */
1632 if (!(saved_parse_flags & PARSE_FLAG_ASM_COMMENTS))
1633 tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
1634 else {
1635 /* this is a gas line comment in an 'S' file. */
1636 file->buf_ptr = parse_line_comment(file->buf_ptr);
1637 goto the_end;
1640 break;
1642 /* ignore other preprocess commands or #! for C scripts */
1643 while (tok != TOK_LINEFEED)
1644 next_nomacro();
1645 the_end:
1646 parse_flags = saved_parse_flags;
1649 /* evaluate escape codes in a string. */
1650 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1652 int c, n;
1653 const uint8_t *p;
1655 p = buf;
1656 for(;;) {
1657 c = *p;
1658 if (c == '\0')
1659 break;
1660 if (c == '\\') {
1661 p++;
1662 /* escape */
1663 c = *p;
1664 switch(c) {
1665 case '0': case '1': case '2': case '3':
1666 case '4': case '5': case '6': case '7':
1667 /* at most three octal digits */
1668 n = c - '0';
1669 p++;
1670 c = *p;
1671 if (isoct(c)) {
1672 n = n * 8 + c - '0';
1673 p++;
1674 c = *p;
1675 if (isoct(c)) {
1676 n = n * 8 + c - '0';
1677 p++;
1680 c = n;
1681 goto add_char_nonext;
1682 case 'x':
1683 case 'u':
1684 case 'U':
1685 p++;
1686 n = 0;
1687 for(;;) {
1688 c = *p;
1689 if (c >= 'a' && c <= 'f')
1690 c = c - 'a' + 10;
1691 else if (c >= 'A' && c <= 'F')
1692 c = c - 'A' + 10;
1693 else if (isnum(c))
1694 c = c - '0';
1695 else
1696 break;
1697 n = n * 16 + c;
1698 p++;
1700 c = n;
1701 goto add_char_nonext;
1702 case 'a':
1703 c = '\a';
1704 break;
1705 case 'b':
1706 c = '\b';
1707 break;
1708 case 'f':
1709 c = '\f';
1710 break;
1711 case 'n':
1712 c = '\n';
1713 break;
1714 case 'r':
1715 c = '\r';
1716 break;
1717 case 't':
1718 c = '\t';
1719 break;
1720 case 'v':
1721 c = '\v';
1722 break;
1723 case 'e':
1724 if (!gnu_ext)
1725 goto invalid_escape;
1726 c = 27;
1727 break;
1728 case '\'':
1729 case '\"':
1730 case '\\':
1731 case '?':
1732 break;
1733 default:
1734 invalid_escape:
1735 if (c >= '!' && c <= '~')
1736 tcc_warning("unknown escape sequence: \'\\%c\'", c);
1737 else
1738 tcc_warning("unknown escape sequence: \'\\x%x\'", c);
1739 break;
1742 p++;
1743 add_char_nonext:
1744 if (!is_long)
1745 cstr_ccat(outstr, c);
1746 else
1747 cstr_wccat(outstr, c);
1749 /* add a trailing '\0' */
1750 if (!is_long)
1751 cstr_ccat(outstr, '\0');
1752 else
1753 cstr_wccat(outstr, '\0');
/* we use 64 bit numbers */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val
 *
 * bn holds BN_SIZE 32-bit words, least significant word first.  The bits
 * shifted out of one word are or'ed into the bottom of the next one.
 * shift must be in the range 0..31.
 */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    for(i=0;i<BN_SIZE;i++) {
        v = bn[i];
        bn[i] = (v << shift) | or_val;
        /* shifting a 32-bit value by 32 is undefined, so a zero shift
           carries nothing into the next word */
        or_val = shift ? v >> (32 - shift) : 0;
    }
}

/* set every word of bn to zero */
static void bn_zero(unsigned int *bn)
{
    int i;
    for(i=0;i<BN_SIZE;i++) {
        bn[i] = 0;
    }
}
1779 /* parse number in null terminated string 'p' and return it in the
1780 current token */
1781 static void parse_number(const char *p)
1783 int b, t, shift, frac_bits, s, exp_val, ch;
1784 char *q;
1785 unsigned int bn[BN_SIZE];
1786 double d;
1788 /* number */
1789 q = token_buf;
1790 ch = *p++;
1791 t = ch;
1792 ch = *p++;
1793 *q++ = t;
1794 b = 10;
1795 if (t == '.') {
1796 goto float_frac_parse;
1797 } else if (t == '0') {
1798 if (ch == 'x' || ch == 'X') {
1799 q--;
1800 ch = *p++;
1801 b = 16;
1802 } else if (tcc_ext && (ch == 'b' || ch == 'B')) {
1803 q--;
1804 ch = *p++;
1805 b = 2;
1808 /* parse all digits. cannot check octal numbers at this stage
1809 because of floating point constants */
1810 while (1) {
1811 if (ch >= 'a' && ch <= 'f')
1812 t = ch - 'a' + 10;
1813 else if (ch >= 'A' && ch <= 'F')
1814 t = ch - 'A' + 10;
1815 else if (isnum(ch))
1816 t = ch - '0';
1817 else
1818 break;
1819 if (t >= b)
1820 break;
1821 if (q >= token_buf + STRING_MAX_SIZE) {
1822 num_too_long:
1823 tcc_error("number too long");
1825 *q++ = ch;
1826 ch = *p++;
1828 if (ch == '.' ||
1829 ((ch == 'e' || ch == 'E') && b == 10) ||
1830 ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
1831 if (b != 10) {
1832 /* NOTE: strtox should support that for hexa numbers, but
1833 non ISOC99 libcs do not support it, so we prefer to do
1834 it by hand */
1835 /* hexadecimal or binary floats */
1836 /* XXX: handle overflows */
1837 *q = '\0';
1838 if (b == 16)
1839 shift = 4;
1840 else
1841 shift = 2;
1842 bn_zero(bn);
1843 q = token_buf;
1844 while (1) {
1845 t = *q++;
1846 if (t == '\0') {
1847 break;
1848 } else if (t >= 'a') {
1849 t = t - 'a' + 10;
1850 } else if (t >= 'A') {
1851 t = t - 'A' + 10;
1852 } else {
1853 t = t - '0';
1855 bn_lshift(bn, shift, t);
1857 frac_bits = 0;
1858 if (ch == '.') {
1859 ch = *p++;
1860 while (1) {
1861 t = ch;
1862 if (t >= 'a' && t <= 'f') {
1863 t = t - 'a' + 10;
1864 } else if (t >= 'A' && t <= 'F') {
1865 t = t - 'A' + 10;
1866 } else if (t >= '0' && t <= '9') {
1867 t = t - '0';
1868 } else {
1869 break;
1871 if (t >= b)
1872 tcc_error("invalid digit");
1873 bn_lshift(bn, shift, t);
1874 frac_bits += shift;
1875 ch = *p++;
1878 if (ch != 'p' && ch != 'P')
1879 expect("exponent");
1880 ch = *p++;
1881 s = 1;
1882 exp_val = 0;
1883 if (ch == '+') {
1884 ch = *p++;
1885 } else if (ch == '-') {
1886 s = -1;
1887 ch = *p++;
1889 if (ch < '0' || ch > '9')
1890 expect("exponent digits");
1891 while (ch >= '0' && ch <= '9') {
1892 exp_val = exp_val * 10 + ch - '0';
1893 ch = *p++;
1895 exp_val = exp_val * s;
1897 /* now we can generate the number */
1898 /* XXX: should patch directly float number */
1899 d = (double)bn[1] * 4294967296.0 + (double)bn[0];
1900 d = ldexp(d, exp_val - frac_bits);
1901 t = toup(ch);
1902 if (t == 'F') {
1903 ch = *p++;
1904 tok = TOK_CFLOAT;
1905 /* float : should handle overflow */
1906 tokc.f = (float)d;
1907 } else if (t == 'L') {
1908 ch = *p++;
1909 #ifdef TCC_TARGET_PE
1910 tok = TOK_CDOUBLE;
1911 tokc.d = d;
1912 #else
1913 tok = TOK_CLDOUBLE;
1914 /* XXX: not large enough */
1915 tokc.ld = (long double)d;
1916 #endif
1917 } else {
1918 tok = TOK_CDOUBLE;
1919 tokc.d = d;
1921 } else {
1922 /* decimal floats */
1923 if (ch == '.') {
1924 if (q >= token_buf + STRING_MAX_SIZE)
1925 goto num_too_long;
1926 *q++ = ch;
1927 ch = *p++;
1928 float_frac_parse:
1929 while (ch >= '0' && ch <= '9') {
1930 if (q >= token_buf + STRING_MAX_SIZE)
1931 goto num_too_long;
1932 *q++ = ch;
1933 ch = *p++;
1936 if (ch == 'e' || ch == 'E') {
1937 if (q >= token_buf + STRING_MAX_SIZE)
1938 goto num_too_long;
1939 *q++ = ch;
1940 ch = *p++;
1941 if (ch == '-' || ch == '+') {
1942 if (q >= token_buf + STRING_MAX_SIZE)
1943 goto num_too_long;
1944 *q++ = ch;
1945 ch = *p++;
1947 if (ch < '0' || ch > '9')
1948 expect("exponent digits");
1949 while (ch >= '0' && ch <= '9') {
1950 if (q >= token_buf + STRING_MAX_SIZE)
1951 goto num_too_long;
1952 *q++ = ch;
1953 ch = *p++;
1956 *q = '\0';
1957 t = toup(ch);
1958 errno = 0;
1959 if (t == 'F') {
1960 ch = *p++;
1961 tok = TOK_CFLOAT;
1962 tokc.f = strtof(token_buf, NULL);
1963 } else if (t == 'L') {
1964 ch = *p++;
1965 #ifdef TCC_TARGET_PE
1966 tok = TOK_CDOUBLE;
1967 tokc.d = strtod(token_buf, NULL);
1968 #else
1969 tok = TOK_CLDOUBLE;
1970 tokc.ld = strtold(token_buf, NULL);
1971 #endif
1972 } else {
1973 tok = TOK_CDOUBLE;
1974 tokc.d = strtod(token_buf, NULL);
1977 } else {
1978 unsigned long long n, n1;
1979 int lcount, ucount;
1981 /* integer number */
1982 *q = '\0';
1983 q = token_buf;
1984 if (b == 10 && *q == '0') {
1985 b = 8;
1986 q++;
1988 n = 0;
1989 while(1) {
1990 t = *q++;
1991 /* no need for checks except for base 10 / 8 errors */
1992 if (t == '\0') {
1993 break;
1994 } else if (t >= 'a') {
1995 t = t - 'a' + 10;
1996 } else if (t >= 'A') {
1997 t = t - 'A' + 10;
1998 } else {
1999 t = t - '0';
2000 if (t >= b)
2001 tcc_error("invalid digit");
2003 n1 = n;
2004 n = n * b + t;
2005 /* detect overflow */
2006 /* XXX: this test is not reliable */
2007 if (n < n1)
2008 tcc_error("integer constant overflow");
2011 /* XXX: not exactly ANSI compliant */
2012 if ((n & 0xffffffff00000000LL) != 0) {
2013 if ((n >> 63) != 0)
2014 tok = TOK_CULLONG;
2015 else
2016 tok = TOK_CLLONG;
2017 } else if (n > 0x7fffffff) {
2018 tok = TOK_CUINT;
2019 } else {
2020 tok = TOK_CINT;
2022 lcount = 0;
2023 ucount = 0;
2024 for(;;) {
2025 t = toup(ch);
2026 if (t == 'L') {
2027 if (lcount >= 2)
2028 tcc_error("three 'l's in integer constant");
2029 lcount++;
2030 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2031 if (lcount == 2) {
2032 #endif
2033 if (tok == TOK_CINT)
2034 tok = TOK_CLLONG;
2035 else if (tok == TOK_CUINT)
2036 tok = TOK_CULLONG;
2037 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2039 #endif
2040 ch = *p++;
2041 } else if (t == 'U') {
2042 if (ucount >= 1)
2043 tcc_error("two 'u's in integer constant");
2044 ucount++;
2045 if (tok == TOK_CINT)
2046 tok = TOK_CUINT;
2047 else if (tok == TOK_CLLONG)
2048 tok = TOK_CULLONG;
2049 ch = *p++;
2050 } else {
2051 break;
2054 if (tok == TOK_CINT || tok == TOK_CUINT)
2055 tokc.ui = n;
2056 else
2057 tokc.ull = n;
2059 if (ch)
2060 tcc_error("invalid number\n");
/* Expand to a complete 'case' for a one-character token that may be
   followed by one specific second character: after c1, tok becomes tok2 if
   the next character is c2 (which is then consumed), otherwise tok1.
   Relies on 'c', 'p' and 'tok' being in scope at the point of use. */
#define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c == c2) {                          \
            p++;                                \
            tok = tok2;                         \
        } else {                                \
            tok = tok1;                         \
        }                                       \
        break;
2075 /* return next token without macro substitution */
/* Lexes one token starting at file->buf_ptr.  The result is stored in the
   global 'tok'; preprocessing numbers, strings and character constants
   also fill 'tokc'.  The updated read position is written back to
   file->buf_ptr at 'keep_tok_flags'.  Ordinary tokens reset tok_flags to 0;
   blanks, comments, linefeeds and the end-of-file linefeed jump straight
   to 'keep_tok_flags' and leave the flags as they are. */
2076 static inline void next_nomacro1(void)
2078 int t, c, is_long;
2079 TokenSym *ts;
2080 uint8_t *p, *p1;
2081 unsigned int h;
2083 p = file->buf_ptr;
2084 redo_no_start:
2085 c = *p;
2086 switch(c) {
2087 case ' ':
2088 case '\t':
2089 tok = c;
2090 p++;
2091 goto keep_tok_flags;
2092 case '\f':
2093 case '\v':
2094 case '\r':
2095 p++;
2096 goto redo_no_start;
2097 case '\\':
2098 /* first look if it is in fact an end of buffer */
2099 if (p >= file->buf_end) {
2100 file->buf_ptr = p;
2101 handle_eob();
2102 p = file->buf_ptr;
2103 if (p >= file->buf_end)
2104 goto parse_eof;
2105 else
2106 goto redo_no_start;
2107 } else {
2108 file->buf_ptr = p;
2109 ch = *p;
2110 handle_stray();
2111 p = file->buf_ptr;
2112 goto redo_no_start;
2114 parse_eof:
/* end of the current file: emit one final linefeed if linefeeds are
   requested, then either report TOK_EOF or return to the including file */
2116 TCCState *s1 = tcc_state;
2117 if ((parse_flags & PARSE_FLAG_LINEFEED)
2118 && !(tok_flags & TOK_FLAG_EOF)) {
2119 tok_flags |= TOK_FLAG_EOF;
2120 tok = TOK_LINEFEED;
2121 goto keep_tok_flags;
2122 } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2123 tok = TOK_EOF;
2124 } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2125 tcc_error("missing #endif");
2126 } else if (s1->include_stack_ptr == s1->include_stack) {
2127 /* no include left : end of file. */
2128 tok = TOK_EOF;
2129 } else {
2130 tok_flags &= ~TOK_FLAG_EOF;
2131 /* pop include file */
2133 /* test if previous '#endif' was after a #ifdef at
2134 start of file */
2135 if (tok_flags & TOK_FLAG_ENDIF) {
2136 #ifdef INC_DEBUG
2137 printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2138 #endif
2139 add_cached_include(s1, file->filename, file->ifndef_macro_saved);
2140 tok_flags &= ~TOK_FLAG_ENDIF;
2143 /* add end of include file debug info */
2144 if (tcc_state->do_debug) {
2145 put_stabd(N_EINCL, 0, 0);
2147 /* pop include stack */
2148 tcc_close();
2149 s1->include_stack_ptr--;
2150 p = file->buf_ptr;
2151 goto redo_no_start;
2154 break;
2156 case '\n':
2157 file->line_num++;
2158 tok_flags |= TOK_FLAG_BOL;
2159 p++;
2160 maybe_newline:
/* linefeeds are tokens only when PARSE_FLAG_LINEFEED is set; otherwise
   lexing simply continues */
2161 if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2162 goto redo_no_start;
2163 tok = TOK_LINEFEED;
2164 goto keep_tok_flags;
2166 case '#':
2167 /* XXX: simplify */
2168 PEEKC(c, p);
/* a '#' at the beginning of a line starts a directive when preprocessing
   is enabled */
2169 if ((tok_flags & TOK_FLAG_BOL) &&
2170 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2171 file->buf_ptr = p;
2172 preprocess(tok_flags & TOK_FLAG_BOF);
2173 p = file->buf_ptr;
2174 goto maybe_newline;
2175 } else {
2176 if (c == '#') {
2177 p++;
2178 tok = TOK_TWOSHARPS;
2179 } else {
2180 if (parse_flags & PARSE_FLAG_ASM_COMMENTS) {
2181 p = parse_line_comment(p - 1);
2182 goto redo_no_start;
2183 } else {
2184 tok = '#';
2188 break;
/* identifiers; 'L' is missing from the list below because it has its own
   case further down (it may introduce a wide string or character) */
2190 case 'a': case 'b': case 'c': case 'd':
2191 case 'e': case 'f': case 'g': case 'h':
2192 case 'i': case 'j': case 'k': case 'l':
2193 case 'm': case 'n': case 'o': case 'p':
2194 case 'q': case 'r': case 's': case 't':
2195 case 'u': case 'v': case 'w': case 'x':
2196 case 'y': case 'z':
2197 case 'A': case 'B': case 'C': case 'D':
2198 case 'E': case 'F': case 'G': case 'H':
2199 case 'I': case 'J': case 'K':
2200 case 'M': case 'N': case 'O': case 'P':
2201 case 'Q': case 'R': case 'S': case 'T':
2202 case 'U': case 'V': case 'W': case 'X':
2203 case 'Y': case 'Z':
2204 case '_':
2205 parse_ident_fast:
/* hash the identifier while scanning it directly in the input buffer */
2206 p1 = p;
2207 h = TOK_HASH_INIT;
2208 h = TOK_HASH_FUNC(h, c);
2209 p++;
2210 for(;;) {
2211 c = *p;
2212 if (!isidnum_table[c-CH_EOF])
2213 break;
2214 h = TOK_HASH_FUNC(h, c);
2215 p++;
2217 if (c != '\\') {
2218 TokenSym **pts;
2219 int len;
2221 /* fast case : no stray found, so we have the full token
2222 and we have already hashed it */
2223 len = p - p1;
2224 h &= (TOK_HASH_SIZE - 1);
2225 pts = &hash_ident[h];
2226 for(;;) {
2227 ts = *pts;
2228 if (!ts)
2229 break;
2230 if (ts->len == len && !memcmp(ts->str, p1, len))
2231 goto token_found;
2232 pts = &(ts->hash_next);
2234 ts = tok_alloc_new(pts, (char *) p1, len);
2235 token_found: ;
2236 } else {
2237 /* slower case */
/* a backslash follows: copy what was scanned so far into tokcstr and
   continue character by character through PEEKC */
2238 cstr_reset(&tokcstr);
2240 while (p1 < p) {
2241 cstr_ccat(&tokcstr, *p1);
2242 p1++;
2244 p--;
2245 PEEKC(c, p);
2246 parse_ident_slow:
2247 while (isidnum_table[c-CH_EOF]) {
2248 cstr_ccat(&tokcstr, c);
2249 PEEKC(c, p);
2251 ts = tok_alloc(tokcstr.data, tokcstr.size);
2253 tok = ts->tok;
2254 break;
2255 case 'L':
/* either an ordinary identifier starting with 'L' or the prefix of a
   wide string / character constant */
2256 t = p[1];
2257 if (t != '\\' && t != '\'' && t != '\"') {
2258 /* fast case */
2259 goto parse_ident_fast;
2260 } else {
2261 PEEKC(c, p);
2262 if (c == '\'' || c == '\"') {
2263 is_long = 1;
2264 goto str_const;
2265 } else {
2266 cstr_reset(&tokcstr);
2267 cstr_ccat(&tokcstr, 'L');
2268 goto parse_ident_slow;
2271 break;
2272 case '0': case '1': case '2': case '3':
2273 case '4': case '5': case '6': case '7':
2274 case '8': case '9':
2276 cstr_reset(&tokcstr);
2277 /* after the first digit, accept digits, alpha, '.' or sign if
2278 prefixed by 'eEpP' */
2279 parse_num:
2280 for(;;) {
2281 t = c;
2282 cstr_ccat(&tokcstr, c);
2283 PEEKC(c, p);
2284 if (!(isnum(c) || isid(c) || c == '.' ||
2285 ((c == '+' || c == '-') &&
2286 (t == 'e' || t == 'E' || t == 'p' || t == 'P'))))
2287 break;
2289 /* We add a trailing '\0' to ease parsing */
2290 cstr_ccat(&tokcstr, '\0');
2291 tokc.cstr = &tokcstr;
2292 tok = TOK_PPNUM;
2293 break;
2294 case '.':
2295 /* special dot handling because it can also start a number */
2296 PEEKC(c, p);
2297 if (isnum(c)) {
2298 cstr_reset(&tokcstr);
2299 cstr_ccat(&tokcstr, '.');
2300 goto parse_num;
2301 } else if (c == '.') {
2302 PEEKC(c, p);
2303 if (c != '.')
2304 expect("'.'");
2305 PEEKC(c, p);
2306 tok = TOK_DOTS;
2307 } else {
2308 tok = '.';
2310 break;
2311 case '\'':
2312 case '\"':
2313 is_long = 0;
2314 str_const:
/* string or character constant; is_long is 1 when reached through the
   'L' prefix */
2316 CString str;
2317 int sep;
2319 sep = c;
2321 /* parse the string */
2322 cstr_new(&str);
2323 p = parse_pp_string(p, sep, &str);
2324 cstr_ccat(&str, '\0');
2326 /* eval the escape (should be done as TOK_PPNUM) */
2327 cstr_reset(&tokcstr);
2328 parse_escape_string(&tokcstr, str.data, is_long);
2329 cstr_free(&str);
2331 if (sep == '\'') {
2332 int char_size;
2333 /* XXX: make it portable */
2334 if (!is_long)
2335 char_size = 1;
2336 else
2337 char_size = sizeof(nwchar_t);
/* tokcstr includes the terminator, so a size of one character means the
   constant itself was empty */
2338 if (tokcstr.size <= char_size)
2339 tcc_error("empty character constant");
2340 if (tokcstr.size > 2 * char_size)
2341 tcc_warning("multi-character character constant");
2342 if (!is_long) {
2343 tokc.i = *(int8_t *)tokcstr.data;
2344 tok = TOK_CCHAR;
2345 } else {
2346 tokc.i = *(nwchar_t *)tokcstr.data;
2347 tok = TOK_LCHAR;
2349 } else {
2350 tokc.cstr = &tokcstr;
2351 if (!is_long)
2352 tok = TOK_STR;
2353 else
2354 tok = TOK_LSTR;
2357 break;
/* operators of up to three characters */
2359 case '<':
2360 PEEKC(c, p);
2361 if (c == '=') {
2362 p++;
2363 tok = TOK_LE;
2364 } else if (c == '<') {
2365 PEEKC(c, p);
2366 if (c == '=') {
2367 p++;
2368 tok = TOK_A_SHL;
2369 } else {
2370 tok = TOK_SHL;
2372 } else {
2373 tok = TOK_LT;
2375 break;
2377 case '>':
2378 PEEKC(c, p);
2379 if (c == '=') {
2380 p++;
2381 tok = TOK_GE;
2382 } else if (c == '>') {
2383 PEEKC(c, p);
2384 if (c == '=') {
2385 p++;
2386 tok = TOK_A_SAR;
2387 } else {
2388 tok = TOK_SAR;
2390 } else {
2391 tok = TOK_GT;
2393 break;
2395 case '&':
2396 PEEKC(c, p);
2397 if (c == '&') {
2398 p++;
2399 tok = TOK_LAND;
2400 } else if (c == '=') {
2401 p++;
2402 tok = TOK_A_AND;
2403 } else {
2404 tok = '&';
2406 break;
2408 case '|':
2409 PEEKC(c, p);
2410 if (c == '|') {
2411 p++;
2412 tok = TOK_LOR;
2413 } else if (c == '=') {
2414 p++;
2415 tok = TOK_A_OR;
2416 } else {
2417 tok = '|';
2419 break;
2421 case '+':
2422 PEEKC(c, p);
2423 if (c == '+') {
2424 p++;
2425 tok = TOK_INC;
2426 } else if (c == '=') {
2427 p++;
2428 tok = TOK_A_ADD;
2429 } else {
2430 tok = '+';
2432 break;
2434 case '-':
2435 PEEKC(c, p);
2436 if (c == '-') {
2437 p++;
2438 tok = TOK_DEC;
2439 } else if (c == '=') {
2440 p++;
2441 tok = TOK_A_SUB;
2442 } else if (c == '>') {
2443 p++;
2444 tok = TOK_ARROW;
2445 } else {
2446 tok = '-';
2448 break;
2450 PARSE2('!', '!', '=', TOK_NE)
2451 PARSE2('=', '=', '=', TOK_EQ)
2452 PARSE2('*', '*', '=', TOK_A_MUL)
2453 PARSE2('%', '%', '=', TOK_A_MOD)
2454 PARSE2('^', '^', '=', TOK_A_XOR)
2456 /* comments or operator */
2457 case '/':
2458 PEEKC(c, p);
2459 if (c == '*') {
2460 p = parse_comment(p);
2461 /* comments replaced by a blank */
2462 tok = ' ';
2463 goto keep_tok_flags;
2464 } else if (c == '/') {
2465 p = parse_line_comment(p);
2466 tok = ' ';
2467 goto keep_tok_flags;
2468 } else if (c == '=') {
2469 p++;
2470 tok = TOK_A_DIV;
2471 } else {
2472 tok = '/';
2474 break;
2476 /* simple tokens */
2477 case '(':
2478 case ')':
2479 case '[':
2480 case ']':
2481 case '{':
2482 case '}':
2483 case ',':
2484 case ';':
2485 case ':':
2486 case '?':
2487 case '~':
2488 case '$': /* only used in assembler */
2489 case '@': /* dito */
2490 tok = c;
2491 p++;
2492 break;
2493 default:
2494 tcc_error("unrecognized character \\x%02x", c);
2495 break;
/* a real token was produced: it is no longer at the beginning of a line
   or file */
2497 tok_flags = 0;
2498 keep_tok_flags:
2499 file->buf_ptr = p;
2500 #if defined(PARSE_DEBUG)
2501 printf("token = %s\n", get_tok_str(tok, &tokc));
2502 #endif
2505 /* return next token without macro substitution. Can read input from
2506 macro_ptr buffer */
2507 static void next_nomacro_spc(void)
2509 if (macro_ptr) {
2510 redo:
2511 tok = *macro_ptr;
2512 if (tok) {
2513 TOK_GET(&tok, &macro_ptr, &tokc);
2514 if (tok == TOK_LINENUM) {
2515 file->line_num = tokc.i;
2516 goto redo;
2519 } else {
2520 next_nomacro1();
2524 ST_FUNC void next_nomacro(void)
2526 do {
2527 next_nomacro_spc();
2528 } while (is_space(tok));
2531 /* substitute args in macro_str and return allocated string */
/* Walks the zero-terminated token string 'macro_str' and builds a new token
   string in which every identifier found in 'args' is replaced:
     - after '#', by a single TOK_STR holding the argument's spelling;
     - next to '##', by the argument's tokens copied unexpanded;
     - otherwise by the argument's tokens run through macro_subst().
   The returned buffer is str.str; the caller in this file releases it with
   tok_str_free(). */
2532 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2534 int last_tok, t, spc;
2535 const int *st;
2536 Sym *s;
2537 TokenString str;
2538 CString cstr;
2539 CValue cval;
2540 memset(&cval, 0, sizeof(CValue));
2542 tok_str_new(&str);
2543 last_tok = 0;
2544 while(1) {
2545 TOK_GET(&t, &macro_str, &cval);
2546 if (!t)
2547 break;
2548 if (t == '#') {
2549 /* stringize */
2550 TOK_GET(&t, &macro_str, &cval);
2551 if (!t)
2552 break;
2553 s = sym_find2(args, t);
2554 if (s) {
2555 cstr_new(&cstr);
2556 st = s->d;
2557 spc = 0;
/* NOTE(review): check_space() is defined elsewhere; it appears to fold
   runs of blanks and to leave in spc whether the text ends with one,
   which the size adjustment below then drops - confirm */
2558 while (*st) {
2559 TOK_GET(&t, &st, &cval);
2560 if (!check_space(t, &spc))
2561 cstr_cat(&cstr, get_tok_str(t, &cval));
2563 cstr.size -= spc;
2564 cstr_ccat(&cstr, '\0');
2565 #ifdef PP_DEBUG
2566 printf("stringize: %s\n", (char *)cstr.data);
2567 #endif
2568 /* add string */
2569 cval.cstr = &cstr;
2570 tok_str_add2(&str, TOK_STR, &cval);
2571 cstr_free(&cstr);
2572 } else {
/* '#' not followed by a parameter: only the following token is copied */
2573 tok_str_add2(&str, t, &cval);
2575 } else if (t >= TOK_IDENT) {
2576 s = sym_find2(args, t);
2577 if (s) {
2578 st = s->d;
2579 /* if '##' is present before or after, no arg substitution */
2580 if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) {
2581 /* special case for var arg macros : ## eats the
2582 ',' if empty VA_ARGS variable. */
2583 /* XXX: test of the ',' is not 100%
2584 reliable. should fix it to avoid security
2585 problems */
2586 if (gnu_ext && s->type.t &&
2587 last_tok == TOK_TWOSHARPS &&
2588 str.len >= 2 && str.str[str.len - 2] == ',') {
2589 if (*st == 0) {
2590 /* suppress ',' '##' */
2591 str.len -= 2;
2592 } else {
2593 /* suppress '##' and add variable */
2594 str.len--;
2595 goto add_var;
2597 } else {
2598 int t1;
2599 add_var:
/* copy the argument's tokens verbatim, without macro expansion */
2600 for(;;) {
2601 TOK_GET(&t1, &st, &cval);
2602 if (!t1)
2603 break;
2604 tok_str_add2(&str, t1, &cval);
2607 } else {
2608 /* NOTE: the stream cannot be read when macro
2609 substituing an argument */
2610 macro_subst(&str, nested_list, st, NULL);
2612 } else {
2613 tok_str_add(&str, t);
2615 } else {
2616 tok_str_add2(&str, t, &cval);
2618 last_tok = t;
2620 tok_str_add(&str, 0);
2621 return str.str;
/* abbreviated English month names, indexed by month number 0..11; used to
   build the __DATE__ string */
static char const ab_month_name[12][4] =
{
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
2630 /* do macro substitution of current token with macro 's' and add
2631 result to (tok_str,tok_len). 'nested_list' is the list of all
2632 macros we got inside to avoid recursing. Return non zero if no
2633 substitution needs to be done */
/* The macro being expanded is identified by the global 'tok'.  __LINE__,
   __FILE__, __DATE__ and __TIME__ are generated here directly.  For any
   other macro the body s->d is expanded; function-like macros first need a
   '(' as next token (otherwise -1 is returned and nothing is added), then
   their arguments are collected and substituted with macro_arg_subst(). */
2634 static int macro_subst_tok(TokenString *tok_str,
2635 Sym **nested_list, Sym *s, struct macro_level **can_read_stream)
2637 Sym *args, *sa, *sa1;
2638 int mstr_allocated, parlevel, *mstr, t, t1, spc;
2639 const int *p;
2640 TokenString str;
2641 char *cstrval;
2642 CString cstr;
2643 char buf[32];
2644 CValue cval;
2645 memset(&cval, 0, sizeof(CValue));
2647 /* if symbol is a macro, prepare substitution */
2648 /* special macros */
2649 if (tok == TOK___LINE__) {
2650 snprintf(buf, sizeof(buf), "%d", file->line_num);
2651 cstrval = buf;
2652 t1 = TOK_PPNUM;
2653 goto add_cstr1;
2654 } else if (tok == TOK___FILE__) {
2655 cstrval = file->filename;
2656 goto add_cstr;
2657 } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2658 time_t ti;
2659 struct tm *tm;
2661 time(&ti);
/* NOTE(review): the result of localtime() is dereferenced below without a
   NULL check */
2662 tm = localtime(&ti);
2663 if (tok == TOK___DATE__) {
2664 snprintf(buf, sizeof(buf), "%s %2d %d",
2665 ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2666 } else {
2667 snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
2668 tm->tm_hour, tm->tm_min, tm->tm_sec);
2670 cstrval = buf;
2671 add_cstr:
2672 t1 = TOK_STR;
2673 add_cstr1:
/* emit cstrval as a single token of type t1 (TOK_STR or TOK_PPNUM) */
2674 cstr_new(&cstr);
2675 cstr_cat(&cstr, cstrval);
2676 cstr_ccat(&cstr, '\0');
2677 cval.cstr = &cstr;
2678 tok_str_add2(tok_str, t1, &cval);
2679 cstr_free(&cstr);
2680 } else {
2681 mstr = s->d;
2682 mstr_allocated = 0;
2683 if (s->type.t == MACRO_FUNC) {
2684 /* NOTE: we do not use next_nomacro to avoid eating the
2685 next token. XXX: find better solution */
2686 redo:
/* peek at the next significant token, in the macro buffer if one is
   active, otherwise in the file, without consuming it */
2687 if (macro_ptr) {
2688 p = macro_ptr;
2689 while (is_space(t = *p) || TOK_LINEFEED == t)
2690 ++p;
2691 if (t == 0 && can_read_stream) {
2692 /* end of macro stream: we must look at the token
2693 after in the file */
2694 struct macro_level *ml = *can_read_stream;
2695 macro_ptr = NULL;
2696 if (ml)
2698 macro_ptr = ml->p;
2699 ml->p = NULL;
2700 *can_read_stream = ml -> prev;
2702 /* also, end of scope for nested defined symbol */
2703 (*nested_list)->v = -1;
2704 goto redo;
2706 } else {
2707 ch = file->buf_ptr[0];
/* skip blanks, newlines and comments in the file to reach the next
   significant character */
2708 while (is_space(ch) || ch == '\n' || ch == '/')
2710 if (ch == '/')
2712 int c;
2713 uint8_t *p = file->buf_ptr;
2714 PEEKC(c, p);
2715 if (c == '*') {
2716 p = parse_comment(p);
2717 file->buf_ptr = p - 1;
2718 } else if (c == '/') {
2719 p = parse_line_comment(p);
2720 file->buf_ptr = p - 1;
2721 } else
2722 break;
2724 cinp();
2726 t = ch;
2728 if (t != '(') /* no macro subst */
2729 return -1;
2731 /* argument macro */
/* the first call consumes the '(' that was only peeked at above, the
   second reads the first token of the argument list */
2732 next_nomacro();
2733 next_nomacro();
2734 args = NULL;
2735 sa = s->next;
2736 /* NOTE: empty args are allowed, except if no args */
2737 for(;;) {
2738 /* handle '()' case */
2739 if (!args && !sa && tok == ')')
2740 break;
2741 if (!sa)
2742 tcc_error("macro '%s' used with too many args",
2743 get_tok_str(s->v, 0));
2744 tok_str_new(&str);
2745 parlevel = spc = 0;
2746 /* NOTE: non zero sa->t indicates VA_ARGS */
/* collect one argument: stop at a top-level ')' or, unless this is the
   variadic parameter, at a top-level ',' */
2747 while ((parlevel > 0 ||
2748 (tok != ')' &&
2749 (tok != ',' || sa->type.t))) &&
2750 tok != -1) {
2751 if (tok == '(')
2752 parlevel++;
2753 else if (tok == ')')
2754 parlevel--;
2755 if (tok == TOK_LINEFEED)
2756 tok = ' ';
2757 if (!check_space(tok, &spc))
2758 tok_str_add2(&str, tok, &tokc);
2759 next_nomacro_spc();
2761 str.len -= spc;
2762 tok_str_add(&str, 0);
2763 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
2764 sa1->d = str.str;
2765 sa = sa->next;
2766 if (tok == ')') {
2767 /* special case for gcc var args: add an empty
2768 var arg argument if it is omitted */
2769 if (sa && sa->type.t && gnu_ext)
2770 continue;
2771 else
2772 break;
2774 if (tok != ',')
2775 expect(",");
2776 next_nomacro();
2778 if (sa) {
2779 tcc_error("macro '%s' used with too few args",
2780 get_tok_str(s->v, 0));
2783 /* now subst each arg */
2784 mstr = macro_arg_subst(nested_list, mstr, args);
2785 /* free memory */
2786 sa = args;
2787 while (sa) {
2788 sa1 = sa->prev;
2789 tok_str_free(sa->d);
2790 sym_free(sa);
2791 sa = sa1;
2793 mstr_allocated = 1;
/* record this macro in nested_list while its body is being expanded */
2795 sym_push2(nested_list, s->v, 0, 0);
2796 macro_subst(tok_str, nested_list, mstr, can_read_stream);
2797 /* pop nested defined symbol */
2798 sa1 = *nested_list;
2799 *nested_list = sa1->prev;
2800 sym_free(sa1);
2801 if (mstr_allocated)
2802 tok_str_free(mstr);
2804 return 0;
2807 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
2808 return the resulting string (which must be freed). */
/* A first pass only looks for a TOK_TWOSHARPS.  If there is one, the string
   is copied again token by token; for each 'a ## b' the spellings of both
   tokens are concatenated, written to a temporary ":paste:" buffer and
   lexed again with next_nomacro1().  The globals 'tok' and 'tokc' are used
   as scratch variables throughout. */
2809 static inline int *macro_twosharps(const int *macro_str)
2811 const int *ptr;
2812 int t;
2813 TokenString macro_str1;
2814 CString cstr;
2815 int n, start_of_nosubsts;
2817 /* we search the first '##' */
2818 for(ptr = macro_str;;) {
2819 CValue cval;
2820 memset(&cval, 0, sizeof(CValue));
2821 TOK_GET(&t, &ptr, &cval);
2822 if (t == TOK_TWOSHARPS)
2823 break;
2824 /* nothing more to do if end of string */
2825 if (t == 0)
2826 return NULL;
2829 /* we saw '##', so we need more processing to handle it */
/* start_of_nosubsts: position in macro_str1 of the first TOK_NOSUBST of
   the run preceding the current token, or -1 when there is none */
2830 start_of_nosubsts = -1;
2831 tok_str_new(&macro_str1);
2832 for(ptr = macro_str;;) {
2833 TOK_GET(&tok, &ptr, &tokc);
2834 if (tok == 0)
2835 break;
2836 if (tok == TOK_TWOSHARPS)
2837 continue;
2838 if (tok == TOK_NOSUBST && start_of_nosubsts < 0)
2839 start_of_nosubsts = macro_str1.len;
2840 while (*ptr == TOK_TWOSHARPS) {
2841 /* given 'a##b', remove nosubsts preceding 'a' */
2842 if (start_of_nosubsts >= 0)
2843 macro_str1.len = start_of_nosubsts;
2844 /* given 'a##b', skip '##' */
2845 t = *++ptr;
2846 /* given 'a##b', remove nosubsts preceding 'b' */
2847 while (t == TOK_NOSUBST)
2848 t = *++ptr;
2849 if (t && t != TOK_TWOSHARPS) {
2850 CValue cval;
2851 memset(&cval, 0, sizeof(CValue));
2852 TOK_GET(&t, &ptr, &cval);
2853 /* We concatenate the two tokens */
2854 cstr_new(&cstr);
2855 cstr_cat(&cstr, get_tok_str(tok, &tokc));
2856 n = cstr.size;
2857 cstr_cat(&cstr, get_tok_str(t, &cval));
2858 cstr_ccat(&cstr, '\0');
/* lex the pasted text from a temporary buffer; if the first token does
   not consume all of it, the paste is not a single token: each such token
   is emitted with a warning and lexing continues */
2860 tcc_open_bf(tcc_state, ":paste:", cstr.size);
2861 memcpy(file->buffer, cstr.data, cstr.size);
2862 for (;;) {
2863 next_nomacro1();
2864 if (0 == *file->buf_ptr)
2865 break;
2866 tok_str_add2(&macro_str1, tok, &tokc);
2867 tcc_warning("pasting \"%.*s\" and \"%s\" does not give a valid preprocessing token",
2868 n, cstr.data, (char*)cstr.data + n);
2870 tcc_close();
2871 cstr_free(&cstr);
2874 if (tok != TOK_NOSUBST)
2875 start_of_nosubsts = -1;
2876 tok_str_add2(&macro_str1, tok, &tokc);
2878 tok_str_add(&macro_str1, 0);
2879 return macro_str1.str;
2883 /* do macro substitution of macro_str and add result to
2884 (tok_str,tok_len). 'nested_list' is the list of all macros we got
2885 inside to avoid recursing. */
2886 static void macro_subst(TokenString *tok_str, Sym **nested_list,
2887 const int *macro_str, struct macro_level ** can_read_stream)
2889 Sym *s;
2890 int *macro_str1;
2891 const int *ptr;
2892 int t, ret, spc;
2893 struct macro_level ml;
2894 int force_blank;
2895 CValue cval;
2896 memset(&cval, 0, sizeof(CValue));
2898 /* first scan for '##' operator handling */
2899 ptr = macro_str;
2900 macro_str1 = macro_twosharps(ptr);
2902 if (macro_str1)
2903 ptr = macro_str1;
2904 spc = 0;
2905 force_blank = 0;
2907 while (1) {
2908 /* NOTE: ptr == NULL can only happen if tokens are read from
2909 file stream due to a macro function call */
2910 if (ptr == NULL)
2911 break;
2912 TOK_GET(&t, &ptr, &cval);
2913 if (t == 0)
2914 break;
2915 if (t == TOK_NOSUBST) {
2916 /* following token has already been subst'd. just copy it on */
2917 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2918 TOK_GET(&t, &ptr, &cval);
2919 goto no_subst;
2921 s = define_find(t);
2922 if (s != NULL) {
2923 /* if nested substitution, do nothing */
2924 if (sym_find2(*nested_list, t)) {
2925 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
2926 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
2927 goto no_subst;
2929 ml.p = macro_ptr;
2930 if (can_read_stream)
2931 ml.prev = *can_read_stream, *can_read_stream = &ml;
2932 macro_ptr = (int *)ptr;
2933 tok = t;
2934 ret = macro_subst_tok(tok_str, nested_list, s, can_read_stream);
2935 ptr = (int *)macro_ptr;
2936 macro_ptr = ml.p;
2937 if (can_read_stream && *can_read_stream == &ml)
2938 *can_read_stream = ml.prev;
2939 if (ret != 0)
2940 goto no_subst;
2941 if (parse_flags & PARSE_FLAG_SPACES)
2942 force_blank = 1;
2943 } else {
2944 no_subst:
2945 if (force_blank) {
2946 tok_str_add(tok_str, ' ');
2947 spc = 1;
2948 force_blank = 0;
2950 if (!check_space(t, &spc))
2951 tok_str_add2(tok_str, t, &cval);
2954 if (macro_str1)
2955 tok_str_free(macro_str1);
2958 /* return next token with macro substitution */
2959 ST_FUNC void next(void)
2961 Sym *nested_list, *s;
2962 TokenString str;
2963 struct macro_level *ml;
2965 redo:
2966 if (parse_flags & PARSE_FLAG_SPACES)
2967 next_nomacro_spc();
2968 else
2969 next_nomacro();
2970 if (!macro_ptr) {
2971 /* if not reading from macro substituted string, then try
2972 to substitute macros */
2973 if (tok >= TOK_IDENT &&
2974 (parse_flags & PARSE_FLAG_PREPROCESS)) {
2975 s = define_find(tok);
2976 if (s) {
2977 /* we have a macro: we try to substitute */
2978 tok_str_new(&str);
2979 nested_list = NULL;
2980 ml = NULL;
2981 if (macro_subst_tok(&str, &nested_list, s, &ml) == 0) {
2982 /* substitution done, NOTE: maybe empty */
2983 tok_str_add(&str, 0);
2984 macro_ptr = str.str;
2985 macro_ptr_allocated = str.str;
2986 goto redo;
2990 } else {
2991 if (tok == 0) {
2992 /* end of macro or end of unget buffer */
2993 if (unget_buffer_enabled) {
2994 macro_ptr = unget_saved_macro_ptr;
2995 unget_buffer_enabled = 0;
2996 } else {
2997 /* end of macro string: free it */
2998 tok_str_free(macro_ptr_allocated);
2999 macro_ptr_allocated = NULL;
3000 macro_ptr = NULL;
3002 goto redo;
3003 } else if (tok == TOK_NOSUBST) {
3004 /* discard preprocessor's nosubst markers */
3005 goto redo;
3009 /* convert preprocessor tokens into C tokens */
3010 if (tok == TOK_PPNUM &&
3011 (parse_flags & PARSE_FLAG_TOK_NUM)) {
3012 parse_number((char *)tokc.cstr->data);
3016 /* push back current token and set current token to 'last_tok'. Only
3017 identifier case handled for labels. */
3018 ST_INLN void unget_tok(int last_tok)
3020 int i, n;
3021 int *q;
3022 if (unget_buffer_enabled)
3024 /* assert(macro_ptr == unget_saved_buffer + 1);
3025 assert(*macro_ptr == 0); */
3027 else
3029 unget_saved_macro_ptr = macro_ptr;
3030 unget_buffer_enabled = 1;
3032 q = unget_saved_buffer;
3033 macro_ptr = q;
3034 *q++ = tok;
3035 n = tok_ext_size(tok) - 1;
3036 for(i=0;i<n;i++)
3037 *q++ = tokc.tab[i];
3038 *q = 0; /* end of token string */
3039 tok = last_tok;
3043 /* better than nothing, but needs extension to handle '-E' option
3044 correctly too */
3045 ST_FUNC void preprocess_init(TCCState *s1)
3047 s1->include_stack_ptr = s1->include_stack;
3048 /* XXX: move that before to avoid having to initialize
3049 file->ifdef_stack_ptr ? */
3050 s1->ifdef_stack_ptr = s1->ifdef_stack;
3051 file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3053 vtop = vstack - 1;
3054 s1->pack_stack[0] = 0;
3055 s1->pack_stack_ptr = s1->pack_stack;
3058 ST_FUNC void preprocess_new(void)
3060 int i, c;
3061 const char *p, *r;
3063 /* init isid table */
3064 for(i=CH_EOF;i<256;i++)
3065 isidnum_table[i-CH_EOF] = isid(i) || isnum(i);
3067 /* add all tokens */
3068 table_ident = NULL;
3069 memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3071 tok_ident = TOK_IDENT;
3072 p = tcc_keywords;
3073 while (*p) {
3074 r = p;
3075 for(;;) {
3076 c = *r++;
3077 if (c == '\0')
3078 break;
3080 tok_alloc(p, r - p - 1);
3081 p = r;
3085 /* Preprocess the current file */
3086 ST_FUNC int tcc_preprocess(TCCState *s1)
3088 Sym *define_start;
3090 BufferedFile *file_ref, **iptr, **iptr_new;
3091 int token_seen, line_ref, d;
3092 const char *s;
3094 preprocess_init(s1);
3095 define_start = define_stack;
3096 ch = file->buf_ptr[0];
3097 tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3098 parse_flags = PARSE_FLAG_ASM_COMMENTS | PARSE_FLAG_PREPROCESS |
3099 PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES;
3100 token_seen = 0;
3101 line_ref = 0;
3102 file_ref = NULL;
3103 iptr = s1->include_stack_ptr;
3105 for (;;) {
3106 next();
3107 if (tok == TOK_EOF) {
3108 break;
3109 } else if (file != file_ref) {
3110 goto print_line;
3111 } else if (tok == TOK_LINEFEED) {
3112 if (!token_seen)
3113 continue;
3114 ++line_ref;
3115 token_seen = 0;
3116 } else if (!token_seen) {
3117 d = file->line_num - line_ref;
3118 if (file != file_ref || d < 0 || d >= 8) {
3119 print_line:
3120 iptr_new = s1->include_stack_ptr;
3121 s = iptr_new > iptr ? " 1"
3122 : iptr_new < iptr ? " 2"
3123 : iptr_new > s1->include_stack ? " 3"
3124 : ""
3126 iptr = iptr_new;
3127 fprintf(s1->ppfp, "# %d \"%s\"%s\n", file->line_num, file->filename, s);
3128 } else {
3129 while (d)
3130 fputs("\n", s1->ppfp), --d;
3132 line_ref = (file_ref = file)->line_num;
3133 token_seen = tok != TOK_LINEFEED;
3134 if (!token_seen)
3135 continue;
3137 fputs(get_tok_str(tok, &tokc), s1->ppfp);
3139 free_defines(define_start);
3140 return 0;