9 #define MACRO_FLAG_OBJECTLIKE (1U<<31)
10 #define MACRO_FLAG_VARIADIC (1U<<30)
11 #define MACRO_ARGCOUNT_MASK (~(0|MACRO_FLAG_OBJECTLIKE|MACRO_FLAG_VARIADIC))
13 #define OBJECTLIKE(M) (M->num_args & MACRO_FLAG_OBJECTLIKE)
14 #define FUNCTIONLIKE(M) (!(OBJECTLIKE(M)))
15 #define MACRO_ARGCOUNT(M) (M->num_args & MACRO_ARGCOUNT_MASK)
16 #define MACRO_VARIADIC(M) (M->num_args & MACRO_FLAG_VARIADIC)
18 #define MAX_RECURSION 32
20 static unsigned string_hash(const char* s
) {
32 char *str_contents_buf
;
33 tglist(char*) argnames
;
37 tglist(char*) includedirs
;
38 hbmap(char*, struct macro
, 128) *macros
;
39 const char *last_file
;
41 struct tokenizer
*tchain
[MAX_RECURSION
];
44 static int token_needs_string(struct token
*tok
) {
48 case TT_WIDESTRING_LIT
:
63 static void tokenizer_from_file(struct tokenizer
*t
, FILE* f
) {
64 tokenizer_init(t
, f
, TF_PARSE_STRINGS
);
65 tokenizer_set_filename(t
, "<macro>");
69 static int strptrcmp(const void *a
, const void *b
) {
70 const char * const *x
= a
;
71 const char * const *y
= b
;
72 return strcmp(*x
, *y
);
75 static struct macro
* get_macro(struct cpp
*cpp
, const char *name
) {
76 return hbmap_get(cpp
->macros
, name
);
79 static void add_macro(struct cpp
*cpp
, const char *name
, struct macro
*m
) {
80 hbmap_insert(cpp
->macros
, name
, *m
);
83 static int undef_macro(struct cpp
*cpp
, const char *name
) {
84 hbmap_iter k
= hbmap_find(cpp
->macros
, name
);
85 if(k
== (hbmap_iter
) -1) return 0;
86 struct macro
*m
= &hbmap_getval(cpp
->macros
, k
);
87 free(hbmap_getkey(cpp
->macros
, k
));
88 if(m
->str_contents
) fclose(m
->str_contents
);
89 free(m
->str_contents_buf
);
90 tglist_free_values(&m
->argnames
);
91 tglist_free_items(&m
->argnames
);
92 hbmap_delete(cpp
->macros
, k
);
96 static void free_macros(struct cpp
*cpp
) {
98 hbmap_foreach(cpp
->macros
, i
) {
99 while(hbmap_iter_index_valid(cpp
->macros
, i
))
100 undef_macro(cpp
, hbmap_getkey(cpp
->macros
, i
));
102 hbmap_fini(cpp
->macros
, 1);
106 static void error_or_warning(const char *err
, const char* type
, struct tokenizer
*t
, struct token
*curr
) {
107 unsigned column
= curr
? curr
->column
: t
->column
;
108 unsigned line
= curr
? curr
->line
: t
->line
;
109 dprintf(2, "<%s> %u:%u %s: '%s'\n", t
->filename
, line
, column
, type
, err
);
110 dprintf(2, "%s\n", t
->buf
);
111 for(int i
= 0; i
< strlen(t
->buf
); i
++)
115 static void error(const char *err
, struct tokenizer
*t
, struct token
*curr
) {
116 error_or_warning(err
, "error", t
, curr
);
118 static void warning(const char *err
, struct tokenizer
*t
, struct token
*curr
) {
119 error_or_warning(err
, "warning", t
, curr
);
122 static void emit(FILE *out
, const char *s
) {
123 fprintf(out
, "%s", s
);
126 static int x_tokenizer_next_of(struct tokenizer
*t
, struct token
*tok
, int fail_unk
) {
127 int ret
= tokenizer_next(t
, tok
);
128 if(tok
->type
== TT_OVERFLOW
) {
129 error("max token length of 4095 exceeded!", t
, tok
);
131 } else if (fail_unk
&& ret
== 0) {
132 error("tokenizer encountered unknown token", t
, tok
);
134 } else if (tok
->type
== TT_SEP
&& tok
->value
== '\t') {
140 #define tokenizer_next(T, TOK) x_tokenizer_next_of(T, TOK, 0)
141 #define x_tokenizer_next(T, TOK) x_tokenizer_next_of(T, TOK, 1)
143 static int is_whitespace_token(struct token
*token
)
145 return token
->type
== TT_SEP
&&
146 (token
->value
== ' ' || token
->value
== '\t');
149 /* return index of matching item in values array, or -1 on error */
150 static int expect(struct tokenizer
*t
, enum tokentype tt
, const char* values
[], struct token
*token
)
154 ret
= tokenizer_next(t
, token
);
155 if(ret
== 0 || token
->type
== TT_EOF
) goto err
;
156 } while(is_whitespace_token(token
));
158 if(token
->type
!= tt
) {
160 error("unexpected token", t
, token
);
165 if(!strcmp(values
[i
], t
->buf
))
172 static int is_char(struct token
*tok
, int ch
) {
173 return tok
->type
== TT_SEP
&& tok
->value
== ch
;
176 static void flush_whitespace(FILE *out
, int *ws_count
) {
177 while(*ws_count
> 0) {
183 /* skips until the next non-whitespace token (if the current one is one too)*/
184 static int eat_whitespace(struct tokenizer
*t
, struct token
*token
, int *count
) {
187 while (is_whitespace_token(token
)) {
189 ret
= x_tokenizer_next(t
, token
);
194 /* fetches the next token until it is non-whitespace */
195 static int skip_next_and_ws(struct tokenizer
*t
, struct token
*tok
) {
196 int ret
= tokenizer_next(t
, tok
);
199 ret
= eat_whitespace(t
, tok
, &ws_count
);
203 static void emit_token(FILE* out
, struct token
*tok
, const char* strbuf
) {
204 if(tok
->type
== TT_SEP
) {
205 fprintf(out
, "%c", tok
->value
);
206 } else if(strbuf
&& token_needs_string(tok
)) {
207 fprintf(out
, "%s", strbuf
);
209 dprintf(2, "oops, dunno how to handle tt %d (%s)\n", (int) tok
->type
, strbuf
);
213 int parse_file(struct cpp
* cpp
, FILE *f
, const char*, FILE *out
);
214 static int include_file(struct cpp
* cpp
, struct tokenizer
*t
, FILE* out
) {
215 static const char* inc_chars
[] = { "\"", "<", 0};
216 static const char* inc_chars_end
[] = { "\"", ">", 0};
218 tokenizer_set_flags(t
, 0); // disable string tokenization
220 int inc1sep
= expect(t
, TT_SEP
, inc_chars
, &tok
);
222 error("expected one of [\"<]", t
, &tok
);
225 int ret
= tokenizer_read_until(t
, inc_chars_end
[inc1sep
], 1);
227 error("error parsing filename", t
, &tok
);
230 // TODO: different path lookup depending on whether " or <
233 tglist_foreach(&cpp
->includedirs
, i
) {
235 snprintf(buf
, sizeof buf
, "%s/%s", tglist_get(&cpp
->includedirs
, i
), t
->buf
);
240 dprintf(2, "%s: ", t
->buf
);
244 const char *fn
= strdup(t
->buf
);
245 assert(tokenizer_next(t
, &tok
) && is_char(&tok
, inc_chars_end
[inc1sep
][0]));
247 tokenizer_set_flags(t
, TF_PARSE_STRINGS
);
248 return parse_file(cpp
, f
, fn
, out
);
251 static int emit_error_or_warning(struct tokenizer
*t
, int is_error
) {
253 int ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
255 struct token tmp
= {.column
= t
->column
, .line
= t
->line
};
256 ret
= tokenizer_read_until(t
, "\n", 1);
258 error(t
->buf
, t
, &tmp
);
261 warning(t
->buf
, t
, &tmp
);
265 static FILE *freopen_r(FILE *f
, char **buf
, size_t *size
) {
268 return fmemopen(*buf
, *size
, "r");
271 static int consume_nl_and_ws(struct tokenizer
*t
, struct token
*tok
, int expected
) {
272 if(!x_tokenizer_next(t
, tok
)) {
274 error("unexpected", t
, tok
);
278 if(tok
->type
!= TT_SEP
|| tok
->value
!= expected
) goto err
;
280 case '\\' : expected
= '\n'; break;
281 case '\n' : expected
= 0; break;
284 if(is_whitespace_token(tok
)) ;
285 else if(is_char(tok
, '\\')) expected
= '\n';
288 return consume_nl_and_ws(t
, tok
, expected
);
291 static int expand_macro(struct cpp
*cpp
, struct tokenizer
*t
, FILE* out
, const char* name
, unsigned rec_level
, char *visited
[]);
293 static int parse_macro(struct cpp
*cpp
, struct tokenizer
*t
) {
295 int ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
297 struct token curr
; //tmp = {.column = t->column, .line = t->line};
298 ret
= tokenizer_next(t
, &curr
) && curr
.type
!= TT_EOF
;
300 error("parsing macro name", t
, &curr
);
303 if(curr
.type
!= TT_IDENTIFIER
) {
304 error("expected identifier", t
, &curr
);
307 const char* macroname
= strdup(t
->buf
);
309 dprintf(2, "parsing macro %s\n", macroname
);
312 if(get_macro(cpp
, macroname
)) {
313 if(!strcmp(macroname
, "defined")) {
314 error("\"defined\" cannot be used as a macro name", t
, &curr
);
320 struct macro
new = { 0 };
321 unsigned macro_flags
= MACRO_FLAG_OBJECTLIKE
;
322 tglist_init(&new.argnames
);
324 ret
= x_tokenizer_next(t
, &curr
) && curr
.type
!= TT_EOF
;
327 if (is_char(&curr
, '(')) {
329 unsigned expected
= 0;
331 /* process next function argument identifier */
332 ret
= consume_nl_and_ws(t
, &curr
, expected
);
334 error("unexpected", t
, &curr
);
338 if(curr
.type
== TT_SEP
) {
346 ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
350 error("unexpected character", t
, &curr
);
353 } else if(!(curr
.type
== TT_IDENTIFIER
|| curr
.type
== TT_ELLIPSIS
)) {
354 error("expected identifier for macro arg", t
, &curr
);
358 if(curr
.type
== TT_ELLIPSIS
) {
359 if(macro_flags
& MACRO_FLAG_VARIADIC
) {
360 error("\"...\" isn't the last parameter", t
, &curr
);
363 macro_flags
|= MACRO_FLAG_VARIADIC
;
365 char *tmps
= strdup(t
->buf
);
366 tglist_add(&new.argnames
, tmps
);
371 } else if(is_whitespace_token(&curr
)) {
372 ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
374 } else if(is_char(&curr
, '\n')) {
375 /* content-less macro */
379 struct FILE_container
{
384 contents
.f
= open_memstream(&contents
.buf
, &contents
.len
);
386 int backslash_seen
= 0;
388 /* ignore unknown tokens in macro body */
389 ret
= tokenizer_next(t
, &curr
);
391 if(curr
.type
== TT_EOF
) break;
392 if (curr
.type
== TT_SEP
) {
393 if(curr
.value
== '\\')
396 if(curr
.value
== '\n' && !backslash_seen
) break;
397 emit_token(contents
.f
, &curr
, t
->buf
);
401 emit_token(contents
.f
, &curr
, t
->buf
);
404 new.str_contents
= freopen_r(contents
.f
, &contents
.buf
, &contents
.len
);
405 new.str_contents_buf
= contents
.buf
;
408 struct macro
*old
= get_macro(cpp
, macroname
);
409 char *s_old
= old
->str_contents_buf
? old
->str_contents_buf
: "";
410 char *s_new
= new.str_contents_buf
? new.str_contents_buf
: "";
411 if(strcmp(s_old
, s_new
)) {
413 sprintf(buf
, "redefinition of macro %s", macroname
);
417 new.num_args
|= macro_flags
;
418 add_macro(cpp
, macroname
, &new);
422 static size_t macro_arglist_pos(struct macro
*m
, const char* iden
) {
424 for(i
= 0; i
< tglist_getsize(&m
->argnames
); i
++) {
425 char *item
= tglist_get(&m
->argnames
, i
);
426 if(!strcmp(item
, iden
)) return i
;
439 static int was_visited(const char *name
, char*visited
[], unsigned rec_level
) {
441 for(x
= rec_level
; x
>= 0; --x
) {
442 if(!strcmp(visited
[x
], name
)) return 1;
447 unsigned get_macro_info(struct cpp
* cpp
,
449 struct macro_info
*mi_list
, size_t *mi_cnt
,
450 unsigned nest
, unsigned tpos
, const char *name
,
451 char* visited
[], unsigned rec_level
456 int ret
= tokenizer_next(t
, &tok
);
457 if(!ret
|| tok
.type
== TT_EOF
) break;
459 dprintf(2, "(%s) nest %d, brace %u t: %s\n", name
, nest
, brace_lvl
, t
->buf
);
462 if(tok
.type
== TT_IDENTIFIER
&& (m
= get_macro(cpp
, t
->buf
)) && !was_visited(t
->buf
, visited
, rec_level
)) {
463 const char* newname
= strdup(t
->buf
);
464 if(FUNCTIONLIKE(m
)) {
465 if(tokenizer_peek(t
) == '(') {
466 unsigned tpos_save
= tpos
;
467 tpos
= get_macro_info(cpp
, t
, mi_list
, mi_cnt
, nest
+1, tpos
+1, newname
, visited
, rec_level
);
468 mi_list
[*mi_cnt
] = (struct macro_info
) {
475 /* suppress expansion */
478 mi_list
[*mi_cnt
] = (struct macro_info
) {
485 } else if(is_char(&tok
, '(')) {
487 } else if(is_char(&tok
, ')')) {
489 if(brace_lvl
== 0 && nest
!= 0) break;
496 struct FILE_container
{
503 static void free_file_container(struct FILE_container
*fc
) {
508 static int mem_tokenizers_join(
509 struct FILE_container
* org
, struct FILE_container
*inj
,
510 struct FILE_container
* result
,
511 int first
, off_t lastpos
) {
512 result
->f
= open_memstream(&result
->buf
, &result
->len
);
516 tokenizer_rewind(&org
->t
);
517 for(i
=0; i
<first
; ++i
) {
518 ret
= tokenizer_next(&org
->t
, &tok
);
519 assert(ret
&& tok
.type
!= TT_EOF
);
520 emit_token(result
->f
, &tok
, org
->t
.buf
);
522 int cnt
= 0, last
= first
;
524 ret
= tokenizer_next(&inj
->t
, &tok
);
525 if(!ret
|| tok
.type
== TT_EOF
) break;
526 emit_token(result
->f
, &tok
, inj
->t
.buf
);
529 while(tokenizer_ftello(&org
->t
) < lastpos
) {
530 ret
= tokenizer_next(&org
->t
, &tok
);
534 int diff
= cnt
- ((int) last
- (int) first
);
537 ret
= tokenizer_next(&org
->t
, &tok
);
538 if(!ret
|| tok
.type
== TT_EOF
) break;
539 emit_token(result
->f
, &tok
, org
->t
.buf
);
542 result
->f
= freopen_r(result
->f
, &result
->buf
, &result
->len
);
543 tokenizer_from_file(&result
->t
, result
->f
);
547 static int tchain_parens_follows(struct cpp
*cpp
, int rec_level
) {
549 for(i
=rec_level
;i
>=0;--i
) {
550 c
= tokenizer_peek(cpp
->tchain
[i
]);
551 if(c
== EOF
) continue;
552 if(c
== '(') return i
;
558 static int stringify(struct cpp
*ccp
, struct tokenizer
*t
, FILE* output
) {
563 ret
= tokenizer_next(t
, &tok
);
565 if(tok
.type
== TT_EOF
) break;
566 if(is_char(&tok
, '\n')) continue;
567 if(is_char(&tok
, '\\') && tokenizer_peek(t
) == '\n') continue;
568 if(tok
.type
== TT_DQSTRING_LIT
) {
573 emit(output
, "\\\"");
574 } else if (*s
== '\\') {
575 emit(output
, "\\\\");
583 emit_token(output
, &tok
, t
->buf
);
589 /* rec_level -1 serves as a magic value to signal we're using
590 expand_macro from the if-evaluator code, which means activating
591 the "define" macro */
592 static int expand_macro(struct cpp
* cpp
, struct tokenizer
*t
, FILE* out
, const char* name
, unsigned rec_level
, char* visited
[]) {
593 int is_define
= !strcmp(name
, "defined");
596 if(is_define
&& rec_level
!= -1)
598 else m
= get_macro(cpp
, name
);
603 if(rec_level
== -1) rec_level
= 0;
604 if(rec_level
>= MAX_RECURSION
) {
605 error("max recursion level reached", t
, 0);
609 dprintf(2, "lvl %u: expanding macro %s (%s)\n", rec_level
, name
, m
->str_contents_buf
);
612 if(rec_level
== 0 && strcmp(t
->filename
, "<macro>")) {
613 cpp
->last_file
= t
->filename
;
614 cpp
->last_line
= t
->line
;
616 if(!strcmp(name
, "__FILE__")) {
618 emit(out
, cpp
->last_file
);
621 } else if(!strcmp(name
, "__LINE__")) {
623 sprintf(buf
, "%d", cpp
->last_line
);
628 if(visited
[rec_level
]) free(visited
[rec_level
]);
629 visited
[rec_level
] = strdup(name
);
630 cpp
->tchain
[rec_level
] = t
;
634 unsigned num_args
= MACRO_ARGCOUNT(m
);
635 struct FILE_container
*argvalues
= calloc(MACRO_VARIADIC(m
) ? num_args
+ 1 : num_args
, sizeof(struct FILE_container
));
637 for(i
=0; i
< num_args
; i
++)
638 argvalues
[i
].f
= open_memstream(&argvalues
[i
].buf
, &argvalues
[i
].len
);
640 /* replace named arguments in the contents of the macro call */
641 if(FUNCTIONLIKE(m
)) {
643 if((ret
= tokenizer_peek(t
)) != '(') {
644 /* function-like macro shall not be expanded if not followed by '(' */
645 if(ret
== EOF
&& rec_level
> 0 && (ret
= tchain_parens_follows(cpp
, rec_level
-1)) != -1) {
646 // warning("Replacement text involved subsequent text", t, 0);
647 t
= cpp
->tchain
[ret
];
653 ret
= x_tokenizer_next(t
, &tok
);
654 assert(ret
&& is_char(&tok
, '('));
656 unsigned curr_arg
= 0, need_arg
= 1, parens
= 0;
658 if(!tokenizer_skip_chars(t
, " \t", &ws_count
)) return 0;
661 if(num_args
== 1 && MACRO_VARIADIC(m
)) varargs
= 1;
663 int ret
= tokenizer_next(t
, &tok
);
665 if( tok
.type
== TT_EOF
) {
666 dprintf(2, "warning EOF\n");
669 if(!parens
&& is_char(&tok
, ',') && !varargs
) {
670 if(need_arg
&& !ws_count
) {
671 /* empty argument is OK */
674 if(!varargs
) curr_arg
++;
675 if(curr_arg
+ 1 == num_args
&& MACRO_VARIADIC(m
)) {
677 } else if(curr_arg
>= num_args
) {
678 error("too many arguments for function macro", t
, &tok
);
681 ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
684 } else if(is_char(&tok
, '(')) {
686 } else if(is_char(&tok
, ')')) {
688 if(curr_arg
+ num_args
&& curr_arg
< num_args
-1) {
689 error("too few args for function macro", t
, &tok
);
695 } else if(is_char(&tok
, '\\')) {
696 if(tokenizer_peek(t
) == '\n') continue;
699 emit_token(argvalues
[curr_arg
].f
, &tok
, t
->buf
);
703 for(i
=0; i
< num_args
; i
++) {
704 argvalues
[i
].f
= freopen_r(argvalues
[i
].f
, &argvalues
[i
].buf
, &argvalues
[i
].len
);
705 tokenizer_from_file(&argvalues
[i
].t
, argvalues
[i
].f
);
707 dprintf(2, "macro argument %i: %s\n", (int) i
, argvalues
[i
].buf
);
712 if(get_macro(cpp
, argvalues
[0].buf
))
718 if(!m
->str_contents
) goto cleanup
;
720 struct FILE_container cwae
= {0}; /* contents_with_args_expanded */
721 cwae
.f
= open_memstream(&cwae
.buf
, &cwae
.len
);
722 FILE* output
= cwae
.f
;
725 tokenizer_from_file(&t2
, m
->str_contents
);
730 ret
= tokenizer_next(&t2
, &tok
);
732 if(tok
.type
== TT_EOF
) break;
733 if(tok
.type
== TT_IDENTIFIER
) {
734 flush_whitespace(output
, &ws_count
);
736 if(MACRO_VARIADIC(m
) && !strcmp(t2
.buf
, "__VA_ARGS__")) {
739 size_t arg_nr
= macro_arglist_pos(m
, id
);
740 if(arg_nr
!= (size_t) -1) {
741 tokenizer_rewind(&argvalues
[arg_nr
].t
);
742 if(hash_count
== 1) ret
= stringify(cpp
, &argvalues
[arg_nr
].t
, output
);
744 ret
= tokenizer_next(&argvalues
[arg_nr
].t
, &tok
);
746 if(tok
.type
== TT_EOF
) break;
747 emit_token(output
, &tok
, argvalues
[arg_nr
].t
.buf
);
751 if(hash_count
== 1) {
753 error("'#' is not followed by macro parameter", &t2
, &tok
);
756 emit_token(output
, &tok
, t2
.buf
);
758 } else if(is_char(&tok
, '#')) {
764 /* in a real cpp we'd need to look for '\\' first */
765 while(tokenizer_peek(&t2
) == '\n') {
766 x_tokenizer_next(&t2
, &tok
);
768 if(tokenizer_peek(&t2
) == '#') x_tokenizer_next(&t2
, &tok
);
771 if(hash_count
== 1) flush_whitespace(output
, &ws_count
);
772 else if(hash_count
> 2) {
773 error("only two '#' characters allowed for macro expansion", &t2
, &tok
);
777 ret
= tokenizer_skip_chars(&t2
, " \t\n", &ws_count
);
779 ret
= tokenizer_skip_chars(&t2
, " \t", &ws_count
);
784 } else if(is_whitespace_token(&tok
)) {
787 if(hash_count
== 1) goto hash_err
;
788 flush_whitespace(output
, &ws_count
);
789 emit_token(output
, &tok
, t2
.buf
);
792 flush_whitespace(output
, &ws_count
);
794 /* we need to expand macros after the macro arguments have been inserted */
796 cwae
.f
= freopen_r(cwae
.f
, &cwae
.buf
, &cwae
.len
);
798 dprintf(2, "contents with args expanded: %s\n", cwae
.buf
);
800 tokenizer_from_file(&cwae
.t
, cwae
.f
);
803 int ret
= tokenizer_next(&cwae
.t
, &tok
);
805 if(tok
.type
== TT_EOF
) break;
806 if(tok
.type
== TT_IDENTIFIER
&& get_macro(cpp
, cwae
.t
.buf
))
810 tokenizer_rewind(&cwae
.t
);
811 struct macro_info
*mcs
= calloc(mac_cnt
, sizeof(struct macro_info
));
814 get_macro_info(cpp
, &cwae
.t
, mcs
, &mac_iter
, 0, 0, "null", visited
, rec_level
);
815 /* some of the macros might not expand at this stage (without braces)*/
816 while(mac_cnt
&& mcs
[mac_cnt
-1].name
== 0)
819 size_t i
; int depth
= 0;
820 for(i
= 0; i
< mac_cnt
; ++i
) {
821 if(mcs
[i
].nest
> depth
) depth
= mcs
[i
].nest
;
824 for(i
= 0; i
< mac_cnt
; ++i
) if(mcs
[i
].nest
== depth
) {
825 struct macro_info
*mi
= &mcs
[i
];
826 tokenizer_rewind(&cwae
.t
);
829 for(j
= 0; j
< mi
->first
+1; ++j
)
830 tokenizer_next(&cwae
.t
, &utok
);
831 struct FILE_container t2
= {0}, tmp
= {0};
832 t2
.f
= open_memstream(&t2
.buf
, &t2
.len
);
833 if(!expand_macro(cpp
, &cwae
.t
, t2
.f
, mi
->name
, rec_level
+1, visited
))
835 t2
.f
= freopen_r(t2
.f
, &t2
.buf
, &t2
.len
);
836 tokenizer_from_file(&t2
.t
, t2
.f
);
837 /* manipulating the stream in case more stuff has been consumed */
838 off_t cwae_pos
= tokenizer_ftello(&cwae
.t
);
839 tokenizer_rewind(&cwae
.t
);
841 dprintf(2, "merging %s with %s\n", cwae
.buf
, t2
.buf
);
843 int diff
= mem_tokenizers_join(&cwae
, &t2
, &tmp
, mi
->first
, cwae_pos
);
844 free_file_container(&cwae
);
845 free_file_container(&t2
);
848 dprintf(2, "result: %s\n", cwae
.buf
);
850 if(diff
== 0) continue;
851 for(j
= 0; j
< mac_cnt
; ++j
) {
853 struct macro_info
*mi2
= &mcs
[j
];
854 /* modified element mi can be either inside, after or before
855 another macro. the after case doesn't affect us. */
856 if(mi
->first
>= mi2
->first
&& mi
->last
<= mi2
->last
) {
859 } else if (mi
->first
< mi2
->first
) {
868 tokenizer_rewind(&cwae
.t
);
871 tokenizer_next(&cwae
.t
, &tok
);
872 if(tok
.type
== TT_EOF
) break;
873 if(tok
.type
== TT_IDENTIFIER
&& tokenizer_peek(&cwae
.t
) == EOF
&&
874 (ma
= get_macro(cpp
, cwae
.t
.buf
)) && FUNCTIONLIKE(ma
) && tchain_parens_follows(cpp
, rec_level
) != -1
876 int ret
= expand_macro(cpp
, &cwae
.t
, out
, cwae
.t
.buf
, rec_level
+1, visited
);
879 emit_token(out
, &tok
, cwae
.t
.buf
);
884 free_file_container(&cwae
);
887 for(i
=0; i
< num_args
; i
++) {
888 fclose(argvalues
[i
].f
);
889 free(argvalues
[i
].buf
);
895 #define TT_LAND TT_CUSTOM+0
896 #define TT_LOR TT_CUSTOM+1
897 #define TT_LTE TT_CUSTOM+2
898 #define TT_GTE TT_CUSTOM+3
899 #define TT_SHL TT_CUSTOM+4
900 #define TT_SHR TT_CUSTOM+5
901 #define TT_EQ TT_CUSTOM+6
902 #define TT_NEQ TT_CUSTOM+7
903 #define TT_LT TT_CUSTOM+8
904 #define TT_GT TT_CUSTOM+9
905 #define TT_BAND TT_CUSTOM+10
906 #define TT_BOR TT_CUSTOM+11
907 #define TT_XOR TT_CUSTOM+12
908 #define TT_NEG TT_CUSTOM+13
909 #define TT_PLUS TT_CUSTOM+14
910 #define TT_MINUS TT_CUSTOM+15
911 #define TT_MUL TT_CUSTOM+16
912 #define TT_DIV TT_CUSTOM+17
913 #define TT_MOD TT_CUSTOM+18
914 #define TT_LPAREN TT_CUSTOM+19
915 #define TT_RPAREN TT_CUSTOM+20
916 #define TT_LNOT TT_CUSTOM+21
918 #define TTINT(X) X-TT_CUSTOM
919 #define TTENT(X, Y) [TTINT(X)] = Y
921 static int bp(int tokentype
) {
922 static const int bplist
[] = {
923 TTENT(TT_LOR
, 1 << 4),
924 TTENT(TT_LAND
, 1 << 5),
925 TTENT(TT_BOR
, 1 << 6),
926 TTENT(TT_XOR
, 1 << 7),
927 TTENT(TT_BAND
, 1 << 8),
928 TTENT(TT_EQ
, 1 << 9),
929 TTENT(TT_NEQ
, 1 << 9),
930 TTENT(TT_LTE
, 1 << 10),
931 TTENT(TT_GTE
, 1 << 10),
932 TTENT(TT_LT
, 1 << 10),
933 TTENT(TT_GT
, 1 << 10),
934 TTENT(TT_SHL
, 1 << 11),
935 TTENT(TT_SHR
, 1 << 11),
936 TTENT(TT_PLUS
, 1 << 12),
937 TTENT(TT_MINUS
, 1 << 12),
938 TTENT(TT_MUL
, 1 << 13),
939 TTENT(TT_DIV
, 1 << 13),
940 TTENT(TT_MOD
, 1 << 13),
941 TTENT(TT_NEG
, 1 << 14),
942 TTENT(TT_LNOT
, 1 << 14),
943 TTENT(TT_LPAREN
, 1 << 15),
944 // TTENT(TT_RPAREN, 1 << 15),
945 // TTENT(TT_LPAREN, 0),
948 if(TTINT(tokentype
) < sizeof(bplist
)/sizeof(bplist
[0])) return bplist
[TTINT(tokentype
)];
952 static int expr(struct tokenizer
*t
, int rbp
, int *err
);
954 static int charlit_to_int(const char *lit
) {
955 if(lit
[1] == '\\') switch(lit
[2]) {
960 case 'x': return strtol(lit
+3, NULL
, 16);
961 default: return lit
[2];
966 static int nud(struct tokenizer
*t
, struct token
*tok
, int *err
) {
967 switch((unsigned) tok
->type
) {
968 case TT_IDENTIFIER
: return 0;
969 case TT_WIDECHAR_LIT
:
970 case TT_SQSTRING_LIT
: return charlit_to_int(t
->buf
);
974 return strtol(t
->buf
, NULL
, 0);
975 case TT_NEG
: return ~ expr(t
, bp(tok
->type
), err
);
976 case TT_PLUS
: return expr(t
, bp(tok
->type
), err
);
977 case TT_MINUS
: return - expr(t
, bp(tok
->type
), err
);
978 case TT_LNOT
: return !expr(t
, bp(tok
->type
), err
);
980 int inner
= expr(t
, 0, err
);
981 if(0!=expect(t
, TT_RPAREN
, (const char*[]){")", 0}, tok
)) {
982 error("missing ')'", t
, tok
);
988 error("floating constant in preprocessor expression", t
, tok
);
993 error("unexpected token", t
, tok
);
999 static int led(struct tokenizer
*t
, int left
, struct token
*tok
, int *err
) {
1001 switch((unsigned) tok
->type
) {
1004 right
= expr(t
, bp(tok
->type
), err
);
1005 if(tok
->type
== TT_LAND
) return left
&& right
;
1006 return left
|| right
;
1007 case TT_LTE
: return left
<= expr(t
, bp(tok
->type
), err
);
1008 case TT_GTE
: return left
>= expr(t
, bp(tok
->type
), err
);
1009 case TT_SHL
: return left
<< expr(t
, bp(tok
->type
), err
);
1010 case TT_SHR
: return left
>> expr(t
, bp(tok
->type
), err
);
1011 case TT_EQ
: return left
== expr(t
, bp(tok
->type
), err
);
1012 case TT_NEQ
: return left
!= expr(t
, bp(tok
->type
), err
);
1013 case TT_LT
: return left
< expr(t
, bp(tok
->type
), err
);
1014 case TT_GT
: return left
> expr(t
, bp(tok
->type
), err
);
1015 case TT_BAND
: return left
& expr(t
, bp(tok
->type
), err
);
1016 case TT_BOR
: return left
| expr(t
, bp(tok
->type
), err
);
1017 case TT_XOR
: return left
^ expr(t
, bp(tok
->type
), err
);
1018 case TT_PLUS
: return left
+ expr(t
, bp(tok
->type
), err
);
1019 case TT_MINUS
:return left
- expr(t
, bp(tok
->type
), err
);
1020 case TT_MUL
: return left
* expr(t
, bp(tok
->type
), err
);
1023 right
= expr(t
, bp(tok
->type
), err
);
1025 error("eval: div by zero", t
, tok
);
1028 else if(tok
->type
== TT_DIV
) return left
/ right
;
1029 else if(tok
->type
== TT_MOD
) return left
% right
;
1032 error("eval: unexpect token", t
, tok
);
1039 static int tokenizer_peek_next_non_ws(struct tokenizer
*t
, struct token
*tok
)
1043 ret
= tokenizer_peek_token(t
, tok
);
1044 if(is_whitespace_token(tok
))
1045 x_tokenizer_next(t
, tok
);
1051 static int expr(struct tokenizer
*t
, int rbp
, int*err
) {
1053 int ret
= skip_next_and_ws(t
, &tok
);
1054 if(tok
.type
== TT_EOF
) return 0;
1055 int left
= nud(t
, &tok
, err
);
1057 ret
= tokenizer_peek_next_non_ws(t
, &tok
);
1058 if(bp(tok
.type
) <= rbp
) break;
1059 ret
= tokenizer_next(t
, &tok
);
1060 if(tok
.type
== TT_EOF
) break;
1061 left
= led(t
, left
, &tok
, err
);
1067 static int do_eval(struct tokenizer
*t
, int *result
) {
1068 tokenizer_register_custom_token(t
, TT_LAND
, "&&");
1069 tokenizer_register_custom_token(t
, TT_LOR
, "||");
1070 tokenizer_register_custom_token(t
, TT_LTE
, "<=");
1071 tokenizer_register_custom_token(t
, TT_GTE
, ">=");
1072 tokenizer_register_custom_token(t
, TT_SHL
, "<<");
1073 tokenizer_register_custom_token(t
, TT_SHR
, ">>");
1074 tokenizer_register_custom_token(t
, TT_EQ
, "==");
1075 tokenizer_register_custom_token(t
, TT_NEQ
, "!=");
1077 tokenizer_register_custom_token(t
, TT_LT
, "<");
1078 tokenizer_register_custom_token(t
, TT_GT
, ">");
1080 tokenizer_register_custom_token(t
, TT_BAND
, "&");
1081 tokenizer_register_custom_token(t
, TT_BOR
, "|");
1082 tokenizer_register_custom_token(t
, TT_XOR
, "^");
1083 tokenizer_register_custom_token(t
, TT_NEG
, "~");
1085 tokenizer_register_custom_token(t
, TT_PLUS
, "+");
1086 tokenizer_register_custom_token(t
, TT_MINUS
, "-");
1087 tokenizer_register_custom_token(t
, TT_MUL
, "*");
1088 tokenizer_register_custom_token(t
, TT_DIV
, "/");
1089 tokenizer_register_custom_token(t
, TT_MOD
, "%");
1091 tokenizer_register_custom_token(t
, TT_LPAREN
, "(");
1092 tokenizer_register_custom_token(t
, TT_RPAREN
, ")");
1093 tokenizer_register_custom_token(t
, TT_LNOT
, "!");
1096 *result
= expr(t
, 0, &err
);
1098 dprintf(2, "eval result: %d\n", *result
);
1103 static int evaluate_condition(struct cpp
*cpp
, struct tokenizer
*t
, int *result
, char *visited
[]) {
1104 int ret
, backslash_seen
= 0;
1108 int tflags
= tokenizer_get_flags(t
);
1109 tokenizer_set_flags(t
, tflags
| TF_PARSE_WIDE_STRINGS
);
1110 ret
= tokenizer_next(t
, &curr
);
1111 if(!ret
) return ret
;
1112 if(!is_whitespace_token(&curr
)) {
1113 error("expected whitespace after if/elif", t
, &curr
);
1116 FILE *f
= open_memstream(&bufp
, &size
);
1118 ret
= tokenizer_next(t
, &curr
);
1119 if(!ret
) return ret
;
1120 if(curr
.type
== TT_IDENTIFIER
) {
1121 if(!expand_macro(cpp
, t
, f
, t
->buf
, -1, visited
)) return 0;
1122 } else if(curr
.type
== TT_SEP
) {
1123 if(curr
.value
== '\\')
1126 if(curr
.value
== '\n') {
1127 if(!backslash_seen
) break;
1129 emit_token(f
, &curr
, t
->buf
);
1134 emit_token(f
, &curr
, t
->buf
);
1137 f
= freopen_r(f
, &bufp
, &size
);
1138 if(!f
|| size
== 0) {
1139 error("#(el)if with no expression", t
, &curr
);
1143 dprintf(2, "evaluating condition %s\n", bufp
);
1145 struct tokenizer t2
;
1146 tokenizer_from_file(&t2
, f
);
1147 ret
= do_eval(&t2
, result
);
1150 tokenizer_set_flags(t
, tflags
);
1154 static void free_visited(char *visited
[]) {
1156 for(i
=0; i
< MAX_RECURSION
; i
++)
1157 if(visited
[i
]) free(visited
[i
]);
1161 int parse_file(struct cpp
*cpp
, FILE *f
, const char *fn
, FILE *out
) {
1164 tokenizer_init(&t
, f
, TF_PARSE_STRINGS
);
1165 tokenizer_set_filename(&t
, fn
);
1166 tokenizer_register_marker(&t
, MT_MULTILINE_COMMENT_START
, "/*"); /**/
1167 tokenizer_register_marker(&t
, MT_MULTILINE_COMMENT_END
, "*/");
1168 tokenizer_register_marker(&t
, MT_SINGLELINE_COMMENT_START
, "//");
1169 int ret
, newline
=1, ws_count
= 0;
1171 int if_level
= 0, if_level_active
= 0, if_level_satisfied
= 0;
1173 #define all_levels_active() (if_level_active == if_level)
1174 #define prev_level_active() (if_level_active == if_level-1)
1175 #define set_level(X, V) do { \
1176 if(if_level_active > X) if_level_active = X; \
1177 if(if_level_satisfied > X) if_level_satisfied = X; \
1179 if(V) if_level_active = X; \
1180 else if(if_level_active == X) if_level_active = X-1; \
1181 if(V && if_level_active == X) if_level_satisfied = X; \
1185 #define skip_conditional_block (if_level > if_level_active)
1187 static const char* directives
[] = {"include", "error", "warning", "define", "undef", "if", "elif", "else", "ifdef", "ifndef", "endif", "line", "pragma", 0};
1188 while((ret
= tokenizer_next(&t
, &curr
)) && curr
.type
!= TT_EOF
) {
1189 newline
= curr
.column
== 0;
1191 ret
= eat_whitespace(&t
, &curr
, &ws_count
);
1192 if(!ret
) return ret
;
1194 if(curr
.type
== TT_EOF
) break;
1195 if(skip_conditional_block
&& !(newline
&& is_char(&curr
, '#'))) continue;
1196 if(is_char(&curr
, '#')) {
1198 error("stray #", &t
, &curr
);
1201 int index
= expect(&t
, TT_IDENTIFIER
, directives
, &curr
);
1203 if(skip_conditional_block
) continue;
1204 error("invalid preprocessing directive", &t
, &curr
);
1207 if(skip_conditional_block
) switch(index
) {
1208 case 0: case 1: case 2: case 3: case 4:
1214 ret
= include_file(cpp
, &t
, out
);
1215 if(!ret
) return ret
;
1218 ret
= emit_error_or_warning(&t
, 1);
1219 if(!ret
) return ret
;
1222 ret
= emit_error_or_warning(&t
, 0);
1223 if(!ret
) return ret
;
1226 ret
= parse_macro(cpp
, &t
);
1227 if(!ret
) return ret
;
1230 if(!skip_next_and_ws(&t
, &curr
)) return 0;
1231 if(curr
.type
!= TT_IDENTIFIER
) {
1232 error("expected identifier", &t
, &curr
);
1235 undef_macro(cpp
, t
.buf
);
1238 if(all_levels_active()) {
1239 char* visited
[MAX_RECURSION
] = {0};
1240 if(!evaluate_condition(cpp
, &t
, &ret
, visited
)) return 0;
1241 free_visited(visited
);
1242 set_level(if_level
+ 1, ret
);
1244 set_level(if_level
+ 1, 0);
1248 if(prev_level_active() && if_level_satisfied
< if_level
) {
1249 char* visited
[MAX_RECURSION
] = {0};
1250 if(!evaluate_condition(cpp
, &t
, &ret
, visited
)) return 0;
1251 free_visited(visited
);
1253 if_level_active
= if_level
;
1254 if_level_satisfied
= if_level
;
1256 } else if(if_level_active
== if_level
) {
1261 if(prev_level_active() && if_level_satisfied
< if_level
) {
1263 if_level_active
= if_level
;
1264 if_level_satisfied
= if_level
;
1266 } else if(if_level_active
== if_level
) {
1272 if(!skip_next_and_ws(&t
, &curr
) || curr
.type
== TT_EOF
) return 0;
1273 ret
= !!get_macro(cpp
, t
.buf
);
1274 if(index
== 9) ret
= !ret
;
1276 if(all_levels_active()) {
1277 set_level(if_level
+ 1, ret
);
1279 set_level(if_level
+ 1, 0);
1283 set_level(if_level
-1, -1);
1286 ret
= tokenizer_read_until(&t
, "\n", 1);
1288 error("unknown", &t
, &curr
);
1293 emit(out
, "#pragma");
1294 while((ret
= x_tokenizer_next(&t
, &curr
)) && curr
.type
!= TT_EOF
) {
1295 emit_token(out
, &curr
, t
.buf
);
1296 if(is_char(&curr
, '\n')) break;
1298 if(!ret
) return ret
;
1311 dprintf(2, "(stdin:%u,%u) ", curr
.line
, curr
.column
);
1312 if(curr
.type
== TT_SEP
)
1313 dprintf(2, "separator: %c\n", curr
.value
== '\n'? ' ' : curr
.value
);
1315 dprintf(2, "%s: %s\n", tokentype_to_str(curr
.type
), t
.buf
);
1317 if(curr
.type
== TT_IDENTIFIER
) {
1318 char* visited
[MAX_RECURSION
] = {0};
1319 if(!expand_macro(cpp
, &t
, out
, t
.buf
, 0, visited
))
1321 free_visited(visited
);
1323 emit_token(out
, &curr
, t
.buf
);
1327 error("unterminated #if", &t
, &curr
);
1333 struct cpp
* cpp_new(void) {
1334 struct cpp
* ret
= calloc(1, sizeof(struct cpp
));
1335 if(!ret
) return ret
;
1336 tglist_init(&ret
->includedirs
);
1337 cpp_add_includedir(ret
, ".");
1338 ret
->macros
= hbmap_new(strptrcmp
, string_hash
, 128);
1339 struct macro m
= {.num_args
= 1};
1340 add_macro(ret
, strdup("defined"), &m
);
1341 m
.num_args
= MACRO_FLAG_OBJECTLIKE
;
1342 add_macro(ret
, strdup("__FILE__"), &m
);
1343 add_macro(ret
, strdup("__LINE__"), &m
);
1347 void cpp_free(struct cpp
*cpp
) {
1349 tglist_free_values(&cpp
->includedirs
);
1350 tglist_free_items(&cpp
->includedirs
);
1353 void cpp_add_includedir(struct cpp
*cpp
, const char* includedir
) {
1354 tglist_add(&cpp
->includedirs
, strdup(includedir
));
1357 int cpp_add_define(struct cpp
*cpp
, const char *mdecl
) {
1358 struct FILE_container tmp
= {0};
1359 tmp
.f
= open_memstream(&tmp
.buf
, &tmp
.len
);
1360 fprintf(tmp
.f
, "%s\n", mdecl
);
1361 tmp
.f
= freopen_r(tmp
.f
, &tmp
.buf
, &tmp
.len
);
1362 tokenizer_from_file(&tmp
.t
, tmp
.f
);
1363 int ret
= parse_macro(cpp
, &tmp
.t
);
1364 free_file_container(&tmp
);
1368 int cpp_run(struct cpp
*cpp
, FILE* in
, FILE* out
, const char* inname
) {
1370 setvbuf(out
, NULL
, _IONBF
, 0);
1372 return parse_file(cpp
, in
, inname
, out
);