1 #define __STDC_WANT_LIB_EXT2__ 1
11 #define MACRO_FLAG_OBJECTLIKE (1U<<31)
12 #define MACRO_FLAG_VARIADIC (1U<<30)
13 #define MACRO_ARGCOUNT_MASK (~(0|MACRO_FLAG_OBJECTLIKE|MACRO_FLAG_VARIADIC))
15 #define OBJECTLIKE(M) (M->num_args & MACRO_FLAG_OBJECTLIKE)
16 #define FUNCTIONLIKE(M) (!(OBJECTLIKE(M)))
17 #define MACRO_ARGCOUNT(M) (M->num_args & MACRO_ARGCOUNT_MASK)
18 #define MACRO_VARIADIC(M) (M->num_args & MACRO_FLAG_VARIADIC)
20 #define MAX_RECURSION 32
22 static unsigned string_hash(const char* s
) {
34 char *str_contents_buf
;
35 tglist(char*) argnames
;
39 tglist(char*) includedirs
;
40 hbmap(char*, struct macro
, 128) *macros
;
41 const char *last_file
;
43 struct tokenizer
*tchain
[MAX_RECURSION
];
46 static int token_needs_string(struct token
*tok
) {
50 case TT_WIDESTRING_LIT
:
65 static void tokenizer_from_file(struct tokenizer
*t
, FILE* f
) {
66 tokenizer_init(t
, f
, TF_PARSE_STRINGS
);
67 tokenizer_set_filename(t
, "<macro>");
71 static int strptrcmp(const void *a
, const void *b
) {
72 const char * const *x
= a
;
73 const char * const *y
= b
;
74 return strcmp(*x
, *y
);
77 static struct macro
* get_macro(struct cpp
*cpp
, const char *name
) {
78 return hbmap_get(cpp
->macros
, name
);
81 static void add_macro(struct cpp
*cpp
, const char *name
, struct macro
*m
) {
82 hbmap_insert(cpp
->macros
, name
, *m
);
85 static int undef_macro(struct cpp
*cpp
, const char *name
) {
86 hbmap_iter k
= hbmap_find(cpp
->macros
, name
);
87 if(k
== (hbmap_iter
) -1) return 0;
88 struct macro
*m
= &hbmap_getval(cpp
->macros
, k
);
89 free(hbmap_getkey(cpp
->macros
, k
));
90 if(m
->str_contents
) fclose(m
->str_contents
);
91 free(m
->str_contents_buf
);
92 tglist_free_values(&m
->argnames
);
93 tglist_free_items(&m
->argnames
);
94 hbmap_delete(cpp
->macros
, k
);
98 static void free_macros(struct cpp
*cpp
) {
100 hbmap_foreach(cpp
->macros
, i
) {
101 while(hbmap_iter_index_valid(cpp
->macros
, i
))
102 undef_macro(cpp
, hbmap_getkey(cpp
->macros
, i
));
104 hbmap_fini(cpp
->macros
, 1);
108 static void error_or_warning(const char *err
, const char* type
, struct tokenizer
*t
, struct token
*curr
) {
109 unsigned column
= curr
? curr
->column
: t
->column
;
110 unsigned line
= curr
? curr
->line
: t
->line
;
111 fprintf(stderr
, "<%s> %u:%u %s: '%s'\n", t
->filename
, line
, column
, type
, err
);
112 fprintf(stderr
, "%s\n", t
->buf
);
113 for(int i
= 0; i
< strlen(t
->buf
); i
++)
114 fprintf(stderr
, "^");
115 fprintf(stderr
, "\n");
117 static void error(const char *err
, struct tokenizer
*t
, struct token
*curr
) {
118 error_or_warning(err
, "error", t
, curr
);
120 static void warning(const char *err
, struct tokenizer
*t
, struct token
*curr
) {
121 error_or_warning(err
, "warning", t
, curr
);
124 static void emit(FILE *out
, const char *s
) {
125 fprintf(out
, "%s", s
);
128 static int x_tokenizer_next_of(struct tokenizer
*t
, struct token
*tok
, int fail_unk
) {
129 int ret
= tokenizer_next(t
, tok
);
130 if(tok
->type
== TT_OVERFLOW
) {
131 error("max token length of 4095 exceeded!", t
, tok
);
133 } else if (fail_unk
&& ret
== 0) {
134 error("tokenizer encountered unknown token", t
, tok
);
136 } else if (tok
->type
== TT_SEP
&& tok
->value
== '\t') {
142 #define tokenizer_next(T, TOK) x_tokenizer_next_of(T, TOK, 0)
143 #define x_tokenizer_next(T, TOK) x_tokenizer_next_of(T, TOK, 1)
145 static int is_whitespace_token(struct token
*token
)
147 return token
->type
== TT_SEP
&&
148 (token
->value
== ' ' || token
->value
== '\t');
151 /* return index of matching item in values array, or -1 on error */
152 static int expect(struct tokenizer
*t
, enum tokentype tt
, const char* values
[], struct token
*token
)
156 ret
= tokenizer_next(t
, token
);
157 if(ret
== 0 || token
->type
== TT_EOF
) goto err
;
158 } while(is_whitespace_token(token
));
160 if(token
->type
!= tt
) {
162 error("unexpected token", t
, token
);
167 if(!strcmp(values
[i
], t
->buf
))
174 static int is_char(struct token
*tok
, int ch
) {
175 return tok
->type
== TT_SEP
&& tok
->value
== ch
;
178 static void flush_whitespace(FILE *out
, int *ws_count
) {
179 while(*ws_count
> 0) {
185 /* skips until the next non-whitespace token (if the current one is one too)*/
186 static int eat_whitespace(struct tokenizer
*t
, struct token
*token
, int *count
) {
189 while (is_whitespace_token(token
)) {
191 ret
= x_tokenizer_next(t
, token
);
196 /* fetches the next token until it is non-whitespace */
197 static int skip_next_and_ws(struct tokenizer
*t
, struct token
*tok
) {
198 int ret
= tokenizer_next(t
, tok
);
201 ret
= eat_whitespace(t
, tok
, &ws_count
);
205 static void emit_token(FILE* out
, struct token
*tok
, const char* strbuf
) {
206 if(tok
->type
== TT_SEP
) {
207 fprintf(out
, "%c", tok
->value
);
208 } else if(strbuf
&& token_needs_string(tok
)) {
209 fprintf(out
, "%s", strbuf
);
211 fprintf(stderr
, "oops, dunno how to handle tt %d (%s)\n", (int) tok
->type
, strbuf
);
215 int parse_file(struct cpp
* cpp
, FILE *f
, const char*, FILE *out
);
216 static int include_file(struct cpp
* cpp
, struct tokenizer
*t
, FILE* out
) {
217 static const char* inc_chars
[] = { "\"", "<", 0};
218 static const char* inc_chars_end
[] = { "\"", ">", 0};
220 tokenizer_set_flags(t
, 0); // disable string tokenization
222 int inc1sep
= expect(t
, TT_SEP
, inc_chars
, &tok
);
224 error("expected one of [\"<]", t
, &tok
);
227 int ret
= tokenizer_read_until(t
, inc_chars_end
[inc1sep
], 1);
229 error("error parsing filename", t
, &tok
);
232 // TODO: different path lookup depending on whether " or <
235 tglist_foreach(&cpp
->includedirs
, i
) {
237 snprintf(buf
, sizeof buf
, "%s/%s", tglist_get(&cpp
->includedirs
, i
), t
->buf
);
242 fprintf(stderr
, "%s: ", t
->buf
);
246 const char *fn
= strdup(t
->buf
);
247 assert(tokenizer_next(t
, &tok
) && is_char(&tok
, inc_chars_end
[inc1sep
][0]));
249 tokenizer_set_flags(t
, TF_PARSE_STRINGS
);
250 return parse_file(cpp
, f
, fn
, out
);
253 static int emit_error_or_warning(struct tokenizer
*t
, int is_error
) {
255 int ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
257 struct token tmp
= {.column
= t
->column
, .line
= t
->line
};
258 ret
= tokenizer_read_until(t
, "\n", 1);
260 error(t
->buf
, t
, &tmp
);
263 warning(t
->buf
, t
, &tmp
);
267 static FILE *freopen_r(FILE *f
, char **buf
, size_t *size
) {
270 return fmemopen(*buf
, *size
, "r");
273 static int consume_nl_and_ws(struct tokenizer
*t
, struct token
*tok
, int expected
) {
274 if(!x_tokenizer_next(t
, tok
)) {
276 error("unexpected", t
, tok
);
280 if(tok
->type
!= TT_SEP
|| tok
->value
!= expected
) goto err
;
282 case '\\' : expected
= '\n'; break;
283 case '\n' : expected
= 0; break;
286 if(is_whitespace_token(tok
)) ;
287 else if(is_char(tok
, '\\')) expected
= '\n';
290 return consume_nl_and_ws(t
, tok
, expected
);
293 static int expand_macro(struct cpp
*cpp
, struct tokenizer
*t
, FILE* out
, const char* name
, unsigned rec_level
, char *visited
[]);
295 static int parse_macro(struct cpp
*cpp
, struct tokenizer
*t
) {
297 int ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
299 struct token curr
; //tmp = {.column = t->column, .line = t->line};
300 ret
= tokenizer_next(t
, &curr
) && curr
.type
!= TT_EOF
;
302 error("parsing macro name", t
, &curr
);
305 if(curr
.type
!= TT_IDENTIFIER
) {
306 error("expected identifier", t
, &curr
);
309 const char* macroname
= strdup(t
->buf
);
311 fprintf(stderr
, "parsing macro %s\n", macroname
);
314 if(get_macro(cpp
, macroname
)) {
315 if(!strcmp(macroname
, "defined")) {
316 error("\"defined\" cannot be used as a macro name", t
, &curr
);
322 struct macro
new = { 0 };
323 unsigned macro_flags
= MACRO_FLAG_OBJECTLIKE
;
324 tglist_init(&new.argnames
);
326 ret
= x_tokenizer_next(t
, &curr
) && curr
.type
!= TT_EOF
;
329 if (is_char(&curr
, '(')) {
331 unsigned expected
= 0;
333 /* process next function argument identifier */
334 ret
= consume_nl_and_ws(t
, &curr
, expected
);
336 error("unexpected", t
, &curr
);
340 if(curr
.type
== TT_SEP
) {
348 ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
352 error("unexpected character", t
, &curr
);
355 } else if(!(curr
.type
== TT_IDENTIFIER
|| curr
.type
== TT_ELLIPSIS
)) {
356 error("expected identifier for macro arg", t
, &curr
);
360 if(curr
.type
== TT_ELLIPSIS
) {
361 if(macro_flags
& MACRO_FLAG_VARIADIC
) {
362 error("\"...\" isn't the last parameter", t
, &curr
);
365 macro_flags
|= MACRO_FLAG_VARIADIC
;
367 char *tmps
= strdup(t
->buf
);
368 tglist_add(&new.argnames
, tmps
);
373 } else if(is_whitespace_token(&curr
)) {
374 ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
376 } else if(is_char(&curr
, '\n')) {
377 /* content-less macro */
381 struct FILE_container
{
386 contents
.f
= open_memstream(&contents
.buf
, &contents
.len
);
388 int backslash_seen
= 0;
390 /* ignore unknown tokens in macro body */
391 ret
= tokenizer_next(t
, &curr
);
393 if(curr
.type
== TT_EOF
) break;
394 if (curr
.type
== TT_SEP
) {
395 if(curr
.value
== '\\')
398 if(curr
.value
== '\n' && !backslash_seen
) break;
399 emit_token(contents
.f
, &curr
, t
->buf
);
403 emit_token(contents
.f
, &curr
, t
->buf
);
406 new.str_contents
= freopen_r(contents
.f
, &contents
.buf
, &contents
.len
);
407 new.str_contents_buf
= contents
.buf
;
410 struct macro
*old
= get_macro(cpp
, macroname
);
411 char *s_old
= old
->str_contents_buf
? old
->str_contents_buf
: "";
412 char *s_new
= new.str_contents_buf
? new.str_contents_buf
: "";
413 if(strcmp(s_old
, s_new
)) {
415 sprintf(buf
, "redefinition of macro %s", macroname
);
419 new.num_args
|= macro_flags
;
420 add_macro(cpp
, macroname
, &new);
424 static size_t macro_arglist_pos(struct macro
*m
, const char* iden
) {
426 for(i
= 0; i
< tglist_getsize(&m
->argnames
); i
++) {
427 char *item
= tglist_get(&m
->argnames
, i
);
428 if(!strcmp(item
, iden
)) return i
;
441 static int was_visited(const char *name
, char*visited
[], unsigned rec_level
) {
443 for(x
= rec_level
; x
>= 0; --x
) {
444 if(!strcmp(visited
[x
], name
)) return 1;
449 unsigned get_macro_info(struct cpp
* cpp
,
451 struct macro_info
*mi_list
, size_t *mi_cnt
,
452 unsigned nest
, unsigned tpos
, const char *name
,
453 char* visited
[], unsigned rec_level
458 int ret
= tokenizer_next(t
, &tok
);
459 if(!ret
|| tok
.type
== TT_EOF
) break;
461 fprintf(stderr
, "(%s) nest %d, brace %u t: %s\n", name
, nest
, brace_lvl
, t
->buf
);
464 if(tok
.type
== TT_IDENTIFIER
&& (m
= get_macro(cpp
, t
->buf
)) && !was_visited(t
->buf
, visited
, rec_level
)) {
465 const char* newname
= strdup(t
->buf
);
466 if(FUNCTIONLIKE(m
)) {
467 if(tokenizer_peek(t
) == '(') {
468 unsigned tpos_save
= tpos
;
469 tpos
= get_macro_info(cpp
, t
, mi_list
, mi_cnt
, nest
+1, tpos
+1, newname
, visited
, rec_level
);
470 mi_list
[*mi_cnt
] = (struct macro_info
) {
477 /* suppress expansion */
480 mi_list
[*mi_cnt
] = (struct macro_info
) {
487 } else if(is_char(&tok
, '(')) {
489 } else if(is_char(&tok
, ')')) {
491 if(brace_lvl
== 0 && nest
!= 0) break;
498 struct FILE_container
{
505 static void free_file_container(struct FILE_container
*fc
) {
510 static int mem_tokenizers_join(
511 struct FILE_container
* org
, struct FILE_container
*inj
,
512 struct FILE_container
* result
,
513 int first
, off_t lastpos
) {
514 result
->f
= open_memstream(&result
->buf
, &result
->len
);
518 tokenizer_rewind(&org
->t
);
519 for(i
=0; i
<first
; ++i
) {
520 ret
= tokenizer_next(&org
->t
, &tok
);
521 assert(ret
&& tok
.type
!= TT_EOF
);
522 emit_token(result
->f
, &tok
, org
->t
.buf
);
524 int cnt
= 0, last
= first
;
526 ret
= tokenizer_next(&inj
->t
, &tok
);
527 if(!ret
|| tok
.type
== TT_EOF
) break;
528 emit_token(result
->f
, &tok
, inj
->t
.buf
);
531 while(tokenizer_ftello(&org
->t
) < lastpos
) {
532 ret
= tokenizer_next(&org
->t
, &tok
);
536 int diff
= cnt
- ((int) last
- (int) first
);
539 ret
= tokenizer_next(&org
->t
, &tok
);
540 if(!ret
|| tok
.type
== TT_EOF
) break;
541 emit_token(result
->f
, &tok
, org
->t
.buf
);
544 result
->f
= freopen_r(result
->f
, &result
->buf
, &result
->len
);
545 tokenizer_from_file(&result
->t
, result
->f
);
549 static int tchain_parens_follows(struct cpp
*cpp
, int rec_level
) {
551 for(i
=rec_level
;i
>=0;--i
) {
552 c
= tokenizer_peek(cpp
->tchain
[i
]);
553 if(c
== EOF
) continue;
554 if(c
== '(') return i
;
560 static int stringify(struct cpp
*ccp
, struct tokenizer
*t
, FILE* output
) {
565 ret
= tokenizer_next(t
, &tok
);
567 if(tok
.type
== TT_EOF
) break;
568 if(is_char(&tok
, '\n')) continue;
569 if(is_char(&tok
, '\\') && tokenizer_peek(t
) == '\n') continue;
570 if(tok
.type
== TT_DQSTRING_LIT
) {
575 emit(output
, "\\\"");
576 } else if (*s
== '\\') {
577 emit(output
, "\\\\");
585 emit_token(output
, &tok
, t
->buf
);
591 /* rec_level -1 serves as a magic value to signal we're using
592 expand_macro from the if-evaluator code, which means activating
593 the "define" macro */
594 static int expand_macro(struct cpp
* cpp
, struct tokenizer
*t
, FILE* out
, const char* name
, unsigned rec_level
, char* visited
[]) {
595 int is_define
= !strcmp(name
, "defined");
598 if(is_define
&& rec_level
!= -1)
600 else m
= get_macro(cpp
, name
);
605 if(rec_level
== -1) rec_level
= 0;
606 if(rec_level
>= MAX_RECURSION
) {
607 error("max recursion level reached", t
, 0);
611 fprintf(stderr
, "lvl %u: expanding macro %s (%s)\n", rec_level
, name
, m
->str_contents_buf
);
614 if(rec_level
== 0 && strcmp(t
->filename
, "<macro>")) {
615 cpp
->last_file
= t
->filename
;
616 cpp
->last_line
= t
->line
;
618 if(!strcmp(name
, "__FILE__")) {
620 emit(out
, cpp
->last_file
);
623 } else if(!strcmp(name
, "__LINE__")) {
625 sprintf(buf
, "%d", cpp
->last_line
);
630 if(visited
[rec_level
]) free(visited
[rec_level
]);
631 visited
[rec_level
] = strdup(name
);
632 cpp
->tchain
[rec_level
] = t
;
636 unsigned num_args
= MACRO_ARGCOUNT(m
);
637 struct FILE_container
*argvalues
= calloc(MACRO_VARIADIC(m
) ? num_args
+ 1 : num_args
, sizeof(struct FILE_container
));
639 for(i
=0; i
< num_args
; i
++)
640 argvalues
[i
].f
= open_memstream(&argvalues
[i
].buf
, &argvalues
[i
].len
);
642 /* replace named arguments in the contents of the macro call */
643 if(FUNCTIONLIKE(m
)) {
645 if((ret
= tokenizer_peek(t
)) != '(') {
646 /* function-like macro shall not be expanded if not followed by '(' */
647 if(ret
== EOF
&& rec_level
> 0 && (ret
= tchain_parens_follows(cpp
, rec_level
-1)) != -1) {
648 // warning("Replacement text involved subsequent text", t, 0);
649 t
= cpp
->tchain
[ret
];
655 ret
= x_tokenizer_next(t
, &tok
);
656 assert(ret
&& is_char(&tok
, '('));
658 unsigned curr_arg
= 0, need_arg
= 1, parens
= 0;
660 if(!tokenizer_skip_chars(t
, " \t", &ws_count
)) return 0;
663 if(num_args
== 1 && MACRO_VARIADIC(m
)) varargs
= 1;
665 int ret
= tokenizer_next(t
, &tok
);
667 if( tok
.type
== TT_EOF
) {
668 fprintf(stderr
, "warning EOF\n");
671 if(!parens
&& is_char(&tok
, ',') && !varargs
) {
672 if(need_arg
&& !ws_count
) {
673 /* empty argument is OK */
676 if(!varargs
) curr_arg
++;
677 if(curr_arg
+ 1 == num_args
&& MACRO_VARIADIC(m
)) {
679 } else if(curr_arg
>= num_args
) {
680 error("too many arguments for function macro", t
, &tok
);
683 ret
= tokenizer_skip_chars(t
, " \t", &ws_count
);
686 } else if(is_char(&tok
, '(')) {
688 } else if(is_char(&tok
, ')')) {
690 if(curr_arg
+ num_args
&& curr_arg
< num_args
-1) {
691 error("too few args for function macro", t
, &tok
);
697 } else if(is_char(&tok
, '\\')) {
698 if(tokenizer_peek(t
) == '\n') continue;
701 emit_token(argvalues
[curr_arg
].f
, &tok
, t
->buf
);
705 for(i
=0; i
< num_args
; i
++) {
706 argvalues
[i
].f
= freopen_r(argvalues
[i
].f
, &argvalues
[i
].buf
, &argvalues
[i
].len
);
707 tokenizer_from_file(&argvalues
[i
].t
, argvalues
[i
].f
);
709 fprintf(stderr
, "macro argument %i: %s\n", (int) i
, argvalues
[i
].buf
);
714 if(get_macro(cpp
, argvalues
[0].buf
))
720 if(!m
->str_contents
) goto cleanup
;
722 struct FILE_container cwae
= {0}; /* contents_with_args_expanded */
723 cwae
.f
= open_memstream(&cwae
.buf
, &cwae
.len
);
724 FILE* output
= cwae
.f
;
727 tokenizer_from_file(&t2
, m
->str_contents
);
732 ret
= tokenizer_next(&t2
, &tok
);
734 if(tok
.type
== TT_EOF
) break;
735 if(tok
.type
== TT_IDENTIFIER
) {
736 flush_whitespace(output
, &ws_count
);
738 if(MACRO_VARIADIC(m
) && !strcmp(t2
.buf
, "__VA_ARGS__")) {
741 size_t arg_nr
= macro_arglist_pos(m
, id
);
742 if(arg_nr
!= (size_t) -1) {
743 tokenizer_rewind(&argvalues
[arg_nr
].t
);
744 if(hash_count
== 1) ret
= stringify(cpp
, &argvalues
[arg_nr
].t
, output
);
746 ret
= tokenizer_next(&argvalues
[arg_nr
].t
, &tok
);
748 if(tok
.type
== TT_EOF
) break;
749 emit_token(output
, &tok
, argvalues
[arg_nr
].t
.buf
);
753 if(hash_count
== 1) {
755 error("'#' is not followed by macro parameter", &t2
, &tok
);
758 emit_token(output
, &tok
, t2
.buf
);
760 } else if(is_char(&tok
, '#')) {
766 /* in a real cpp we'd need to look for '\\' first */
767 while(tokenizer_peek(&t2
) == '\n') {
768 x_tokenizer_next(&t2
, &tok
);
770 if(tokenizer_peek(&t2
) == '#') x_tokenizer_next(&t2
, &tok
);
773 if(hash_count
== 1) flush_whitespace(output
, &ws_count
);
774 else if(hash_count
> 2) {
775 error("only two '#' characters allowed for macro expansion", &t2
, &tok
);
779 ret
= tokenizer_skip_chars(&t2
, " \t\n", &ws_count
);
781 ret
= tokenizer_skip_chars(&t2
, " \t", &ws_count
);
786 } else if(is_whitespace_token(&tok
)) {
789 if(hash_count
== 1) goto hash_err
;
790 flush_whitespace(output
, &ws_count
);
791 emit_token(output
, &tok
, t2
.buf
);
794 flush_whitespace(output
, &ws_count
);
796 /* we need to expand macros after the macro arguments have been inserted */
798 cwae
.f
= freopen_r(cwae
.f
, &cwae
.buf
, &cwae
.len
);
800 fprintf(stderr
, "contents with args expanded: %s\n", cwae
.buf
);
802 tokenizer_from_file(&cwae
.t
, cwae
.f
);
805 int ret
= tokenizer_next(&cwae
.t
, &tok
);
807 if(tok
.type
== TT_EOF
) break;
808 if(tok
.type
== TT_IDENTIFIER
&& get_macro(cpp
, cwae
.t
.buf
))
812 tokenizer_rewind(&cwae
.t
);
813 struct macro_info
*mcs
= calloc(mac_cnt
, sizeof(struct macro_info
));
816 get_macro_info(cpp
, &cwae
.t
, mcs
, &mac_iter
, 0, 0, "null", visited
, rec_level
);
817 /* some of the macros might not expand at this stage (without braces)*/
818 while(mac_cnt
&& mcs
[mac_cnt
-1].name
== 0)
821 size_t i
; int depth
= 0;
822 for(i
= 0; i
< mac_cnt
; ++i
) {
823 if(mcs
[i
].nest
> depth
) depth
= mcs
[i
].nest
;
826 for(i
= 0; i
< mac_cnt
; ++i
) if(mcs
[i
].nest
== depth
) {
827 struct macro_info
*mi
= &mcs
[i
];
828 tokenizer_rewind(&cwae
.t
);
831 for(j
= 0; j
< mi
->first
+1; ++j
)
832 tokenizer_next(&cwae
.t
, &utok
);
833 struct FILE_container t2
= {0}, tmp
= {0};
834 t2
.f
= open_memstream(&t2
.buf
, &t2
.len
);
835 if(!expand_macro(cpp
, &cwae
.t
, t2
.f
, mi
->name
, rec_level
+1, visited
))
837 t2
.f
= freopen_r(t2
.f
, &t2
.buf
, &t2
.len
);
838 tokenizer_from_file(&t2
.t
, t2
.f
);
839 /* manipulating the stream in case more stuff has been consumed */
840 off_t cwae_pos
= tokenizer_ftello(&cwae
.t
);
841 tokenizer_rewind(&cwae
.t
);
843 fprintf(stderr
, "merging %s with %s\n", cwae
.buf
, t2
.buf
);
845 int diff
= mem_tokenizers_join(&cwae
, &t2
, &tmp
, mi
->first
, cwae_pos
);
846 free_file_container(&cwae
);
847 free_file_container(&t2
);
850 fprintf(stderr
, "result: %s\n", cwae
.buf
);
852 if(diff
== 0) continue;
853 for(j
= 0; j
< mac_cnt
; ++j
) {
855 struct macro_info
*mi2
= &mcs
[j
];
856 /* modified element mi can be either inside, after or before
857 another macro. the after case doesn't affect us. */
858 if(mi
->first
>= mi2
->first
&& mi
->last
<= mi2
->last
) {
861 } else if (mi
->first
< mi2
->first
) {
870 tokenizer_rewind(&cwae
.t
);
873 tokenizer_next(&cwae
.t
, &tok
);
874 if(tok
.type
== TT_EOF
) break;
875 if(tok
.type
== TT_IDENTIFIER
&& tokenizer_peek(&cwae
.t
) == EOF
&&
876 (ma
= get_macro(cpp
, cwae
.t
.buf
)) && FUNCTIONLIKE(ma
) && tchain_parens_follows(cpp
, rec_level
) != -1
878 int ret
= expand_macro(cpp
, &cwae
.t
, out
, cwae
.t
.buf
, rec_level
+1, visited
);
881 emit_token(out
, &tok
, cwae
.t
.buf
);
886 free_file_container(&cwae
);
889 for(i
=0; i
< num_args
; i
++) {
890 fclose(argvalues
[i
].f
);
891 free(argvalues
[i
].buf
);
897 #define TT_LAND TT_CUSTOM+0
898 #define TT_LOR TT_CUSTOM+1
899 #define TT_LTE TT_CUSTOM+2
900 #define TT_GTE TT_CUSTOM+3
901 #define TT_SHL TT_CUSTOM+4
902 #define TT_SHR TT_CUSTOM+5
903 #define TT_EQ TT_CUSTOM+6
904 #define TT_NEQ TT_CUSTOM+7
905 #define TT_LT TT_CUSTOM+8
906 #define TT_GT TT_CUSTOM+9
907 #define TT_BAND TT_CUSTOM+10
908 #define TT_BOR TT_CUSTOM+11
909 #define TT_XOR TT_CUSTOM+12
910 #define TT_NEG TT_CUSTOM+13
911 #define TT_PLUS TT_CUSTOM+14
912 #define TT_MINUS TT_CUSTOM+15
913 #define TT_MUL TT_CUSTOM+16
914 #define TT_DIV TT_CUSTOM+17
915 #define TT_MOD TT_CUSTOM+18
916 #define TT_LPAREN TT_CUSTOM+19
917 #define TT_RPAREN TT_CUSTOM+20
918 #define TT_LNOT TT_CUSTOM+21
920 #define TTINT(X) X-TT_CUSTOM
921 #define TTENT(X, Y) [TTINT(X)] = Y
923 static int bp(int tokentype
) {
924 static const int bplist
[] = {
925 TTENT(TT_LOR
, 1 << 4),
926 TTENT(TT_LAND
, 1 << 5),
927 TTENT(TT_BOR
, 1 << 6),
928 TTENT(TT_XOR
, 1 << 7),
929 TTENT(TT_BAND
, 1 << 8),
930 TTENT(TT_EQ
, 1 << 9),
931 TTENT(TT_NEQ
, 1 << 9),
932 TTENT(TT_LTE
, 1 << 10),
933 TTENT(TT_GTE
, 1 << 10),
934 TTENT(TT_LT
, 1 << 10),
935 TTENT(TT_GT
, 1 << 10),
936 TTENT(TT_SHL
, 1 << 11),
937 TTENT(TT_SHR
, 1 << 11),
938 TTENT(TT_PLUS
, 1 << 12),
939 TTENT(TT_MINUS
, 1 << 12),
940 TTENT(TT_MUL
, 1 << 13),
941 TTENT(TT_DIV
, 1 << 13),
942 TTENT(TT_MOD
, 1 << 13),
943 TTENT(TT_NEG
, 1 << 14),
944 TTENT(TT_LNOT
, 1 << 14),
945 TTENT(TT_LPAREN
, 1 << 15),
946 // TTENT(TT_RPAREN, 1 << 15),
947 // TTENT(TT_LPAREN, 0),
950 if(TTINT(tokentype
) < sizeof(bplist
)/sizeof(bplist
[0])) return bplist
[TTINT(tokentype
)];
954 static int expr(struct tokenizer
*t
, int rbp
, int *err
);
956 static int charlit_to_int(const char *lit
) {
957 if(lit
[1] == '\\') switch(lit
[2]) {
962 case 'x': return strtol(lit
+3, NULL
, 16);
963 default: return lit
[2];
968 static int nud(struct tokenizer
*t
, struct token
*tok
, int *err
) {
969 switch((unsigned) tok
->type
) {
970 case TT_IDENTIFIER
: return 0;
971 case TT_WIDECHAR_LIT
:
972 case TT_SQSTRING_LIT
: return charlit_to_int(t
->buf
);
976 return strtol(t
->buf
, NULL
, 0);
977 case TT_NEG
: return ~ expr(t
, bp(tok
->type
), err
);
978 case TT_PLUS
: return expr(t
, bp(tok
->type
), err
);
979 case TT_MINUS
: return - expr(t
, bp(tok
->type
), err
);
980 case TT_LNOT
: return !expr(t
, bp(tok
->type
), err
);
982 int inner
= expr(t
, 0, err
);
983 if(0!=expect(t
, TT_RPAREN
, (const char*[]){")", 0}, tok
)) {
984 error("missing ')'", t
, tok
);
990 error("floating constant in preprocessor expression", t
, tok
);
995 error("unexpected token", t
, tok
);
1001 static int led(struct tokenizer
*t
, int left
, struct token
*tok
, int *err
) {
1003 switch((unsigned) tok
->type
) {
1006 right
= expr(t
, bp(tok
->type
), err
);
1007 if(tok
->type
== TT_LAND
) return left
&& right
;
1008 return left
|| right
;
1009 case TT_LTE
: return left
<= expr(t
, bp(tok
->type
), err
);
1010 case TT_GTE
: return left
>= expr(t
, bp(tok
->type
), err
);
1011 case TT_SHL
: return left
<< expr(t
, bp(tok
->type
), err
);
1012 case TT_SHR
: return left
>> expr(t
, bp(tok
->type
), err
);
1013 case TT_EQ
: return left
== expr(t
, bp(tok
->type
), err
);
1014 case TT_NEQ
: return left
!= expr(t
, bp(tok
->type
), err
);
1015 case TT_LT
: return left
< expr(t
, bp(tok
->type
), err
);
1016 case TT_GT
: return left
> expr(t
, bp(tok
->type
), err
);
1017 case TT_BAND
: return left
& expr(t
, bp(tok
->type
), err
);
1018 case TT_BOR
: return left
| expr(t
, bp(tok
->type
), err
);
1019 case TT_XOR
: return left
^ expr(t
, bp(tok
->type
), err
);
1020 case TT_PLUS
: return left
+ expr(t
, bp(tok
->type
), err
);
1021 case TT_MINUS
:return left
- expr(t
, bp(tok
->type
), err
);
1022 case TT_MUL
: return left
* expr(t
, bp(tok
->type
), err
);
1025 right
= expr(t
, bp(tok
->type
), err
);
1027 error("eval: div by zero", t
, tok
);
1030 else if(tok
->type
== TT_DIV
) return left
/ right
;
1031 else if(tok
->type
== TT_MOD
) return left
% right
;
1034 error("eval: unexpect token", t
, tok
);
1041 static int tokenizer_peek_next_non_ws(struct tokenizer
*t
, struct token
*tok
)
1045 ret
= tokenizer_peek_token(t
, tok
);
1046 if(is_whitespace_token(tok
))
1047 x_tokenizer_next(t
, tok
);
1053 static int expr(struct tokenizer
*t
, int rbp
, int*err
) {
1055 int ret
= skip_next_and_ws(t
, &tok
);
1056 if(tok
.type
== TT_EOF
) return 0;
1057 int left
= nud(t
, &tok
, err
);
1059 ret
= tokenizer_peek_next_non_ws(t
, &tok
);
1060 if(bp(tok
.type
) <= rbp
) break;
1061 ret
= tokenizer_next(t
, &tok
);
1062 if(tok
.type
== TT_EOF
) break;
1063 left
= led(t
, left
, &tok
, err
);
1069 static int do_eval(struct tokenizer
*t
, int *result
) {
1070 tokenizer_register_custom_token(t
, TT_LAND
, "&&");
1071 tokenizer_register_custom_token(t
, TT_LOR
, "||");
1072 tokenizer_register_custom_token(t
, TT_LTE
, "<=");
1073 tokenizer_register_custom_token(t
, TT_GTE
, ">=");
1074 tokenizer_register_custom_token(t
, TT_SHL
, "<<");
1075 tokenizer_register_custom_token(t
, TT_SHR
, ">>");
1076 tokenizer_register_custom_token(t
, TT_EQ
, "==");
1077 tokenizer_register_custom_token(t
, TT_NEQ
, "!=");
1079 tokenizer_register_custom_token(t
, TT_LT
, "<");
1080 tokenizer_register_custom_token(t
, TT_GT
, ">");
1082 tokenizer_register_custom_token(t
, TT_BAND
, "&");
1083 tokenizer_register_custom_token(t
, TT_BOR
, "|");
1084 tokenizer_register_custom_token(t
, TT_XOR
, "^");
1085 tokenizer_register_custom_token(t
, TT_NEG
, "~");
1087 tokenizer_register_custom_token(t
, TT_PLUS
, "+");
1088 tokenizer_register_custom_token(t
, TT_MINUS
, "-");
1089 tokenizer_register_custom_token(t
, TT_MUL
, "*");
1090 tokenizer_register_custom_token(t
, TT_DIV
, "/");
1091 tokenizer_register_custom_token(t
, TT_MOD
, "%");
1093 tokenizer_register_custom_token(t
, TT_LPAREN
, "(");
1094 tokenizer_register_custom_token(t
, TT_RPAREN
, ")");
1095 tokenizer_register_custom_token(t
, TT_LNOT
, "!");
1098 *result
= expr(t
, 0, &err
);
1100 fprintf(stderr
, "eval result: %d\n", *result
);
1105 static int evaluate_condition(struct cpp
*cpp
, struct tokenizer
*t
, int *result
, char *visited
[]) {
1106 int ret
, backslash_seen
= 0;
1110 int tflags
= tokenizer_get_flags(t
);
1111 tokenizer_set_flags(t
, tflags
| TF_PARSE_WIDE_STRINGS
);
1112 ret
= tokenizer_next(t
, &curr
);
1113 if(!ret
) return ret
;
1114 if(!is_whitespace_token(&curr
)) {
1115 error("expected whitespace after if/elif", t
, &curr
);
1118 FILE *f
= open_memstream(&bufp
, &size
);
1120 ret
= tokenizer_next(t
, &curr
);
1121 if(!ret
) return ret
;
1122 if(curr
.type
== TT_IDENTIFIER
) {
1123 if(!expand_macro(cpp
, t
, f
, t
->buf
, -1, visited
)) return 0;
1124 } else if(curr
.type
== TT_SEP
) {
1125 if(curr
.value
== '\\')
1128 if(curr
.value
== '\n') {
1129 if(!backslash_seen
) break;
1131 emit_token(f
, &curr
, t
->buf
);
1136 emit_token(f
, &curr
, t
->buf
);
1139 f
= freopen_r(f
, &bufp
, &size
);
1140 if(!f
|| size
== 0) {
1141 error("#(el)if with no expression", t
, &curr
);
1145 fprintf(stderr
, "evaluating condition %s\n", bufp
);
1147 struct tokenizer t2
;
1148 tokenizer_from_file(&t2
, f
);
1149 ret
= do_eval(&t2
, result
);
1152 tokenizer_set_flags(t
, tflags
);
1156 static void free_visited(char *visited
[]) {
1158 for(i
=0; i
< MAX_RECURSION
; i
++)
1159 if(visited
[i
]) free(visited
[i
]);
1163 int parse_file(struct cpp
*cpp
, FILE *f
, const char *fn
, FILE *out
) {
1166 tokenizer_init(&t
, f
, TF_PARSE_STRINGS
);
1167 tokenizer_set_filename(&t
, fn
);
1168 tokenizer_register_marker(&t
, MT_MULTILINE_COMMENT_START
, "/*"); /**/
1169 tokenizer_register_marker(&t
, MT_MULTILINE_COMMENT_END
, "*/");
1170 tokenizer_register_marker(&t
, MT_SINGLELINE_COMMENT_START
, "//");
1171 int ret
, newline
=1, ws_count
= 0;
1173 int if_level
= 0, if_level_active
= 0, if_level_satisfied
= 0;
1175 #define all_levels_active() (if_level_active == if_level)
1176 #define prev_level_active() (if_level_active == if_level-1)
1177 #define set_level(X, V) do { \
1178 if(if_level_active > X) if_level_active = X; \
1179 if(if_level_satisfied > X) if_level_satisfied = X; \
1181 if(V) if_level_active = X; \
1182 else if(if_level_active == X) if_level_active = X-1; \
1183 if(V && if_level_active == X) if_level_satisfied = X; \
1187 #define skip_conditional_block (if_level > if_level_active)
1189 static const char* directives
[] = {"include", "error", "warning", "define", "undef", "if", "elif", "else", "ifdef", "ifndef", "endif", "line", "pragma", 0};
1190 while((ret
= tokenizer_next(&t
, &curr
)) && curr
.type
!= TT_EOF
) {
1191 newline
= curr
.column
== 0;
1193 ret
= eat_whitespace(&t
, &curr
, &ws_count
);
1194 if(!ret
) return ret
;
1196 if(curr
.type
== TT_EOF
) break;
1197 if(skip_conditional_block
&& !(newline
&& is_char(&curr
, '#'))) continue;
1198 if(is_char(&curr
, '#')) {
1200 error("stray #", &t
, &curr
);
1203 int index
= expect(&t
, TT_IDENTIFIER
, directives
, &curr
);
1205 if(skip_conditional_block
) continue;
1206 error("invalid preprocessing directive", &t
, &curr
);
1209 if(skip_conditional_block
) switch(index
) {
1210 case 0: case 1: case 2: case 3: case 4:
1216 ret
= include_file(cpp
, &t
, out
);
1217 if(!ret
) return ret
;
1220 ret
= emit_error_or_warning(&t
, 1);
1221 if(!ret
) return ret
;
1224 ret
= emit_error_or_warning(&t
, 0);
1225 if(!ret
) return ret
;
1228 ret
= parse_macro(cpp
, &t
);
1229 if(!ret
) return ret
;
1232 if(!skip_next_and_ws(&t
, &curr
)) return 0;
1233 if(curr
.type
!= TT_IDENTIFIER
) {
1234 error("expected identifier", &t
, &curr
);
1237 undef_macro(cpp
, t
.buf
);
1240 if(all_levels_active()) {
1241 char* visited
[MAX_RECURSION
] = {0};
1242 if(!evaluate_condition(cpp
, &t
, &ret
, visited
)) return 0;
1243 free_visited(visited
);
1244 set_level(if_level
+ 1, ret
);
1246 set_level(if_level
+ 1, 0);
1250 if(prev_level_active() && if_level_satisfied
< if_level
) {
1251 char* visited
[MAX_RECURSION
] = {0};
1252 if(!evaluate_condition(cpp
, &t
, &ret
, visited
)) return 0;
1253 free_visited(visited
);
1255 if_level_active
= if_level
;
1256 if_level_satisfied
= if_level
;
1258 } else if(if_level_active
== if_level
) {
1263 if(prev_level_active() && if_level_satisfied
< if_level
) {
1265 if_level_active
= if_level
;
1266 if_level_satisfied
= if_level
;
1268 } else if(if_level_active
== if_level
) {
1274 if(!skip_next_and_ws(&t
, &curr
) || curr
.type
== TT_EOF
) return 0;
1275 ret
= !!get_macro(cpp
, t
.buf
);
1276 if(index
== 9) ret
= !ret
;
1278 if(all_levels_active()) {
1279 set_level(if_level
+ 1, ret
);
1281 set_level(if_level
+ 1, 0);
1285 set_level(if_level
-1, -1);
1288 ret
= tokenizer_read_until(&t
, "\n", 1);
1290 error("unknown", &t
, &curr
);
1295 emit(out
, "#pragma");
1296 while((ret
= x_tokenizer_next(&t
, &curr
)) && curr
.type
!= TT_EOF
) {
1297 emit_token(out
, &curr
, t
.buf
);
1298 if(is_char(&curr
, '\n')) break;
1300 if(!ret
) return ret
;
1313 fprintf(stderr
, "(stdin:%u,%u) ", curr
.line
, curr
.column
);
1314 if(curr
.type
== TT_SEP
)
1315 fprintf(stderr
, "separator: %c\n", curr
.value
== '\n'? ' ' : curr
.value
);
1317 fprintf(stderr
, "%s: %s\n", tokentype_to_str(curr
.type
), t
.buf
);
1319 if(curr
.type
== TT_IDENTIFIER
) {
1320 char* visited
[MAX_RECURSION
] = {0};
1321 if(!expand_macro(cpp
, &t
, out
, t
.buf
, 0, visited
))
1323 free_visited(visited
);
1325 emit_token(out
, &curr
, t
.buf
);
1329 error("unterminated #if", &t
, &curr
);
1335 struct cpp
* cpp_new(void) {
1336 struct cpp
* ret
= calloc(1, sizeof(struct cpp
));
1337 if(!ret
) return ret
;
1338 tglist_init(&ret
->includedirs
);
1339 cpp_add_includedir(ret
, ".");
1340 ret
->macros
= hbmap_new(strptrcmp
, string_hash
, 128);
1341 struct macro m
= {.num_args
= 1};
1342 add_macro(ret
, strdup("defined"), &m
);
1343 m
.num_args
= MACRO_FLAG_OBJECTLIKE
;
1344 add_macro(ret
, strdup("__FILE__"), &m
);
1345 add_macro(ret
, strdup("__LINE__"), &m
);
1349 void cpp_free(struct cpp
*cpp
) {
1351 tglist_free_values(&cpp
->includedirs
);
1352 tglist_free_items(&cpp
->includedirs
);
1355 void cpp_add_includedir(struct cpp
*cpp
, const char* includedir
) {
1356 tglist_add(&cpp
->includedirs
, strdup(includedir
));
1359 int cpp_add_define(struct cpp
*cpp
, const char *mdecl
) {
1360 struct FILE_container tmp
= {0};
1361 tmp
.f
= open_memstream(&tmp
.buf
, &tmp
.len
);
1362 fprintf(tmp
.f
, "%s\n", mdecl
);
1363 tmp
.f
= freopen_r(tmp
.f
, &tmp
.buf
, &tmp
.len
);
1364 tokenizer_from_file(&tmp
.t
, tmp
.f
);
1365 int ret
= parse_macro(cpp
, &tmp
.t
);
1366 free_file_container(&tmp
);
1370 int cpp_run(struct cpp
*cpp
, FILE* in
, FILE* out
, const char* inname
) {
1372 setvbuf(out
, NULL
, _IONBF
, 0);
1374 return parse_file(cpp
, in
, inname
, out
);