10 #include "sys/types.h"
14 #define CONFIG_INCLUDE_DIRS "/usr/include"
16 //#define trace(...) fprintf(stderr, __VA_ARGS__)
19 // A value from an expression
25 void cpp_delete_macro(struct cpp
*cpp
, struct macro
*m
);
26 void cpp_lex(struct cpp
*cpp
);
28 int cpp_get_col(struct cpp
*cpp
, char *p
)
30 return p
+ 1 - cpp
->line_buf
;
33 #define cpp_error(cpp, p, ...) cpp_message(cpp, p, NULL, "error: " __VA_ARGS__)
34 #define cpp_warning(cpp, p, ...) cpp_message(cpp, p, NULL, "warning: " __VA_ARGS__)
35 #define cpp_error_loc(cpp, sloc, ...) cpp_message(cpp, NULL, sloc, "error: " __VA_ARGS__)
36 #define cpp_warning_loc(cpp, sloc, ...) cpp_message(cpp, NULL, sloc, "warning: " __VA_ARGS__)
37 void cpp_message(struct cpp
*cpp
, char *p
, struct sloc
*sloc
, const char *fmt
, ...)
42 my_sloc
= cpp
->line_loc
;
43 my_sloc
.col
= cpp_get_col(cpp
, p
);
46 fprintf(stderr
, "%s:%d: ",
50 // NOTE: don't print col number, because after macro substitution,
51 // the column number becomes incorrect. Fix=???
53 vfprintf(stderr
, fmt
, ap
);
58 void cpp_init(struct cpp
*cpp
)
60 cpp
->include_top
= NULL
;
61 cpp
->stale_files
= NULL
;
64 cpp
->include_dirs
= estrdup(CONFIG_INCLUDE_DIRS
);
67 void cpp_delete(struct cpp
*cpp
)
69 struct incfile
*incf
, *incf_prev
;
70 incf
= cpp
->include_top
;
72 incf_prev
= incf
->prev
;
76 incf
= cpp
->stale_files
;
78 incf_prev
= incf
->prev
;
83 free(cpp
->include_dirs
);
85 cpp_delete_macro(cpp
, cpp
->macros
);
89 struct macro
*cpp_create_macro(const char *name
)
92 m
= emalloc(sizeof(struct macro
) + strlen(name
));
97 strcpy(m
->name
, name
);
101 struct macro
*cpp_create_push_macro(struct cpp
*cpp
, const char *name
)
104 m
= cpp_create_macro(name
);
105 m
->next
= cpp
->macros
;
110 struct macro
*cpp_find_macro(struct cpp
*cpp
, const char *name
)
114 while (m
&& strcmp(m
->name
, name
)){
120 struct macro
*cpp_find_macro_arg(struct macro
*m
, const char *name
)
124 while (m_arg
&& strcmp(m_arg
->name
, name
)){
130 static int macro_count_args(struct macro
*m
)
143 void cpp_delete_macro(struct cpp
*cpp
, struct macro
*m
)
145 struct macro
*m_i
, *m_arg
, *m_arg_next
;
148 cpp
->macros
= m
->next
;
150 while (m_i
&& m_i
->next
!= m
){
159 m_arg_next
= m_arg
->next
;
169 // push a file on the include stack
170 void cpp_include_file(struct cpp
*cpp
, const char *name
, FILE *f
, bool must_close
)
172 struct incfile
*incf
;
173 incf
= emalloc(sizeof(struct incfile
) + strlen(name
));
175 strcpy(incf
->name
, name
);
177 incf
->must_close
= must_close
;
179 incf
->cond_d
= incf
->cond_td
= 0;
180 // push on include stack
181 incf
->prev
= cpp
->include_top
;
182 cpp
->include_top
= incf
;
185 static bool get_dir(char **dirlist
, char **output
)
188 char *p
= strchrnul(*dirlist
, ':');
189 strdncpy(output
, *dirlist
, p
- *dirlist
);
197 // open a file and push it on the include stack
198 bool cpp_open_include_file(struct cpp
*cpp
, const char *name
)
201 char *dirlist
, *attempt_file
= NULL
;
202 struct stat stat_struct
;
204 // try current directory
205 if (!stat(name
, &stat_struct
)){
206 f
= fopen(name
, "r");
208 cpp_include_file(cpp
, name
, f
, true);
213 dirlist
= cpp
->include_dirs
;
214 while (get_dir(&dirlist
, &attempt_file
)){
215 strdcatc(&attempt_file
, '/');
216 strdcat(&attempt_file
, name
);
217 if (!stat(attempt_file
, &stat_struct
)){
218 f
= fopen(attempt_file
, "r");
220 cpp_include_file(cpp
, attempt_file
, f
, true);
229 void cpp_pop_include(struct cpp
*cpp
)
231 struct incfile
*incf
;
232 incf
= cpp
->include_top
;
234 cpp
->include_top
= incf
->prev
;
235 incf
->prev
= cpp
->stale_files
;
236 cpp
->stale_files
= incf
;
237 if (incf
->must_close
== true){
243 void cpp_clear_line(struct cpp
*cpp
)
246 strdcpy(&cpp
->line_buf
, "");
249 void cpp_delete_line(struct cpp
*cpp
)
251 // TODO: can we, erm, keep the buffer?!
253 cpp
->line_buf
= NULL
;
256 void cpp_read_line(struct cpp
*cpp
)
258 struct incfile
*incf
;
259 char **pstr
= &cpp
->line_buf
;
262 incf
= cpp
->include_top
;
263 if (incf
&& incf
->eof
){
264 cpp_pop_include(cpp
);
265 incf
= cpp
->include_top
;
273 } else if (ch
== '\\'){
274 // backslashed newline?
279 strdcatc(pstr
, ' '); // insert a space to ensure token separation
280 ch
= '@'; // this could be anything
283 strdcatc(pstr
, '\\');
286 } else if (ch
!= '\n'){
289 } while (ch
!= EOF
&& ch
!= '\n');
291 cpp
->line_loc
.name
= incf
->name
;
292 cpp
->line_loc
.line
= incf
->line
;
294 cpp_delete_line(cpp
);
298 // read another line and stick it on the end of the buffer
299 void cpp_append_line(struct cpp
*cpp
)
302 old_buf
= cpp
->line_buf
;
303 cpp
->line_buf
= NULL
;
306 strdcatc(&old_buf
, ' ');
307 strdcat(&old_buf
, cpp
->line_buf
);
310 cpp
->line_buf
= old_buf
;
313 static void white(char **p
, char *set
)
315 while (**p
&& strchr(set
, **p
)){
321 bool cpp_lex_number(struct cpp
*cpp
, char **p_start
, char **output
)
326 // octal or hexadecimal
328 if (tolower(*p
) == 'x'){
331 while (isdigit(*p
) || (tolower(*p
) >= 'a' && tolower(*p
) <= 'f')){
337 while (*p
>= '0' && *p
<= '7'){
341 } else if (*p
>= '0' && *p
<= '9'){
343 while (*p
>= '0' && *p
<= '9'){
349 while (*p
>= '0' && *p
<= '9'){
353 if (tolower(*p
) == 'e'){
356 while (*p
>= '0' && *p
<= '9'){
361 // not a number at all
364 strdncat(output
, *p_start
, p
- *p_start
);
369 // read a string literal
370 bool cpp_lex_string(struct cpp
*cpp
, char *quotes
, char **p_start
, char **output
)
372 char *p
= *p_start
, quote
;
374 if (*p
&& strchr(quotes
, *p
)){
376 p_last_start
= *p_start
;
381 while (*p
&& *p
!= quote
){
383 if (p
[1] == quote
|| p
[1] == '\\'){
384 // escaped quotation mark or backslash
387 strdncat(output
, p_last_start
, p
- p_last_start
);
398 cpp_error(cpp
, *p_start
, "unterminated %s literal",
399 (quote
== '\'') ? "character" : "string");
404 strdncat(output
, p_last_start
, p
- p_last_start
);
413 // read an identifier
414 bool cpp_lex_ident(struct cpp
*cpp
, char **p_start
, char **output
)
417 if (*p
&& (isalpha(*p
) || *p
== '_')){
418 while (*p
&& (isalnum(*p
) || *p
== '_')){
421 strdncpy(output
, *p_start
, p
- *p_start
);
429 bool cpp_muted(struct cpp
*cpp
)
431 struct incfile
*incf
;
432 incf
= cpp
->include_top
;
434 if (incf
->cond_d
> incf
->cond_td
){
441 void cpp_parse_include(struct cpp
*cpp
, char *p
, char *p_start
)
443 char *inc_str
= NULL
, *p_str_start
;
446 if (cpp_lex_string(cpp
, "<\"", &p
, &inc_str
)){
449 cpp_warning(cpp
, p
, "junk at end of #include directive");
451 if (strlen(inc_str
) == 2){
452 cpp_error(cpp
, p_str_start
, "empty filename in #include");
454 inc_str
[strlen(inc_str
) - 1] = '\0';
455 if (!cpp_open_include_file(cpp
, inc_str
+ 1)){
456 cpp_error(cpp
, p_str_start
, "%s: no such file or directory", inc_str
+ 1);
462 cpp_error(cpp
, p_start
, "#include expects \"FILENAME\" or <FILENAME>");
466 bool cpp_parse_macro_param(struct cpp
*cpp
, char **p
, struct macro
*m
, struct macro
***p_next
)
468 char *param_name
= NULL
;
473 arg_loc
= cpp
->line_loc
;
474 arg_loc
.col
= cpp_get_col(cpp
, *p
);
475 if (cpp_lex_ident(cpp
, p
, ¶m_name
)){
476 m_arg
= cpp_find_macro_arg(m
, param_name
);
478 cpp_error_loc(cpp
, &arg_loc
, "repeated macro parameter \"%s\"", param_name
);
480 m_arg
= cpp_create_macro(param_name
);
482 *p_next
= &m_arg
->next
;
492 void cpp_parse_define(struct cpp
*cpp
, char *p
, char *p_start
)
494 char *macro_name
= NULL
;
495 struct macro
*m
, **p_next
;
496 struct sloc name_sloc
;
500 name_sloc
= cpp
->line_loc
;
501 name_sloc
.col
= cpp_get_col(cpp
, p
);
502 if (cpp_lex_ident(cpp
, &p
, ¯o_name
)){
503 m
= cpp_find_macro(cpp
, macro_name
);
505 cpp_warning_loc(cpp
, &name_sloc
, "\"%s\" redefined", macro_name
);
506 cpp_warning_loc(cpp
, &m
->sloc
, "previous definition was here");
507 cpp_delete_macro(cpp
, m
);
509 m
= cpp_create_push_macro(cpp
, macro_name
);
513 // macro has parameters
520 if (!strncmp(p
, "...", 3)){
522 struct macro
*m_arg
= cpp_create_macro("__VA_ARGS__");
524 p_next
= &m_arg
->next
;
528 } else if (!cpp_parse_macro_param(cpp
, &p
, m
, &p_next
)){
532 } while ((*p
== ',') ? (
541 cpp_error(cpp
, p
, "missing ')' in macro parameter list");
545 strdcpy(&m
->text
, p
);
548 cpp_delete_macro(cpp
, m
);
551 cpp_error(cpp
, p_start
, "no macro name given in #define directive");
555 void cpp_parse_undef(struct cpp
*cpp
, char *p
, char *p_start
)
560 if (cpp_lex_ident(cpp
, &p
, &tok
)){
563 cpp_warning(cpp
, p
, "junk at end of #undef directive");
565 m
= cpp_find_macro(cpp
, tok
);
567 cpp_delete_macro(cpp
, m
);
572 cpp_error(cpp
, p_start
, "no macro name given in #undef directive");
576 void cpp_do_condition(struct cpp
*cpp
, bool condition
)
578 if (!cpp_muted(cpp
) && condition
){
579 cpp
->include_top
->cond_td
++;
581 cpp
->include_top
->cond_d
++;
585 void cpp_parse_ifdef(struct cpp
*cpp
, char *p
, char *p_start
)
589 if (cpp_lex_ident(cpp
, &p
, &tok
)){
592 cpp_warning(cpp
, p
, "junk at end of #ifdef directive");
594 cpp_do_condition(cpp
, !!cpp_find_macro(cpp
, tok
));
597 cpp_error(cpp
, p_start
, "no macro name given in #ifdef directive");
601 // parse a C number - oct, hex or dec
602 static void parse_cnumber(const char *str
, struct exprval
*presult
)
604 unsigned long long result
= 0;
607 // octal or hexadecimal
609 if (tolower(*p
) == 'x'){
612 while (isdigit(*p
) || (tolower(*p
) >= 'a' && tolower(*p
) <= 'f')){
614 result
= (result
<< 4) | (*p
- '0');
616 result
= (result
<< 4) | ((tolower(*p
) - 'a') + 10);
623 while (*p
>= '0' && *p
<= '7'){
624 result
= (result
<< 3) | (*p
- '0');
628 } else if (*p
>= '0' && *p
<= '9'){
630 while (*p
>= '0' && *p
<= '9'){
631 result
= (result
* 10) + (*p
- '0');
637 while (*p
>= '0' && *p
<= '9'){
638 // TODO: use value (floats, etc.)
642 if (tolower(*p
) == 'e'){
645 while (*p
>= '0' && *p
<= '9'){
646 // TODO: use value (floats, etc.)
651 presult
->value
= result
;
654 void cpp_factor(struct cpp
*cpp
, char **p
, struct exprval
*result
)
656 char *factor_str
= NULL
;
658 if(cpp_lex_number(cpp
, p
, &factor_str
)){
660 parse_cnumber(factor_str
, result
);
662 } else if (**p
== '!'){
665 cpp_factor(cpp
, p
, result
);
666 result
->value
= !result
->value
;
667 } else if (!strncmp(*p
, "defined", 7)){
668 // eg. #if defined(FOO)
669 char *macro_name
= NULL
;
674 cpp_error(cpp
, *p
, "'(' expected");
679 if (cpp_lex_ident(cpp
, p
, ¯o_name
)){
680 m
= cpp_find_macro(cpp
, macro_name
);
684 cpp_error(cpp
, *p
, "identifier (macro name, for \"defined\") expected");
688 cpp_error(cpp
, *p
, "')' expected");
693 cpp_error(cpp
, *p
, "expression expected");
697 void cpp_and_expr(struct cpp
*cpp
, char **p
, struct exprval
*result
)
699 cpp_factor(cpp
, p
, result
);
701 while ((*p
)[0] == '&' && (*p
)[1] == '&'){
705 cpp_factor(cpp
, p
, &rhs
);
707 result
->value
= result
->value
&& rhs
.value
;
711 void cpp_or_expr(struct cpp
*cpp
, char **p
, struct exprval
*result
)
713 cpp_and_expr(cpp
, p
, result
);
715 while ((*p
)[0] == '|' && (*p
)[1] == '|'){
719 cpp_and_expr(cpp
, p
, &rhs
);
721 result
->value
= result
->value
|| rhs
.value
;
725 void cpp_expr(struct cpp
*cpp
, char **p
, struct exprval
*result
)
727 cpp_or_expr(cpp
, p
, result
);
730 void cpp_parse_if(struct cpp
*cpp
, char *p
, char *p_start
)
732 struct exprval result
;
734 cpp_expr(cpp
, &p
, &result
);
735 cpp_do_condition(cpp
, !!result
.value
);
738 void cpp_parse_ifndef(struct cpp
*cpp
, char *p
, char *p_start
)
742 if (cpp_lex_ident(cpp
, &p
, &tok
)){
745 cpp_warning(cpp
, p
, "junk at end of #ifdef directive");
747 cpp_do_condition(cpp
, !cpp_find_macro(cpp
, tok
));
750 cpp_error(cpp
, p_start
, "no macro name given in #ifdef directive");
754 void cpp_parse_else(struct cpp
*cpp
, char *p
, char *p_start
)
757 if (!cpp_muted(cpp
) && *p
){
758 cpp_warning(cpp
, p
, "junk at end of #else directive");
760 if (cpp
->include_top
->cond_d
== 0){
761 cpp_error(cpp
, p_start
, "#else without matching #if or #ifdef");
763 if (cpp
->include_top
->cond_d
== cpp
->include_top
->cond_td
+ 1){
765 cpp
->include_top
->cond_td
++;
766 } else if (!cpp_muted(cpp
)){
768 cpp
->include_top
->cond_td
--;
771 cpp_delete_line(cpp
);
774 void cpp_parse_endif(struct cpp
*cpp
, char *p
, char *p_start
)
777 if (!cpp_muted(cpp
) && *p
){
778 cpp_warning(cpp
, p
, "junk at end of #endif directive");
780 if (cpp
->include_top
->cond_d
== 0){
781 cpp_error(cpp
, p_start
, "#endif without matching #if or #ifdef");
783 if (!cpp_muted(cpp
)){
784 cpp
->include_top
->cond_td
--;
786 cpp
->include_top
->cond_d
--;
789 cpp_delete_line(cpp
);
795 bool conditional_related
;
797 void (* func
)(struct cpp
*cpp
, char *p
, char *p_start
);
799 {false, "define", cpp_parse_define
},
800 {false, "undef", cpp_parse_undef
},
801 {false, "include", cpp_parse_include
},
802 {true, "if", cpp_parse_if
},
803 {true, "ifdef", cpp_parse_ifdef
},
804 {true, "ifndef", cpp_parse_ifndef
},
805 {true, "else", cpp_parse_else
},
806 {true, "endif", cpp_parse_endif
},
809 bool cpp_macro_args(struct cpp
*cpp
, char **line_buf
, char **p
, struct macro
*m
)
813 int n
, depth
, p_n
, p_start_n
;
817 return false; // give up
828 if (!strcmp(m_arg
->name
, "__VA_ARGS__")){
829 // XXX: it's silly having this here - it's almost the same
830 // as the one further down
831 while (**p
!= '\0' && depth
> 0){
832 if (cpp_lex_string(cpp
, "\"'", p
, NULL
)){
833 // it's a string - let's not interpret '(' and ')' in strings!
834 } else if (**p
== '('){
836 } else if (**p
== ')'){
845 while (**p
!= '\0' && depth
> 0 && (**p
!= ',' || depth
> 1)){
846 if (cpp_lex_string(cpp
, "\"'", p
, NULL
)){
847 // it's a string - let's not interpret '(' and ')' in strings!
848 } else if (**p
== '('){
850 } else if (**p
== ')'){
857 if (**p
== '\0' && cpp
->include_top
){
858 // there's a line break in the middle of it
859 p_n
= *p
- *line_buf
;
860 p_start_n
= p_start
- *line_buf
;
861 cpp_append_line(cpp
);
862 *p
= *line_buf
+ p_n
;
863 p_start
= *line_buf
+ p_start_n
;
869 strdncpy(&m_arg
->text
, p_start
, *p
- p_start
);
870 //printf("arg=%s\n", m_arg->text);
874 if ((**p
== ')' && m_arg
->next
)
875 || (**p
== ',' && !m_arg
->next
)){
876 cpp_error(cpp
, *p
, "macro \"%s\" requires %d arguments, but only %d given", m
->name
, macro_count_args(m
), n
);
878 } else if (**p
== '\0'){
879 if (cpp
->include_top
){
881 cpp_error(cpp
, *p
, "unterminated argument list invoking macro \"%s\"", m
->name
);
884 } else if (**p
== ','){
896 // the spaghetti code is served
897 char *cpp_macro_arg_subst(struct cpp
*cpp
, struct macro
*m
)
899 char *result
= NULL
, *s
= NULL
, *p
, *p0
, *p_before
, *sp
;
910 strdncat(&result
, p0
, p
- 1 - p0
);
913 while (*p
== ' ' || *p
== '\t'){
917 // remove whitespace after previous tok
918 sp
= result
+ strlen(result
);
919 while (sp
> result
&& (sp
[-1] == ' ' || sp
[-1] == '\t')){
926 strdncat(&result
, p0
, p
- p0
);
931 while (*p
== ' ' || *p
== '\t'){
934 if (isalpha(*p
) || *p
== '_'){
937 cpp_error_loc(cpp
, &m
->sloc
, "'#' not followed by macro-argument");
941 } else if (isalpha(*p
) || *p
== '_'){
942 // identifier - macro arg name?
946 while (isalnum(*p
) || isdigit(*p
) || *p
== '_'){
949 strdncpy(&s
, p_before
, p
- p_before
);
950 m_arg
= cpp_find_macro_arg(m
, s
);
953 cpp_error_loc(cpp
, &m
->sloc
, "'#' not followed by macro-argument");
958 // macro-arg substitution
960 strdncat(&result
, p0
, p_before
- p0
);
962 strdcatc(&result
, ' ');
964 strdcatc(&result
, '"');
967 strdcat(&result
, m_arg
->text
);
969 strdcatc(&result
, '"');
978 cpp_error_loc(cpp
, &m
->sloc
, "'#' not followed by macro-argument");
980 strdncat(&result
, p0
, p
- p0
);
986 struct macro_stack
*prev
;
990 void cpp_substitute_macros(struct cpp
*cpp
, char **line_buf
, struct macro_stack
*ms
)
992 char *p
, *p_start
, *ident
= NULL
;
993 char *substituted_text
, *new_line_buf
= NULL
;
995 struct macro_stack stack_item
, *ms_ptr
;
996 int p_start_n
, p_end_n
;
1000 if (cpp_lex_ident(cpp
, &p
, &ident
)){
1002 m
= cpp_find_macro(cpp
, ident
);
1003 // is it in the stack?
1006 while (ms_ptr
&& ms_ptr
->m
!= m
){
1007 ms_ptr
= ms_ptr
->prev
;
1010 m
= NULL
; // yes it is. Let's forget about it.
1013 p_start_n
= p_start
- *line_buf
;
1014 if (m
&& cpp_macro_args(cpp
, line_buf
, &p
, m
)){
1015 p_start
= *line_buf
+ p_start_n
;
1016 substituted_text
= cpp_macro_arg_subst(cpp
, m
);
1017 // now we have to substitute the substituted text
1018 stack_item
.prev
= ms
;
1020 cpp_substitute_macros(cpp
, &substituted_text
, &stack_item
);
1021 strdncat(&new_line_buf
, *line_buf
, p_start
- *line_buf
);
1022 strdcat(&new_line_buf
, substituted_text
);
1023 free(substituted_text
);
1024 p_end_n
= strlen(new_line_buf
); // de-pointerize p_start
1025 strdcat(&new_line_buf
, p
);
1026 // replace the line buffer with our new one (with the substitution)
1028 *line_buf
= new_line_buf
;
1029 new_line_buf
= NULL
;
1030 p
= *line_buf
+ p_end_n
;
1032 } else if (cpp_lex_string(cpp
, "\"'", &p
, &ident
)){
1033 // nothing needs to be done
1034 // but at least we've passed the string, so we
1035 // won't expand macros in it
1043 void cpp_remove_comments(struct cpp
*cpp
)
1045 char *p
= cpp
->line_buf
, *p_start
;
1048 if (p
[0] == '/' && p
[1] == '/'){
1049 // single-line comment
1050 memset(p
, ' ', strlen(p
));
1052 } else if (p
[0] == '/' && p
[1] == '*'){
1053 // multi-line comment
1057 if (p
[0] == '*' && p
[1] == '/'){
1060 memset(p_start
, ' ', p
- p_start
);
1062 } else if (*p
== '\0'){
1063 // we need to read another line!
1064 // as you can see, this isn't very elegant
1065 p_start_n
= p_start
- cpp
->line_buf
;
1066 p_n
= p
- cpp
->line_buf
;
1067 cpp_append_line(cpp
);
1068 p_start
= cpp
->line_buf
+ p_start_n
;
1069 p
= cpp
->line_buf
+ p_n
;
1080 void cpp_process_line(struct cpp
*cpp
)
1082 char *p
, *tok
= NULL
, *p_start
;
1087 cpp_remove_comments(cpp
);
1091 // preprocessor directive
1096 cpp_lex_ident(cpp
, &p
, &tok
);
1098 for (i
=0; i
<(sizeof directives
/sizeof *directives
); i
++){
1099 if (!strcmp(tok
, directives
[i
].str
)){
1100 if (!cpp_muted(cpp
) || directives
[i
].conditional_related
){
1101 directives
[i
].func(cpp
, p
, p_start
);
1106 if (!done
&& !cpp_muted(cpp
)){
1107 cpp_error(cpp
, p_start
, "invalid preprocessing directive #%s", tok
);
1110 cpp_clear_line(cpp
);
1114 if (!cpp_muted(cpp
)){
1115 cpp_substitute_macros(cpp
, &cpp
->line_buf
, NULL
);
1118 if (cpp_muted(cpp
)){
1119 cpp_delete_line(cpp
);