tok: ignore "\\\n"
[neatcc.git] / cpp.c
blob9a56f99a230f78fb5a87b7ea56dd8c091128410c
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stdarg.h>
4 #include <stddef.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include "tok.h"
12 #include "tab.h"
/*
 * The input buffer currently being scanned: buf is its text, len its size
 * in bytes and cur the read offset.  buf_new() and buf_pop() exchange
 * these with the copies kept in bufs[].
 */
14 static char *buf;
15 static int len;
16 static int cur;
/*
 * Fixed limits of the macro table:
 *   MAXDEFS  - number of entries in macros[]
 *   MACROLEN - size of a macro body and of each argument passed to a macro
 *   MAXARGS  - number of parameters/arguments per macro
 *   NBUCKET  - not referenced in the code visible here
 */
18 #define MAXDEFS (1 << 12)
19 #define MACROLEN (1 << 10)
20 #define MAXARGS (1 << 5)
21 #define NBUCKET (MAXDEFS << 1)
/*
 * One macro definition: its name, replacement text (def), parameter
 * names (args, nargs of them) and whether it was defined with a
 * parameter list (isfunc).  nmacros counts the slots of macros[] in use.
 */
23 static struct macro {
24 char name[NAMELEN];
25 char def[MACROLEN];
26 char args[MAXARGS][NAMELEN];
27 int nargs;
28 int isfunc;
29 } macros[MAXDEFS];
30 static int nmacros;
/* mtab stores pointers to the name field of macros[] entries; see macro_find() */
31 /* macro hash table */
32 static struct tab mtab;
/*
 * MAXBUFS is the depth of the buffer stack.  The BUF_* values tag what a
 * stack entry holds: an included file, a macro body, a macro argument,
 * the expression of an #if/#elif line (cpp_eval()) or the temporary text
 * built by cpp_define().
 */
34 #define MAXBUFS (1 << 5)
35 #define BUF_FILE 0
36 #define BUF_MACRO 1
37 #define BUF_ARG 2
38 #define BUF_EVAL 3
39 #define BUF_TEMP 4
/*
 * The buffer stack.  buf/len/cur of an entry are only refreshed when
 * another buffer is pushed on top of it (buf_new()); the live state of
 * the top entry is in the file-scope buf, len and cur.
 */
41 static struct buf {
42 char *buf;
43 int len;
44 int cur;
45 int type;
46 /* for BUF_FILE */
47 char path[NAMELEN];
48 /* for BUF_MACRO */
49 struct macro *macro;
50 char args[MAXARGS][MACROLEN]; /* arguments passed to a macro */
51 /* for BUF_ARG */
52 int arg_buf; /* the bufs index of the owning macro */
53 } bufs[MAXBUFS];
/* number of entries of bufs[] in use */
54 static int nbufs;
55 static int bufs_limit = 1; /* cpp_read() limit; useful in cpp_eval() */
/*
 * Format a printf-style message, write it to file descriptor 2 and
 * terminate the process with exit status 1.  Does not return.
 * Messages longer than the internal buffer are truncated.
 */
void die(char *fmt, ...)
{
	va_list ap;
	char msg[512] = "";

	va_start(ap, fmt);
	/* bounded: vsprintf() would overrun msg on a long message */
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);
	write(2, msg, strlen(msg));
	exit(1);
}
68 static void buf_new(int type, char *dat, int dlen)
70 if (nbufs) {
71 bufs[nbufs - 1].buf = buf;
72 bufs[nbufs - 1].cur = cur;
73 bufs[nbufs - 1].len = len;
75 if (nbufs >= MAXBUFS)
76 die("nomem: MAXBUFS reached!\n");
77 nbufs++;
78 cur = 0;
79 buf = dat;
80 len = dlen;
81 bufs[nbufs - 1].type = type;
84 static void buf_file(char *path, char *dat, int dlen)
86 buf_new(BUF_FILE, dat, dlen);
87 strcpy(bufs[nbufs - 1].path, path ? path : "");
90 static void buf_macro(struct macro *m)
92 buf_new(BUF_MACRO, m->def, strlen(m->def));
93 bufs[nbufs - 1].macro = m;
96 static void buf_arg(char *arg, int mbuf)
98 buf_new(BUF_ARG, arg, strlen(arg));
99 bufs[nbufs - 1].arg_buf = mbuf;
102 static void buf_pop(void)
104 nbufs--;
105 if (nbufs) {
106 cur = bufs[nbufs - 1].cur;
107 len = bufs[nbufs - 1].len;
108 buf = bufs[nbufs - 1].buf;
112 static int buf_iseval(void)
114 int i;
115 for (i = nbufs - 1; i >= 0; i--)
116 if (bufs[i].type == BUF_EVAL)
117 return 1;
118 return 0;
/* Return the size fstat() reports for fd, or 0 if fstat() fails. */
static size_t file_size(int fd)
{
	struct stat info;

	if (fstat(fd, &info) != 0)
		return 0;
	return info.st_size;
}
/*
 * Read the whole file at path into a freshly allocated, NUL-terminated
 * buffer and push it as a BUF_FILE buffer.
 * Returns 0 on success and -1 if the file cannot be opened; dies if the
 * buffer cannot be allocated.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int nr = 0;
	int n;
	int size;
	char *dat;

	if (fd == -1)
		return -1;
	size = file_size(fd) + 1;
	dat = malloc(size);
	if (!dat) {
		close(fd);
		die("nomem: cannot read <%s>\n", path);
	}
	/*
	 * Never read more than size - 1 bytes: the last byte is reserved
	 * for the terminator even if the file holds more data than
	 * fstat() reported.
	 */
	while (nr < size - 1 && (n = read(fd, dat + nr, size - 1 - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Start preprocessing the file at path.
 * Returns the result of include_file(): 0 on success, -1 if the file
 * cannot be opened.
 */
int cpp_init(char *path)
{
	int status = include_file(path);

	return status;
}
152 static int jumpws(void)
154 int old = cur;
155 while (cur < len && isspace(buf[cur]))
156 cur++;
157 return cur == old;
160 static void read_word(char *dst)
162 jumpws();
163 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
164 *dst++ = buf[cur++];
165 *dst = '\0';
168 static int jumpcomment(void)
170 if (buf[cur] == '/' && buf[cur + 1] == '*') {
171 while (++cur < len) {
172 if (buf[cur] == '*' && buf[cur + 1] == '/') {
173 cur += 2;
174 return 0;
178 if (buf[cur] == '/' && buf[cur + 1] == '/') {
179 while (++cur < len)
180 if (buf[cur] == '\n')
181 break;
182 return 0;
184 return 1;
187 static int jumpstr(void)
189 if (buf[cur] == '\'') {
190 while (cur < len && buf[++cur] != '\'')
191 if (buf[cur] == '\\')
192 cur++;
193 cur++;
194 return 0;
196 if (buf[cur] == '"') {
197 while (cur < len && buf[++cur] != '"')
198 if (buf[cur] == '\\')
199 cur++;
200 cur++;
201 return 0;
203 return 1;
206 static void read_tilleol(char *dst)
208 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
209 cur++;
210 while (cur < len && buf[cur] != '\n') {
211 int last = cur;
212 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
213 cur += 2;
214 continue;
216 if (!jumpstr()) {
217 memcpy(dst, buf + last, cur - last);
218 dst += cur - last;
219 continue;
221 if (!jumpcomment())
222 continue;
223 *dst++ = buf[cur++];
225 *dst = '\0';
/*
 * Copy the string s to d, terminate it, and return a pointer to the
 * terminator written in d so that further text can be appended there.
 */
static char *putstr(char *d, char *s)
{
	for (; *s != '\0'; s++, d++)
		*d = *s;
	*d = '\0';
	return d;
}
/*
 * Directories searched for included files, in the order they were added
 * with cpp_addpath(); nlocs is the number of entries in use.
 * include_find() walks the list from the newest entry down.
 */
236 #define MAXLOCS (1 << 10)
238 static char *locs[MAXLOCS] = {};
239 static int nlocs = 0;
241 void cpp_addpath(char *s)
243 locs[nlocs++] = s;
246 static int include_find(char *name, int std)
248 int i;
249 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
250 char path[1 << 10];
251 char *s;
252 s = path;
253 if (locs[i]) {
254 s = putstr(s, locs[i]);
255 *s++ = '/';
257 s = putstr(s, name);
258 if (!include_file(path))
259 return 0;
261 return -1;
264 static void readarg(char *s)
266 int depth = 0;
267 int beg = cur;
268 while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) {
269 if (!jumpstr() || !jumpcomment())
270 continue;
271 switch (buf[cur++]) {
272 case '(':
273 case '[':
274 case '{':
275 depth++;
276 break;
277 case ')':
278 case ']':
279 case '}':
280 depth--;
281 break;
284 if (s) {
285 memcpy(s, buf + beg, cur - beg);
286 s[cur - beg] = '\0';
290 static int macro_find(char *name)
292 char *n = tab_get(&mtab, name);
293 if (!n)
294 return -1;
295 return container(n, struct macro, name) - macros;
298 static void macro_undef(char *name)
300 int i = macro_find(name);
301 if (i >= 0)
302 tab_del(&mtab, macros[i].name);
305 static int macro_new(char *name)
307 int i = macro_find(name);
308 if (i >= 0)
309 return i;
310 if (nmacros >= MAXDEFS)
311 die("nomem: MAXDEFS reached!\n");
312 i = nmacros++;
313 strcpy(macros[i].name, name);
314 tab_add(&mtab, macros[i].name);
315 return i;
318 static void macro_define(void)
320 char name[NAMELEN];
321 struct macro *d;
322 read_word(name);
323 d = &macros[macro_new(name)];
324 d->isfunc = 0;
325 d->nargs = 0;
326 if (buf[cur] == '(') {
327 cur++;
328 jumpws();
329 while (cur < len && buf[cur] != ')') {
330 readarg(d->args[d->nargs++]);
331 jumpws();
332 if (buf[cur] != ',')
333 break;
334 cur++;
335 jumpws();
337 cur++;
338 d->isfunc = 1;
340 read_tilleol(d->def);
/* cpp_read() is defined further down but is needed by cpp_eval() */
343 int cpp_read(char *buf);
/*
 * Text of the #if/#elif expression being evaluated, as produced by
 * cpp_read() in cpp_eval(): elen is its length and ecur the offset of
 * the tokenizer (eval_tok()).
 */
345 static char ebuf[BUFSIZE];
346 static int elen;
347 static int ecur;
/* the expression evaluator is defined after cpp_read() */
349 static long evalexpr(void);
351 static int cpp_eval(void)
353 char evalbuf[BUFSIZE];
354 int old_limit;
355 int ret, nr;
356 read_tilleol(evalbuf);
357 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
358 elen = 0;
359 ecur = 0;
360 old_limit = bufs_limit;
361 bufs_limit = nbufs;
362 while ((nr = cpp_read(ebuf + elen)) >= 0)
363 elen += nr;
364 bufs_limit = old_limit;
365 ret = evalexpr();
366 buf_pop();
367 return ret;
370 static void jumpifs(int jumpelse)
372 int depth = 0;
373 while (cur < len) {
374 if (buf[cur] == '#') {
375 char cmd[NAMELEN];
376 cur++;
377 read_word(cmd);
378 if (!strcmp("else", cmd))
379 if (!depth && !jumpelse)
380 break;
381 if (!strcmp("elif", cmd))
382 if (!depth && !jumpelse && cpp_eval())
383 break;
384 if (!strcmp("endif", cmd)) {
385 if (!depth)
386 break;
387 else
388 depth--;
390 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
391 !strcmp("if", cmd))
392 depth++;
393 continue;
395 if (!jumpcomment())
396 continue;
397 if (!jumpstr())
398 continue;
399 cur++;
403 static int cpp_cmd(void)
405 char cmd[NAMELEN];
406 cur++;
407 read_word(cmd);
408 if (!strcmp("define", cmd)) {
409 macro_define();
410 return 0;
412 if (!strcmp("undef", cmd)) {
413 char name[NAMELEN];
414 read_word(name);
415 macro_undef(name);
416 return 0;
418 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
419 !strcmp("if", cmd)) {
420 char name[NAMELEN];
421 int matched = 0;
422 if (cmd[2]) {
423 int not = cmd[2] == 'n';
424 read_word(name);
425 matched = not ? macro_find(name) < 0 :
426 macro_find(name) >= 0;
427 } else {
428 matched = cpp_eval();
430 if (!matched)
431 jumpifs(0);
432 return 0;
434 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
435 jumpifs(1);
436 return 0;
438 if (!strcmp("endif", cmd))
439 return 0;
440 if (!strcmp("include", cmd)) {
441 char file[NAMELEN];
442 char *s, *e;
443 jumpws();
444 s = buf + cur + 1;
445 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
446 memcpy(file, s, e - s);
447 file[e - s] = '\0';
448 cur += e - s + 2;
449 if (include_find(file, *e == '>') == -1)
450 err("cannot include <%s>\n", file);
451 return 0;
453 return 1;
456 static int macro_arg(struct macro *m, char *arg)
458 int i;
459 for (i = 0; i < m->nargs; i++)
460 if (!strcmp(arg, m->args[i]))
461 return i;
462 return -1;
465 static int buf_arg_find(char *name)
467 int i;
468 for (i = nbufs - 1; i >= 0; i--) {
469 struct buf *mbuf = &bufs[i];
470 struct macro *m = mbuf->macro;
471 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
472 return i;
473 if (mbuf->type == BUF_ARG)
474 i = mbuf->arg_buf;
476 return -1;
479 static void macro_expand(char *name)
481 struct macro *m;
482 int mbuf;
483 if ((mbuf = buf_arg_find(name)) >= 0) {
484 int arg = macro_arg(bufs[mbuf].macro, name);
485 char *dat = bufs[mbuf].args[arg];
486 buf_arg(dat, mbuf);
487 return;
489 m = &macros[macro_find(name)];
490 if (!m->isfunc) {
491 buf_macro(m);
492 return;
494 jumpws();
495 if (buf[cur] == '(') {
496 int i = 0;
497 struct buf *mbuf = &bufs[nbufs];
498 cur++;
499 jumpws();
500 while (cur < len && buf[cur] != ')') {
501 readarg(mbuf->args[i++]);
502 jumpws();
503 if (buf[cur] != ',')
504 break;
505 cur++;
506 jumpws();
508 while (i < m->nargs)
509 mbuf->args[i++][0] = '\0';
510 cur++;
511 buf_macro(m);
515 static int buf_expanding(char *macro)
517 int i;
518 for (i = nbufs - 1; i >= 0; i--) {
519 if (bufs[i].type == BUF_ARG)
520 return 0;
521 if (bufs[i].type == BUF_MACRO &&
522 !strcmp(macro, bufs[i].macro->name))
523 return 1;
525 return 0;
528 /* return 1 for plain macros and arguments and 2 for function macros */
529 static int expandable(char *word)
531 int i;
532 if (buf_arg_find(word) >= 0)
533 return 1;
534 if (buf_expanding(word))
535 return 0;
536 i = macro_find(word);
537 return i >= 0 ? macros[i].isfunc + 1 : 0;
540 void cpp_define(char *name, char *def)
542 char tmp_buf[MACROLEN];
543 char *s = tmp_buf;
544 s = putstr(s, name);
545 *s++ = '\t';
546 s = putstr(s, def);
547 buf_new(BUF_TEMP, tmp_buf, s - tmp_buf);
548 macro_define();
549 buf_pop();
/* state carried from one cpp_read() call to the next */
552 static int seen_macro; /* seen a macro; 2 if a function macro */
553 static char seen_name[NAMELEN]; /* the name of the last macro */
/*
 * Updated by cpp_read() outside #if evaluation: hunk_len is the size of
 * the hunk returned last and hunk_off the total size of the hunks
 * returned before it.  cpp_loc() uses both.
 */
555 static int hunk_off;
556 static int hunk_len;
558 int cpp_read(char *s)
560 int old, end;
561 int jump_name = 0;
562 if (seen_macro == 1) {
563 macro_expand(seen_name);
564 seen_macro = 0;
566 if (cur == len) {
567 struct buf *cbuf = &bufs[nbufs - 1];
568 if (nbufs < bufs_limit + 1)
569 return -1;
570 if (cbuf->type == BUF_FILE)
571 free(buf);
572 buf_pop();
574 old = cur;
575 if (buf[cur] == '#')
576 if (!cpp_cmd())
577 return 0;
578 while (cur < len) {
579 if (!jumpws())
580 continue;
581 if (buf[cur] == '#')
582 break;
583 if (!jumpcomment())
584 continue;
585 if (seen_macro == 2) {
586 if (buf[cur] == '(')
587 macro_expand(seen_name);
588 seen_macro = 0;
589 old = cur;
590 continue;
592 if (!jumpstr())
593 continue;
594 if (isalpha(buf[cur]) || buf[cur] == '_') {
595 char word[NAMELEN];
596 read_word(word);
597 seen_macro = expandable(word);
598 if (seen_macro) {
599 strcpy(seen_name, word);
600 jump_name = 1;
601 break;
603 if (buf_iseval() && !strcmp("defined", word)) {
604 int parens = 0;
605 jumpws();
606 if (buf[cur] == '(') {
607 parens = 1;
608 cur++;
610 read_word(word);
611 if (parens) {
612 jumpws();
613 cur++;
616 continue;
618 cur++;
620 /* macros are expanded later; ignore its name */
621 end = jump_name ? cur - strlen(seen_name) : cur;
622 memcpy(s, buf + old, end - old);
623 s[end - old] = '\0';
624 if (!buf_iseval()) {
625 hunk_off += hunk_len;
626 hunk_len = end - old;
628 return end - old;
631 /* preprocessor constant expression evaluation */
/* text of the most recent name or number token read by eval_tok() */
633 static char etok[NAMELEN];
/* one-token lookahead of the evaluator; -1 when empty (see eval_see()) */
634 static int enext;
/* the two-character operators recognised by eval_tok() */
636 static char *tok2[] = {
637 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
640 static int eval_tok(void)
642 char *s = etok;
643 int i;
644 while (ecur < elen) {
645 while (ecur < elen && isspace(ebuf[ecur]))
646 ecur++;
647 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
648 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
649 ebuf[ecur - 1] != '/'))
650 ecur++;
651 continue;
653 break;
655 if (ecur >= elen)
656 return TOK_EOF;
657 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
658 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
659 *s++ = ebuf[ecur++];
660 *s = '\0';
661 return TOK_NAME;
663 if (isdigit(ebuf[ecur])) {
664 while (isdigit(ebuf[ecur]))
665 *s++ = ebuf[ecur++];
666 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
667 ecur++;
668 *s = '\0';
669 return TOK_NUM;
671 for (i = 0; i < ARRAY_SIZE(tok2); i++)
672 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
673 int ret = TOK2(tok2[i]);
674 ecur += 2;
675 return ret;
677 return ebuf[ecur++];
680 static int eval_see(void)
682 if (enext == -1)
683 enext = eval_tok();
684 return enext;
687 static int eval_get(void)
689 if (enext != -1) {
690 int ret = enext;
691 enext = -1;
692 return ret;
694 return eval_tok();
697 static long eval_num(void)
699 return atol(etok);
/*
 * Consume the next token if it is tok.
 * Returns 0 if it was consumed and 1 if the next token is something
 * else (which is then left in place).
 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/*
 * Consume the next token if it is tok.  A different token is left in
 * place and no error is reported.
 */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
716 static char *eval_id(void)
718 return etok;
721 static long evalcexpr(void);
723 static long evalatom(void)
725 if (!eval_jmp(TOK_NUM))
726 return eval_num();
727 if (!eval_jmp(TOK_NAME)) {
728 int parens = !eval_jmp('(');
729 long ret;
730 eval_expect(TOK_NAME);
731 ret = macro_find(eval_id()) >= 0;
732 if (parens)
733 eval_expect(')');
734 return ret;
736 if (!eval_jmp('(')) {
737 long ret = evalcexpr();
738 eval_expect(')');
739 return ret;
741 return -1;
/* Parse a unary expression: any number of !, - and ~ prefixes before a primary. */
static long evalpre(void)
{
	long operand;

	if (eval_jmp('!') == 0) {
		operand = evalpre();
		return !operand;
	}
	if (eval_jmp('-') == 0) {
		operand = evalpre();
		return -operand;
	}
	if (eval_jmp('~') == 0) {
		operand = evalpre();
		return ~operand;
	}
	return evalatom();
}
/*
 * Parse a left-associative chain of *, / and % over unary expressions.
 * Division and remainder by zero yield 0 instead of trapping, so a
 * malformed or irrelevant sub-expression cannot crash the preprocessor.
 */
static long evalmul(void)
{
	long ret = evalpre();

	for (;;) {
		if (!eval_jmp('*')) {
			ret *= evalpre();
		} else if (!eval_jmp('/')) {
			long div = evalpre();
			ret = div ? ret / div : 0;
		} else if (!eval_jmp('%')) {
			long div = evalpre();
			ret = div ? ret % div : 0;
		} else {
			break;
		}
	}
	return ret;
}
/* Parse a left-associative chain of + and - over multiplicative expressions. */
static long evaladd(void)
{
	long sum = evalmul();

	for (;;) {
		if (eval_jmp('+') == 0)
			sum += evalmul();
		else if (eval_jmp('-') == 0)
			sum -= evalmul();
		else
			break;
	}
	return sum;
}
/* Parse a left-associative chain of << and >> over additive expressions. */
static long evalshift(void)
{
	long val = evaladd();

	for (;;) {
		if (eval_jmp(TOK2("<<")) == 0)
			val <<= evaladd();
		else if (eval_jmp(TOK2(">>")) == 0)
			val >>= evaladd();
		else
			break;
	}
	return val;
}
/* Parse a left-associative chain of <, >, <= and >= over shift expressions. */
static long evalcmp(void)
{
	long val = evalshift();

	for (;;) {
		if (eval_jmp('<') == 0)
			val = val < evalshift();
		else if (eval_jmp('>') == 0)
			val = val > evalshift();
		else if (eval_jmp(TOK2("<=")) == 0)
			val = val <= evalshift();
		else if (eval_jmp(TOK2(">=")) == 0)
			val = val >= evalshift();
		else
			break;
	}
	return val;
}
/* Parse a left-associative chain of == and != over relational expressions. */
static long evaleq(void)
{
	long val = evalcmp();

	for (;;) {
		if (eval_jmp(TOK2("==")) == 0)
			val = val == evalcmp();
		else if (eval_jmp(TOK2("!=")) == 0)
			val = val != evalcmp();
		else
			break;
	}
	return val;
}
/* Parse a chain of bitwise & over equality expressions. */
static long evalbitand(void)
{
	long val = evaleq();

	while (eval_jmp('&') == 0) {
		long rhs = evaleq();

		val = val & rhs;
	}
	return val;
}
/* Parse a chain of bitwise ^ over bitwise-and expressions. */
static long evalxor(void)
{
	long val = evalbitand();

	while (eval_jmp('^') == 0) {
		long rhs = evalbitand();

		val = val ^ rhs;
	}
	return val;
}
/* Parse a chain of bitwise | over exclusive-or expressions. */
static long evalbitor(void)
{
	long val = evalxor();

	while (eval_jmp('|') == 0) {
		long rhs = evalxor();

		val = val | rhs;
	}
	return val;
}
/*
 * Parse a chain of && over bitwise-or expressions and return 0 or 1
 * (or the operand's value when no && follows).
 * The right operand is parsed before it is combined: writing
 * "ret && evalbitor()" would skip the call, and with it the operand's
 * tokens, whenever ret is zero.
 */
static long evaland(void)
{
	long ret = evalbitor();

	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();

		ret = ret && rhs;
	}
	return ret;
}
/*
 * Parse a chain of || over logical-and expressions and return 0 or 1
 * (or the operand's value when no || follows).
 * The right operand is parsed before it is combined: writing
 * "ret || evaland()" would skip the call, and with it the operand's
 * tokens, whenever ret is non-zero.
 */
static long evalor(void)
{
	long ret = evaland();

	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();

		ret = ret || rhs;
	}
	return ret;
}
/*
 * Parse a conditional expression: a logical-or expression optionally
 * followed by "? a : b".
 * Both branches are always parsed so that every token of the expression
 * is consumed; the value of the selected one is returned.  A missing
 * ':' is tolerated (eval_expect() does not fail), which also means the
 * parser cannot loop at the end of the input while looking for it.
 */
static long evalcexpr(void)
{
	long cond = evalor();
	long yes, no;

	if (eval_jmp('?'))
		return cond;
	yes = evalcexpr();
	eval_expect(':');
	no = evalcexpr();
	return cond ? yes : no;
}
904 static long evalexpr(void)
906 enext = -1;
907 return evalcexpr();
/*
 * Return the 1-based line number of offset off in the NUL-terminated
 * text s, i.e. one plus the number of newlines before that offset.
 */
static int buf_loc(char *s, int off)
{
	char *limit = s + off;
	char *nl;
	int line = 1;

	for (nl = strchr(s, '\n'); nl != NULL && nl < limit;
	     nl = strchr(nl + 1, '\n'))
		line++;
	return line;
}
921 char *cpp_loc(long addr)
923 static char loc[256];
924 int line = -1;
925 int i;
926 for (i = nbufs - 1; i > 0; i--)
927 if (bufs[i].type == BUF_FILE)
928 break;
929 if (addr >= hunk_off && i == nbufs - 1)
930 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
931 else
932 line = buf_loc(bufs[i].buf, bufs[i].cur);
933 sprintf(loc, "%s:%d", bufs[i].path, line);
934 return loc;