tok: allow large ds sections and string tokens
[neatcc.git] / cpp.c
blobb42e362c82a4e3cc86d458acd6015c21d8854ab4
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stdarg.h>
4 #include <stddef.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include "mem.h"
12 #include "ncc.h"
13 #include "tab.h"
14 #include "tok.h"
/*
 * The active input buffer.  buf_new() saves these into the topmost
 * bufs[] entry before switching buffers and buf_pop() restores them.
 */
static char *buf;	/* data of the active buffer */
static int len, cur;	/* its length and the current read offset */
20 static struct macro {
21 char name[NAMELEN];
22 char def[MDEFLEN];
23 char args[NARGS][NAMELEN];
24 int nargs;
25 int isfunc;
26 } macros[NDEFS];
27 static int nmacros;
28 /* macro hash table */
29 static struct tab mtab;
31 #define BUF_FILE 0
32 #define BUF_MACRO 1
33 #define BUF_ARG 2
34 #define BUF_EVAL 3
35 #define BUF_TEMP 4
37 static struct buf {
38 char *buf;
39 int len;
40 int cur;
41 int type;
42 /* for BUF_FILE */
43 char path[NAMELEN];
44 /* for BUF_MACRO */
45 struct macro *macro;
46 char args[NARGS][MARGLEN]; /* arguments passed to a macro */
47 /* for BUF_ARG */
48 int arg_buf; /* the bufs index of the owning macro */
49 } bufs[NBUFS];
50 static int nbufs;
51 static int bufs_limit = 1; /* cpp_read() limit; useful in cpp_eval() */
/*
 * Write a printf-style message to file descriptor 2 and terminate the
 * process with exit status 1.  A message that does not fit the local
 * buffer is truncated instead of overflowing it.
 */
void die(char *fmt, ...)
{
	va_list ap;
	char msg[512];
	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);
	write(2, msg, strlen(msg));
	exit(1);
}
64 static void buf_new(int type, char *dat, int dlen)
66 if (nbufs) {
67 bufs[nbufs - 1].buf = buf;
68 bufs[nbufs - 1].cur = cur;
69 bufs[nbufs - 1].len = len;
71 if (nbufs >= NBUFS)
72 die("nomem: NBUFS reached!\n");
73 nbufs++;
74 cur = 0;
75 buf = dat;
76 len = dlen;
77 bufs[nbufs - 1].type = type;
80 static void buf_file(char *path, char *dat, int dlen)
82 buf_new(BUF_FILE, dat, dlen);
83 strcpy(bufs[nbufs - 1].path, path ? path : "");
86 static void buf_macro(struct macro *m)
88 buf_new(BUF_MACRO, m->def, strlen(m->def));
89 bufs[nbufs - 1].macro = m;
92 static void buf_arg(char *arg, int mbuf)
94 buf_new(BUF_ARG, arg, strlen(arg));
95 bufs[nbufs - 1].arg_buf = mbuf;
98 static void buf_pop(void)
100 nbufs--;
101 if (nbufs) {
102 cur = bufs[nbufs - 1].cur;
103 len = bufs[nbufs - 1].len;
104 buf = bufs[nbufs - 1].buf;
108 static int buf_iseval(void)
110 int i;
111 for (i = nbufs - 1; i >= 0; i--)
112 if (bufs[i].type == BUF_EVAL)
113 return 1;
114 return 0;
/* return the size fstat() reports for fd, or 0 if fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;
	if (fstat(fd, &st) != 0)
		return 0;
	return st.st_size;
}
/*
 * Read the whole file at path into a malloc()ed, NUL-terminated buffer
 * and push it as a BUF_FILE buffer.  Returns 0 on success and -1 if
 * the file cannot be opened; dies when memory runs out.
 *
 * The size reported by fstat() is only used as the initial capacity:
 * the buffer grows whenever it fills up, so the terminating NUL always
 * has room even if the reported size is 0 or the file has grown.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	size_t cap, nr = 0;
	ssize_t n;
	char *dat;
	if (fd == -1)
		return -1;
	/* one byte for the NUL and one so that EOF is seen without growing */
	cap = file_size(fd) + 2;
	dat = malloc(cap);
	while (dat) {
		if (nr + 1 >= cap) {
			char *grown = realloc(dat, cap * 2);
			if (!grown) {
				free(dat);
				dat = NULL;
				break;
			}
			dat = grown;
			cap *= 2;
		}
		n = read(fd, dat + nr, cap - 1 - nr);
		if (n <= 0)
			break;
		nr += n;
	}
	close(fd);
	if (!dat)
		die("nomem: cannot allocate file buffer!\n");
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Start preprocessing the file at path.  Returns what include_file()
 * returns: 0 on success, -1 if the file cannot be opened.
 */
int cpp_init(char *path)
{
	int failed = include_file(path);
	return failed;
}
148 static int jumpws(void)
150 int old = cur;
151 while (cur < len && isspace(buf[cur]))
152 cur++;
153 return cur == old;
156 static void read_word(char *dst)
158 jumpws();
159 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
160 *dst++ = buf[cur++];
161 *dst = '\0';
164 static int jumpcomment(void)
166 if (buf[cur] == '/' && buf[cur + 1] == '*') {
167 while (++cur < len) {
168 if (buf[cur] == '*' && buf[cur + 1] == '/') {
169 cur += 2;
170 return 0;
174 if (buf[cur] == '/' && buf[cur + 1] == '/') {
175 while (++cur < len)
176 if (buf[cur] == '\n')
177 break;
178 return 0;
180 return 1;
183 static int jumpstr(void)
185 if (buf[cur] == '\'') {
186 while (cur < len && buf[++cur] != '\'')
187 if (buf[cur] == '\\')
188 cur++;
189 cur++;
190 return 0;
192 if (buf[cur] == '"') {
193 while (cur < len && buf[++cur] != '"')
194 if (buf[cur] == '\\')
195 cur++;
196 cur++;
197 return 0;
199 return 1;
202 static void read_tilleol(char *dst)
204 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
205 cur++;
206 while (cur < len && buf[cur] != '\n') {
207 int last = cur;
208 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
209 cur += 2;
210 continue;
212 if (!jumpstr()) {
213 memcpy(dst, buf + last, cur - last);
214 dst += cur - last;
215 continue;
217 if (!jumpcomment())
218 continue;
219 *dst++ = buf[cur++];
221 *dst = '\0';
224 static char *locs[NLOCS] = {};
225 static int nlocs = 0;
227 void cpp_addpath(char *s)
229 locs[nlocs++] = s;
232 static int include_find(char *name, int std)
234 int i;
235 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
236 char path[1 << 10];
237 if (locs[i])
238 sprintf(path, "%s/%s", locs[i], name);
239 else
240 strcpy(path, name);
241 if (!include_file(path))
242 return 0;
244 return -1;
247 static void readarg(char *s)
249 int depth = 0;
250 int beg = cur;
251 while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) {
252 if (!jumpstr() || !jumpcomment())
253 continue;
254 switch (buf[cur++]) {
255 case '(':
256 case '[':
257 case '{':
258 depth++;
259 break;
260 case ')':
261 case ']':
262 case '}':
263 depth--;
264 break;
267 if (s) {
268 memcpy(s, buf + beg, cur - beg);
269 s[cur - beg] = '\0';
273 static int macro_find(char *name)
275 char *n = tab_get(&mtab, name);
276 if (!n)
277 return -1;
278 return container(n, struct macro, name) - macros;
281 static void macro_undef(char *name)
283 int i = macro_find(name);
284 if (i >= 0)
285 tab_del(&mtab, macros[i].name);
288 static int macro_new(char *name)
290 int i = macro_find(name);
291 if (i >= 0)
292 return i;
293 if (nmacros >= NDEFS)
294 die("nomem: NDEFS reached!\n");
295 i = nmacros++;
296 strcpy(macros[i].name, name);
297 tab_add(&mtab, macros[i].name);
298 return i;
301 static void macro_define(void)
303 char name[NAMELEN];
304 struct macro *d;
305 read_word(name);
306 d = &macros[macro_new(name)];
307 d->isfunc = 0;
308 d->nargs = 0;
309 if (buf[cur] == '(') {
310 cur++;
311 jumpws();
312 while (cur < len && buf[cur] != ')') {
313 readarg(d->args[d->nargs++]);
314 jumpws();
315 if (buf[cur] != ',')
316 break;
317 cur++;
318 jumpws();
320 cur++;
321 d->isfunc = 1;
323 read_tilleol(d->def);
326 static char ebuf[MARGLEN];
327 static int elen;
328 static int ecur;
330 static long evalexpr(void);
332 static int cpp_eval(void)
334 char evalbuf[MARGLEN];
335 int old_limit;
336 int ret, clen;
337 char *cbuf;
338 read_tilleol(evalbuf);
339 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
340 elen = 0;
341 ecur = 0;
342 old_limit = bufs_limit;
343 bufs_limit = nbufs;
344 while (!cpp_read(&cbuf, &clen)) {
345 memcpy(ebuf + elen, cbuf, clen);
346 elen += clen;
348 bufs_limit = old_limit;
349 ret = evalexpr();
350 buf_pop();
351 return ret;
354 static void jumpifs(int jumpelse)
356 int depth = 0;
357 while (cur < len) {
358 if (buf[cur] == '#') {
359 char cmd[NAMELEN];
360 cur++;
361 read_word(cmd);
362 if (!strcmp("else", cmd))
363 if (!depth && !jumpelse)
364 break;
365 if (!strcmp("elif", cmd))
366 if (!depth && !jumpelse && cpp_eval())
367 break;
368 if (!strcmp("endif", cmd)) {
369 if (!depth)
370 break;
371 else
372 depth--;
374 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
375 !strcmp("if", cmd))
376 depth++;
377 continue;
379 if (!jumpcomment())
380 continue;
381 if (!jumpstr())
382 continue;
383 cur++;
387 static int cpp_cmd(void)
389 char cmd[NAMELEN];
390 cur++;
391 read_word(cmd);
392 if (!strcmp("define", cmd)) {
393 macro_define();
394 return 0;
396 if (!strcmp("undef", cmd)) {
397 char name[NAMELEN];
398 read_word(name);
399 macro_undef(name);
400 return 0;
402 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
403 !strcmp("if", cmd)) {
404 char name[NAMELEN];
405 int matched = 0;
406 if (cmd[2]) {
407 int not = cmd[2] == 'n';
408 read_word(name);
409 matched = not ? macro_find(name) < 0 :
410 macro_find(name) >= 0;
411 } else {
412 matched = cpp_eval();
414 if (!matched)
415 jumpifs(0);
416 return 0;
418 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
419 jumpifs(1);
420 return 0;
422 if (!strcmp("endif", cmd))
423 return 0;
424 if (!strcmp("include", cmd)) {
425 char file[NAMELEN];
426 char *s, *e;
427 jumpws();
428 s = buf + cur + 1;
429 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
430 memcpy(file, s, e - s);
431 file[e - s] = '\0';
432 cur += e - s + 2;
433 if (include_find(file, *e == '>') == -1)
434 err("cannot include <%s>\n", file);
435 return 0;
437 return 1;
440 static int macro_arg(struct macro *m, char *arg)
442 int i;
443 for (i = 0; i < m->nargs; i++)
444 if (!strcmp(arg, m->args[i]))
445 return i;
446 return -1;
449 static int buf_arg_find(char *name)
451 int i;
452 for (i = nbufs - 1; i >= 0; i--) {
453 struct buf *mbuf = &bufs[i];
454 struct macro *m = mbuf->macro;
455 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
456 return i;
457 if (mbuf->type == BUF_ARG)
458 i = mbuf->arg_buf;
460 return -1;
463 static void macro_expand(char *name)
465 struct macro *m;
466 int mbuf;
467 if ((mbuf = buf_arg_find(name)) >= 0) {
468 int arg = macro_arg(bufs[mbuf].macro, name);
469 char *dat = bufs[mbuf].args[arg];
470 buf_arg(dat, mbuf);
471 return;
473 m = &macros[macro_find(name)];
474 if (!m->isfunc) {
475 buf_macro(m);
476 return;
478 jumpws();
479 if (buf[cur] == '(') {
480 int i = 0;
481 struct buf *mbuf = &bufs[nbufs];
482 cur++;
483 jumpws();
484 while (cur < len && buf[cur] != ')') {
485 readarg(mbuf->args[i++]);
486 jumpws();
487 if (buf[cur] != ',')
488 break;
489 cur++;
490 jumpws();
492 while (i < m->nargs)
493 mbuf->args[i++][0] = '\0';
494 cur++;
495 buf_macro(m);
499 static int buf_expanding(char *macro)
501 int i;
502 for (i = nbufs - 1; i >= 0; i--) {
503 if (bufs[i].type == BUF_ARG)
504 return 0;
505 if (bufs[i].type == BUF_MACRO &&
506 !strcmp(macro, bufs[i].macro->name))
507 return 1;
509 return 0;
512 /* return 1 for plain macros and arguments and 2 for function macros */
513 static int expandable(char *word)
515 int i;
516 if (buf_arg_find(word) >= 0)
517 return 1;
518 if (buf_expanding(word))
519 return 0;
520 i = macro_find(word);
521 return i >= 0 ? macros[i].isfunc + 1 : 0;
524 void cpp_define(char *name, char *def)
526 char tmp_buf[MDEFLEN];
527 sprintf(tmp_buf, "%s\t%s", name, def);
528 buf_new(BUF_TEMP, tmp_buf, strlen(tmp_buf));
529 macro_define();
530 buf_pop();
533 static int seen_macro; /* seen a macro; 2 if a function macro */
534 static char seen_name[NAMELEN]; /* the name of the last macro */
536 static int hunk_off;
537 static int hunk_len;
539 int cpp_read(char **obuf, int *olen)
541 int old, end;
542 int jump_name = 0;
543 *olen = 0;
544 *obuf = "";
545 if (seen_macro == 1) {
546 macro_expand(seen_name);
547 seen_macro = 0;
549 if (cur == len) {
550 struct buf *cbuf = &bufs[nbufs - 1];
551 if (nbufs < bufs_limit + 1)
552 return -1;
553 if (cbuf->type == BUF_FILE)
554 free(buf);
555 buf_pop();
557 old = cur;
558 if (buf[cur] == '#')
559 if (!cpp_cmd())
560 return 0;
561 while (cur < len) {
562 if (!jumpws())
563 continue;
564 if (buf[cur] == '#')
565 break;
566 if (!jumpcomment())
567 continue;
568 if (seen_macro == 2) {
569 if (buf[cur] == '(')
570 macro_expand(seen_name);
571 seen_macro = 0;
572 old = cur;
573 continue;
575 if (!jumpstr())
576 continue;
577 if (isalnum(buf[cur]) || buf[cur] == '_') {
578 char word[NAMELEN];
579 read_word(word);
580 seen_macro = expandable(word);
581 if (seen_macro) {
582 strcpy(seen_name, word);
583 jump_name = 1;
584 break;
586 if (buf_iseval() && !strcmp("defined", word)) {
587 int parens = 0;
588 jumpws();
589 if (buf[cur] == '(') {
590 parens = 1;
591 cur++;
593 read_word(word);
594 if (parens) {
595 jumpws();
596 cur++;
599 continue;
601 cur++;
603 /* macros are expanded later; ignoring their names */
604 end = jump_name ? cur - strlen(seen_name) : cur;
605 if (!buf_iseval()) {
606 hunk_off += hunk_len;
607 hunk_len = end - old;
609 *obuf = buf + old;
610 *olen = end - old;
611 return 0;
614 /* preprocessor constant expression evaluation */
616 static char etok[NAMELEN];
617 static int enext;
619 static char *tok2[] = {
620 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
623 static int eval_tok(void)
625 char *s = etok;
626 int i;
627 while (ecur < elen) {
628 while (ecur < elen && isspace(ebuf[ecur]))
629 ecur++;
630 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
631 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
632 ebuf[ecur - 1] != '/'))
633 ecur++;
634 continue;
636 break;
638 if (ecur >= elen)
639 return TOK_EOF;
640 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
641 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
642 *s++ = ebuf[ecur++];
643 *s = '\0';
644 return TOK_NAME;
646 if (isdigit(ebuf[ecur])) {
647 while (isdigit(ebuf[ecur]))
648 *s++ = ebuf[ecur++];
649 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
650 ecur++;
651 *s = '\0';
652 return TOK_NUM;
654 for (i = 0; i < LEN(tok2); i++)
655 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
656 int ret = TOK2(tok2[i]);
657 ecur += 2;
658 return ret;
660 return ebuf[ecur++];
663 static int eval_see(void)
665 if (enext == -1)
666 enext = eval_tok();
667 return enext;
670 static int eval_get(void)
672 if (enext != -1) {
673 int ret = enext;
674 enext = -1;
675 return ret;
677 return eval_tok();
680 static long eval_num(void)
682 return atol(etok);
/* consume the next token if it is tok; returns 0 if it was consumed, else 1 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume the next token if it is tok; a different token is silently left in place */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
699 static char *eval_id(void)
701 return etok;
704 static long evalcexpr(void);
706 static long evalatom(void)
708 if (!eval_jmp(TOK_NUM))
709 return eval_num();
710 if (!eval_jmp(TOK_NAME)) {
711 int parens = !eval_jmp('(');
712 long ret;
713 eval_expect(TOK_NAME);
714 ret = macro_find(eval_id()) >= 0;
715 if (parens)
716 eval_expect(')');
717 return ret;
719 if (!eval_jmp('(')) {
720 long ret = evalcexpr();
721 eval_expect(')');
722 return ret;
724 return -1;
/* parse a unary expression: any number of '!', '-' and '~' prefixes before a primary */
static long evalpre(void)
{
	long v;
	if (eval_jmp('!') == 0) {
		v = evalpre();
		return !v;
	}
	if (eval_jmp('-') == 0) {
		v = evalpre();
		return -v;
	}
	if (eval_jmp('~') == 0) {
		v = evalpre();
		return ~v;
	}
	return evalatom();
}
/*
 * Parse a chain of '*', '/' and '%' operations, left to right.
 * Division or remainder by zero yields 0 instead of trapping.
 */
static long evalmul(void)
{
	long ret = evalpre();
	while (1) {
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			long d = evalpre();
			ret = d ? ret / d : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			long d = evalpre();
			ret = d ? ret % d : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* parse a chain of '+' and '-' operations, left to right */
static long evaladd(void)
{
	long sum = evalmul();
	for (;;) {
		if (eval_jmp('+') == 0)
			sum += evalmul();
		else if (eval_jmp('-') == 0)
			sum -= evalmul();
		else
			return sum;
	}
}
/* parse a chain of "<<" and ">>" operations, left to right */
static long evalshift(void)
{
	long val = evaladd();
	for (;;) {
		if (eval_jmp(TOK2("<<")) == 0)
			val <<= evaladd();
		else if (eval_jmp(TOK2(">>")) == 0)
			val >>= evaladd();
		else
			return val;
	}
}
/* parse a chain of '<', '>', "<=" and ">=" comparisons, left to right */
static long evalcmp(void)
{
	long lhs = evalshift();
	for (;;) {
		if (eval_jmp('<') == 0)
			lhs = lhs < evalshift();
		else if (eval_jmp('>') == 0)
			lhs = lhs > evalshift();
		else if (eval_jmp(TOK2("<=")) == 0)
			lhs = lhs <= evalshift();
		else if (eval_jmp(TOK2(">=")) == 0)
			lhs = lhs >= evalshift();
		else
			return lhs;
	}
}
/* parse a chain of "==" and "!=" comparisons, left to right */
static long evaleq(void)
{
	long lhs = evalcmp();
	for (;;) {
		if (eval_jmp(TOK2("==")) == 0)
			lhs = lhs == evalcmp();
		else if (eval_jmp(TOK2("!=")) == 0)
			lhs = lhs != evalcmp();
		else
			return lhs;
	}
}
/* parse a chain of bitwise '&' operations */
static long evalbitand(void)
{
	long acc = evaleq();
	for (;;) {
		if (eval_jmp('&') != 0)
			break;
		acc &= evaleq();
	}
	return acc;
}
/* parse a chain of bitwise '^' operations */
static long evalxor(void)
{
	long acc = evalbitand();
	for (;;) {
		if (eval_jmp('^') != 0)
			break;
		acc ^= evalbitand();
	}
	return acc;
}
/* parse a chain of bitwise '|' operations */
static long evalbitor(void)
{
	long acc = evalxor();
	for (;;) {
		if (eval_jmp('|') != 0)
			break;
		acc |= evalxor();
	}
	return acc;
}
/*
 * Parse a chain of "&&" operations.  The right operand is always
 * parsed, even when the left one is already zero: leaving it to C's
 * short-circuit evaluation would keep its tokens in the input and
 * derail the rest of the expression.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Parse a chain of "||" operations.  The right operand is always
 * parsed, even when the left one is already nonzero: leaving it to C's
 * short-circuit evaluation would keep its tokens in the input and
 * derail the rest of the expression.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
/*
 * Parse a conditional expression: a logical-or expression optionally
 * followed by "? a : b".  Both branches are always parsed, so the
 * tokens of the branch that is not selected are consumed properly and
 * a missing ':' cannot cause an endless scan.  The branches may
 * themselves be conditional expressions.
 */
static long evalcexpr(void)
{
	long cond = evalor();
	long yes, no;
	if (eval_jmp('?'))
		return cond;
	yes = evalcexpr();
	eval_expect(':');
	no = evalcexpr();
	return cond ? yes : no;
}
887 static long evalexpr(void)
889 enext = -1;
890 return evalcexpr();
/*
 * Return the 1-based line number of offset off in the NUL-terminated
 * text s, i.e. one plus the number of newlines before that offset.
 */
static int buf_loc(char *s, int off)
{
	char *end = s + off;
	char *nl;
	int line = 1;
	for (nl = strchr(s, '\n'); nl && nl < end; nl = strchr(nl + 1, '\n'))
		line++;
	return line;
}
904 char *cpp_loc(long addr)
906 static char loc[256];
907 int line = -1;
908 int i;
909 for (i = nbufs - 1; i > 0; i--)
910 if (bufs[i].type == BUF_FILE)
911 break;
912 if (addr >= hunk_off && i == nbufs - 1)
913 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
914 else
915 line = buf_loc(bufs[i].buf, bufs[i].cur);
916 sprintf(loc, "%s:%d", bufs[i].path, line);
917 return loc;