file move
[neatcc/cc.git] / cpp.c
blob26566b6725b37284e41a8b2eb7c62c50daf0d880
1 /* neatcc preprocessor */
2 #include <ctype.h>
3 #include <fcntl.h>
4 #include <stdarg.h>
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <unistd.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include "mem.h"
13 #include "ncc.h"
14 #include "tok.h"
/*
 * State of the input buffer being read: its data, its length and the
 * read position.  buf_new() and buf_pop() save these into bufs[] and
 * restore them from it when buffers are pushed and popped.
 */
16 static char *buf;
17 static int len;
18 static int cur;
/*
 * The macro table.  Entries are allocated by macro_new(); #undef only
 * sets the undef flag of an entry.
 */
20 static struct macro {
21 char name[NAMELEN]; /* macro name */
22 char def[MDEFLEN]; /* macro definition */
/* parameter names of a function-like macro, filled in by macro_define() */
23 char args[NARGS][NAMELEN];
24 int nargs; /* number of arguments */
25 int isfunc; /* macro is a function */
26 int undef; /* macro is removed */
27 } macros[NDEFS];
/* starts at 1: macro_find() stops at index 0, so 0 terminates a hash chain */
28 static int mcount = 1; /* number of macros */
/* a chain is selected by the first byte of the macro name */
29 static int mhead[256]; /* macro hash table heads */
30 static int mnext[NDEFS]; /* macro hash table next entries */
/* values of the type field of struct buf */
/* the contents of a source file; pushed by buf_file() */
32 #define BUF_FILE 0
/* the definition of a macro being expanded; pushed by buf_macro() */
33 #define BUF_MACRO 1
/* the text of a macro argument; pushed by buf_arg() */
34 #define BUF_ARG 2
/* the expression of an #if or #elif; pushed by cpp_eval() */
35 #define BUF_EVAL 3
/* the temporary "name<TAB>definition" text built by cpp_define() */
36 #define BUF_TEMP 4
38 /* preprocessing input buffers for files, macros and macro arguments */
/*
 * bufs[] is used as a stack.  The state of the topmost buffer lives in
 * the buf, len and cur variables; the buf, len and cur fields of an
 * entry are written by buf_new() only when another buffer is pushed on
 * top of it.
 */
39 static struct buf {
40 char *buf;
41 int len;
42 int cur;
/* one of the BUF_* constants */
43 int type;
44 /* for BUF_FILE */
45 char path[NAMELEN];
46 /* for BUF_MACRO */
47 struct macro *macro;
48 char args[NARGS][MARGLEN]; /* arguments passed to a macro */
49 /* for BUF_ARG */
50 int arg_buf; /* the bufs index of the owning macro */
51 } bufs[NBUFS];
/* the number of buffers on the stack */
52 static int nbufs;
53 static int bufs_limit = 1; /* cpp_read() limit; useful in cpp_eval() */
/*
 * Print a printf-style message to standard error and exit with status 1.
 *
 * The message is formatted into a fixed-size buffer; a message that does
 * not fit is truncated instead of overflowing the buffer.
 */
void die(char *fmt, ...)
{
	va_list ap;
	char msg[512];
	int n;
	va_start(ap, fmt);
	n = vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);
	if (n < 0)
		n = 0;
	/* vsnprintf() reports the untruncated length */
	if ((size_t) n >= sizeof(msg))
		n = sizeof(msg) - 1;
	write(2, msg, (size_t) n);
	exit(1);
}
66 static void buf_new(int type, char *dat, int dlen)
68 if (nbufs) {
69 bufs[nbufs - 1].buf = buf;
70 bufs[nbufs - 1].cur = cur;
71 bufs[nbufs - 1].len = len;
73 if (nbufs >= NBUFS)
74 die("nomem: NBUFS reached!\n");
75 nbufs++;
76 cur = 0;
77 buf = dat;
78 len = dlen;
79 bufs[nbufs - 1].type = type;
82 static void buf_file(char *path, char *dat, int dlen)
84 buf_new(BUF_FILE, dat, dlen);
85 strcpy(bufs[nbufs - 1].path, path ? path : "");
88 static void buf_macro(struct macro *m)
90 buf_new(BUF_MACRO, m->def, strlen(m->def));
91 bufs[nbufs - 1].macro = m;
94 static void buf_arg(char *arg, int mbuf)
96 buf_new(BUF_ARG, arg, strlen(arg));
97 bufs[nbufs - 1].arg_buf = mbuf;
100 static void buf_pop(void)
102 nbufs--;
103 if (nbufs) {
104 cur = bufs[nbufs - 1].cur;
105 len = bufs[nbufs - 1].len;
106 buf = bufs[nbufs - 1].buf;
110 static int buf_iseval(void)
112 int i;
113 for (i = nbufs - 1; i >= 0; i--)
114 if (bufs[i].type == BUF_EVAL)
115 return 1;
116 return 0;
/* return the size fstat() reports for fd, or 0 if fstat() fails */
static size_t file_size(int fd)
{
	struct stat info;
	if (fstat(fd, &info) != 0)
		return 0;
	return info.st_size;
}
/*
 * Read the file at path into a newly allocated, NUL-terminated buffer and
 * push it as a BUF_FILE buffer.
 *
 * Returns 0 on success and -1 if the file cannot be opened.  Dies if the
 * buffer cannot be allocated.  At most the size reported by file_size()
 * is read, so that the terminating NUL always fits into the buffer.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int n, nr = 0;
	char *dat;
	int size;
	if (fd == -1)
		return -1;
	size = file_size(fd) + 1;
	dat = malloc(size);
	if (!dat) {
		close(fd);
		die("nomem: cannot read <%s>\n", path);
	}
	/* keep the last byte of dat for the terminating NUL */
	while (nr < size - 1 && (n = read(fd, dat + nr, size - 1 - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Start preprocessing the file at path.  Returns what include_file()
 * returns: 0 on success and -1 if the file cannot be opened.
 */
int cpp_init(char *path)
{
	int status = include_file(path);
	return status;
}
150 static int jumpws(void)
152 int old = cur;
153 while (cur < len && isspace(buf[cur]))
154 cur++;
155 return cur == old;
158 static void read_word(char *dst)
160 jumpws();
161 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
162 *dst++ = buf[cur++];
163 *dst = '\0';
166 static int jumpcomment(void)
168 if (buf[cur] == '/' && buf[cur + 1] == '*') {
169 while (++cur < len) {
170 if (buf[cur] == '*' && buf[cur + 1] == '/') {
171 cur += 2;
172 return 0;
176 if (buf[cur] == '/' && buf[cur + 1] == '/') {
177 while (++cur < len && buf[cur] != '\n')
178 if (buf[cur] == '\\')
179 cur++;
180 return 0;
182 return 1;
185 static int jumpstr(void)
187 if (buf[cur] == '\'') {
188 while (++cur < len && buf[cur] != '\'')
189 if (buf[cur] == '\\')
190 cur++;
191 cur++;
192 return 0;
194 if (buf[cur] == '"') {
195 while (++cur < len && buf[cur] != '"')
196 if (buf[cur] == '\\')
197 cur++;
198 cur++;
199 return 0;
201 return 1;
204 static void read_tilleol(char *dst)
206 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
207 cur++;
208 while (cur < len && buf[cur] != '\n') {
209 int last = cur;
210 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
211 cur += 2;
212 continue;
214 if (!jumpstr()) {
215 memcpy(dst, buf + last, cur - last);
216 dst += cur - last;
217 continue;
219 if (!jumpcomment())
220 continue;
221 *dst++ = buf[cur++];
223 *dst = '\0';
226 static char *locs[NLOCS] = {};
227 static int nlocs = 0;
229 void cpp_addpath(char *s)
231 locs[nlocs++] = s;
234 static int include_find(char *name, int std)
236 int i;
237 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
238 char path[1 << 10];
239 if (locs[i])
240 sprintf(path, "%s/%s", locs[i], name);
241 else
242 strcpy(path, name);
243 if (!include_file(path))
244 return 0;
246 return -1;
249 static void readarg(char *s)
251 int depth = 0;
252 int beg = cur;
253 while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) {
254 if (!jumpstr() || !jumpcomment())
255 continue;
256 switch (buf[cur++]) {
257 case '(':
258 case '[':
259 case '{':
260 depth++;
261 break;
262 case ')':
263 case ']':
264 case '}':
265 depth--;
266 break;
269 if (s) {
270 memcpy(s, buf + beg, cur - beg);
271 s[cur - beg] = '\0';
275 /* find a macro; if undef is nonzero, search #undef-ed macros too */
276 static int macro_find(char *name, int undef)
278 int i = mhead[(unsigned char) name[0]];
279 while (i > 0) {
280 if (!strcmp(name, macros[i].name))
281 if (!macros[i].undef || undef)
282 return i;
283 i = mnext[i];
285 return -1;
288 static void macro_undef(char *name)
290 int i = macro_find(name, 0);
291 if (i >= 0)
292 macros[i].undef = 1;
295 static int macro_new(char *name)
297 int i = macro_find(name, 1);
298 if (i >= 0)
299 return i;
300 if (mcount >= NDEFS)
301 die("nomem: NDEFS reached!\n");
302 i = mcount++;
303 strcpy(macros[i].name, name);
304 mnext[i] = mhead[(unsigned char) name[0]];
305 mhead[(unsigned char) name[0]] = i;
306 return i;
309 static void macro_define(void)
311 char name[NAMELEN];
312 struct macro *d;
313 read_word(name);
314 d = &macros[macro_new(name)];
315 d->isfunc = 0;
316 d->nargs = 0;
317 if (buf[cur] == '(') {
318 cur++;
319 jumpws();
320 while (cur < len && buf[cur] != ')') {
321 readarg(d->args[d->nargs++]);
322 jumpws();
323 if (buf[cur] != ',')
324 break;
325 cur++;
326 jumpws();
328 cur++;
329 d->isfunc = 1;
331 read_tilleol(d->def);
334 static char ebuf[MARGLEN];
335 static int elen;
336 static int ecur;
338 static long evalexpr(void);
340 static int cpp_eval(void)
342 char evalbuf[MARGLEN];
343 int old_limit;
344 int ret, clen;
345 char *cbuf;
346 read_tilleol(evalbuf);
347 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
348 elen = 0;
349 ecur = 0;
350 old_limit = bufs_limit;
351 bufs_limit = nbufs;
352 while (!cpp_read(&cbuf, &clen)) {
353 memcpy(ebuf + elen, cbuf, clen);
354 elen += clen;
356 bufs_limit = old_limit;
357 ret = evalexpr();
358 buf_pop();
359 return ret;
362 static void jumpifs(int jumpelse)
364 int depth = 0;
365 while (cur < len) {
366 if (buf[cur] == '#') {
367 char cmd[NAMELEN];
368 cur++;
369 read_word(cmd);
370 if (!strcmp("else", cmd))
371 if (!depth && !jumpelse)
372 break;
373 if (!strcmp("elif", cmd))
374 if (!depth && !jumpelse && cpp_eval())
375 break;
376 if (!strcmp("endif", cmd)) {
377 if (!depth)
378 break;
379 else
380 depth--;
382 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
383 !strcmp("if", cmd))
384 depth++;
385 continue;
387 if (!jumpcomment())
388 continue;
389 if (!jumpstr())
390 continue;
391 cur++;
395 static int cpp_cmd(void)
397 char cmd[NAMELEN];
398 cur++;
399 read_word(cmd);
400 if (!strcmp("define", cmd)) {
401 macro_define();
402 return 0;
404 if (!strcmp("undef", cmd)) {
405 char name[NAMELEN];
406 read_word(name);
407 macro_undef(name);
408 return 0;
410 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
411 !strcmp("if", cmd)) {
412 char name[NAMELEN];
413 int matched = 0;
414 if (cmd[2]) {
415 int not = cmd[2] == 'n';
416 read_word(name);
417 matched = not ? macro_find(name, 0) < 0 :
418 macro_find(name, 0) >= 0;
419 } else {
420 matched = cpp_eval();
422 if (!matched)
423 jumpifs(0);
424 return 0;
426 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
427 jumpifs(1);
428 return 0;
430 if (!strcmp("endif", cmd))
431 return 0;
432 if (!strcmp("include", cmd)) {
433 char file[NAMELEN];
434 char *s, *e;
435 jumpws();
436 s = buf + cur + 1;
437 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
438 memcpy(file, s, e - s);
439 file[e - s] = '\0';
440 cur += e - s + 2;
441 if (include_find(file, *e == '>') == -1)
442 err("cannot include <%s>\n", file);
443 return 0;
445 err("unknown directive <%s>\n", cmd);
446 return 1;
449 static int macro_arg(struct macro *m, char *arg)
451 int i;
452 for (i = 0; i < m->nargs; i++)
453 if (!strcmp(arg, m->args[i]))
454 return i;
455 return -1;
458 static int buf_arg_find(char *name)
460 int i;
461 for (i = nbufs - 1; i >= 0; i--) {
462 struct buf *mbuf = &bufs[i];
463 struct macro *m = mbuf->macro;
464 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
465 return i;
466 if (mbuf->type == BUF_ARG)
467 i = mbuf->arg_buf;
469 return -1;
472 static void macro_expand(char *name)
474 struct macro *m;
475 int mbuf;
476 if ((mbuf = buf_arg_find(name)) >= 0) {
477 int arg = macro_arg(bufs[mbuf].macro, name);
478 char *dat = bufs[mbuf].args[arg];
479 buf_arg(dat, mbuf);
480 return;
482 m = &macros[macro_find(name, 0)];
483 if (!m->isfunc) {
484 buf_macro(m);
485 return;
487 jumpws();
488 if (buf[cur] == '(') {
489 int i = 0;
490 struct buf *mbuf = &bufs[nbufs];
491 cur++;
492 jumpws();
493 while (cur < len && buf[cur] != ')') {
494 readarg(mbuf->args[i++]);
495 jumpws();
496 if (buf[cur] != ',')
497 break;
498 cur++;
499 jumpws();
501 while (i < m->nargs)
502 mbuf->args[i++][0] = '\0';
503 cur++;
504 buf_macro(m);
508 static int buf_expanding(char *macro)
510 int i;
511 for (i = nbufs - 1; i >= 0; i--) {
512 if (bufs[i].type == BUF_ARG)
513 return 0;
514 if (bufs[i].type == BUF_MACRO &&
515 !strcmp(macro, bufs[i].macro->name))
516 return 1;
518 return 0;
521 /* return 1 for plain macros and arguments and 2 for function macros */
522 static int expandable(char *word)
524 int i;
525 if (buf_arg_find(word) >= 0)
526 return 1;
527 if (buf_expanding(word))
528 return 0;
529 i = macro_find(word, 0);
530 return i >= 0 ? macros[i].isfunc + 1 : 0;
533 void cpp_define(char *name, char *def)
535 char tmp_buf[MDEFLEN];
536 sprintf(tmp_buf, "%s\t%s", name, def);
537 buf_new(BUF_TEMP, tmp_buf, strlen(tmp_buf));
538 macro_define();
539 buf_pop();
/* state that cpp_read() keeps between calls */
542 static int seen_macro; /* seen a macro; 2 if a function macro */
543 static char seen_name[NAMELEN]; /* the name of the last macro */
/*
 * hunk_len is the length of the last hunk cpp_read() returned outside of
 * expression evaluation, and hunk_off is the sum of the lengths of the
 * hunks before it.  cpp_loc() uses them to map an address to a line.
 */
545 static int hunk_off;
546 static int hunk_len;
/*
 * Return the next hunk of preprocessed text.
 *
 * On success 0 is returned, *obuf points into the current input buffer
 * and *olen is the length of the hunk, which may be zero.  -1 is
 * returned when the current buffer is exhausted and fewer than
 * bufs_limit + 1 buffers are on the stack.
 *
 * A hunk ends at a '#', at the name of something expandable() accepts,
 * or at the end of the current buffer.  The name is left out of the hunk
 * and its expansion is started by a later step: at the start of the next
 * call when seen_macro is 1, and inside the scanning loop, once white
 * space and comments after the name are skipped, when it is 2.
 */
548 int cpp_read(char **obuf, int *olen)
550 int old, end;
551 int jump_name = 0;
552 *olen = 0;
553 *obuf = "";
/* the previous call stopped at a plain macro or a macro parameter */
554 if (seen_macro == 1) {
555 macro_expand(seen_name);
556 seen_macro = 0;
/* the current buffer is exhausted; return to the one below it */
558 if (cur == len) {
559 struct buf *cbuf = &bufs[nbufs - 1];
560 if (nbufs < bufs_limit + 1)
561 return -1;
/* the data of file buffers is allocated by include_file() */
562 if (cbuf->type == BUF_FILE)
563 free(buf);
564 buf_pop();
566 old = cur;
/* a recognized directive yields an empty hunk */
567 if (buf[cur] == '#')
568 if (!cpp_cmd())
569 return 0;
570 while (cur < len) {
571 if (!jumpws())
572 continue;
573 if (buf[cur] == '#')
574 break;
575 if (!jumpcomment())
576 continue;
/*
 * The previous hunk stopped at the name of a function-like macro.
 * NOTE(review): when no '(' follows, the name seems to be dropped from
 * the output, since the hunk restarts at cur either way -- confirm.
 */
577 if (seen_macro == 2) {
578 if (buf[cur] == '(')
579 macro_expand(seen_name);
580 seen_macro = 0;
581 old = cur;
582 continue;
584 if (!jumpstr())
585 continue;
586 if (isalnum(buf[cur]) || buf[cur] == '_') {
587 char word[NAMELEN];
588 read_word(word);
589 seen_macro = expandable(word);
590 if (seen_macro) {
591 strcpy(seen_name, word);
592 jump_name = 1;
593 break;
/* inside #if expressions the operand of defined is passed over unexpanded */
595 if (buf_iseval() && !strcmp("defined", word)) {
596 int parens = 0;
597 jumpws();
598 if (buf[cur] == '(') {
599 parens = 1;
600 cur++;
602 read_word(word);
603 if (parens) {
604 jumpws();
605 cur++;
608 continue;
610 cur++;
612 /* macros are expanded later; ignoring their names */
613 end = jump_name ? cur - strlen(seen_name) : cur;
/* hunks read while evaluating an expression are not counted */
614 if (!buf_iseval()) {
615 hunk_off += hunk_len;
616 hunk_len = end - old;
618 *obuf = buf + old;
619 *olen = end - old;
620 return 0;
623 /* preprocessor constant expression evaluation */
625 static char etok[NAMELEN];
626 static int enext;
628 static char *tok2[] = {
629 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
632 static int eval_tok(void)
634 char *s = etok;
635 int i;
636 while (ecur < elen) {
637 while (ecur < elen && isspace(ebuf[ecur]))
638 ecur++;
639 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
640 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
641 ebuf[ecur - 1] != '/'))
642 ecur++;
643 continue;
645 break;
647 if (ecur >= elen)
648 return TOK_EOF;
649 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
650 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
651 *s++ = ebuf[ecur++];
652 *s = '\0';
653 return TOK_NAME;
655 if (isdigit(ebuf[ecur])) {
656 while (isdigit(ebuf[ecur]))
657 *s++ = ebuf[ecur++];
658 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
659 ecur++;
660 *s = '\0';
661 return TOK_NUM;
663 for (i = 0; i < LEN(tok2); i++)
664 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
665 int ret = TOK2(tok2[i]);
666 ecur += 2;
667 return ret;
669 return ebuf[ecur++];
672 static int eval_see(void)
674 if (enext == -1)
675 enext = eval_tok();
676 return enext;
679 static int eval_get(void)
681 if (enext != -1) {
682 int ret = enext;
683 enext = -1;
684 return ret;
686 return eval_tok();
689 static long eval_num(void)
691 return atol(etok);
/*
 * Consume the next token if it is tok.
 * Returns 0 if it was consumed and 1 if the next token is something else.
 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume the next token if it is tok; a different token is left in place */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
708 static char *eval_id(void)
710 return etok;
713 static long evalcexpr(void);
715 static long evalatom(void)
717 if (!eval_jmp(TOK_NUM))
718 return eval_num();
719 if (!eval_jmp(TOK_NAME)) {
720 int parens = !eval_jmp('(');
721 long ret;
722 eval_expect(TOK_NAME);
723 ret = macro_find(eval_id(), 0) >= 0;
724 if (parens)
725 eval_expect(')');
726 return ret;
728 if (!eval_jmp('(')) {
729 long ret = evalcexpr();
730 eval_expect(')');
731 return ret;
733 return -1;
/* evaluate a primary expression preceded by any number of !, - and ~ operators */
static long evalpre(void)
{
	long val;
	if (eval_jmp('!') == 0) {
		val = evalpre();
		return !val;
	}
	if (eval_jmp('-') == 0) {
		val = evalpre();
		return -val;
	}
	if (eval_jmp('~') == 0) {
		val = evalpre();
		return ~val;
	}
	return evalatom();
}
/*
 * Evaluate a sequence of *, / and % operations, left to right.
 * Dividing by zero yields 0 instead of trapping.
 */
static long evalmul(void)
{
	long ret = evalpre();
	while (1) {
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			long div = evalpre();
			ret = div ? ret / div : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			long div = evalpre();
			ret = div ? ret % div : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* evaluate a sequence of + and - operations, left to right */
static long evaladd(void)
{
	long sum = evalmul();
	for (;;) {
		if (eval_jmp('+') == 0)
			sum += evalmul();
		else if (eval_jmp('-') == 0)
			sum -= evalmul();
		else
			return sum;
	}
}
/* evaluate a sequence of << and >> operations, left to right */
static long evalshift(void)
{
	long val = evaladd();
	for (;;) {
		if (eval_jmp(TOK2("<<")) == 0)
			val <<= evaladd();
		else if (eval_jmp(TOK2(">>")) == 0)
			val >>= evaladd();
		else
			return val;
	}
}
/* evaluate a sequence of <, >, <= and >= comparisons, left to right */
static long evalcmp(void)
{
	long val = evalshift();
	for (;;) {
		if (eval_jmp('<') == 0)
			val = val < evalshift();
		else if (eval_jmp('>') == 0)
			val = val > evalshift();
		else if (eval_jmp(TOK2("<=")) == 0)
			val = val <= evalshift();
		else if (eval_jmp(TOK2(">=")) == 0)
			val = val >= evalshift();
		else
			return val;
	}
}
/* evaluate a sequence of == and != comparisons, left to right */
static long evaleq(void)
{
	long val = evalcmp();
	for (;;) {
		if (eval_jmp(TOK2("==")) == 0)
			val = val == evalcmp();
		else if (eval_jmp(TOK2("!=")) == 0)
			val = val != evalcmp();
		else
			return val;
	}
}
/* evaluate a sequence of bitwise & operations */
static long evalbitand(void)
{
	long val = evaleq();
	for (;;) {
		if (eval_jmp('&') != 0)
			return val;
		val &= evaleq();
	}
}
/* evaluate a sequence of bitwise ^ operations */
static long evalxor(void)
{
	long val = evalbitand();
	for (;;) {
		if (eval_jmp('^') != 0)
			return val;
		val ^= evalbitand();
	}
}
/* evaluate a sequence of bitwise | operations */
static long evalbitor(void)
{
	long val = evalxor();
	for (;;) {
		if (eval_jmp('|') != 0)
			return val;
		val |= evalxor();
	}
}
/*
 * Evaluate a sequence of && operations.  The right operand is always
 * parsed, even if the left one is zero, so that its tokens are consumed
 * and the rest of the expression is read from the right place.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Evaluate a sequence of || operations.  The right operand is always
 * parsed, even if the left one is nonzero, so that its tokens are
 * consumed and the rest of the expression is read from the right place.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
/*
 * Evaluate a conditional expression: cond, or cond ? yes : no.
 *
 * Both branches are always parsed, so that all of their tokens are
 * consumed; conditional expressions may be nested in either branch.
 * A missing ':' is not reported.
 */
static long evalcexpr(void)
{
	long cond = evalor();
	long yes, no;
	if (eval_jmp('?'))
		return cond;
	yes = evalcexpr();
	eval_expect(':');
	no = evalcexpr();
	return cond ? yes : no;
}
896 static long evalexpr(void)
898 enext = -1;
899 return evalcexpr();
/*
 * Return the line number, counting from 1, of offset off in the text s:
 * one more than the number of newlines before that offset.
 */
static int buf_loc(char *s, int off)
{
	char *end = s + off;
	char *nl = strchr(s, '\n');
	int line = 1;
	while (nl && nl < end) {
		line++;
		nl = strchr(nl + 1, '\n');
	}
	return line;
}
913 char *cpp_loc(long addr)
915 static char loc[256];
916 int line = -1;
917 int i;
918 for (i = nbufs - 1; i > 0; i--)
919 if (bufs[i].type == BUF_FILE)
920 break;
921 if (addr >= hunk_off && i == nbufs - 1)
922 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
923 else
924 line = buf_loc(bufs[i].buf, bufs[i].cur);
925 sprintf(loc, "%s:%d", bufs[i].path, line);
926 return loc;