tok: handle // comments
[neatcc.git] / cpp.c
blob b382257b359f6d7be61930d5f08c0c878cec486c
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stddef.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/stat.h>
10 #include "tok.h"
11 #include "tab.h"
/* the buffer currently being read: its data, its length and the read offset */
static char *buf;
static int len;
static int cur;
17 #define MAXDEFS (1 << 12)
18 #define MACROLEN (1 << 10)
19 #define MAXARGS (1 << 5)
20 #define NBUCKET (MAXDEFS << 1)
22 static struct macro {
23 char name[NAMELEN];
24 char def[MACROLEN];
25 char args[MAXARGS][NAMELEN];
26 int nargs;
27 int isfunc;
28 } macros[MAXDEFS];
29 static int nmacros;
30 /* macro hash table */
31 static struct tab mtab;
33 #define MAXBUFS (1 << 5)
34 #define BUF_FILE 0
35 #define BUF_MACRO 1
36 #define BUF_ARG 2
37 #define BUF_EVAL 3
38 #define BUF_TEMP 4
40 static struct buf {
41 char *buf;
42 int len;
43 int cur;
44 int type;
45 /* for BUF_FILE */
46 char path[NAMELEN];
47 /* for BUF_MACRO */
48 struct macro *macro;
49 char args[MAXARGS][MACROLEN]; /* arguments passed to a macro */
50 /* for BUF_ARG */
51 int arg_buf; /* the bufs index of the owning macro */
52 } bufs[MAXBUFS];
53 static int nbufs;
/* write msg to file descriptor 2 and terminate with exit status 1 */
void die(char *msg)
{
	size_t n = strlen(msg);

	write(2, msg, n);
	exit(1);
}
61 static void buf_new(int type, char *dat, int dlen)
63 if (nbufs) {
64 bufs[nbufs - 1].buf = buf;
65 bufs[nbufs - 1].cur = cur;
66 bufs[nbufs - 1].len = len;
68 if (nbufs >= MAXBUFS)
69 die("nomem: MAXBUFS reached!\n");
70 nbufs++;
71 cur = 0;
72 buf = dat;
73 len = dlen;
74 bufs[nbufs - 1].type = type;
77 static void buf_file(char *path, char *dat, int dlen)
79 buf_new(BUF_FILE, dat, dlen);
80 strcpy(bufs[nbufs - 1].path, path ? path : "");
83 static void buf_macro(struct macro *m)
85 buf_new(BUF_MACRO, m->def, strlen(m->def));
86 bufs[nbufs - 1].macro = m;
89 static void buf_arg(char *arg, int mbuf)
91 buf_new(BUF_ARG, arg, strlen(arg));
92 bufs[nbufs - 1].arg_buf = mbuf;
95 static void buf_pop(void)
97 nbufs--;
98 if (nbufs) {
99 cur = bufs[nbufs - 1].cur;
100 len = bufs[nbufs - 1].len;
101 buf = bufs[nbufs - 1].buf;
105 static int buf_iseval(void)
107 int i;
108 for (i = nbufs - 1; i >= 0; i--)
109 if (bufs[i].type == BUF_EVAL)
110 return 1;
111 return 0;
/* return the st_size reported by fstat() for fd, or 0 if fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;

	if (fstat(fd, &st) != 0)
		return 0;
	return st.st_size;
}
/*
 * Read the file at path into a freshly allocated, NUL-terminated
 * buffer and push it as a BUF_FILE buffer.
 *
 * Returns 0 on success and -1 if the file cannot be opened.  Dies if
 * the buffer cannot be allocated.  A failing read() ends the input,
 * as it did before.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int n, nr = 0;
	int size;
	char *dat;

	if (fd == -1)
		return -1;
	size = file_size(fd) + 1;
	dat = malloc(size);
	if (!dat) {
		close(fd);
		die("nomem: cannot allocate file buffer\n");
	}
	/*
	 * Never read more than size - 1 bytes: the last byte is reserved
	 * for the terminator, even if the file holds more data than
	 * fstat() reported.
	 */
	while (nr < size - 1 && (n = read(fd, dat + nr, size - 1 - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Define the built-in macros (each with an empty body) and start
 * reading the file at path.  Returns the result of include_file().
 */
int cpp_init(char *path)
{
	static char *predefined[] = {"__STDC__", "__arm__", "__linux__"};
	int i;

	for (i = 0; i < (int) (sizeof(predefined) / sizeof(predefined[0])); i++)
		cpp_define(predefined[i], "");
	return include_file(path);
}
148 static void jumpws(void)
150 while (cur < len && isspace(buf[cur]))
151 cur++;
154 static void read_word(char *dst)
156 jumpws();
157 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
158 *dst++ = buf[cur++];
159 *dst = '\0';
162 static int jumpcomment(void)
164 if (buf[cur] == '/' && buf[cur + 1] == '*') {
165 while (++cur < len) {
166 if (buf[cur] == '*' && buf[cur + 1] == '/') {
167 cur += 2;
168 return 0;
172 if (buf[cur] == '/' && buf[cur + 1] == '/') {
173 while (++cur < len) {
174 if (buf[cur] == '\n') {
175 cur++;
176 return 0;
180 return 1;
183 static void read_tilleol(char *dst)
185 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
186 cur++;
187 while (cur < len && buf[cur] != '\n') {
188 if (buf[cur] == '\\' && buf[cur + 1] == '\n')
189 cur += 2;
190 else if (jumpcomment())
191 *dst++ = buf[cur++];
193 *dst = '\0';
/*
 * Copy the string s to d, including its terminator, and return a
 * pointer to the terminator written in d, so calls can be chained.
 */
static char *putstr(char *d, const char *s)
{
	while (*s)
		*d++ = *s++;
	*d = '\0';
	return d;
}
#define MAXLOCS		(1 << 10)

/* include search directories; locs[0] is the default one */
static char *locs[MAXLOCS] = {"/usr/include"};
static int nlocs = 1;

/*
 * Append s to the include search directories.  The pointer itself is
 * stored, not a copy of the string.
 *
 * The last slot of locs[] is kept unused: include_find() also reads
 * locs[nlocs] and relies on it being NULL.
 */
void cpp_addpath(char *s)
{
	if (nlocs >= MAXLOCS - 1)
		die("nomem: MAXLOCS reached!\n");
	locs[nlocs++] = s;
}
214 static int include_find(char *name, int std)
216 int i;
217 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
218 char path[1 << 10];
219 char *s;
220 s = path;
221 if (locs[i]) {
222 s = putstr(s, locs[i]);
223 *s++ = '/';
225 s = putstr(s, name);
226 if (!include_file(path))
227 return 0;
229 return -1;
232 static int jumpstr(void)
234 if (buf[cur] == '\'') {
235 while (cur < len && buf[++cur] != '\'')
236 if (buf[cur] == '\\')
237 cur++;
238 cur++;
239 return 0;
241 if (buf[cur] == '"') {
242 while (cur < len && buf[++cur] != '"')
243 if (buf[cur] == '\\')
244 cur++;
245 cur++;
246 return 0;
248 return 1;
251 static void readarg(char *s)
253 int depth = 0;
254 int beg = cur;
255 while (cur < len && (depth || buf[cur] != ',' && buf[cur] != ')')) {
256 if (!jumpstr() || !jumpcomment())
257 continue;
258 switch (buf[cur]) {
259 case '(':
260 case '[':
261 case '{':
262 cur++;
263 depth++;
264 break;
265 case ')':
266 case ']':
267 case '}':
268 cur++;
269 depth--;
270 break;
271 default:
272 cur++;
275 if (s) {
276 memcpy(s, buf + beg, cur - beg);
277 s[cur - beg] = '\0';
281 static int macro_find(char *name)
283 char *n = tab_get(&mtab, name);
284 if (!n)
285 return -1;
286 return container(n, struct macro, name) - macros;
289 static void macro_undef(char *name)
291 int i = macro_find(name);
292 if (i >= 0)
293 tab_del(&mtab, macros[i].name);
296 static int macro_new(char *name)
298 int i = macro_find(name);
299 if (i >= 0)
300 return i;
301 if (nmacros >= MAXDEFS)
302 die("nomem: MAXDEFS reached!\n");
303 i = nmacros++;
304 strcpy(macros[i].name, name);
305 tab_add(&mtab, macros[i].name);
306 return i;
309 static void macro_define(void)
311 char name[NAMELEN];
312 struct macro *d;
313 read_word(name);
314 d = &macros[macro_new(name)];
315 d->isfunc = 0;
316 d->nargs = 0;
317 if (buf[cur] == '(') {
318 cur++;
319 jumpws();
320 while (cur < len && buf[cur] != ')') {
321 readarg(d->args[d->nargs++]);
322 jumpws();
323 if (buf[cur] != ',')
324 break;
325 cur++;
326 jumpws();
328 cur++;
329 d->isfunc = 1;
331 read_tilleol(d->def);
334 int cpp_read(char *buf);
336 static char ebuf[BUFSIZE];
337 static int elen;
338 static int ecur;
340 static long evalexpr(void);
342 static int cpp_eval(void)
344 int bufid;
345 int ret;
346 char evalbuf[BUFSIZE];
347 read_tilleol(evalbuf);
348 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
349 bufid = nbufs;
350 elen = 0;
351 ecur = 0;
352 while (bufid < nbufs || (bufid == nbufs && cur < len))
353 elen += cpp_read(ebuf + elen);
354 ret = evalexpr();
355 buf_pop();
356 return ret;
359 static void jumpifs(int jumpelse)
361 int depth = 0;
362 while (cur < len) {
363 if (buf[cur] == '#') {
364 char cmd[NAMELEN];
365 cur++;
366 read_word(cmd);
367 if (!strcmp("else", cmd))
368 if (!depth && !jumpelse)
369 break;
370 if (!strcmp("elif", cmd))
371 if (!depth && !jumpelse && cpp_eval())
372 break;
373 if (!strcmp("endif", cmd)) {
374 if (!depth)
375 break;
376 else
377 depth--;
379 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
380 !strcmp("if", cmd))
381 depth++;
382 continue;
384 if (!jumpcomment())
385 continue;
386 if (!jumpstr())
387 continue;
388 cur++;
392 static int cpp_cmd(void)
394 char cmd[NAMELEN];
395 cur++;
396 read_word(cmd);
397 if (!strcmp("define", cmd)) {
398 macro_define();
399 return 0;
401 if (!strcmp("undef", cmd)) {
402 char name[NAMELEN];
403 read_word(name);
404 macro_undef(name);
405 return 0;
407 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
408 !strcmp("if", cmd)) {
409 char name[NAMELEN];
410 int matched = 0;
411 if (cmd[2]) {
412 int not = cmd[2] == 'n';
413 read_word(name);
414 matched = not ? macro_find(name) < 0 :
415 macro_find(name) >= 0;
416 } else {
417 matched = cpp_eval();
419 if (!matched)
420 jumpifs(0);
421 return 0;
423 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
424 jumpifs(1);
425 return 0;
427 if (!strcmp("endif", cmd))
428 return 0;
429 if (!strcmp("include", cmd)) {
430 char file[NAMELEN];
431 char *s, *e;
432 jumpws();
433 s = buf + cur + 1;
434 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
435 memcpy(file, s, e - s);
436 file[e - s] = '\0';
437 cur += e - s + 2;
438 if (include_find(file, *e == '>') == -1)
439 die("cannot include file\n");
440 return 0;
442 return 1;
445 static int macro_arg(struct macro *m, char *arg)
447 int i;
448 for (i = 0; i < m->nargs; i++)
449 if (!strcmp(arg, m->args[i]))
450 return i;
451 return -1;
454 static int buf_arg_find(char *name)
456 int i;
457 for (i = nbufs - 1; i >= 0; i--) {
458 struct buf *mbuf = &bufs[i];
459 struct macro *m = mbuf->macro;
460 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
461 return i;
462 if (mbuf->type == BUF_ARG)
463 i = mbuf->arg_buf;
465 return -1;
468 static void macro_expand(void)
470 char name[NAMELEN];
471 struct macro *m;
472 int mbuf;
473 read_word(name);
474 if ((mbuf = buf_arg_find(name)) >= 0) {
475 int arg = macro_arg(bufs[mbuf].macro, name);
476 char *dat = bufs[mbuf].args[arg];
477 buf_arg(dat, mbuf);
478 return;
480 m = &macros[macro_find(name)];
481 if (!m->isfunc) {
482 buf_macro(m);
483 return;
485 jumpws();
486 if (buf[cur] == '(') {
487 int i = 0;
488 struct buf *mbuf = &bufs[nbufs];
489 cur++;
490 jumpws();
491 while (cur < len && buf[cur] != ')') {
492 readarg(mbuf->args[i++]);
493 jumpws();
494 if (buf[cur] != ',')
495 break;
496 cur++;
497 jumpws();
499 while (i < m->nargs)
500 mbuf->args[i++][0] = '\0';
501 cur++;
502 buf_macro(m);
506 static int buf_expanding(char *macro)
508 int i;
509 for (i = nbufs - 1; i >= 0; i--) {
510 if (bufs[i].type == BUF_ARG)
511 return 0;
512 if (bufs[i].type == BUF_MACRO &&
513 !strcmp(macro, bufs[i].macro->name))
514 return 1;
516 return 0;
/*
 * Return nonzero if word should be expanded: it is a parameter of a
 * macro on the buffer stack, or it is a defined macro that
 * buf_expanding() does not report as already being expanded.
 */
static int expandable(char *word)
{
	if (buf_arg_find(word) >= 0)
		return 1;
	if (buf_expanding(word))
		return 0;
	return macro_find(word) != -1;
}
526 void cpp_define(char *name, char *def)
528 char tmp_buf[MACROLEN];
529 char *s = tmp_buf;
530 s = putstr(s, name);
531 *s++ = '\t';
532 s = putstr(s, def);
533 buf_new(BUF_TEMP, tmp_buf, s - tmp_buf);
534 macro_define();
535 buf_pop();
538 static int seen_macro;
540 static int hunk_off;
541 static int hunk_len;
543 int cpp_read(char *s)
545 int old;
546 if (seen_macro) {
547 seen_macro = 0;
548 macro_expand();
550 if (cur == len) {
551 struct buf *cbuf = &bufs[nbufs - 1];
552 if (nbufs < 2)
553 return -1;
554 if (cbuf->type & BUF_FILE)
555 free(buf);
556 buf_pop();
558 old = cur;
559 if (buf[cur] == '#')
560 if (!cpp_cmd())
561 return 0;
562 while (cur < len) {
563 if (buf[cur] == '#')
564 break;
565 if (!jumpcomment())
566 continue;
567 if (!jumpstr())
568 continue;
569 if (isalpha(buf[cur]) || buf[cur] == '_') {
570 char word[NAMELEN];
571 read_word(word);
572 if (expandable(word)) {
573 cur -= strlen(word);
574 seen_macro = 1;
575 break;
577 if (buf_iseval() && !strcmp("defined", word)) {
578 int parens = 0;
579 jumpws();
580 if (buf[cur] == '(') {
581 parens = 1;
582 cur++;
584 read_word(word);
585 if (parens) {
586 jumpws();
587 cur++;
590 continue;
592 cur++;
594 memcpy(s, buf + old, cur - old);
595 s[cur - old] = '\0';
596 if (!buf_iseval()) {
597 hunk_off += hunk_len;
598 hunk_len = cur - old;
600 return cur - old;
603 /* preprocessor constant expression evaluation */
605 static char etok[NAMELEN];
606 static int enext;
608 static char *tok2[] = {
609 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
612 static int eval_tok(void)
614 char *s = etok;
615 int i;
616 while (ecur < elen) {
617 while (ecur < elen && isspace(ebuf[ecur]))
618 ecur++;
619 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
620 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
621 ebuf[ecur - 1] != '/'))
622 ecur++;
623 continue;
625 break;
627 if (ecur >= elen)
628 return TOK_EOF;
629 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
630 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
631 *s++ = ebuf[ecur++];
632 *s = '\0';
633 return TOK_NAME;
635 if (isdigit(ebuf[ecur])) {
636 while (isdigit(ebuf[ecur]))
637 *s++ = ebuf[ecur++];
638 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
639 ecur++;
640 return TOK_NUM;
642 for (i = 0; i < ARRAY_SIZE(tok2); i++)
643 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
644 int ret = TOK2(tok2[i]);
645 ecur += 2;
646 return ret;
648 return ebuf[ecur++];
651 static int eval_see(void)
653 if (enext == -1)
654 enext = eval_tok();
655 return enext;
658 static int eval_get(void)
660 if (enext != -1) {
661 int ret = enext;
662 enext = -1;
663 return ret;
665 return eval_tok();
668 static long eval_num(void)
670 return atol(etok);
/*
 * Consume the next token if it is tok.  Returns 0 if it was consumed
 * and 1 if the next token is something else.
 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume the next token if it is tok; a different token is silently left in place */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
687 static char *eval_id(void)
689 return etok;
692 static long evalcexpr(void);
694 static long evalatom(void)
696 if (!eval_jmp(TOK_NUM))
697 return eval_num();
698 if (!eval_jmp(TOK_NAME)) {
699 int parens = !eval_jmp('(');
700 long ret;
701 eval_expect(TOK_NAME);
702 ret = macro_find(eval_id()) >= 0;
703 if (parens)
704 eval_expect(')');
705 return ret;
707 if (!eval_jmp('(')) {
708 long ret = evalcexpr();
709 eval_expect(')');
710 return ret;
712 return -1;
/* parse a unary expression: any number of '!', '-' and '~' prefixes before an atom */
static long evalpre(void)
{
	switch (eval_see()) {
	case '!':
		eval_get();
		return !evalpre();
	case '-':
		eval_get();
		return -evalpre();
	case '~':
		eval_get();
		return ~evalpre();
	default:
		return evalatom();
	}
}
/*
 * Parse a sequence of unary expressions joined by '*', '/' and '%',
 * evaluated left to right.  Division or remainder by zero yields 0
 * instead of performing the operation, whose behaviour is undefined.
 */
static long evalmul(void)
{
	long ret = evalpre();

	while (1) {
		long rhs;

		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			rhs = evalpre();
			ret = rhs ? ret / rhs : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			rhs = evalpre();
			ret = rhs ? ret % rhs : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* parse a sequence of multiplicative expressions joined by '+' and '-', left to right */
static long evaladd(void)
{
	long sum = evalmul();

	for (;;) {
		if (!eval_jmp('+'))
			sum += evalmul();
		else if (!eval_jmp('-'))
			sum -= evalmul();
		else
			return sum;
	}
}
/* parse a sequence of additive expressions joined by "<<" and ">>", left to right */
static long evalshift(void)
{
	long val = evaladd();

	for (;;) {
		if (!eval_jmp(TOK2("<<")))
			val <<= evaladd();
		else if (!eval_jmp(TOK2(">>")))
			val >>= evaladd();
		else
			return val;
	}
}
/* parse a sequence of shift expressions joined by '<', '>', "<=" and ">=", left to right */
static long evalcmp(void)
{
	long val = evalshift();

	for (;;) {
		if (!eval_jmp('<'))
			val = val < evalshift();
		else if (!eval_jmp('>'))
			val = val > evalshift();
		else if (!eval_jmp(TOK2("<=")))
			val = val <= evalshift();
		else if (!eval_jmp(TOK2(">=")))
			val = val >= evalshift();
		else
			return val;
	}
}
/* parse a sequence of relational expressions joined by "==" and "!=", left to right */
static long evaleq(void)
{
	long val = evalcmp();

	for (;;) {
		if (!eval_jmp(TOK2("==")))
			val = val == evalcmp();
		else if (!eval_jmp(TOK2("!=")))
			val = val != evalcmp();
		else
			return val;
	}
}
/* parse a sequence of equality expressions joined by '&' */
static long evalbitand(void)
{
	long val = evaleq();

	while (eval_jmp('&') == 0) {
		long rhs = evaleq();

		val = val & rhs;
	}
	return val;
}
/* parse a sequence of bitwise-and expressions joined by '^' */
static long evalxor(void)
{
	long val = evalbitand();

	while (eval_jmp('^') == 0) {
		long rhs = evalbitand();

		val = val ^ rhs;
	}
	return val;
}
/* parse a sequence of exclusive-or expressions joined by '|' */
static long evalbitor(void)
{
	long val = evalxor();

	while (eval_jmp('|') == 0) {
		long rhs = evalxor();

		val = val | rhs;
	}
	return val;
}
/*
 * Parse a sequence of bitwise-or expressions joined by "&&" and
 * return 1 if all of them are nonzero, 0 otherwise.
 *
 * The right operand is always parsed, even when the left one is zero:
 * letting C's own && skip the call would leave the operand's tokens
 * unread and cut the rest of the expression short.
 */
static long evaland(void)
{
	long ret = evalbitor();

	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();

		ret = ret && rhs;
	}
	return ret;
}
/*
 * Parse a sequence of logical-and expressions joined by "||" and
 * return 1 if any of them is nonzero, 0 otherwise.
 *
 * The right operand is always parsed, even when the left one is
 * nonzero: letting C's own || skip the call would leave the operand's
 * tokens unread.
 */
static long evalor(void)
{
	long ret = evaland();

	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();

		ret = ret || rhs;
	}
	return ret;
}
863 static long evalcexpr(void)
865 long ret = evalor();
866 if (eval_jmp('?'))
867 return ret;
868 if (ret)
869 return evalor();
870 while (eval_get() != ':')
872 return evalor();
875 static long evalexpr(void)
877 enext = -1;
878 return evalcexpr();
/*
 * Return the 1-based line number of offset off in the string s: one
 * plus the number of newlines before s + off.
 */
static int buf_loc(const char *s, int off)
{
	const char *e = s + off;
	int n = 1;

	while ((s = strchr(s, '\n')) && s < e) {
		n++;
		s++;
	}
	return n;
}
892 int cpp_loc(char *s, long addr)
894 int line = -1;
895 int i;
896 for (i = nbufs - 1; i > 0; i--)
897 if (bufs[i].type == BUF_FILE)
898 break;
899 if (addr >= hunk_off && i == nbufs - 1)
900 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
901 else
902 line = buf_loc(bufs[i].buf, bufs[i].cur);
903 sprintf(s, "%s:%d: ", bufs[i].path, line);
904 return strlen(s);