ncc: use label identifiers more carefully
[neatcc.git] / cpp.c
blob9419a9eeb8b4f40db36b1d3bfe85cdc4b9ef0855
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stdarg.h>
4 #include <stddef.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include "mem.h"
12 #include "ncc.h"
13 #include "tok.h"
/*
 * The buffer currently being read.  buf_new() saves these three values
 * into the top bufs[] entry before switching to a new buffer and
 * buf_pop() restores them.
 */
static char *buf;	/* contents of the current buffer */
static int len;		/* number of bytes in buf */
static int cur;		/* read offset into buf */
19 static struct macro {
20 char name[NAMELEN]; /* macro name */
21 char def[MDEFLEN]; /* macro definition */
22 char args[NARGS][NAMELEN];
23 int nargs; /* number of arguments */
24 int isfunc; /* macro is a function */
25 int undef; /* macro is removed */
26 } macros[NDEFS];
27 static int mcount = 1; /* number of macros */
28 static int mhead[256]; /* macro hash table heads */
29 static int mnext[NDEFS]; /* macro hash table next entries */
/* buffer types stored in struct buf's type field */
#define BUF_FILE	0	/* contents of a source file (buf_file()) */
#define BUF_MACRO	1	/* the definition of a macro (buf_macro()) */
#define BUF_ARG		2	/* an argument passed to a macro (buf_arg()) */
#define BUF_EVAL	3	/* an #if/#elif expression (cpp_eval()) */
#define BUF_TEMP	4	/* temporary text (cpp_define()) */
37 static struct buf {
38 char *buf;
39 int len;
40 int cur;
41 int type;
42 /* for BUF_FILE */
43 char path[NAMELEN];
44 /* for BUF_MACRO */
45 struct macro *macro;
46 char args[NARGS][MARGLEN]; /* arguments passed to a macro */
47 /* for BUF_ARG */
48 int arg_buf; /* the bufs index of the owning macro */
49 } bufs[NBUFS];
50 static int nbufs;
51 static int bufs_limit = 1; /* cpp_read() limit; useful in cpp_eval() */
/*
 * Print a printf-style message to standard error and exit with status 1.
 *
 * The message is formatted into a fixed buffer; vsnprintf() truncates
 * overlong messages instead of overflowing it.
 */
void die(char *fmt, ...)
{
	va_list ap;
	char msg[512];
	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);
	write(2, msg, strlen(msg));
	exit(1);
}
64 static void buf_new(int type, char *dat, int dlen)
66 if (nbufs) {
67 bufs[nbufs - 1].buf = buf;
68 bufs[nbufs - 1].cur = cur;
69 bufs[nbufs - 1].len = len;
71 if (nbufs >= NBUFS)
72 die("nomem: NBUFS reached!\n");
73 nbufs++;
74 cur = 0;
75 buf = dat;
76 len = dlen;
77 bufs[nbufs - 1].type = type;
80 static void buf_file(char *path, char *dat, int dlen)
82 buf_new(BUF_FILE, dat, dlen);
83 strcpy(bufs[nbufs - 1].path, path ? path : "");
86 static void buf_macro(struct macro *m)
88 buf_new(BUF_MACRO, m->def, strlen(m->def));
89 bufs[nbufs - 1].macro = m;
92 static void buf_arg(char *arg, int mbuf)
94 buf_new(BUF_ARG, arg, strlen(arg));
95 bufs[nbufs - 1].arg_buf = mbuf;
98 static void buf_pop(void)
100 nbufs--;
101 if (nbufs) {
102 cur = bufs[nbufs - 1].cur;
103 len = bufs[nbufs - 1].len;
104 buf = bufs[nbufs - 1].buf;
108 static int buf_iseval(void)
110 int i;
111 for (i = nbufs - 1; i >= 0; i--)
112 if (bufs[i].type == BUF_EVAL)
113 return 1;
114 return 0;
/* return the size fstat() reports for fd, or zero if fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;
	return fstat(fd, &st) ? 0 : st.st_size;
}
/*
 * Read the file at path into a newly allocated, NUL-terminated buffer
 * and push it as a BUF_FILE buffer.
 *
 * Returns 0 on success and -1 if the file cannot be opened or the
 * buffer cannot be allocated.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int n = 0, nr = 0;
	char *dat;
	int size;
	if (fd == -1)
		return -1;
	size = file_size(fd) + 1;
	dat = malloc(size);
	if (!dat) {
		close(fd);
		return -1;
	}
	/*
	 * Never read more than size - 1 bytes so that the terminating
	 * NUL below always fits, even if the file holds more data than
	 * fstat() reported.
	 */
	while (nr < size - 1 && (n = read(fd, dat + nr, size - 1 - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Start preprocessing with the file at path; returns what
 * include_file() returns (0 on success, -1 on failure).
 */
int cpp_init(char *path)
{
	int status = include_file(path);
	return status;
}
148 static int jumpws(void)
150 int old = cur;
151 while (cur < len && isspace(buf[cur]))
152 cur++;
153 return cur == old;
156 static void read_word(char *dst)
158 jumpws();
159 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
160 *dst++ = buf[cur++];
161 *dst = '\0';
164 static int jumpcomment(void)
166 if (buf[cur] == '/' && buf[cur + 1] == '*') {
167 while (++cur < len) {
168 if (buf[cur] == '*' && buf[cur + 1] == '/') {
169 cur += 2;
170 return 0;
174 if (buf[cur] == '/' && buf[cur + 1] == '/') {
175 while (++cur < len)
176 if (buf[cur] == '\n')
177 break;
178 return 0;
180 return 1;
183 static int jumpstr(void)
185 if (buf[cur] == '\'') {
186 while (cur < len && buf[++cur] != '\'')
187 if (buf[cur] == '\\')
188 cur++;
189 cur++;
190 return 0;
192 if (buf[cur] == '"') {
193 while (cur < len && buf[++cur] != '"')
194 if (buf[cur] == '\\')
195 cur++;
196 cur++;
197 return 0;
199 return 1;
202 static void read_tilleol(char *dst)
204 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
205 cur++;
206 while (cur < len && buf[cur] != '\n') {
207 int last = cur;
208 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
209 cur += 2;
210 continue;
212 if (!jumpstr()) {
213 memcpy(dst, buf + last, cur - last);
214 dst += cur - last;
215 continue;
217 if (!jumpcomment())
218 continue;
219 *dst++ = buf[cur++];
221 *dst = '\0';
224 static char *locs[NLOCS] = {};
225 static int nlocs = 0;
227 void cpp_addpath(char *s)
229 locs[nlocs++] = s;
232 static int include_find(char *name, int std)
234 int i;
235 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
236 char path[1 << 10];
237 if (locs[i])
238 sprintf(path, "%s/%s", locs[i], name);
239 else
240 strcpy(path, name);
241 if (!include_file(path))
242 return 0;
244 return -1;
247 static void readarg(char *s)
249 int depth = 0;
250 int beg = cur;
251 while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) {
252 if (!jumpstr() || !jumpcomment())
253 continue;
254 switch (buf[cur++]) {
255 case '(':
256 case '[':
257 case '{':
258 depth++;
259 break;
260 case ')':
261 case ']':
262 case '}':
263 depth--;
264 break;
267 if (s) {
268 memcpy(s, buf + beg, cur - beg);
269 s[cur - beg] = '\0';
273 /* find a macro; if undef is nonzero, search #undef-ed macros too */
274 static int macro_find(char *name, int undef)
276 int i = mhead[(unsigned char) name[0]];
277 while (i > 0) {
278 if (!strcmp(name, macros[i].name))
279 if (!macros[i].undef || undef)
280 return i;
281 i = mnext[i];
283 return -1;
286 static void macro_undef(char *name)
288 int i = macro_find(name, 0);
289 if (i >= 0)
290 macros[i].undef = 1;
293 static int macro_new(char *name)
295 int i = macro_find(name, 1);
296 if (i >= 0)
297 return i;
298 if (mcount >= NDEFS)
299 die("nomem: NDEFS reached!\n");
300 i = mcount++;
301 strcpy(macros[i].name, name);
302 mnext[i] = mhead[(unsigned char) name[0]];
303 mhead[(unsigned char) name[0]] = i;
304 return i;
307 static void macro_define(void)
309 char name[NAMELEN];
310 struct macro *d;
311 read_word(name);
312 d = &macros[macro_new(name)];
313 d->isfunc = 0;
314 d->nargs = 0;
315 if (buf[cur] == '(') {
316 cur++;
317 jumpws();
318 while (cur < len && buf[cur] != ')') {
319 readarg(d->args[d->nargs++]);
320 jumpws();
321 if (buf[cur] != ',')
322 break;
323 cur++;
324 jumpws();
326 cur++;
327 d->isfunc = 1;
329 read_tilleol(d->def);
332 static char ebuf[MARGLEN];
333 static int elen;
334 static int ecur;
336 static long evalexpr(void);
338 static int cpp_eval(void)
340 char evalbuf[MARGLEN];
341 int old_limit;
342 int ret, clen;
343 char *cbuf;
344 read_tilleol(evalbuf);
345 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
346 elen = 0;
347 ecur = 0;
348 old_limit = bufs_limit;
349 bufs_limit = nbufs;
350 while (!cpp_read(&cbuf, &clen)) {
351 memcpy(ebuf + elen, cbuf, clen);
352 elen += clen;
354 bufs_limit = old_limit;
355 ret = evalexpr();
356 buf_pop();
357 return ret;
360 static void jumpifs(int jumpelse)
362 int depth = 0;
363 while (cur < len) {
364 if (buf[cur] == '#') {
365 char cmd[NAMELEN];
366 cur++;
367 read_word(cmd);
368 if (!strcmp("else", cmd))
369 if (!depth && !jumpelse)
370 break;
371 if (!strcmp("elif", cmd))
372 if (!depth && !jumpelse && cpp_eval())
373 break;
374 if (!strcmp("endif", cmd)) {
375 if (!depth)
376 break;
377 else
378 depth--;
380 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
381 !strcmp("if", cmd))
382 depth++;
383 continue;
385 if (!jumpcomment())
386 continue;
387 if (!jumpstr())
388 continue;
389 cur++;
393 static int cpp_cmd(void)
395 char cmd[NAMELEN];
396 cur++;
397 read_word(cmd);
398 if (!strcmp("define", cmd)) {
399 macro_define();
400 return 0;
402 if (!strcmp("undef", cmd)) {
403 char name[NAMELEN];
404 read_word(name);
405 macro_undef(name);
406 return 0;
408 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
409 !strcmp("if", cmd)) {
410 char name[NAMELEN];
411 int matched = 0;
412 if (cmd[2]) {
413 int not = cmd[2] == 'n';
414 read_word(name);
415 matched = not ? macro_find(name, 0) < 0 :
416 macro_find(name, 0) >= 0;
417 } else {
418 matched = cpp_eval();
420 if (!matched)
421 jumpifs(0);
422 return 0;
424 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
425 jumpifs(1);
426 return 0;
428 if (!strcmp("endif", cmd))
429 return 0;
430 if (!strcmp("include", cmd)) {
431 char file[NAMELEN];
432 char *s, *e;
433 jumpws();
434 s = buf + cur + 1;
435 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
436 memcpy(file, s, e - s);
437 file[e - s] = '\0';
438 cur += e - s + 2;
439 if (include_find(file, *e == '>') == -1)
440 err("cannot include <%s>\n", file);
441 return 0;
443 return 1;
446 static int macro_arg(struct macro *m, char *arg)
448 int i;
449 for (i = 0; i < m->nargs; i++)
450 if (!strcmp(arg, m->args[i]))
451 return i;
452 return -1;
455 static int buf_arg_find(char *name)
457 int i;
458 for (i = nbufs - 1; i >= 0; i--) {
459 struct buf *mbuf = &bufs[i];
460 struct macro *m = mbuf->macro;
461 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
462 return i;
463 if (mbuf->type == BUF_ARG)
464 i = mbuf->arg_buf;
466 return -1;
469 static void macro_expand(char *name)
471 struct macro *m;
472 int mbuf;
473 if ((mbuf = buf_arg_find(name)) >= 0) {
474 int arg = macro_arg(bufs[mbuf].macro, name);
475 char *dat = bufs[mbuf].args[arg];
476 buf_arg(dat, mbuf);
477 return;
479 m = &macros[macro_find(name, 0)];
480 if (!m->isfunc) {
481 buf_macro(m);
482 return;
484 jumpws();
485 if (buf[cur] == '(') {
486 int i = 0;
487 struct buf *mbuf = &bufs[nbufs];
488 cur++;
489 jumpws();
490 while (cur < len && buf[cur] != ')') {
491 readarg(mbuf->args[i++]);
492 jumpws();
493 if (buf[cur] != ',')
494 break;
495 cur++;
496 jumpws();
498 while (i < m->nargs)
499 mbuf->args[i++][0] = '\0';
500 cur++;
501 buf_macro(m);
505 static int buf_expanding(char *macro)
507 int i;
508 for (i = nbufs - 1; i >= 0; i--) {
509 if (bufs[i].type == BUF_ARG)
510 return 0;
511 if (bufs[i].type == BUF_MACRO &&
512 !strcmp(macro, bufs[i].macro->name))
513 return 1;
515 return 0;
518 /* return 1 for plain macros and arguments and 2 for function macros */
519 static int expandable(char *word)
521 int i;
522 if (buf_arg_find(word) >= 0)
523 return 1;
524 if (buf_expanding(word))
525 return 0;
526 i = macro_find(word, 0);
527 return i >= 0 ? macros[i].isfunc + 1 : 0;
530 void cpp_define(char *name, char *def)
532 char tmp_buf[MDEFLEN];
533 sprintf(tmp_buf, "%s\t%s", name, def);
534 buf_new(BUF_TEMP, tmp_buf, strlen(tmp_buf));
535 macro_define();
536 buf_pop();
539 static int seen_macro; /* seen a macro; 2 if a function macro */
540 static char seen_name[NAMELEN]; /* the name of the last macro */
542 static int hunk_off;
543 static int hunk_len;
545 int cpp_read(char **obuf, int *olen)
547 int old, end;
548 int jump_name = 0;
549 *olen = 0;
550 *obuf = "";
551 if (seen_macro == 1) {
552 macro_expand(seen_name);
553 seen_macro = 0;
555 if (cur == len) {
556 struct buf *cbuf = &bufs[nbufs - 1];
557 if (nbufs < bufs_limit + 1)
558 return -1;
559 if (cbuf->type == BUF_FILE)
560 free(buf);
561 buf_pop();
563 old = cur;
564 if (buf[cur] == '#')
565 if (!cpp_cmd())
566 return 0;
567 while (cur < len) {
568 if (!jumpws())
569 continue;
570 if (buf[cur] == '#')
571 break;
572 if (!jumpcomment())
573 continue;
574 if (seen_macro == 2) {
575 if (buf[cur] == '(')
576 macro_expand(seen_name);
577 seen_macro = 0;
578 old = cur;
579 continue;
581 if (!jumpstr())
582 continue;
583 if (isalnum(buf[cur]) || buf[cur] == '_') {
584 char word[NAMELEN];
585 read_word(word);
586 seen_macro = expandable(word);
587 if (seen_macro) {
588 strcpy(seen_name, word);
589 jump_name = 1;
590 break;
592 if (buf_iseval() && !strcmp("defined", word)) {
593 int parens = 0;
594 jumpws();
595 if (buf[cur] == '(') {
596 parens = 1;
597 cur++;
599 read_word(word);
600 if (parens) {
601 jumpws();
602 cur++;
605 continue;
607 cur++;
609 /* macros are expanded later; ignoring their names */
610 end = jump_name ? cur - strlen(seen_name) : cur;
611 if (!buf_iseval()) {
612 hunk_off += hunk_len;
613 hunk_len = end - old;
615 *obuf = buf + old;
616 *olen = end - old;
617 return 0;
620 /* preprocessor constant expression evaluation */
622 static char etok[NAMELEN];
623 static int enext;
625 static char *tok2[] = {
626 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
629 static int eval_tok(void)
631 char *s = etok;
632 int i;
633 while (ecur < elen) {
634 while (ecur < elen && isspace(ebuf[ecur]))
635 ecur++;
636 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
637 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
638 ebuf[ecur - 1] != '/'))
639 ecur++;
640 continue;
642 break;
644 if (ecur >= elen)
645 return TOK_EOF;
646 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
647 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
648 *s++ = ebuf[ecur++];
649 *s = '\0';
650 return TOK_NAME;
652 if (isdigit(ebuf[ecur])) {
653 while (isdigit(ebuf[ecur]))
654 *s++ = ebuf[ecur++];
655 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
656 ecur++;
657 *s = '\0';
658 return TOK_NUM;
660 for (i = 0; i < LEN(tok2); i++)
661 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
662 int ret = TOK2(tok2[i]);
663 ecur += 2;
664 return ret;
666 return ebuf[ecur++];
669 static int eval_see(void)
671 if (enext == -1)
672 enext = eval_tok();
673 return enext;
676 static int eval_get(void)
678 if (enext != -1) {
679 int ret = enext;
680 enext = -1;
681 return ret;
683 return eval_tok();
686 static long eval_num(void)
688 return atol(etok);
/*
 * Consume the next token if it is tok.  Returns 0 if it was consumed
 * and 1 if the next token is something else.
 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume tok if it is the next token; a mismatch is silently ignored */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
705 static char *eval_id(void)
707 return etok;
710 static long evalcexpr(void);
712 static long evalatom(void)
714 if (!eval_jmp(TOK_NUM))
715 return eval_num();
716 if (!eval_jmp(TOK_NAME)) {
717 int parens = !eval_jmp('(');
718 long ret;
719 eval_expect(TOK_NAME);
720 ret = macro_find(eval_id(), 0) >= 0;
721 if (parens)
722 eval_expect(')');
723 return ret;
725 if (!eval_jmp('(')) {
726 long ret = evalcexpr();
727 eval_expect(')');
728 return ret;
730 return -1;
/* parse a unary expression: any number of !, - and ~ before an atom */
static long evalpre(void)
{
	long val;
	if (!eval_jmp('!')) {
		val = evalpre();
		return !val;
	}
	if (!eval_jmp('-')) {
		val = evalpre();
		return -val;
	}
	if (!eval_jmp('~')) {
		val = evalpre();
		return ~val;
	}
	return evalatom();
}
/*
 * Parse a left-associative chain of *, / and % over unary expressions.
 *
 * Division and remainder by zero yield zero instead of performing the
 * operation, whose behavior is undefined and usually kills the process.
 */
static long evalmul(void)
{
	long ret = evalpre();
	while (1) {
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			long d = evalpre();
			ret = d ? ret / d : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			long d = evalpre();
			ret = d ? ret % d : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* parse a left-associative chain of + and - over evalmul() operands */
static long evaladd(void)
{
	long sum = evalmul();
	for (;;) {
		if (!eval_jmp('+'))
			sum = sum + evalmul();
		else if (!eval_jmp('-'))
			sum = sum - evalmul();
		else
			break;
	}
	return sum;
}
/* parse a left-associative chain of << and >> over evaladd() operands */
static long evalshift(void)
{
	long val = evaladd();
	for (;;) {
		if (!eval_jmp(TOK2("<<")))
			val = val << evaladd();
		else if (!eval_jmp(TOK2(">>")))
			val = val >> evaladd();
		else
			break;
	}
	return val;
}
/* parse a left-associative chain of <, >, <= and >= over evalshift() operands */
static long evalcmp(void)
{
	long val = evalshift();
	for (;;) {
		long rhs;
		if (!eval_jmp('<')) {
			rhs = evalshift();
			val = val < rhs;
		} else if (!eval_jmp('>')) {
			rhs = evalshift();
			val = val > rhs;
		} else if (!eval_jmp(TOK2("<="))) {
			rhs = evalshift();
			val = val <= rhs;
		} else if (!eval_jmp(TOK2(">="))) {
			rhs = evalshift();
			val = val >= rhs;
		} else {
			break;
		}
	}
	return val;
}
/* parse a left-associative chain of == and != over evalcmp() operands */
static long evaleq(void)
{
	long val = evalcmp();
	for (;;) {
		long rhs;
		if (!eval_jmp(TOK2("=="))) {
			rhs = evalcmp();
			val = val == rhs;
		} else if (!eval_jmp(TOK2("!="))) {
			rhs = evalcmp();
			val = val != rhs;
		} else {
			break;
		}
	}
	return val;
}
/* parse a chain of bitwise & over evaleq() operands */
static long evalbitand(void)
{
	long val = evaleq();
	for (;;) {
		if (eval_jmp('&'))
			break;
		val = val & evaleq();
	}
	return val;
}
/* parse a chain of bitwise ^ over evalbitand() operands */
static long evalxor(void)
{
	long val = evalbitand();
	for (;;) {
		if (eval_jmp('^'))
			break;
		val = val ^ evalbitand();
	}
	return val;
}
/* parse a chain of bitwise | over evalxor() operands */
static long evalbitor(void)
{
	long val = evalxor();
	for (;;) {
		if (eval_jmp('|'))
			break;
		val = val | evalxor();
	}
	return val;
}
/*
 * Parse a chain of && over evalbitor() operands.
 *
 * The right operand is always parsed, even when the left one is zero:
 * relying on the short-circuit of C's && would leave the operand's
 * tokens unread and derail the rest of the expression (for instance
 * "0 && 1 || 1").  Its value is therefore computed even when it cannot
 * change the result.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Parse a chain of || over evaland() operands.
 *
 * The right operand is always parsed, even when the left one is
 * nonzero: relying on the short-circuit of C's || would leave the
 * operand's tokens unread and derail the rest of the expression (for
 * instance "(1 || 0) && 0").  Its value is therefore computed even
 * when it cannot change the result.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
/*
 * Parse a conditional expression: cond, optionally followed by
 * "? then : else".
 *
 * Both branches are always parsed so that all their tokens are
 * consumed whichever one is selected, and a missing ':' cannot make
 * the parser spin at the end of the input.  The branches are
 * conditional expressions themselves, so the operator nests.
 */
static long evalcexpr(void)
{
	long ret = evalor();
	long yes, no;
	if (eval_jmp('?'))
		return ret;
	yes = evalcexpr();
	eval_expect(':');
	no = evalcexpr();
	return ret ? yes : no;
}
893 static long evalexpr(void)
895 enext = -1;
896 return evalcexpr();
/*
 * Return the one-based line number of offset off in the string s, i.e.
 * one plus the number of newlines before s + off.
 */
static int buf_loc(char *s, int off)
{
	char *end = s + off;
	char *nl;
	int line = 1;
	for (nl = strchr(s, '\n'); nl && nl < end; nl = strchr(nl + 1, '\n'))
		line++;
	return line;
}
910 char *cpp_loc(long addr)
912 static char loc[256];
913 int line = -1;
914 int i;
915 for (i = nbufs - 1; i > 0; i--)
916 if (bufs[i].type == BUF_FILE)
917 break;
918 if (addr >= hunk_off && i == nbufs - 1)
919 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
920 else
921 line = buf_loc(bufs[i].buf, bufs[i].cur);
922 sprintf(loc, "%s:%d", bufs[i].path, line);
923 return loc;