out: exit if there is no room for more relocations or symbols
[neatcc.git] / cpp.c
blob19d49235115090d71e31c77b2cc649db26fcdaa1
1 /* neatcc preprocessor */
2 #include <ctype.h>
3 #include <fcntl.h>
4 #include <stdarg.h>
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <unistd.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include "mem.h"
13 #include "ncc.h"
14 #include "tok.h"
/* the input buffer being scanned; buf_new() mirrors it into bufs[] on a push */
static char *buf;	/* text of the current buffer */
static int len;		/* number of bytes in buf */
static int cur;		/* scan position within buf */
20 static struct macro {
21 char name[NAMELEN]; /* macro name */
22 char def[MDEFLEN]; /* macro definition */
23 char args[NARGS][NAMELEN];
24 int nargs; /* number of arguments */
25 int isfunc; /* macro is a function */
26 int undef; /* macro is removed */
27 } macros[NDEFS];
28 static int mcount = 1; /* number of macros */
29 static int mhead[256]; /* macro hash table heads */
30 static int mnext[NDEFS]; /* macro hash table next entries */
32 #define BUF_FILE 0
33 #define BUF_MACRO 1
34 #define BUF_ARG 2
35 #define BUF_EVAL 3
36 #define BUF_TEMP 4
38 /* preprocessing input buffers for files, macros and macro arguments */
39 static struct buf {
40 char *buf;
41 int len;
42 int cur;
43 int type;
44 /* for BUF_FILE */
45 char path[NAMELEN];
46 /* for BUF_MACRO */
47 struct macro *macro;
48 char args[NARGS][MARGLEN]; /* arguments passed to a macro */
49 /* for BUF_ARG */
50 int arg_buf; /* the bufs index of the owning macro */
51 } bufs[NBUFS];
52 static int nbufs;
53 static int bufs_limit = 1; /* cpp_read() limit; useful in cpp_eval() */
/*
 * Format a printf-style message, write it to file descriptor 2 and
 * terminate the process with exit status 1.
 *
 * The message is built in a fixed 512-byte buffer; anything longer
 * is truncated rather than written past the end of the buffer.
 */
void die(char *fmt, ...)
{
	va_list ap;
	char msg[512];
	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);
	write(2, msg, strlen(msg));
	exit(1);
}
66 static void buf_new(int type, char *dat, int dlen)
68 if (nbufs) {
69 bufs[nbufs - 1].buf = buf;
70 bufs[nbufs - 1].cur = cur;
71 bufs[nbufs - 1].len = len;
73 if (nbufs >= NBUFS)
74 die("nomem: NBUFS reached!\n");
75 nbufs++;
76 cur = 0;
77 buf = dat;
78 len = dlen;
79 bufs[nbufs - 1].type = type;
82 static void buf_file(char *path, char *dat, int dlen)
84 buf_new(BUF_FILE, dat, dlen);
85 strcpy(bufs[nbufs - 1].path, path ? path : "");
88 static void buf_macro(struct macro *m)
90 buf_new(BUF_MACRO, m->def, strlen(m->def));
91 bufs[nbufs - 1].macro = m;
94 static void buf_arg(char *arg, int mbuf)
96 buf_new(BUF_ARG, arg, strlen(arg));
97 bufs[nbufs - 1].arg_buf = mbuf;
100 static void buf_pop(void)
102 nbufs--;
103 if (nbufs) {
104 cur = bufs[nbufs - 1].cur;
105 len = bufs[nbufs - 1].len;
106 buf = bufs[nbufs - 1].buf;
110 static int buf_iseval(void)
112 int i;
113 for (i = nbufs - 1; i >= 0; i--)
114 if (bufs[i].type == BUF_EVAL)
115 return 1;
116 return 0;
/* return the st_size that fstat() reports for fd, or 0 if fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;
	if (fstat(fd, &st) != 0)
		return 0;
	return st.st_size;
}
/*
 * Read the whole file at path into a freshly allocated, NUL-terminated
 * buffer and push it as a BUF_FILE buffer.
 *
 * Returns 0 on success and -1 if the file cannot be opened or read.
 * The allocation is released by cpp_read() when the buffer is popped.
 *
 * The size reported by fstat() is only used as the initial capacity:
 * the buffer grows as needed, so files whose size is unknown or that
 * are larger than reported are read completely and the terminator
 * always fits.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int cap, nr = 0;
	int n;
	char *dat;
	if (fd == -1)
		return -1;
	/* one byte to notice end of file without growing, one for the NUL */
	cap = (int) file_size(fd) + 2;
	dat = malloc(cap);
	if (!dat)
		die("nomem: cannot read <%s>\n", path);
	while (1) {
		if (nr + 1 >= cap) {
			char *tmp = realloc(dat, cap * 2);
			if (!tmp)
				die("nomem: cannot read <%s>\n", path);
			dat = tmp;
			cap *= 2;
		}
		n = read(fd, dat + nr, cap - 1 - nr);
		if (n <= 0)
			break;
		nr += n;
	}
	close(fd);
	if (n < 0) {
		/* e.g. path names a directory */
		free(dat);
		return -1;
	}
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/* start preprocessing the file at path; returns what include_file() returns */
int cpp_init(char *path)
{
	int ret = include_file(path);
	return ret;
}
150 static int jumpws(void)
152 int old = cur;
153 while (cur < len && isspace(buf[cur]))
154 cur++;
155 return cur == old;
158 static void read_word(char *dst)
160 jumpws();
161 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
162 *dst++ = buf[cur++];
163 *dst = '\0';
166 static int jumpcomment(void)
168 if (buf[cur] == '/' && buf[cur + 1] == '*') {
169 while (++cur < len) {
170 if (buf[cur] == '*' && buf[cur + 1] == '/') {
171 cur += 2;
172 return 0;
176 if (buf[cur] == '/' && buf[cur + 1] == '/') {
177 while (++cur < len)
178 if (buf[cur] == '\n')
179 break;
180 return 0;
182 return 1;
185 static int jumpstr(void)
187 if (buf[cur] == '\'') {
188 while (cur < len && buf[++cur] != '\'')
189 if (buf[cur] == '\\')
190 cur++;
191 cur++;
192 return 0;
194 if (buf[cur] == '"') {
195 while (cur < len && buf[++cur] != '"')
196 if (buf[cur] == '\\')
197 cur++;
198 cur++;
199 return 0;
201 return 1;
204 static void read_tilleol(char *dst)
206 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
207 cur++;
208 while (cur < len && buf[cur] != '\n') {
209 int last = cur;
210 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
211 cur += 2;
212 continue;
214 if (!jumpstr()) {
215 memcpy(dst, buf + last, cur - last);
216 dst += cur - last;
217 continue;
219 if (!jumpcomment())
220 continue;
221 *dst++ = buf[cur++];
223 *dst = '\0';
226 static char *locs[NLOCS] = {};
227 static int nlocs = 0;
229 void cpp_addpath(char *s)
231 locs[nlocs++] = s;
234 static int include_find(char *name, int std)
236 int i;
237 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
238 char path[1 << 10];
239 if (locs[i])
240 sprintf(path, "%s/%s", locs[i], name);
241 else
242 strcpy(path, name);
243 if (!include_file(path))
244 return 0;
246 return -1;
249 static void readarg(char *s)
251 int depth = 0;
252 int beg = cur;
253 while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) {
254 if (!jumpstr() || !jumpcomment())
255 continue;
256 switch (buf[cur++]) {
257 case '(':
258 case '[':
259 case '{':
260 depth++;
261 break;
262 case ')':
263 case ']':
264 case '}':
265 depth--;
266 break;
269 if (s) {
270 memcpy(s, buf + beg, cur - beg);
271 s[cur - beg] = '\0';
275 /* find a macro; if undef is nonzero, search #undef-ed macros too */
276 static int macro_find(char *name, int undef)
278 int i = mhead[(unsigned char) name[0]];
279 while (i > 0) {
280 if (!strcmp(name, macros[i].name))
281 if (!macros[i].undef || undef)
282 return i;
283 i = mnext[i];
285 return -1;
288 static void macro_undef(char *name)
290 int i = macro_find(name, 0);
291 if (i >= 0)
292 macros[i].undef = 1;
295 static int macro_new(char *name)
297 int i = macro_find(name, 1);
298 if (i >= 0)
299 return i;
300 if (mcount >= NDEFS)
301 die("nomem: NDEFS reached!\n");
302 i = mcount++;
303 strcpy(macros[i].name, name);
304 mnext[i] = mhead[(unsigned char) name[0]];
305 mhead[(unsigned char) name[0]] = i;
306 return i;
309 static void macro_define(void)
311 char name[NAMELEN];
312 struct macro *d;
313 read_word(name);
314 d = &macros[macro_new(name)];
315 d->isfunc = 0;
316 d->nargs = 0;
317 if (buf[cur] == '(') {
318 cur++;
319 jumpws();
320 while (cur < len && buf[cur] != ')') {
321 readarg(d->args[d->nargs++]);
322 jumpws();
323 if (buf[cur] != ',')
324 break;
325 cur++;
326 jumpws();
328 cur++;
329 d->isfunc = 1;
331 read_tilleol(d->def);
334 static char ebuf[MARGLEN];
335 static int elen;
336 static int ecur;
338 static long evalexpr(void);
340 static int cpp_eval(void)
342 char evalbuf[MARGLEN];
343 int old_limit;
344 int ret, clen;
345 char *cbuf;
346 read_tilleol(evalbuf);
347 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
348 elen = 0;
349 ecur = 0;
350 old_limit = bufs_limit;
351 bufs_limit = nbufs;
352 while (!cpp_read(&cbuf, &clen)) {
353 memcpy(ebuf + elen, cbuf, clen);
354 elen += clen;
356 bufs_limit = old_limit;
357 ret = evalexpr();
358 buf_pop();
359 return ret;
362 static void jumpifs(int jumpelse)
364 int depth = 0;
365 while (cur < len) {
366 if (buf[cur] == '#') {
367 char cmd[NAMELEN];
368 cur++;
369 read_word(cmd);
370 if (!strcmp("else", cmd))
371 if (!depth && !jumpelse)
372 break;
373 if (!strcmp("elif", cmd))
374 if (!depth && !jumpelse && cpp_eval())
375 break;
376 if (!strcmp("endif", cmd)) {
377 if (!depth)
378 break;
379 else
380 depth--;
382 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
383 !strcmp("if", cmd))
384 depth++;
385 continue;
387 if (!jumpcomment())
388 continue;
389 if (!jumpstr())
390 continue;
391 cur++;
395 static int cpp_cmd(void)
397 char cmd[NAMELEN];
398 cur++;
399 read_word(cmd);
400 if (!strcmp("define", cmd)) {
401 macro_define();
402 return 0;
404 if (!strcmp("undef", cmd)) {
405 char name[NAMELEN];
406 read_word(name);
407 macro_undef(name);
408 return 0;
410 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
411 !strcmp("if", cmd)) {
412 char name[NAMELEN];
413 int matched = 0;
414 if (cmd[2]) {
415 int not = cmd[2] == 'n';
416 read_word(name);
417 matched = not ? macro_find(name, 0) < 0 :
418 macro_find(name, 0) >= 0;
419 } else {
420 matched = cpp_eval();
422 if (!matched)
423 jumpifs(0);
424 return 0;
426 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
427 jumpifs(1);
428 return 0;
430 if (!strcmp("endif", cmd))
431 return 0;
432 if (!strcmp("include", cmd)) {
433 char file[NAMELEN];
434 char *s, *e;
435 jumpws();
436 s = buf + cur + 1;
437 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
438 memcpy(file, s, e - s);
439 file[e - s] = '\0';
440 cur += e - s + 2;
441 if (include_find(file, *e == '>') == -1)
442 err("cannot include <%s>\n", file);
443 return 0;
445 return 1;
448 static int macro_arg(struct macro *m, char *arg)
450 int i;
451 for (i = 0; i < m->nargs; i++)
452 if (!strcmp(arg, m->args[i]))
453 return i;
454 return -1;
457 static int buf_arg_find(char *name)
459 int i;
460 for (i = nbufs - 1; i >= 0; i--) {
461 struct buf *mbuf = &bufs[i];
462 struct macro *m = mbuf->macro;
463 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
464 return i;
465 if (mbuf->type == BUF_ARG)
466 i = mbuf->arg_buf;
468 return -1;
471 static void macro_expand(char *name)
473 struct macro *m;
474 int mbuf;
475 if ((mbuf = buf_arg_find(name)) >= 0) {
476 int arg = macro_arg(bufs[mbuf].macro, name);
477 char *dat = bufs[mbuf].args[arg];
478 buf_arg(dat, mbuf);
479 return;
481 m = &macros[macro_find(name, 0)];
482 if (!m->isfunc) {
483 buf_macro(m);
484 return;
486 jumpws();
487 if (buf[cur] == '(') {
488 int i = 0;
489 struct buf *mbuf = &bufs[nbufs];
490 cur++;
491 jumpws();
492 while (cur < len && buf[cur] != ')') {
493 readarg(mbuf->args[i++]);
494 jumpws();
495 if (buf[cur] != ',')
496 break;
497 cur++;
498 jumpws();
500 while (i < m->nargs)
501 mbuf->args[i++][0] = '\0';
502 cur++;
503 buf_macro(m);
507 static int buf_expanding(char *macro)
509 int i;
510 for (i = nbufs - 1; i >= 0; i--) {
511 if (bufs[i].type == BUF_ARG)
512 return 0;
513 if (bufs[i].type == BUF_MACRO &&
514 !strcmp(macro, bufs[i].macro->name))
515 return 1;
517 return 0;
520 /* return 1 for plain macros and arguments and 2 for function macros */
521 static int expandable(char *word)
523 int i;
524 if (buf_arg_find(word) >= 0)
525 return 1;
526 if (buf_expanding(word))
527 return 0;
528 i = macro_find(word, 0);
529 return i >= 0 ? macros[i].isfunc + 1 : 0;
532 void cpp_define(char *name, char *def)
534 char tmp_buf[MDEFLEN];
535 sprintf(tmp_buf, "%s\t%s", name, def);
536 buf_new(BUF_TEMP, tmp_buf, strlen(tmp_buf));
537 macro_define();
538 buf_pop();
541 static int seen_macro; /* seen a macro; 2 if a function macro */
542 static char seen_name[NAMELEN]; /* the name of the last macro */
544 static int hunk_off;
545 static int hunk_len;
547 int cpp_read(char **obuf, int *olen)
549 int old, end;
550 int jump_name = 0;
551 *olen = 0;
552 *obuf = "";
553 if (seen_macro == 1) {
554 macro_expand(seen_name);
555 seen_macro = 0;
557 if (cur == len) {
558 struct buf *cbuf = &bufs[nbufs - 1];
559 if (nbufs < bufs_limit + 1)
560 return -1;
561 if (cbuf->type == BUF_FILE)
562 free(buf);
563 buf_pop();
565 old = cur;
566 if (buf[cur] == '#')
567 if (!cpp_cmd())
568 return 0;
569 while (cur < len) {
570 if (!jumpws())
571 continue;
572 if (buf[cur] == '#')
573 break;
574 if (!jumpcomment())
575 continue;
576 if (seen_macro == 2) {
577 if (buf[cur] == '(')
578 macro_expand(seen_name);
579 seen_macro = 0;
580 old = cur;
581 continue;
583 if (!jumpstr())
584 continue;
585 if (isalnum(buf[cur]) || buf[cur] == '_') {
586 char word[NAMELEN];
587 read_word(word);
588 seen_macro = expandable(word);
589 if (seen_macro) {
590 strcpy(seen_name, word);
591 jump_name = 1;
592 break;
594 if (buf_iseval() && !strcmp("defined", word)) {
595 int parens = 0;
596 jumpws();
597 if (buf[cur] == '(') {
598 parens = 1;
599 cur++;
601 read_word(word);
602 if (parens) {
603 jumpws();
604 cur++;
607 continue;
609 cur++;
611 /* macros are expanded later; ignoring their names */
612 end = jump_name ? cur - strlen(seen_name) : cur;
613 if (!buf_iseval()) {
614 hunk_off += hunk_len;
615 hunk_len = end - old;
617 *obuf = buf + old;
618 *olen = end - old;
619 return 0;
622 /* preprocessor constant expression evaluation */
624 static char etok[NAMELEN];
625 static int enext;
627 static char *tok2[] = {
628 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
631 static int eval_tok(void)
633 char *s = etok;
634 int i;
635 while (ecur < elen) {
636 while (ecur < elen && isspace(ebuf[ecur]))
637 ecur++;
638 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
639 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
640 ebuf[ecur - 1] != '/'))
641 ecur++;
642 continue;
644 break;
646 if (ecur >= elen)
647 return TOK_EOF;
648 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
649 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
650 *s++ = ebuf[ecur++];
651 *s = '\0';
652 return TOK_NAME;
654 if (isdigit(ebuf[ecur])) {
655 while (isdigit(ebuf[ecur]))
656 *s++ = ebuf[ecur++];
657 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
658 ecur++;
659 *s = '\0';
660 return TOK_NUM;
662 for (i = 0; i < LEN(tok2); i++)
663 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
664 int ret = TOK2(tok2[i]);
665 ecur += 2;
666 return ret;
668 return ebuf[ecur++];
671 static int eval_see(void)
673 if (enext == -1)
674 enext = eval_tok();
675 return enext;
678 static int eval_get(void)
680 if (enext != -1) {
681 int ret = enext;
682 enext = -1;
683 return ret;
685 return eval_tok();
688 static long eval_num(void)
690 return atol(etok);
/* consume the next token if it is tok; return 0 if it was, 1 if not */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume tok if it is the next token; a mismatch is silently ignored */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
707 static char *eval_id(void)
709 return etok;
712 static long evalcexpr(void);
714 static long evalatom(void)
716 if (!eval_jmp(TOK_NUM))
717 return eval_num();
718 if (!eval_jmp(TOK_NAME)) {
719 int parens = !eval_jmp('(');
720 long ret;
721 eval_expect(TOK_NAME);
722 ret = macro_find(eval_id(), 0) >= 0;
723 if (parens)
724 eval_expect(')');
725 return ret;
727 if (!eval_jmp('(')) {
728 long ret = evalcexpr();
729 eval_expect(')');
730 return ret;
732 return -1;
/* evaluate the prefix operators '!', '-' and '~', or else an atom */
static long evalpre(void)
{
	int tok = eval_see();
	if (tok != '!' && tok != '-' && tok != '~')
		return evalatom();
	eval_get();
	if (tok == '!')
		return !evalpre();
	if (tok == '-')
		return -evalpre();
	return ~evalpre();
}
/*
 * Evaluate a chain of '*', '/' and '%' operations, left to right.
 *
 * A zero divisor yields 0 instead of executing the division.  Such a
 * divisor can legitimately appear in an operand whose value is not
 * used, for instance when an undefined name stands for 0.
 */
static long evalmul(void)
{
	long ret = evalpre();
	while (1) {
		long rhs;
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			rhs = evalpre();
			ret = rhs ? ret / rhs : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			rhs = evalpre();
			ret = rhs ? ret % rhs : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* evaluate a chain of '+' and '-' operations, left to right */
static long evaladd(void)
{
	long sum = evalmul();
	for (;;) {
		if (!eval_jmp('+'))
			sum += evalmul();
		else if (!eval_jmp('-'))
			sum -= evalmul();
		else
			break;
	}
	return sum;
}
/* evaluate a chain of "<<" and ">>" operations, left to right */
static long evalshift(void)
{
	long val = evaladd();
	for (;;) {
		if (!eval_jmp(TOK2("<<")))
			val <<= evaladd();
		else if (!eval_jmp(TOK2(">>")))
			val >>= evaladd();
		else
			break;
	}
	return val;
}
/* evaluate a chain of '<', '>', "<=" and ">=" comparisons, left to right */
static long evalcmp(void)
{
	long val = evalshift();
	for (;;) {
		if (!eval_jmp('<'))
			val = val < evalshift();
		else if (!eval_jmp('>'))
			val = val > evalshift();
		else if (!eval_jmp(TOK2("<=")))
			val = val <= evalshift();
		else if (!eval_jmp(TOK2(">=")))
			val = val >= evalshift();
		else
			break;
	}
	return val;
}
/* evaluate a chain of "==" and "!=" comparisons, left to right */
static long evaleq(void)
{
	long val = evalcmp();
	for (;;) {
		if (!eval_jmp(TOK2("==")))
			val = val == evalcmp();
		else if (!eval_jmp(TOK2("!=")))
			val = val != evalcmp();
		else
			break;
	}
	return val;
}
/* evaluate a chain of bitwise '&' operations */
static long evalbitand(void)
{
	long val = evaleq();
	for (;;) {
		if (eval_jmp('&'))
			break;
		val &= evaleq();
	}
	return val;
}
/* evaluate a chain of bitwise '^' operations */
static long evalxor(void)
{
	long val = evalbitand();
	for (;;) {
		if (eval_jmp('^'))
			break;
		val ^= evalbitand();
	}
	return val;
}
/* evaluate a chain of bitwise '|' operations */
static long evalbitor(void)
{
	long val = evalxor();
	for (;;) {
		if (eval_jmp('|'))
			break;
		val |= evalxor();
	}
	return val;
}
/*
 * Evaluate a chain of "&&" operations.
 *
 * The right operand is always parsed, even when the result is already
 * known to be 0; otherwise its tokens would be left in the input and
 * be misread by the enclosing expression.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Evaluate a chain of "||" operations.
 *
 * The right operand is always parsed, even when the result is already
 * known to be 1; otherwise its tokens would be left in the input and
 * be misread by the enclosing expression.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
/*
 * Evaluate a conditional expression: cond, optionally followed by
 * '?' then-expression ':' else-expression.
 *
 * Both branches are parsed so that all of their tokens are consumed,
 * and the value of the selected one is returned.  A missing ':' is
 * tolerated in the same way as other unexpected tokens.
 */
static long evalcexpr(void)
{
	long cond = evalor();
	long yes, no;
	if (eval_jmp('?'))
		return cond;
	yes = evalcexpr();
	eval_expect(':');
	no = evalcexpr();
	return cond ? yes : no;
}
895 static long evalexpr(void)
897 enext = -1;
898 return evalcexpr();
/*
 * Return the 1-based line number of offset off in the NUL-terminated
 * text s: one plus the number of newlines found before s + off.
 */
static int buf_loc(char *s, int off)
{
	char *limit = s + off;
	char *nl;
	int line = 1;
	for (nl = strchr(s, '\n'); nl && nl < limit; nl = strchr(nl + 1, '\n'))
		line++;
	return line;
}
912 char *cpp_loc(long addr)
914 static char loc[256];
915 int line = -1;
916 int i;
917 for (i = nbufs - 1; i > 0; i--)
918 if (bufs[i].type == BUF_FILE)
919 break;
920 if (addr >= hunk_off && i == nbufs - 1)
921 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
922 else
923 line = buf_loc(bufs[i].buf, bufs[i].cur);
924 sprintf(loc, "%s:%d", bufs[i].path, line);
925 return loc;