arm: fixed minor typo
[neatcc.git] / cpp.c
blob7a096f7237628d436da4b349b2a154ab080728b8
1 /* neatcc preprocessor */
2 #include <ctype.h>
3 #include <fcntl.h>
4 #include <stdarg.h>
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <unistd.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include "ncc.h"
14 static char *buf;
15 static long len;
16 static long cur;
18 static struct macro {
19 char name[NAMELEN]; /* macro name */
20 char def[MDEFLEN]; /* macro definition */
21 char args[NARGS][NAMELEN];
22 int nargs; /* number of arguments */
23 int isfunc; /* macro is a function */
24 int undef; /* macro is removed */
25 } macros[NDEFS];
26 static int mcount = 1; /* number of macros */
27 static int mhead[256]; /* macro hash table heads */
28 static int mnext[NDEFS]; /* macro hash table next entries */
30 #define BUF_FILE 0
31 #define BUF_MACRO 1
32 #define BUF_ARG 2
33 #define BUF_EVAL 3
34 #define BUF_TEMP 4
36 /* preprocessing input buffers for files, macros and macro arguments */
37 static struct buf {
38 char *buf;
39 long len;
40 long cur;
41 int type;
42 /* for BUF_FILE */
43 char path[NAMELEN];
44 /* for BUF_MACRO */
45 struct macro *macro;
46 char args[NARGS][MARGLEN]; /* arguments passed to a macro */
47 /* for BUF_ARG */
48 int arg_buf; /* the bufs index of the owning macro */
49 } bufs[NBUFS];
50 static int bufs_n;
51 static int bufs_limit = 0; /* cpp_read() limit; useful in cpp_eval() */
/*
 * Format a printf-style message, write it to file descriptor 2 and exit
 * with status 1.  The message is built in a fixed-size buffer; text that
 * does not fit is truncated instead of overrunning the buffer.
 */
void die(char *fmt, ...)
{
	va_list ap;
	char msg[512];
	int n;
	va_start(ap, fmt);
	n = vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);
	if (n < 0)
		n = 0;
	if (n >= (int) sizeof(msg))
		n = sizeof(msg) - 1;	/* vsnprintf() reports the untruncated length */
	write(2, msg, n);
	exit(1);
}
64 static void buf_new(int type, char *dat, long dlen)
66 if (bufs_n) {
67 bufs[bufs_n - 1].buf = buf;
68 bufs[bufs_n - 1].cur = cur;
69 bufs[bufs_n - 1].len = len;
71 if (bufs_n >= NBUFS)
72 die("nomem: NBUFS reached!\n");
73 bufs_n++;
74 cur = 0;
75 buf = dat;
76 len = dlen;
77 bufs[bufs_n - 1].type = type;
80 static void buf_file(char *path, char *dat, int dlen)
82 buf_new(BUF_FILE, dat, dlen);
83 strcpy(bufs[bufs_n - 1].path, path ? path : "");
86 static int macro_arg(struct macro *m, char *arg);
88 static void buf_macro(struct macro *m)
90 struct mem mem;
91 char *s = m->def;
92 char arg[NAMELEN];
93 int len;
94 int quote = 0;
95 mem_init(&mem);
96 while (*s) {
97 int numsign = 0;
98 if (quote && s[0] == quote)
99 quote = 0;
100 else if (!quote && s[0] == '"')
101 quote = s[0];
102 else if (!quote && s[0] == '\'')
103 quote = s[0];
104 if (!quote && s[0] == '#')
105 numsign = s[1] == '#' ? 2 : 1;
106 if (numsign && s[numsign]) {
107 struct buf *mbuf = &bufs[bufs_n];
108 char *r = s + numsign;
109 char *d = arg;
110 while (*r && d - arg < sizeof(arg) - 1 &&
111 (isalnum((unsigned char) *r) || *r == '_'))
112 *d++ = *r++;
113 *d++ = '\0';
114 if (macro_arg(m, arg) >= 0) {
115 char *def = mbuf->args[macro_arg(m, arg)];
116 if (def && numsign == 1) {
117 mem_putc(&mem, '\"');
118 while (*def) {
119 if (*def == '\"')
120 mem_putc(&mem, '\\');
121 mem_putc(&mem, (unsigned char) *def++);
123 mem_putc(&mem, '\"');
124 s = r;
125 continue;
127 if (def && numsign == 2) {
128 while (*def)
129 mem_putc(&mem, (unsigned char) *def++);
130 s = r;
131 continue;
135 if (quote && s[0] == '\\')
136 mem_putc(&mem, (unsigned char) *s++);
137 if (s[0])
138 mem_putc(&mem, (unsigned char) *s++);
140 len = mem_len(&mem);
141 buf_new(BUF_MACRO, mem_get(&mem), len);
142 mem_done(&mem);
143 bufs[bufs_n - 1].macro = m;
146 static void buf_arg(char *arg, int mbuf)
148 buf_new(BUF_ARG, arg, strlen(arg));
149 bufs[bufs_n - 1].arg_buf = mbuf;
152 static void buf_pop(void)
154 bufs_n--;
155 if (bufs[bufs_n].type == BUF_FILE || bufs[bufs_n].type == BUF_MACRO)
156 free(buf);
157 if (bufs_n) {
158 cur = bufs[bufs_n - 1].cur;
159 len = bufs[bufs_n - 1].len;
160 buf = bufs[bufs_n - 1].buf;
164 static int buf_iseval(void)
166 int i;
167 for (i = bufs_n - 1; i >= 0; i--)
168 if (bufs[i].type == BUF_EVAL)
169 return 1;
170 return 0;
/* return the size fstat() reports for fd, or 0 if fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;
	return fstat(fd, &st) == 0 ? st.st_size : 0;
}
/*
 * Read the file at path into a freshly allocated, NUL-terminated buffer
 * and push it as a BUF_FILE buffer.
 *
 * Returns 0 on success and -1 if the file cannot be opened.  Dies if the
 * buffer cannot be allocated.  At most the size reported by file_size()
 * is read, so the terminating NUL always fits even if the file grows
 * while it is being read.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int n, nr = 0;
	char *dat;
	int size;
	if (fd == -1)
		return -1;
	size = file_size(fd) + 1;
	dat = malloc(size);
	if (!dat) {
		close(fd);
		die("nomem: cannot read <%s>\n", path);
	}
	/* size - 1 keeps one byte for the terminating NUL */
	while (nr < size - 1 && (n = read(fd, dat + nr, size - 1 - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Start preprocessing the file at path.  Returns what include_file()
 * returns: 0 on success, -1 if the file cannot be opened.
 */
int cpp_init(char *path)
{
	int failed = include_file(path);
	return failed;
}
204 static int jumpws(void)
206 int old = cur;
207 while (cur < len && isspace(buf[cur]))
208 cur++;
209 return cur == old;
212 static void read_word(char *dst)
214 jumpws();
215 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
216 *dst++ = buf[cur++];
217 *dst = '\0';
220 static int jumpcomment(void)
222 if (buf[cur] == '/' && buf[cur + 1] == '*') {
223 while (++cur < len) {
224 if (buf[cur] == '*' && buf[cur + 1] == '/') {
225 cur += 2;
226 return 0;
230 if (buf[cur] == '/' && buf[cur + 1] == '/') {
231 while (++cur < len && buf[cur] != '\n')
232 if (buf[cur] == '\\')
233 cur++;
234 return 0;
236 return 1;
239 static int jumpstr(void)
241 if (buf[cur] == '\'') {
242 while (++cur < len && buf[cur] != '\'')
243 if (buf[cur] == '\\')
244 cur++;
245 cur++;
246 return 0;
248 if (buf[cur] == '"') {
249 while (++cur < len && buf[cur] != '"')
250 if (buf[cur] == '\\')
251 cur++;
252 cur++;
253 return 0;
255 return 1;
258 static void read_tilleol(char *dst)
260 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
261 cur++;
262 while (cur < len && buf[cur] != '\n') {
263 int last = cur;
264 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
265 cur += 2;
266 continue;
268 if (!jumpstr()) {
269 memcpy(dst, buf + last, cur - last);
270 dst += cur - last;
271 continue;
273 if (!jumpcomment())
274 continue;
275 *dst++ = buf[cur++];
277 *dst = '\0';
280 static char *locs[NLOCS] = {};
281 static int nlocs = 0;
283 /* header directory */
284 void cpp_path(char *s)
286 locs[nlocs++] = s;
289 static int include_find(char *name, int std)
291 int i;
292 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
293 char path[1 << 10];
294 if (locs[i])
295 sprintf(path, "%s/%s", locs[i], name);
296 else
297 strcpy(path, name);
298 if (!include_file(path))
299 return 0;
301 return -1;
304 static void readarg(char *s)
306 int depth = 0;
307 int beg = cur;
308 while (cur < len && (depth || (buf[cur] != ',' && buf[cur] != ')'))) {
309 if (!jumpstr() || !jumpcomment())
310 continue;
311 switch (buf[cur++]) {
312 case '(':
313 case '[':
314 case '{':
315 depth++;
316 break;
317 case ')':
318 case ']':
319 case '}':
320 depth--;
321 break;
324 if (s) {
325 memcpy(s, buf + beg, cur - beg);
326 s[cur - beg] = '\0';
330 /* find a macro; if undef is nonzero, search #undef-ed macros too */
331 static int macro_find(char *name, int undef)
333 int i = mhead[(unsigned char) name[0]];
334 while (i > 0) {
335 if (!strcmp(name, macros[i].name))
336 if (!macros[i].undef || undef)
337 return i;
338 i = mnext[i];
340 return -1;
343 static void macro_undef(char *name)
345 int i = macro_find(name, 0);
346 if (i >= 0)
347 macros[i].undef = 1;
350 static int macro_new(char *name)
352 int i = macro_find(name, 1);
353 if (i >= 0)
354 return i;
355 if (mcount >= NDEFS)
356 die("nomem: NDEFS reached!\n");
357 i = mcount++;
358 strcpy(macros[i].name, name);
359 mnext[i] = mhead[(unsigned char) name[0]];
360 mhead[(unsigned char) name[0]] = i;
361 return i;
364 static void macro_define(void)
366 char name[NAMELEN];
367 struct macro *d;
368 read_word(name);
369 d = &macros[macro_new(name)];
370 d->isfunc = 0;
371 d->nargs = 0;
372 d->undef = 0;
373 if (buf[cur] == '(') {
374 cur++;
375 jumpws();
376 while (cur < len && buf[cur] != ')') {
377 readarg(d->args[d->nargs++]);
378 jumpws();
379 if (buf[cur] != ',')
380 break;
381 cur++;
382 jumpws();
384 cur++;
385 d->isfunc = 1;
387 read_tilleol(d->def);
390 static char ebuf[MARGLEN];
391 static int elen;
392 static int ecur;
394 static long evalexpr(void);
396 static long cpp_eval(void)
398 char evalbuf[MARGLEN];
399 int old_limit;
400 long ret, clen;
401 char *cbuf;
402 read_tilleol(evalbuf);
403 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
404 elen = 0;
405 ecur = 0;
406 old_limit = bufs_limit;
407 bufs_limit = bufs_n;
408 while (!cpp_read(&cbuf, &clen)) {
409 memcpy(ebuf + elen, cbuf, clen);
410 elen += clen;
412 bufs_limit = old_limit;
413 ret = evalexpr();
414 buf_pop();
415 return ret;
418 static void jumpifs(int jumpelse)
420 int depth = 0;
421 while (cur < len) {
422 if (buf[cur] == '#') {
423 char cmd[NAMELEN];
424 cur++;
425 read_word(cmd);
426 if (!strcmp("else", cmd))
427 if (!depth && !jumpelse)
428 break;
429 if (!strcmp("elif", cmd))
430 if (!depth && !jumpelse && cpp_eval())
431 break;
432 if (!strcmp("endif", cmd)) {
433 if (!depth)
434 break;
435 else
436 depth--;
438 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
439 !strcmp("if", cmd))
440 depth++;
441 continue;
443 if (!jumpcomment())
444 continue;
445 if (!jumpstr())
446 continue;
447 cur++;
451 static int cpp_cmd(void)
453 char cmd[NAMELEN];
454 cur++;
455 read_word(cmd);
456 if (!strcmp("define", cmd)) {
457 macro_define();
458 return 0;
460 if (!strcmp("undef", cmd)) {
461 char name[NAMELEN];
462 read_word(name);
463 macro_undef(name);
464 return 0;
466 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
467 !strcmp("if", cmd)) {
468 char name[NAMELEN];
469 int matched = 0;
470 if (cmd[2]) {
471 int not = cmd[2] == 'n';
472 read_word(name);
473 matched = not ? macro_find(name, 0) < 0 :
474 macro_find(name, 0) >= 0;
475 } else {
476 matched = cpp_eval();
478 if (!matched)
479 jumpifs(0);
480 return 0;
482 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
483 jumpifs(1);
484 return 0;
486 if (!strcmp("endif", cmd))
487 return 0;
488 if (!strcmp("include", cmd)) {
489 char file[NAMELEN];
490 char *s, *e;
491 jumpws();
492 s = buf + cur + 1;
493 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
494 memcpy(file, s, e - s);
495 file[e - s] = '\0';
496 cur += e - s + 2;
497 if (include_find(file, *e == '>') == -1)
498 err("cannot include <%s>\n", file);
499 return 0;
501 err("unknown directive <%s>\n", cmd);
502 return 1;
505 static int macro_arg(struct macro *m, char *arg)
507 int i;
508 for (i = 0; i < m->nargs; i++)
509 if (!strcmp(arg, m->args[i]))
510 return i;
511 return -1;
514 static int buf_arg_find(char *name)
516 int i;
517 for (i = bufs_n - 1; i >= 0; i--) {
518 struct buf *mbuf = &bufs[i];
519 struct macro *m = mbuf->macro;
520 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
521 return i;
522 if (mbuf->type == BUF_ARG)
523 i = mbuf->arg_buf;
525 return -1;
528 static void macro_expand(char *name)
530 struct macro *m;
531 int mbuf;
532 if ((mbuf = buf_arg_find(name)) >= 0) {
533 int arg = macro_arg(bufs[mbuf].macro, name);
534 char *dat = bufs[mbuf].args[arg];
535 buf_arg(dat, mbuf);
536 return;
538 m = &macros[macro_find(name, 0)];
539 if (!m->isfunc) {
540 buf_macro(m);
541 return;
543 jumpws();
544 if (buf[cur] == '(') {
545 int i = 0;
546 struct buf *mbuf = &bufs[bufs_n];
547 cur++;
548 jumpws();
549 while (cur < len && buf[cur] != ')') {
550 readarg(mbuf->args[i++]);
551 jumpws();
552 if (buf[cur] != ',')
553 break;
554 cur++;
555 jumpws();
557 while (i < m->nargs)
558 mbuf->args[i++][0] = '\0';
559 cur++;
560 buf_macro(m);
564 static int buf_expanding(char *macro)
566 int i;
567 for (i = bufs_n - 1; i >= 0; i--) {
568 if (bufs[i].type == BUF_ARG)
569 return 0;
570 if (bufs[i].type == BUF_MACRO &&
571 !strcmp(macro, bufs[i].macro->name))
572 return 1;
574 return 0;
577 /* return 1 for plain macros and arguments and 2 for function macros */
578 static int expandable(char *word)
580 int i;
581 if (buf_arg_find(word) >= 0)
582 return 1;
583 if (buf_expanding(word))
584 return 0;
585 i = macro_find(word, 0);
586 return i >= 0 ? macros[i].isfunc + 1 : 0;
589 void cpp_define(char *name, char *def)
591 char tmp_buf[MDEFLEN];
592 sprintf(tmp_buf, "%s\t%s", name, def);
593 buf_new(BUF_TEMP, tmp_buf, strlen(tmp_buf));
594 macro_define();
595 buf_pop();
598 static int seen_macro; /* seen a macro; 2 if a function macro */
599 static char seen_name[NAMELEN]; /* the name of the last macro */
601 static int hunk_off;
602 static int hunk_len;
604 int cpp_read(char **obuf, long *olen)
606 int old, end;
607 int jump_name = 0;
608 *olen = 0;
609 *obuf = "";
610 if (seen_macro == 1) {
611 macro_expand(seen_name);
612 seen_macro = 0;
614 if (cur == len) {
615 if (bufs_n < bufs_limit + 1)
616 return 1;
617 buf_pop();
619 old = cur;
620 if (cur < len && buf[cur] == '#')
621 if (!cpp_cmd())
622 return 0;
623 while (cur < len) {
624 if (!jumpws())
625 continue;
626 if (buf[cur] == '#')
627 break;
628 if (!jumpcomment())
629 continue;
630 if (seen_macro == 2) {
631 if (buf[cur] == '(')
632 macro_expand(seen_name);
633 seen_macro = 0;
634 old = cur;
635 continue;
637 if (!jumpstr())
638 continue;
639 if (isalnum(buf[cur]) || buf[cur] == '_') {
640 char word[NAMELEN];
641 read_word(word);
642 seen_macro = expandable(word);
643 if (seen_macro) {
644 strcpy(seen_name, word);
645 jump_name = 1;
646 break;
648 if (buf_iseval() && !strcmp("defined", word)) {
649 int parens = 0;
650 jumpws();
651 if (buf[cur] == '(') {
652 parens = 1;
653 cur++;
655 read_word(word);
656 if (parens) {
657 jumpws();
658 cur++;
661 continue;
663 cur++;
665 /* macros are expanded later; ignoring their names */
666 end = jump_name ? cur - strlen(seen_name) : cur;
667 if (!buf_iseval()) {
668 hunk_off += hunk_len;
669 hunk_len = end - old;
671 *obuf = buf + old;
672 *olen = end - old;
673 return 0;
676 /* preprocessor constant expression evaluation */
678 #define TOK2(a) ((a)[0] << 16 | (a)[1] << 8)
679 #define TOK_NAME 256
680 #define TOK_NUM 257
681 #define TOK_EOF -1
683 static char etok[NAMELEN];
684 static int enext;
686 static char *tok2[] = {
687 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
690 static int eval_tok(void)
692 char *s = etok;
693 int i;
694 while (ecur < elen) {
695 while (ecur < elen && isspace(ebuf[ecur]))
696 ecur++;
697 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
698 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
699 ebuf[ecur - 1] != '/'))
700 ecur++;
701 continue;
703 break;
705 if (ecur >= elen)
706 return TOK_EOF;
707 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
708 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
709 *s++ = ebuf[ecur++];
710 *s = '\0';
711 return TOK_NAME;
713 if (isdigit(ebuf[ecur])) {
714 while (isdigit(ebuf[ecur]))
715 *s++ = ebuf[ecur++];
716 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
717 ecur++;
718 *s = '\0';
719 return TOK_NUM;
721 for (i = 0; i < LEN(tok2); i++)
722 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
723 int ret = TOK2(tok2[i]);
724 ecur += 2;
725 return ret;
727 return ebuf[ecur++];
730 static int eval_see(void)
732 if (enext == -1)
733 enext = eval_tok();
734 return enext;
737 static int eval_get(void)
739 if (enext != -1) {
740 int ret = enext;
741 enext = -1;
742 return ret;
744 return eval_tok();
747 static long eval_num(void)
749 return atol(etok);
/*
 * Consume the next token if it is tok.  Returns 0 if it was consumed
 * and 1 if the next token is something else.
 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume tok if it is the next token; a mismatch is not reported */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
766 static char *eval_id(void)
768 return etok;
771 static long evalcexpr(void);
773 static long evalatom(void)
775 if (!eval_jmp(TOK_NUM))
776 return eval_num();
777 if (!eval_jmp(TOK_NAME)) {
778 int parens = !eval_jmp('(');
779 long ret;
780 eval_expect(TOK_NAME);
781 ret = macro_find(eval_id(), 0) >= 0;
782 if (parens)
783 eval_expect(')');
784 return ret;
786 if (!eval_jmp('(')) {
787 long ret = evalcexpr();
788 eval_expect(')');
789 return ret;
791 return -1;
/* evaluate the prefix operators '!', '-' and '~' applied to a primary expression */
static long evalpre(void)
{
	switch (eval_see()) {
	case '!':
		eval_get();
		return !evalpre();
	case '-':
		eval_get();
		return -evalpre();
	case '~':
		eval_get();
		return ~evalpre();
	default:
		return evalatom();
	}
}
/*
 * Evaluate a sequence of '*', '/' and '%' operations, left to right.
 * Dividing by zero yields 0 instead of trapping, and a divisor of -1 is
 * handled without the overflow of LONG_MIN / -1.
 */
static long evalmul(void)
{
	long ret = evalpre();
	while (1) {
		long rhs;
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			rhs = evalpre();
			if (rhs == 0)
				ret = 0;
			else if (rhs == -1)
				ret = (long) (0UL - (unsigned long) ret);
			else
				ret /= rhs;
			continue;
		}
		if (!eval_jmp('%')) {
			rhs = evalpre();
			/* x % -1 is always 0 */
			ret = rhs == 0 || rhs == -1 ? 0 : ret % rhs;
			continue;
		}
		break;
	}
	return ret;
}
/* evaluate a sequence of '+' and '-' operations, left to right */
static long evaladd(void)
{
	long ret = evalmul();
	for (;;) {
		if (!eval_jmp('+'))
			ret += evalmul();
		else if (!eval_jmp('-'))
			ret -= evalmul();
		else
			return ret;
	}
}
/* evaluate a sequence of "<<" and ">>" operations, left to right */
static long evalshift(void)
{
	long ret = evaladd();
	for (;;) {
		if (!eval_jmp(TOK2("<<")))
			ret <<= evaladd();
		else if (!eval_jmp(TOK2(">>")))
			ret >>= evaladd();
		else
			return ret;
	}
}
/* evaluate a sequence of '<', '>', "<=" and ">=" comparisons, left to right */
static long evalcmp(void)
{
	long ret = evalshift();
	for (;;) {
		if (!eval_jmp('<'))
			ret = ret < evalshift();
		else if (!eval_jmp('>'))
			ret = ret > evalshift();
		else if (!eval_jmp(TOK2("<=")))
			ret = ret <= evalshift();
		else if (!eval_jmp(TOK2(">=")))
			ret = ret >= evalshift();
		else
			return ret;
	}
}
/* evaluate a sequence of "==" and "!=" comparisons, left to right */
static long evaleq(void)
{
	long ret = evalcmp();
	for (;;) {
		if (!eval_jmp(TOK2("==")))
			ret = ret == evalcmp();
		else if (!eval_jmp(TOK2("!=")))
			ret = ret != evalcmp();
		else
			return ret;
	}
}
/* evaluate a sequence of bitwise '&' operations */
static long evalbitand(void)
{
	long ret;
	for (ret = evaleq(); !eval_jmp('&'); )
		ret &= evaleq();
	return ret;
}
/* evaluate a sequence of bitwise '^' operations */
static long evalxor(void)
{
	long ret;
	for (ret = evalbitand(); !eval_jmp('^'); )
		ret ^= evalbitand();
	return ret;
}
/* evaluate a sequence of bitwise '|' operations */
static long evalbitor(void)
{
	long ret;
	for (ret = evalxor(); !eval_jmp('|'); )
		ret |= evalxor();
	return ret;
}
/*
 * Evaluate a sequence of "&&" operations.  The right operand is always
 * parsed, even when the left one is zero, so that its tokens are
 * consumed and the rest of the expression is read from the right place;
 * only its value is ignored in that case.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Evaluate a sequence of "||" operations.  The right operand is always
 * parsed, even when the left one is nonzero, so that its tokens are
 * consumed and the rest of the expression is read from the right place;
 * only its value is ignored in that case.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
942 static long evalcexpr(void)
944 long ret = evalor();
945 if (eval_jmp('?'))
946 return ret;
947 if (ret)
948 return evalor();
949 while (eval_get() != ':')
951 return evalor();
954 static long evalexpr(void)
956 enext = -1;
957 return evalcexpr();
/*
 * Return the 1-based line number of offset off in the NUL-terminated
 * text s: one more than the number of newlines before s + off.
 */
static int buf_loc(char *s, int off)
{
	char *end = s + off;
	char *nl;
	int line = 1;
	for (nl = strchr(s, '\n'); nl && nl < end; nl = strchr(nl + 1, '\n'))
		line++;
	return line;
}
971 char *cpp_loc(long addr)
973 static char loc[256];
974 int line = -1;
975 int i;
976 for (i = bufs_n - 1; i > 0; i--)
977 if (bufs[i].type == BUF_FILE)
978 break;
979 if (addr >= hunk_off && i == bufs_n - 1)
980 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
981 else
982 line = buf_loc(bufs[i].buf, bufs[i].cur);
983 sprintf(loc, "%s:%d", bufs[i].path, line);
984 return loc;