cpp: don't remove non-macro number signs
[neatcc.git] / cpp.c
blob6c789a669055f7328ae1ea7fbcd489de0fa30e3b
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stddef.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/stat.h>
10 #include "tok.h"
11 #include "tab.h"
/*
 * The active input: its text, its length and the read offset into it.
 * buf_new() saves these into bufs[] before switching to a new buffer and
 * buf_pop() restores them from there.
 */
13 static char *buf;
14 static int len;
15 static int cur;
/* number of entries in macros[] */
17 #define MAXDEFS (1 << 12)
/* size of a macro body and of one macro call argument */
18 #define MACROLEN (1 << 10)
/* number of parameter/argument slots per macro */
19 #define MAXARGS (1 << 5)
/* NOTE(review): not referenced in the code visible here; presumably the hash table bucket count -- confirm */
20 #define NBUCKET (MAXDEFS << 1)
/*
 * A macro definition.  Slots of macros[] are handed out by macro_new();
 * nmacros is the number of slots used so far.
 */
22 static struct macro {
23 char name[NAMELEN];	/* macro name; this field is what is stored in mtab */
24 char def[MACROLEN];	/* body text, filled by read_tilleol() */
25 char args[MAXARGS][NAMELEN];	/* parameter names, filled by readarg() */
26 int nargs;	/* number of parameters read */
27 int isfunc;	/* set when the name is directly followed by '(' */
28 } macros[MAXDEFS];
29 static int nmacros;
30 /* macro hash table */
31 static struct tab mtab;
/* depth limit of the bufs[] stack */
33 #define MAXBUFS (1 << 5)
/* buffer types: pushed by buf_file(), buf_macro(), buf_arg(), cpp_eval() and cpp_define() respectively */
34 #define BUF_FILE 0
35 #define BUF_MACRO 1
36 #define BUF_ARG 2
37 #define BUF_EVAL 3
38 #define BUF_TEMP 4
/*
 * One entry of the input stack.  buf, len and cur hold the saved values of
 * the globals of the same names while another buffer is on top of this one.
 */
40 static struct buf {
41 char *buf;
42 int len;
43 int cur;
44 int type;	/* one of the BUF_* values */
45 /* for BUF_FILE */
46 char path[NAMELEN];
47 /* for BUF_MACRO */
48 struct macro *macro;
49 char args[MAXARGS][MACROLEN]; /* arguments passed to a macro */
50 /* for BUF_ARG */
51 int arg_buf; /* the bufs index of the owning macro */
52 } bufs[MAXBUFS];
/* number of entries in use in bufs[]; the active buffer is bufs[nbufs - 1] */
53 static int nbufs;
/* write msg to file descriptor 2 and terminate with exit status 1 */
void die(char *msg)
{
	size_t n = strlen(msg);

	write(2, msg, n);
	exit(1);
}
61 static void buf_new(int type, char *dat, int dlen)
63 if (nbufs) {
64 bufs[nbufs - 1].buf = buf;
65 bufs[nbufs - 1].cur = cur;
66 bufs[nbufs - 1].len = len;
68 if (nbufs >= MAXBUFS)
69 die("nomem: MAXBUFS reached!\n");
70 nbufs++;
71 cur = 0;
72 buf = dat;
73 len = dlen;
74 bufs[nbufs - 1].type = type;
77 static void buf_file(char *path, char *dat, int dlen)
79 buf_new(BUF_FILE, dat, dlen);
80 strcpy(bufs[nbufs - 1].path, path ? path : "");
83 static void buf_macro(struct macro *m)
85 buf_new(BUF_MACRO, m->def, strlen(m->def));
86 bufs[nbufs - 1].macro = m;
89 static void buf_arg(char *arg, int mbuf)
91 buf_new(BUF_ARG, arg, strlen(arg));
92 bufs[nbufs - 1].arg_buf = mbuf;
95 static void buf_pop(void)
97 nbufs--;
98 if (nbufs) {
99 cur = bufs[nbufs - 1].cur;
100 len = bufs[nbufs - 1].len;
101 buf = bufs[nbufs - 1].buf;
105 static int buf_iseval(void)
107 int i;
108 for (i = nbufs - 1; i >= 0; i--)
109 if (bufs[i].type == BUF_EVAL)
110 return 1;
111 return 0;
/* size of the open file fd as reported by fstat(); 0 when fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;

	if (fstat(fd, &st) != 0)
		return 0;
	return st.st_size;
}
/*
 * Read the file at path into a malloc()ed, NUL-terminated buffer and push
 * it as a BUF_FILE buffer.  Returns 0 on success and -1 when the file
 * cannot be opened; dies when the buffer cannot be allocated.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int n, nr = 0;
	int size;
	char *dat;

	if (fd == -1)
		return -1;
	size = file_size(fd) + 1;
	dat = malloc(size);
	if (!dat)
		die("nomem: cannot read file\n");
	/* never fill the last byte: it is reserved for the terminator */
	while (nr < size - 1 && (n = read(fd, dat + nr, size - 1 - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Define the builtin macros (each with an empty body) and open path as the
 * first input file.  Returns what include_file() returns.
 */
int cpp_init(char *path)
{
	static char *predef[] = {"__STDC__", "__i386__", "__linux__"};
	int i;

	for (i = 0; i < (int) (sizeof(predef) / sizeof(predef[0])); i++)
		cpp_define(predef[i], "");
	return include_file(path);
}
148 static void jumpws(void)
150 while (cur < len && isspace(buf[cur]))
151 cur++;
154 static void read_word(char *dst)
156 jumpws();
157 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
158 *dst++ = buf[cur++];
159 *dst = '\0';
162 static void jumpcomment(void)
164 while (++cur < len) {
165 if (buf[cur] == '*' && buf[cur + 1] == '/') {
166 cur += 2;
167 break;
172 static void read_tilleol(char *dst)
174 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
175 cur++;
176 while (cur < len && buf[cur] != '\n') {
177 if (buf[cur] == '\\' && buf[cur + 1] == '\n')
178 cur += 2;
179 else if (buf[cur] == '/' && buf[cur + 1] == '*')
180 jumpcomment();
181 else
182 *dst++ = buf[cur++];
184 *dst = '\0';
/*
 * Copy the string s to d, terminate it, and return a pointer to the
 * terminator so that further pieces can be appended there.  s is only
 * read, hence const.
 */
static char *putstr(char *d, const char *s)
{
	while (*s)
		*d++ = *s++;
	*d = '\0';
	return d;
}
/* number of entries in locs[] */
195 #define MAXLOCS (1 << 10)
/*
 * Include search directories.  cpp_addpath() appends to the list and
 * include_find() walks it from the highest index down to 0.
 */
197 static char *locs[MAXLOCS] = {"/usr/include"};
/* number of directories stored in locs[] */
198 static int nlocs = 1;
200 void cpp_addpath(char *s)
202 locs[nlocs++] = s;
205 static int include_find(char *name, int std)
207 int i;
208 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
209 char path[1 << 10];
210 char *s;
211 s = path;
212 if (locs[i]) {
213 s = putstr(s, locs[i]);
214 *s++ = '/';
216 s = putstr(s, name);
217 if (!include_file(path))
218 return 0;
220 return -1;
223 static void jumpstr(void)
225 if (buf[cur] == '\'') {
226 while (cur < len && buf[++cur] != '\'')
227 if (buf[cur] == '\\')
228 cur++;
229 cur++;
230 return;
232 if (buf[cur] == '"') {
233 while (cur < len && buf[++cur] != '"')
234 if (buf[cur] == '\\')
235 cur++;
236 cur++;
237 return;
241 static void readarg(char *s)
243 int depth = 0;
244 int beg = cur;
245 while (cur < len && (depth || buf[cur] != ',' && buf[cur] != ')')) {
246 switch (buf[cur]) {
247 case '(':
248 case '[':
249 case '{':
250 cur++;
251 depth++;
252 break;
253 case ')':
254 case ']':
255 case '}':
256 cur++;
257 depth--;
258 break;
259 case '\'':
260 case '"':
261 jumpstr();
262 break;
263 default:
264 if (buf[cur] == '/' && buf[cur + 1] == '*')
265 jumpcomment();
266 else
267 cur++;
270 if (s) {
271 memcpy(s, buf + beg, cur - beg);
272 s[cur - beg] = '\0';
276 static int macro_find(char *name)
278 char *n = tab_get(&mtab, name);
279 if (!n)
280 return -1;
281 return container(n, struct macro, name) - macros;
284 static void macro_undef(char *name)
286 int i = macro_find(name);
287 if (i >= 0)
288 tab_del(&mtab, macros[i].name);
291 static int macro_new(char *name)
293 int i = macro_find(name);
294 if (i >= 0)
295 return i;
296 if (nmacros >= MAXDEFS)
297 die("nomem: MAXDEFS reached!\n");
298 i = nmacros++;
299 strcpy(macros[i].name, name);
300 tab_add(&mtab, macros[i].name);
301 return i;
304 static void macro_define(void)
306 char name[NAMELEN];
307 struct macro *d;
308 read_word(name);
309 d = &macros[macro_new(name)];
310 d->isfunc = 0;
311 d->nargs = 0;
312 if (buf[cur] == '(') {
313 cur++;
314 jumpws();
315 while (cur < len && buf[cur] != ')') {
316 readarg(d->args[d->nargs++]);
317 jumpws();
318 if (buf[cur++] != ',')
319 break;
320 jumpws();
322 d->isfunc = 1;
324 read_tilleol(d->def);
/* defined further down; declared here because cpp_eval() calls it */
327 int cpp_read(char *buf);
/* text of the expression being evaluated; cpp_eval() fills it through cpp_read() */
329 static char ebuf[BUFSIZE];
/* number of bytes accumulated in ebuf */
330 static int elen;
/* read offset of eval_tok() within ebuf */
331 static int ecur;
/* defined near the end of the file; declared here because cpp_eval() calls it */
333 static long evalexpr(void);
335 static int cpp_eval(void)
337 int bufid;
338 int ret;
339 char evalbuf[BUFSIZE];
340 read_tilleol(evalbuf);
341 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
342 bufid = nbufs;
343 elen = 0;
344 ecur = 0;
345 while (bufid < nbufs || (bufid == nbufs && cur < len))
346 elen += cpp_read(ebuf + elen);
347 ret = evalexpr();
348 buf_pop();
349 return ret;
352 static void jumpifs(int jumpelse)
354 int depth = 0;
355 while (cur < len) {
356 if (buf[cur] == '#') {
357 char cmd[NAMELEN];
358 cur++;
359 read_word(cmd);
360 if (!strcmp("else", cmd))
361 if (!depth && !jumpelse)
362 break;
363 if (!strcmp("elif", cmd))
364 if (!depth && !jumpelse && cpp_eval())
365 break;
366 if (!strcmp("endif", cmd)) {
367 if (!depth)
368 break;
369 else
370 depth--;
372 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
373 !strcmp("if", cmd))
374 depth++;
375 continue;
377 if (buf[cur] == '/' && buf[cur + 1] == '*') {
378 jumpcomment();
379 continue;
381 if (buf[cur] == '\'' || buf[cur] == '"') {
382 jumpstr();
383 continue;
385 cur++;
389 static int cpp_cmd(void)
391 char cmd[NAMELEN];
392 cur++;
393 read_word(cmd);
394 if (!strcmp("define", cmd)) {
395 macro_define();
396 return 0;
398 if (!strcmp("undef", cmd)) {
399 char name[NAMELEN];
400 read_word(name);
401 macro_undef(name);
402 return 0;
404 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
405 !strcmp("if", cmd)) {
406 char name[NAMELEN];
407 int matched = 0;
408 if (cmd[2]) {
409 int not = cmd[2] == 'n';
410 read_word(name);
411 matched = not ? macro_find(name) < 0 :
412 macro_find(name) >= 0;
413 } else {
414 matched = cpp_eval();
416 if (!matched)
417 jumpifs(0);
418 return 0;
420 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
421 jumpifs(1);
422 return 0;
424 if (!strcmp("endif", cmd))
425 return 0;
426 if (!strcmp("include", cmd)) {
427 char file[NAMELEN];
428 char *s, *e;
429 jumpws();
430 s = buf + cur + 1;
431 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
432 memcpy(file, s, e - s);
433 file[e - s] = '\0';
434 cur += e - s + 2;
435 if (include_find(file, *e == '>') == -1)
436 die("cannot include file\n");
437 return 0;
439 return 1;
442 static int macro_arg(struct macro *m, char *arg)
444 int i;
445 for (i = 0; i < m->nargs; i++)
446 if (!strcmp(arg, m->args[i]))
447 return i;
448 return -1;
451 static int buf_arg_find(char *name)
453 int i;
454 for (i = nbufs - 1; i >= 0; i--) {
455 struct buf *mbuf = &bufs[i];
456 struct macro *m = mbuf->macro;
457 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
458 return i;
459 if (mbuf->type == BUF_ARG)
460 i = mbuf->arg_buf;
462 return -1;
465 static void macro_expand(void)
467 char name[NAMELEN];
468 struct macro *m;
469 int mbuf;
470 read_word(name);
471 if ((mbuf = buf_arg_find(name)) >= 0) {
472 int arg = macro_arg(bufs[mbuf].macro, name);
473 char *dat = bufs[mbuf].args[arg];
474 buf_arg(dat, mbuf);
475 return;
477 m = &macros[macro_find(name)];
478 if (!m->isfunc) {
479 buf_macro(m);
480 return;
482 jumpws();
483 if (buf[cur] == '(') {
484 int i = 0;
485 struct buf *mbuf = &bufs[nbufs];
486 cur++;
487 jumpws();
488 while (cur < len && buf[cur] != ')') {
489 readarg(mbuf->args[i++]);
490 jumpws();
491 if (buf[cur] != ',')
492 break;
493 cur++;
494 jumpws();
496 while (i < m->nargs)
497 mbuf->args[i++][0] = '\0';
498 cur++;
499 buf_macro(m);
503 static int buf_expanding(char *macro)
505 int i;
506 for (i = nbufs - 1; i >= 0; i--) {
507 if (bufs[i].type == BUF_ARG)
508 return 0;
509 if (bufs[i].type == BUF_MACRO &&
510 !strcmp(macro, bufs[i].macro->name))
511 return 1;
513 return 0;
/*
 * Return nonzero if word should be expanded: it is a parameter of a macro
 * under expansion, or it is a defined macro that buf_expanding() does not
 * report as already being expanded.
 */
static int expandable(char *word)
{
	if (buf_arg_find(word) >= 0)
		return 1;
	if (buf_expanding(word))
		return 0;
	return macro_find(word) != -1;
}
523 void cpp_define(char *name, char *def)
525 char tmp_buf[MACROLEN];
526 char *s = tmp_buf;
527 s = putstr(s, name);
528 *s++ = '\t';
529 s = putstr(s, def);
530 buf_new(BUF_TEMP, tmp_buf, s - tmp_buf);
531 macro_define();
532 buf_pop();
/* set by cpp_read() when it stops in front of an expandable word; the next call expands it */
535 static int seen_macro;
/* sum of the lengths of the hunks returned before the latest one (not counted while evaluating) */
537 static int hunk_off;
/* length of the latest hunk cpp_read() returned outside of expression evaluation */
538 static int hunk_len;
540 int cpp_read(char *s)
542 int old;
543 if (seen_macro) {
544 seen_macro = 0;
545 macro_expand();
547 if (cur == len) {
548 struct buf *cbuf = &bufs[nbufs - 1];
549 if (nbufs < 2)
550 return -1;
551 if (cbuf->type & BUF_FILE)
552 free(buf);
553 buf_pop();
555 old = cur;
556 if (buf[cur] == '#')
557 if (!cpp_cmd())
558 return 0;
559 while (cur < len) {
560 if (buf[cur] == '#')
561 break;
562 if (buf[cur] == '/' && buf[cur + 1] == '*') {
563 jumpcomment();
564 continue;
566 if (buf[cur] == '\'' || buf[cur] == '"') {
567 jumpstr();
568 continue;
570 if (isalpha(buf[cur]) || buf[cur] == '_') {
571 char word[NAMELEN];
572 read_word(word);
573 if (expandable(word)) {
574 cur -= strlen(word);
575 seen_macro = 1;
576 break;
578 if (buf_iseval() && !strcmp("defined", word)) {
579 int parens = 0;
580 jumpws();
581 if (buf[cur] == '(') {
582 parens = 1;
583 cur++;
585 read_word(word);
586 if (parens) {
587 jumpws();
588 cur++;
591 continue;
593 cur++;
595 memcpy(s, buf + old, cur - old);
596 s[cur - old] = '\0';
597 if (!buf_iseval()) {
598 hunk_off += hunk_len;
599 hunk_len = cur - old;
601 return cur - old;
604 /* preprocessor constant expression evaluation */
606 static char etok[NAMELEN];
607 static int enext;
609 static char *tok2[] = {
610 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
613 static int eval_tok(void)
615 char *s = etok;
616 int i;
617 while (ecur < elen) {
618 while (ecur < elen && isspace(ebuf[ecur]))
619 ecur++;
620 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
621 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
622 ebuf[ecur - 1] != '/'))
623 ecur++;
624 continue;
626 break;
628 if (ecur >= elen)
629 return TOK_EOF;
630 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
631 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
632 *s++ = ebuf[ecur++];
633 *s = '\0';
634 return TOK_NAME;
636 if (isdigit(ebuf[ecur])) {
637 while (isdigit(ebuf[ecur]))
638 *s++ = ebuf[ecur++];
639 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
640 ecur++;
641 return TOK_NUM;
643 for (i = 0; i < ARRAY_SIZE(tok2); i++)
644 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
645 int ret = TOK2(tok2[i]);
646 ecur += 2;
647 return ret;
649 return ebuf[ecur++];
652 static int eval_see(void)
654 if (enext == -1)
655 enext = eval_tok();
656 return enext;
659 static int eval_get(void)
661 if (enext != -1) {
662 int ret = enext;
663 enext = -1;
664 return ret;
666 return eval_tok();
669 static long eval_num(void)
671 return atol(etok);
/*
 * Consume the next token if it is tok.
 * Returns 0 when it was consumed and 1 when the next token is different.
 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume tok if it is next; a mismatch is ignored without any report */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
688 static char *eval_id(void)
690 return etok;
693 static long evalcexpr(void);
695 static long evalatom(void)
697 if (!eval_jmp(TOK_NUM))
698 return eval_num();
699 if (!eval_jmp(TOK_NAME)) {
700 int parens = !eval_jmp('(');
701 long ret;
702 eval_expect(TOK_NAME);
703 ret = macro_find(eval_id()) >= 0;
704 if (parens)
705 eval_expect(')');
706 return ret;
708 if (!eval_jmp('(')) {
709 long ret = evalcexpr();
710 eval_expect(')');
711 return ret;
713 return -1;
/* parse the prefix operators '!', '-' and '~' applied to a primary expression */
static long evalpre(void)
{
	switch (eval_see()) {
	case '!':
		eval_get();
		return !evalpre();
	case '-':
		eval_get();
		return -evalpre();
	case '~':
		eval_get();
		return ~evalpre();
	default:
		return evalatom();
	}
}
/*
 * Parse a left-associative chain of '*', '/' and '%'.
 * A zero divisor yields 0 for that operation instead of trapping.
 */
static long evalmul(void)
{
	long ret = evalpre();
	long rhs;

	while (1) {
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			rhs = evalpre();
			ret = rhs ? ret / rhs : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			rhs = evalpre();
			ret = rhs ? ret % rhs : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* parse a left-associative chain of '+' and '-' */
static long evaladd(void)
{
	long sum = evalmul();

	for (;;) {
		if (!eval_jmp('+'))
			sum += evalmul();
		else if (!eval_jmp('-'))
			sum -= evalmul();
		else
			return sum;
	}
}
/* parse a left-associative chain of the shift operators */
static long evalshift(void)
{
	long val = evaladd();

	for (;;) {
		if (!eval_jmp(TOK2("<<")))
			val <<= evaladd();
		else if (!eval_jmp(TOK2(">>")))
			val >>= evaladd();
		else
			return val;
	}
}
/* parse a left-associative chain of '<', '>', "<=" and ">=" */
static long evalcmp(void)
{
	long val = evalshift();
	long rhs;

	for (;;) {
		if (!eval_jmp('<')) {
			rhs = evalshift();
			val = val < rhs;
		} else if (!eval_jmp('>')) {
			rhs = evalshift();
			val = val > rhs;
		} else if (!eval_jmp(TOK2("<="))) {
			rhs = evalshift();
			val = val <= rhs;
		} else if (!eval_jmp(TOK2(">="))) {
			rhs = evalshift();
			val = val >= rhs;
		} else {
			return val;
		}
	}
}
/* parse a left-associative chain of "==" and "!=" */
static long evaleq(void)
{
	long val = evalcmp();
	long rhs;

	for (;;) {
		if (!eval_jmp(TOK2("=="))) {
			rhs = evalcmp();
			val = val == rhs;
		} else if (!eval_jmp(TOK2("!="))) {
			rhs = evalcmp();
			val = val != rhs;
		} else {
			return val;
		}
	}
}
/* parse a chain of bitwise '&' */
static long evalbitand(void)
{
	long acc = evaleq();

	while (eval_jmp('&') == 0)
		acc = acc & evaleq();
	return acc;
}
/* parse a chain of bitwise '^' */
static long evalxor(void)
{
	long acc = evalbitand();

	while (eval_jmp('^') == 0)
		acc = acc ^ evalbitand();
	return acc;
}
/* parse a chain of bitwise '|' */
static long evalbitor(void)
{
	long acc = evalxor();

	while (eval_jmp('|') == 0)
		acc = acc | evalxor();
	return acc;
}
/* parse a chain of logical "&&" */
static long evaland(void)
{
	long ret = evalbitor();

	while (!eval_jmp(TOK2("&&"))) {
		/*
		 * The right operand is always parsed, even when ret is already
		 * zero; otherwise its tokens would be left in the input.
		 */
		long rhs = evalbitor();

		ret = ret && rhs;
	}
	return ret;
}
/* parse a chain of logical "||" */
static long evalor(void)
{
	long ret = evaland();

	while (!eval_jmp(TOK2("||"))) {
		/*
		 * The right operand is always parsed, even when ret is already
		 * nonzero; otherwise its tokens would be left in the input.
		 */
		long rhs = evaland();

		ret = ret || rhs;
	}
	return ret;
}
/*
 * Parse a conditional expression: a logical-or expression optionally
 * followed by '?' first ':' second.  Both branches are always parsed so
 * that all of their tokens are consumed, and a missing ':' cannot make the
 * parser loop at the end of the input.
 */
static long evalcexpr(void)
{
	long cond = evalor();
	long first, second;

	if (eval_jmp('?'))
		return cond;
	first = evalcexpr();
	eval_expect(':');
	second = evalcexpr();
	return cond ? first : second;
}
876 static long evalexpr(void)
878 enext = -1;
879 return evalcexpr();
/*
 * 1-based line number of offset off in the string s: one more than the
 * number of newlines found before s + off.
 */
static int buf_loc(char *s, int off)
{
	char *end = s + off;
	char *nl = strchr(s, '\n');
	int line = 1;

	while (nl && nl < end) {
		line++;
		nl = strchr(nl + 1, '\n');
	}
	return line;
}
893 int cpp_loc(char *s, long addr)
895 int line = -1;
896 int i;
897 for (i = nbufs - 1; i > 0; i--)
898 if (bufs[i].type == BUF_FILE)
899 break;
900 if (addr >= hunk_off && i == nbufs - 1)
901 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
902 else
903 line = buf_loc(bufs[i].buf, bufs[i].cur);
904 sprintf(s, "%s:%d: ", bufs[i].path, line);
905 return strlen(s);