tok: parse uppercase hex numbers
[neatcc.git] / cpp.c
blob 9e32d7b54090ac0debacb070926ec650aed70190
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <string.h>
6 #include <unistd.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include "tok.h"
10 #include "tab.h"
/* the buffer currently being read; saved into and restored from bufs[] */
static char *buf;	/* contents */
static int len;		/* number of bytes in buf */
static int cur;		/* read offset into buf */
#define MAXDEFS		(1 << 12)	/* entries in macros[] */
#define MACROLEN	(1 << 10)	/* bytes in a macro body or a passed argument */
#define MAXARGS		(1 << 5)	/* argument slots per macro */
#define NBUCKET		(MAXDEFS << 1)
21 static struct macro {
22 char name[NAMELEN];
23 char def[MACROLEN];
24 char args[MAXARGS][NAMELEN];
25 int nargs;
26 int isfunc;
27 } macros[MAXDEFS];
28 static int nmacros;
29 /* macro hash table */
30 static struct tab mtab;
#define MAXBUFS		(1 << 5)	/* depth of the bufs[] stack */
/* values of struct buf's type field */
#define BUF_FILE	0
#define BUF_MACRO	1
#define BUF_ARG		2
#define BUF_EVAL	3
#define BUF_TEMP	4
39 static struct buf {
40 char *buf;
41 int len;
42 int cur;
43 int type;
44 /* for BUF_FILE */
45 char path[NAMELEN];
46 /* for BUF_MACRO */
47 struct macro *macro;
48 char args[MAXARGS][MACROLEN]; /* arguments passed to a macro */
49 /* for BUF_ARG */
50 int arg_buf; /* the bufs index of the owning macro */
51 } bufs[MAXBUFS];
52 static int nbufs;
54 static void buf_new(int type, char *dat, int dlen)
56 if (nbufs) {
57 bufs[nbufs - 1].buf = buf;
58 bufs[nbufs - 1].cur = cur;
59 bufs[nbufs - 1].len = len;
61 if (nbufs >= MAXBUFS)
62 die("nomem: MAXBUFS reached!\n");
63 nbufs++;
64 cur = 0;
65 buf = dat;
66 len = dlen;
67 bufs[nbufs - 1].type = type;
70 static void buf_file(char *path, char *dat, int dlen)
72 buf_new(BUF_FILE, dat, dlen);
73 strcpy(bufs[nbufs - 1].path, path ? path : "");
76 static void buf_macro(struct macro *m)
78 buf_new(BUF_MACRO, m->def, strlen(m->def));
79 bufs[nbufs - 1].macro = m;
82 static void buf_arg(char *arg, int mbuf)
84 buf_new(BUF_ARG, arg, strlen(arg));
85 bufs[nbufs - 1].arg_buf = mbuf;
88 static void buf_pop(void)
90 nbufs--;
91 if (nbufs) {
92 cur = bufs[nbufs - 1].cur;
93 len = bufs[nbufs - 1].len;
94 buf = bufs[nbufs - 1].buf;
98 static int buf_iseval(void)
100 int i;
101 for (i = nbufs - 1; i >= 0; i--)
102 if (bufs[i].type == BUF_EVAL)
103 return 1;
104 return 0;
/* return the size fstat() reports for fd, or 0 if fstat() fails */
static size_t file_size(int fd)
{
	struct stat st;
	if (fstat(fd, &st) != 0)
		return 0;
	return st.st_size;
}
/*
 * Read the file at path into a freshly allocated, NUL-terminated buffer
 * and push it as a BUF_FILE buffer.
 *
 * Returns 0 on success and -1 if the file cannot be opened or the buffer
 * cannot be allocated.
 */
static int include_file(char *path)
{
	int fd = open(path, O_RDONLY);
	int n = 0, nr = 0;
	char *dat;
	int size;
	if (fd == -1)
		return -1;
	size = file_size(fd);
	dat = malloc(size + 1);
	if (!dat) {
		close(fd);
		return -1;
	}
	/* read at most size bytes so that dat[nr] below stays inside the allocation */
	while (nr < size && (n = read(fd, dat + nr, size - nr)) > 0)
		nr += n;
	close(fd);
	dat[nr] = '\0';
	buf_file(path, dat, nr);
	return 0;
}
/*
 * Define the built-in macros with empty bodies and start reading path.
 * Returns the result of include_file().
 */
int cpp_init(char *path)
{
	static char *predefined[] = {"__STDC__", "__x86_64__", "__linux__"};
	int i;
	for (i = 0; i < (int) (sizeof(predefined) / sizeof(predefined[0])); i++)
		cpp_define(predefined[i], "");
	return include_file(path);
}
141 static void jumpws(void)
143 while (cur < len && isspace(buf[cur]))
144 cur++;
147 static void read_word(char *dst)
149 jumpws();
150 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
151 *dst++ = buf[cur++];
152 *dst = '\0';
155 static void jumpcomment(void)
157 while (++cur < len) {
158 if (buf[cur] == '*' && buf[cur + 1] == '/') {
159 cur += 2;
160 break;
165 static void read_tilleol(char *dst)
167 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
168 cur++;
169 while (cur < len && buf[cur] != '\n') {
170 if (buf[cur] == '\\')
171 cur += 2;
172 else if (buf[cur] == '/' && buf[cur + 1] == '*')
173 jumpcomment();
174 else
175 *dst++ = buf[cur++];
177 *dst = '\0';
/* copy string s to d, NUL-terminate it and return a pointer to that terminator */
static char *putstr(char *d, char *s)
{
	for (; *s != '\0'; s++, d++)
		*d = *s;
	*d = '\0';
	return d;
}
#define MAXLOCS		(1 << 10)	/* entries in locs[] */

/* include search directories; cpp_addpath() appends to this list */
static char *locs[MAXLOCS] = {"/usr/include"};
static int nlocs = 1;
193 void cpp_addpath(char *s)
195 locs[nlocs++] = s;
198 static int include_find(char *name, int std)
200 int i;
201 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
202 char path[1 << 10];
203 char *s;
204 s = path;
205 if (locs[i]) {
206 s = putstr(s, locs[i]);
207 *s++ = '/';
209 s = putstr(s, name);
210 if (!include_file(path))
211 return 0;
213 return -1;
216 static void jumpstr(void)
218 if (buf[cur] == '\'') {
219 while (cur < len && buf[++cur] != '\'')
220 if (buf[cur] == '\\')
221 cur++;
222 cur++;
223 return;
225 if (buf[cur] == '"') {
226 while (cur < len && buf[++cur] != '"')
227 if (buf[cur] == '\\')
228 cur++;
229 cur++;
230 return;
234 static void readarg(char *s)
236 int depth = 0;
237 int beg = cur;
238 while (cur < len && (depth || buf[cur] != ',' && buf[cur] != ')')) {
239 switch (buf[cur]) {
240 case '(':
241 case '[':
242 case '{':
243 cur++;
244 depth++;
245 break;
246 case ')':
247 case ']':
248 case '}':
249 cur++;
250 depth--;
251 break;
252 case '\'':
253 case '"':
254 jumpstr();
255 break;
256 default:
257 if (buf[cur] == '/' && buf[cur + 1] == '*')
258 jumpcomment();
259 else
260 cur++;
263 if (s) {
264 memcpy(s, buf + beg, cur - beg);
265 s[cur - beg] = '\0';
269 static int macro_find(char *name)
271 char *n = tab_get(&mtab, name);
272 if (!n)
273 return -1;
274 return container(n, struct macro, name) - macros;
277 static void macro_undef(char *name)
279 int i = macro_find(name);
280 if (i >= 0)
281 tab_del(&mtab, macros[i].name);
284 static int macro_new(char *name)
286 int i = macro_find(name);
287 if (i >= 0)
288 return i;
289 if (nmacros >= MAXDEFS)
290 die("nomem: MAXDEFS reached!\n");
291 i = nmacros++;
292 strcpy(macros[i].name, name);
293 tab_add(&mtab, macros[i].name);
294 return i;
297 static void macro_define(void)
299 char name[NAMELEN];
300 struct macro *d;
301 read_word(name);
302 d = &macros[macro_new(name)];
303 d->isfunc = 0;
304 d->nargs = 0;
305 if (buf[cur] == '(') {
306 cur++;
307 jumpws();
308 while (cur < len && buf[cur] != ')') {
309 readarg(d->args[d->nargs++]);
310 jumpws();
311 if (buf[cur++] != ',')
312 break;
313 jumpws();
315 d->isfunc = 1;
317 read_tilleol(d->def);
320 int cpp_read(char *buf);
322 static char ebuf[BUFSIZE];
323 static int elen;
324 static int ecur;
326 static long evalexpr(void);
328 static int cpp_eval(void)
330 int bufid;
331 int ret;
332 char evalbuf[BUFSIZE];
333 read_tilleol(evalbuf);
334 buf_new(BUF_EVAL, evalbuf, strlen(evalbuf));
335 bufid = nbufs;
336 elen = 0;
337 ecur = 0;
338 while (bufid < nbufs || (bufid == nbufs && cur < len))
339 elen += cpp_read(ebuf + elen);
340 ret = evalexpr();
341 buf_pop();
342 return ret;
345 static void jumpifs(int jumpelse)
347 int depth = 0;
348 while (cur < len) {
349 if (buf[cur] == '#') {
350 char cmd[NAMELEN];
351 cur++;
352 read_word(cmd);
353 if (!strcmp("else", cmd))
354 if (!depth && !jumpelse)
355 break;
356 if (!strcmp("elif", cmd))
357 if (!depth && !jumpelse && cpp_eval())
358 break;
359 if (!strcmp("endif", cmd)) {
360 if (!depth)
361 break;
362 else
363 depth--;
365 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
366 !strcmp("if", cmd))
367 depth++;
368 continue;
370 if (buf[cur] == '/' && buf[cur + 1] == '*') {
371 jumpcomment();
372 continue;
374 if (buf[cur] == '\'' || buf[cur] == '"') {
375 jumpstr();
376 continue;
378 cur++;
382 static void cpp_cmd(void)
384 char cmd[NAMELEN];
385 cur++;
386 read_word(cmd);
387 if (!strcmp("define", cmd)) {
388 macro_define();
389 return;
391 if (!strcmp("undef", cmd)) {
392 char name[NAMELEN];
393 read_word(name);
394 macro_undef(name);
395 return;
397 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
398 !strcmp("if", cmd)) {
399 char name[NAMELEN];
400 int matched = 0;
401 if (cmd[2]) {
402 int not = cmd[2] == 'n';
403 read_word(name);
404 matched = not ? macro_find(name) < 0 :
405 macro_find(name) >= 0;
406 } else {
407 matched = cpp_eval();
409 if (!matched)
410 jumpifs(0);
411 return;
413 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
414 jumpifs(1);
415 return;
417 if (!strcmp("endif", cmd))
418 return;
419 if (!strcmp("include", cmd)) {
420 char file[NAMELEN];
421 char *s, *e;
422 jumpws();
423 s = buf + cur + 1;
424 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
425 memcpy(file, s, e - s);
426 file[e - s] = '\0';
427 cur += e - s + 2;
428 if (include_find(file, *e == '>') == -1)
429 die("cannot include file\n");
430 return;
434 static int macro_arg(struct macro *m, char *arg)
436 int i;
437 for (i = 0; i < m->nargs; i++)
438 if (!strcmp(arg, m->args[i]))
439 return i;
440 return -1;
443 static int buf_arg_find(char *name)
445 int i;
446 for (i = nbufs - 1; i >= 0; i--) {
447 struct buf *mbuf = &bufs[i];
448 struct macro *m = mbuf->macro;
449 if (mbuf->type == BUF_MACRO && macro_arg(m, name) >= 0)
450 return i;
451 if (mbuf->type == BUF_ARG)
452 i = mbuf->arg_buf;
454 return -1;
457 static void macro_expand(void)
459 char name[NAMELEN];
460 struct macro *m;
461 int mbuf;
462 read_word(name);
463 if ((mbuf = buf_arg_find(name)) >= 0) {
464 int arg = macro_arg(bufs[mbuf].macro, name);
465 char *dat = bufs[mbuf].args[arg];
466 buf_arg(dat, mbuf);
467 return;
469 m = &macros[macro_find(name)];
470 if (!m->isfunc) {
471 buf_macro(m);
472 return;
474 jumpws();
475 if (buf[cur] == '(') {
476 int i = 0;
477 struct buf *mbuf = &bufs[nbufs];
478 cur++;
479 jumpws();
480 while (cur < len && buf[cur] != ')') {
481 readarg(mbuf->args[i++]);
482 jumpws();
483 if (buf[cur] != ',')
484 break;
485 cur++;
486 jumpws();
488 while (i < m->nargs)
489 mbuf->args[i++][0] = '\0';
490 cur++;
491 buf_macro(m);
495 static int buf_expanding(char *macro)
497 int i;
498 for (i = nbufs - 1; i >= 0; i--) {
499 if (bufs[i].type == BUF_ARG)
500 return 0;
501 if (bufs[i].type == BUF_MACRO &&
502 !strcmp(macro, bufs[i].macro->name))
503 return 1;
505 return 0;
/*
 * Return nonzero if word should be expanded: it is a parameter of a macro
 * on the stack, or a defined macro that buf_expanding() does not report.
 */
static int expandable(char *word)
{
	if (buf_arg_find(word) >= 0)
		return 1;
	if (buf_expanding(word))
		return 0;
	return macro_find(word) != -1;
}
515 void cpp_define(char *name, char *def)
517 char tmp_buf[MACROLEN];
518 char *s = tmp_buf;
519 s = putstr(s, name);
520 *s++ = '\t';
521 s = putstr(s, def);
522 buf_new(BUF_TEMP, tmp_buf, s - tmp_buf);
523 macro_define();
524 buf_pop();
/* set by cpp_read() when it stopped in front of a word that must be expanded */
static int seen_macro;

/* offset and length of the last hunk cpp_read() returned outside of #if evaluation */
static int hunk_off;
static int hunk_len;
532 int cpp_read(char *s)
534 int old;
535 if (seen_macro) {
536 seen_macro = 0;
537 macro_expand();
539 if (cur == len) {
540 struct buf *cbuf = &bufs[nbufs - 1];
541 if (nbufs < 2)
542 return -1;
543 if (cbuf->type & BUF_FILE)
544 free(buf);
545 buf_pop();
547 old = cur;
548 if (buf[cur] == '#') {
549 cpp_cmd();
550 return 0;
552 while (cur < len) {
553 if (buf[cur] == '#')
554 break;
555 if (buf[cur] == '/' && buf[cur + 1] == '*') {
556 jumpcomment();
557 continue;
559 if (buf[cur] == '\'' || buf[cur] == '"') {
560 jumpstr();
561 continue;
563 if (isalpha(buf[cur]) || buf[cur] == '_') {
564 char word[NAMELEN];
565 read_word(word);
566 if (expandable(word)) {
567 cur -= strlen(word);
568 seen_macro = 1;
569 break;
571 if (buf_iseval() && !strcmp("defined", word)) {
572 int parens = 0;
573 jumpws();
574 if (buf[cur] == '(') {
575 parens = 1;
576 cur++;
578 read_word(word);
579 if (parens) {
580 jumpws();
581 cur++;
584 continue;
586 cur++;
588 memcpy(s, buf + old, cur - old);
589 s[cur - old] = '\0';
590 if (!buf_iseval()) {
591 hunk_off += hunk_len;
592 hunk_len = cur - old;
594 return cur - old;
597 /* preprocessor constant expression evaluation */
599 static char etok[NAMELEN];
600 static int enext;
602 static char *tok2[] = {
603 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
606 static int eval_tok(void)
608 char *s = etok;
609 int i;
610 while (ecur < elen) {
611 while (ecur < elen && isspace(ebuf[ecur]))
612 ecur++;
613 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
614 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
615 ebuf[ecur - 1] != '/'))
616 ecur++;
617 continue;
619 break;
621 if (ecur >= elen)
622 return TOK_EOF;
623 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
624 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
625 *s++ = ebuf[ecur++];
626 *s = '\0';
627 return TOK_NAME;
629 if (isdigit(ebuf[ecur])) {
630 while (isdigit(ebuf[ecur]))
631 *s++ = ebuf[ecur++];
632 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
633 ecur++;
634 return TOK_NUM;
636 for (i = 0; i < ARRAY_SIZE(tok2); i++)
637 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
638 int ret = TOK2(tok2[i]);
639 ecur += 2;
640 return ret;
642 return ebuf[ecur++];
645 static int eval_see(void)
647 if (enext == -1)
648 enext = eval_tok();
649 return enext;
652 static int eval_get(void)
654 if (enext != -1) {
655 int ret = enext;
656 enext = -1;
657 return ret;
659 return eval_tok();
662 static long eval_num(void)
664 return atol(etok);
/* if the next token is tok, consume it and return 0; otherwise return 1 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume tok if it is the next token; a mismatch is silently ignored */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
681 static char *eval_id(void)
683 return etok;
686 static long evalcexpr(void);
688 static long evalatom(void)
690 if (!eval_jmp(TOK_NUM))
691 return eval_num();
692 if (!eval_jmp(TOK_NAME)) {
693 int parens = !eval_jmp('(');
694 long ret;
695 eval_expect(TOK_NAME);
696 ret = macro_find(eval_id()) >= 0;
697 if (parens)
698 eval_expect(')');
699 return ret;
701 if (!eval_jmp('(')) {
702 long ret = evalcexpr();
703 eval_expect(')');
704 return ret;
706 return -1;
/* evaluate the prefix operators '!', '-' and '~', then an atom */
static long evalpre(void)
{
	switch (eval_see()) {
	case '!':
		eval_get();
		return !evalpre();
	case '-':
		eval_get();
		return -evalpre();
	case '~':
		eval_get();
		return ~evalpre();
	default:
		return evalatom();
	}
}
/*
 * Evaluate a left-associative chain of '*', '/' and '%'.
 *
 * Dividing by zero is undefined in C and would crash the preprocessor,
 * so a zero divisor makes the result 0 instead.
 */
static long evalmul(void)
{
	long ret = evalpre();
	long rhs;
	while (1) {
		if (!eval_jmp('*')) {
			ret *= evalpre();
			continue;
		}
		if (!eval_jmp('/')) {
			rhs = evalpre();
			ret = rhs ? ret / rhs : 0;
			continue;
		}
		if (!eval_jmp('%')) {
			rhs = evalpre();
			ret = rhs ? ret % rhs : 0;
			continue;
		}
		break;
	}
	return ret;
}
/* evaluate a left-associative chain of '+' and '-' */
static long evaladd(void)
{
	long val = evalmul();
	for (;;) {
		switch (eval_see()) {
		case '+':
			eval_get();
			val += evalmul();
			break;
		case '-':
			eval_get();
			val -= evalmul();
			break;
		default:
			return val;
		}
	}
}
/* evaluate a left-associative chain of "<<" and ">>" */
static long evalshift(void)
{
	long val = evaladd();
	for (;;) {
		if (eval_jmp(TOK2("<<")) == 0)
			val <<= evaladd();
		else if (eval_jmp(TOK2(">>")) == 0)
			val >>= evaladd();
		else
			break;
	}
	return val;
}
/* evaluate a left-associative chain of '<', '>', "<=" and ">=" */
static long evalcmp(void)
{
	long val = evalshift();
	for (;;) {
		if (eval_jmp('<') == 0)
			val = val < evalshift();
		else if (eval_jmp('>') == 0)
			val = val > evalshift();
		else if (eval_jmp(TOK2("<=")) == 0)
			val = val <= evalshift();
		else if (eval_jmp(TOK2(">=")) == 0)
			val = val >= evalshift();
		else
			break;
	}
	return val;
}
/* evaluate a left-associative chain of "==" and "!=" */
static long evaleq(void)
{
	long val = evalcmp();
	for (;;) {
		if (eval_jmp(TOK2("==")) == 0)
			val = val == evalcmp();
		else if (eval_jmp(TOK2("!=")) == 0)
			val = val != evalcmp();
		else
			break;
	}
	return val;
}
/* evaluate a chain of bitwise '&' */
static long evalbitand(void)
{
	long val = evaleq();
	while (eval_see() == '&') {
		eval_get();
		val &= evaleq();
	}
	return val;
}
/* evaluate a chain of bitwise '^' */
static long evalxor(void)
{
	long val = evalbitand();
	while (eval_see() == '^') {
		eval_get();
		val ^= evalbitand();
	}
	return val;
}
/* evaluate a chain of bitwise '|' */
static long evalbitor(void)
{
	long val = evalxor();
	while (eval_see() == '|') {
		eval_get();
		val |= evalxor();
	}
	return val;
}
/*
 * Evaluate a chain of "&&".
 *
 * The right operand is always parsed, even when the left one is already
 * zero: evaluating is what consumes its tokens, so C's short-circuit
 * would leave them in the input and derail the rest of the expression.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Evaluate a chain of "||".
 *
 * The right operand is always parsed, even when the left one is already
 * nonzero: evaluating is what consumes its tokens, so C's short-circuit
 * would leave them in the input and derail the rest of the expression.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
857 static long evalcexpr(void)
859 long ret = evalor();
860 if (eval_jmp('?'))
861 return ret;
862 if (ret)
863 return evalor();
864 while (eval_get() != ':')
866 return evalor();
869 static long evalexpr(void)
871 enext = -1;
872 return evalcexpr();
/* return the 1-based line number of offset off in the NUL-terminated text s */
static int buf_loc(char *s, int off)
{
	char *end = s + off;
	char *nl;
	int line = 1;
	for (nl = strchr(s, '\n'); nl && nl < end; nl = strchr(nl + 1, '\n'))
		line++;
	return line;
}
886 int cpp_loc(char *s, long addr)
888 int line = -1;
889 int i;
890 for (i = nbufs - 1; i > 0; i--)
891 if (bufs[i].type == BUF_FILE)
892 break;
893 if (addr >= hunk_off && i == nbufs - 1)
894 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
895 else
896 line = buf_loc(bufs[i].buf, bufs[i].cur);
897 sprintf(s, "%s:%d: ", bufs[i].path, line);
898 return strlen(s);