tok: accept uppercase hex digits
[neatcc/cc.git] / cpp.c
blob e4422558eedd30022081ca305dbdd47f02c2e022
1 #include <ctype.h>
2 #include <fcntl.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <unistd.h>
6 #include "tok.h"
/* the buffer currently being scanned, its length and the read position */
static char *buf;
static int len;
static int cur;
12 #define MAXDEFS (1 << 12)
13 #define MACROLEN (1 << 10)
14 #define MAXARGS (1 << 5)
16 static struct macro {
17 char name[NAMELEN];
18 char def[MACROLEN];
19 char args[MAXARGS][NAMELEN];
20 int nargs;
21 int isfunc;
22 } macros[MAXDEFS];
23 static int nmacros;
25 #define MAXBUFS (1 << 5)
26 #define BUF_FILE 0
27 #define BUF_MACRO 1
28 #define BUF_EVAL 2
29 #define BUF_TEMP 3
31 static struct buf {
32 char buf[BUFSIZE];
33 char name[NAMELEN];
34 int len;
35 int cur;
36 int type;
37 } bufs[MAXBUFS];
38 static int nbufs;
40 static void buf_new(int type, char *name)
42 if (nbufs) {
43 bufs[nbufs - 1].cur = cur;
44 bufs[nbufs - 1].len = len;
46 if (nbufs >= MAXBUFS)
47 die("nomem: MAXBUFS reached!\n");
48 nbufs++;
49 cur = 0;
50 len = 0;
51 buf = bufs[nbufs - 1].buf;
52 bufs[nbufs - 1].type = type;
53 strcpy(bufs[nbufs -1].name, name ? name : "");
56 static void buf_pop(void)
58 nbufs--;
59 if (nbufs) {
60 cur = bufs[nbufs - 1].cur;
61 len = bufs[nbufs - 1].len;
62 buf = bufs[nbufs - 1].buf;
66 static int buf_iseval(void)
68 int i;
69 for (i = 0; i < nbufs; i++)
70 if (bufs[i].type == BUF_EVAL)
71 return 1;
72 return 0;
75 static int buf_expanding(char *macro)
77 int i;
78 for (i = 0; i < nbufs; i++)
79 if (bufs[i].type == BUF_MACRO && !strcmp(macro, bufs[i].name))
80 return 1;
81 return 0;
84 static int include_file(char *path)
86 int fd = open(path, O_RDONLY);
87 int n = 0;
88 if (fd == -1)
89 return -1;
90 buf_new(BUF_FILE, path);
91 while ((n = read(fd, buf + len, BUFSIZE - len)) > 0)
92 len += n;
93 close(fd);
94 return 0;
/*
 * Define the built-in macros and open the main source file.
 * Failing to open path is fatal; it used to go unnoticed.
 */
void cpp_init(char *path)
{
	cpp_define("__STDC__", "");
	cpp_define("__x86_64__", "");
	cpp_define("__linux__", "");
	if (include_file(path))
		die("cannot open source file\n");
}
105 static void jumpws(void)
107 while (cur < len && isspace(buf[cur]))
108 cur++;
111 static void read_word(char *dst)
113 jumpws();
114 while (cur < len && (isalnum(buf[cur]) || buf[cur] == '_'))
115 *dst++ = buf[cur++];
116 *dst = '\0';
119 static void jumpcomment(void)
121 while (++cur < len) {
122 if (buf[cur] == '*' && buf[cur + 1] == '/') {
123 cur += 2;
124 break;
129 static void read_tilleol(char *dst)
131 while (cur < len && isspace(buf[cur]) && buf[cur] != '\n')
132 cur++;
133 while (cur < len && buf[cur] != '\n') {
134 if (buf[cur] == '\\')
135 cur += 2;
136 else if (buf[cur] == '/' && buf[cur + 1] == '*')
137 jumpcomment();
138 else
139 *dst++ = buf[cur++];
141 *dst = '\0';
/*
 * Copy the string s to d, including its terminating NUL, and return
 * a pointer to that NUL in d so that calls can be chained.
 * s is only read, hence const.
 */
static char *putstr(char *d, const char *s)
{
	while (*s)
		*d++ = *s++;
	*d = '\0';
	return d;
}
#define MAXLOCS		(1 << 10)

/* directories searched for included files */
static char *locs[MAXLOCS] = {"/usr/include"};
static int nlocs = 1;

/*
 * Append s to the include search path; the pointer is stored, not
 * the string.  The last slot is kept free because include_find()
 * reads locs[nlocs] as a NULL entry.
 */
void cpp_addpath(char *s)
{
	if (nlocs >= MAXLOCS - 1)
		die("nomem: MAXLOCS reached!\n");
	locs[nlocs++] = s;
}
162 static int include_find(char *name, int std)
164 int i;
165 for (i = std ? nlocs - 1 : nlocs; i >= 0; i--) {
166 char path[1 << 10];
167 char *s;
168 s = path;
169 if (locs[i]) {
170 s = putstr(s, locs[i]);
171 *s++ = '/';
173 s = putstr(s, name);
174 if (!include_file(path))
175 return 0;
177 return -1;
180 static void jumpstr(void)
182 if (buf[cur] == '\'') {
183 while (cur < len && buf[++cur] != '\'')
184 if (buf[cur] == '\\')
185 cur++;
186 cur++;
187 return;
189 if (buf[cur] == '"') {
190 while (cur < len && buf[++cur] != '"')
191 if (buf[cur] == '\\')
192 cur++;
193 cur++;
194 return;
198 static void readarg(char *s)
200 int depth = 0;
201 int beg = cur;
202 while (cur < len && (depth || buf[cur] != ',' && buf[cur] != ')')) {
203 switch (buf[cur]) {
204 case '(':
205 case '[':
206 case '{':
207 cur++;
208 depth++;
209 break;
210 case ')':
211 case ']':
212 case '}':
213 cur++;
214 depth--;
215 break;
216 case '\'':
217 case '"':
218 jumpstr();
219 break;
220 default:
221 if (buf[cur] == '/' && buf[cur + 1] == '*')
222 jumpcomment();
223 else
224 cur++;
227 memcpy(s, buf + beg, cur - beg);
228 s[cur - beg] = '\0';
231 static int macro_find(char *name)
233 int i;
234 for (i = 0; i < nmacros; i++)
235 if (!strcmp(name, macros[i].name))
236 return i;
237 return -1;
240 static int macro_new(char *name)
242 int i;
243 for (i = 0; i < nmacros; i++) {
244 if (!strcmp(name, macros[i].name))
245 return i;
246 if (!*macros[i].name) {
247 strcpy(macros[i].name, name);
248 return i;
251 if (nmacros >= MAXDEFS)
252 die("nomem: MAXDEFS reached!\n");
253 strcpy(macros[nmacros++].name, name);
254 return nmacros - 1;
257 static void macro_define(void)
259 char name[NAMELEN];
260 struct macro *d;
261 read_word(name);
262 d = &macros[macro_new(name)];
263 d->isfunc = 0;
264 if (buf[cur] == '(') {
265 cur++;
266 jumpws();
267 while (cur < len && buf[cur] != ')') {
268 readarg(d->args[d->nargs++]);
269 jumpws();
270 if (buf[cur++] != ',')
271 break;
272 jumpws();
274 d->isfunc = 1;
276 read_tilleol(d->def);
279 int cpp_read(char *buf);
281 static char ebuf[BUFSIZE];
282 static int elen;
283 static int ecur;
285 static long evalexpr(void);
287 static int cpp_eval(void)
289 int bufid;
290 int ret;
291 char evalbuf[BUFSIZE];
292 read_tilleol(evalbuf);
293 buf_new(BUF_EVAL, NULL);
294 strcpy(buf, evalbuf);
295 len = strlen(evalbuf);
296 bufid = nbufs;
297 elen = 0;
298 ecur = 0;
299 while (bufid < nbufs || cur < len)
300 elen += cpp_read(ebuf + elen);
301 ret = evalexpr();
302 buf_pop();
303 return ret;
306 static void jumpifs(int jumpelse)
308 int depth = 0;
309 while (cur < len) {
310 if (buf[cur] == '#') {
311 char cmd[NAMELEN];
312 cur++;
313 read_word(cmd);
314 if (!strcmp("else", cmd))
315 if (!depth && !jumpelse)
316 break;
317 if (!strcmp("elif", cmd))
318 if (!depth && !jumpelse && cpp_eval())
319 break;
320 if (!strcmp("endif", cmd)) {
321 if (!depth)
322 break;
323 else
324 depth--;
326 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
327 !strcmp("if", cmd))
328 depth++;
329 continue;
331 if (buf[cur] == '/' && buf[cur + 1] == '*') {
332 jumpcomment();
333 continue;
335 if (buf[cur] == '\'' || buf[cur] == '"') {
336 jumpstr();
337 continue;
339 cur++;
343 static void cpp_cmd(void)
345 char cmd[NAMELEN];
346 cur++;
347 read_word(cmd);
348 if (!strcmp("define", cmd)) {
349 macro_define();
350 return;
352 if (!strcmp("undef", cmd)) {
353 char name[NAMELEN];
354 int idx;
355 read_word(name);
356 idx = macro_find(name);
357 if (idx != -1)
358 strcpy(macros[idx].name, "");
359 return;
361 if (!strcmp("ifdef", cmd) || !strcmp("ifndef", cmd) ||
362 !strcmp("if", cmd)) {
363 char name[NAMELEN];
364 int matched = 0;
365 if (cmd[2]) {
366 int not = cmd[2] == 'n';
367 read_word(name);
368 matched = not ? macro_find(name) < 0 :
369 macro_find(name) >= 0;
370 } else {
371 matched = cpp_eval();
373 if (!matched)
374 jumpifs(0);
375 return;
377 if (!strcmp("else", cmd) || !strcmp("elif", cmd)) {
378 jumpifs(1);
379 return;
381 if (!strcmp("endif", cmd))
382 return;
383 if (!strcmp("include", cmd)) {
384 char file[NAMELEN];
385 char *s, *e;
386 jumpws();
387 s = buf + cur + 1;
388 e = strchr(buf + cur + 1, buf[cur] == '"' ? '"' : '>');
389 memcpy(file, s, e - s);
390 file[e - s] = '\0';
391 cur += e - s + 2;
392 if (include_find(file, *e == '>') == -1)
393 die("cannot include file\n");
394 return;
398 static int macro_arg(struct macro *m, char *arg)
400 int i;
401 for (i = 0; i < m->nargs; i++)
402 if (!strcmp(arg, m->args[i]))
403 return i;
404 return -1;
407 static void macro_expand(void)
409 char name[NAMELEN];
410 char args[MAXARGS][MACROLEN];
411 int nargs = 0;
412 struct macro *m;
413 char *dst;
414 int dstlen = 0;
415 int beg;
416 read_word(name);
417 m = &macros[macro_find(name)];
418 if (!m->isfunc) {
419 buf_new(BUF_MACRO, name);
420 strcpy(buf, m->def);
421 len = strlen(m->def);
422 return;
424 jumpws();
425 if (buf[cur] == '(') {
426 cur++;
427 jumpws();
428 while (cur < len && buf[cur] != ')') {
429 readarg(args[nargs++]);
430 jumpws();
431 if (buf[cur] != ',')
432 break;
433 cur++;
434 jumpws();
436 cur++;
437 m->isfunc = 1;
439 buf_new(BUF_MACRO, name);
440 dst = buf;
441 buf = m->def;
442 len = strlen(m->def);
443 beg = cur;
444 while (cur < len) {
445 if (buf[cur] == '/' && buf[cur + 1] == '*') {
446 jumpcomment();
447 continue;
449 if (strchr("'\"", buf[cur])) {
450 jumpstr();
451 continue;
453 if (isalpha(buf[cur]) || buf[cur] == '_') {
454 int arg;
455 char word[NAMELEN];
456 read_word(word);
457 if ((arg = macro_arg(m, word)) != -1) {
458 int len = cur - beg - strlen(word);
459 char *argstr = arg > nargs ? "" : args[arg];
460 int arglen = strlen(argstr);
461 memcpy(dst + dstlen, buf + beg, len);
462 dstlen += len;
463 memcpy(dst + dstlen, argstr, arglen);
464 dstlen += arglen;
465 beg = cur;
467 continue;
469 cur++;
471 memcpy(dst + dstlen, buf + beg, len - beg);
472 dstlen += len - beg;
473 buf = dst;
474 len = dstlen;
475 cur = 0;
476 buf[len] = '\0';
479 void cpp_define(char *name, char *def)
481 char *s;
482 buf_new(BUF_TEMP, NULL);
483 s = buf;
484 s = putstr(s, name);
485 *s++ = '\t';
486 s = putstr(s, def);
487 len = s - buf;
488 macro_define();
489 buf_pop();
492 static int definedword;
494 static int hunk_off;
495 static int hunk_len;
497 int cpp_read(char *s)
499 int old;
500 if (definedword) {
501 definedword = 0;
502 macro_expand();
504 if (cur == len) {
505 if (nbufs < 2)
506 return -1;
507 buf_pop();
509 old = cur;
510 if (buf[cur] == '#') {
511 cpp_cmd();
512 return 0;
514 while (cur < len) {
515 if (buf[cur] == '#')
516 break;
517 if (buf[cur] == '/' && buf[cur + 1] == '*') {
518 jumpcomment();
519 continue;
521 if (buf[cur] == '\'' || buf[cur] == '"') {
522 jumpstr();
523 continue;
525 if (isalpha(buf[cur]) || buf[cur] == '_') {
526 char word[NAMELEN];
527 read_word(word);
528 if (!buf_expanding(word) && macro_find(word) != -1) {
529 cur -= strlen(word);
530 definedword = 1;
531 break;
533 if (buf_iseval() && !strcmp("defined", word)) {
534 int parens = 0;
535 jumpws();
536 if (buf[cur] == '(') {
537 parens = 1;
538 cur++;
540 read_word(word);
541 if (parens) {
542 jumpws();
543 cur++;
546 continue;
548 cur++;
550 memcpy(s, buf + old, cur - old);
551 s[cur - old] = '\0';
552 if (!buf_iseval()) {
553 hunk_off += hunk_len;
554 hunk_len = cur - old;
556 return cur - old;
559 /* preprocessor constant expression evaluation */
561 static char etok[NAMELEN];
562 static int enext;
564 static char *tok2[] = {
565 "<<", ">>", "&&", "||", "==", "!=", "<=", ">="
568 static int eval_tok(void)
570 char *s = etok;
571 int i;
572 while (ecur < elen) {
573 while (ecur < elen && isspace(ebuf[ecur]))
574 ecur++;
575 if (ebuf[ecur] == '/' && ebuf[ecur + 1] == '*') {
576 while (ecur < elen && (ebuf[ecur - 2] != '*' ||
577 ebuf[ecur - 1] != '/'))
578 ecur++;
579 continue;
581 break;
583 if (ecur >= elen)
584 return TOK_EOF;
585 if (isalpha(ebuf[ecur]) || ebuf[ecur] == '_') {
586 while (isalnum(ebuf[ecur]) || ebuf[ecur] == '_')
587 *s++ = ebuf[ecur++];
588 *s = '\0';
589 return TOK_NAME;
591 if (isdigit(ebuf[ecur])) {
592 while (isdigit(ebuf[ecur]))
593 *s++ = ebuf[ecur++];
594 while (tolower(ebuf[ecur]) == 'u' || tolower(ebuf[ecur]) == 'l')
595 ecur++;
596 return TOK_NUM;
598 for (i = 0; i < ARRAY_SIZE(tok2); i++)
599 if (TOK2(tok2[i]) == TOK2(ebuf + ecur)) {
600 int ret = TOK2(tok2[i]);
601 ecur += 2;
602 return ret;
604 return ebuf[ecur++];
607 static int eval_see(void)
609 if (enext == -1)
610 enext = eval_tok();
611 return enext;
614 static int eval_get(void)
616 if (enext != -1) {
617 int ret = enext;
618 enext = -1;
619 return ret;
621 return eval_tok();
624 static long eval_num(void)
626 return atol(etok);
/* consume the next token if it is tok; return 0 if it was, else 1 */
static int eval_jmp(int tok)
{
	if (eval_see() != tok)
		return 1;
	eval_get();
	return 0;
}
/* consume tok if it comes next; a mismatch is silently ignored */
static void eval_expect(int tok)
{
	(void) eval_jmp(tok);
}
643 static char *eval_id(void)
645 return etok;
648 static long evalcexpr(void);
650 static long evalatom(void)
652 if (!eval_jmp(TOK_NUM))
653 return eval_num();
654 if (!eval_jmp(TOK_NAME)) {
655 int parens = !eval_jmp('(');
656 long ret;
657 eval_expect(TOK_NAME);
658 ret = macro_find(eval_id()) >= 0;
659 if (parens)
660 eval_expect(')');
661 return ret;
663 if (!eval_jmp('(')) {
664 long ret = evalcexpr();
665 eval_expect(')');
666 return ret;
668 return -1;
/* parse the unary prefix operators '!', '-' and '~' */
static long evalpre(void)
{
	switch (eval_see()) {
	case '!':
		eval_get();
		return !evalpre();
	case '-':
		eval_get();
		return -evalpre();
	case '~':
		eval_get();
		return ~evalpre();
	default:
		return evalatom();
	}
}
/*
 * Parse a chain of '*', '/' and '%' operations, left to right.
 * Dividing by zero is undefined behaviour in C, so a zero divisor
 * makes the result 0 instead of crashing the preprocessor.
 */
static long evalmul(void)
{
	long ret = evalpre();
	for (;;) {
		int op = eval_see();
		long rhs;
		if (op != '*' && op != '/' && op != '%')
			return ret;
		eval_get();
		rhs = evalpre();
		if (op == '*')
			ret *= rhs;
		else if (!rhs)
			ret = 0;
		else if (op == '/')
			ret /= rhs;
		else
			ret %= rhs;
	}
}
/* parse a chain of '+' and '-' operations, left to right */
static long evaladd(void)
{
	long val = evalmul();
	for (;;) {
		int op = eval_see();
		if (op != '+' && op != '-')
			return val;
		eval_get();
		if (op == '+')
			val += evalmul();
		else
			val -= evalmul();
	}
}
/* parse a chain of "<<" and ">>" operations, left to right */
static long evalshift(void)
{
	long val = evaladd();
	for (;;) {
		int op = eval_see();
		if (op != TOK2("<<") && op != TOK2(">>"))
			return val;
		eval_get();
		if (op == TOK2("<<"))
			val <<= evaladd();
		else
			val >>= evaladd();
	}
}
/* parse a chain of '<', '>', "<=" and ">=" comparisons, left to right */
static long evalcmp(void)
{
	long val = evalshift();
	for (;;) {
		int op = eval_see();
		long rhs;
		if (op != '<' && op != '>' &&
				op != TOK2("<=") && op != TOK2(">="))
			return val;
		eval_get();
		rhs = evalshift();
		if (op == '<')
			val = val < rhs;
		else if (op == '>')
			val = val > rhs;
		else if (op == TOK2("<="))
			val = val <= rhs;
		else
			val = val >= rhs;
	}
}
/* parse a chain of "==" and "!=" comparisons, left to right */
static long evaleq(void)
{
	long val = evalcmp();
	for (;;) {
		int op = eval_see();
		long rhs;
		if (op != TOK2("==") && op != TOK2("!="))
			return val;
		eval_get();
		rhs = evalcmp();
		val = op == TOK2("==") ? val == rhs : val != rhs;
	}
}
/* parse a chain of bitwise '&' operations */
static long evalbitand(void)
{
	long val = evaleq();
	for (;;) {
		if (eval_jmp('&'))
			return val;
		val = val & evaleq();
	}
}
/* parse a chain of bitwise '^' operations */
static long evalxor(void)
{
	long val = evalbitand();
	for (;;) {
		if (eval_jmp('^'))
			return val;
		val = val ^ evalbitand();
	}
}
/* parse a chain of bitwise '|' operations */
static long evalbitor(void)
{
	long val = evalxor();
	for (;;) {
		if (eval_jmp('|'))
			return val;
		val = val | evalxor();
	}
}
/*
 * Parse a chain of "&&" operations.
 *
 * The right operand is always parsed, even when the left one is zero.
 * Letting C's own short-circuit skip the call would leave the
 * operand's tokens unread and desynchronise the parser, e.g.
 * "0 && 1 || 1" would evaluate to 0.
 */
static long evaland(void)
{
	long ret = evalbitor();
	while (!eval_jmp(TOK2("&&"))) {
		long rhs = evalbitor();
		ret = ret && rhs;
	}
	return ret;
}
/*
 * Parse a chain of "||" operations.
 *
 * The right operand is always parsed, even when the left one is
 * nonzero.  Letting C's own short-circuit skip the call would leave
 * the operand's tokens unread, e.g. "(1 || 0) && 0" would evaluate
 * to 1.
 */
static long evalor(void)
{
	long ret = evaland();
	while (!eval_jmp(TOK2("||"))) {
		long rhs = evaland();
		ret = ret || rhs;
	}
	return ret;
}
/*
 * Parse a conditional expression: cond, or cond ? a : b.
 *
 * Both branches are parsed so that every token of the expression is
 * consumed whichever branch is selected, and a missing ':' cannot
 * cause an endless loop at the end of input.  Branches may contain
 * conditional expressions themselves.
 */
static long evalcexpr(void)
{
	long cond = evalor();
	long yes, no;
	if (eval_jmp('?'))
		return cond;
	yes = evalcexpr();
	eval_expect(':');
	no = evalcexpr();
	return cond ? yes : no;
}
831 static long evalexpr(void)
833 enext = -1;
834 return evalcexpr();
/* return the number of decimal digits of n, not counting any sign */
static int numwidth(int n)
{
	int w = 1;
	while (n /= 10)
		w++;
	return w;
}

/*
 * Write n in decimal at s, without a terminating NUL, and return a
 * pointer just past the last character written.  Negative values get
 * a leading '-'; the magnitude is kept unsigned so that INT_MIN is
 * handled as well.
 */
static char *putnum(char *s, int n)
{
	unsigned int u = n < 0 ? 0u - (unsigned int) n : (unsigned int) n;
	int w = numwidth(n);
	int i;
	if (n < 0)
		*s++ = '-';
	/* fill the digits from the least significant one backwards */
	for (i = w - 1; i >= 0; i--) {
		s[i] = '0' + u % 10;
		u /= 10;
	}
	return s + w;
}
/*
 * Return the 1-based line number of offset off in the text at s,
 * i.e. one more than the number of newlines in the first off bytes.
 * Only those bytes are examined, so the text needs no terminating
 * NUL; a zero or negative off yields line 1.
 */
static int buf_loc(char *s, int off)
{
	char *e = s + off;
	int n = 1;
	while (s < e && (s = memchr(s, '\n', e - s))) {
		n++;
		s++;
	}
	return n;
}
869 int cpp_loc(char *s, long addr)
871 int line = -1;
872 int i;
873 char *o = s;
874 for (i = nbufs - 1; i > 0; i--)
875 if (bufs[i].type == BUF_FILE)
876 break;
877 if (addr >= hunk_off && i == nbufs - 1)
878 line = buf_loc(buf, (cur - hunk_len) + (addr - hunk_off));
879 else
880 line = buf_loc(bufs[i].buf, bufs[i].cur);
881 s = putstr(s, bufs[i].name);
882 *s++ = ':';
883 s = putnum(s, line);
884 *s++ = ':';
885 *s++ = ' ';
886 return s - o;