Initial revision
[tinycc.git] / tcc.c
blob4ea084f31b68c92282e63614e2cf05d1ab3974e2
1 #include <stdio.h>
3 #define TEXT_SIZE 20000
4 #define DATA_SIZE 2000
5 #define SYM_TABLE_SIZE 10000
6 #define VAR_TABLE_SIZE 4096
8 /* vac: offset of variables
9 vat: type of variables
10 loc : local variable index
11 glo : global variable index
12 parm : parameter variable index
13 ind : output code ptr
14 lsym: loop symbol stack
15 rsym: return symbol
16 prog: output code
17 astk: arg position stack
19 int tok, *vac, *vat, *lsym, rsym,
20 prog, ind, loc, glo, file, vt,
21 vc, *macro_stack, *macro_stack_ptr;
22 char *idtable, *idptr, *idlast;
24 /* The current value can be: */
25 #define VT_CONST 0x0002 /* constant in vc */
26 #define VT_VAR 0x0004 /* value is in eax */
27 #define VT_LOCAL 0x0008 /* offset on stack */
29 #define VT_LVAL 0x0010 /* const or var is an lvalue */
30 #define VT_CMP 0x0020 /* the value is stored in processor flags (in vc) */
31 #define VT_FORWARD 0x0040 /* value is forward reference (only used for functions) */
32 #define VT_JMP 0x0080 /* value is the consequence of jmp. bit 0 is set if inv */
34 #define VT_LVALN -17 /* ~VT_LVAL */
38 * VT_FUNC indicates a function. The return type is the stored type. A
39 * function pointer is stored as a 'char' pointer.
41 * If VT_PTRMASK is non nul, then it indicates the number of pointer
42 * iterations to reach the basic type.
44 * Basic types:
46 * VT_BYTE indicate a char
49 * otherwise integer type is assumed.
50 * */
52 #define VT_BYTE 0x00001 /* byte pointer. HARDCODED VALUE */
53 #define VT_PTRMASK 0x00f00 /* pointer mask */
54 #define VT_PTRINC 0x00100 /* pointer increment */
55 #define VT_FUNC 0x01000 /* function type */
56 #define VT_TYPE 0x01f01 /* type mask */
58 #define VT_TYPEN 0xffffe0fe /* ~VT_TYPE */
59 #define VT_FUNCN -4097
61 /* Special infos */
62 #define VT_DEFINE 0x02000 /* special value for #defined symbols */
64 /* token values */
65 #define TOK_INT 256
66 #define TOK_VOID 257
67 #define TOK_CHAR 258
68 #define TOK_IF 259
69 #define TOK_ELSE 260
70 #define TOK_WHILE 261
71 #define TOK_BREAK 262
72 #define TOK_RETURN 263
73 #define TOK_DEFINE 264
74 #define TOK_MAIN 265
76 #define TOK_EQ 0x94 /* warning: depend on asm code */
77 #define TOK_NE 0x95 /* warning: depend on asm code */
78 #define TOK_LT 0x9c /* warning: depend on asm code */
79 #define TOK_GE 0x9d /* warning: depend on asm code */
80 #define TOK_LE 0x9e /* warning: depend on asm code */
81 #define TOK_GT 0x9f /* warning: depend on asm code */
83 #define TOK_LAND 0xa0
84 #define TOK_LOR 0xa1
86 #define TOK_DEC 0xa2
87 #define TOK_MID 0xa3 /* inc/dec, to void constant */
88 #define TOK_INC 0xa4
90 #define TOK_SHL 0xe0 /* warning: depend on asm code */
91 #define TOK_SHR 0xf8 /* warning: depend on asm code */
93 #ifdef TEST
94 void error(char *msg)
96 printf("%d: %s\n", ftell(file), msg);
97 exit(1);
99 void warning(char *msg)
101 printf("%d: warning: %s\n", ftell(file), msg);
103 #endif
105 int inp()
107 #if 0
108 int c;
109 c = fgetc(file);
110 printf("c=%c\n", c);
111 return c;
112 #else
113 return fgetc(file);
114 #endif
117 int isid(c)
119 return (c >= 'a' & c <= 'z') |
120 (c >= 'A' & c <= 'Z') |
121 c == '_';
124 int isnum(c)
126 return c >= '0' & c <= '9';
129 #ifdef TEST
130 void skip(c)
132 if (tok != c) {
133 fprintf(stderr, "%d: '%c' expected\n", ftell(file), c);
134 exit(1);
136 next();
138 #else
139 #define skip(c) next()
140 #endif
142 void next()
144 int c, v;
145 char *q, *p;
147 while(1) {
148 c = inp();
149 if (c == 35) {
150 /* preprocessor: we handle only define */
151 next();
152 if (tok == TOK_DEFINE) {
153 next();
154 /* now tok is the macro symbol */
155 vat[tok] = VT_DEFINE;
156 vac[tok] = ftell(file);
158 /* ignore preprocessor or shell */
159 while (c != '\n')
160 c = inp();
161 } else if (c == '\n') {
162 /* end of line : check if we are in macro state. if so,
163 pop new file position */
164 if (macro_stack_ptr > macro_stack)
165 fseek(file, *--macro_stack_ptr, 0);
166 } else if (c != ' ' & c != 9)
167 break;
169 if (isid(c)) {
170 q = idptr;
171 idlast = q;
172 while(isid(c) | isnum(c)) {
173 *q++ = c;
174 c = inp();
176 *q++ = '\0';
177 ungetc(c, file);
178 p = idtable;
179 tok = 256;
180 while (p < idptr) {
181 if (strcmp(p, idptr) == 0)
182 break;
183 while (*p++);
184 tok++;
186 /* if not found, add symbol */
187 if (p == idptr)
188 idptr = q;
189 /* eval defines */
190 if (vat[tok] & VT_DEFINE) {
191 *macro_stack_ptr++ = ftell(file);
192 fseek(file, vac[tok], 0);
193 next();
195 } else {
196 q = "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\340>>\370";
197 /* two chars */
198 v = inp();
199 while (*q) {
200 if (*q == c & q[1] == v) {
201 tok = q[2] & 0xff;
202 return;
204 q = q + 3;
206 ungetc(v, file);
207 /* single char substitutions */
208 if (c == '<')
209 tok = TOK_LT;
210 else if (c == '>')
211 tok = TOK_GT;
212 else
213 tok = c;
217 void g(c)
219 *(char *)ind++ = c;
222 void o(c)
224 while (c) {
225 g(c);
226 c = c / 256;
230 /* output a symbol and patch all calls to it */
231 void gsym(t)
233 int n;
234 while (t) {
235 n = *(int *)t; /* next value */
236 *(int *)t = ind - t - 4;
237 t = n;
241 /* psym is used to put an instruction with a data field which is a
242 reference to a symbol. It is in fact the same as oad ! */
243 #define psym oad
245 /* instruction + 4 bytes data. Return the address of the data */
246 int oad(c, s)
248 o(c);
249 *(int *)ind = s;
250 s = ind;
251 ind = ind + 4;
252 return s;
255 /* push to value stack */
256 void vset(t, v)
258 vt = t;
259 vc = v;
262 /* generate a value in eax from vt and vc */
263 void gv()
265 #ifndef TINY
266 int t;
267 #endif
268 if (vt & VT_LVAL) {
269 if ((vt & VT_TYPE) == VT_BYTE)
270 o(0xbe0f); /* movsbl x, %eax */
271 else
272 o(0x8b); /* movl x,%eax */
273 if (vt & VT_CONST)
274 oad(0x05, vc);
275 else if (vt & VT_LOCAL)
276 oad(0x85, vc);
277 else
278 g(0x00);
279 } else {
280 if (vt & VT_CONST) {
281 oad(0xb8, vc); /* mov $xx, %eax */
282 } else if (vt & VT_LOCAL) {
283 oad(0x858d, vc); /* lea xxx(%ebp), %eax */
284 } else if (vt & VT_CMP) {
285 oad(0xb8, 0); /* mov $0, %eax */
286 o(0x0f); /* setxx %al */
287 o(vc);
288 o(0xc0);
290 #ifndef TINY
291 else if (vt & VT_JMP) {
292 t = vt & 1;
293 oad(0xb8, t); /* mov $1, %eax */
294 oad(0xe9, 5); /* jmp after */
295 gsym(vc);
296 oad(0xb8, t ^ 1); /* mov $0, %eax */
298 #endif
300 vt = (vt & VT_TYPE) | VT_VAR;
303 /* generate a test. set 'inv' to invert test */
304 /* XXX: handle constant */
305 int gtst(inv, t)
307 if (vt & VT_CMP) {
308 /* fast case : can jump directly since flags are set */
309 g(0x0f);
310 t = psym((vc - 16) ^ inv, t);
311 } else
312 #ifndef TINY
313 if (vt & VT_JMP) {
314 /* && or || optimization */
315 if ((vt & 1) == inv)
316 t = vc;
317 else {
318 t = psym(0xe9, t);
319 gsym(vc);
321 } else
322 if ((vt & (VT_CONST | VT_LVAL)) == VT_CONST) {
323 /* constant jmp optimization */
324 if ((vc != 0) != inv)
325 t = psym(0xe9, t);
326 } else
327 #endif
329 gv();
330 o(0xc085); /* test %eax, %eax */
331 g(0x0f);
332 t = psym(0x85 ^ inv, t);
334 return t;
337 /* return the size (in bytes) of a given type */
338 int type_size(t)
340 if ((t & VT_PTRMASK) > VT_PTRINC | (t & VT_TYPE) == VT_PTRINC)
341 return 4;
342 else
343 return 1;
346 #define POST_ADD 0x1000
347 #define PRE_ADD 0
349 /* a defines POST/PRE add. c is the token ++ or -- */
350 void inc(a, c)
352 #ifdef TEST
353 if (!(vt & VT_LVAL))
354 error("lvalue expected\n");
355 #endif
356 vt = vt & VT_LVALN;
357 gv();
358 o(0x018bc189); /* movl %eax, %ecx ; mov (%ecx), %eax */
359 o(0x408d | a); /* leal x(%eax), %eax/%edx */
360 g((c - TOK_MID) * type_size(vt));
361 o(0x0189 | a); /* mov %eax/%edx, (%ecx) */
364 /* op is '-' or '+' (or 0) */
365 /* t is the type of the first operand */
366 /* XXX: handle ptr sub and 'int + ptr' case (only 'ptr + int' handled) */
367 void gen_op(op, t)
369 gv();
370 o(0x59); /* pop %ecx */
371 if (op == '+' | op == '-') {
372 /* XXX: incorrect for short (futur!) */
373 if (type_size(t) != 1)
374 o(0x02e0c1); /* shl $2, %eax */
375 if (op == '-')
376 o(0xd8f7); /* neg %eax */
377 o(0xc801); /* add %ecx, %eax */
378 vt = t;
379 } else if (op == '&')
380 o(0xc821);
381 else if (op == '^')
382 o(0xc831);
383 else if (op == '|')
384 o(0xc809);
385 else if (op == '*')
386 o(0xc1af0f); /* imul %ecx, %eax */
387 else if (op == TOK_SHL | op == TOK_SHR) {
388 o(0xd391); /* xchg %ecx, %eax, shl/sar %cl, %eax */
389 o(op);
390 } else if (op == '/' | op == '%') {
391 o(0xd231); /* xor %edx, %edx */
392 o(0xf9f791); /* xchg %ecx, %eax, idiv %ecx, %eax */
393 if (op == '%')
394 o(0x92); /* xchg %edx, %eax */
395 } else {
396 o(0xc139); /* cmp %eax,%ecx */
397 vset(VT_CMP, op);
401 /* return 0 if no type declaration. otherwise, return the basic type
402 and skip it.
403 XXX: A '2' is ored to ensure non zero return if int type.
405 int ist()
407 int t;
409 if (tok == TOK_INT | tok == TOK_CHAR | tok == TOK_VOID) {
410 t = tok;
411 next();
412 return (t != TOK_INT) | 2;
413 } else {
414 return 0;
418 /* Read a type declaration (except basic type), and return the
419 type. If v is true, then also put variable name in 'vc' */
420 int typ(v,t)
422 int u, p, n;
424 t = t & -3; /* suppress the ored '2' */
425 while (tok == '*') {
426 next();
427 t = t + VT_PTRINC;
430 /* recursive type */
431 /* XXX: incorrect if abstract type for functions (e.g. 'int ()') */
432 if (tok == '(') {
433 next();
434 u = typ(v, 0);
435 skip(')');
436 } else {
437 u = 0;
438 /* type identifier */
439 if (v) {
440 vc = tok;
441 next();
444 /* function declaration */
445 if (tok == '(') {
446 next();
447 p = 4;
448 n = vc; /* must save vc there */
449 while (tok != ')') {
450 /* read param name and compute offset */
451 if (t = ist())
452 t = typ(1, t); /* XXX: should accept both arg/non arg if v == 0 */
453 else {
454 vc = tok;
455 t = 0;
456 next();
458 p = p + 4;
459 vat[vc] = VT_LOCAL | VT_LVAL | t;
460 vac[vc] = p;
461 if (tok == ',')
462 next();
464 next(); /* skip ')' */
465 vc = n;
466 if (u)
467 t = u + VT_BYTE;
468 else
469 t = t | VT_FUNC;
471 return t;
474 int getq(n)
476 int c;
477 if (n == '\\') {
478 n = inp();
479 if (n == 'n')
480 n = '\n';
481 else if (isnum(n)) {
482 c = 0;
483 while (isnum(n)) {
484 c = c * 8 + n - '0';
485 n = inp();
487 ungetc(n, file);
488 return c;
491 return n;
494 void unary()
496 int n, t, ft, fc, p;
498 if (isnum(tok)) {
499 /* number */
500 n = 0;
501 while (isnum(tok)) {
502 n = n * 10 + tok - '0';
503 next();
505 vset(VT_CONST, n);
506 } else if (tok == '\'') {
507 vset(VT_CONST, getq(inp()));
508 next(); /* skip char */
509 skip('\'');
510 } else if (tok == '\"') {
511 vset(VT_CONST | VT_PTRINC | VT_BYTE, glo);
512 while((n = inp()) != 34) {
513 *(char *)glo = getq(n);
514 glo++;
516 *(char *)glo = 0;
517 glo = (glo + 4) & -4; /* align heap */
518 next();
519 } else {
520 t = tok;
521 next();
522 if (t == '(') {
523 /* cast ? */
524 if (t = ist()) {
525 ft = typ(0, t);
526 skip(')');
527 unary();
528 vt = (vt & VT_TYPEN) | ft;
529 } else {
530 expr();
531 skip(')');
533 } else if (t == '*') {
534 unary();
535 if (vt & VT_LVAL)
536 gv();
537 #ifdef TEST
538 if (!(vt & VT_PTRMASK))
539 error("pointer expected");
540 #endif
541 vt = (vt - VT_PTRINC) | VT_LVAL;
542 } else if (t == '&') {
543 unary();
544 #ifdef TEST
545 if (!(vt & VT_LVAL))
546 error("lvalue expected");
547 #endif
548 vt = vt & VT_LVALN;
549 vt = vt + VT_PTRINC;
550 } else
551 #ifndef TINY
552 if (t == '!') {
553 unary();
554 if (vt & VT_CMP)
555 vc = vc ^ 1;
556 else
557 vset(VT_JMP, gtst(1, 0));
558 } else
559 if (t == '~') {
560 unary();
561 if ((vt & (VT_CONST | VT_LVAL)) == VT_CONST)
562 vc = ~vc;
563 else {
564 gv();
565 o(0xd0f7);
567 } else
568 #endif
569 if (t == TOK_INC | t == TOK_DEC) {
570 unary();
571 inc(PRE_ADD, t);
572 } else if (t == '-') {
573 unary();
574 if ((vt & (VT_CONST | VT_LVAL)) == VT_CONST)
575 vc = -vc;
576 else {
577 gv();
578 o(0xd8f7); /* neg %eax */
580 } else if (t == '+') {
581 unary();
582 } else {
583 vset(vat[t], vac[t]);
584 /* forward reference or external reference ? */
585 if (vt == 0) {
586 n = dlsym(0, idlast);
587 if (n == 0)
588 vset(VT_CONST | VT_FORWARD | VT_LVAL, vac + t);
589 else
590 vset(VT_CONST | VT_LVAL, n);
595 /* post operations */
596 if (tok == TOK_INC | tok == TOK_DEC) {
597 inc(POST_ADD, tok);
598 next();
599 } else
600 if (tok == '[') {
601 #ifdef TEST
602 if (!(vt & VT_PTRMASK))
603 error("pointer expected");
604 #endif
605 gv();
606 ft = vt;
607 fc = vc;
608 next();
609 o(0x50); /* push %eax */
610 expr();
611 gen_op('+', ft);
612 /* dereference pointer */
613 vt = (ft - VT_PTRINC) | VT_LVAL;
614 vc = fc;
615 skip(']');
616 } else
617 if (tok == '(') {
618 /* function call */
619 /* lvalue is implied */
620 vt = vt & VT_LVALN;
621 if ((vt & VT_CONST) == 0) {
622 /* evaluate function address */
623 gv();
624 o(0x50); /* push %eax */
626 ft = vt;
627 fc = vc;
629 next();
630 t = 0;
631 while (tok != ')') {
632 t = t + 4;
633 expr();
634 gv();
635 o(0x50); /* push %eax */
636 if (tok == ',')
637 next();
639 skip(')');
640 /* horrible, but needed : convert to native ordering (could
641 parse parameters in reverse order, but would cost more
642 code) */
643 n = 0;
644 p = t - 4;
645 while (n < p) {
646 oad(0x24848b, p); /* mov x(%esp,1), %eax */
647 oad(0x248487, n); /* xchg x(%esp,1), %eax */
648 oad(0x248489, p); /* mov %eax, x(%esp,1) */
649 n = n + 4;
650 p = p - 4;
652 if (ft & VT_CONST) {
653 /* forward reference */
654 if (ft & VT_FORWARD)
655 *(int *)fc = psym(0xe8, *(int *)fc);
656 else
657 oad(0xe8, fc - ind - 5);
658 } else {
659 oad(0x2494ff, t); /* call *xxx(%esp) */
660 t = t + 4;
662 if (t)
663 oad(0xc481, t);
664 /* return value is variable */
665 vt = VT_VAR;
669 void uneq()
671 int ft, fc, b;
673 unary();
674 if (tok == '=') {
675 #ifdef TEST
676 if (!(vt & VT_LVAL))
677 error("lvalue expected");
678 #endif
679 next();
680 fc = vc;
681 ft = vt;
682 b = (vt & VT_TYPE) == VT_BYTE;
683 if (ft & VT_VAR)
684 o(0x50); /* push %eax */
685 expr();
686 #ifdef TEST
687 if ((vt & VT_PTRMASK) != (ft & VT_PTRMASK))
688 warning("incompatible type");
689 #endif
690 gv(); /* generate value */
692 if (ft & VT_VAR) {
693 o(0x59); /* pop %ecx */
694 o(0x0189 - b); /* mov %eax/%al, (%ecx) */
695 } else {
696 if (ft & VT_LOCAL)
697 oad(0x8589 - b, fc); /* mov %eax/%al,xxx(%ebp) */
698 else
699 oad(0xa3 - b, fc); /* mov %eax/%al,xxx */
704 void sum(l)
706 int op, t;
707 if (l == 0)
708 uneq();
709 else {
710 l--;
711 sum(l);
712 while (1) {
713 op = tok;
714 if ((l == 0 & op != '*' & op != '/' & op != '%') |
715 (l == 1 & op != '+' & op != '-') |
716 (l == 2 & op != TOK_SHL & op != TOK_SHR) |
717 (l == 3 & (op < TOK_LT | op > TOK_GT)) |
718 (l == 4 & op != TOK_EQ & op != TOK_NE) |
719 (l == 5 & op != '&') |
720 (l == 6 & op != '^') |
721 (l == 7 & op != '|'))
722 break;
723 gv();
724 t = vt;
725 o(0x50); /* push %eax */
726 next();
727 sum(l);
728 gen_op(op, t);
733 #ifdef TINY
734 void expr()
736 sum(8);
738 #else
739 void eand()
741 int t;
743 sum(8);
744 t = 0;
745 while (1) {
746 if (tok != TOK_LAND) {
747 if (t) {
748 t = gtst(1, t);
749 vset(VT_JMP | 1, t);
751 break;
753 t = gtst(1, t);
754 next();
755 sum(8);
759 void expr()
761 int t, u;
763 eand();
764 t = 0;
765 while (1) {
766 if (tok != TOK_LOR) {
767 if (t) {
768 t = gtst(0, t);
769 vset(VT_JMP, t);
771 break;
773 t = gtst(0, t);
774 next();
775 eand();
778 #endif
780 void block()
782 int a, c, d;
784 if (tok == TOK_IF) {
785 /* if test */
786 next();
787 skip('(');
788 expr();
789 skip(')');
790 a = gtst(1, 0);
791 block();
792 c = tok;
793 if (c == TOK_ELSE) {
794 next();
795 d = psym(0xe9, 0); /* jmp */
796 gsym(a);
797 block();
798 gsym(d); /* patch else jmp */
799 } else
800 gsym(a);
801 } else if (tok == TOK_WHILE) {
802 next();
803 d = ind;
804 skip('(');
805 expr();
806 skip(')');
807 *++lsym = gtst(1, 0);
808 block();
809 oad(0xe9, d - ind - 5); /* jmp */
810 gsym(*lsym--);
811 } else if (tok == '{') {
812 next();
813 /* declarations */
814 decl(VT_LOCAL);
815 while (tok != '}')
816 block();
817 next();
818 } else if (tok == TOK_RETURN) {
819 next();
820 if (tok != ';') {
821 expr();
822 gv();
824 skip(';');
825 rsym = psym(0xe9, rsym); /* jmp */
826 } else if (tok == TOK_BREAK) {
827 /* compute jump */
828 *lsym = psym(0xe9, *lsym);
829 next();
830 skip(';');
831 } else {
832 if (tok != ';')
833 expr();
834 skip(';');
838 /* 'l' is true if local declarations */
839 void decl(l)
841 int *a, t, b;
843 while (b = ist()) {
844 while (1) { /* iterate thru each declaration */
845 vt = typ(1, b);
846 if (tok == '{') {
847 /* patch forward references (XXX: does not work for
848 function pointers) */
849 if (vat[vc] == 0)
850 gsym(vac[vc]);
851 /* put function address */
852 vat[vc] = VT_CONST | VT_LVAL | vt;
853 vac[vc] = ind;
854 loc = 0;
855 o(0xe58955); /* push %ebp, mov %esp, %ebp */
856 a = oad(0xec81, 0); /* sub $xxx, %esp */
857 rsym = 0;
858 block();
859 gsym(rsym);
860 o(0xc3c9); /* leave, ret */
861 *a = loc; /* save local variables */
862 break;
863 } else {
864 /* variable */
865 vat[vc] = l | VT_LVAL | vt;
866 if (l == VT_LOCAL) {
867 loc = loc + 4;
868 vac[vc] = -loc;
869 } else {
870 vac[vc] = glo;
871 glo = glo + 4;
873 if (tok != ',') {
874 skip(';');
875 break;
877 next();
883 int main(int c, char **v)
885 int (*t)();
887 if (c < 2) {
888 printf("usage: tc src\n");
889 return 1;
891 v++;
892 file = fopen(*v, "r");
894 idtable = malloc(SYM_TABLE_SIZE);
895 memcpy(idtable,
896 "int\0void\0char\0if\0else\0while\0break\0return\0define\0main", 53);
897 idptr = idtable + 53;
898 glo = malloc(DATA_SIZE);
899 prog = malloc(TEXT_SIZE);
900 vac = malloc(VAR_TABLE_SIZE);
901 vat = malloc(VAR_TABLE_SIZE);
902 lsym = malloc(256);
903 macro_stack = malloc(256);
904 macro_stack_ptr = macro_stack;
905 ind = prog;
906 next();
907 decl(VT_CONST);
908 #ifdef TEST
910 FILE *f;
911 f = fopen(v[1], "w");
912 fwrite((void *)prog, 1, ind - prog, f);
913 fclose(f);
914 return 0;
916 #else
917 t = vac[TOK_MAIN];
918 return (*t)(c - 1, v);
919 #endif