3 #define TEXT_SIZE 20000 /* bytes allocated for the generated-code buffer (prog) */
5 #define SYM_TABLE_SIZE 10000 /* bytes allocated for the identifier table (idtable) */
6 #define VAR_TABLE_SIZE 4096 /* bytes allocated for each of the vac and vat tables */
8 /* vac: offset of variables
10 loc : local variable index
11 glo : global variable index
12 parm : parameter variable index
14 lsym: loop symbol stack
17 astk: arg position stack
19 int tok /* token currently being processed; also used to index vat[] and vac[] */
, *vac /* per-symbol value: variable offset, or saved file position for a #define */
, *vat /* per-symbol type and VT_* flags */
, *lsym /* loop symbol stack: the top entry chains the pending 'break' jumps */
, rsym /* chain of pending 'return' jumps */
,
20 prog /* start of the generated-code buffer */
, ind /* current output position in the generated code */
, loc /* local variable index */
, glo /* global variable index: next free position in the data area */
, file /* source stream returned by fopen(), stored as an int */
, vt /* type and VT_* flags of the current value */
,
21 vc /* constant or offset part of the current value */
, *macro_stack /* base of the stack of file positions saved during macro expansion */
, *macro_stack_ptr /* next free slot in macro_stack */
, line_num /* line number printed in diagnostics */
;
22 char *idtable /* identifier table buffer (SYM_TABLE_SIZE bytes) */
, *idptr /* position in idtable; main() sets it to idtable + 53 or + 57, presumably just past the keyword names */
, *idlast /* name handed to dlsym() for an unresolved symbol; presumably the last identifier read */
, *filename /* source file name, used by fopen() and in diagnostics */
;
24 /* The current value can be: */
25 #define VT_CONST 0x0002 /* constant in vc */
26 #define VT_VAR 0x0004 /* value is in eax */
27 #define VT_LOCAL 0x0008 /* offset on stack */
29 #define VT_LVAL 0x0010 /* const or var is an lvalue */
30 #define VT_CMP 0x0020 /* the value is stored in processor flags (in vc) */
31 #define VT_FORWARD 0x0040 /* value is forward reference (only used for functions) */
32 #define VT_JMP 0x0080 /* value is the consequence of jmp. bit 0 is set if inv */
34 #define VT_LVALN -17 /* ~VT_LVAL */
38 * VT_FUNC indicates a function. The return type is the stored type. A
39 * function pointer is stored as a 'char' pointer.
41 * If VT_PTRMASK is non-zero, it gives the number of pointer
42 * indirections needed to reach the basic type.
46 * VT_BYTE indicates a char
49 * otherwise integer type is assumed.
52 #define VT_BYTE 0x00001 /* byte pointer. HARDCODED VALUE */
53 #define VT_PTRMASK 0x00f00 /* pointer mask: pointer depth, counted in units of VT_PTRINC */
54 #define VT_PTRINC 0x00100 /* pointer increment: one level of pointer indirection */
55 #define VT_FUNC 0x01000 /* function type */
56 #define VT_TYPE 0x01f01 /* type mask: VT_FUNC | VT_PTRMASK | VT_BYTE */
58 #define VT_TYPEN 0xffffe0fe /* ~VT_TYPE */
59 #define VT_FUNCN -4097 /* ~VT_FUNC */
62 #define VT_DEFINE 0x02000 /* special value for #defined symbols */
72 #define TOK_RETURN 263
73 #define TOK_DEFINE 264
77 #define TOK_EQ 0x94 /* warning: depend on asm code (NOTE(review): matches the x86 sete opcode byte) */
78 #define TOK_NE 0x95 /* warning: depend on asm code (NOTE(review): matches setne) */
79 #define TOK_LT 0x9c /* warning: depend on asm code (NOTE(review): matches setl) */
80 #define TOK_GE 0x9d /* warning: depend on asm code (NOTE(review): matches setge) */
81 #define TOK_LE 0x9e /* warning: depend on asm code (NOTE(review): matches setle) */
82 #define TOK_GT 0x9f /* warning: depend on asm code (NOTE(review): matches setg) */
88 #define TOK_MID 0xa3 /* inc/dec: midpoint of the -- (0xa2) and ++ (0xa4) token codes, so (tok - TOK_MID) is -1 or +1 */
91 #define TOK_SHL 0xe0 /* warning: depend on asm code (NOTE(review): matches the ModRM byte of shl %cl, %eax) */
92 #define TOK_SHR 0xf8 /* warning: depend on asm code (NOTE(review): matches the ModRM byte of sar %cl, %eax) */
95 #define expr_eq() expr()
112 return (c
>= 'a' & c
<= 'z') |
113 (c
>= 'A' & c
<= 'Z') |
119 return c
>= '0' & c
<= '9';
123 /* XXX: use stderr ? */
124 void error(char *msg
)
126 printf("%s:%d: %s\n", filename
, line_num
, msg
);
130 void warning(char *msg
)
132 printf("%s:%d: warning: %s\n", filename
, line_num
, msg
);
138 printf("%s:%d: '%c' expected\n", filename
, line_num
, c
);
147 error("lvalue expected\n");
152 #define skip(c) next()
153 #define test_lvalue()
169 /* single line comments */
172 } else if (c
== '*') {
174 while ((c
= inp()) >= 0) {
182 } else if (c
== '\n')
193 /* preprocessor: we handle only define */
195 if (tok
== TOK_DEFINE
) {
197 /* now tok is the macro symbol */
198 vat
[tok
] = VT_DEFINE
;
199 vac
[tok
] = ftell(file
);
201 /* ignore preprocessor or shell */
206 /* end of line: check whether we are inside a macro expansion. If so,
207 pop the saved file position and resume reading from there */
208 if (macro_stack_ptr
> macro_stack
)
209 fseek(file
, *--macro_stack_ptr
, 0);
212 } else if (c
!= ' ' & c
!= 9)
218 while(isid(c
) | isnum(c
)) {
227 if (strcmp(p
, idptr
) == 0)
232 /* if not found, add symbol */
236 if (vat
[tok
] & VT_DEFINE
) {
237 *macro_stack_ptr
++ = ftell(file
);
238 fseek(file
, vac
[tok
], 0);
243 q
= "<=\236>=\235!=\225++\244--\242==\224";
245 q
= "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\340>>\370";
250 if (*q
== c
& q
[1] == v
) {
257 /* single char substitutions */
280 /* output a symbol and patch all calls to it */
285 n
= *(int *)t
; /* next value */
286 *(int *)t
= ind
- t
- 4;
291 /* psym is used to put an instruction with a data field which is a
292 reference to a symbol. It is in fact the same as oad ! */
295 /* instruction + 4 bytes data. Return the address of the data */
311 /* generate a value in eax from vt and vc */
318 if ((vt
& VT_TYPE
) == VT_BYTE
)
319 o(0xbe0f); /* movsbl x, %eax */
321 o(0x8b); /* movl x,%eax */
324 else if (vt
& VT_LOCAL
)
330 oad(0xb8, vc
); /* mov $xx, %eax */
331 } else if (vt
& VT_LOCAL
) {
332 oad(0x858d, vc
); /* lea xxx(%ebp), %eax */
333 } else if (vt
& VT_CMP
) {
334 oad(0xb8, 0); /* mov $0, %eax */
335 o(0x0f); /* setxx %al */
340 else if (vt
& VT_JMP
) {
342 oad(0xb8, t
); /* mov $1, %eax */
343 oad(0xe9, 5); /* jmp after */
345 oad(0xb8, t
^ 1); /* mov $0, %eax */
349 vt
= (vt
& VT_TYPE
) | VT_VAR
;
352 /* generate a test. set 'inv' to invert test */
353 /* XXX: handle constant */
357 /* fast case : can jump directly since flags are set */
359 t
= psym((vc
- 16) ^ inv
, t
);
363 /* && or || optimization */
371 if ((vt
& (VT_CONST
| VT_LVAL
)) == VT_CONST
) {
372 /* constant jmp optimization */
373 if ((vc
!= 0) != inv
)
379 o(0xc085); /* test %eax, %eax */
381 t
= psym(0x85 ^ inv
, t
);
386 /* return the size in bytes of a given type */
389 if ((t
& VT_PTRMASK
) > VT_PTRINC
| (t
& VT_TYPE
) == VT_PTRINC
)
395 #define POST_ADD 0x1000
398 /* a defines POST/PRE add. c is the token ++ or -- */
404 o(0x018bc189); /* movl %eax, %ecx ; mov (%ecx), %eax */
405 o(0x408d | a
); /* leal x(%eax), %eax/%edx */
406 g((c
- TOK_MID
) * type_size(vt
));
407 o(0x0189 | a
); /* mov %eax/%edx, (%ecx) */
410 /* XXX: handle ptr sub and 'int + ptr' case (only 'ptr + int' handled) */
411 /* XXX: handle constant propagation (need to track live eax) */
417 o(0x50); /* push %eax */
424 o(0x59); /* pop %ecx */
425 if (op
== '+' | op
== '-') {
426 /* XXX: incorrect for short (future!) */
427 if (type_size(t
) == 4)
428 o(0x02e0c1); /* shl $2, %eax */
430 o(0xd8f7); /* neg %eax */
431 o(0xc801); /* add %ecx, %eax */
433 } else if (op
== '&')
440 o(0xc1af0f); /* imul %ecx, %eax */
442 else if (op
== TOK_SHL
| op
== TOK_SHR
) {
443 o(0xd391); /* xchg %ecx, %eax, shl/sar %cl, %eax */
447 else if (op
== '/' | op
== '%') {
448 o(0x91); /* xchg %ecx, %eax */
449 o(0xf9f799); /* cltd, idiv %ecx, %eax */
451 o(0x92); /* xchg %edx, %eax */
453 o(0xc139); /* cmp %eax,%ecx */
458 /* return 0 if no type declaration. otherwise, return the basic type
460 XXX: A '2' is ored to ensure non zero return if int type.
466 if (tok
== TOK_INT
| tok
== TOK_CHAR
| tok
== TOK_VOID
) {
469 return (t
!= TOK_INT
) | 2;
475 /* Read a type declaration (except basic type), and return the
476 type. If v is true, then also put variable name in 'vc' */
481 t
= t
& -3; /* suppress the ored '2' */
488 /* XXX: incorrect if abstract type for functions (e.g. 'int ()') */
495 /* type identifier */
501 /* function declaration */
505 n
= vc
; /* must save vc there */
507 /* read param name and compute offset */
509 t
= typ(1, t
); /* XXX: should accept both arg/non arg if v == 0 */
516 vat
[vc
] = VT_LOCAL
| VT_LVAL
| t
;
521 next(); /* skip ')' */
531 /* read a number in base b */
593 vset(VT_CONST
, getn(tok
, t
));
595 vset(VT_CONST
, getn(tok
, 10));
601 vset(VT_CONST
, getq(inp()));
602 next(); /* skip char */
607 vset(VT_CONST
| VT_PTRINC
| VT_BYTE
, glo
);
608 while (tok
== '\"') {
609 while((n
= inp()) != 34) {
610 *(char *)glo
= getq(n
);
616 glo
= (glo
+ 4) & -4; /* align heap */
626 vt
= (vt
& VT_TYPEN
) | ft
;
631 } else if (t
== '*') {
636 if (!(vt
& VT_PTRMASK
))
637 error("pointer expected");
639 vt
= (vt
- VT_PTRINC
) | VT_LVAL
;
640 } else if (t
== '&') {
643 vt
= (vt
& VT_LVALN
) + VT_PTRINC
;
651 vset(VT_JMP
, gtst(1, 0));
655 if ((vt
& (VT_CONST
| VT_LVAL
)) == VT_CONST
)
666 if (t
== TOK_INC
| t
== TOK_DEC
) {
669 } else if (t
== '-') {
671 if ((vt
& (VT_CONST
| VT_LVAL
)) == VT_CONST
)
675 o(0xd8f7); /* neg %eax */
679 vset(vat
[t
], vac
[t
]);
680 /* forward reference or external reference ? */
682 n
= dlsym(0, idlast
);
684 vset(VT_CONST
| VT_FORWARD
| VT_LVAL
, vac
+ t
);
686 vset(VT_CONST
| VT_LVAL
, n
);
691 /* post operations */
692 if (tok
== TOK_INC
| tok
== TOK_DEC
) {
698 if (!(vt
& VT_PTRMASK
))
699 error("pointer expected");
702 /* dereference pointer */
703 vt
= (vt
- VT_PTRINC
) | VT_LVAL
;
708 /* lvalue is implied */
710 if ((vt
& VT_CONST
) == 0) {
711 /* evaluate function address */
713 o(0x50); /* push %eax */
724 o(0x50); /* push %eax */
729 /* horrible, but needed : convert to native ordering (could
730 parse parameters in reverse order, but would cost more
735 oad(0x24848b, p
); /* mov x(%esp,1), %eax */
736 oad(0x248487, n
); /* xchg x(%esp,1), %eax */
737 oad(0x248489, p
); /* mov %eax, x(%esp,1) */
742 /* forward reference */
744 *(int *)fc
= psym(0xe8, *(int *)fc
);
746 oad(0xe8, fc
- ind
- 5);
748 oad(0x2494ff, t
); /* call *xxx(%esp) */
753 /* return value is variable, int */
768 b
= (vt
& VT_TYPE
) == VT_BYTE
;
770 o(0x50); /* push %eax */
773 if ((vt
& VT_PTRMASK
) != (ft
& VT_PTRMASK
))
774 warning("incompatible type");
776 gv(); /* generate value */
779 o(0x59); /* pop %ecx */
780 o(0x0189 - b
); /* mov %eax/%al, (%ecx) */
781 } else if (ft
& VT_LOCAL
)
782 oad(0x8589 - b
, fc
); /* mov %eax/%al,xxx(%ebp) */
784 oad(0xa3 - b
, fc
); /* mov %eax/%al,xxx */
797 while ((l
== 0 & (tok
== '*' | tok
== '/' | tok
== '%')) |
798 (l
== 1 & (tok
== '+' | tok
== '-')) |
800 (l
== 2 & (tok
== TOK_SHL
| tok
== TOK_SHR
)) |
802 (l
== 3 & (tok
>= TOK_LT
& tok
<= TOK_GT
)) |
803 (l
== 4 & (tok
== TOK_EQ
| tok
== TOK_NE
)) |
804 (l
== 5 & tok
== '&') |
805 (l
== 6 & tok
== '^') |
806 (l
== 7 & tok
== '|')) {
825 if (tok
!= TOK_LAND
) {
845 if (tok
!= TOK_LOR
) {
904 d
= psym(0xe9, 0); /* jmp */
907 gsym(d
); /* patch else jmp */
910 } else if (tok
== TOK_WHILE
) {
916 *++lsym
= gtst(1, 0);
918 oad(0xe9, d
- ind
- 5); /* jmp */
920 } else if (tok
== '{') {
927 } else if (tok
== TOK_RETURN
) {
934 rsym
= psym(0xe9, rsym
); /* jmp */
935 } else if (tok
== TOK_BREAK
) {
937 *lsym
= psym(0xe9, *lsym
);
942 if (tok
== TOK_FOR
) {
962 oad(0xe9, d
- ind
- 5); /* jmp */
967 oad(0xe9, c
- ind
- 5); /* jmp */
978 /* 'l' is true if local declarations */
984 while (1) { /* iterate thru each declaration */
987 /* patch forward references (XXX: does not work for
988 function pointers) */
991 /* put function address */
992 vat
[vc
] = VT_CONST
| VT_LVAL
| vt
;
995 o(0xe58955); /* push %ebp, mov %esp, %ebp */
996 a
= oad(0xec81, 0); /* sub $xxx, %esp */
1000 o(0xc3c9); /* leave, ret */
1001 *a
= loc
; /* save local variables */
1005 vat
[vc
] = l
| VT_LVAL
| vt
;
1006 if (l
== VT_LOCAL
) {
1023 int main(int c
, char **v
)
1027 printf("usage: tc src\n");
1032 file
= fopen(filename
, "r");
1040 idtable
= malloc(SYM_TABLE_SIZE
);
1043 "int\0void\0char\0if\0else\0while\0break\0return\0define\0main", 53);
1044 idptr
= idtable
+ 53;
1047 "int\0void\0char\0if\0else\0while\0break\0return\0define\0main\0for", 57);
1048 idptr
= idtable
+ 57;
1050 glo
= malloc(DATA_SIZE
);
1051 prog
= malloc(TEXT_SIZE
);
1052 vac
= malloc(VAR_TABLE_SIZE
);
1053 vat
= malloc(VAR_TABLE_SIZE
);
1055 macro_stack
= malloc(256);
1056 macro_stack_ptr
= macro_stack
;
1064 f
= fopen(v
[1], "w");
1065 fwrite((void *)prog
, 1, ind
- prog
, f
);
1072 error("main() not defined");
1073 return (*t
)(c
- 1, v
);