3 #define TEXT_SIZE 20000 /* size in bytes passed to malloc() for the 'prog' buffer */
5 #define SYM_TABLE_SIZE 10000 /* size in bytes passed to malloc() for the 'idtable' identifier strings */
6 #define VAR_TABLE_SIZE 4096 /* size in bytes passed to malloc() for each of the 'vac' and 'vat' tables */
8 /* vac: offset of variables
10 loc : local variable index
11 glo : global variable index
12 parm : parameter variable index
16 astk: arg position stack
18 int tok
, *vac
, *vat
, rsym
,
19 prog
, ind
, loc
, glo
, file
, vt
,
20 vc
, *macro_stack
, *macro_stack_ptr
, line_num
;
21 char *idtable
, *idptr
, *filename
;
23 /* The current value (described by the globals vt and vc) can be: */
24 #define VT_CONST 0x0002 /* constant in vc */
25 #define VT_VAR 0x0004 /* value is in eax */
26 #define VT_LOCAL 0x0008 /* offset on stack */
28 #define VT_LVAL 0x0010 /* const or var is an lvalue */
29 #define VT_CMP 0x0020 /* the value is stored in processor flags (in vc) */
30 #define VT_FORWARD 0x0040 /* value is forward reference (only used for functions) */
31 #define VT_JMP 0x0080 /* value is the consequence of jmp. bit 0 is set if inv */
33 #define VT_LVALN -17 /* ~VT_LVAL (0x0010), spelled out as a literal; used to clear the lvalue bit */
37 * VT_FUNC indicates a function. The return type is the stored type. A
38 * function pointer is stored as a 'char' pointer.
40 * If VT_PTRMASK is non-zero, then it indicates the number of pointer
41 * iterations to reach the basic type.
45 * VT_BYTE indicates a char
48 * otherwise integer type is assumed.
51 #define VT_BYTE 0x00001 /* byte pointer. HARDCODED VALUE */
52 #define VT_PTRMASK 0x00f00 /* pointer mask: field holding the number of pointer indirections */
53 #define VT_PTRINC 0x00100 /* pointer increment: adding it adds one level of indirection */
54 #define VT_FUNC 0x01000 /* function type */
55 #define VT_UNSIGNED 0x02000 /* unsigned type */
56 #define VT_ARRAY 0x04000 /* array type (only used in parsing) */
57 #define VT_TYPE 0x07f01 /* type mask: VT_BYTE | VT_PTRMASK | VT_FUNC | VT_UNSIGNED | VT_ARRAY */
58 #define VT_TYPEN 0xffff80fe /* ~VT_TYPE */
59 #define VT_FUNCN -4097 /* ~VT_FUNC (0x01000), spelled out as a literal */
61 #define VT_EXTERN 0x08000 /* extern definition */
62 #define VT_STATIC 0x10000 /* static variable */
65 #define VT_DEFINE 0x80000 /* special value for #defined symbols */
/* Keyword tokens. Each value matches the position of the keyword in the
   keyword string used to initialise idtable in main() ("int", "void",
   "char", ...).
   NOTE(review): this assumes the first keyword ("int") is token 256 --
   confirm, and keep the two lists in sync when adding keywords. */
75 #define TOK_RETURN 263
76 #define TOK_DEFINE 264
79 #define TOK_EXTERN 267
80 #define TOK_STATIC 268
81 #define TOK_UNSIGNED 269
84 #define TOK_CONTINUE 272
85 #define TOK_SWITCH 273
88 /* ignored types. Must have contiguous values: the type parser tests
   them as the single range TOK_CONST..TOK_SIGNED */
90 #define TOK_VOLATILE 276
92 #define TOK_REGISTER 278
93 #define TOK_SIGNED 279
95 /* unsupported types. Must have contiguous values: the type parser tests
   them as the single range TOK_FLOAT..TOK_TYPEDEF and reports
   "unsupported type" */
97 #define TOK_DOUBLE 281
98 #define TOK_STRUCT 282
100 #define TOK_TYPEDEF 284
102 #define TOK_DEFAULT 285
/* Comparison tokens. A VT_CMP value keeps the comparison token in vc, and
   the code generator computes instruction bytes from it arithmetically
   (e.g. (vc - 16) ^ inv for the conditional jump).
   NOTE(review): the values look like the second opcode byte of the x86
   SETcc instructions (0F 94 = sete ... 0F 9F = setg) -- confirm against
   the instruction reference before changing any of them. */
105 #define TOK_EQ 0x94 /* warning: depends on the emitted asm code */
106 #define TOK_NE 0x95 /* warning: depends on the emitted asm code */
107 #define TOK_LT 0x9c /* warning: depends on the emitted asm code */
108 #define TOK_GE 0x9d /* warning: depends on the emitted asm code */
109 #define TOK_LE 0x9e /* warning: depends on the emitted asm code */
110 #define TOK_GT 0x9f /* warning: depends on the emitted asm code */
112 #define TOK_LAND 0xa0 /* "&&" (octal \240 in the two-character operator string) */
116 #define TOK_MID 0xa3 /* midpoint between "--" (0xa2) and "++" (0xa4) in the operator string:
                            (token - TOK_MID) gives -1 or +1 without a separate constant */
122 /* assignment operators: the plain operator token ORed with 0x80
   (e.g. '%' is 0x25, so "%=" is 0xa5). Masking with 0x7f recovers the
   plain operator, which is what the assignment code passes to gen_op(). */
123 #define TOK_A_MOD 0xa5 /* "%=" */
124 #define TOK_A_AND 0xa6 /* "&=" */
125 #define TOK_A_MUL 0xaa /* "*=" */
126 #define TOK_A_ADD 0xab /* "+=" */
127 #define TOK_A_SUB 0xad /* "-=" */
128 #define TOK_A_DIV 0xaf /* "/=" */
129 #define TOK_A_XOR 0xde /* "^=" */
130 #define TOK_A_OR 0xfc /* "|=" */
131 #define TOK_A_SHL 0x81 /* "<<=" ("<<" is token 1 in the operator string) */
132 #define TOK_A_SHR 0x82 /* ">>=" (">>" is token 2 in the operator string) */
135 #define expr_eq() expr() /* expr_eq() is currently just an alias that expands to expr() */
152 return (c
>= 'a' & c
<= 'z') |
153 (c
>= 'A' & c
<= 'Z') |
159 return c
>= '0' & c
<= '9';
163 /* XXX: use stderr ? */
164 void error(char *msg
)
166 printf("%s:%d: %s\n", filename
, line_num
, msg
);
170 void warning(char *msg
)
172 printf("%s:%d: warning: %s\n", filename
, line_num
, msg
);
178 printf("%s:%d: '%c' expected\n", filename
, line_num
, c
);
187 error("lvalue expected\n");
192 #define skip(c) next() /* macro form: calls next() and ignores 'c', so the expected character is not checked */
193 #define test_lvalue() /* macro form: expands to nothing, so no lvalue check is performed */
197 char *get_tok_str(int v
)
224 /* single line comments */
227 } else if (c
== '*') {
229 while ((c
= inp()) >= 0) {
237 } else if (c
== '\n')
248 /* preprocessor: we handle only define */
250 if (tok
== TOK_DEFINE
) {
252 /* now tok is the macro symbol */
253 vat
[tok
] = VT_DEFINE
;
254 vac
[tok
] = ftell(file
);
256 /* ignore preprocessor or shell */
261 /* end of line : check if we are in macro state. if so,
262 pop new file position */
263 if (macro_stack_ptr
> macro_stack
)
264 fseek(file
, *--macro_stack_ptr
, 0);
267 } else if (c
!= ' ' & c
!= 9)
272 while(isid(c
) | isnum(c
)) {
281 if (strcmp(p
, idptr
) == 0)
286 /* if not found, add symbol */
290 if (vat
[tok
] & VT_DEFINE
) {
291 *macro_stack_ptr
++ = ftell(file
);
292 fseek(file
, vac
[tok
], 0);
297 q
= "<=\236>=\235!=\225++\244--\242==\224";
299 q
= "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253-=\255*=\252/=\257%=\245&=\246^=\336|=\374";
304 if (*q
== c
& q
[1] == v
) {
306 if (tok
== TOK_SHL
| tok
== TOK_SHR
) {
318 /* single char substitutions */
341 /* output a symbol and patch all calls to it */
346 n
= *(int *)t
; /* next value */
347 *(int *)t
= a
- t
- 4;
357 /* psym is used to put an instruction with a data field which is a
358 reference to a symbol. It is in fact the same as oad ! */
361 /* instruction + 4 bytes data. Return the address of the data */
377 /* generate a value in eax from vt and vc */
378 /* XXX: generate correct pointer for forward references to functions */
385 if ((vt
& VT_TYPE
) == VT_BYTE
)
386 o(0xbe0f); /* movsbl x, %eax */
388 o(0x8b); /* movl x,%eax */
391 else if (vt
& VT_LOCAL
)
397 oad(0xb8, vc
); /* mov $xx, %eax */
398 } else if (vt
& VT_LOCAL
) {
399 oad(0x858d, vc
); /* lea xxx(%ebp), %eax */
400 } else if (vt
& VT_CMP
) {
401 oad(0xb8, 0); /* mov $0, %eax */
402 o(0x0f); /* setxx %al */
407 else if (vt
& VT_JMP
) {
409 oad(0xb8, t
); /* mov $1, %eax */
410 oad(0xe9, 5); /* jmp after */
412 oad(0xb8, t
^ 1); /* mov $0, %eax */
416 vt
= (vt
& VT_TYPE
) | VT_VAR
;
419 /* generate a test. set 'inv' to invert test */
420 /* XXX: handle constant */
424 /* fast case : can jump directly since flags are set */
426 t
= psym((vc
- 16) ^ inv
, t
);
430 /* && or || optimization */
438 if ((vt
& (VT_CONST
| VT_LVAL
)) == VT_CONST
) {
439 /* constant jmp optimization */
440 if ((vc
!= 0) != inv
)
446 o(0xc085); /* test %eax, %eax */
448 t
= psym(0x85 ^ inv
, t
);
455 if ((t
& VT_PTRMASK
) >= VT_PTRINC
| (t
& VT_TYPE
) == 0)
461 /* return the number size in bytes of a given type */
464 if ((t
& VT_PTRMASK
) >= VT_PTRINC
)
465 return type_size(t
- VT_PTRINC
);
470 #define POST_ADD 0x1000
473 /* a defines POST/PRE add. c is the token ++ or -- */
479 o(0x018bc189); /* movl %eax, %ecx ; mov (%ecx), %eax */
480 o(0x408d | a
); /* leal x(%eax), %eax/%edx */
481 g((c
- TOK_MID
) * incr_value(vt
));
482 o(0x0189 | a
); /* mov %eax/%edx, (%ecx) */
485 /* XXX: handle ptr sub and 'int + ptr' case (only 'ptr + int' handled) */
486 /* XXX: handle constant propagation (need to track live eax) */
487 /* XXX: handle unsigned propagation */
493 o(0x50); /* push %eax */
502 o(0x59); /* pop %ecx */
503 if (op
== '+' | op
== '-') {
504 /* XXX: incorrect for short (future!) */
505 if (incr_value(t
) == 4)
506 o(0x02e0c1); /* shl $2, %eax */
508 o(0xd8f7); /* neg %eax */
509 o(0xc801); /* add %ecx, %eax */
511 } else if (op
== '&')
518 o(0xc1af0f); /* imul %ecx, %eax */
520 else if (op
== TOK_SHL
| op
== TOK_SHR
) {
521 o(0xd391); /* xchg %ecx, %eax, shl/shr/sar %cl, %eax */
524 else if (t
& VT_UNSIGNED
)
530 else if (op
== '/' | op
== '%') {
531 o(0x91); /* xchg %ecx, %eax */
532 if (t
& VT_UNSIGNED
) {
533 o(0xd231); /* xor %edx, %edx */
534 o(0xf1f7); /* div %ecx, %eax */
536 o(0xf9f799); /* cltd, idiv %ecx, %eax */
539 o(0x92); /* xchg %edx, %eax */
541 o(0xc139); /* cmp %eax,%ecx */
546 /* return 0 if no type declaration. otherwise, return the basic type
548 XXX: A '2' is ored to ensure non zero return if int type.
555 if (tok
== TOK_CHAR
| tok
== TOK_VOID
) {
557 } else if (tok
== TOK_INT
|
558 (tok
>= TOK_CONST
& tok
<= TOK_SIGNED
)) {
560 } else if (tok
>= TOK_FLOAT
& tok
<= TOK_TYPEDEF
) {
561 error("unsupported type");
562 } else if (tok
== TOK_EXTERN
) {
564 } else if (tok
== TOK_STATIC
) {
566 } else if (tok
== TOK_UNSIGNED
) {
577 /* Read a type declaration (except basic type), and return the
578 type. If v is true, then also put variable name in 'vc' */
579 int typ(int *v
, int t
, int *array_size_ptr
)
583 t
= t
& -3; /* suppress the ored '2' */
590 /* XXX: incorrect if abstract type for functions (e.g. 'int ()') */
597 /* type identifier */
605 /* function declaration */
609 /* read param name and compute offset */
611 t
= typ(&n
, t
, 0); /* XXX: should accept both arg/non arg if v == 0 */
618 vat
[n
] = VT_LOCAL
| VT_LVAL
| t
;
623 next(); /* skip ')' */
628 } else if (tok
== '[') {
629 /* array definition */
631 error("multi dimension arrays not supported");
637 *array_size_ptr
= vc
;
639 if ((vt
& (VT_CONST
| VT_LVAL
)) != VT_CONST
|
640 (vc
<= 0 & array_size_ptr
!= 0))
641 error("invalid array size");
643 t
= (t
+ VT_PTRINC
) | VT_ARRAY
;
650 /* define a new external reference to a function 'v' of type 'u' */
651 void external_func(v
, u
)
656 n
= dlsym(0, get_tok_str(v
));
658 vat
[v
] = u
| VT_CONST
| VT_LVAL
| VT_FORWARD
;
659 vac
[v
] = 0; /* used to generate symbol list */
661 vat
[v
] = u
| VT_CONST
| VT_LVAL
; /* int f() */
667 /* read a number in base b */
729 vset(VT_CONST
, getn(tok
, t
));
731 vset(VT_CONST
, getn(tok
, 10));
737 vset(VT_CONST
, getq(inp()));
738 next(); /* skip char */
743 vset(VT_CONST
| VT_PTRINC
| VT_BYTE
, glo
);
744 while (tok
== '\"') {
745 while((n
= inp()) != 34) {
746 *(char *)glo
++ = getq(n
);
760 vt
= (vt
& VT_TYPEN
) | ft
;
765 } else if (t
== '*') {
770 if (!(vt
& VT_PTRMASK
))
771 error("pointer expected");
773 vt
= (vt
- VT_PTRINC
) | VT_LVAL
;
774 } else if (t
== '&') {
777 vt
= (vt
& VT_LVALN
) + VT_PTRINC
;
785 vset(VT_JMP
, gtst(1, 0));
789 if ((vt
& (VT_CONST
| VT_LVAL
)) == VT_CONST
)
800 if (t
== TOK_INC
| t
== TOK_DEC
) {
803 } else if (t
== '-') {
805 if ((vt
& (VT_CONST
| VT_LVAL
)) == VT_CONST
)
809 o(0xd8f7); /* neg %eax */
815 error("undefined symbol");
816 /* for simple function calls, we tolerate undeclared
817 external reference */
818 external_func(t
, VT_FUNC
); /* int() function */
820 vset(vat
[t
], vac
[t
]);
821 /* if forward reference, we must point to vac[t] */
827 /* post operations */
828 if (tok
== TOK_INC
| tok
== TOK_DEC
) {
834 if (!(vt
& VT_PTRMASK
))
835 error("pointer expected");
838 /* dereference pointer */
839 vt
= (vt
- VT_PTRINC
) | VT_LVAL
;
844 /* lvalue is implied */
846 if ((vt
& VT_CONST
) == 0) {
847 /* evaluate function address */
849 o(0x50); /* push %eax */
860 o(0x50); /* push %eax */
865 /* horrible, but needed : convert to native ordering (could
866 parse parameters in reverse order, but would cost more
871 oad(0x24848b, p
); /* mov x(%esp,1), %eax */
872 oad(0x248487, n
); /* xchg x(%esp,1), %eax */
873 oad(0x248489, p
); /* mov %eax, x(%esp,1) */
878 /* forward reference */
879 if (ft
& VT_FORWARD
) {
880 vac
[fc
] = psym(0xe8, vac
[fc
]);
882 oad(0xe8, fc
- ind
- 5);
883 /* return value is variable, and take type from function proto */
884 vt
= VT_VAR
| (ft
& VT_TYPE
& VT_FUNCN
);
886 oad(0x2494ff, t
); /* call *xxx(%esp) */
888 /* return value is variable, int */
902 (tok
>= TOK_A_MOD
& TOK_A_DIV
) |
903 tok
== TOK_A_XOR
| tok
== TOK_A_OR
|
904 tok
== TOK_A_SHL
| tok
== TOK_A_SHR
) {
908 b
= (vt
& VT_TYPE
) == VT_BYTE
;
910 o(0x50); /* push %eax */
915 if ((vt
& VT_PTRMASK
) != (ft
& VT_PTRMASK
))
916 warning("incompatible type");
918 gv(); /* generate value */
920 gen_op(tok
& 0x7f, -2); /* XXX: incorrect, must call expr_eq */
923 o(0x59); /* pop %ecx */
924 o(0x0189 - b
); /* mov %eax/%al, (%ecx) */
925 } else if (ft
& VT_LOCAL
)
926 oad(0x8589 - b
, fc
); /* mov %eax/%al,xxx(%ebp) */
928 oad(0xa3 - b
, fc
); /* mov %eax/%al,xxx */
941 while ((l
== 0 & (tok
== '*' | tok
== '/' | tok
== '%')) |
942 (l
== 1 & (tok
== '+' | tok
== '-')) |
944 (l
== 2 & (tok
== TOK_SHL
| tok
== TOK_SHR
)) |
946 (l
== 3 & (tok
>= TOK_LT
& tok
<= TOK_GT
)) |
947 (l
== 4 & (tok
== TOK_EQ
| tok
== TOK_NE
)) |
948 (l
== 5 & tok
== '&') |
949 (l
== 6 & tok
== '^') |
950 (l
== 7 & tok
== '|')) {
969 if (tok
!= TOK_LAND
) {
989 if (tok
!= TOK_LOR
) {
1033 void block(int *bsym
, int *csym
)
1037 if (tok
== TOK_IF
) {
1046 if (c
== TOK_ELSE
) {
1048 d
= psym(0xe9, 0); /* jmp */
1051 gsym(d
); /* patch else jmp */
1054 } else if (tok
== TOK_WHILE
) {
1063 oad(0xe9, d
- ind
- 5); /* jmp */
1066 } else if (tok
== '{') {
1073 } else if (tok
== TOK_RETURN
) {
1080 rsym
= psym(0xe9, rsym
); /* jmp */
1081 } else if (tok
== TOK_BREAK
) {
1084 error("cannot break");
1085 *bsym
= psym(0xe9, *bsym
);
1088 } else if (tok
== TOK_CONTINUE
) {
1091 error("cannot continue");
1092 *csym
= psym(0xe9, *csym
);
1097 if (tok
== TOK_FOR
) {
1117 oad(0xe9, d
- ind
- 5); /* jmp */
1122 oad(0xe9, c
- ind
- 5); /* jmp */
1126 if (tok
== TOK_DO
) {
1149 /* 'l' is VT_LOCAL or VT_CONST to define default storage type */
1152 int *a
, t
, b
, s
, align
, v
, u
, n
;
1155 while (1) { /* iterate thru each declaration */
1159 /* patch forward references */
1160 if (vat
[v
] & VT_FORWARD
)
1162 /* put function address */
1163 vat
[v
] = VT_CONST
| VT_LVAL
| t
;
1166 o(0xe58955); /* push %ebp, mov %esp, %ebp */
1167 a
= oad(0xec81, 0); /* sub $xxx, %esp */
1171 o(0xc3c9); /* leave, ret */
1172 *a
= (-loc
+ 3) & -4; /* align local size to word &
1173 save local variables */
1177 /* external function definition */
1178 external_func(v
, t
);
1180 /* not lvalue if array */
1181 if (!(t
& VT_ARRAY
))
1183 if (t
& VT_EXTERN
) {
1184 /* external variable */
1185 n
= dlsym(NULL
, get_tok_str(v
));
1187 error("unknown external variable");
1188 vat
[v
] = VT_CONST
| t
;
1197 align
= type_size(t
);
1199 if (u
== VT_LOCAL
) {
1200 /* allocate space down on the stack */
1201 loc
= (loc
- s
) & -align
;
1204 /* allocate space up in the data space */
1205 glo
= (glo
+ align
- 1) & -align
;
1221 int main(int c
, char **v
)
1225 printf("usage: tc src\n");
1230 file
= fopen(filename
, "r");
1238 idtable
= malloc(SYM_TABLE_SIZE
);
1241 "int\0void\0char\0if\0else\0while\0break\0return\0define\0main", 53);
1242 idptr
= idtable
+ 53;
1245 "int\0void\0char\0if\0else\0while\0break\0return\0define\0main\0for\0extern\0static\0unsigned\0goto\0do\0continue\0switch\0case\0const\0volatile\0long\0register\0signed\0float\0double\0struct\0union\0typedef\0default\0enum", 192);
1246 idptr
= idtable
+ 192;
1248 glo
= malloc(DATA_SIZE
);
1249 prog
= malloc(TEXT_SIZE
);
1250 vac
= malloc(VAR_TABLE_SIZE
);
1251 vat
= malloc(VAR_TABLE_SIZE
);
1252 macro_stack
= malloc(256);
1253 macro_stack_ptr
= macro_stack
;
1261 f
= fopen(v
[1], "w");
1262 fwrite((void *)prog
, 1, ind
- prog
, f
);
1270 error("main() not defined");
1272 return (*t
)(c
- 1, v
);