arm: fixed minor typo
[neatcc.git] / ncc.h
blobc2f763e91d22f3f8601bf296b097cca16de436f0
/*
 * THE NEATCC C COMPILER
 *
 * This header file is organized as follows:
 *
 * 0. helper functions and data structures
 * 1. ncc.c -> tok.c: the interface for reading tokens
 * 2. ncc.c -> int.c: the interface for generating the intermediate code
 * 3. int.c -> gen.c: the intermediate code
 * 4. gen.c -> x64.c: the interface for generating the final code
 * 5. gen.c -> out.c: the interface for generating object files
 */
/* SECTION ZERO: Helper Functions */
/*
 * Predefined array limits; (p.f. means per function).
 * These are plain object-like macros so they can size static arrays.
 */
#define NARGS		32	/* number of function/macro arguments */
#define NTMPS		64	/* number of expression temporaries */
#define NFIELDS		128	/* number of fields in structs */
#define NAMELEN		128	/* size of identifiers */
#define NDEFS		4096	/* number of macros */
#define MARGLEN		1024	/* size of macro arguments */
#define MDEFLEN		2048	/* size of macro definitions */
#define NBUFS		32	/* macro expansion stack depth */
#define NLOCS		1024	/* number of header search paths */
/*
 * Small generic helpers.
 *
 * LEN(a):      element count of a; only meaningful when a is a real array,
 *              not a pointer.
 * ALIGN(x, a): x rounded up to a multiple of a; the bit mask used here is
 *              only correct when a is a power of two.
 * MIN/MAX:     smaller/larger of the two arguments; each argument may be
 *              evaluated twice, so avoid arguments with side effects.
 */
#define LEN(a)		(sizeof(a) / sizeof((a)[0]))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) < (b) ? (b) : (a))
/*
 * Section-zero helper routines.  They are only declared in this header;
 * their definitions are not visible from here.
 */
void *mextend(void *old, long oldsz, long newsz, long memsz);
void die(char *msg, ...);	/* msg is followed by variadic arguments */
void err(char *fmt, ...);	/* fmt is followed by variadic arguments */
int opt(int level);
/* variable length buffer */
struct mem {
	char *s;		/* allocated buffer */
	long sz;		/* buffer size */
	long n;			/* length of data stored in s */
};

/*
 * Operations on struct mem; only the declarations are visible here, so
 * the exact behaviour of each routine must be checked in its definition.
 */
void mem_init(struct mem *mem);
void mem_done(struct mem *mem);
void mem_cut(struct mem *mem, long pos);
void *mem_buf(struct mem *mem);
void mem_put(struct mem *mem, void *buf, long len);
void mem_putc(struct mem *mem, int c);
void mem_putz(struct mem *mem, long sz);
void mem_cpy(struct mem *mem, long off, void *buf, long len);
long mem_len(struct mem *mem);
void *mem_get(struct mem *mem);
/* SECTION ONE: Tokenisation */
/* the token reader interface; declarations only */
void tok_init(char *path);
void tok_done(void);
char *tok_see(void);		/* return the current token; a static buffer */
char *tok_get(void);		/* return and consume the current token */
long tok_len(void);		/* the length of the last token */
long tok_num(char *tok, long *n);
long tok_addr(void);
void tok_jump(long addr);
/* the cpp_*() routines declared alongside the tokeniser; declarations only */
int cpp_init(char *path);
void cpp_path(char *s);
void cpp_define(char *name, char *def);
char *cpp_loc(long addr);
int cpp_read(char **buf, long *len);
/* SECTION TWO: Intermediate Code Generation */
/*
 * Basic type meaning: the low four bits (T_MSIZE) hold the size in bytes
 * and bit 0x10 (T_MSIGN) marks the type as signed.
 */
#define T_MSIZE		0x000f
#define T_MSIGN		0x0010
#define T_SZ(bt)	((bt) & T_MSIZE)
#define T_SG(bt)	((bt) & T_MSIGN)
#define T_MK(sign, size)	(((sign) & T_MSIGN) | ((size) & T_MSIZE))

/*
 * Number of bytes in basic types.  LONGSZ is not defined in this part of
 * the header; presumably an architecture header provides it -- confirm.
 */
#define ULNG		(LONGSZ)
#define UINT		(4)
#define USHT		(2)
#define UCHR		(1)
/* basic types: the signed variants are the sizes above with T_MSIGN set */
#define SLNG		(ULNG | T_MSIGN)
#define SINT		(UINT | T_MSIGN)
#define SSHT		(USHT | T_MSIGN)
#define SCHR		(UCHR | T_MSIGN)
/*
 * Intermediate instruction operands
 *
 * R: register, N: immediate, S: symbol, L: local,
 * D: displacement, G: label, C: arguments
 */
/* Instruction		rd	r1	r2	r3 */
#define O_ADD		0x000010	/* R	R	RN	-  */
#define O_SHL		0x000020	/* R	R	RN	-  */
#define O_MUL		0x000040	/* R	R	RN	-  */
#define O_CMP		0x000080	/* R	R	RN	-  */
#define O_UOP		0x000100	/* R	R	-	-  */
#define O_CALL		0x000200	/* R	RS	D	C  */
#define O_MOV		0x000400	/* R	RNSL	D	-  */
#define O_MEM		0x000800	/* -	R	R	R  */
#define O_JMP		0x001000	/* -	-	-	G  */
#define O_JZ		0x002000	/* -	R	-	G  */
#define O_JCC		0x004000	/* -	R	RN	G  */
#define O_RET		0x008000	/* -	R	-	-  */
#define O_LD		0x010000	/* R	RSL	D	-  */
#define O_ST		0x020000	/* -	R	RSL	D  */
/* opcode flags: num, loc, sym */
#define O_NUM		0x100000	/* instruction immediate */
#define O_LOC		0x200000	/* local (frame pointer displacement) */
#define O_SYM		0x400000	/* symbols (relocations and offset) */
/*
 * Other members of instruction groups: each is its group's bit (above)
 * or-ed with a small index stored in the low four bits.
 */
#define O_SUB		(1 | O_ADD)
#define O_AND		(2 | O_ADD)
#define O_OR		(3 | O_ADD)
#define O_XOR		(4 | O_ADD)
#define O_SHR		(1 | O_SHL)
#define O_DIV		(1 | O_MUL)
#define O_MOD		(2 | O_MUL)
#define O_LT		(0 | O_CMP)
#define O_GE		(1 | O_CMP)
#define O_EQ		(2 | O_CMP)
#define O_NE		(3 | O_CMP)
#define O_LE		(4 | O_CMP)
#define O_GT		(5 | O_CMP)
#define O_NEG		(0 | O_UOP)
#define O_NOT		(1 | O_UOP)
#define O_LNOT		(2 | O_UOP)
#define O_MSET		(0 | O_MEM)
#define O_MCPY		(1 | O_MEM)
#define O_JNZ		(1 | O_JZ)
/* instruction masks: unions of the group bits above */
#define O_BOP		(O_ADD | O_MUL | O_CMP | O_SHL)
#define O_OUT		(O_BOP | O_UOP | O_CALL | O_MOV | O_LD)
#define O_JXX		(O_JMP | O_JZ | O_JCC)
/*
 * Instruction operand type: an opcode word keeps the operation code
 * (including the O_NUM/O_LOC/O_SYM flags) in its low 24 bits and the
 * operand basic type in the bits above them.
 */
#define O_C(op)		((op) & 0xffffff)	/* operation code */
#define O_T(op)		((op) >> 24)		/* instruction operand type */
#define O_MK(op, bt)	((op) | ((bt) << 24))	/* combine opcode and type */
/* operations on the stack (declarations only) */
void o_bop(long op);		/* binary operation */
void o_uop(long op);		/* unary operation */
void o_cast(long bt);
void o_memcpy(void);
void o_memset(void);
void o_call(int argc, int ret);
void o_ret(int ret);
void o_assign(long bt);
void o_deref(long bt);
void o_load(void);
int o_popnum(long *num);
int o_popsym(long *sym, long *off);
/* pushing values to the stack (declarations only) */
void o_num(long n);
void o_local(long addr);
void o_sym(char *sym);
void o_tmpdrop(int n);
void o_tmpswap(void);
void o_tmpcopy(void);
/* handling locals (declarations only) */
long o_mklocal(long size);
void o_rmlocal(long addr, long sz);
long o_arg2loc(int i);
/* branches (declarations only) */
void o_label(long id);
void o_jmp(long id);
void o_jz(long id);
long o_mark(void);
void o_back(long mark);
/* data/bss sections (declarations only) */
long o_dsnew(char *name, long size, int global);
void o_dscpy(long addr, void *buf, long len);
void o_dsset(char *name, long off, long bt);
void o_bsnew(char *name, long size, int global);
/* functions */
void o_func_beg(char *name, int argc, int global, int vararg);
void o_func_end(void);
void o_code(char *name, char *c, long c_len);
/* output */
void o_write(int fd);
/* SECTION THREE: The Intermediate Code */
/* intermediate code instructions */
struct ic {
	long op;		/* instruction opcode */
	long a1;		/* first argument */
	long a2;		/* second argument */
	long a3;		/* more information, like jump target */
	long *args;		/* call arguments */
};

/* get the generated intermediate code */
void ic_get(struct ic **c, long *n);
/* queries on an instruction array; declarations only */
int ic_num(struct ic *ic, long iv, long *num);
int ic_sym(struct ic *ic, long iv, long *sym, long *off);
long *ic_lastuse(struct ic *ic, long ic_n);
void ic_free(struct ic *ic);
int ic_regcnt(struct ic *ic);
/* global register allocation (declarations only) */
void reg_init(struct ic *ic, long ic_n);
long reg_mask(void);
int reg_lmap(long ic, long loc);
int reg_rmap(long ic, long reg);
int reg_safe(long loc);
void reg_done(void);
/* SECTION FOUR: Final Code Generation */
/*
 * To make maintaining different architectures easier and to unify the
 * optimizations, I have merged the code generation for different
 * architectures.  The i_*() functions are now the low level
 * architecture-specific code generation entry points.  The
 * differences between RISC and CISC architectures, actually the
 * annoying asymmetry in CISC architecture, have made this interface
 * more complex than it could have ideally been.  Nevertheless,
 * the benefits of extracting gen.c and the cleaner design,
 * especially with the presence of the optimizations, outweigh the
 * added complexity.  Overall, there were many challenges for
 * extracting gen.c including:
 * + Different register sets; caller/callee saved and argument registers
 * + CISC-style instructions that work on limited registers and parameters
 * + Different instruction formats and immediate value limitations
 * + Generating epilog, prolog, and local variable addresses when optimizing
 *
 * I tried to make this interface as small as possible.  The key
 * functions and macros are described next.
 *
 * i_reg() returns the mask of allowed registers for each
 * operand of an instruction.  The first argument, op, specifies
 * the instruction (O_* macros); i_reg() sets the values of r0, r1,
 * and r2 to indicate the mask of acceptable registers for the
 * first, second, and third operands of the instruction.
 * The value of these masks may be changed to zero to indicate
 * fewer than three operands.  If md is zero while m1 is not,
 * the destination register should be equal to the first register,
 * as in CISC architectures.  mt denotes the mask of registers
 * that may lose their contents after the instruction.
 * NOTE(review): the i_reg() prototype in this header names its
 * parameters rd, r1, r2, r3 and mt; r0/md above presumably refer to
 * the mask stored through rd, and m1 to the one stored through r1 --
 * confirm against gen.c.
 *
 * i_ins() generates code for the given instruction.  The arguments
 * indicate the instruction and its operands.  The code is generated
 * by calling os() and oi() functions and the current position in
 * the code segment is obtained by calling opos().  For branch
 * instructions, i_ins() returns the position of the branch offset in
 * the code segment, to be filled later with i_fill().
 *
 * Some macros should be defined in architecture-dependent headers
 * and a few variables should be defined for each architecture,
 * such as tmpregs, which is an array of register numbers that
 * can be used for holding temporaries, and argregs, which is an
 * array of register numbers for holding the first N_ARGS arguments.
 * Consult x64.h for an example of the macros defined for each
 * architecture.
 */
/*
 * Pull in the header of the selected target; the target is chosen by
 * defining one of NEATCC_ARM, NEATCC_X64 or NEATCC_X86.
 */
#ifdef NEATCC_ARM
#include "arm.h"
#endif
#ifdef NEATCC_X64
#include "x64.h"
#endif
#ifdef NEATCC_X86
#include "x86.h"
#endif

/* architecture-specific operations (declarations only) */
long i_reg(long op, long *rd, long *r1, long *r2, long *r3, long *mt);
long i_ins(long op, long rd, long r1, long r2, long r3);
int i_imm(long lim, long n);
void i_label(long id);
void i_wrap(int argc, long sargs, long spsub, int initfp, long sregs, long sregs_pos);
void i_code(char **c, long *c_len, long **rsym, long **rflg, long **roff, long *rcnt);
void i_done(void);

/* per-architecture register tables; defined outside this header */
extern int tmpregs[];
extern int argregs[];
/* SECTION FIVE: Object File Generation */
/* symbol and relocation flags; each is a distinct bit so they can be or-ed */
#define OUT_CS		0x0001		/* code segment symbol */
#define OUT_DS		0x0002		/* data segment symbol */
#define OUT_BSS		0x0004		/* bss segment symbol */

#define OUT_GLOB	0x0010		/* global symbol */

#define OUT_RLREL	0x0020		/* relative relocation */
#define OUT_RLSX	0x0040		/* sign extend relocation */
#define OUT_RL24	0x0400		/* 3-byte relocation */
#define OUT_RL32	0x0800		/* 4-byte relocation */

#define OUT_ALIGNMENT	16		/* section alignment */
/* object file writer interface (declarations only) */
void out_init(long flags);

long out_sym(char *name);
void out_def(char *name, long flags, long off, long len);
void out_rel(long id, long flags, long off);

void out_write(int fd, char *cs, long cslen, char *ds, long dslen);