1 /* architecture-dependent code generation for x86 */
5 /* x86-64 registers, without r8-r15 */
/* smaller of a and b (b when they compare equal); arguments may be evaluated twice */
#define MIN(a, b)		((b) > (a) ? (a) : (b))
/* round x up to a multiple of a; the mask arithmetic requires a to be a power of two */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
/*
 * Register numbers usable as temporaries, in this order.
 * NOTE(review): the values look like x86 register encodings
 * (0=AX, 1=CX, 2=DX, 6=SI, 7=DI, 3=BX) -- confirm against the R_* macros.
 */
int tmpregs[] = {
	0, 1, 2,
	6, 7, 3,
};
/*
 * Opcode packing helpers.
 * OP2() stores byte o2 in bits 8..15 and byte o1 in bits 0..7 and sets
 * bit 16 (presumably the "two-byte opcode" marker -- confirm in op_x()).
 * O2() and O1() extract those two bytes again.
 */
#define OP2(o2, o1)		(0x010000 | ((o2) << 8) | (o1))
#define O2(op)			(((op) >> 8) & 0xff)
#define O1(op)			((op) & 0xff)
/* assemble a ModR/M byte: m in bits 6..7, r1 in bits 3..5, r2 in bits 0..2 */
#define MODRM(m, r1, r2)	(((m) << 6) | ((r1) << 3) | (r2))
47 static struct mem cs
; /* generated code: the buffer that oi() and oi_at() write into */
49 /* code generation functions */
50 void os(void *s
, int n
)
55 static char *ointbuf(long n
, int l
)
59 for (i
= 0; i
< l
; i
++) {
66 void oi(long n
, int l
)
68 mem_put(&cs
, ointbuf(n
, l
), l
);
71 void oi_at(long pos
, long n
, int l
)
73 mem_cpy(&cs
, pos
, ointbuf(n
, l
), l
);
81 static void op_x(int op
, int r1
, int r2
, int bt
)
88 oi(sz
== 1 ? O1(op
) & ~0x1 : O1(op
), 1);
93 /* op_*(): r=reg, m=mem, i=imm, s=sym */
94 static void op_rm(int op
, int src
, int base
, int off
, int bt
)
96 int dis
= off
== (char) off
? 1 : 4;
97 int mod
= dis
== 4 ? 2 : 1;
98 if (!off
&& (base
& 7) != R_RBP
)
100 op_x(op
, src
, base
, bt
);
101 oi(MODRM(mod
, src
& 0x07, base
& 0x07), 1);
102 if ((base
& 7) == R_RSP
)
/*
 * Emit a register/register instruction: op_x() outputs the opcode part,
 * then a single ModR/M byte follows with mod = 3, src in the reg field
 * and dst in the r/m field (only the low three bits of each are encoded).
 */
static void op_rr(int op, int src, int dst, int bt)
{
	int reg = src & 0x07;
	int rm = dst & 0x07;

	op_x(op, src, dst, bt);
	oi(MODRM(3, reg, rm), 1);
}
/* operand type passed along with movrx_op(): always LONGSZ; the bt argument is ignored */
114 #define movrx_bt(bt) (LONGSZ)
116 static int movrx_op(int bt
, int mov
)
120 return OP2(0x0f, bt
& T_MSIGN
? 0xbf : 0xb7);
122 return OP2(0x0f, bt
& T_MSIGN
? 0xbe : 0xb6);
126 static void mov_r2r(int rd
, int r1
, unsigned bt
)
128 if (rd
!= r1
|| T_SZ(bt
) != LONGSZ
)
129 op_rr(movrx_op(bt
, I_MOVR
), rd
, r1
, movrx_bt(bt
));
132 static void i_push(int reg
)
134 op_x(I_PUSH
| (reg
& 0x7), 0, reg
, LONGSZ
);
137 static void i_pop(int reg
)
139 op_x(I_POP
| (reg
& 0x7), 0, reg
, LONGSZ
);
142 void i_mov(int rd
, int rn
)
144 op_rr(movrx_op(LONGSZ
, I_MOVR
), rd
, rn
, movrx_bt(LONGSZ
));
147 static void i_add(int op
, int rd
, int r1
, int r2
)
149 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
150 static int rx
[] = {0003, 0053, 0043, 0013, 0063};
151 op_rr(rx
[op
& 0x0f], rd
, r2
, LONGSZ
);
154 static void i_add_imm(int op
, int rd
, int rn
, long n
)
156 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
157 static int rx
[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
158 unsigned char s
[4] = {0x83, rx
[op
& 0x0f] | rd
, n
& 0xff};
162 static void i_num(int rd
, long n
)
165 op_rr(I_XOR
, rd
, rd
, 4);
168 op_x(I_MOVIR
+ (rd
& 7), 0, rd
, LONGSZ
);
173 static void i_mul(int rd
, int r1
, int r2
)
177 op_rr(I_MUL
, 4, r2
, LONGSZ
);
180 static void i_div(int op
, int rd
, int r1
, int r2
)
185 op_x(I_CQO
, R_RAX
, R_RDX
, LONGSZ
);
189 op_rr(I_MUL
, bt
& T_MSIGN
? 7 : 6, r2
, LONGSZ
);
192 static void i_tst(int rn
, int rm
)
194 op_rr(I_TST
, rn
, rm
, LONGSZ
);
197 static void i_cmp(int rn
, int rm
)
199 op_rr(I_CMP
, rn
, rm
, LONGSZ
);
202 static void i_cmp_imm(int rn
, long n
)
204 unsigned char s
[4] = {0x83, 0xf8 | rn
, n
& 0xff};
208 static void i_shl(int op
, int rd
, int r1
, int rs
)
212 if ((op
& 0x0f) == 1)
213 sm
= bt
& T_MSIGN
? 7 : 5;
214 op_rr(I_SHX
, sm
, rd
, LONGSZ
);
217 static void i_shl_imm(int op
, int rd
, int rn
, long n
)
220 int sm
= (op
& 0x1) ? (bt
& T_MSIGN
? 0xf8 : 0xe8) : 0xe0;
221 char s
[4] = {0xc1, sm
| rn
, n
& 0xff};
225 static void i_neg(int rd
)
227 op_rr(I_NOT
, 3, rd
, LONGSZ
);
230 static void i_not(int rd
)
232 op_rr(I_NOT
, 2, rd
, LONGSZ
);
235 static int i_cond(long op
)
237 /* lt, ge, eq, ne, le, gt */
238 static int ucond
[] = {0x92, 0x93, 0x94, 0x95, 0x96, 0x97};
239 static int scond
[] = {0x9c, 0x9d, 0x94, 0x95, 0x9e, 0x9f};
241 return bt
& T_MSIGN
? scond
[op
& 0x0f] : ucond
[op
& 0x0f];
244 static void i_set(long op
, int rd
)
246 char set
[] = "\x0f\x00\xc0";
248 os(set
, 3); /* setl al */
249 os("\x0f\xb6\xc0", 3); /* movzx rax, al */
252 static void i_lnot(int rd
)
254 char cmp
[] = "\x83\xf8\x00";
256 os(cmp
, 3); /* cmp rax, 0 */
260 static void jx(int x
, int nbytes
)
264 oi(0x70 | (x
& 0x0f), 1); /* jx $addr */
267 os(op
, 2); /* jx $addr */
271 static long i_jmp(long op
, long rn
, long rm
, int nbytes
)
276 if (op
& (O_JZ
| O_JCC
)) {
279 jx(O_C(op
) == O_JZ
? 0x84 : 0x85, nbytes
);
285 jx(i_cond(op
) & ~0x10, nbytes
);
288 os(nbytes
== 1 ? "\xeb" : "\xe9", 1); /* jmp $addr */
295 void i_fill(long src
, long dst
, long nbytes
)
299 oi_at(src
, dst
- src
- nbytes
, nbytes
);
302 static void i_zx(int rd
, int r1
, int bits
)
305 i_shl_imm(O_SHL
, rd
, rd
, LONGSZ
* 8 - bits
);
306 i_shl_imm(O_SHR
, rd
, rd
, LONGSZ
* 8 - bits
);
308 mov_r2r(rd
, r1
, bits
>> 3);
312 static void i_sx(int rd
, int r1
, int bits
)
314 mov_r2r(rd
, r1
, T_MSIGN
| (bits
>> 3));
317 static void i_cast(int rd
, int rn
, int bt
)
324 i_sx(rd
, rn
, T_SZ(bt
) * 8);
326 i_zx(rd
, rn
, T_SZ(bt
) * 8);
330 static void i_add_anyimm(int rd
, int rn
, long n
)
332 op_rm(I_LEA
, rd
, rn
, n
, LONGSZ
);
/* relocations: parallel arrays, grown together when rel_n reaches rel_sz */
static long *rel_sym;		/* relocation symbols */
static long *rel_flg;		/* relocation flags */
static long *rel_off;		/* relocation offsets */
static long rel_n;		/* relocation count */
static long rel_sz;		/* allocated relocation entries */

/* labels: lab_loc is grown until a label id is below lab_sz */
static long lab_sz;		/* size of lab_loc */
static long *lab_loc;		/* label offsets in cs */

/* jumps: parallel arrays, grown together when jmp_n reaches jmp_sz */
static long jmp_n;		/* jump count */
static long jmp_sz;		/* allocated jump entries */
static long *jmp_off;		/* jump offsets */
static long *jmp_dst;		/* jump destinations */
static long jmp_ret;		/* the position of the last return jmp */
347 static void lab_add(long id
)
349 while (id
>= lab_sz
) {
351 lab_sz
= MAX(128, lab_sz
* 2);
352 lab_loc
= mextend(lab_loc
, lab_n
, lab_sz
, sizeof(*lab_loc
));
354 lab_loc
[id
] = opos();
357 static void jmp_add(long off
, long dst
)
359 if (jmp_n
== jmp_sz
) {
360 jmp_sz
= MAX(128, jmp_sz
* 2);
361 jmp_off
= mextend(jmp_off
, jmp_n
, jmp_sz
, sizeof(*jmp_off
));
362 jmp_dst
= mextend(jmp_dst
, jmp_n
, jmp_sz
, sizeof(*jmp_dst
));
364 jmp_off
[jmp_n
] = off
;
365 jmp_dst
[jmp_n
] = dst
;
369 void i_label(long id
)
374 static void i_rel(long sym
, long flg
, long off
)
376 if (rel_n
== rel_sz
) {
377 rel_sz
= MAX(128, rel_sz
* 2);
378 rel_sym
= mextend(rel_sym
, rel_n
, rel_sz
, sizeof(*rel_sym
));
379 rel_flg
= mextend(rel_flg
, rel_n
, rel_sz
, sizeof(*rel_flg
));
380 rel_off
= mextend(rel_off
, rel_n
, rel_sz
, sizeof(*rel_off
));
382 rel_sym
[rel_n
] = sym
;
383 rel_flg
[rel_n
] = flg
;
384 rel_off
[rel_n
] = off
;
388 static void i_sym(int rd
, int sym
, int off
)
390 op_x(I_MOVIR
+ (rd
& 7), 0, rd
, LONGSZ
);
391 i_rel(sym
, OUT_CS
, opos());
395 static void i_saveregs(long sregs
, long sregs_pos
, int st
)
399 for (i
= 0; i
< N_TMPS
; i
++)
400 if ((1 << tmpregs
[i
]) & sregs
)
401 op_rm(st
? I_MOV
: I_MOVR
, tmpregs
[i
], REG_FP
,
402 sregs_pos
+ nsregs
++ * ULNG
, ULNG
);
405 void i_wrap(int argc
, long sargs
, long spsub
, int initfp
, long sregs
, long sregs_pos
)
409 long diff
; /* prologue length */
411 /* removing the last jmp to the epilogue */
412 if (jmp_ret
+ 5 == opos()) {
413 mem_cut(&cs
, jmp_ret
);
416 lab_add(0); /* the return label */
417 body_n
= mem_len(&cs
);
419 /* generating function prologue */
421 os("\x55", 1); /* push rbp */
422 os("\x89\xe5", 2); /* mov rbp, rsp */
426 spsub
= ALIGN(spsub
, 8);
429 i_saveregs(sregs
, sregs_pos
, 1); /* saving registers */
431 mem_put(&cs
, body
, body_n
);
433 /* generating function epilogue */
434 i_saveregs(sregs
, sregs_pos
, 0); /* restoring saved registers */
436 os("\xc9", 1); /* leave */
437 os("\xc3", 1); /* ret */
438 /* adjusting code offsets */
439 for (i
= 0; i
< rel_n
; i
++)
441 for (i
= 0; i
< jmp_n
; i
++)
443 for (i
= 0; i
< lab_sz
; i
++)
447 void i_code(char **c
, long *c_len
, long **rsym
, long **rflg
, long **roff
, long *rcnt
)
450 for (i
= 0; i
< jmp_n
; i
++) /* filling jmp destinations */
451 oi_at(jmp_off
[i
], lab_loc
[jmp_dst
[i
]] - jmp_off
[i
] - 4, 4);
452 *c_len
= mem_len(&cs
);
473 long i_reg(long op
, long *rd
, long *r1
, long *r2
, long *tmp
)
483 if (oc
& (O_NUM
| O_SYM
))
484 *r1
= oc
& (O_NUM
| O_SYM
) ? LONGSZ
* 8 : R_TMPS
;
486 *r1
= T_SZ(bt
) == 1 ? R_BYTE
: R_TMPS
;
491 *r2
= oc
& O_NUM
? (oc
== O_ADD
? 32 : 8) : R_TMPS
;
507 *rd
= oc
== O_MOD
? (1 << R_RDX
) : (1 << R_RAX
);
509 *r2
= R_TMPS
& ~*rd
& ~*r1
;
511 *r2
&= ~(1 << R_RDX
);
512 *tmp
= (1 << R_RDX
) | (1 << R_RAX
);
518 *r2
= oc
& O_NUM
? 8 : R_TMPS
;
542 *rd
= (1 << REG_RET
);
546 *rd
= (1 << REG_RET
);
547 *r1
= oc
& O_SYM
? 0 : R_TMPS
;
550 if (oc
& (O_LD
| O_ST
)) {
553 *r2
= oc
& O_NUM
? 0 : R_TMPS
;
562 *r1
= oc
& O_NUM
? 8 : R_TMPS
;
/*
 * Report whether n fits in a signed two's-complement field of lim bits,
 * i.e. whether -2^(lim-1) <= n <= 2^(lim-1) - 1.
 *
 * lim: field width in bits; a non-positive width holds nothing.
 * n:   the value to test.
 * Returns nonzero when n fits, zero otherwise.
 */
int i_imm(long lim, long n)
{
	long max;

	if (lim <= 0)
		return 0;
	/* a field at least as wide as long can hold every long */
	if (lim >= (long) (sizeof(long) * 8))
		return 1;
	/*
	 * Shift in unsigned long: the plain int expression 1 << (lim - 1)
	 * is undefined for lim == 32 and wider.
	 */
	max = (long) ((1ul << (lim - 1)) - 1);
	/* written as -max - 1 so that n + 1 cannot overflow at LONG_MAX */
	return n <= max && n >= -max - 1;
}
576 long i_ins(long op
, long r0
, long r1
, long r2
)
582 if (r0
== r1
&& r2
<= 127 && r2
>= -128)
583 i_add_imm(op
, r1
, r1
, r2
);
585 i_add_anyimm(r0
, r1
, r2
);
587 i_add(op
, r1
, r1
, r2
);
592 i_shl_imm(op
, r1
, r1
, r2
);
594 i_shl(op
, r1
, r1
, r2
);
598 i_mul(R_RAX
, r1
, r2
);
600 i_div(op
, R_RAX
, r1
, r2
);
602 i_div(op
, R_RDX
, r1
, r2
);
613 if (oc
& O_UOP
) { /* uop */
623 op_rr(I_CALL
, 2, r1
, LONGSZ
);
626 if (oc
== (O_CALL
| O_SYM
)) {
627 os("\xe8", 1); /* call $x */
628 i_rel(r1
, OUT_CS
| OUT_RLREL
, opos());
632 if (oc
== (O_MOV
| O_SYM
)) {
636 if (oc
== (O_MOV
| O_NUM
)) {
641 os("\xfc\xf3\xaa", 3); /* cld; rep stosb */
645 os("\xfc\xf3\xa4", 3); /* cld; rep movs */
650 jmp_add(i_jmp(O_JMP
, 0, 0, 4), 0);
653 if (oc
== (O_LD
| O_NUM
)) {
654 op_rm(movrx_op(bt
, I_MOVR
), r0
, r1
, r2
, movrx_bt(bt
));
657 if (oc
== (O_ST
| O_NUM
)) {
658 op_rm(I_MOV
, r0
, r1
, r2
, bt
);
666 jmp_add(i_jmp(op
, r0
, r1
, 4), r2
+ 1);