1 /* architecture-dependent code generation for x86_64 */
15 /* x86_64 registers */
/*
 * Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define MIN(a, b)	(((b) > (a)) ? (a) : (b))
/*
 * Round x up to the next multiple of a.  The mask trick only works when
 * a is a power of two.  Both arguments are evaluated more than once.
 */
#define ALIGN(x, a)	((((x) + (a)) - 1) & ~((a) - 1))
/*
 * Registers usable as temporaries, by hardware register number.
 * The names in the comments assume the standard x86_64 register encoding.
 */
int tmpregs[] = {
	0, 7, 6, 2, 1,		/* rax, rdi, rsi, rdx, rcx */
	8, 9, 10, 11,		/* r8 .. r11 */
	3, 12, 13, 14, 15,	/* rbx, r12 .. r15 */
};
/*
 * Registers that carry function arguments, in argument order.
 * With the standard x86_64 encoding this is rdi, rsi, rdx, rcx, r8, r9.
 */
int argregs[] = {
	7, 6, 2, 1,	/* rdi, rsi, rdx, rcx */
	8, 9,		/* r8, r9 */
};
/*
 * Two-byte opcodes are packed into a single int:
 *   bits 0..7   second opcode byte (o1)
 *   bits 8..15  first opcode byte (o2)
 *   bit 16      set by OP2(); presumably how op_x() recognises a
 *               two-byte opcode -- confirm against op_x()
 */
#define OP2(o2, o1)	(((o2) << 8) | (o1) | 0x010000)
/* extract the byte stored in bits 8..15 */
#define O2(op)		(((op) & 0xff00) >> 8)
/* extract the byte stored in bits 0..7 */
#define O1(op)		((op) & 0xff)
/*
 * Build a ModRM byte: m in bits 6..7, r1 in bits 3..5, r2 in bits 0..2.
 * Callers must mask r1/r2 to three bits themselves.
 */
#define MODRM(m, r1, r2)	(((m) << 6) | ((r1) << 3) | (r2))
/*
 * Build a REX prefix byte starting from 0x48.  Bit 3 of r1 becomes
 * bit 2 of the prefix and bit 3 of r2 becomes bit 0, so registers
 * numbered 8..15 can be addressed.
 */
#define REX(r1, r2)	(0x48 | (((r1) & 8) ? 0x04 : 0) | (((r2) & 8) ? 0x01 : 0))
56 static void putint(char *s
, long n
, int l
)
64 static void op_x(int op
, int r1
, int r2
, int bt
)
82 oi(sz
== 1 ? O1(op
) & ~0x1 : O1(op
), 1);
87 /* op_*(): r=reg, m=mem, i=imm, s=sym */
88 static void op_rm(int op
, int src
, int base
, int off
, int bt
)
90 int dis
= off
== (char) off
? 1 : 4;
91 int mod
= dis
== 4 ? 2 : 1;
92 if (!off
&& (base
& 7) != R_RBP
)
94 op_x(op
, src
, base
, bt
);
95 oi(MODRM(mod
, src
& 0x07, base
& 0x07), 1);
96 if ((base
& 7) == R_RSP
)
/*
 * Emit a register/register instruction: op_x() handles the opcode part,
 * then a ModRM byte with mode 3 names the two registers.  Only the low
 * three bits of each register number go into ModRM.
 */
static void op_rr(int op, int src, int dst, int bt)
{
	int reg_field = src & 0x07;
	int rm_field = dst & 0x07;

	op_x(op, src, dst, bt);
	oi(MODRM(3, reg_field, rm_field), 1);
}
/*
 * Operand size handed to op_rr()/op_rm() for a move of type bt:
 * 4 stays 4, every other value becomes LONGSZ.
 */
#define movrx_bt(bt)	((bt) != 4 ? LONGSZ : 4)
110 static int movrx_op(int bt
, int mov
)
114 return bt
& BT_SIGNED
? I_MOVSXD
: mov
;
116 return OP2(0x0f, bt
& BT_SIGNED
? 0xbf : 0xb7);
118 return OP2(0x0f, bt
& BT_SIGNED
? 0xbe : 0xb6);
122 static void mov_r2r(int rd
, int r1
, unsigned bt
)
124 if (rd
!= r1
|| BT_SZ(bt
) != LONGSZ
)
125 op_rr(movrx_op(bt
, I_MOVR
), rd
, r1
, movrx_bt(bt
));
128 static void mov_m2r(int dst
, int base
, int off
, int bt
)
130 op_rm(movrx_op(bt
, I_MOVR
), dst
, base
, off
, movrx_bt(bt
));
133 int i_imm(int op
, long imm
)
135 if ((op
& 0xf0) == 0x20)
137 return imm
<= 127 && imm
>= -128;
140 static void i_push(int reg
)
142 op_x(I_PUSH
| (reg
& 0x7), 0, reg
, 4);
145 static void i_pop(int reg
)
147 op_x(I_POP
| (reg
& 0x7), 0, reg
, 4);
150 void i_mov(int rd
, int rn
)
152 op_rr(movrx_op(LONGSZ
, I_MOVR
), rd
, rn
, movrx_bt(LONGSZ
));
/*
 * Load a value of type bt from memory at rn + off into register rd.
 * Public entry point; the work is done by mov_m2r().
 */
void i_load(int rd, int rn, int off, int bt)
{
	mov_m2r(rd, rn, off, bt);
}
160 void i_save(int rd
, int rn
, int off
, int bt
)
162 op_rm(I_MOV
, rd
, rn
, off
, bt
);
165 void i_reg(int op
, int *rd
, int *r1
, int *r2
, int *tmp
)
169 *r2
= op
& O_IMM
? 0 : R_TMPS
;
171 if ((op
& 0xf0) == 0x00) /* add */
173 if ((op
& 0xf0) == 0x10) { /* shl */
180 if ((op
& 0xf0) == 0x20) { /* mul */
181 *rd
= (op
& 0xff) == O_MOD
? (1 << R_RDX
) : (1 << R_RAX
);
183 *r2
= R_TMPS
& ~*rd
& ~*r1
;
184 if ((op
& 0xff) == O_DIV
)
185 *r2
&= ~(1 << R_RDX
);
186 *tmp
= (1 << R_RDX
) | (1 << R_RAX
);
189 if ((op
& 0xf0) == 0x30) { /* cmp */
193 if ((op
& 0xf0) == 0x40) { /* uop */
195 if ((op
& 0xff) == O_LNOT
)
199 if ((op
& 0xf0) == 0x50) { /* etc */
210 if (op
== O_SX
|| op
== O_ZX
|| op
== O_MOV
) {
218 static void i_add(int op
, int rd
, int r1
, int r2
)
220 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
221 static int rx
[] = {0003, 0053, 0043, 0013, 0063};
222 op_rr(rx
[op
& 0x0f], rd
, r2
, LONGSZ
);
225 static void i_add_imm(int op
, int rd
, int rn
, long n
)
227 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
228 static int rx
[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
229 unsigned char s
[4] = {REX(0, rd
), 0x83, rx
[op
& 0x0f] | (rd
& 7), n
& 0xff};
233 void i_num(int rd
, long n
)
236 op_rr(I_XOR
, rd
, rd
, 4);
239 if (n
< 0 && -n
<= 0xffffffff) {
240 op_rr(I_MOVI
, 0, rd
, LONGSZ
);
244 if (n
> 0 && n
<= 0xffffffff)
246 op_x(I_MOVIR
+ (rd
& 7), 0, rd
, len
);
251 static void i_mul(int rd
, int r1
, int r2
)
255 op_rr(I_MUL
, 4, r2
, LONGSZ
);
258 static void i_div(int op
, int rd
, int r1
, int r2
)
262 op_x(I_CQO
, R_RAX
, R_RDX
, LONGSZ
);
266 op_rr(I_MUL
, op
& O_SIGNED
? 7 : 6, r2
, LONGSZ
);
269 static void i_tst(int rn
, int rm
)
271 op_rr(I_TST
, rn
, rm
, LONGSZ
);
274 static void i_cmp(int rn
, int rm
)
276 op_rr(I_CMP
, rn
, rm
, LONGSZ
);
279 static void i_cmp_imm(int rn
, long n
)
281 unsigned char s
[4] = {REX(0, rn
), 0x83, 0xf8 | rn
, n
& 0xff};
285 static void i_shl(int op
, int rd
, int r1
, int rs
)
288 if ((op
& 0x0f) == 1)
289 sm
= op
& O_SIGNED
? 7 : 5;
290 op_rr(I_SHX
, sm
, rd
, LONGSZ
);
293 static void i_shl_imm(int op
, int rd
, int rn
, long n
)
295 int sm
= (op
& 0x1) ? (op
& O_SIGNED
? 0xf8 : 0xe8) : 0xe0 ;
296 char s
[4] = {REX(0, rn
), 0xc1, sm
| (rn
& 7), n
& 0xff};
300 void i_sym(int rd
, char *sym
, int off
)
302 int sz
= X64_ABS_RL
& OUT_RL32
? 4 : LONGSZ
;
303 if (X64_ABS_RL
& OUT_RLSX
)
304 op_rr(I_MOVI
, 0, rd
, sz
);
306 op_x(I_MOVIR
+ (rd
& 7), 0, rd
, sz
);
308 out_rel(sym
, OUT_CS
| X64_ABS_RL
, cslen
);
312 static void i_neg(int rd
)
314 op_rr(I_NOT
, 3, rd
, LONGSZ
);
317 static void i_not(int rd
)
319 op_rr(I_NOT
, 2, rd
, LONGSZ
);
/*
 * for optimizing cmp + tst + jmp to cmp + jmp
 *
 * last_set is an offset into cs, or -1 when there is none; presumably
 * it marks where i_set() began its last sequence.  i_set() emits
 * 3 + 4 = 7 bytes, hence the "+ 7" below.
 */
static long last_set = -1;

/* true when the code buffer ends exactly 7 bytes after last_set */
#define OPT_ISCMP()	(last_set >= 0 && cslen == last_set + 7)
/* the byte right after last_set (the condition byte of the set sequence) */
#define OPT_CCOND()	(cs[last_set + 1])
328 static void i_set(int op
, int rd
)
330 /* lt, gt, le, ge, eq, neq */
331 static int ucond
[] = {0x92, 0x97, 0x96, 0x93, 0x94, 0x95};
332 static int scond
[] = {0x9c, 0x9f, 0x9e, 0x9d, 0x94, 0x95};
333 int cond
= op
& O_SIGNED
? scond
[op
& 0x0f] : ucond
[op
& 0x0f];
334 char set
[] = "\x0f\x00\xc0";
337 os(set
, 3); /* setl al */
338 os("\x48\x0f\xb6\xc0", 4); /* movzx rax, al */
341 static void i_lnot(int rd
)
344 cs
[last_set
+ 1] ^= 0x01;
346 char cmp
[] = "\x00\x83\xf8\x00";
349 os(cmp
, 4); /* cmp rax, 0 */
354 static void jx(int x
, int nbytes
)
358 op
[0] = 0x70 | (x
& 0x0f);
359 os(op
, 1); /* jx $addr */
362 os(op
, 2); /* jx $addr */
367 void i_jmp(int rn
, int z
, int nbytes
)
375 int cond
= OPT_CCOND();
377 jx((!z
? cond
: cond
^ 0x01) & ~0x10, nbytes
);
381 jx(z
? 0x84 : 0x85, nbytes
);
384 os(nbytes
== 1 ? "\xeb" : "\xe9", 1); /* jmp $addr */
389 long i_fill(long src
, long dst
, int nbytes
)
395 putint((void *) (cs
+ src
- nbytes
), dst
- src
, nbytes
);
399 static void i_zx(int rd
, int r1
, int bits
)
402 i_shl_imm(O_SHL
, rd
, rd
, LONGSZ
* 8 - bits
);
403 i_shl_imm(O_SHR
, rd
, rd
, LONGSZ
* 8 - bits
);
405 mov_r2r(rd
, r1
, bits
>> 3);
409 static void i_sx(int rd
, int r1
, int bits
)
411 mov_r2r(rd
, r1
, BT_SIGNED
| (bits
>> 3));
414 void i_op(int op
, int rd
, int r1
, int r2
)
416 if ((op
& 0xf0) == 0x00)
417 i_add(op
, r1
, r1
, r2
);
418 if ((op
& 0xf0) == 0x10)
419 i_shl(op
, r1
, r1
, r2
);
420 if ((op
& 0xf0) == 0x20) {
421 if ((op
& 0xff) == O_MUL
)
422 i_mul(R_RAX
, r1
, r2
);
423 if ((op
& 0xff) == O_DIV
)
424 i_div(op
, R_RAX
, r1
, r2
);
425 if ((op
& 0xff) == O_MOD
)
426 i_div(op
, R_RDX
, r1
, r2
);
429 if ((op
& 0xf0) == 0x30) {
434 if ((op
& 0xf0) == 0x40) { /* uop */
435 if ((op
& 0xff) == O_NEG
)
437 if ((op
& 0xff) == O_NOT
)
439 if ((op
& 0xff) == O_LNOT
)
445 static void i_add_anyimm(int rd
, int rn
, long n
)
447 op_rm(I_LEA
, rd
, rn
, n
, LONGSZ
);
450 void i_op_imm(int op
, int rd
, int r1
, long n
)
452 if ((op
& 0xf0) == 0x00) { /* add */
453 if (rd
== r1
&& i_imm(O_ADD
, n
))
454 i_add_imm(op
, rd
, r1
, n
);
456 i_add_anyimm(rd
, r1
, n
);
458 if ((op
& 0xf0) == 0x10) /* shl */
459 i_shl_imm(op
, rd
, r1
, n
);
460 if ((op
& 0xf0) == 0x20) /* mul */
461 die("mul/imm not implemented");
462 if ((op
& 0xf0) == 0x30) { /* imm */
466 if ((op
& 0xf0) == 0x50) { /* etc */
467 if ((op
& 0xff) == O_ZX
)
469 if ((op
& 0xff) == O_SX
)
471 if ((op
& 0xff) == O_MOV
)
/*
 * Emit the fixed byte sequence for an inline memory copy.
 * r0, r1 and r2 are not referenced here; presumably register
 * allocation has already placed the operands where the string
 * instruction expects them -- confirm against i_reg().
 */
void i_memcpy(int r0, int r1, int r2)
{
	os("\xfc\xf3\xa4", 3);		/* cld; rep movs */
}
/*
 * Emit the fixed byte sequence for an inline memory fill.
 * r0, r1 and r2 are not referenced here; presumably register
 * allocation has already placed the operands where the string
 * instruction expects them -- confirm against i_reg().
 */
void i_memset(int r0, int r1, int r2)
{
	os("\xfc\xf3\xaa", 3);		/* cld; rep stosb */
}
486 void i_call_reg(int rd
)
488 op_rr(I_CALL
, 2, rd
, LONGSZ
);
491 void i_call(char *sym
, int off
)
493 os("\xe8", 1); /* call $x */
495 out_rel(sym
, OUT_CS
| OUT_RLREL
, cslen
);
/* state of the function currently being generated */
static int func_argc;	/* presumably the argc passed to i_prolog() */
static int func_varg;	/* presumably the varg passed to i_prolog() */
static int func_spsub;	/* nonzero: i_epilog() may patch the stack adjustment */
static int func_sargs;	/* bit mask over argregs[] values */
static int func_sregs;	/* bit mask over tmpregs[] values */
static int func_initfp;	/* the initfp passed to i_prolog() */
static int spsub_addr;	/* offset in cs of the 4-byte value i_epilog() patches */
516 for (i
= 0; i
< N_TMPS
; i
++)
517 if ((1 << tmpregs
[i
]) & func_sregs
)
522 static void i_saveargs(void)
525 os("\x58", 1); /* pop rax */
526 for (i
= N_ARGS
- 1; i
>= 0; i
--)
527 if ((1 << argregs
[i
]) & func_sargs
)
529 os("\x50", 1); /* push rax */
532 void i_prolog(int argc
, int varg
, int sargs
, int sregs
, int initfp
, int subsp
)
540 func_initfp
= initfp
;
545 os("\x55", 1); /* push rbp */
546 os("\x48\x89\xe5", 3); /* mov rbp, rsp */
549 for (i
= N_TMPS
- 1; i
>= 0; i
--)
550 if ((1 << tmpregs
[i
]) & func_sregs
)
554 os("\x48\x81\xec", 3); /* sub rsp, $xxx */
560 void i_epilog(int sp_max
)
566 for (i
= 0; i
< N_TMPS
; i
++)
567 if ((1 << tmpregs
[i
]) & func_sregs
)
569 for (i
= 0; i
< N_ARGS
; i
++)
570 if ((1 << argregs
[i
]) & func_sargs
)
572 diff
= ALIGN(-sp_max
- nsregs
* LONGSZ
, 16);
573 /* forcing 16-byte alignment */
574 diff
= (nsregs
+ nsargs
) & 1 ? diff
+ LONGSZ
: diff
;
575 if (func_spsub
&& diff
) {
576 i_add_anyimm(R_RSP
, R_RBP
, -nsregs
* LONGSZ
);
577 putint(cs
+ spsub_addr
, diff
, 4);
580 for (i
= 0; i
< N_TMPS
; i
++)
581 if ((1 << tmpregs
[i
]) & func_sregs
)
585 os("\xc9", 1); /* leave */
587 os("\xc2", 1); /* ret n */
588 oi(nsargs
* LONGSZ
, 2);
590 os("\xc3", 1); /* ret */