1 /* architecture-dependent code generation for x86_64 */
5 /* x86-64 registers, without r8-r15 */
/* smaller of a and b; one of the arguments is evaluated twice */
#define MIN(a, b)	((b) > (a) ? (a) : (b))
/* round x up to a multiple of a; the mask form only works when a is a power of two */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
/*
 * Register numbers usable as temporaries.
 * NOTE(review): the ordering presumably expresses allocation preference;
 * confirm against the register allocator.
 */
int tmpregs[] = {
	0, 7, 6, 2, 1, 8, 9, 10, 11, 3, 12, 13, 14, 15,
};
/* register number of each register-passed argument; indexed by i < N_ARGS in this file */
int argregs[] = {
	7, 6, 2, 1, 8, 9,
};
/*
 * Two-byte opcodes are packed into one int: bit 16 is a marker,
 * bits 8-15 hold o2 and bits 0-7 hold o1.  O2() and O1() unpack them.
 */
#define OP2(o2, o1)		(((o2) << 8) | (o1) | 0x010000)
#define O2(op)			(((op) >> 8) & 0xff)
#define O1(op)			((op) & 0xff)
/* ModRM byte: m in bits 6-7, r1 in bits 3-5, r2 in bits 0-2 */
#define MODRM(m, r1, r2)	(((m) << 6) | ((r1) << 3) | (r2))
/* 0x48 with bit 3 of r1 moved into bit 2 and bit 3 of r2 moved into bit 0 */
#define REX(r1, r2)		((((r1) & 8) >> 1) | (((r2) & 8) >> 3) | 0x48)
47 static struct mem cs
; /* generated code */
49 /* code generation functions */
50 static void os(void *s
, int n
)
55 static char *ointbuf(long n
, int l
)
59 for (i
= 0; i
< l
; i
++) {
66 static void oi(long n
, int l
)
68 mem_put(&cs
, ointbuf(n
, l
), l
);
71 static void oi_at(long pos
, long n
, int l
)
73 mem_cpy(&cs
, pos
, ointbuf(n
, l
), l
);
76 static long opos(void)
81 static void op_x(int op
, int r1
, int r2
, int bt
)
99 oi(sz
== 1 ? O1(op
) & ~0x1 : O1(op
), 1);
104 /* op_*(): r=reg, m=mem, i=imm, s=sym */
105 static void op_rm(int op
, int src
, int base
, int off
, int bt
)
107 int dis
= off
== (char) off
? 1 : 4;
108 int mod
= dis
== 4 ? 2 : 1;
109 if (!off
&& (base
& 7) != R_RBP
)
111 op_x(op
, src
, base
, bt
);
112 oi(MODRM(mod
, src
& 0x07, base
& 0x07), 1);
113 if ((base
& 7) == R_RSP
)
/*
 * Register-register form: let op_x() emit the opcode part for op, then
 * append a ModRM byte with mode 3 built from the low three bits of src
 * and dst.
 */
static void op_rr(int op, int src, int dst, int bt)
{
	int modrm = MODRM(3, src & 0x07, dst & 0x07);

	op_x(op, src, dst, bt);
	oi(modrm, 1);
}
/* type argument used for extending moves: 4 stays 4, anything else becomes LONGSZ */
#define movrx_bt(bt)	((bt) != 4 ? LONGSZ : 4)
127 static int movrx_op(int bt
, int mov
)
131 return bt
& T_MSIGN
? I_MOVSXD
: mov
;
133 return OP2(0x0f, bt
& T_MSIGN
? 0xbf : 0xb7);
135 return OP2(0x0f, bt
& T_MSIGN
? 0xbe : 0xb6);
139 static void mov_r2r(int rd
, int r1
, unsigned bt
)
141 if (rd
!= r1
|| T_SZ(bt
) != LONGSZ
)
142 op_rr(movrx_op(bt
, I_MOVR
), rd
, r1
, movrx_bt(bt
));
145 static void i_push(int reg
)
147 op_x(I_PUSH
| (reg
& 0x7), 0, reg
, LONGSZ
);
150 static void i_pop(int reg
)
152 op_x(I_POP
| (reg
& 0x7), 0, reg
, LONGSZ
);
155 void i_mov(int rd
, int rn
)
157 op_rr(movrx_op(LONGSZ
, I_MOVR
), rd
, rn
, movrx_bt(LONGSZ
));
160 static void i_add(int op
, int rd
, int r1
, int r2
)
162 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
163 static int rx
[] = {0003, 0053, 0043, 0013, 0063};
164 op_rr(rx
[op
& 0x0f], rd
, r2
, LONGSZ
);
167 static void i_add_imm(int op
, int rd
, int rn
, long n
)
169 /* opcode for O_ADD, O_SUB, O_AND, O_OR, O_XOR */
170 static int rx
[] = {0xc0, 0xe8, 0xe0, 0xc8, 0xf0};
171 unsigned char s
[4] = {REX(0, rd
), 0x83, rx
[op
& 0x0f] | (rd
& 7), n
& 0xff};
175 static void i_num(int rd
, long n
)
178 op_rr(I_XOR
, rd
, rd
, 4);
181 if (n
< 0 && -n
<= 0xffffffff) {
182 op_rr(I_MOVI
, 0, rd
, LONGSZ
);
186 if (n
> 0 && n
<= 0xffffffff)
188 op_x(I_MOVIR
+ (rd
& 7), 0, rd
, len
);
193 static void i_mul(int rd
, int r1
, int r2
)
197 op_rr(I_MUL
, 4, r2
, LONGSZ
);
200 static void i_div(int op
, int rd
, int r1
, int r2
)
205 op_x(I_CQO
, R_RAX
, R_RDX
, LONGSZ
);
209 op_rr(I_MUL
, bt
& T_MSIGN
? 7 : 6, r2
, LONGSZ
);
212 static void i_tst(int rn
, int rm
)
214 op_rr(I_TST
, rn
, rm
, LONGSZ
);
217 static void i_cmp(int rn
, int rm
)
219 op_rr(I_CMP
, rn
, rm
, LONGSZ
);
222 static void i_cmp_imm(int rn
, long n
)
224 unsigned char s
[4] = {REX(0, rn
), 0x83, 0xf8 | rn
, n
& 0xff};
228 static void i_shl(int op
, int rd
, int r1
, int rs
)
232 if ((op
& 0x0f) == 1)
233 sm
= bt
& T_MSIGN
? 7 : 5;
234 op_rr(I_SHX
, sm
, rd
, LONGSZ
);
237 static void i_shl_imm(int op
, int rd
, int rn
, long n
)
240 int sm
= (op
& 0x1) ? (bt
& T_MSIGN
? 0xf8 : 0xe8) : 0xe0;
241 char s
[4] = {REX(0, rn
), 0xc1, sm
| (rn
& 7), n
& 0xff};
245 static void i_neg(int rd
)
247 op_rr(I_NOT
, 3, rd
, LONGSZ
);
250 static void i_not(int rd
)
252 op_rr(I_NOT
, 2, rd
, LONGSZ
);
255 static int i_cond(long op
)
257 /* lt, ge, eq, ne, le, gt */
258 static int ucond
[] = {0x92, 0x93, 0x94, 0x95, 0x96, 0x97};
259 static int scond
[] = {0x9c, 0x9d, 0x94, 0x95, 0x9e, 0x9f};
261 return bt
& T_MSIGN
? scond
[op
& 0x0f] : ucond
[op
& 0x0f];
264 static void i_set(long op
, int rd
)
266 char set
[] = "\x0f\x00\xc0";
268 os(set
, 3); /* setl al */
269 os("\x48\x0f\xb6\xc0", 4); /* movzx rax, al */
272 static void i_lnot(int rd
)
274 char cmp
[] = "\x00\x83\xf8\x00";
277 os(cmp
, 4); /* cmp rax, 0 */
281 static void jx(int x
, int nbytes
)
285 op
[0] = 0x70 | (x
& 0x0f);
286 os(op
, 1); /* jx $addr */
289 os(op
, 2); /* jx $addr */
293 /* generate cmp or tst before a conditional jump */
294 static void i_jcmp(long op
, long rn
, long rm
)
306 /* generate a jump instruction and return the offset of its displacement */
307 static long i_jmp(long op
, int nb
)
310 jx(O_C(op
) == O_JZ
? 0x84 : 0x85, nb
);
312 jx(i_cond(op
) & ~0x10, nb
);
314 os(nb
== 1 ? "\xeb" : "\xe9", 1);
319 /* the length of a jump instruction opcode */
320 static int i_jlen(long op
, int nb
)
322 if (op
& (O_JZ
| O_JCC
))
328 static void i_zx(int rd
, int r1
, int bits
)
331 i_shl_imm(O_SHL
, rd
, rd
, LONGSZ
* 8 - bits
);
332 i_shl_imm(O_SHR
, rd
, rd
, LONGSZ
* 8 - bits
);
334 mov_r2r(rd
, r1
, bits
>> 3);
339 static void i_sx(int rd
, int r1
, int bits
)
341 mov_r2r(rd
, r1
, T_MSIGN
| (bits
>> 3));
344 static void i_cast(int rd
, int rn
, int bt
)
351 i_sx(rd
, rn
, T_SZ(bt
) * 8);
353 i_zx(rd
, rn
, T_SZ(bt
) * 8);
357 static void i_add_anyimm(int rd
, int rn
, long n
)
359 op_rm(I_LEA
, rd
, rn
, n
, LONGSZ
);
/* relocation records: parallel arrays filled by i_rel() */
static long *rel_sym;		/* relocation symbols */
static long *rel_flg;		/* relocation flags */
static long *rel_off;		/* relocation offsets */
static long rel_n;		/* number of relocations recorded */
static long rel_sz;		/* allocated length of the rel_* arrays */

/* labels: lab_loc is indexed by label id and grown by lab_add() */
static long lab_sz;		/* allocated length of lab_loc */
static long *lab_loc;		/* label offsets in cs */

/* jumps: parallel arrays filled by jmp_add() */
static long jmp_n;		/* number of jumps recorded */
static long jmp_sz;		/* allocated length of the jmp_* arrays */
static long *jmp_off;		/* jump offsets */
static long *jmp_dst;		/* jump destinations */
static long *jmp_op;		/* jump opcode */
static long jmp_ret;		/* the position of the last return jmp */
375 static void lab_add(long id
)
377 while (id
>= lab_sz
) {
379 lab_sz
= MAX(128, lab_sz
* 2);
380 lab_loc
= mextend(lab_loc
, lab_n
, lab_sz
, sizeof(*lab_loc
));
382 lab_loc
[id
] = opos();
385 static void jmp_add(long op
, long off
, long dst
)
387 if (jmp_n
== jmp_sz
) {
388 jmp_sz
= MAX(128, jmp_sz
* 2);
389 jmp_off
= mextend(jmp_off
, jmp_n
, jmp_sz
, sizeof(*jmp_off
));
390 jmp_dst
= mextend(jmp_dst
, jmp_n
, jmp_sz
, sizeof(*jmp_dst
));
391 jmp_op
= mextend(jmp_op
, jmp_n
, jmp_sz
, sizeof(*jmp_op
));
393 jmp_off
[jmp_n
] = off
;
394 jmp_dst
[jmp_n
] = dst
;
399 void i_label(long id
)
404 static void i_rel(long sym
, long flg
, long off
)
406 if (rel_n
== rel_sz
) {
407 rel_sz
= MAX(128, rel_sz
* 2);
408 rel_sym
= mextend(rel_sym
, rel_n
, rel_sz
, sizeof(*rel_sym
));
409 rel_flg
= mextend(rel_flg
, rel_n
, rel_sz
, sizeof(*rel_flg
));
410 rel_off
= mextend(rel_off
, rel_n
, rel_sz
, sizeof(*rel_off
));
412 rel_sym
[rel_n
] = sym
;
413 rel_flg
[rel_n
] = flg
;
414 rel_off
[rel_n
] = off
;
418 static void i_sym(int rd
, int sym
, int off
)
420 int sz
= X64_ABS_RL
& OUT_RL32
? 4 : LONGSZ
;
421 if (X64_ABS_RL
& OUT_RLSX
)
422 op_rr(I_MOVI
, 0, rd
, sz
);
424 op_x(I_MOVIR
+ (rd
& 7), 0, rd
, sz
);
425 i_rel(sym
, OUT_CS
| X64_ABS_RL
, opos());
429 static void i_saveargs(long sargs
)
432 os("\x58", 1); /* pop rax */
433 for (i
= N_ARGS
- 1; i
>= 0; i
--)
434 if ((1 << argregs
[i
]) & sargs
)
436 os("\x50", 1); /* push rax */
439 static void i_subsp(long val
)
443 if (val
<= 127 && val
>= -128) {
444 os("\x48\x83\xec", 3);
447 os("\x48\x81\xec", 3);
452 static int regs_count(long regs
)
456 for (i
= 0; i
< N_REGS
; i
++)
457 if (((1 << i
) & R_TMPS
) & regs
)
462 static void regs_save(long sregs
, long dis
)
465 for (i
= 0; i
< N_REGS
; i
++)
466 if (((1 << i
) & R_TMPS
) & sregs
)
472 static void regs_load(long sregs
, long dis
)
477 for (i
= N_REGS
- 1; i
>= 0; --i
)
478 if (((1 << i
) & R_TMPS
) & sregs
)
482 void i_wrap(int argc
, long sargs
, long spsub
, int initfp
, long sregs
, long sregs_pos
)
486 long diff
; /* prologue length */
487 int nsargs
= 0; /* number of saved arguments */
488 int mod16
; /* 16-byte alignment */
490 /* removing the last jmp to the epilogue */
491 if (jmp_ret
+ i_jlen(O_JMP
, 4) + 4 == opos()) {
492 mem_cut(&cs
, jmp_ret
);
495 lab_add(0); /* the return label */
496 body_n
= mem_len(&cs
);
498 /* generating function prologue */
502 os("\x55", 1); /* push rbp */
503 os("\x48\x89\xe5", 3); /* mov rbp, rsp */
505 for (i
= 0; i
< N_ARGS
; i
++)
506 if ((1 << argregs
[i
]) & sargs
)
508 mod16
= (spsub
+ nsargs
* LONGSZ
) % 16; /* forcing 16-byte alignment */
510 spsub
= spsub
+ (16 - mod16
);
511 i_subsp(sregs
? -sregs_pos
- regs_count(sregs
) * ULNG
: spsub
);
513 if (sregs
) /* saving registers */
514 regs_save(sregs
, spsub
+ sregs_pos
);
516 mem_put(&cs
, body
, body_n
);
518 /* generating function epilogue */
519 if (sregs
) /* restoring saved registers */
520 regs_load(sregs
, spsub
+ sregs_pos
);
522 os("\xc9", 1); /* leave */
524 os("\xc2", 1); /* ret n */
525 oi(nsargs
* LONGSZ
, 2);
527 os("\xc3", 1); /* ret */
529 /* adjusting code offsets */
530 for (i
= 0; i
< rel_n
; i
++)
532 for (i
= 0; i
< jmp_n
; i
++)
534 for (i
= 0; i
< lab_sz
; i
++)
538 /* introduce shorter jumps, if possible */
539 static void i_shortjumps(int *nb
)
541 long off
= 0; /* current code offset */
542 long dif
= 0; /* the difference after changing jump instructions */
543 int rel
= 0; /* current relocation */
544 int lab
= 1; /* current label */
545 long c_len
= mem_len(&cs
);
546 char *c
= mem_get(&cs
);
548 for (i
= 0; i
< jmp_n
; i
++)
549 nb
[i
] = abs(lab_loc
[jmp_dst
[i
]] - jmp_off
[i
]) < 0x70 ? 1 : 4;
550 for (i
= 0; i
< jmp_n
; i
++) {
551 long cur
= jmp_off
[i
] - i_jlen(jmp_op
[i
], 4);
552 while (rel
< rel_n
&& rel_off
[rel
] <= cur
)
553 rel_off
[rel
++] += dif
;
554 while (lab
< lab_sz
&& lab_loc
[lab
] <= cur
)
555 lab_loc
[lab
++] += dif
;
556 mem_put(&cs
, c
+ off
, cur
- off
);
557 jmp_off
[i
] = i_jmp(jmp_op
[i
], nb
[i
]);
558 off
= cur
+ i_jlen(jmp_op
[i
], 4) + 4;
559 dif
= mem_len(&cs
) - off
;
562 rel_off
[rel
++] += dif
;
564 lab_loc
[lab
++] += dif
;
566 mem_put(&cs
, c
+ off
, c_len
- off
);
570 void i_code(char **c
, long *c_len
, long **rsym
, long **rflg
, long **roff
, long *rcnt
)
572 int *nb
; /* number of bytes necessary for jump displacements */
574 /* more compact jmp instructions */
575 nb
= malloc(jmp_n
* sizeof(nb
[0]));
576 for (i
= 0; i
< jmp_n
; i
++)
579 for (i
= 0; i
< jmp_n
; i
++) /* filling jmp destinations */
580 oi_at(jmp_off
[i
], lab_loc
[jmp_dst
[i
]] -
581 jmp_off
[i
] - nb
[i
], nb
[i
]);
583 *c_len
= mem_len(&cs
);
605 long i_reg(long op
, long *rd
, long *r1
, long *r2
, long *r3
, long *tmp
)
615 *r1
= oc
& (O_NUM
| O_SYM
) ? 32 : R_TMPS
;
620 *r2
= oc
& O_NUM
? (oc
== O_ADD
? 32 : 8) : R_TMPS
;
636 *rd
= oc
== O_MOD
? (1 << R_RDX
) : (1 << R_RAX
);
638 *r2
= R_TMPS
& ~*rd
& ~*r1
;
640 *r2
&= ~(1 << R_RDX
);
641 *tmp
= (1 << R_RDX
) | (1 << R_RAX
);
647 *r2
= oc
& O_NUM
? 8 : R_TMPS
;
661 *tmp
= (1 << R_RDI
) | (1 << R_RCX
);
668 *tmp
= (1 << R_RDI
) | (1 << R_RSI
) | (1 << R_RCX
);
672 *r1
= (1 << REG_RET
);
676 *rd
= (1 << REG_RET
);
677 *r1
= oc
& O_SYM
? 0 : R_TMPS
;
678 *tmp
= R_TMPS
& ~R_PERM
;
684 *r2
= oc
& O_NUM
? 32 : R_TMPS
;
690 *r3
= oc
& O_NUM
? 32 : R_TMPS
;
699 *r2
= oc
& O_NUM
? 8 : R_TMPS
;
/*
 * Report whether n fits in a signed two's-complement immediate of lim bits,
 * i.e. whether -2^(lim-1) <= n <= 2^(lim-1) - 1.
 *
 * Returns 1 when n fits and 0 otherwise.  A non-positive lim can hold no
 * value; a lim at least as wide as long can hold every n.
 */
int i_imm(long lim, long n)
{
	long max;

	if (lim <= 0)
		return 0;
	if (lim >= (long) sizeof(long) * 8)
		return 1;
	/* shift a long, not an int: 1 << 31 overflows int when lim is 32 */
	max = (1L << (lim - 1)) - 1;
	return n <= max && n >= -max - 1;
}
713 long i_ins(long op
, long rd
, long r1
, long r2
, long r3
)
719 if (rd
== r1
&& r2
<= 127 && r2
>= -128)
720 i_add_imm(op
, r1
, r1
, r2
);
722 i_add_anyimm(rd
, r1
, r2
);
724 i_add(op
, r1
, r1
, r2
);
729 i_shl_imm(op
, r1
, r1
, r2
);
731 i_shl(op
, r1
, r1
, r2
);
735 i_mul(R_RAX
, r1
, r2
);
737 i_div(op
, R_RAX
, r1
, r2
);
739 i_div(op
, R_RDX
, r1
, r2
);
750 if (oc
& O_UOP
) { /* uop */
760 op_rr(I_CALL
, 2, r1
, LONGSZ
);
763 if (oc
== (O_CALL
| O_SYM
)) {
764 os("\xe8", 1); /* call $x */
765 i_rel(r1
, OUT_CS
| OUT_RLREL
, opos());
769 if (oc
== (O_MOV
| O_SYM
)) {
773 if (oc
== (O_MOV
| O_NUM
)) {
778 os("\xfc\xf3\xaa", 3); /* cld; rep stosb */
782 os("\xfc\xf3\xa4", 3); /* cld; rep movs */
787 jmp_add(O_JMP
, i_jmp(op
, 4), 0);
790 if (oc
== (O_LD
| O_NUM
)) {
791 op_rm(movrx_op(bt
, I_MOVR
), rd
, r1
, r2
, movrx_bt(bt
));
794 if (oc
== (O_ST
| O_NUM
)) {
795 op_rm(I_MOV
, r1
, r2
, r3
, bt
);
804 jmp_add(op
, i_jmp(op
, 4), r3
+ 1);