2 * i386 specific functions for TCC assembler
4 * Copyright (c) 2001, 2002 Fabrice Bellard
5 * Copyright (c) 2009 Frédéric Feret (x86_64 support)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* Maximum number of operands an instruction template can carry. */
24 #define MAX_OPERANDS 3

/* Token-range markers: opcodes in [TOK_ASM_first, TOK_ASM_last] are the
   zero-operand instructions looked up in op0_codes[]; TOK_ASM_alllast is
   the last assembler opcode token of all (used to distinguish "bad operand"
   from "unknown opcode"). */
26 #define TOK_ASM_first TOK_ASM_clc
27 #define TOK_ASM_last TOK_ASM_emms
28 #define TOK_ASM_alllast TOK_ASM_subps
/* Flag bits stored in ASMInstr.instr_type: the low bits describe which
   size suffixes / encodings a template accepts. */
30 #define OPC_B 0x01 /* only used with OPC_WL */
31 #define OPC_WL 0x02 /* accepts w, l or no suffix */
32 #define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
33 #define OPC_REG 0x04 /* register is added to opcode */
34 #define OPC_MODRM 0x08 /* modrm encoding */
/* OPCT_MASK selects the opcode-class field (tested via OPCT_IS below);
   the class values 0x10..0x50 are mutually exclusive. */
36 #define OPCT_MASK 0x70
37 #define OPC_FWAIT 0x10 /* add fwait opcode */
38 #define OPC_SHIFT 0x20 /* shift opcodes */
39 #define OPC_ARITH 0x30 /* arithmetic opcodes */
40 #define OPC_FARITH 0x40 /* FPU arithmetic opcodes */
41 #define OPC_TEST 0x50 /* test opcodes */
42 #define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))
44 #define OPC_0F 0x100 /* Is secondary map (0x0f prefix) */
45 #define OPC_48 0x200 /* Always has REX prefix */
46 #ifdef TCC_TARGET_X86_64
47 # define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */
48 # define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
49 # define OPC_WLX OPC_WLQ
50 # define OPC_BWLX OPC_BWLQ
52 # define OPC_WLX OPC_WL
53 # define OPC_BWLX OPC_BWL
56 #define OPC_GROUP_SHIFT 13
58 /* in order to compress the operand type, we use specific operands and
61 OPT_REG8
=0, /* warning: value is hardcoded from TOK_ASM_xxx */
62 OPT_REG16
, /* warning: value is hardcoded from TOK_ASM_xxx */
63 OPT_REG32
, /* warning: value is hardcoded from TOK_ASM_xxx */
64 #ifdef TCC_TARGET_X86_64
65 OPT_REG64
, /* warning: value is hardcoded from TOK_ASM_xxx */
67 OPT_MMX
, /* warning: value is hardcoded from TOK_ASM_xxx */
68 OPT_SSE
, /* warning: value is hardcoded from TOK_ASM_xxx */
69 OPT_CR
, /* warning: value is hardcoded from TOK_ASM_xxx */
70 OPT_TR
, /* warning: value is hardcoded from TOK_ASM_xxx */
71 OPT_DB
, /* warning: value is hardcoded from TOK_ASM_xxx */
74 #ifdef TCC_TARGET_X86_64
75 OPT_REG8_LOW
, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
76 with REX prefix, not used in insn templates */
82 #ifdef TCC_TARGET_X86_64
85 OPT_EAX
, /* %al, %ax, %eax or %rax register */
86 OPT_ST0
, /* %st(0) register */
87 OPT_CL
, /* %cl register */
88 OPT_DX
, /* %dx register */
89 OPT_ADDR
, /* OP_EA with only offset */
90 OPT_INDIR
, /* *(expr) */
93 OPT_IM
, /* IM8 | IM16 | IM32 */
94 OPT_REG
, /* REG8 | REG16 | REG32 | REG64 */
95 OPT_REGW
, /* REG16 | REG32 | REG64 */
96 OPT_IMW
, /* IM16 | IM32 */
97 OPT_MMXSSE
, /* MMX | SSE */
98 OPT_DISP
, /* Like OPT_ADDR, but emitted as displacement (for jumps) */
99 OPT_DISP8
, /* Like OPT_ADDR, but only 8bit (short jumps) */
100 /* can be ored with any OPT_xxx */
/* Operand-type bit masks: one bit per OPT_xxx enum value, so a template
   entry can OR several masks together to accept multiple operand kinds. */
104 #define OP_REG8 (1 << OPT_REG8)
105 #define OP_REG16 (1 << OPT_REG16)
106 #define OP_REG32 (1 << OPT_REG32)
107 #define OP_MMX (1 << OPT_MMX)
108 #define OP_SSE (1 << OPT_SSE)
109 #define OP_CR (1 << OPT_CR)
110 #define OP_TR (1 << OPT_TR)
111 #define OP_DB (1 << OPT_DB)
112 #define OP_SEG (1 << OPT_SEG)
113 #define OP_ST (1 << OPT_ST)
114 #define OP_IM8 (1 << OPT_IM8)
115 #define OP_IM8S (1 << OPT_IM8S)
116 #define OP_IM16 (1 << OPT_IM16)
117 #define OP_IM32 (1 << OPT_IM32)
118 #define OP_EAX (1 << OPT_EAX)
119 #define OP_ST0 (1 << OPT_ST0)
120 #define OP_CL (1 << OPT_CL)
121 #define OP_DX (1 << OPT_DX)
122 #define OP_ADDR (1 << OPT_ADDR)
123 #define OP_INDIR (1 << OPT_INDIR)
124 #ifdef TCC_TARGET_X86_64
125 # define OP_REG64 (1 << OPT_REG64)
126 # define OP_REG8_LOW (1 << OPT_REG8_LOW)
127 # define OP_IM64 (1 << OPT_IM64)
128 # define OP_EA32 (OP_EA << 1)
131 # define OP_REG8_LOW 0
136 #define OP_EA 0x40000000
137 #define OP_REG (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
139 #ifdef TCC_TARGET_X86_64
140 # define TREG_XAX TREG_RAX
141 # define TREG_XCX TREG_RCX
142 # define TREG_XDX TREG_RDX
144 # define TREG_XAX TREG_EAX
145 # define TREG_XCX TREG_ECX
146 # define TREG_XDX TREG_EDX
149 typedef struct ASMInstr
{
154 uint8_t op_type
[MAX_OPERANDS
]; /* see OP_xxx */
157 typedef struct Operand
{
159 int8_t reg
; /* register, -1 if none */
160 int8_t reg2
; /* second register, -1 if none */
165 static const uint8_t reg_to_size
[9] = {
170 #ifdef TCC_TARGET_X86_64
174 0, 0, 1, 0, 2, 0, 0, 0, 3
177 #define NB_TEST_OPCODES 30
179 static const uint8_t test_bits
[NB_TEST_OPCODES
] = {
212 static const uint8_t segment_prefixes
[] = {
221 static const ASMInstr asm_instrs
[] = {
223 /* This removes a 0x0f in the second byte */
224 #define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
225 /* This constructs instr_type from opcode, type and group. */
226 #define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
/* DEF_ASM_OP0 deliberately expands to nothing here: zero-operand opcodes
   are collected separately in op0_codes[] via a second set of DEF_ASM_OP*
   definitions, while the 1-3 operand forms build asm_instrs[] entries. */
227 #define DEF_ASM_OP0(name, opcode)
228 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
229 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
230 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
231 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
232 #ifdef TCC_TARGET_X86_64
233 # include "x86_64-asm.h"
235 # include "i386-asm.h"
241 static const uint16_t op0_codes
[] = {
/* Second pass over the opcode header: only the zero-operand opcodes emit
   their raw opcode value (into op0_codes[]); every other template form
   expands to nothing on this pass. */
243 #define DEF_ASM_OP0(x, opcode) opcode,
244 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
245 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
246 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
247 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
248 #ifdef TCC_TARGET_X86_64
249 # include "x86_64-asm.h"
251 # include "i386-asm.h"
255 static inline int get_reg_shift(TCCState
*s1
)
258 v
= asm_int_expr(s1
);
273 expect("1, 2, 4 or 8 constant");
280 #ifdef TCC_TARGET_X86_64
281 static int asm_parse_numeric_reg(int t
, unsigned int *type
)
284 if (t
>= TOK_IDENT
&& t
< tok_ident
) {
285 const char *s
= table_ident
[t
- TOK_IDENT
]->str
;
294 /* Don't allow leading '0'. */
295 if ((c
= *s
++) >= '1' && c
<= '9')
299 if ((c
= *s
) >= '0' && c
<= '5')
300 s
++, reg
= reg
* 10 + c
- '0';
305 else if (*type
!= OP_REG64
)
307 else if (c
== 'b' && !s
[1])
309 else if (c
== 'w' && !s
[1])
311 else if (c
== 'd' && !s
[1])
320 static int asm_parse_reg(unsigned int *type
)
327 if (tok
>= TOK_ASM_eax
&& tok
<= TOK_ASM_edi
) {
328 reg
= tok
- TOK_ASM_eax
;
330 #ifdef TCC_TARGET_X86_64
331 } else if (tok
>= TOK_ASM_rax
&& tok
<= TOK_ASM_rdi
) {
332 reg
= tok
- TOK_ASM_rax
;
334 } else if (tok
== TOK_ASM_rip
) {
335 reg
= -2; /* Probably should use different escape code. */
337 } else if ((reg
= asm_parse_numeric_reg(tok
, type
)) >= 0
338 && (*type
== OP_REG32
|| *type
== OP_REG64
)) {
349 static void parse_operand(TCCState
*s1
, Operand
*op
)
363 if (tok
>= TOK_ASM_al
&& tok
<= TOK_ASM_db7
) {
364 reg
= tok
- TOK_ASM_al
;
365 op
->type
= 1 << (reg
>> 3); /* WARNING: do not change constant order */
367 if ((op
->type
& OP_REG
) && op
->reg
== TREG_XAX
)
369 else if (op
->type
== OP_REG8
&& op
->reg
== TREG_XCX
)
371 else if (op
->type
== OP_REG16
&& op
->reg
== TREG_XDX
)
373 } else if (tok
>= TOK_ASM_dr0
&& tok
<= TOK_ASM_dr7
) {
375 op
->reg
= tok
- TOK_ASM_dr0
;
376 } else if (tok
>= TOK_ASM_es
&& tok
<= TOK_ASM_gs
) {
378 op
->reg
= tok
- TOK_ASM_es
;
379 } else if (tok
== TOK_ASM_st
) {
385 if (tok
!= TOK_PPNUM
)
389 if ((unsigned)reg
>= 8 || p
[1] != '\0')
398 #ifdef TCC_TARGET_X86_64
399 } else if (tok
>= TOK_ASM_spl
&& tok
<= TOK_ASM_dil
) {
400 op
->type
= OP_REG8
| OP_REG8_LOW
;
401 op
->reg
= 4 + tok
- TOK_ASM_spl
;
402 } else if ((op
->reg
= asm_parse_numeric_reg(tok
, &op
->type
)) >= 0) {
407 tcc_error("unknown register %%%s", get_tok_str(tok
, &tokc
));
411 } else if (tok
== '$') {
418 if (op
->e
.v
== (uint8_t)op
->e
.v
)
420 if (op
->e
.v
== (int8_t)op
->e
.v
)
422 if (op
->e
.v
== (uint16_t)op
->e
.v
)
424 #ifdef TCC_TARGET_X86_64
425 if (op
->e
.v
!= (int32_t)op
->e
.v
&& op
->e
.v
!= (uint32_t)op
->e
.v
)
430 /* address(reg,reg2,shift) with all variants */
445 /* bracketed offset expression */
456 unsigned int type
= 0;
459 op
->reg
= asm_parse_reg(&type
);
464 op
->reg2
= asm_parse_reg(&type
);
468 op
->shift
= get_reg_shift(s1
);
475 if (op
->reg
== -1 && op
->reg2
== -1)
481 /* XXX: unify with C code output ? */
482 ST_FUNC
void gen_expr32(ExprValue
*pe
)
485 /* If PC-relative, always set VT_SYM, even without symbol,
486 so as to force a relocation to be emitted. */
487 gen_addrpc32(VT_SYM
, pe
->sym
, pe
->v
);
489 gen_addr32(pe
->sym
? VT_SYM
: 0, pe
->sym
, pe
->v
);
492 #ifdef TCC_TARGET_X86_64
493 ST_FUNC
void gen_expr64(ExprValue
*pe
)
495 gen_addr64(pe
->sym
? VT_SYM
: 0, pe
->sym
, pe
->v
);
499 /* XXX: unify with C code output ? */
500 static void gen_disp32(ExprValue
*pe
)
503 ElfSym
*esym
= elfsym(sym
);
504 if (esym
&& esym
->st_shndx
== cur_text_section
->sh_num
) {
505 /* same section: we can output an absolute value. Note
506 that the TCC compiler behaves differently here because
507 it always outputs a relocation to ease (future) code
508 elimination in the linker */
509 gen_le32(pe
->v
+ esym
->st_value
- ind
- 4);
511 if (sym
&& sym
->type
.t
== VT_VOID
) {
512 sym
->type
.t
= VT_FUNC
;
513 sym
->type
.ref
= NULL
;
515 gen_addrpc32(VT_SYM
, sym
, pe
->v
);
519 /* generate the modrm operand */
520 static inline int asm_modrm(int reg
, Operand
*op
)
522 int mod
, reg1
, reg2
, sib_reg1
;
524 if (op
->type
& (OP_REG
| OP_MMX
| OP_SSE
)) {
525 g(0xc0 + (reg
<< 3) + op
->reg
);
526 } else if (op
->reg
== -1 && op
->reg2
== -1) {
527 /* displacement only */
528 #ifdef TCC_TARGET_X86_64
529 g(0x04 + (reg
<< 3));
532 g(0x05 + (reg
<< 3));
535 #ifdef TCC_TARGET_X86_64
536 } else if (op
->reg
== -2) {
537 ExprValue
*pe
= &op
->e
;
538 g(0x05 + (reg
<< 3));
539 gen_addrpc32(pe
->sym
? VT_SYM
: 0, pe
->sym
, pe
->v
);
544 /* fist compute displacement encoding */
545 if (sib_reg1
== -1) {
548 } else if (op
->e
.v
== 0 && !op
->e
.sym
&& op
->reg
!= 5) {
550 } else if (op
->e
.v
== (int8_t)op
->e
.v
&& !op
->e
.sym
) {
555 /* compute if sib byte needed */
559 g(mod
+ (reg
<< 3) + reg1
);
564 reg2
= 4; /* indicate no index */
565 g((op
->shift
<< 6) + (reg2
<< 3) + sib_reg1
);
570 } else if (mod
== 0x80 || op
->reg
== -1) {
577 #ifdef TCC_TARGET_X86_64
583 static void asm_rex(int width64
, Operand
*ops
, int nb_ops
, int *op_type
,
586 unsigned char rex
= width64
? 0x48 : 0;
587 int saw_high_8bit
= 0;
590 /* No mod/rm byte, but we might have a register op nevertheless
591 (we will add it to the opcode later). */
592 for(i
= 0; i
< nb_ops
; i
++) {
593 if (op_type
[i
] & (OP_REG
| OP_ST
)) {
594 if (ops
[i
].reg
>= 8) {
597 } else if (ops
[i
].type
& OP_REG8_LOW
)
599 else if (ops
[i
].type
& OP_REG8
&& ops
[i
].reg
>= 4)
600 /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
601 saw_high_8bit
= ops
[i
].reg
;
607 if (ops
[regi
].reg
>= 8) {
610 } else if (ops
[regi
].type
& OP_REG8_LOW
)
612 else if (ops
[regi
].type
& OP_REG8
&& ops
[regi
].reg
>= 4)
613 /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
614 saw_high_8bit
= ops
[regi
].reg
;
616 if (ops
[rmi
].type
& (OP_REG
| OP_MMX
| OP_SSE
| OP_CR
| OP_EA
)) {
617 if (ops
[rmi
].reg
>= 8) {
620 } else if (ops
[rmi
].type
& OP_REG8_LOW
)
622 else if (ops
[rmi
].type
& OP_REG8
&& ops
[rmi
].reg
>= 4)
623 /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
624 saw_high_8bit
= ops
[rmi
].reg
;
626 if (ops
[rmi
].type
& OP_EA
&& ops
[rmi
].reg2
>= 8) {
633 tcc_error("can't encode register %%%ch when REX prefix is required",
634 "acdb"[saw_high_8bit
-4]);
640 static void maybe_print_stats (void)
642 static int already
= 1;
644 /* print stats about opcodes */
646 const struct ASMInstr
*pa
;
649 int nb_op_vals
, i
, j
;
653 memset(freq
, 0, sizeof(freq
));
654 for(pa
= asm_instrs
; pa
->sym
!= 0; pa
++) {
656 //for(i=0;i<pa->nb_ops;i++) {
657 for(j
=0;j
<nb_op_vals
;j
++) {
658 //if (pa->op_type[i] == op_vals[j])
659 if (pa
->instr_type
== op_vals
[j
])
662 //op_vals[nb_op_vals++] = pa->op_type[i];
663 op_vals
[nb_op_vals
++] = pa
->instr_type
;
667 for(i
=0;i
<nb_op_vals
;i
++) {
669 //if ((v & (v - 1)) != 0)
670 printf("%3d: %08x\n", i
, v
);
672 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
673 (int)sizeof(asm_instrs
),
674 (int)sizeof(asm_instrs
) / (int)sizeof(ASMInstr
),
675 freq
[0], freq
[1], freq
[2], freq
[3]);
679 ST_FUNC
void asm_opcode(TCCState
*s1
, int opcode
)
682 int i
, modrm_index
, modreg_index
, reg
, v
, op1
, seg_prefix
, pc
;
684 Operand ops
[MAX_OPERANDS
], *pop
;
685 int op_type
[3]; /* decoded op type */
686 int alltypes
; /* OR of all operand types */
689 #ifdef TCC_TARGET_X86_64
694 /* force synthetic ';' after prefix instruction, so we can handle */
695 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
696 if (opcode
>= TOK_ASM_wait
&& opcode
<= TOK_ASM_repnz
)
705 if (tok
== ';' || tok
== TOK_LINEFEED
)
707 if (nb_ops
>= MAX_OPERANDS
) {
708 tcc_error("incorrect number of operands");
710 parse_operand(s1
, pop
);
712 if (pop
->type
!= OP_SEG
|| seg_prefix
)
713 tcc_error("incorrect prefix");
714 seg_prefix
= segment_prefixes
[pop
->reg
];
716 parse_operand(s1
, pop
);
717 if (!(pop
->type
& OP_EA
)) {
718 tcc_error("segment prefix must be followed by memory reference");
728 s
= 0; /* avoid warning */
731 /* optimize matching by using a lookup table (no hashing is needed
733 for(pa
= asm_instrs
; pa
->sym
!= 0; pa
++) {
734 int it
= pa
->instr_type
& OPCT_MASK
;
736 if (it
== OPC_FARITH
) {
737 v
= opcode
- pa
->sym
;
738 if (!((unsigned)v
< 8 * 6 && (v
% 6) == 0))
740 } else if (it
== OPC_ARITH
) {
741 if (!(opcode
>= pa
->sym
&& opcode
< pa
->sym
+ 8*NBWLX
))
743 s
= (opcode
- pa
->sym
) % NBWLX
;
744 if ((pa
->instr_type
& OPC_BWLX
) == OPC_WLX
)
746 /* We need to reject the xxxb opcodes that we accepted above.
747 Note that pa->sym for WLX opcodes is the 'w' token,
748 to get the 'b' token subtract one. */
749 if (((opcode
- pa
->sym
+ 1) % NBWLX
) == 0)
753 } else if (it
== OPC_SHIFT
) {
754 if (!(opcode
>= pa
->sym
&& opcode
< pa
->sym
+ 7*NBWLX
))
756 s
= (opcode
- pa
->sym
) % NBWLX
;
757 } else if (it
== OPC_TEST
) {
758 if (!(opcode
>= pa
->sym
&& opcode
< pa
->sym
+ NB_TEST_OPCODES
))
760 /* cmovxx is a test opcode but accepts multiple sizes.
761 The suffixes aren't encoded in the table, instead we
762 simply force size autodetection always and deal with suffixed
763 variants below when we don't find e.g. "cmovzl". */
764 if (pa
->instr_type
& OPC_WLX
)
766 } else if (pa
->instr_type
& OPC_B
) {
767 #ifdef TCC_TARGET_X86_64
768 /* Some instructions don't have the full size but only
769 bwl form. insb e.g. */
770 if ((pa
->instr_type
& OPC_WLQ
) != OPC_WLQ
771 && !(opcode
>= pa
->sym
&& opcode
< pa
->sym
+ NBWLX
-1))
774 if (!(opcode
>= pa
->sym
&& opcode
< pa
->sym
+ NBWLX
))
776 s
= opcode
- pa
->sym
;
777 } else if (pa
->instr_type
& OPC_WLX
) {
778 if (!(opcode
>= pa
->sym
&& opcode
< pa
->sym
+ NBWLX
-1))
780 s
= opcode
- pa
->sym
+ 1;
782 if (pa
->sym
!= opcode
)
785 if (pa
->nb_ops
!= nb_ops
)
787 #ifdef TCC_TARGET_X86_64
788 /* Special case for moves. Selecting the IM64->REG64 form
789 should only be done if we really have an >32bit imm64, and that
790 is hardcoded. Ignore it here. */
791 if (pa
->opcode
== 0xb0 && ops
[0].type
!= OP_IM64
792 && (ops
[1].type
& OP_REG
) == OP_REG64
793 && !(pa
->instr_type
& OPC_0F
))
796 /* now decode and check each operand */
798 for(i
= 0; i
< nb_ops
; i
++) {
800 op1
= pa
->op_type
[i
];
804 v
= OP_IM8
| OP_IM16
| OP_IM32
;
807 v
= OP_REG8
| OP_REG16
| OP_REG32
| OP_REG64
;
810 v
= OP_REG16
| OP_REG32
| OP_REG64
;
813 v
= OP_IM16
| OP_IM32
;
829 if ((ops
[i
].type
& v
) == 0)
831 alltypes
|= ops
[i
].type
;
833 /* all is matching ! */
838 if (opcode
>= TOK_ASM_first
&& opcode
<= TOK_ASM_last
) {
840 b
= op0_codes
[opcode
- TOK_ASM_first
];
845 } else if (opcode
<= TOK_ASM_alllast
) {
846 tcc_error("bad operand with opcode '%s'",
847 get_tok_str(opcode
, NULL
));
849 /* Special case for cmovcc, we accept size suffixes but ignore
850 them, but we don't want them to blow up our tables. */
851 TokenSym
*ts
= table_ident
[opcode
- TOK_IDENT
];
853 && strchr("wlq", ts
->str
[ts
->len
-1])
854 && !memcmp(ts
->str
, "cmov", 4)) {
855 opcode
= tok_alloc(ts
->str
, ts
->len
-1)->tok
;
858 tcc_error("unknown opcode '%s'", ts
->str
);
861 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
863 #ifdef TCC_TARGET_X86_64
864 /* XXX the autosize should rather be zero, to not have to adjust this
866 if ((pa
->instr_type
& OPC_BWLQ
) == OPC_B
)
870 /* Check for register operands providing hints about the size.
871 Start from the end, i.e. destination operands. This matters
872 only for opcodes accepting different sized registers, lar and lsl
874 for(i
= nb_ops
- 1; s
== autosize
&& i
>= 0; i
--) {
875 if ((ops
[i
].type
& OP_REG
) && !(op_type
[i
] & (OP_CL
| OP_DX
)))
876 s
= reg_to_size
[ops
[i
].type
& OP_REG
];
879 if ((opcode
== TOK_ASM_push
|| opcode
== TOK_ASM_pop
) &&
880 (ops
[0].type
& (OP_SEG
| OP_IM8S
| OP_IM32
)))
882 else if ((opcode
== TOK_ASM_push
|| opcode
== TOK_ASM_pop
) &&
883 (ops
[0].type
& OP_EA
))
886 tcc_error("cannot infer opcode suffix");
890 #ifdef TCC_TARGET_X86_64
891 /* Generate addr32 prefix if needed */
892 for(i
= 0; i
< nb_ops
; i
++) {
893 if (ops
[i
].type
& OP_EA32
) {
899 /* generate data16 prefix if needed */
904 /* accepting mmx+sse in all operands --> needs 0x66 to
905 switch to sse mode. Accepting only sse in an operand --> is
906 already SSE insn and needs 0x66/f2/f3 handling. */
907 for (i
= 0; i
< nb_ops
; i
++)
908 if ((op_type
[i
] & (OP_MMX
| OP_SSE
)) == (OP_MMX
| OP_SSE
)
909 && ops
[i
].type
& OP_SSE
)
914 #ifdef TCC_TARGET_X86_64
916 if (pa
->instr_type
& OPC_48
)
918 else if (s
== 3 || (alltypes
& OP_REG64
)) {
919 /* generate REX prefix */
921 for(i
= 0; i
< nb_ops
; i
++) {
922 if (op_type
[i
] == OP_REG64
&& pa
->opcode
!= 0xb8) {
923 /* If only 64bit regs are accepted in one operand
924 this is a default64 instruction without need for
925 REX prefixes, except for movabs(0xb8). */
930 /* XXX find better encoding for the default64 instructions. */
931 if (((opcode
!= TOK_ASM_push
&& opcode
!= TOK_ASM_pop
932 && opcode
!= TOK_ASM_pushw
&& opcode
!= TOK_ASM_pushl
933 && opcode
!= TOK_ASM_pushq
&& opcode
!= TOK_ASM_popw
934 && opcode
!= TOK_ASM_popl
&& opcode
!= TOK_ASM_popq
935 && opcode
!= TOK_ASM_call
&& opcode
!= TOK_ASM_jmp
))
941 /* now generates the operation */
942 if (OPCT_IS(pa
->instr_type
, OPC_FWAIT
))
948 if (pa
->instr_type
& OPC_0F
)
949 v
= ((v
& ~0xff) << 8) | 0x0f00 | (v
& 0xff);
950 if ((v
== 0x69 || v
== 0x6b) && nb_ops
== 2) {
951 /* kludge for imul $im, %reg */
954 op_type
[2] = op_type
[1];
955 } else if (v
== 0xcd && ops
[0].e
.v
== 3 && !ops
[0].e
.sym
) {
956 v
--; /* int $3 case */
958 } else if ((v
== 0x06 || v
== 0x07)) {
959 if (ops
[0].reg
>= 4) {
960 /* push/pop %fs or %gs */
961 v
= 0x0fa0 + (v
- 0x06) + ((ops
[0].reg
- 4) << 3);
963 v
+= ops
[0].reg
<< 3;
966 } else if (v
<= 0x05) {
968 v
+= ((opcode
- TOK_ASM_addb
) / NBWLX
) << 3;
969 } else if ((pa
->instr_type
& (OPCT_MASK
| OPC_MODRM
)) == OPC_FARITH
) {
971 v
+= ((opcode
- pa
->sym
) / 6) << 3;
974 /* search which operand will be used for modrm */
977 if (pa
->instr_type
& OPC_MODRM
) {
979 /* A modrm opcode without operands is a special case (e.g. mfence).
980 It has a group and acts as if there's an register operand 0
983 ops
[i
].type
= OP_REG
;
987 /* first look for an ea operand */
988 for(i
= 0;i
< nb_ops
; i
++) {
989 if (op_type
[i
] & OP_EA
)
992 /* then if not found, a register or indirection (shift instructions) */
993 for(i
= 0;i
< nb_ops
; i
++) {
994 if (op_type
[i
] & (OP_REG
| OP_MMX
| OP_SSE
| OP_INDIR
))
998 tcc_error("bad op table");
1002 /* if a register is used in another operand then it is
1003 used instead of group */
1004 for(i
= 0;i
< nb_ops
; i
++) {
1006 if (i
!= modrm_index
&&
1007 (t
& (OP_REG
| OP_MMX
| OP_SSE
| OP_CR
| OP_TR
| OP_DB
| OP_SEG
))) {
1013 #ifdef TCC_TARGET_X86_64
1014 asm_rex (rex64
, ops
, nb_ops
, op_type
, modreg_index
, modrm_index
);
1017 if (pa
->instr_type
& OPC_REG
) {
1018 /* mov $im, %reg case */
1019 if (v
== 0xb0 && s
>= 1)
1021 for(i
= 0; i
< nb_ops
; i
++) {
1022 if (op_type
[i
] & (OP_REG
| OP_ST
)) {
1028 if (pa
->instr_type
& OPC_B
)
1030 if (nb_ops
== 1 && pa
->op_type
[0] == OPT_DISP8
) {
1034 /* see if we can really generate the jump with a byte offset */
1035 esym
= elfsym(ops
[0].e
.sym
);
1036 if (!esym
|| esym
->st_shndx
!= cur_text_section
->sh_num
)
1038 jmp_disp
= ops
[0].e
.v
+ esym
->st_value
- ind
- 2 - (v
>= 0xff);
1039 if (jmp_disp
== (int8_t)jmp_disp
) {
1040 /* OK to generate jump */
1042 ops
[0].e
.v
= jmp_disp
;
1043 op_type
[0] = OP_IM8S
;
1046 /* long jump will be allowed. need to modify the
1048 if (v
== 0xeb) /* jmp */
1050 else if (v
== 0x70) /* jcc */
1053 tcc_error("invalid displacement");
1056 if (OPCT_IS(pa
->instr_type
, OPC_TEST
))
1057 v
+= test_bits
[opcode
- pa
->sym
];
1061 op1
= (v
>> 8) & 0xff;
1066 if (OPCT_IS(pa
->instr_type
, OPC_SHIFT
)) {
1067 reg
= (opcode
- pa
->sym
) / NBWLX
;
1070 } else if (OPCT_IS(pa
->instr_type
, OPC_ARITH
)) {
1071 reg
= (opcode
- pa
->sym
) / NBWLX
;
1072 } else if (OPCT_IS(pa
->instr_type
, OPC_FARITH
)) {
1073 reg
= (opcode
- pa
->sym
) / 6;
1075 reg
= (pa
->instr_type
>> OPC_GROUP_SHIFT
) & 7;
1079 if (pa
->instr_type
& OPC_MODRM
) {
1080 /* if a register is used in another operand then it is
1081 used instead of group */
1082 if (modreg_index
>= 0)
1083 reg
= ops
[modreg_index
].reg
;
1084 pc
= asm_modrm(reg
, &ops
[modrm_index
]);
1087 /* emit constants */
1088 #ifndef TCC_TARGET_X86_64
1089 if (!(pa
->instr_type
& OPC_0F
)
1090 && (pa
->opcode
== 0x9a || pa
->opcode
== 0xea)) {
1091 /* ljmp or lcall kludge */
1092 gen_expr32(&ops
[1].e
);
1094 tcc_error("cannot relocate");
1095 gen_le16(ops
[0].e
.v
);
1099 for(i
= 0;i
< nb_ops
; i
++) {
1101 if (v
& (OP_IM8
| OP_IM16
| OP_IM32
| OP_IM64
| OP_IM8S
| OP_ADDR
)) {
1102 /* if multiple sizes are given it means we must look
1104 if ((v
| OP_IM8
| OP_IM64
) == (OP_IM8
| OP_IM16
| OP_IM32
| OP_IM64
)) {
1109 else if (s
== 2 || (v
& OP_IM64
) == 0)
1115 if ((v
& (OP_IM8
| OP_IM8S
| OP_IM16
)) && ops
[i
].e
.sym
)
1116 tcc_error("cannot relocate");
1118 if (v
& (OP_IM8
| OP_IM8S
)) {
1120 } else if (v
& OP_IM16
) {
1121 gen_le16(ops
[i
].e
.v
);
1122 #ifdef TCC_TARGET_X86_64
1123 } else if (v
& OP_IM64
) {
1124 gen_expr64(&ops
[i
].e
);
1126 } else if (pa
->op_type
[i
] == OPT_DISP
|| pa
->op_type
[i
] == OPT_DISP8
) {
1127 gen_disp32(&ops
[i
].e
);
1129 gen_expr32(&ops
[i
].e
);
1134 /* after immediate operands, adjust pc-relative address */
1136 add32le(cur_text_section
->data
+ pc
- 4, pc
- ind
);
1139 /* return the constraint priority (we allocate first the lowest
1140 numbered constraints) */
1141 static inline int constraint_priority(const char *str
)
1143 int priority
, c
, pr
;
1145 /* we take the lowest priority */
1182 tcc_error("unknown constraint '%c'", c
);
1191 static const char *skip_constraint_modifiers(const char *p
)
1193 while (*p
== '=' || *p
== '&' || *p
== '+' || *p
== '%')
1198 /* If T (a token) is of the form "%reg" returns the register
1199 number and type, otherwise return -1. */
1200 ST_FUNC
int asm_parse_regvar (int t
)
1206 s
= table_ident
[t
- TOK_IDENT
]->str
;
1209 t
= tok_alloc(s
+1, strlen(s
)-1)->tok
;
1212 parse_operand(tcc_state
, &op
);
1213 /* Accept only integer regs for now. */
1214 if (op
.type
& OP_REG
)
1220 #define REG_OUT_MASK 0x01
1221 #define REG_IN_MASK 0x02
1223 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
1225 ST_FUNC
void asm_compute_constraints(ASMOperand
*operands
,
1226 int nb_operands
, int nb_outputs
,
1227 const uint8_t *clobber_regs
,
1231 int sorted_op
[MAX_ASM_OPERANDS
];
1232 int i
, j
, k
, p1
, p2
, tmp
, reg
, c
, reg_mask
;
1234 uint8_t regs_allocated
[NB_ASM_REGS
];
1237 for(i
=0;i
<nb_operands
;i
++) {
1239 op
->input_index
= -1;
1245 /* compute constraint priority and evaluate references to output
1246 constraints if input constraints */
1247 for(i
=0;i
<nb_operands
;i
++) {
1249 str
= op
->constraint
;
1250 str
= skip_constraint_modifiers(str
);
1251 if (isnum(*str
) || *str
== '[') {
1252 /* this is a reference to another constraint */
1253 k
= find_constraint(operands
, nb_operands
, str
, NULL
);
1254 if ((unsigned)k
>= i
|| i
< nb_outputs
)
1255 tcc_error("invalid reference in constraint %d ('%s')",
1258 if (operands
[k
].input_index
>= 0)
1259 tcc_error("cannot reference twice the same operand");
1260 operands
[k
].input_index
= i
;
1262 } else if ((op
->vt
->r
& VT_VALMASK
) == VT_LOCAL
1264 && (reg
= op
->vt
->sym
->r
& VT_VALMASK
) < VT_CONST
) {
1268 op
->priority
= constraint_priority(str
);
1272 /* sort operands according to their priority */
1273 for(i
=0;i
<nb_operands
;i
++)
1275 for(i
=0;i
<nb_operands
- 1;i
++) {
1276 for(j
=i
+1;j
<nb_operands
;j
++) {
1277 p1
= operands
[sorted_op
[i
]].priority
;
1278 p2
= operands
[sorted_op
[j
]].priority
;
1281 sorted_op
[i
] = sorted_op
[j
];
1287 for(i
= 0;i
< NB_ASM_REGS
; i
++) {
1288 if (clobber_regs
[i
])
1289 regs_allocated
[i
] = REG_IN_MASK
| REG_OUT_MASK
;
1291 regs_allocated
[i
] = 0;
1293 /* esp cannot be used */
1294 regs_allocated
[4] = REG_IN_MASK
| REG_OUT_MASK
;
1295 /* ebp cannot be used yet */
1296 regs_allocated
[5] = REG_IN_MASK
| REG_OUT_MASK
;
1298 /* allocate registers and generate corresponding asm moves */
1299 for(i
=0;i
<nb_operands
;i
++) {
1302 str
= op
->constraint
;
1303 /* no need to allocate references */
1304 if (op
->ref_index
>= 0)
1306 /* select if register is used for output, input or both */
1307 if (op
->input_index
>= 0) {
1308 reg_mask
= REG_IN_MASK
| REG_OUT_MASK
;
1309 } else if (j
< nb_outputs
) {
1310 reg_mask
= REG_OUT_MASK
;
1312 reg_mask
= REG_IN_MASK
;
1315 if (is_reg_allocated(op
->reg
))
1316 tcc_error("asm regvar requests register that's taken already");
1329 if (j
>= nb_outputs
)
1330 tcc_error("'%c' modifier can only be applied to outputs", c
);
1331 reg_mask
= REG_IN_MASK
| REG_OUT_MASK
;
1334 /* allocate both eax and edx */
1335 if (is_reg_allocated(TREG_XAX
) ||
1336 is_reg_allocated(TREG_XDX
))
1340 regs_allocated
[TREG_XAX
] |= reg_mask
;
1341 regs_allocated
[TREG_XDX
] |= reg_mask
;
1361 if (is_reg_allocated(reg
))
1365 /* eax, ebx, ecx or edx */
1366 for(reg
= 0; reg
< 4; reg
++) {
1367 if (!is_reg_allocated(reg
))
1373 case 'p': /* A general address, for x86(64) any register is acceptable*/
1374 /* any general register */
1375 for(reg
= 0; reg
< 8; reg
++) {
1376 if (!is_reg_allocated(reg
))
1381 /* now we can reload in the register */
1384 regs_allocated
[reg
] |= reg_mask
;
1388 if (!((op
->vt
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
))
1394 if (!((op
->vt
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
))
1399 /* nothing special to do because the operand is already in
1400 memory, except if the pointer itself is stored in a
1401 memory variable (VT_LLOCAL case) */
1402 /* XXX: fix constant case */
1403 /* if it is a reference to a memory zone, it must lie
1404 in a register, so we reserve the register in the
1405 input registers and a load will be generated
1407 if (j
< nb_outputs
|| c
== 'm') {
1408 if ((op
->vt
->r
& VT_VALMASK
) == VT_LLOCAL
) {
1409 /* any general register */
1410 for(reg
= 0; reg
< 8; reg
++) {
1411 if (!(regs_allocated
[reg
] & REG_IN_MASK
))
1416 /* now we can reload in the register */
1417 regs_allocated
[reg
] |= REG_IN_MASK
;
1424 tcc_error("asm constraint %d ('%s') could not be satisfied",
1428 /* if a reference is present for that operand, we assign it too */
1429 if (op
->input_index
>= 0) {
1430 operands
[op
->input_index
].reg
= op
->reg
;
1431 operands
[op
->input_index
].is_llong
= op
->is_llong
;
1435 /* compute out_reg. It is used to store outputs registers to memory
1436 locations references by pointers (VT_LLOCAL case) */
1438 for(i
=0;i
<nb_operands
;i
++) {
1441 (op
->vt
->r
& VT_VALMASK
) == VT_LLOCAL
&&
1443 for(reg
= 0; reg
< 8; reg
++) {
1444 if (!(regs_allocated
[reg
] & REG_OUT_MASK
))
1447 tcc_error("could not find free output register for reloading");
1454 /* print sorted constraints */
1456 for(i
=0;i
<nb_operands
;i
++) {
1459 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1461 op
->id
? get_tok_str(op
->id
, NULL
) : "",
1467 printf("out_reg=%d\n", *pout_reg
);
1471 ST_FUNC
void subst_asm_operand(CString
*add_str
,
1472 SValue
*sv
, int modifier
)
1474 int r
, reg
, size
, val
;
1478 if ((r
& VT_VALMASK
) == VT_CONST
) {
1479 if (!(r
& VT_LVAL
) && modifier
!= 'c' && modifier
!= 'n' &&
1481 cstr_ccat(add_str
, '$');
1483 const char *name
= get_tok_str(sv
->sym
->v
, NULL
);
1484 if (sv
->sym
->v
>= SYM_FIRST_ANOM
) {
1485 /* In case of anonymous symbols ("L.42", used
1486 for static data labels) we can't find them
1487 in the C symbol table when later looking up
1488 this name. So enter them now into the asm label
1489 list when we still know the symbol. */
1490 get_asm_sym(tok_alloc(name
, strlen(name
))->tok
, sv
->sym
);
1492 cstr_cat(add_str
, name
, -1);
1493 if ((uint32_t)sv
->c
.i
== 0)
1495 cstr_ccat(add_str
, '+');
1498 if (modifier
== 'n')
1500 snprintf(buf
, sizeof(buf
), "%d", (int)sv
->c
.i
);
1501 cstr_cat(add_str
, buf
, -1);
1503 #ifdef TCC_TARGET_X86_64
1505 cstr_cat(add_str
, "(%rip)", -1);
1507 } else if ((r
& VT_VALMASK
) == VT_LOCAL
) {
1508 #ifdef TCC_TARGET_X86_64
1509 snprintf(buf
, sizeof(buf
), "%d(%%rbp)", (int)sv
->c
.i
);
1511 snprintf(buf
, sizeof(buf
), "%d(%%ebp)", (int)sv
->c
.i
);
1513 cstr_cat(add_str
, buf
, -1);
1514 } else if (r
& VT_LVAL
) {
1515 reg
= r
& VT_VALMASK
;
1516 if (reg
>= VT_CONST
)
1517 tcc_error("internal compiler error");
1518 snprintf(buf
, sizeof(buf
), "(%%%s)",
1519 #ifdef TCC_TARGET_X86_64
1520 get_tok_str(TOK_ASM_rax
+ reg
, NULL
)
1522 get_tok_str(TOK_ASM_eax
+ reg
, NULL
)
1525 cstr_cat(add_str
, buf
, -1);
1528 reg
= r
& VT_VALMASK
;
1529 if (reg
>= VT_CONST
)
1530 tcc_error("internal compiler error");
1532 /* choose register operand size */
1533 if ((sv
->type
.t
& VT_BTYPE
) == VT_BYTE
||
1534 (sv
->type
.t
& VT_BTYPE
) == VT_BOOL
)
1536 else if ((sv
->type
.t
& VT_BTYPE
) == VT_SHORT
)
1538 #ifdef TCC_TARGET_X86_64
1539 else if ((sv
->type
.t
& VT_BTYPE
) == VT_LLONG
||
1540 (sv
->type
.t
& VT_BTYPE
) == VT_PTR
)
1545 if (size
== 1 && reg
>= 4)
1548 if (modifier
== 'b') {
1550 tcc_error("cannot use byte register");
1552 } else if (modifier
== 'h') {
1554 tcc_error("cannot use byte register");
1556 } else if (modifier
== 'w') {
1558 } else if (modifier
== 'k') {
1560 #ifdef TCC_TARGET_X86_64
1561 } else if (modifier
== 'q') {
1568 reg
= TOK_ASM_ah
+ reg
;
1571 reg
= TOK_ASM_al
+ reg
;
1574 reg
= TOK_ASM_ax
+ reg
;
1577 reg
= TOK_ASM_eax
+ reg
;
1579 #ifdef TCC_TARGET_X86_64
1581 reg
= TOK_ASM_rax
+ reg
;
1585 snprintf(buf
, sizeof(buf
), "%%%s", get_tok_str(reg
, NULL
));
1586 cstr_cat(add_str
, buf
, -1);
1590 /* generate prolog and epilog code for asm statement */
1591 ST_FUNC
void asm_gen_code(ASMOperand
*operands
, int nb_operands
,
1592 int nb_outputs
, int is_output
,
1593 uint8_t *clobber_regs
,
1596 uint8_t regs_allocated
[NB_ASM_REGS
];
1600 /* Strictly speaking %Xbp and %Xsp should be included in the
1601 call-preserved registers, but currently it doesn't matter. */
1602 #ifdef TCC_TARGET_X86_64
1603 #ifdef TCC_TARGET_PE
1604 static uint8_t reg_saved
[] = { 3, 6, 7, 12, 13, 14, 15 };
1606 static uint8_t reg_saved
[] = { 3, 12, 13, 14, 15 };
1609 static uint8_t reg_saved
[] = { 3, 6, 7 };
1612 /* mark all used registers */
1613 memcpy(regs_allocated
, clobber_regs
, sizeof(regs_allocated
));
1614 for(i
= 0; i
< nb_operands
;i
++) {
1617 regs_allocated
[op
->reg
] = 1;
1620 /* generate reg save code */
1621 for(i
= 0; i
< sizeof(reg_saved
)/sizeof(reg_saved
[0]); i
++) {
1623 if (regs_allocated
[reg
]) {
1630 /* generate load code */
1631 for(i
= 0; i
< nb_operands
; i
++) {
1634 if ((op
->vt
->r
& VT_VALMASK
) == VT_LLOCAL
&&
1636 /* memory reference case (for both input and
1640 sv
.r
= (sv
.r
& ~VT_VALMASK
) | VT_LOCAL
| VT_LVAL
;
1643 } else if (i
>= nb_outputs
|| op
->is_rw
) {
1644 /* load value in register */
1645 load(op
->reg
, op
->vt
);
1650 load(TREG_XDX
, &sv
);
1656 /* generate save code */
1657 for(i
= 0 ; i
< nb_outputs
; i
++) {
1660 if ((op
->vt
->r
& VT_VALMASK
) == VT_LLOCAL
) {
1661 if (!op
->is_memory
) {
1664 sv
.r
= (sv
.r
& ~VT_VALMASK
) | VT_LOCAL
;
1669 sv
.r
= (sv
.r
& ~VT_VALMASK
) | out_reg
;
1670 store(op
->reg
, &sv
);
1673 store(op
->reg
, op
->vt
);
1678 store(TREG_XDX
, &sv
);
1683 /* generate reg restore code */
1684 for(i
= sizeof(reg_saved
)/sizeof(reg_saved
[0]) - 1; i
>= 0; i
--) {
1686 if (regs_allocated
[reg
]) {
1695 ST_FUNC
void asm_clobber(uint8_t *clobber_regs
, const char *str
)
1699 #ifdef TCC_TARGET_X86_64
1703 if (!strcmp(str
, "memory") ||
1704 !strcmp(str
, "cc") ||
1705 !strcmp(str
, "flags"))
1707 ts
= tok_alloc(str
, strlen(str
));
1709 if (reg
>= TOK_ASM_eax
&& reg
<= TOK_ASM_edi
) {
1711 } else if (reg
>= TOK_ASM_ax
&& reg
<= TOK_ASM_di
) {
1713 #ifdef TCC_TARGET_X86_64
1714 } else if (reg
>= TOK_ASM_rax
&& reg
<= TOK_ASM_rdi
) {
1716 } else if ((reg
= asm_parse_numeric_reg(reg
, &type
)) >= 0) {
1720 tcc_error("invalid clobber register '%s'", str
);
1722 clobber_regs
[reg
] = 1;