/*
 *  Provenance: tinycc.git, i386-asm.c,
 *  blob bbc19956ef6bc181992c5dd878865ecbcead1a08
 *  (commit "x86-asm: Get rid of is_short_jump")
 *
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
22 #include "tcc.h"
/* #define NB_ASM_REGS 8 */
#define MAX_OPERANDS 3   /* maximum number of operands of one instruction */
#define NB_SAVED_REGS 3  /* number of entries of reg_saved[] in asm_gen_code() */

/* token ranges used by asm_opcode() when no instruction template matches */
#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_pxor

/* flags stored in the low bits of ASMInstr.instr_type */
#define OPC_JMP        0x01  /* jmp operand */
#define OPC_B          0x02  /* only used with OPC_WL */
#define OPC_WL         0x04  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x08 /* register is added to opcode */
#define OPC_MODRM      0x10 /* modrm encoding */
#define OPC_FWAIT      0x20 /* add fwait opcode */
#define OPC_TEST       0x40 /* test opcodes */
#define OPC_SHIFT      0x80 /* shift opcodes */
#define OPC_D16      0x0100 /* generate data16 prefix */
#define OPC_ARITH    0x0200 /* arithmetic opcodes */
#define OPC_SHORTJMP 0x0400 /* short jmp operand */
#define OPC_FARITH   0x0800 /* FPU arithmetic opcodes */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
#else
# define OPC_WLX     OPC_WL
#endif

/* the opcode group is stored in instr_type starting at this bit
   (see the DEF_ASM_OPx macros and asm_opcode()) */
#define OPC_GROUP_SHIFT 13

/* in order to compress the operand type, we use specific operands and
   we or only with EA.  These values are stored in ASMInstr.op_type[];
   asm_opcode() expands them to OP_xxx bit masks (1 << OPT_xxx for the
   simple types). */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 | IM64 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 | IM64 */
#ifdef TCC_TARGET_X86_64
    OPT_IMNO64, /* IM16 | IM32 */
#endif
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};

/* operand type bit masks, as stored in Operand.type and in the decoded
   op_type[] array of asm_opcode() */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
/* the 64-bit only masks collapse to 0 on i386 so that they can be used
   unconditionally in mask expressions */
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_IM64  (1 << OPT_IM64)
# define OP_EA32  (OP_EA << 1)
#else
# define OP_REG64 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* OP_IM is the base type parse_operand() gives to every '$' immediate;
   TREG_Xxx name the target's native width a/c/d registers */
#ifdef TCC_TARGET_X86_64
# define OP_IM    OP_IM64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define OP_IM    OP_IM32
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif

142 typedef struct ASMInstr {
143 uint16_t sym;
144 uint16_t opcode;
145 uint16_t instr_type;
146 uint8_t nb_ops;
147 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
148 } ASMInstr;
150 typedef struct Operand {
151 uint32_t type;
152 int8_t reg; /* register, -1 if none */
153 int8_t reg2; /* second register, -1 if none */
154 uint8_t shift;
155 ExprValue e;
156 } Operand;
/* maps a single OP_REGxx bit (1, 2, 4 or 8) to the operand size index
   used by asm_opcode(): 0 = byte, 1 = word, 2 = long, 3 = quad.
   The designated-initializer form is kept commented out for reference;
   the positional list below encodes the same table. */
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};

#define NB_TEST_OPCODES 30

/* condition code added to the base opcode of OPC_TEST instructions;
   indexed by (opcode token - first token of the group), so several
   synonymous suffixes map to the same code */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};

/* segment override prefix bytes, indexed by (segment register token -
   TOK_ASM_es) as computed in parse_operand() */
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};

214 static const ASMInstr asm_instrs[] = {
215 #define ALT(x) x
216 #define DEF_ASM_OP0(name, opcode)
217 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
218 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
219 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
220 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
221 #ifdef TCC_TARGET_X86_64
222 # include "x86_64-asm.h"
223 #else
224 # include "i386-asm.h"
225 #endif
226 /* last operation */
227 { 0, },
230 static const uint16_t op0_codes[] = {
231 #define ALT(x)
232 #define DEF_ASM_OP0(x, opcode) opcode,
233 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
234 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
235 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
236 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
237 #ifdef TCC_TARGET_X86_64
238 # include "x86_64-asm.h"
239 #else
240 # include "i386-asm.h"
241 #endif
244 static inline int get_reg_shift(TCCState *s1)
246 int shift, v;
247 v = asm_int_expr(s1);
248 switch(v) {
249 case 1:
250 shift = 0;
251 break;
252 case 2:
253 shift = 1;
254 break;
255 case 4:
256 shift = 2;
257 break;
258 case 8:
259 shift = 3;
260 break;
261 default:
262 expect("1, 2, 4 or 8 constant");
263 shift = 0;
264 break;
266 return shift;
269 static int asm_parse_reg(int *type)
271 int reg = 0;
272 *type = 0;
273 if (tok != '%')
274 goto error_32;
275 next();
276 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
277 reg = tok - TOK_ASM_eax;
278 #ifdef TCC_TARGET_X86_64
279 *type = OP_EA32;
280 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
281 reg = tok - TOK_ASM_rax;
282 #endif
283 } else {
284 error_32:
285 expect("register");
287 next();
288 return reg;
291 static void parse_operand(TCCState *s1, Operand *op)
293 ExprValue e;
294 int reg, indir;
295 const char *p;
297 indir = 0;
298 if (tok == '*') {
299 next();
300 indir = OP_INDIR;
303 if (tok == '%') {
304 next();
305 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
306 reg = tok - TOK_ASM_al;
307 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
308 op->reg = reg & 7;
309 if ((op->type & OP_REG) && op->reg == TREG_XAX)
310 op->type |= OP_EAX;
311 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
312 op->type |= OP_CL;
313 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
314 op->type |= OP_DX;
315 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
316 op->type = OP_DB;
317 op->reg = tok - TOK_ASM_dr0;
318 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
319 op->type = OP_SEG;
320 op->reg = tok - TOK_ASM_es;
321 } else if (tok == TOK_ASM_st) {
322 op->type = OP_ST;
323 op->reg = 0;
324 next();
325 if (tok == '(') {
326 next();
327 if (tok != TOK_PPNUM)
328 goto reg_error;
329 p = tokc.str.data;
330 reg = p[0] - '0';
331 if ((unsigned)reg >= 8 || p[1] != '\0')
332 goto reg_error;
333 op->reg = reg;
334 next();
335 skip(')');
337 if (op->reg == 0)
338 op->type |= OP_ST0;
339 goto no_skip;
340 } else {
341 reg_error:
342 tcc_error("unknown register");
344 next();
345 no_skip: ;
346 } else if (tok == '$') {
347 /* constant value */
348 next();
349 asm_expr(s1, &e);
350 op->type = OP_IM;
351 op->e.v = e.v;
352 op->e.sym = e.sym;
353 if (!op->e.sym) {
354 if (op->e.v == (uint8_t)op->e.v)
355 op->type |= OP_IM8;
356 if (op->e.v == (int8_t)op->e.v)
357 op->type |= OP_IM8S;
358 if (op->e.v == (uint16_t)op->e.v)
359 op->type |= OP_IM16;
360 #ifdef TCC_TARGET_X86_64
361 if (op->e.v == (uint32_t)op->e.v)
362 op->type |= OP_IM32;
363 #endif
365 } else {
366 /* address(reg,reg2,shift) with all variants */
367 op->type = OP_EA;
368 op->reg = -1;
369 op->reg2 = -1;
370 op->shift = 0;
371 if (tok != '(') {
372 asm_expr(s1, &e);
373 op->e.v = e.v;
374 op->e.sym = e.sym;
375 } else {
376 next();
377 if (tok == '%') {
378 unget_tok('(');
379 op->e.v = 0;
380 op->e.sym = NULL;
381 } else {
382 /* bracketed offset expression */
383 asm_expr(s1, &e);
384 if (tok != ')')
385 expect(")");
386 next();
387 op->e.v = e.v;
388 op->e.sym = e.sym;
391 if (tok == '(') {
392 int type = 0;
393 next();
394 if (tok != ',') {
395 op->reg = asm_parse_reg(&type);
397 if (tok == ',') {
398 next();
399 if (tok != ',') {
400 op->reg2 = asm_parse_reg(&type);
402 if (tok == ',') {
403 next();
404 op->shift = get_reg_shift(s1);
407 if (type & OP_EA32)
408 op->type |= OP_EA32;
409 skip(')');
411 if (op->reg == -1 && op->reg2 == -1)
412 op->type |= OP_ADDR;
414 op->type |= indir;
417 /* XXX: unify with C code output ? */
418 ST_FUNC void gen_expr32(ExprValue *pe)
420 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* 64-bit counterpart of gen_expr32(), emitting through gen_addr64() */
static void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif

430 /* XXX: unify with C code output ? */
431 static void gen_disp32(ExprValue *pe)
433 Sym *sym = pe->sym;
434 if (sym && sym->r == cur_text_section->sh_num) {
435 /* same section: we can output an absolute value. Note
436 that the TCC compiler behaves differently here because
437 it always outputs a relocation to ease (future) code
438 elimination in the linker */
439 gen_le32(pe->v + sym->jnext - ind - 4);
440 } else {
441 if (sym && sym->type.t == VT_VOID) {
442 sym->type.t = VT_FUNC;
443 sym->type.ref = NULL;
445 gen_addrpc32(VT_SYM, sym, pe->v);
449 /* generate the modrm operand */
450 static inline void asm_modrm(int reg, Operand *op)
452 int mod, reg1, reg2, sib_reg1;
454 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
455 g(0xc0 + (reg << 3) + op->reg);
456 } else if (op->reg == -1 && op->reg2 == -1) {
457 /* displacement only */
458 #ifdef TCC_TARGET_X86_64
459 g(0x04 + (reg << 3));
460 g(0x25);
461 #else
462 g(0x05 + (reg << 3));
463 #endif
464 gen_expr32(&op->e);
465 } else {
466 sib_reg1 = op->reg;
467 /* fist compute displacement encoding */
468 if (sib_reg1 == -1) {
469 sib_reg1 = 5;
470 mod = 0x00;
471 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
472 mod = 0x00;
473 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
474 mod = 0x40;
475 } else {
476 mod = 0x80;
478 /* compute if sib byte needed */
479 reg1 = op->reg;
480 if (op->reg2 != -1)
481 reg1 = 4;
482 g(mod + (reg << 3) + reg1);
483 if (reg1 == 4) {
484 /* add sib byte */
485 reg2 = op->reg2;
486 if (reg2 == -1)
487 reg2 = 4; /* indicate no index */
488 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
490 /* add offset */
491 if (mod == 0x40) {
492 g(op->e.v);
493 } else if (mod == 0x80 || op->reg == -1) {
494 gen_expr32(&op->e);
499 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
501 const ASMInstr *pa;
502 int i, modrm_index, reg, v, op1, seg_prefix;
503 int nb_ops, s;
504 Operand ops[MAX_OPERANDS], *pop;
505 int op_type[3]; /* decoded op type */
506 int alltypes; /* OR of all operand types */
507 int autosize;
509 /* force synthetic ';' after prefix instruction, so we can handle */
510 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
511 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
512 unget_tok(';');
514 /* get operands */
515 pop = ops;
516 nb_ops = 0;
517 seg_prefix = 0;
518 alltypes = 0;
519 for(;;) {
520 if (tok == ';' || tok == TOK_LINEFEED)
521 break;
522 if (nb_ops >= MAX_OPERANDS) {
523 tcc_error("incorrect number of operands");
525 parse_operand(s1, pop);
526 if (tok == ':') {
527 if (pop->type != OP_SEG || seg_prefix)
528 tcc_error("incorrect prefix");
529 seg_prefix = segment_prefixes[pop->reg];
530 next();
531 parse_operand(s1, pop);
532 if (!(pop->type & OP_EA)) {
533 tcc_error("segment prefix must be followed by memory reference");
536 pop++;
537 nb_ops++;
538 if (tok != ',')
539 break;
540 next();
543 s = 0; /* avoid warning */
545 /* optimize matching by using a lookup table (no hashing is needed
546 !) */
547 for(pa = asm_instrs; pa->sym != 0; pa++) {
548 s = 0;
549 if (pa->instr_type & OPC_FARITH) {
550 v = opcode - pa->sym;
551 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
552 continue;
553 } else if (pa->instr_type & OPC_ARITH) {
554 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
555 continue;
556 s = (opcode - pa->sym) % NBWLX;
557 } else if (pa->instr_type & OPC_SHIFT) {
558 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
559 continue;
560 s = (opcode - pa->sym) % NBWLX;
561 } else if (pa->instr_type & OPC_TEST) {
562 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
563 continue;
564 /* cmovxx is a test opcode but accepts multiple sizes.
565 TCC doesn't accept the suffixed mnemonic, instead we
566 simply force size autodetection always. */
567 if (pa->instr_type & OPC_WLX)
568 s = NBWLX - 1;
569 } else if (pa->instr_type & OPC_B) {
570 #ifdef TCC_TARGET_X86_64
571 /* Some instructions don't have the full size but only
572 bwl form. insb e.g. */
573 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
574 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
575 continue;
576 #endif
577 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
578 continue;
579 s = opcode - pa->sym;
580 } else if (pa->instr_type & OPC_WLX) {
581 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
582 continue;
583 s = opcode - pa->sym + 1;
584 } else {
585 if (pa->sym != opcode)
586 continue;
588 if (pa->nb_ops != nb_ops)
589 continue;
590 /* now decode and check each operand */
591 alltypes = 0;
592 for(i = 0; i < nb_ops; i++) {
593 int op1, op2;
594 op1 = pa->op_type[i];
595 op2 = op1 & 0x1f;
596 switch(op2) {
597 case OPT_IM:
598 v = OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64;
599 break;
600 case OPT_REG:
601 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
602 break;
603 case OPT_REGW:
604 v = OP_REG16 | OP_REG32 | OP_REG64;
605 break;
606 case OPT_IMW:
607 v = OP_IM16 | OP_IM32 | OP_IM64;
608 break;
609 #ifdef TCC_TARGET_X86_64
610 case OPT_IMNO64:
611 v = OP_IM16 | OP_IM32;
612 break;
613 #endif
614 default:
615 v = 1 << op2;
616 break;
618 if (op1 & OPT_EA)
619 v |= OP_EA;
620 op_type[i] = v;
621 if ((ops[i].type & v) == 0)
622 goto next;
623 alltypes |= ops[i].type;
625 /* all is matching ! */
626 break;
627 next: ;
629 if (pa->sym == 0) {
630 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
631 int b;
632 b = op0_codes[opcode - TOK_ASM_first];
633 if (b & 0xff00)
634 g(b >> 8);
635 g(b);
636 return;
637 } else if (opcode <= TOK_ASM_alllast) {
638 tcc_error("bad operand with opcode '%s'",
639 get_tok_str(opcode, NULL));
640 } else {
641 tcc_error("unknown opcode '%s'",
642 get_tok_str(opcode, NULL));
645 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
646 autosize = NBWLX-1;
647 #ifdef TCC_TARGET_X86_64
648 /* XXX the autosize should rather be zero, to not have to adjust this
649 all the time. */
650 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ)
651 autosize = NBWLX-2;
652 #endif
653 if (s == autosize) {
654 for(i = 0; s == autosize && i < nb_ops; i++) {
655 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
656 s = reg_to_size[ops[i].type & OP_REG];
658 if (s == autosize) {
659 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
660 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32 | OP_IM64)))
661 s = 2;
662 else
663 tcc_error("cannot infer opcode suffix");
667 #ifdef TCC_TARGET_X86_64
668 /* Generate addr32 prefix if needed */
669 for(i = 0; i < nb_ops; i++) {
670 if (ops[i].type & OP_EA32) {
671 g(0x67);
672 break;
675 #endif
676 /* generate data16 prefix if needed */
677 if (s == 1 || (pa->instr_type & OPC_D16))
678 g(0x66);
679 #ifdef TCC_TARGET_X86_64
680 if (s == 3 || (alltypes & OP_REG64)) {
681 /* generate REX prefix */
682 int default64 = 0;
683 for(i = 0; i < nb_ops; i++) {
684 if (op_type[i] == OP_REG64) {
685 /* If only 64bit regs are accepted in one operand
686 this is a default64 instruction without need for
687 REX prefixes. */
688 default64 = 1;
689 break;
692 /* XXX find better encoding for the default64 instructions. */
693 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
694 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
695 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
696 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
697 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
698 && !default64)
699 g(0x48);
701 #endif
703 /* now generates the operation */
704 if (pa->instr_type & OPC_FWAIT)
705 g(0x9b);
706 if (seg_prefix)
707 g(seg_prefix);
709 v = pa->opcode;
710 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
711 /* kludge for imul $im, %reg */
712 nb_ops = 3;
713 ops[2] = ops[1];
714 op_type[2] = op_type[1];
715 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
716 v--; /* int $3 case */
717 nb_ops = 0;
718 } else if ((v == 0x06 || v == 0x07)) {
719 if (ops[0].reg >= 4) {
720 /* push/pop %fs or %gs */
721 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
722 } else {
723 v += ops[0].reg << 3;
725 nb_ops = 0;
726 } else if (v <= 0x05) {
727 /* arith case */
728 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
729 } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
730 /* fpu arith case */
731 v += ((opcode - pa->sym) / 6) << 3;
733 if (pa->instr_type & OPC_REG) {
734 for(i = 0; i < nb_ops; i++) {
735 if (op_type[i] & (OP_REG | OP_ST)) {
736 v += ops[i].reg;
737 break;
740 /* mov $im, %reg case */
741 if (pa->opcode == 0xb0 && s >= 1)
742 v += 7;
744 if (pa->instr_type & OPC_B)
745 v += s >= 1;
746 if (pa->instr_type & OPC_TEST)
747 v += test_bits[opcode - pa->sym];
748 if (pa->instr_type & OPC_SHORTJMP) {
749 Sym *sym;
750 int jmp_disp;
752 /* see if we can really generate the jump with a byte offset */
753 sym = ops[0].e.sym;
754 if (!sym)
755 goto no_short_jump;
756 if (sym->r != cur_text_section->sh_num)
757 goto no_short_jump;
758 jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
759 if (jmp_disp == (int8_t)jmp_disp) {
760 /* OK to generate jump */
761 ops[0].e.sym = 0;
762 ops[0].e.v = jmp_disp;
763 op_type[0] = OP_IM8S;
764 } else {
765 no_short_jump:
766 if (pa->instr_type & OPC_JMP) {
767 /* long jump will be allowed. need to modify the
768 opcode slightly */
769 if (v == 0xeb)
770 v = 0xe9;
771 else
772 v += 0x0f10;
773 } else {
774 tcc_error("invalid displacement");
778 op1 = v >> 8;
779 if (op1)
780 g(op1);
781 g(v);
783 /* search which operand will used for modrm */
784 modrm_index = 0;
785 if (pa->instr_type & OPC_SHIFT) {
786 reg = (opcode - pa->sym) / NBWLX;
787 if (reg == 6)
788 reg = 7;
789 } else if (pa->instr_type & OPC_ARITH) {
790 reg = (opcode - pa->sym) / NBWLX;
791 } else if (pa->instr_type & OPC_FARITH) {
792 reg = (opcode - pa->sym) / 6;
793 } else {
794 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
796 if (pa->instr_type & OPC_MODRM) {
797 /* first look for an ea operand */
798 for(i = 0;i < nb_ops; i++) {
799 if (op_type[i] & OP_EA)
800 goto modrm_found;
802 /* then if not found, a register or indirection (shift instructions) */
803 for(i = 0;i < nb_ops; i++) {
804 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
805 goto modrm_found;
807 #ifdef ASM_DEBUG
808 tcc_error("bad op table");
809 #endif
810 modrm_found:
811 modrm_index = i;
812 /* if a register is used in another operand then it is
813 used instead of group */
814 for(i = 0;i < nb_ops; i++) {
815 v = op_type[i];
816 if (i != modrm_index &&
817 (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
818 reg = ops[i].reg;
819 break;
823 asm_modrm(reg, &ops[modrm_index]);
826 /* emit constants */
827 #ifndef TCC_TARGET_X86_64
828 if (pa->opcode == 0x9a || pa->opcode == 0xea) {
829 /* ljmp or lcall kludge */
830 gen_expr32(&ops[1].e);
831 if (ops[0].e.sym)
832 tcc_error("cannot relocate");
833 gen_le16(ops[0].e.v);
834 return;
836 #endif
837 for(i = 0;i < nb_ops; i++) {
838 v = op_type[i];
839 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
840 /* if multiple sizes are given it means we must look
841 at the op size */
842 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
843 if (s == 0)
844 v = OP_IM8;
845 else if (s == 1)
846 v = OP_IM16;
847 else if (s == 2 || (v & OP_IM64) == 0)
848 v = OP_IM32;
849 else
850 v = OP_IM64;
852 if (v & (OP_IM8 | OP_IM8S)) {
853 if (ops[i].e.sym)
854 goto error_relocate;
855 g(ops[i].e.v);
856 } else if (v & OP_IM16) {
857 if (ops[i].e.sym)
858 error_relocate:
859 tcc_error("cannot relocate");
860 else
861 gen_le16(ops[i].e.v);
862 } else {
863 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
864 gen_disp32(&ops[i].e);
865 } else {
866 #ifdef TCC_TARGET_X86_64
867 if (v & OP_IM64)
868 gen_expr64(&ops[i].e);
869 else
870 #endif
871 gen_expr32(&ops[i].e);
/* return the constraint priority (we allocate first the lowest
   numbered constraints).  Each constraint letter has a class:
     0: 'A'   1: 'a' 'b' 'c' 'd' 'S' 'D'   2: 'q'   3: 'r'
     4: 'N' 'M' 'I' 'i' 'm' 'g'
   Any other character is reported through tcc_error(). */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* keep the numerically largest class found among the letters of
       the constraint string, i.e. the one allocated last */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}

/* return a pointer to the first character of 'p' that is not one of
   the leading constraint modifiers '=', '&', '+' or '%' */
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}

/* bits of regs_allocated[]: register already taken by an output
   operand / by an input operand */
#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

/* relies on the caller's local variables 'regs_allocated' and 'reg_mask' */
#define is_reg_allocated(reg) (regs_allocated[(reg)] & reg_mask)

939 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
940 int nb_operands, int nb_outputs,
941 const uint8_t *clobber_regs,
942 int *pout_reg)
944 ASMOperand *op;
945 int sorted_op[MAX_ASM_OPERANDS];
946 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
947 const char *str;
948 uint8_t regs_allocated[NB_ASM_REGS];
950 /* init fields */
951 for(i=0;i<nb_operands;i++) {
952 op = &operands[i];
953 op->input_index = -1;
954 op->ref_index = -1;
955 op->reg = -1;
956 op->is_memory = 0;
957 op->is_rw = 0;
959 /* compute constraint priority and evaluate references to output
960 constraints if input constraints */
961 for(i=0;i<nb_operands;i++) {
962 op = &operands[i];
963 str = op->constraint;
964 str = skip_constraint_modifiers(str);
965 if (isnum(*str) || *str == '[') {
966 /* this is a reference to another constraint */
967 k = find_constraint(operands, nb_operands, str, NULL);
968 if ((unsigned)k >= i || i < nb_outputs)
969 tcc_error("invalid reference in constraint %d ('%s')",
970 i, str);
971 op->ref_index = k;
972 if (operands[k].input_index >= 0)
973 tcc_error("cannot reference twice the same operand");
974 operands[k].input_index = i;
975 op->priority = 5;
976 } else {
977 op->priority = constraint_priority(str);
981 /* sort operands according to their priority */
982 for(i=0;i<nb_operands;i++)
983 sorted_op[i] = i;
984 for(i=0;i<nb_operands - 1;i++) {
985 for(j=i+1;j<nb_operands;j++) {
986 p1 = operands[sorted_op[i]].priority;
987 p2 = operands[sorted_op[j]].priority;
988 if (p2 < p1) {
989 tmp = sorted_op[i];
990 sorted_op[i] = sorted_op[j];
991 sorted_op[j] = tmp;
996 for(i = 0;i < NB_ASM_REGS; i++) {
997 if (clobber_regs[i])
998 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
999 else
1000 regs_allocated[i] = 0;
1002 /* esp cannot be used */
1003 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1004 /* ebp cannot be used yet */
1005 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1007 /* allocate registers and generate corresponding asm moves */
1008 for(i=0;i<nb_operands;i++) {
1009 j = sorted_op[i];
1010 op = &operands[j];
1011 str = op->constraint;
1012 /* no need to allocate references */
1013 if (op->ref_index >= 0)
1014 continue;
1015 /* select if register is used for output, input or both */
1016 if (op->input_index >= 0) {
1017 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1018 } else if (j < nb_outputs) {
1019 reg_mask = REG_OUT_MASK;
1020 } else {
1021 reg_mask = REG_IN_MASK;
1023 try_next:
1024 c = *str++;
1025 switch(c) {
1026 case '=':
1027 goto try_next;
1028 case '+':
1029 op->is_rw = 1;
1030 /* FALL THRU */
1031 case '&':
1032 if (j >= nb_outputs)
1033 tcc_error("'%c' modifier can only be applied to outputs", c);
1034 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1035 goto try_next;
1036 case 'A':
1037 /* allocate both eax and edx */
1038 if (is_reg_allocated(TREG_XAX) ||
1039 is_reg_allocated(TREG_XDX))
1040 goto try_next;
1041 op->is_llong = 1;
1042 op->reg = TREG_XAX;
1043 regs_allocated[TREG_XAX] |= reg_mask;
1044 regs_allocated[TREG_XDX] |= reg_mask;
1045 break;
1046 case 'a':
1047 reg = TREG_XAX;
1048 goto alloc_reg;
1049 case 'b':
1050 reg = 3;
1051 goto alloc_reg;
1052 case 'c':
1053 reg = TREG_XCX;
1054 goto alloc_reg;
1055 case 'd':
1056 reg = TREG_XDX;
1057 goto alloc_reg;
1058 case 'S':
1059 reg = 6;
1060 goto alloc_reg;
1061 case 'D':
1062 reg = 7;
1063 alloc_reg:
1064 if (is_reg_allocated(reg))
1065 goto try_next;
1066 goto reg_found;
1067 case 'q':
1068 /* eax, ebx, ecx or edx */
1069 for(reg = 0; reg < 4; reg++) {
1070 if (!is_reg_allocated(reg))
1071 goto reg_found;
1073 goto try_next;
1074 case 'r':
1075 /* any general register */
1076 for(reg = 0; reg < 8; reg++) {
1077 if (!is_reg_allocated(reg))
1078 goto reg_found;
1080 goto try_next;
1081 reg_found:
1082 /* now we can reload in the register */
1083 op->is_llong = 0;
1084 op->reg = reg;
1085 regs_allocated[reg] |= reg_mask;
1086 break;
1087 case 'i':
1088 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1089 goto try_next;
1090 break;
1091 case 'I':
1092 case 'N':
1093 case 'M':
1094 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1095 goto try_next;
1096 break;
1097 case 'm':
1098 case 'g':
1099 /* nothing special to do because the operand is already in
1100 memory, except if the pointer itself is stored in a
1101 memory variable (VT_LLOCAL case) */
1102 /* XXX: fix constant case */
1103 /* if it is a reference to a memory zone, it must lie
1104 in a register, so we reserve the register in the
1105 input registers and a load will be generated
1106 later */
1107 if (j < nb_outputs || c == 'm') {
1108 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1109 /* any general register */
1110 for(reg = 0; reg < 8; reg++) {
1111 if (!(regs_allocated[reg] & REG_IN_MASK))
1112 goto reg_found1;
1114 goto try_next;
1115 reg_found1:
1116 /* now we can reload in the register */
1117 regs_allocated[reg] |= REG_IN_MASK;
1118 op->reg = reg;
1119 op->is_memory = 1;
1122 break;
1123 default:
1124 tcc_error("asm constraint %d ('%s') could not be satisfied",
1125 j, op->constraint);
1126 break;
1128 /* if a reference is present for that operand, we assign it too */
1129 if (op->input_index >= 0) {
1130 operands[op->input_index].reg = op->reg;
1131 operands[op->input_index].is_llong = op->is_llong;
1135 /* compute out_reg. It is used to store outputs registers to memory
1136 locations references by pointers (VT_LLOCAL case) */
1137 *pout_reg = -1;
1138 for(i=0;i<nb_operands;i++) {
1139 op = &operands[i];
1140 if (op->reg >= 0 &&
1141 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1142 !op->is_memory) {
1143 for(reg = 0; reg < 8; reg++) {
1144 if (!(regs_allocated[reg] & REG_OUT_MASK))
1145 goto reg_found2;
1147 tcc_error("could not find free output register for reloading");
1148 reg_found2:
1149 *pout_reg = reg;
1150 break;
1154 /* print sorted constraints */
1155 #ifdef ASM_DEBUG
1156 for(i=0;i<nb_operands;i++) {
1157 j = sorted_op[i];
1158 op = &operands[j];
1159 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1161 op->id ? get_tok_str(op->id, NULL) : "",
1162 op->constraint,
1163 op->vt->r,
1164 op->reg);
1166 if (*pout_reg >= 0)
1167 printf("out_reg=%d\n", *pout_reg);
1168 #endif
1171 ST_FUNC void subst_asm_operand(CString *add_str,
1172 SValue *sv, int modifier)
1174 int r, reg, size, val;
1175 char buf[64];
1177 r = sv->r;
1178 if ((r & VT_VALMASK) == VT_CONST) {
1179 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
1180 cstr_ccat(add_str, '$');
1181 if (r & VT_SYM) {
1182 cstr_cat(add_str, get_tok_str(sv->sym->v, NULL), -1);
1183 if ((uint32_t)sv->c.i != 0) {
1184 cstr_ccat(add_str, '+');
1185 } else {
1186 return;
1189 val = sv->c.i;
1190 if (modifier == 'n')
1191 val = -val;
1192 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1193 cstr_cat(add_str, buf, -1);
1194 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1195 #ifdef TCC_TARGET_X86_64
1196 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1197 #else
1198 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1199 #endif
1200 cstr_cat(add_str, buf, -1);
1201 } else if (r & VT_LVAL) {
1202 reg = r & VT_VALMASK;
1203 if (reg >= VT_CONST)
1204 tcc_error("internal compiler error");
1205 snprintf(buf, sizeof(buf), "(%%%s)",
1206 get_tok_str(TOK_ASM_eax + reg, NULL));
1207 cstr_cat(add_str, buf, -1);
1208 } else {
1209 /* register case */
1210 reg = r & VT_VALMASK;
1211 if (reg >= VT_CONST)
1212 tcc_error("internal compiler error");
1214 /* choose register operand size */
1215 if ((sv->type.t & VT_BTYPE) == VT_BYTE)
1216 size = 1;
1217 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1218 size = 2;
1219 #ifdef TCC_TARGET_X86_64
1220 else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
1221 size = 8;
1222 #endif
1223 else
1224 size = 4;
1225 if (size == 1 && reg >= 4)
1226 size = 4;
1228 if (modifier == 'b') {
1229 if (reg >= 4)
1230 tcc_error("cannot use byte register");
1231 size = 1;
1232 } else if (modifier == 'h') {
1233 if (reg >= 4)
1234 tcc_error("cannot use byte register");
1235 size = -1;
1236 } else if (modifier == 'w') {
1237 size = 2;
1238 #ifdef TCC_TARGET_X86_64
1239 } else if (modifier == 'q') {
1240 size = 8;
1241 #endif
1244 switch(size) {
1245 case -1:
1246 reg = TOK_ASM_ah + reg;
1247 break;
1248 case 1:
1249 reg = TOK_ASM_al + reg;
1250 break;
1251 case 2:
1252 reg = TOK_ASM_ax + reg;
1253 break;
1254 default:
1255 reg = TOK_ASM_eax + reg;
1256 break;
1257 #ifdef TCC_TARGET_X86_64
1258 case 8:
1259 reg = TOK_ASM_rax + reg;
1260 break;
1261 #endif
1263 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1264 cstr_cat(add_str, buf, -1);
1268 /* generate prolog and epilog code for asm statement */
1269 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1270 int nb_outputs, int is_output,
1271 uint8_t *clobber_regs,
1272 int out_reg)
1274 uint8_t regs_allocated[NB_ASM_REGS];
1275 ASMOperand *op;
1276 int i, reg;
1277 static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
1279 /* mark all used registers */
1280 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1281 for(i = 0; i < nb_operands;i++) {
1282 op = &operands[i];
1283 if (op->reg >= 0)
1284 regs_allocated[op->reg] = 1;
1286 if (!is_output) {
1287 /* generate reg save code */
1288 for(i = 0; i < NB_SAVED_REGS; i++) {
1289 reg = reg_saved[i];
1290 if (regs_allocated[reg]) {
1291 g(0x50 + reg);
1295 /* generate load code */
1296 for(i = 0; i < nb_operands; i++) {
1297 op = &operands[i];
1298 if (op->reg >= 0) {
1299 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1300 op->is_memory) {
1301 /* memory reference case (for both input and
1302 output cases) */
1303 SValue sv;
1304 sv = *op->vt;
1305 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1306 load(op->reg, &sv);
1307 } else if (i >= nb_outputs || op->is_rw) {
1308 /* load value in register */
1309 load(op->reg, op->vt);
1310 if (op->is_llong) {
1311 SValue sv;
1312 sv = *op->vt;
1313 sv.c.i += 4;
1314 load(TREG_XDX, &sv);
1319 } else {
1320 /* generate save code */
1321 for(i = 0 ; i < nb_outputs; i++) {
1322 op = &operands[i];
1323 if (op->reg >= 0) {
1324 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1325 if (!op->is_memory) {
1326 SValue sv;
1327 sv = *op->vt;
1328 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1329 load(out_reg, &sv);
1331 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1332 store(op->reg, &sv);
1334 } else {
1335 store(op->reg, op->vt);
1336 if (op->is_llong) {
1337 SValue sv;
1338 sv = *op->vt;
1339 sv.c.i += 4;
1340 store(TREG_XDX, &sv);
1345 /* generate reg restore code */
1346 for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
1347 reg = reg_saved[i];
1348 if (regs_allocated[reg]) {
1349 g(0x58 + reg);
1355 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1357 int reg;
1358 TokenSym *ts;
1360 if (!strcmp(str, "memory") ||
1361 !strcmp(str, "cc"))
1362 return;
1363 ts = tok_alloc(str, strlen(str));
1364 reg = ts->tok;
1365 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1366 reg -= TOK_ASM_eax;
1367 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1368 reg -= TOK_ASM_ax;
1369 #ifdef TCC_TARGET_X86_64
1370 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1371 reg -= TOK_ASM_rax;
1372 #endif
1373 } else {
1374 tcc_error("invalid clobber register '%s'", str);
1376 clobber_regs[reg] = 1;