win64: fix va_arg
[tinycc.git] / i386-asm.c
blobb25bd4281c70d67d4196013fbee1125137ade140
/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
22 #include "tcc.h"
/* #define NB_ASM_REGS 8 */
#define MAX_OPERANDS   3    /* size of ASMInstr.op_type[] and of ops[] */
#define NB_SAVED_REGS  3    /* size of reg_saved[] in asm_gen_code() */

/* Token range covered by op0_codes[] (TOK_ASM_first..TOK_ASM_last) and
   the last token still reported as "bad operand" rather than
   "unknown opcode" by asm_opcode(). */
#define TOK_ASM_first    TOK_ASM_clc
#define TOK_ASM_last     TOK_ASM_emms
#define TOK_ASM_alllast  TOK_ASM_pxor

/* ASMInstr.instr_type flag bits.  Bits 0..12 are flags; the opcode
   group is stored from bit OPC_GROUP_SHIFT upwards. */
#define OPC_JMP       (1 << 0)   /* jmp operand */
#define OPC_B         (1 << 1)   /* only used with OPC_WL */
#define OPC_WL        (1 << 2)   /* accepts w, l or no suffix */
#define OPC_BWL       (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG       (1 << 3)   /* register is added to opcode */
#define OPC_MODRM     (1 << 4)   /* modrm encoding */
#define OPC_FWAIT     (1 << 5)   /* add fwait opcode */
#define OPC_TEST      (1 << 6)   /* test opcodes */
#define OPC_SHIFT     (1 << 7)   /* shift opcodes */
#define OPC_D16       (1 << 8)   /* generate data16 prefix */
#define OPC_ARITH     (1 << 9)   /* arithmetic opcodes */
#define OPC_SHORTJMP  (1 << 10)  /* short jmp operand */
#define OPC_FARITH    (1 << 11)  /* FPU arithmetic opcodes */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ      (1 << 12)  /* accepts w, l, q or no suffix */
# define OPC_BWLQ     (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX      OPC_WLQ
# define OPC_BWLX     OPC_BWLQ
#else
# define OPC_WLX      OPC_WL
# define OPC_BWLX     OPC_BWL
#endif

#define OPC_GROUP_SHIFT 13
/* Compact operand-class codes stored in ASMInstr.op_type[].  Each table
   entry holds one OPT_xxx value, optionally or-ed with OPT_EA; the
   matching OP_xxx mask is (1 << OPT_xxx).

   WARNING: the entries from OPT_REG8 to OPT_DB are hardcoded from the
   order of the TOK_ASM_xxx register tokens; do not reorder them. */
enum {
    OPT_REG8 = 0,
    OPT_REG16,
    OPT_REG32,
#ifdef TCC_TARGET_X86_64
    OPT_REG64,
#endif
    OPT_MMX,
    OPT_SSE,
    OPT_CR,
    OPT_TR,
    OPT_DB,
    /* end of the classes hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */

    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */

    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
/* Operand type masks used in Operand.type: one bit per simple OPT_xxx
   class, plus OP_EA (and OP_EA32 on x86_64) for effective addresses. */
#define OP_EA     0x40000000

#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)

/* 64-bit only classes collapse to 0 on i386 so that they can be or-ed
   into masks unconditionally. */
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_IM64  (1 << OPT_IM64)
# define OP_EA32  (OP_EA << 1)
#else
# define OP_REG64 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* Target-width names for the accumulator, counter and data registers. */
#ifdef TCC_TARGET_X86_64
# define TREG_XAX TREG_RAX
# define TREG_XCX TREG_RCX
# define TREG_XDX TREG_RDX
#else
# define TREG_XAX TREG_EAX
# define TREG_XCX TREG_ECX
# define TREG_XDX TREG_EDX
#endif
139 typedef struct ASMInstr {
140 uint16_t sym;
141 uint16_t opcode;
142 uint16_t instr_type;
143 uint8_t nb_ops;
144 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
145 } ASMInstr;
147 typedef struct Operand {
148 uint32_t type;
149 int8_t reg; /* register, -1 if none */
150 int8_t reg2; /* second register, -1 if none */
151 uint8_t shift;
152 ExprValue e;
153 } Operand;
/* Maps a single register-class bit (the value of ops[i].type & OP_REG)
   to the operand size code `s` used by asm_opcode():
   OP_REG8 -> 0, OP_REG16 -> 1, OP_REG32 -> 2, OP_REG64 -> 3.

   The designated-initializer form below is documentation only and must
   stay inside the comment: the table is initialized positionally, and
   having both forms active would overflow the 9-entry array. */
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
#define NB_TEST_OPCODES 30

/* Value added to the base opcode of an OPC_TEST instruction, indexed by
   (opcode token - pa->sym).  Synonymous condition suffixes share a
   value. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
    0x00,               /* o */
    0x01,               /* no */
    0x02, 0x02, 0x02,   /* b, c, nae */
    0x03, 0x03, 0x03,   /* nb, nc, ae */
    0x04, 0x04,         /* e, z */
    0x05, 0x05,         /* ne, nz */
    0x06, 0x06,         /* be, na */
    0x07, 0x07,         /* nbe, a */
    0x08,               /* s */
    0x09,               /* ns */
    0x0a, 0x0a,         /* p, pe */
    0x0b, 0x0b,         /* np, po */
    0x0c, 0x0c,         /* l, nge */
    0x0d, 0x0d,         /* nl, ge */
    0x0e, 0x0e,         /* le, ng */
    0x0f, 0x0f,         /* nle, g */
};
/* Segment override prefix bytes, indexed by (segment token - TOK_ASM_es),
   i.e. by Operand.reg of an OP_SEG operand. */
static const uint8_t segment_prefixes[] = {
    0x26, /* es */
    0x2e, /* cs */
    0x36, /* ss */
    0x3e, /* ds */
    0x64, /* fs */
    0x65, /* gs */
};
211 static const ASMInstr asm_instrs[] = {
212 #define ALT(x) x
213 #define DEF_ASM_OP0(name, opcode)
214 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
215 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
216 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
217 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
218 #ifdef TCC_TARGET_X86_64
219 # include "x86_64-asm.h"
220 #else
221 # include "i386-asm.h"
222 #endif
223 /* last operation */
224 { 0, },
227 static const uint16_t op0_codes[] = {
228 #define ALT(x)
229 #define DEF_ASM_OP0(x, opcode) opcode,
230 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
231 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
232 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
233 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
234 #ifdef TCC_TARGET_X86_64
235 # include "x86_64-asm.h"
236 #else
237 # include "i386-asm.h"
238 #endif
241 static inline int get_reg_shift(TCCState *s1)
243 int shift, v;
244 v = asm_int_expr(s1);
245 switch(v) {
246 case 1:
247 shift = 0;
248 break;
249 case 2:
250 shift = 1;
251 break;
252 case 4:
253 shift = 2;
254 break;
255 case 8:
256 shift = 3;
257 break;
258 default:
259 expect("1, 2, 4 or 8 constant");
260 shift = 0;
261 break;
263 return shift;
266 static int asm_parse_reg(int *type)
268 int reg = 0;
269 *type = 0;
270 if (tok != '%')
271 goto error_32;
272 next();
273 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
274 reg = tok - TOK_ASM_eax;
275 #ifdef TCC_TARGET_X86_64
276 *type = OP_EA32;
277 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
278 reg = tok - TOK_ASM_rax;
279 #endif
280 } else {
281 error_32:
282 expect("register");
284 next();
285 return reg;
/* Parse one assembler operand starting at the current token and fill
   in *op.  Three forms are recognised, tested in this order:
     %reg                      register operand
     $expr                     immediate operand
     expr(base,index,scale)    effective address (every part optional)
   A leading '*' is consumed first and causes OP_INDIR to be or-ed into
   op->type at the end. */
288 static void parse_operand(TCCState *s1, Operand *op)
290 ExprValue e;
291 int reg, indir;
292 const char *p;
294 indir = 0;
295 if (tok == '*') {
296 next();
297 indir = OP_INDIR;
300 if (tok == '%') {
301 next();
/* Registers in the TOK_ASM_al..TOK_ASM_db7 token range come in groups
   of eight: the group number selects the type bit, the low three bits
   give the register number. */
302 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
303 reg = tok - TOK_ASM_al;
304 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
305 op->reg = reg & 7;
/* additionally tag the accumulator (any width), %cl and %dx so that
   table entries restricted to OPT_EAX / OPT_CL / OPT_DX can match */
306 if ((op->type & OP_REG) && op->reg == TREG_XAX)
307 op->type |= OP_EAX;
308 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
309 op->type |= OP_CL;
310 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
311 op->type |= OP_DX;
312 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
313 op->type = OP_DB;
314 op->reg = tok - TOK_ASM_dr0;
315 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
316 op->type = OP_SEG;
317 op->reg = tok - TOK_ASM_es;
318 } else if (tok == TOK_ASM_st) {
/* %st or %st(n) with a single digit n in 0..7; the token after the
   register has already been fetched, hence the jump to no_skip */
319 op->type = OP_ST;
320 op->reg = 0;
321 next();
322 if (tok == '(') {
323 next();
324 if (tok != TOK_PPNUM)
325 goto reg_error;
326 p = tokc.str.data;
327 reg = p[0] - '0';
328 if ((unsigned)reg >= 8 || p[1] != '\0')
329 goto reg_error;
330 op->reg = reg;
331 next();
332 skip(')');
334 if (op->reg == 0)
335 op->type |= OP_ST0;
336 goto no_skip;
337 } else {
338 reg_error:
339 tcc_error("unknown register");
341 next();
342 no_skip: ;
343 } else if (tok == '$') {
344 /* constant value */
345 next();
346 asm_expr(s1, &e);
/* every immediate is at least IM32; when no symbol is involved the
   narrower classes the value also fits in are added */
347 op->type = OP_IM32;
348 op->e.v = e.v;
349 op->e.sym = e.sym;
350 if (!op->e.sym) {
351 if (op->e.v == (uint8_t)op->e.v)
352 op->type |= OP_IM8;
353 if (op->e.v == (int8_t)op->e.v)
354 op->type |= OP_IM8S;
355 if (op->e.v == (uint16_t)op->e.v)
356 op->type |= OP_IM16;
357 #ifdef TCC_TARGET_X86_64
/* value not representable as int32_t: the type is replaced (not or-ed)
   by OP_IM64 */
358 if (op->e.v != (int32_t)op->e.v)
359 op->type = OP_IM64;
360 #endif
362 } else {
363 /* address(reg,reg2,shift) with all variants */
364 op->type = OP_EA;
365 op->reg = -1;
366 op->reg2 = -1;
367 op->shift = 0;
368 if (tok != '(') {
369 asm_expr(s1, &e);
370 op->e.v = e.v;
371 op->e.sym = e.sym;
372 } else {
373 next();
374 if (tok == '%') {
/* "(%reg..." : no displacement; push the '(' back so that the register
   part below sees it */
375 unget_tok('(');
376 op->e.v = 0;
377 op->e.sym = NULL;
378 } else {
379 /* bracketed offset expression */
380 asm_expr(s1, &e);
381 if (tok != ')')
382 expect(")");
383 next();
384 op->e.v = e.v;
385 op->e.sym = e.sym;
/* optional "(base,index,scale)" part; base and index may be empty */
388 if (tok == '(') {
389 int type = 0;
390 next();
391 if (tok != ',') {
392 op->reg = asm_parse_reg(&type);
394 if (tok == ',') {
395 next();
396 if (tok != ',') {
397 op->reg2 = asm_parse_reg(&type);
399 if (tok == ',') {
400 next();
401 op->shift = get_reg_shift(s1);
/* `type` holds what the last asm_parse_reg() call stored */
404 if (type & OP_EA32)
405 op->type |= OP_EA32;
406 skip(')');
/* neither base nor index: plain address */
408 if (op->reg == -1 && op->reg2 == -1)
409 op->type |= OP_ADDR;
411 op->type |= indir;
414 /* XXX: unify with C code output ? */
415 ST_FUNC void gen_expr32(ExprValue *pe)
417 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* 64-bit counterpart of gen_expr32(): emit the expression through
   gen_addr64(), flagged VT_SYM when it refers to a symbol. */
static void gen_expr64(ExprValue *pe)
{
    int r = 0;

    if (pe->sym)
        r = VT_SYM;
    gen_addr64(r, pe->sym, pe->v);
}
#endif
427 /* XXX: unify with C code output ? */
428 static void gen_disp32(ExprValue *pe)
430 Sym *sym = pe->sym;
431 if (sym && sym->r == cur_text_section->sh_num) {
432 /* same section: we can output an absolute value. Note
433 that the TCC compiler behaves differently here because
434 it always outputs a relocation to ease (future) code
435 elimination in the linker */
436 gen_le32(pe->v + sym->jnext - ind - 4);
437 } else {
438 if (sym && sym->type.t == VT_VOID) {
439 sym->type.t = VT_FUNC;
440 sym->type.ref = NULL;
442 gen_addrpc32(VT_SYM, sym, pe->v);
446 /* generate the modrm operand */
447 static inline void asm_modrm(int reg, Operand *op)
449 int mod, reg1, reg2, sib_reg1;
451 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
452 g(0xc0 + (reg << 3) + op->reg);
453 } else if (op->reg == -1 && op->reg2 == -1) {
454 /* displacement only */
455 #ifdef TCC_TARGET_X86_64
456 g(0x04 + (reg << 3));
457 g(0x25);
458 #else
459 g(0x05 + (reg << 3));
460 #endif
461 gen_expr32(&op->e);
462 } else {
463 sib_reg1 = op->reg;
464 /* fist compute displacement encoding */
465 if (sib_reg1 == -1) {
466 sib_reg1 = 5;
467 mod = 0x00;
468 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
469 mod = 0x00;
470 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
471 mod = 0x40;
472 } else {
473 mod = 0x80;
475 /* compute if sib byte needed */
476 reg1 = op->reg;
477 if (op->reg2 != -1)
478 reg1 = 4;
479 g(mod + (reg << 3) + reg1);
480 if (reg1 == 4) {
481 /* add sib byte */
482 reg2 = op->reg2;
483 if (reg2 == -1)
484 reg2 = 4; /* indicate no index */
485 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
487 /* add offset */
488 if (mod == 0x40) {
489 g(op->e.v);
490 } else if (mod == 0x80 || op->reg == -1) {
491 gen_expr32(&op->e);
/* Assemble one instruction whose mnemonic token is `opcode`.
   Steps, in order:
     1. parse up to MAX_OPERANDS comma separated operands (with an
        optional "%seg:" prefix in front of a memory operand);
     2. scan asm_instrs[] for the first entry matching the mnemonic,
        the operand count and every operand type;
     3. if nothing matches, fall back to op0_codes[] for operand-less
        opcodes or report an error;
     4. emit prefixes, the opcode byte(s), the ModRM/SIB bytes and the
        immediates through g() and the gen_xxx() helpers. */
496 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
498 const ASMInstr *pa;
499 int i, modrm_index, reg, v, op1, seg_prefix;
500 int nb_ops, s;
501 Operand ops[MAX_OPERANDS], *pop;
502 int op_type[3]; /* decoded op type */
503 int alltypes; /* OR of all operand types */
504 int autosize;
506 /* force synthetic ';' after prefix instruction, so we can handle */
507 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
508 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
509 unget_tok(';');
511 /* get operands */
512 pop = ops;
513 nb_ops = 0;
514 seg_prefix = 0;
515 alltypes = 0;
516 for(;;) {
517 if (tok == ';' || tok == TOK_LINEFEED)
518 break;
519 if (nb_ops >= MAX_OPERANDS) {
520 tcc_error("incorrect number of operands");
522 parse_operand(s1, pop);
/* "%seg:" just parsed: remember its prefix byte and parse the real
   operand, which must be a memory reference, into the same slot */
523 if (tok == ':') {
524 if (pop->type != OP_SEG || seg_prefix)
525 tcc_error("incorrect prefix");
526 seg_prefix = segment_prefixes[pop->reg];
527 next();
528 parse_operand(s1, pop);
529 if (!(pop->type & OP_EA)) {
530 tcc_error("segment prefix must be followed by memory reference");
533 pop++;
534 nb_ops++;
535 if (tok != ',')
536 break;
537 next();
540 s = 0; /* avoid warning */
542 /* optimize matching by using a lookup table (no hashing is needed
543 !) */
/* For every table entry, first check that `opcode` belongs to the
   token range the entry stands for (which depends on its OPC_xxx
   family) and derive `s` from the position of `opcode` inside that
   range; `s` is compared with `autosize` further down. */
544 for(pa = asm_instrs; pa->sym != 0; pa++) {
545 s = 0;
546 if (pa->instr_type & OPC_FARITH) {
547 v = opcode - pa->sym;
548 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
549 continue;
550 } else if (pa->instr_type & OPC_ARITH) {
551 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
552 continue;
553 s = (opcode - pa->sym) % NBWLX;
554 if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
556 /* We need to reject the xxxb opcodes that we accepted above.
557 Note that pa->sym for WLX opcodes is the 'w' token,
558 to get the 'b' token subtract one. */
559 if (((opcode - pa->sym + 1) % NBWLX) == 0)
560 continue;
561 s++;
563 } else if (pa->instr_type & OPC_SHIFT) {
564 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
565 continue;
566 s = (opcode - pa->sym) % NBWLX;
567 } else if (pa->instr_type & OPC_TEST) {
568 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
569 continue;
570 /* cmovxx is a test opcode but accepts multiple sizes.
571 TCC doesn't accept the suffixed mnemonic, instead we
572 simply force size autodetection always. */
573 if (pa->instr_type & OPC_WLX)
574 s = NBWLX - 1;
575 } else if (pa->instr_type & OPC_B) {
576 #ifdef TCC_TARGET_X86_64
577 /* Some instructions don't have the full size but only
578 bwl form. insb e.g. */
579 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
580 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
581 continue;
582 #endif
583 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
584 continue;
585 s = opcode - pa->sym;
586 } else if (pa->instr_type & OPC_WLX) {
587 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
588 continue;
589 s = opcode - pa->sym + 1;
590 } else {
591 if (pa->sym != opcode)
592 continue;
594 if (pa->nb_ops != nb_ops)
595 continue;
596 #ifdef TCC_TARGET_X86_64
597 /* Special case for moves. Selecting the IM64->REG64 form
598 should only be done if we really have an >32bit imm64, and that
599 is hardcoded. Ignore it here. */
600 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
601 && ops[1].type == OP_REG64)
602 continue;
603 #endif
604 /* now decode and check each operand */
605 alltypes = 0;
606 for(i = 0; i < nb_ops; i++) {
607 int op1, op2;
/* expand the compact OPT_xxx code of the table entry into the OP_xxx
   mask v of acceptable operand types */
608 op1 = pa->op_type[i];
609 op2 = op1 & 0x1f;
610 switch(op2) {
611 case OPT_IM:
612 v = OP_IM8 | OP_IM16 | OP_IM32;
613 break;
614 case OPT_REG:
615 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
616 break;
617 case OPT_REGW:
618 v = OP_REG16 | OP_REG32 | OP_REG64;
619 break;
620 case OPT_IMW:
621 v = OP_IM16 | OP_IM32;
622 break;
623 default:
624 v = 1 << op2;
625 break;
627 if (op1 & OPT_EA)
628 v |= OP_EA;
629 op_type[i] = v;
630 if ((ops[i].type & v) == 0)
631 goto next;
632 alltypes |= ops[i].type;
634 /* all is matching ! */
635 break;
636 next: ;
/* no table entry matched (the loop stopped on the terminating entry) */
638 if (pa->sym == 0) {
639 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
640 int b;
641 b = op0_codes[opcode - TOK_ASM_first];
642 if (b & 0xff00)
643 g(b >> 8);
644 g(b);
645 return;
646 } else if (opcode <= TOK_ASM_alllast) {
647 tcc_error("bad operand with opcode '%s'",
648 get_tok_str(opcode, NULL));
649 } else {
650 tcc_error("unknown opcode '%s'",
651 get_tok_str(opcode, NULL));
654 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
655 autosize = NBWLX-1;
656 #ifdef TCC_TARGET_X86_64
657 /* XXX the autosize should rather be zero, to not have to adjust this
658 all the time. */
659 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
660 autosize = NBWLX-2;
661 #endif
662 if (s == autosize) {
/* take the size from the first register operand, ignoring operands
   that the table entry pins to %cl or %dx */
663 for(i = 0; s == autosize && i < nb_ops; i++) {
664 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
665 s = reg_to_size[ops[i].type & OP_REG];
667 if (s == autosize) {
668 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
669 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
670 s = 2;
671 else
672 tcc_error("cannot infer opcode suffix");
676 #ifdef TCC_TARGET_X86_64
677 /* Generate addr32 prefix if needed */
678 for(i = 0; i < nb_ops; i++) {
679 if (ops[i].type & OP_EA32) {
680 g(0x67);
681 break;
684 #endif
685 /* generate data16 prefix if needed */
686 if (s == 1 || (pa->instr_type & OPC_D16))
687 g(0x66);
688 #ifdef TCC_TARGET_X86_64
689 if (s == 3 || (alltypes & OP_REG64)) {
690 /* generate REX prefix */
691 int default64 = 0;
692 for(i = 0; i < nb_ops; i++) {
693 if (op_type[i] == OP_REG64) {
694 /* If only 64bit regs are accepted in one operand
695 this is a default64 instruction without need for
696 REX prefixes. */
697 default64 = 1;
698 break;
701 /* XXX find better encoding for the default64 instructions. */
702 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
703 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
704 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
705 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
706 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
707 && !default64)
708 g(0x48);
710 #endif
712 /* now generates the operation */
713 if (pa->instr_type & OPC_FWAIT)
714 g(0x9b);
715 if (seg_prefix)
716 g(seg_prefix);
/* v accumulates the final opcode value, starting from the table entry
   and adjusted for the special cases below */
718 v = pa->opcode;
719 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
720 /* kludge for imul $im, %reg */
721 nb_ops = 3;
722 ops[2] = ops[1];
723 op_type[2] = op_type[1];
724 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
725 v--; /* int $3 case */
726 nb_ops = 0;
727 } else if ((v == 0x06 || v == 0x07)) {
728 if (ops[0].reg >= 4) {
729 /* push/pop %fs or %gs */
730 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
731 } else {
732 v += ops[0].reg << 3;
734 nb_ops = 0;
735 } else if (v <= 0x05) {
736 /* arith case */
737 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
738 } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
739 /* fpu arith case */
740 v += ((opcode - pa->sym) / 6) << 3;
742 if (pa->instr_type & OPC_REG) {
743 for(i = 0; i < nb_ops; i++) {
744 if (op_type[i] & (OP_REG | OP_ST)) {
745 v += ops[i].reg;
746 break;
749 /* mov $im, %reg case */
750 if (pa->opcode == 0xb0 && s >= 1)
751 v += 7;
753 if (pa->instr_type & OPC_B)
754 v += s >= 1;
755 if (pa->instr_type & OPC_TEST)
756 v += test_bits[opcode - pa->sym];
757 if (pa->instr_type & OPC_SHORTJMP) {
758 Sym *sym;
759 int jmp_disp;
761 /* see if we can really generate the jump with a byte offset */
762 sym = ops[0].e.sym;
763 if (!sym)
764 goto no_short_jump;
765 if (sym->r != cur_text_section->sh_num)
766 goto no_short_jump;
767 jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
768 if (jmp_disp == (int8_t)jmp_disp) {
769 /* OK to generate jump */
770 ops[0].e.sym = 0;
771 ops[0].e.v = jmp_disp;
772 op_type[0] = OP_IM8S;
773 } else {
774 no_short_jump:
775 if (pa->instr_type & OPC_JMP) {
776 /* long jump will be allowed. need to modify the
777 opcode slightly */
778 if (v == 0xeb)
779 v = 0xe9;
780 else
781 v += 0x0f10;
782 } else {
783 tcc_error("invalid displacement");
/* emit the opcode: the high byte first when it is non-zero */
787 op1 = v >> 8;
788 if (op1)
789 g(op1);
790 g(v);
792 /* search which operand will used for modrm */
793 modrm_index = 0;
/* default value of the ModRM reg field: the sub-opcode derived from
   the mnemonic for shift/arith/FPU families, otherwise the group
   stored in instr_type */
794 if (pa->instr_type & OPC_SHIFT) {
795 reg = (opcode - pa->sym) / NBWLX;
796 if (reg == 6)
797 reg = 7;
798 } else if (pa->instr_type & OPC_ARITH) {
799 reg = (opcode - pa->sym) / NBWLX;
800 } else if (pa->instr_type & OPC_FARITH) {
801 reg = (opcode - pa->sym) / 6;
802 } else {
803 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
805 if (pa->instr_type & OPC_MODRM) {
806 /* first look for an ea operand */
807 for(i = 0;i < nb_ops; i++) {
808 if (op_type[i] & OP_EA)
809 goto modrm_found;
811 /* then if not found, a register or indirection (shift instructions) */
812 for(i = 0;i < nb_ops; i++) {
813 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
814 goto modrm_found;
816 #ifdef ASM_DEBUG
817 tcc_error("bad op table");
818 #endif
819 modrm_found:
820 modrm_index = i;
821 /* if a register is used in another operand then it is
822 used instead of group */
823 for(i = 0;i < nb_ops; i++) {
824 v = op_type[i];
825 if (i != modrm_index &&
826 (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
827 reg = ops[i].reg;
828 break;
832 asm_modrm(reg, &ops[modrm_index]);
835 /* emit constants */
836 #ifndef TCC_TARGET_X86_64
837 if (pa->opcode == 0x9a || pa->opcode == 0xea) {
838 /* ljmp or lcall kludge */
839 gen_expr32(&ops[1].e);
840 if (ops[0].e.sym)
841 tcc_error("cannot relocate");
842 gen_le16(ops[0].e.v);
843 return;
845 #endif
846 for(i = 0;i < nb_ops; i++) {
847 v = op_type[i];
848 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
849 /* if multiple sizes are given it means we must look
850 at the op size */
851 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
852 if (s == 0)
853 v = OP_IM8;
854 else if (s == 1)
855 v = OP_IM16;
856 else if (s == 2 || (v & OP_IM64) == 0)
857 v = OP_IM32;
858 else
859 v = OP_IM64;
/* 8- and 16-bit immediates cannot carry a symbol */
861 if (v & (OP_IM8 | OP_IM8S)) {
862 if (ops[i].e.sym)
863 goto error_relocate;
864 g(ops[i].e.v);
865 } else if (v & OP_IM16) {
866 if (ops[i].e.sym)
867 error_relocate:
868 tcc_error("cannot relocate");
869 else
870 gen_le16(ops[i].e.v);
871 } else {
/* jump-type entries take a pc-relative displacement, everything else a
   32-bit (or, on x86_64, 64-bit) value */
872 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
873 gen_disp32(&ops[i].e);
874 } else {
875 #ifdef TCC_TARGET_X86_64
876 if (v & OP_IM64)
877 gen_expr64(&ops[i].e);
878 else
879 #endif
880 gen_expr32(&ops[i].e);
/* Return the allocation priority of a constraint string (operands with
   the lowest value are allocated first).  Each constraint letter has a
   rank; the result is the largest rank found in the string, 0 for an
   empty string.  An unrecognised letter is reported with tcc_error(). */
static inline int constraint_priority(const char *str)
{
    int best = 0;
    int rank, ch;

    while ((ch = *str) != '\0') {
        str++;
        switch (ch) {
        case 'A':
            rank = 0;
            break;
        case 'a': case 'b': case 'c': case 'd':
        case 'S': case 'D':
            rank = 1;
            break;
        case 'q':
            rank = 2;
            break;
        case 'r':
            rank = 3;
            break;
        case 'N': case 'M': case 'I':
        case 'i': case 'm': case 'g':
            rank = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", ch);
            rank = 0;
        }
        if (rank > best)
            best = rank;
    }
    return best;
}
/* Return a pointer to the first character of `p` that is not one of
   the constraint modifiers '=', '&', '+' or '%'. */
static const char *skip_constraint_modifiers(const char *p)
{
    for (;;) {
        switch (*p) {
        case '=':
        case '&':
        case '+':
        case '%':
            p++;
            continue;
        default:
            return p;
        }
    }
}
/* Bits kept per register in regs_allocated[]: the register is taken on
   the output side and/or on the input side of the asm statement. */
943 #define REG_OUT_MASK 0x01
944 #define REG_IN_MASK 0x02
/* relies on the locals regs_allocated and reg_mask of the caller */
946 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
/* Decide where each operand of an inline asm statement lives.
   operands[0..nb_outputs-1] are the outputs, the remaining ones the
   inputs.  For every operand this sets reg (-1 when no register is
   needed), is_memory, is_rw, ref_index, input_index and priority;
   is_llong is set only on the paths that allocate a register.
   clobber_regs[i] != 0 makes register i unavailable.
   *pout_reg receives a register that is free on the output side when
   some register operand is a VT_LLOCAL value that is not flagged
   is_memory, or -1 otherwise.
   Constraints that cannot be satisfied are reported with tcc_error(). */
948 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
949 int nb_operands, int nb_outputs,
950 const uint8_t *clobber_regs,
951 int *pout_reg)
953 ASMOperand *op;
954 int sorted_op[MAX_ASM_OPERANDS];
955 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
956 const char *str;
957 uint8_t regs_allocated[NB_ASM_REGS];
959 /* init fields */
960 for(i=0;i<nb_operands;i++) {
961 op = &operands[i];
962 op->input_index = -1;
963 op->ref_index = -1;
964 op->reg = -1;
965 op->is_memory = 0;
966 op->is_rw = 0;
968 /* compute constraint priority and evaluate references to output
969 constraints if input constraints */
970 for(i=0;i<nb_operands;i++) {
971 op = &operands[i];
972 str = op->constraint;
973 str = skip_constraint_modifiers(str);
974 if (isnum(*str) || *str == '[') {
975 /* this is a reference to another constraint */
976 k = find_constraint(operands, nb_operands, str, NULL);
/* only an input may reference another operand, and only one that
   comes before it */
977 if ((unsigned)k >= i || i < nb_outputs)
978 tcc_error("invalid reference in constraint %d ('%s')",
979 i, str);
980 op->ref_index = k;
981 if (operands[k].input_index >= 0)
982 tcc_error("cannot reference twice the same operand");
983 operands[k].input_index = i;
984 op->priority = 5;
985 } else {
986 op->priority = constraint_priority(str);
990 /* sort operands according to their priority */
991 for(i=0;i<nb_operands;i++)
992 sorted_op[i] = i;
993 for(i=0;i<nb_operands - 1;i++) {
994 for(j=i+1;j<nb_operands;j++) {
995 p1 = operands[sorted_op[i]].priority;
996 p2 = operands[sorted_op[j]].priority;
997 if (p2 < p1) {
998 tmp = sorted_op[i];
999 sorted_op[i] = sorted_op[j];
1000 sorted_op[j] = tmp;
/* clobbered registers are unavailable on both sides */
1005 for(i = 0;i < NB_ASM_REGS; i++) {
1006 if (clobber_regs[i])
1007 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1008 else
1009 regs_allocated[i] = 0;
1011 /* esp cannot be used */
1012 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1013 /* ebp cannot be used yet */
1014 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1016 /* allocate registers and generate corresponding asm moves */
1017 for(i=0;i<nb_operands;i++) {
1018 j = sorted_op[i];
1019 op = &operands[j];
1020 str = op->constraint;
1021 /* no need to allocate references */
1022 if (op->ref_index >= 0)
1023 continue;
1024 /* select if register is used for output, input or both */
1025 if (op->input_index >= 0) {
1026 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1027 } else if (j < nb_outputs) {
1028 reg_mask = REG_OUT_MASK;
1029 } else {
1030 reg_mask = REG_IN_MASK;
/* The constraint string is consumed one character at a time: a
   character that cannot be satisfied jumps back to try_next to try the
   following one.  Reaching the terminating '\0' lands in the default
   case and raises the "could not be satisfied" error. */
1032 try_next:
1033 c = *str++;
1034 switch(c) {
1035 case '=':
1036 goto try_next;
1037 case '+':
1038 op->is_rw = 1;
1039 /* FALL THRU */
1040 case '&':
1041 if (j >= nb_outputs)
1042 tcc_error("'%c' modifier can only be applied to outputs", c);
1043 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1044 goto try_next;
1045 case 'A':
1046 /* allocate both eax and edx */
1047 if (is_reg_allocated(TREG_XAX) ||
1048 is_reg_allocated(TREG_XDX))
1049 goto try_next;
1050 op->is_llong = 1;
1051 op->reg = TREG_XAX;
1052 regs_allocated[TREG_XAX] |= reg_mask;
1053 regs_allocated[TREG_XDX] |= reg_mask;
1054 break;
/* single-register constraints: pick the register number, then share
   the availability test at alloc_reg */
1055 case 'a':
1056 reg = TREG_XAX;
1057 goto alloc_reg;
1058 case 'b':
1059 reg = 3;
1060 goto alloc_reg;
1061 case 'c':
1062 reg = TREG_XCX;
1063 goto alloc_reg;
1064 case 'd':
1065 reg = TREG_XDX;
1066 goto alloc_reg;
1067 case 'S':
1068 reg = 6;
1069 goto alloc_reg;
1070 case 'D':
1071 reg = 7;
1072 alloc_reg:
1073 if (is_reg_allocated(reg))
1074 goto try_next;
1075 goto reg_found;
1076 case 'q':
1077 /* eax, ebx, ecx or edx */
1078 for(reg = 0; reg < 4; reg++) {
1079 if (!is_reg_allocated(reg))
1080 goto reg_found;
1082 goto try_next;
1083 case 'r':
1084 /* any general register */
1085 for(reg = 0; reg < 8; reg++) {
1086 if (!is_reg_allocated(reg))
1087 goto reg_found;
1089 goto try_next;
1090 reg_found:
1091 /* now we can reload in the register */
1092 op->is_llong = 0;
1093 op->reg = reg;
1094 regs_allocated[reg] |= reg_mask;
1095 break;
/* immediate constraints: accepted only when the value is a constant
   (for 'I', 'N' and 'M' also without a symbol); no range check on the
   value is made here */
1096 case 'i':
1097 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1098 goto try_next;
1099 break;
1100 case 'I':
1101 case 'N':
1102 case 'M':
1103 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1104 goto try_next;
1105 break;
1106 case 'm':
1107 case 'g':
1108 /* nothing special to do because the operand is already in
1109 memory, except if the pointer itself is stored in a
1110 memory variable (VT_LLOCAL case) */
1111 /* XXX: fix constant case */
1112 /* if it is a reference to a memory zone, it must lie
1113 in a register, so we reserve the register in the
1114 input registers and a load will be generated
1115 later */
1116 if (j < nb_outputs || c == 'm') {
1117 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1118 /* any general register */
1119 for(reg = 0; reg < 8; reg++) {
1120 if (!(regs_allocated[reg] & REG_IN_MASK))
1121 goto reg_found1;
1123 goto try_next;
1124 reg_found1:
1125 /* now we can reload in the register */
1126 regs_allocated[reg] |= REG_IN_MASK;
1127 op->reg = reg;
1128 op->is_memory = 1;
1131 break;
1132 default:
1133 tcc_error("asm constraint %d ('%s') could not be satisfied",
1134 j, op->constraint);
1135 break;
1137 /* if a reference is present for that operand, we assign it too */
1138 if (op->input_index >= 0) {
1139 operands[op->input_index].reg = op->reg;
1140 operands[op->input_index].is_llong = op->is_llong;
1144 /* compute out_reg. It is used to store outputs registers to memory
1145 locations references by pointers (VT_LLOCAL case) */
1146 *pout_reg = -1;
1147 for(i=0;i<nb_operands;i++) {
1148 op = &operands[i];
1149 if (op->reg >= 0 &&
1150 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1151 !op->is_memory) {
1152 for(reg = 0; reg < 8; reg++) {
1153 if (!(regs_allocated[reg] & REG_OUT_MASK))
1154 goto reg_found2;
1156 tcc_error("could not find free output register for reloading");
1157 reg_found2:
1158 *pout_reg = reg;
1159 break;
1163 /* print sorted constraints */
1164 #ifdef ASM_DEBUG
/* NOTE(review): the format string below has five conversions but only
   four arguments are visible in this listing -- confirm against the
   upstream file before enabling ASM_DEBUG. */
1165 for(i=0;i<nb_operands;i++) {
1166 j = sorted_op[i];
1167 op = &operands[j];
1168 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1170 op->id ? get_tok_str(op->id, NULL) : "",
1171 op->constraint,
1172 op->vt->r,
1173 op->reg);
1175 if (*pout_reg >= 0)
1176 printf("out_reg=%d\n", *pout_reg);
1177 #endif
1180 ST_FUNC void subst_asm_operand(CString *add_str,
1181 SValue *sv, int modifier)
1183 int r, reg, size, val;
1184 char buf[64];
1186 r = sv->r;
1187 if ((r & VT_VALMASK) == VT_CONST) {
1188 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
1189 cstr_ccat(add_str, '$');
1190 if (r & VT_SYM) {
1191 cstr_cat(add_str, get_tok_str(sv->sym->v, NULL), -1);
1192 if ((uint32_t)sv->c.i != 0) {
1193 cstr_ccat(add_str, '+');
1194 } else {
1195 return;
1198 val = sv->c.i;
1199 if (modifier == 'n')
1200 val = -val;
1201 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1202 cstr_cat(add_str, buf, -1);
1203 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1204 #ifdef TCC_TARGET_X86_64
1205 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1206 #else
1207 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1208 #endif
1209 cstr_cat(add_str, buf, -1);
1210 } else if (r & VT_LVAL) {
1211 reg = r & VT_VALMASK;
1212 if (reg >= VT_CONST)
1213 tcc_error("internal compiler error");
1214 snprintf(buf, sizeof(buf), "(%%%s)",
1215 get_tok_str(TOK_ASM_eax + reg, NULL));
1216 cstr_cat(add_str, buf, -1);
1217 } else {
1218 /* register case */
1219 reg = r & VT_VALMASK;
1220 if (reg >= VT_CONST)
1221 tcc_error("internal compiler error");
1223 /* choose register operand size */
1224 if ((sv->type.t & VT_BTYPE) == VT_BYTE)
1225 size = 1;
1226 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1227 size = 2;
1228 #ifdef TCC_TARGET_X86_64
1229 else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
1230 size = 8;
1231 #endif
1232 else
1233 size = 4;
1234 if (size == 1 && reg >= 4)
1235 size = 4;
1237 if (modifier == 'b') {
1238 if (reg >= 4)
1239 tcc_error("cannot use byte register");
1240 size = 1;
1241 } else if (modifier == 'h') {
1242 if (reg >= 4)
1243 tcc_error("cannot use byte register");
1244 size = -1;
1245 } else if (modifier == 'w') {
1246 size = 2;
1247 #ifdef TCC_TARGET_X86_64
1248 } else if (modifier == 'q') {
1249 size = 8;
1250 #endif
1253 switch(size) {
1254 case -1:
1255 reg = TOK_ASM_ah + reg;
1256 break;
1257 case 1:
1258 reg = TOK_ASM_al + reg;
1259 break;
1260 case 2:
1261 reg = TOK_ASM_ax + reg;
1262 break;
1263 default:
1264 reg = TOK_ASM_eax + reg;
1265 break;
1266 #ifdef TCC_TARGET_X86_64
1267 case 8:
1268 reg = TOK_ASM_rax + reg;
1269 break;
1270 #endif
1272 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1273 cstr_cat(add_str, buf, -1);
1277 /* generate prolog and epilog code for asm statement */
1278 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1279 int nb_outputs, int is_output,
1280 uint8_t *clobber_regs,
1281 int out_reg)
1283 uint8_t regs_allocated[NB_ASM_REGS];
1284 ASMOperand *op;
1285 int i, reg;
1286 static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
1288 /* mark all used registers */
1289 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1290 for(i = 0; i < nb_operands;i++) {
1291 op = &operands[i];
1292 if (op->reg >= 0)
1293 regs_allocated[op->reg] = 1;
1295 if (!is_output) {
1296 /* generate reg save code */
1297 for(i = 0; i < NB_SAVED_REGS; i++) {
1298 reg = reg_saved[i];
1299 if (regs_allocated[reg]) {
1300 g(0x50 + reg);
1304 /* generate load code */
1305 for(i = 0; i < nb_operands; i++) {
1306 op = &operands[i];
1307 if (op->reg >= 0) {
1308 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1309 op->is_memory) {
1310 /* memory reference case (for both input and
1311 output cases) */
1312 SValue sv;
1313 sv = *op->vt;
1314 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1315 load(op->reg, &sv);
1316 } else if (i >= nb_outputs || op->is_rw) {
1317 /* load value in register */
1318 load(op->reg, op->vt);
1319 if (op->is_llong) {
1320 SValue sv;
1321 sv = *op->vt;
1322 sv.c.i += 4;
1323 load(TREG_XDX, &sv);
1328 } else {
1329 /* generate save code */
1330 for(i = 0 ; i < nb_outputs; i++) {
1331 op = &operands[i];
1332 if (op->reg >= 0) {
1333 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1334 if (!op->is_memory) {
1335 SValue sv;
1336 sv = *op->vt;
1337 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1338 load(out_reg, &sv);
1340 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1341 store(op->reg, &sv);
1343 } else {
1344 store(op->reg, op->vt);
1345 if (op->is_llong) {
1346 SValue sv;
1347 sv = *op->vt;
1348 sv.c.i += 4;
1349 store(TREG_XDX, &sv);
1354 /* generate reg restore code */
1355 for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
1356 reg = reg_saved[i];
1357 if (regs_allocated[reg]) {
1358 g(0x58 + reg);
1364 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1366 int reg;
1367 TokenSym *ts;
1369 if (!strcmp(str, "memory") ||
1370 !strcmp(str, "cc"))
1371 return;
1372 ts = tok_alloc(str, strlen(str));
1373 reg = ts->tok;
1374 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1375 reg -= TOK_ASM_eax;
1376 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1377 reg -= TOK_ASM_ax;
1378 #ifdef TCC_TARGET_X86_64
1379 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1380 reg -= TOK_ASM_rax;
1381 #endif
1382 } else {
1383 tcc_error("invalid clobber register '%s'", str);
1385 clobber_regs[reg] = 1;