x86-64-asm: Clean up 64bit immediate support
[tinycc.git] / i386-asm.c
blobb158e81c67eb5e777ff8c8b738a2bd76d0ccc9d9
1 /*
2 * i386 specific functions for TCC assembler
4 * Copyright (c) 2001, 2002 Fabrice Bellard
5 * Copyright (c) 2009 Frédéric Feret (x86_64 support)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "tcc.h"
24 /* #define NB_ASM_REGS 8 */
25 #define MAX_OPERANDS 3
26 #define NB_SAVED_REGS 3
28 #define TOK_ASM_first TOK_ASM_clc
29 #define TOK_ASM_last TOK_ASM_emms
30 #define TOK_ASM_alllast TOK_ASM_pxor
32 #define OPC_JMP 0x01 /* jmp operand */
33 #define OPC_B 0x02 /* only used with OPC_WL */
34 #define OPC_WL 0x04 /* accepts w, l or no suffix */
35 #define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
36 #define OPC_REG 0x08 /* register is added to opcode */
37 #define OPC_MODRM 0x10 /* modrm encoding */
38 #define OPC_FWAIT 0x20 /* add fwait opcode */
39 #define OPC_TEST 0x40 /* test opcodes */
40 #define OPC_SHIFT 0x80 /* shift opcodes */
41 #define OPC_D16 0x0100 /* generate data16 prefix */
42 #define OPC_ARITH 0x0200 /* arithmetic opcodes */
43 #define OPC_SHORTJMP 0x0400 /* short jmp operand */
44 #define OPC_FARITH 0x0800 /* FPU arithmetic opcodes */
45 #ifdef TCC_TARGET_X86_64
46 # define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */
47 # define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
48 # define OPC_WLX OPC_WLQ
49 #else
50 # define OPC_WLX OPC_WL
51 #endif
53 #define OPC_GROUP_SHIFT 13
55 /* in order to compress the operand type, we use specific operands and
56 we or only with EA */
/* in order to compress the operand type, we use specific operands and
   we or only with EA  */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
94 #define OP_REG8 (1 << OPT_REG8)
95 #define OP_REG16 (1 << OPT_REG16)
96 #define OP_REG32 (1 << OPT_REG32)
97 #define OP_MMX (1 << OPT_MMX)
98 #define OP_SSE (1 << OPT_SSE)
99 #define OP_CR (1 << OPT_CR)
100 #define OP_TR (1 << OPT_TR)
101 #define OP_DB (1 << OPT_DB)
102 #define OP_SEG (1 << OPT_SEG)
103 #define OP_ST (1 << OPT_ST)
104 #define OP_IM8 (1 << OPT_IM8)
105 #define OP_IM8S (1 << OPT_IM8S)
106 #define OP_IM16 (1 << OPT_IM16)
107 #define OP_IM32 (1 << OPT_IM32)
108 #define OP_EAX (1 << OPT_EAX)
109 #define OP_ST0 (1 << OPT_ST0)
110 #define OP_CL (1 << OPT_CL)
111 #define OP_DX (1 << OPT_DX)
112 #define OP_ADDR (1 << OPT_ADDR)
113 #define OP_INDIR (1 << OPT_INDIR)
114 #ifdef TCC_TARGET_X86_64
115 # define OP_REG64 (1 << OPT_REG64)
116 # define OP_IM64 (1 << OPT_IM64)
117 # define OP_EA32 (OP_EA << 1)
118 #else
119 # define OP_REG64 0
120 # define OP_IM64 0
121 # define OP_EA32 0
122 #endif
124 #define OP_EA 0x40000000
125 #define OP_REG (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
127 #ifdef TCC_TARGET_X86_64
128 # define TREG_XAX TREG_RAX
129 # define TREG_XCX TREG_RCX
130 # define TREG_XDX TREG_RDX
131 #else
132 # define TREG_XAX TREG_EAX
133 # define TREG_XCX TREG_ECX
134 # define TREG_XDX TREG_EDX
135 #endif
137 typedef struct ASMInstr {
138 uint16_t sym;
139 uint16_t opcode;
140 uint16_t instr_type;
141 uint8_t nb_ops;
142 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
143 } ASMInstr;
145 typedef struct Operand {
146 uint32_t type;
147 int8_t reg; /* register, -1 if none */
148 int8_t reg2; /* second register, -1 if none */
149 uint8_t shift;
150 ExprValue e;
151 } Operand;
/* Map an OP_REGxx type mask (used as index) to an operand size code
   (0=b, 1=w, 2=l, 3=q).  The commented designated initializers document
   the meaning of the positional values below. */
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
#define NB_TEST_OPCODES 30

/* Condition-code nibble for each setcc/jcc/cmovcc suffix, in the token
   order of the test mnemonics (several aliases share one code). */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
    0x00, /* o */
    0x01, /* no */
    0x02, /* b */
    0x02, /* c */
    0x02, /* nae */
    0x03, /* nb */
    0x03, /* nc */
    0x03, /* ae */
    0x04, /* e */
    0x04, /* z */
    0x05, /* ne */
    0x05, /* nz */
    0x06, /* be */
    0x06, /* na */
    0x07, /* nbe */
    0x07, /* a */
    0x08, /* s */
    0x09, /* ns */
    0x0a, /* p */
    0x0a, /* pe */
    0x0b, /* np */
    0x0b, /* po */
    0x0c, /* l */
    0x0c, /* nge */
    0x0d, /* nl */
    0x0d, /* ge */
    0x0e, /* le */
    0x0e, /* ng */
    0x0f, /* nle */
    0x0f, /* g */
};
/* Segment-override prefix bytes, indexed by segment register number
   (es=0 .. gs=5, same order as TOK_ASM_es..TOK_ASM_gs). */
static const uint8_t segment_prefixes[] = {
    0x26, /* es */
    0x2e, /* cs */
    0x36, /* ss */
    0x3e, /* ds */
    0x64, /* fs */
    0x65  /* gs */
};
209 static const ASMInstr asm_instrs[] = {
210 #define ALT(x) x
211 #define DEF_ASM_OP0(name, opcode)
212 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
213 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
214 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
215 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
216 #ifdef TCC_TARGET_X86_64
217 # include "x86_64-asm.h"
218 #else
219 # include "i386-asm.h"
220 #endif
221 /* last operation */
222 { 0, },
225 static const uint16_t op0_codes[] = {
226 #define ALT(x)
227 #define DEF_ASM_OP0(x, opcode) opcode,
228 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
229 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
230 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
231 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
232 #ifdef TCC_TARGET_X86_64
233 # include "x86_64-asm.h"
234 #else
235 # include "i386-asm.h"
236 #endif
239 static inline int get_reg_shift(TCCState *s1)
241 int shift, v;
242 v = asm_int_expr(s1);
243 switch(v) {
244 case 1:
245 shift = 0;
246 break;
247 case 2:
248 shift = 1;
249 break;
250 case 4:
251 shift = 2;
252 break;
253 case 8:
254 shift = 3;
255 break;
256 default:
257 expect("1, 2, 4 or 8 constant");
258 shift = 0;
259 break;
261 return shift;
264 static int asm_parse_reg(int *type)
266 int reg = 0;
267 *type = 0;
268 if (tok != '%')
269 goto error_32;
270 next();
271 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
272 reg = tok - TOK_ASM_eax;
273 #ifdef TCC_TARGET_X86_64
274 *type = OP_EA32;
275 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
276 reg = tok - TOK_ASM_rax;
277 #endif
278 } else {
279 error_32:
280 expect("register");
282 next();
283 return reg;
286 static void parse_operand(TCCState *s1, Operand *op)
288 ExprValue e;
289 int reg, indir;
290 const char *p;
292 indir = 0;
293 if (tok == '*') {
294 next();
295 indir = OP_INDIR;
298 if (tok == '%') {
299 next();
300 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
301 reg = tok - TOK_ASM_al;
302 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
303 op->reg = reg & 7;
304 if ((op->type & OP_REG) && op->reg == TREG_XAX)
305 op->type |= OP_EAX;
306 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
307 op->type |= OP_CL;
308 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
309 op->type |= OP_DX;
310 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
311 op->type = OP_DB;
312 op->reg = tok - TOK_ASM_dr0;
313 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
314 op->type = OP_SEG;
315 op->reg = tok - TOK_ASM_es;
316 } else if (tok == TOK_ASM_st) {
317 op->type = OP_ST;
318 op->reg = 0;
319 next();
320 if (tok == '(') {
321 next();
322 if (tok != TOK_PPNUM)
323 goto reg_error;
324 p = tokc.str.data;
325 reg = p[0] - '0';
326 if ((unsigned)reg >= 8 || p[1] != '\0')
327 goto reg_error;
328 op->reg = reg;
329 next();
330 skip(')');
332 if (op->reg == 0)
333 op->type |= OP_ST0;
334 goto no_skip;
335 } else {
336 reg_error:
337 tcc_error("unknown register");
339 next();
340 no_skip: ;
341 } else if (tok == '$') {
342 /* constant value */
343 next();
344 asm_expr(s1, &e);
345 op->type = OP_IM32;
346 op->e.v = e.v;
347 op->e.sym = e.sym;
348 if (!op->e.sym) {
349 if (op->e.v == (uint8_t)op->e.v)
350 op->type |= OP_IM8;
351 if (op->e.v == (int8_t)op->e.v)
352 op->type |= OP_IM8S;
353 if (op->e.v == (uint16_t)op->e.v)
354 op->type |= OP_IM16;
355 #ifdef TCC_TARGET_X86_64
356 if (op->e.v != (uint32_t)op->e.v)
357 op->type = OP_IM64;
358 #endif
360 } else {
361 /* address(reg,reg2,shift) with all variants */
362 op->type = OP_EA;
363 op->reg = -1;
364 op->reg2 = -1;
365 op->shift = 0;
366 if (tok != '(') {
367 asm_expr(s1, &e);
368 op->e.v = e.v;
369 op->e.sym = e.sym;
370 } else {
371 next();
372 if (tok == '%') {
373 unget_tok('(');
374 op->e.v = 0;
375 op->e.sym = NULL;
376 } else {
377 /* bracketed offset expression */
378 asm_expr(s1, &e);
379 if (tok != ')')
380 expect(")");
381 next();
382 op->e.v = e.v;
383 op->e.sym = e.sym;
386 if (tok == '(') {
387 int type = 0;
388 next();
389 if (tok != ',') {
390 op->reg = asm_parse_reg(&type);
392 if (tok == ',') {
393 next();
394 if (tok != ',') {
395 op->reg2 = asm_parse_reg(&type);
397 if (tok == ',') {
398 next();
399 op->shift = get_reg_shift(s1);
402 if (type & OP_EA32)
403 op->type |= OP_EA32;
404 skip(')');
406 if (op->reg == -1 && op->reg2 == -1)
407 op->type |= OP_ADDR;
409 op->type |= indir;
412 /* XXX: unify with C code output ? */
413 ST_FUNC void gen_expr32(ExprValue *pe)
415 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* Emit a 64 bit absolute value, with a relocation if a symbol is attached. */
static void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
425 /* XXX: unify with C code output ? */
426 static void gen_disp32(ExprValue *pe)
428 Sym *sym = pe->sym;
429 if (sym && sym->r == cur_text_section->sh_num) {
430 /* same section: we can output an absolute value. Note
431 that the TCC compiler behaves differently here because
432 it always outputs a relocation to ease (future) code
433 elimination in the linker */
434 gen_le32(pe->v + sym->jnext - ind - 4);
435 } else {
436 if (sym && sym->type.t == VT_VOID) {
437 sym->type.t = VT_FUNC;
438 sym->type.ref = NULL;
440 gen_addrpc32(VT_SYM, sym, pe->v);
444 /* generate the modrm operand */
445 static inline void asm_modrm(int reg, Operand *op)
447 int mod, reg1, reg2, sib_reg1;
449 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
450 g(0xc0 + (reg << 3) + op->reg);
451 } else if (op->reg == -1 && op->reg2 == -1) {
452 /* displacement only */
453 #ifdef TCC_TARGET_X86_64
454 g(0x04 + (reg << 3));
455 g(0x25);
456 #else
457 g(0x05 + (reg << 3));
458 #endif
459 gen_expr32(&op->e);
460 } else {
461 sib_reg1 = op->reg;
462 /* fist compute displacement encoding */
463 if (sib_reg1 == -1) {
464 sib_reg1 = 5;
465 mod = 0x00;
466 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
467 mod = 0x00;
468 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
469 mod = 0x40;
470 } else {
471 mod = 0x80;
473 /* compute if sib byte needed */
474 reg1 = op->reg;
475 if (op->reg2 != -1)
476 reg1 = 4;
477 g(mod + (reg << 3) + reg1);
478 if (reg1 == 4) {
479 /* add sib byte */
480 reg2 = op->reg2;
481 if (reg2 == -1)
482 reg2 = 4; /* indicate no index */
483 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
485 /* add offset */
486 if (mod == 0x40) {
487 g(op->e.v);
488 } else if (mod == 0x80 || op->reg == -1) {
489 gen_expr32(&op->e);
494 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
496 const ASMInstr *pa;
497 int i, modrm_index, reg, v, op1, seg_prefix;
498 int nb_ops, s;
499 Operand ops[MAX_OPERANDS], *pop;
500 int op_type[3]; /* decoded op type */
501 int alltypes; /* OR of all operand types */
502 int autosize;
504 /* force synthetic ';' after prefix instruction, so we can handle */
505 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
506 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
507 unget_tok(';');
509 /* get operands */
510 pop = ops;
511 nb_ops = 0;
512 seg_prefix = 0;
513 alltypes = 0;
514 for(;;) {
515 if (tok == ';' || tok == TOK_LINEFEED)
516 break;
517 if (nb_ops >= MAX_OPERANDS) {
518 tcc_error("incorrect number of operands");
520 parse_operand(s1, pop);
521 if (tok == ':') {
522 if (pop->type != OP_SEG || seg_prefix)
523 tcc_error("incorrect prefix");
524 seg_prefix = segment_prefixes[pop->reg];
525 next();
526 parse_operand(s1, pop);
527 if (!(pop->type & OP_EA)) {
528 tcc_error("segment prefix must be followed by memory reference");
531 pop++;
532 nb_ops++;
533 if (tok != ',')
534 break;
535 next();
538 s = 0; /* avoid warning */
540 /* optimize matching by using a lookup table (no hashing is needed
541 !) */
542 for(pa = asm_instrs; pa->sym != 0; pa++) {
543 s = 0;
544 if (pa->instr_type & OPC_FARITH) {
545 v = opcode - pa->sym;
546 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
547 continue;
548 } else if (pa->instr_type & OPC_ARITH) {
549 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
550 continue;
551 s = (opcode - pa->sym) % NBWLX;
552 } else if (pa->instr_type & OPC_SHIFT) {
553 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
554 continue;
555 s = (opcode - pa->sym) % NBWLX;
556 } else if (pa->instr_type & OPC_TEST) {
557 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
558 continue;
559 /* cmovxx is a test opcode but accepts multiple sizes.
560 TCC doesn't accept the suffixed mnemonic, instead we
561 simply force size autodetection always. */
562 if (pa->instr_type & OPC_WLX)
563 s = NBWLX - 1;
564 } else if (pa->instr_type & OPC_B) {
565 #ifdef TCC_TARGET_X86_64
566 /* Some instructions don't have the full size but only
567 bwl form. insb e.g. */
568 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
569 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
570 continue;
571 #endif
572 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
573 continue;
574 s = opcode - pa->sym;
575 } else if (pa->instr_type & OPC_WLX) {
576 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
577 continue;
578 s = opcode - pa->sym + 1;
579 } else {
580 if (pa->sym != opcode)
581 continue;
583 if (pa->nb_ops != nb_ops)
584 continue;
585 #ifdef TCC_TARGET_X86_64
586 /* Special case for moves. Selecting the IM64->REG64 form
587 should only be done if we really have an >32bit imm64, and that
588 is hardcoded. Ignore it here. */
589 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
590 && ops[1].type == OP_REG64)
591 continue;
592 #endif
593 /* now decode and check each operand */
594 alltypes = 0;
595 for(i = 0; i < nb_ops; i++) {
596 int op1, op2;
597 op1 = pa->op_type[i];
598 op2 = op1 & 0x1f;
599 switch(op2) {
600 case OPT_IM:
601 v = OP_IM8 | OP_IM16 | OP_IM32;
602 break;
603 case OPT_REG:
604 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
605 break;
606 case OPT_REGW:
607 v = OP_REG16 | OP_REG32 | OP_REG64;
608 break;
609 case OPT_IMW:
610 v = OP_IM16 | OP_IM32;
611 break;
612 default:
613 v = 1 << op2;
614 break;
616 if (op1 & OPT_EA)
617 v |= OP_EA;
618 op_type[i] = v;
619 if ((ops[i].type & v) == 0)
620 goto next;
621 alltypes |= ops[i].type;
623 /* all is matching ! */
624 break;
625 next: ;
627 if (pa->sym == 0) {
628 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
629 int b;
630 b = op0_codes[opcode - TOK_ASM_first];
631 if (b & 0xff00)
632 g(b >> 8);
633 g(b);
634 return;
635 } else if (opcode <= TOK_ASM_alllast) {
636 tcc_error("bad operand with opcode '%s'",
637 get_tok_str(opcode, NULL));
638 } else {
639 tcc_error("unknown opcode '%s'",
640 get_tok_str(opcode, NULL));
643 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
644 autosize = NBWLX-1;
645 #ifdef TCC_TARGET_X86_64
646 /* XXX the autosize should rather be zero, to not have to adjust this
647 all the time. */
648 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
649 autosize = NBWLX-2;
650 #endif
651 if (s == autosize) {
652 for(i = 0; s == autosize && i < nb_ops; i++) {
653 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
654 s = reg_to_size[ops[i].type & OP_REG];
656 if (s == autosize) {
657 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
658 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
659 s = 2;
660 else
661 tcc_error("cannot infer opcode suffix");
665 #ifdef TCC_TARGET_X86_64
666 /* Generate addr32 prefix if needed */
667 for(i = 0; i < nb_ops; i++) {
668 if (ops[i].type & OP_EA32) {
669 g(0x67);
670 break;
673 #endif
674 /* generate data16 prefix if needed */
675 if (s == 1 || (pa->instr_type & OPC_D16))
676 g(0x66);
677 #ifdef TCC_TARGET_X86_64
678 if (s == 3 || (alltypes & OP_REG64)) {
679 /* generate REX prefix */
680 int default64 = 0;
681 for(i = 0; i < nb_ops; i++) {
682 if (op_type[i] == OP_REG64) {
683 /* If only 64bit regs are accepted in one operand
684 this is a default64 instruction without need for
685 REX prefixes. */
686 default64 = 1;
687 break;
690 /* XXX find better encoding for the default64 instructions. */
691 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
692 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
693 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
694 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
695 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
696 && !default64)
697 g(0x48);
699 #endif
701 /* now generates the operation */
702 if (pa->instr_type & OPC_FWAIT)
703 g(0x9b);
704 if (seg_prefix)
705 g(seg_prefix);
707 v = pa->opcode;
708 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
709 /* kludge for imul $im, %reg */
710 nb_ops = 3;
711 ops[2] = ops[1];
712 op_type[2] = op_type[1];
713 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
714 v--; /* int $3 case */
715 nb_ops = 0;
716 } else if ((v == 0x06 || v == 0x07)) {
717 if (ops[0].reg >= 4) {
718 /* push/pop %fs or %gs */
719 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
720 } else {
721 v += ops[0].reg << 3;
723 nb_ops = 0;
724 } else if (v <= 0x05) {
725 /* arith case */
726 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
727 } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
728 /* fpu arith case */
729 v += ((opcode - pa->sym) / 6) << 3;
731 if (pa->instr_type & OPC_REG) {
732 for(i = 0; i < nb_ops; i++) {
733 if (op_type[i] & (OP_REG | OP_ST)) {
734 v += ops[i].reg;
735 break;
738 /* mov $im, %reg case */
739 if (pa->opcode == 0xb0 && s >= 1)
740 v += 7;
742 if (pa->instr_type & OPC_B)
743 v += s >= 1;
744 if (pa->instr_type & OPC_TEST)
745 v += test_bits[opcode - pa->sym];
746 if (pa->instr_type & OPC_SHORTJMP) {
747 Sym *sym;
748 int jmp_disp;
750 /* see if we can really generate the jump with a byte offset */
751 sym = ops[0].e.sym;
752 if (!sym)
753 goto no_short_jump;
754 if (sym->r != cur_text_section->sh_num)
755 goto no_short_jump;
756 jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
757 if (jmp_disp == (int8_t)jmp_disp) {
758 /* OK to generate jump */
759 ops[0].e.sym = 0;
760 ops[0].e.v = jmp_disp;
761 op_type[0] = OP_IM8S;
762 } else {
763 no_short_jump:
764 if (pa->instr_type & OPC_JMP) {
765 /* long jump will be allowed. need to modify the
766 opcode slightly */
767 if (v == 0xeb)
768 v = 0xe9;
769 else
770 v += 0x0f10;
771 } else {
772 tcc_error("invalid displacement");
776 op1 = v >> 8;
777 if (op1)
778 g(op1);
779 g(v);
781 /* search which operand will used for modrm */
782 modrm_index = 0;
783 if (pa->instr_type & OPC_SHIFT) {
784 reg = (opcode - pa->sym) / NBWLX;
785 if (reg == 6)
786 reg = 7;
787 } else if (pa->instr_type & OPC_ARITH) {
788 reg = (opcode - pa->sym) / NBWLX;
789 } else if (pa->instr_type & OPC_FARITH) {
790 reg = (opcode - pa->sym) / 6;
791 } else {
792 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
794 if (pa->instr_type & OPC_MODRM) {
795 /* first look for an ea operand */
796 for(i = 0;i < nb_ops; i++) {
797 if (op_type[i] & OP_EA)
798 goto modrm_found;
800 /* then if not found, a register or indirection (shift instructions) */
801 for(i = 0;i < nb_ops; i++) {
802 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
803 goto modrm_found;
805 #ifdef ASM_DEBUG
806 tcc_error("bad op table");
807 #endif
808 modrm_found:
809 modrm_index = i;
810 /* if a register is used in another operand then it is
811 used instead of group */
812 for(i = 0;i < nb_ops; i++) {
813 v = op_type[i];
814 if (i != modrm_index &&
815 (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
816 reg = ops[i].reg;
817 break;
821 asm_modrm(reg, &ops[modrm_index]);
824 /* emit constants */
825 #ifndef TCC_TARGET_X86_64
826 if (pa->opcode == 0x9a || pa->opcode == 0xea) {
827 /* ljmp or lcall kludge */
828 gen_expr32(&ops[1].e);
829 if (ops[0].e.sym)
830 tcc_error("cannot relocate");
831 gen_le16(ops[0].e.v);
832 return;
834 #endif
835 for(i = 0;i < nb_ops; i++) {
836 v = op_type[i];
837 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
838 /* if multiple sizes are given it means we must look
839 at the op size */
840 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
841 if (s == 0)
842 v = OP_IM8;
843 else if (s == 1)
844 v = OP_IM16;
845 else if (s == 2 || (v & OP_IM64) == 0)
846 v = OP_IM32;
847 else
848 v = OP_IM64;
850 if (v & (OP_IM8 | OP_IM8S)) {
851 if (ops[i].e.sym)
852 goto error_relocate;
853 g(ops[i].e.v);
854 } else if (v & OP_IM16) {
855 if (ops[i].e.sym)
856 error_relocate:
857 tcc_error("cannot relocate");
858 else
859 gen_le16(ops[i].e.v);
860 } else {
861 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
862 gen_disp32(&ops[i].e);
863 } else {
864 #ifdef TCC_TARGET_X86_64
865 if (v & OP_IM64)
866 gen_expr64(&ops[i].e);
867 else
868 #endif
869 gen_expr32(&ops[i].e);
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* we take the lowest priority */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}
/* Skip the '=', '&', '+' and '%' modifier characters at the start of an
   asm constraint string and return a pointer to the first real letter. */
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}
932 #define REG_OUT_MASK 0x01
933 #define REG_IN_MASK 0x02
935 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
937 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
938 int nb_operands, int nb_outputs,
939 const uint8_t *clobber_regs,
940 int *pout_reg)
942 ASMOperand *op;
943 int sorted_op[MAX_ASM_OPERANDS];
944 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
945 const char *str;
946 uint8_t regs_allocated[NB_ASM_REGS];
948 /* init fields */
949 for(i=0;i<nb_operands;i++) {
950 op = &operands[i];
951 op->input_index = -1;
952 op->ref_index = -1;
953 op->reg = -1;
954 op->is_memory = 0;
955 op->is_rw = 0;
957 /* compute constraint priority and evaluate references to output
958 constraints if input constraints */
959 for(i=0;i<nb_operands;i++) {
960 op = &operands[i];
961 str = op->constraint;
962 str = skip_constraint_modifiers(str);
963 if (isnum(*str) || *str == '[') {
964 /* this is a reference to another constraint */
965 k = find_constraint(operands, nb_operands, str, NULL);
966 if ((unsigned)k >= i || i < nb_outputs)
967 tcc_error("invalid reference in constraint %d ('%s')",
968 i, str);
969 op->ref_index = k;
970 if (operands[k].input_index >= 0)
971 tcc_error("cannot reference twice the same operand");
972 operands[k].input_index = i;
973 op->priority = 5;
974 } else {
975 op->priority = constraint_priority(str);
979 /* sort operands according to their priority */
980 for(i=0;i<nb_operands;i++)
981 sorted_op[i] = i;
982 for(i=0;i<nb_operands - 1;i++) {
983 for(j=i+1;j<nb_operands;j++) {
984 p1 = operands[sorted_op[i]].priority;
985 p2 = operands[sorted_op[j]].priority;
986 if (p2 < p1) {
987 tmp = sorted_op[i];
988 sorted_op[i] = sorted_op[j];
989 sorted_op[j] = tmp;
994 for(i = 0;i < NB_ASM_REGS; i++) {
995 if (clobber_regs[i])
996 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
997 else
998 regs_allocated[i] = 0;
1000 /* esp cannot be used */
1001 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1002 /* ebp cannot be used yet */
1003 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1005 /* allocate registers and generate corresponding asm moves */
1006 for(i=0;i<nb_operands;i++) {
1007 j = sorted_op[i];
1008 op = &operands[j];
1009 str = op->constraint;
1010 /* no need to allocate references */
1011 if (op->ref_index >= 0)
1012 continue;
1013 /* select if register is used for output, input or both */
1014 if (op->input_index >= 0) {
1015 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1016 } else if (j < nb_outputs) {
1017 reg_mask = REG_OUT_MASK;
1018 } else {
1019 reg_mask = REG_IN_MASK;
1021 try_next:
1022 c = *str++;
1023 switch(c) {
1024 case '=':
1025 goto try_next;
1026 case '+':
1027 op->is_rw = 1;
1028 /* FALL THRU */
1029 case '&':
1030 if (j >= nb_outputs)
1031 tcc_error("'%c' modifier can only be applied to outputs", c);
1032 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1033 goto try_next;
1034 case 'A':
1035 /* allocate both eax and edx */
1036 if (is_reg_allocated(TREG_XAX) ||
1037 is_reg_allocated(TREG_XDX))
1038 goto try_next;
1039 op->is_llong = 1;
1040 op->reg = TREG_XAX;
1041 regs_allocated[TREG_XAX] |= reg_mask;
1042 regs_allocated[TREG_XDX] |= reg_mask;
1043 break;
1044 case 'a':
1045 reg = TREG_XAX;
1046 goto alloc_reg;
1047 case 'b':
1048 reg = 3;
1049 goto alloc_reg;
1050 case 'c':
1051 reg = TREG_XCX;
1052 goto alloc_reg;
1053 case 'd':
1054 reg = TREG_XDX;
1055 goto alloc_reg;
1056 case 'S':
1057 reg = 6;
1058 goto alloc_reg;
1059 case 'D':
1060 reg = 7;
1061 alloc_reg:
1062 if (is_reg_allocated(reg))
1063 goto try_next;
1064 goto reg_found;
1065 case 'q':
1066 /* eax, ebx, ecx or edx */
1067 for(reg = 0; reg < 4; reg++) {
1068 if (!is_reg_allocated(reg))
1069 goto reg_found;
1071 goto try_next;
1072 case 'r':
1073 /* any general register */
1074 for(reg = 0; reg < 8; reg++) {
1075 if (!is_reg_allocated(reg))
1076 goto reg_found;
1078 goto try_next;
1079 reg_found:
1080 /* now we can reload in the register */
1081 op->is_llong = 0;
1082 op->reg = reg;
1083 regs_allocated[reg] |= reg_mask;
1084 break;
1085 case 'i':
1086 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1087 goto try_next;
1088 break;
1089 case 'I':
1090 case 'N':
1091 case 'M':
1092 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1093 goto try_next;
1094 break;
1095 case 'm':
1096 case 'g':
1097 /* nothing special to do because the operand is already in
1098 memory, except if the pointer itself is stored in a
1099 memory variable (VT_LLOCAL case) */
1100 /* XXX: fix constant case */
1101 /* if it is a reference to a memory zone, it must lie
1102 in a register, so we reserve the register in the
1103 input registers and a load will be generated
1104 later */
1105 if (j < nb_outputs || c == 'm') {
1106 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1107 /* any general register */
1108 for(reg = 0; reg < 8; reg++) {
1109 if (!(regs_allocated[reg] & REG_IN_MASK))
1110 goto reg_found1;
1112 goto try_next;
1113 reg_found1:
1114 /* now we can reload in the register */
1115 regs_allocated[reg] |= REG_IN_MASK;
1116 op->reg = reg;
1117 op->is_memory = 1;
1120 break;
1121 default:
1122 tcc_error("asm constraint %d ('%s') could not be satisfied",
1123 j, op->constraint);
1124 break;
1126 /* if a reference is present for that operand, we assign it too */
1127 if (op->input_index >= 0) {
1128 operands[op->input_index].reg = op->reg;
1129 operands[op->input_index].is_llong = op->is_llong;
1133 /* compute out_reg. It is used to store outputs registers to memory
1134 locations references by pointers (VT_LLOCAL case) */
1135 *pout_reg = -1;
1136 for(i=0;i<nb_operands;i++) {
1137 op = &operands[i];
1138 if (op->reg >= 0 &&
1139 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1140 !op->is_memory) {
1141 for(reg = 0; reg < 8; reg++) {
1142 if (!(regs_allocated[reg] & REG_OUT_MASK))
1143 goto reg_found2;
1145 tcc_error("could not find free output register for reloading");
1146 reg_found2:
1147 *pout_reg = reg;
1148 break;
1152 /* print sorted constraints */
1153 #ifdef ASM_DEBUG
1154 for(i=0;i<nb_operands;i++) {
1155 j = sorted_op[i];
1156 op = &operands[j];
1157 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1159 op->id ? get_tok_str(op->id, NULL) : "",
1160 op->constraint,
1161 op->vt->r,
1162 op->reg);
1164 if (*pout_reg >= 0)
1165 printf("out_reg=%d\n", *pout_reg);
1166 #endif
1169 ST_FUNC void subst_asm_operand(CString *add_str,
1170 SValue *sv, int modifier)
1172 int r, reg, size, val;
1173 char buf[64];
1175 r = sv->r;
1176 if ((r & VT_VALMASK) == VT_CONST) {
1177 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
1178 cstr_ccat(add_str, '$');
1179 if (r & VT_SYM) {
1180 cstr_cat(add_str, get_tok_str(sv->sym->v, NULL), -1);
1181 if ((uint32_t)sv->c.i != 0) {
1182 cstr_ccat(add_str, '+');
1183 } else {
1184 return;
1187 val = sv->c.i;
1188 if (modifier == 'n')
1189 val = -val;
1190 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1191 cstr_cat(add_str, buf, -1);
1192 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1193 #ifdef TCC_TARGET_X86_64
1194 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1195 #else
1196 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1197 #endif
1198 cstr_cat(add_str, buf, -1);
1199 } else if (r & VT_LVAL) {
1200 reg = r & VT_VALMASK;
1201 if (reg >= VT_CONST)
1202 tcc_error("internal compiler error");
1203 snprintf(buf, sizeof(buf), "(%%%s)",
1204 get_tok_str(TOK_ASM_eax + reg, NULL));
1205 cstr_cat(add_str, buf, -1);
1206 } else {
1207 /* register case */
1208 reg = r & VT_VALMASK;
1209 if (reg >= VT_CONST)
1210 tcc_error("internal compiler error");
1212 /* choose register operand size */
1213 if ((sv->type.t & VT_BTYPE) == VT_BYTE)
1214 size = 1;
1215 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1216 size = 2;
1217 #ifdef TCC_TARGET_X86_64
1218 else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
1219 size = 8;
1220 #endif
1221 else
1222 size = 4;
1223 if (size == 1 && reg >= 4)
1224 size = 4;
1226 if (modifier == 'b') {
1227 if (reg >= 4)
1228 tcc_error("cannot use byte register");
1229 size = 1;
1230 } else if (modifier == 'h') {
1231 if (reg >= 4)
1232 tcc_error("cannot use byte register");
1233 size = -1;
1234 } else if (modifier == 'w') {
1235 size = 2;
1236 #ifdef TCC_TARGET_X86_64
1237 } else if (modifier == 'q') {
1238 size = 8;
1239 #endif
1242 switch(size) {
1243 case -1:
1244 reg = TOK_ASM_ah + reg;
1245 break;
1246 case 1:
1247 reg = TOK_ASM_al + reg;
1248 break;
1249 case 2:
1250 reg = TOK_ASM_ax + reg;
1251 break;
1252 default:
1253 reg = TOK_ASM_eax + reg;
1254 break;
1255 #ifdef TCC_TARGET_X86_64
1256 case 8:
1257 reg = TOK_ASM_rax + reg;
1258 break;
1259 #endif
1261 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1262 cstr_cat(add_str, buf, -1);
1266 /* generate prolog and epilog code for asm statement */
1267 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1268 int nb_outputs, int is_output,
1269 uint8_t *clobber_regs,
1270 int out_reg)
1272 uint8_t regs_allocated[NB_ASM_REGS];
1273 ASMOperand *op;
1274 int i, reg;
1275 static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
1277 /* mark all used registers */
1278 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1279 for(i = 0; i < nb_operands;i++) {
1280 op = &operands[i];
1281 if (op->reg >= 0)
1282 regs_allocated[op->reg] = 1;
1284 if (!is_output) {
1285 /* generate reg save code */
1286 for(i = 0; i < NB_SAVED_REGS; i++) {
1287 reg = reg_saved[i];
1288 if (regs_allocated[reg]) {
1289 g(0x50 + reg);
1293 /* generate load code */
1294 for(i = 0; i < nb_operands; i++) {
1295 op = &operands[i];
1296 if (op->reg >= 0) {
1297 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1298 op->is_memory) {
1299 /* memory reference case (for both input and
1300 output cases) */
1301 SValue sv;
1302 sv = *op->vt;
1303 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1304 load(op->reg, &sv);
1305 } else if (i >= nb_outputs || op->is_rw) {
1306 /* load value in register */
1307 load(op->reg, op->vt);
1308 if (op->is_llong) {
1309 SValue sv;
1310 sv = *op->vt;
1311 sv.c.i += 4;
1312 load(TREG_XDX, &sv);
1317 } else {
1318 /* generate save code */
1319 for(i = 0 ; i < nb_outputs; i++) {
1320 op = &operands[i];
1321 if (op->reg >= 0) {
1322 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1323 if (!op->is_memory) {
1324 SValue sv;
1325 sv = *op->vt;
1326 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1327 load(out_reg, &sv);
1329 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1330 store(op->reg, &sv);
1332 } else {
1333 store(op->reg, op->vt);
1334 if (op->is_llong) {
1335 SValue sv;
1336 sv = *op->vt;
1337 sv.c.i += 4;
1338 store(TREG_XDX, &sv);
1343 /* generate reg restore code */
1344 for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
1345 reg = reg_saved[i];
1346 if (regs_allocated[reg]) {
1347 g(0x58 + reg);
1353 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1355 int reg;
1356 TokenSym *ts;
1358 if (!strcmp(str, "memory") ||
1359 !strcmp(str, "cc"))
1360 return;
1361 ts = tok_alloc(str, strlen(str));
1362 reg = ts->tok;
1363 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1364 reg -= TOK_ASM_eax;
1365 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1366 reg -= TOK_ASM_ax;
1367 #ifdef TCC_TARGET_X86_64
1368 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1369 reg -= TOK_ASM_rax;
1370 #endif
1371 } else {
1372 tcc_error("invalid clobber register '%s'", str);
1374 clobber_regs[reg] = 1;