/* Web-viewer residue kept for provenance:
 *   tcc -dt -run ... : simpler is better
 *   [tinycc.git] / i386-asm.c
 *   blob 2e184974e382bb554c8f882c73d1d6dd44365618
 */
/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

22 #include "tcc.h"
/* Maximum number of operands an instruction template can take. */
#define MAX_OPERANDS 3

/* Token ranges of the assembler mnemonics: [first, last] are the
   operand-less opcodes indexed through op0_codes[]; alllast is the last
   known mnemonic token. */
#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

/* Flags stored in ASMInstr.instr_type (low bits). */
#define OPC_B          0x01  /* only used with OPC_WL */
#define OPC_WL         0x02  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04 /* register is added to opcode */
#define OPC_MODRM      0x08 /* modrm encoding */

/* Instruction class, a 3-bit field selected by OPCT_MASK. */
#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10 /* add fwait opcode */
#define OPC_SHIFT      0x20 /* shift opcodes */
#define OPC_ARITH      0x30 /* arithmetic opcodes */
#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
#define OPC_TEST       0x50 /* test opcodes */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
#define OPC_48        0x200 /* Always has REX prefix */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

/* Bit position of the 3-bit "group" (the /digit placed in the modrm reg
   field) inside instr_type. */
#define OPC_GROUP_SHIFT 13

/* In order to compress the operand type stored in the instruction
   templates, each template operand is one of the OPT_xxx codes below,
   optionally ORed with OPT_EA.  The corresponding run-time masks are
   the OP_xxx macros, defined as (1 << OPT_xxx). */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};

/* Run-time operand type masks: one bit per basic OPT_xxx code. */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
/* Bit just above OP_EA.  Written as an unsigned literal: the former
   (OP_EA << 1) shifted a signed int into its sign bit, which is
   undefined behaviour. */
# define OP_EA32  0x80000000u
#else
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* Native-width names for the a/c/d registers. */
#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif

149 typedef struct ASMInstr {
150 uint16_t sym;
151 uint16_t opcode;
152 uint16_t instr_type;
153 uint8_t nb_ops;
154 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
155 } ASMInstr;
157 typedef struct Operand {
158 uint32_t type;
159 int8_t reg; /* register, -1 if none */
160 int8_t reg2; /* second register, -1 if none */
161 uint8_t shift;
162 ExprValue e;
163 } Operand;
/* Maps an OP_REGxx mask (value 1, 2, 4 or 8) to the operand size index:
 *   [OP_REG8] = 0, [OP_REG16] = 1, [OP_REG32] = 2, [OP_REG64] = 3
 * The table is written positionally; the other slots are unused. */
static const uint8_t reg_to_size[9] = {
    0, 0, 1, 0, 2, 0, 0, 0, 3
};

#define NB_TEST_OPCODES 30

/* Condition code added to the opcode of an OPC_TEST instruction, indexed
   by (mnemonic token - base token); synonyms share one code. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};

/* Segment override prefix bytes, indexed by (token - TOK_ASM_es). */
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};

221 static const ASMInstr asm_instrs[] = {
222 #define ALT(x) x
223 /* This removes a 0x0f in the second byte */
224 #define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
225 /* This constructs instr_type from opcode, type and group. */
226 #define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
227 #define DEF_ASM_OP0(name, opcode)
228 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
229 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
230 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
231 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
232 #ifdef TCC_TARGET_X86_64
233 # include "x86_64-asm.h"
234 #else
235 # include "i386-asm.h"
236 #endif
237 /* last operation */
238 { 0, },
241 static const uint16_t op0_codes[] = {
242 #define ALT(x)
243 #define DEF_ASM_OP0(x, opcode) opcode,
244 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
245 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
246 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
247 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
248 #ifdef TCC_TARGET_X86_64
249 # include "x86_64-asm.h"
250 #else
251 # include "i386-asm.h"
252 #endif
255 static inline int get_reg_shift(TCCState *s1)
257 int shift, v;
258 v = asm_int_expr(s1);
259 switch(v) {
260 case 1:
261 shift = 0;
262 break;
263 case 2:
264 shift = 1;
265 break;
266 case 4:
267 shift = 2;
268 break;
269 case 8:
270 shift = 3;
271 break;
272 default:
273 expect("1, 2, 4 or 8 constant");
274 shift = 0;
275 break;
277 return shift;
#ifdef TCC_TARGET_X86_64
/* Parse a numbered register name held in identifier token T: "rN"
   optionally followed by a 'b', 'w' or 'd' width suffix, or "crN".
   N is a decimal number without leading zero, at most 15.

   Returns the register number and stores the operand class in *type
   (OP_REG64, OP_REG32, OP_REG16, OP_REG8 or OP_CR); returns -1 if T is
   not such a name.  Note that *type may have been written even when -1
   is returned. */
static int asm_parse_numeric_reg(int t, unsigned int *type)
{
    int reg = -1;

    if (t >= TOK_IDENT && t < tok_ident) {
        const char *s = table_ident[t - TOK_IDENT]->str;
        char c;

        *type = OP_REG64;
        if (*s == 'c') {
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
            return -1;
        /* Don't allow leading '0'.  */
        if ((c = *s++) >= '1' && c <= '9')
            reg = c - '0';
        else
            return -1;
        /* optional second digit; anything above 15 is rejected below */
        if ((c = *s) >= '0' && c <= '5')
            s++, reg = reg * 10 + c - '0';
        if (reg > 15)
            return -1;
        if ((c = *s) == 0)
            ; /* plain rN / crN */
        else if (*type != OP_REG64)
            return -1; /* control registers take no width suffix */
        else if (c == 'b' && !s[1])
            *type = OP_REG8;
        else if (c == 'w' && !s[1])
            *type = OP_REG16;
        else if (c == 'd' && !s[1])
            *type = OP_REG32;
        else
            return -1;
    }
    return reg;
}
#endif

320 static int asm_parse_reg(unsigned int *type)
322 int reg = 0;
323 *type = 0;
324 if (tok != '%')
325 goto error_32;
326 next();
327 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
328 reg = tok - TOK_ASM_eax;
329 *type = OP_REG32;
330 #ifdef TCC_TARGET_X86_64
331 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
332 reg = tok - TOK_ASM_rax;
333 *type = OP_REG64;
334 } else if (tok == TOK_ASM_rip) {
335 reg = -2; /* Probably should use different escape code. */
336 *type = OP_REG64;
337 } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
338 && (*type == OP_REG32 || *type == OP_REG64)) {
340 #endif
341 } else {
342 error_32:
343 expect("register");
345 next();
346 return reg;
349 static void parse_operand(TCCState *s1, Operand *op)
351 ExprValue e;
352 int reg, indir;
353 const char *p;
355 indir = 0;
356 if (tok == '*') {
357 next();
358 indir = OP_INDIR;
361 if (tok == '%') {
362 next();
363 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
364 reg = tok - TOK_ASM_al;
365 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
366 op->reg = reg & 7;
367 if ((op->type & OP_REG) && op->reg == TREG_XAX)
368 op->type |= OP_EAX;
369 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
370 op->type |= OP_CL;
371 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
372 op->type |= OP_DX;
373 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
374 op->type = OP_DB;
375 op->reg = tok - TOK_ASM_dr0;
376 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
377 op->type = OP_SEG;
378 op->reg = tok - TOK_ASM_es;
379 } else if (tok == TOK_ASM_st) {
380 op->type = OP_ST;
381 op->reg = 0;
382 next();
383 if (tok == '(') {
384 next();
385 if (tok != TOK_PPNUM)
386 goto reg_error;
387 p = tokc.str.data;
388 reg = p[0] - '0';
389 if ((unsigned)reg >= 8 || p[1] != '\0')
390 goto reg_error;
391 op->reg = reg;
392 next();
393 skip(')');
395 if (op->reg == 0)
396 op->type |= OP_ST0;
397 goto no_skip;
398 #ifdef TCC_TARGET_X86_64
399 } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
400 op->type = OP_REG8 | OP_REG8_LOW;
401 op->reg = 4 + tok - TOK_ASM_spl;
402 } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
404 #endif
405 } else {
406 reg_error:
407 tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
409 next();
410 no_skip: ;
411 } else if (tok == '$') {
412 /* constant value */
413 next();
414 asm_expr(s1, &e);
415 op->type = OP_IM32;
416 op->e = e;
417 if (!op->e.sym) {
418 if (op->e.v == (uint8_t)op->e.v)
419 op->type |= OP_IM8;
420 if (op->e.v == (int8_t)op->e.v)
421 op->type |= OP_IM8S;
422 if (op->e.v == (uint16_t)op->e.v)
423 op->type |= OP_IM16;
424 #ifdef TCC_TARGET_X86_64
425 if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
426 op->type = OP_IM64;
427 #endif
429 } else {
430 /* address(reg,reg2,shift) with all variants */
431 op->type = OP_EA;
432 op->reg = -1;
433 op->reg2 = -1;
434 op->shift = 0;
435 if (tok != '(') {
436 asm_expr(s1, &e);
437 op->e = e;
438 } else {
439 next();
440 if (tok == '%') {
441 unget_tok('(');
442 op->e.v = 0;
443 op->e.sym = NULL;
444 } else {
445 /* bracketed offset expression */
446 asm_expr(s1, &e);
447 if (tok != ')')
448 expect(")");
449 next();
450 op->e.v = e.v;
451 op->e.sym = e.sym;
453 op->e.pcrel = 0;
455 if (tok == '(') {
456 unsigned int type = 0;
457 next();
458 if (tok != ',') {
459 op->reg = asm_parse_reg(&type);
461 if (tok == ',') {
462 next();
463 if (tok != ',') {
464 op->reg2 = asm_parse_reg(&type);
466 if (tok == ',') {
467 next();
468 op->shift = get_reg_shift(s1);
471 if (type & OP_REG32)
472 op->type |= OP_EA32;
473 skip(')');
475 if (op->reg == -1 && op->reg2 == -1)
476 op->type |= OP_ADDR;
478 op->type |= indir;
481 /* XXX: unify with C code output ? */
482 ST_FUNC void gen_expr32(ExprValue *pe)
484 if (pe->pcrel)
485 /* If PC-relative, always set VT_SYM, even without symbol,
486 so as to force a relocation to be emitted. */
487 gen_addrpc32(VT_SYM, pe->sym, pe->v);
488 else
489 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* Emit the 64-bit value of expression *pe (symbol-relative when pe->sym
   is set). */
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif

499 /* XXX: unify with C code output ? */
500 static void gen_disp32(ExprValue *pe)
502 Sym *sym = pe->sym;
503 if (sym && sym->r == cur_text_section->sh_num) {
504 /* same section: we can output an absolute value. Note
505 that the TCC compiler behaves differently here because
506 it always outputs a relocation to ease (future) code
507 elimination in the linker */
508 gen_le32(pe->v + sym->jnext - ind - 4);
509 } else {
510 if (sym && sym->type.t == VT_VOID) {
511 sym->type.t = VT_FUNC;
512 sym->type.ref = NULL;
514 gen_addrpc32(VT_SYM, sym, pe->v);
518 /* generate the modrm operand */
519 static inline int asm_modrm(int reg, Operand *op)
521 int mod, reg1, reg2, sib_reg1;
523 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
524 g(0xc0 + (reg << 3) + op->reg);
525 } else if (op->reg == -1 && op->reg2 == -1) {
526 /* displacement only */
527 #ifdef TCC_TARGET_X86_64
528 g(0x04 + (reg << 3));
529 g(0x25);
530 #else
531 g(0x05 + (reg << 3));
532 #endif
533 gen_expr32(&op->e);
534 #ifdef TCC_TARGET_X86_64
535 } else if (op->reg == -2) {
536 ExprValue *pe = &op->e;
537 g(0x05 + (reg << 3));
538 gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
539 return ind;
540 #endif
541 } else {
542 sib_reg1 = op->reg;
543 /* fist compute displacement encoding */
544 if (sib_reg1 == -1) {
545 sib_reg1 = 5;
546 mod = 0x00;
547 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
548 mod = 0x00;
549 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
550 mod = 0x40;
551 } else {
552 mod = 0x80;
554 /* compute if sib byte needed */
555 reg1 = op->reg;
556 if (op->reg2 != -1)
557 reg1 = 4;
558 g(mod + (reg << 3) + reg1);
559 if (reg1 == 4) {
560 /* add sib byte */
561 reg2 = op->reg2;
562 if (reg2 == -1)
563 reg2 = 4; /* indicate no index */
564 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
566 /* add offset */
567 if (mod == 0x40) {
568 g(op->e.v);
569 } else if (mod == 0x80 || op->reg == -1) {
570 gen_expr32(&op->e);
573 return 0;
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

/* Fold register operand *op into the REX prefix being built in *rex.
   A register number >= 8 sets EXT_BIT (one of REX_R/REX_B) and is
   reduced to its low 3 bits in place; a low byte register (%spl...)
   forces an empty REX prefix; a legacy high byte register is remembered
   in *saw_high_8bit because it cannot be combined with any REX prefix. */
static void asm_rex_reg(Operand *op, int ext_bit, unsigned char *rex,
                        int *saw_high_8bit)
{
    if (op->reg >= 8) {
        *rex |= ext_bit;
        op->reg -= 8;
    } else if (op->type & OP_REG8_LOW) {
        *rex |= 0x40;
    } else if ((op->type & OP_REG8) && op->reg >= 4) {
        /* An 8 bit reg >= 4 without REG8_LOW is ah/ch/dh/bh */
        *saw_high_8bit = op->reg;
    }
}

/* Emit the REX prefix required by the operands, if any.

   width64  non-zero to request 64-bit operand size (REX.W)
   ops      the parsed operands; register numbers >= 8 are reduced by 8
            in place
   nb_ops   number of operands
   op_type  decoded template type of each operand
   regi     index of the operand encoded in the modrm reg field, or -1
   rmi      index of the operand encoded in the modrm r/m field, or -1
            when the instruction has no modrm byte */
static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? REX_W : 0;
    int saw_high_8bit = 0;
    int i;

    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later).  */
        for (i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                asm_rex_reg(&ops[i], REX_B, &rex, &saw_high_8bit);
                break;
            }
        }
    } else {
        if (regi != -1)
            asm_rex_reg(&ops[regi], REX_R, &rex, &saw_high_8bit);
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA))
            asm_rex_reg(&ops[rmi], REX_B, &rex, &saw_high_8bit);
        /* index register of a memory operand */
        if ((ops[rmi].type & OP_EA) && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif

639 static void maybe_print_stats (void)
641 static int already = 1;
642 if (!already)
643 /* print stats about opcodes */
645 const struct ASMInstr *pa;
646 int freq[4];
647 int op_vals[500];
648 int nb_op_vals, i, j;
650 already = 1;
651 nb_op_vals = 0;
652 memset(freq, 0, sizeof(freq));
653 for(pa = asm_instrs; pa->sym != 0; pa++) {
654 freq[pa->nb_ops]++;
655 //for(i=0;i<pa->nb_ops;i++) {
656 for(j=0;j<nb_op_vals;j++) {
657 //if (pa->op_type[i] == op_vals[j])
658 if (pa->instr_type == op_vals[j])
659 goto found;
661 //op_vals[nb_op_vals++] = pa->op_type[i];
662 op_vals[nb_op_vals++] = pa->instr_type;
663 found: ;
666 for(i=0;i<nb_op_vals;i++) {
667 int v = op_vals[i];
668 //if ((v & (v - 1)) != 0)
669 printf("%3d: %08x\n", i, v);
671 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
672 (int)sizeof(asm_instrs),
673 (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
674 freq[0], freq[1], freq[2], freq[3]);
678 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
680 const ASMInstr *pa;
681 int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
682 int nb_ops, s;
683 Operand ops[MAX_OPERANDS], *pop;
684 int op_type[3]; /* decoded op type */
685 int alltypes; /* OR of all operand types */
686 int autosize;
687 int p66;
688 #ifdef TCC_TARGET_X86_64
689 int rex64;
690 #endif
692 maybe_print_stats();
693 /* force synthetic ';' after prefix instruction, so we can handle */
694 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
695 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
696 unget_tok(';');
698 /* get operands */
699 pop = ops;
700 nb_ops = 0;
701 seg_prefix = 0;
702 alltypes = 0;
703 for(;;) {
704 if (tok == ';' || tok == TOK_LINEFEED)
705 break;
706 if (nb_ops >= MAX_OPERANDS) {
707 tcc_error("incorrect number of operands");
709 parse_operand(s1, pop);
710 if (tok == ':') {
711 if (pop->type != OP_SEG || seg_prefix)
712 tcc_error("incorrect prefix");
713 seg_prefix = segment_prefixes[pop->reg];
714 next();
715 parse_operand(s1, pop);
716 if (!(pop->type & OP_EA)) {
717 tcc_error("segment prefix must be followed by memory reference");
720 pop++;
721 nb_ops++;
722 if (tok != ',')
723 break;
724 next();
727 s = 0; /* avoid warning */
729 /* optimize matching by using a lookup table (no hashing is needed
730 !) */
731 for(pa = asm_instrs; pa->sym != 0; pa++) {
732 int it = pa->instr_type & OPCT_MASK;
733 s = 0;
734 if (it == OPC_FARITH) {
735 v = opcode - pa->sym;
736 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
737 continue;
738 } else if (it == OPC_ARITH) {
739 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
740 continue;
741 s = (opcode - pa->sym) % NBWLX;
742 if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
744 /* We need to reject the xxxb opcodes that we accepted above.
745 Note that pa->sym for WLX opcodes is the 'w' token,
746 to get the 'b' token subtract one. */
747 if (((opcode - pa->sym + 1) % NBWLX) == 0)
748 continue;
749 s++;
751 } else if (it == OPC_SHIFT) {
752 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
753 continue;
754 s = (opcode - pa->sym) % NBWLX;
755 } else if (it == OPC_TEST) {
756 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
757 continue;
758 /* cmovxx is a test opcode but accepts multiple sizes.
759 TCC doesn't accept the suffixed mnemonic, instead we
760 simply force size autodetection always. */
761 if (pa->instr_type & OPC_WLX)
762 s = NBWLX - 1;
763 } else if (pa->instr_type & OPC_B) {
764 #ifdef TCC_TARGET_X86_64
765 /* Some instructions don't have the full size but only
766 bwl form. insb e.g. */
767 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
768 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
769 continue;
770 #endif
771 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
772 continue;
773 s = opcode - pa->sym;
774 } else if (pa->instr_type & OPC_WLX) {
775 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
776 continue;
777 s = opcode - pa->sym + 1;
778 } else {
779 if (pa->sym != opcode)
780 continue;
782 if (pa->nb_ops != nb_ops)
783 continue;
784 #ifdef TCC_TARGET_X86_64
785 /* Special case for moves. Selecting the IM64->REG64 form
786 should only be done if we really have an >32bit imm64, and that
787 is hardcoded. Ignore it here. */
788 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
789 && (ops[1].type & OP_REG) == OP_REG64
790 && !(pa->instr_type & OPC_0F))
791 continue;
792 #endif
793 /* now decode and check each operand */
794 alltypes = 0;
795 for(i = 0; i < nb_ops; i++) {
796 int op1, op2;
797 op1 = pa->op_type[i];
798 op2 = op1 & 0x1f;
799 switch(op2) {
800 case OPT_IM:
801 v = OP_IM8 | OP_IM16 | OP_IM32;
802 break;
803 case OPT_REG:
804 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
805 break;
806 case OPT_REGW:
807 v = OP_REG16 | OP_REG32 | OP_REG64;
808 break;
809 case OPT_IMW:
810 v = OP_IM16 | OP_IM32;
811 break;
812 case OPT_MMXSSE:
813 v = OP_MMX | OP_SSE;
814 break;
815 case OPT_DISP:
816 case OPT_DISP8:
817 v = OP_ADDR;
818 break;
819 default:
820 v = 1 << op2;
821 break;
823 if (op1 & OPT_EA)
824 v |= OP_EA;
825 op_type[i] = v;
826 if ((ops[i].type & v) == 0)
827 goto next;
828 alltypes |= ops[i].type;
830 /* all is matching ! */
831 break;
832 next: ;
834 if (pa->sym == 0) {
835 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
836 int b;
837 b = op0_codes[opcode - TOK_ASM_first];
838 if (b & 0xff00)
839 g(b >> 8);
840 g(b);
841 return;
842 } else if (opcode <= TOK_ASM_alllast) {
843 tcc_error("bad operand with opcode '%s'",
844 get_tok_str(opcode, NULL));
845 } else {
846 tcc_error("unknown opcode '%s'",
847 get_tok_str(opcode, NULL));
850 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
851 autosize = NBWLX-1;
852 #ifdef TCC_TARGET_X86_64
853 /* XXX the autosize should rather be zero, to not have to adjust this
854 all the time. */
855 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
856 autosize = NBWLX-2;
857 #endif
858 if (s == autosize) {
859 /* Check for register operands providing hints about the size.
860 Start from the end, i.e. destination operands. This matters
861 only for opcodes accepting different sized registers, lar and lsl
862 are such opcodes. */
863 for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
864 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
865 s = reg_to_size[ops[i].type & OP_REG];
867 if (s == autosize) {
868 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
869 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
870 s = 2;
871 else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
872 (ops[0].type & OP_EA))
873 s = NBWLX - 2;
874 else
875 tcc_error("cannot infer opcode suffix");
879 #ifdef TCC_TARGET_X86_64
880 /* Generate addr32 prefix if needed */
881 for(i = 0; i < nb_ops; i++) {
882 if (ops[i].type & OP_EA32) {
883 g(0x67);
884 break;
887 #endif
888 /* generate data16 prefix if needed */
889 p66 = 0;
890 if (s == 1)
891 p66 = 1;
892 else {
893 /* accepting mmx+sse in all operands --> needs 0x66 to
894 switch to sse mode. Accepting only sse in an operand --> is
895 already SSE insn and needs 0x66/f2/f3 handling. */
896 for (i = 0; i < nb_ops; i++)
897 if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
898 && ops[i].type & OP_SSE)
899 p66 = 1;
901 if (p66)
902 g(0x66);
903 #ifdef TCC_TARGET_X86_64
904 rex64 = 0;
905 if (pa->instr_type & OPC_48)
906 rex64 = 1;
907 else if (s == 3 || (alltypes & OP_REG64)) {
908 /* generate REX prefix */
909 int default64 = 0;
910 for(i = 0; i < nb_ops; i++) {
911 if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
912 /* If only 64bit regs are accepted in one operand
913 this is a default64 instruction without need for
914 REX prefixes, except for movabs(0xb8). */
915 default64 = 1;
916 break;
919 /* XXX find better encoding for the default64 instructions. */
920 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
921 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
922 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
923 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
924 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
925 && !default64)
926 rex64 = 1;
928 #endif
930 /* now generates the operation */
931 if (OPCT_IS(pa->instr_type, OPC_FWAIT))
932 g(0x9b);
933 if (seg_prefix)
934 g(seg_prefix);
936 v = pa->opcode;
937 if (pa->instr_type & OPC_0F)
938 v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
939 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
940 /* kludge for imul $im, %reg */
941 nb_ops = 3;
942 ops[2] = ops[1];
943 op_type[2] = op_type[1];
944 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
945 v--; /* int $3 case */
946 nb_ops = 0;
947 } else if ((v == 0x06 || v == 0x07)) {
948 if (ops[0].reg >= 4) {
949 /* push/pop %fs or %gs */
950 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
951 } else {
952 v += ops[0].reg << 3;
954 nb_ops = 0;
955 } else if (v <= 0x05) {
956 /* arith case */
957 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
958 } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
959 /* fpu arith case */
960 v += ((opcode - pa->sym) / 6) << 3;
963 /* search which operand will be used for modrm */
964 modrm_index = -1;
965 modreg_index = -1;
966 if (pa->instr_type & OPC_MODRM) {
967 if (!nb_ops) {
968 /* A modrm opcode without operands is a special case (e.g. mfence).
969 It has a group and acts as if there's an register operand 0
970 (ax). */
971 i = 0;
972 ops[i].type = OP_REG;
973 ops[i].reg = 0;
974 goto modrm_found;
976 /* first look for an ea operand */
977 for(i = 0;i < nb_ops; i++) {
978 if (op_type[i] & OP_EA)
979 goto modrm_found;
981 /* then if not found, a register or indirection (shift instructions) */
982 for(i = 0;i < nb_ops; i++) {
983 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
984 goto modrm_found;
986 #ifdef ASM_DEBUG
987 tcc_error("bad op table");
988 #endif
989 modrm_found:
990 modrm_index = i;
991 /* if a register is used in another operand then it is
992 used instead of group */
993 for(i = 0;i < nb_ops; i++) {
994 int t = op_type[i];
995 if (i != modrm_index &&
996 (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
997 modreg_index = i;
998 break;
1002 #ifdef TCC_TARGET_X86_64
1003 asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
1004 #endif
1006 if (pa->instr_type & OPC_REG) {
1007 /* mov $im, %reg case */
1008 if (v == 0xb0 && s >= 1)
1009 v += 7;
1010 for(i = 0; i < nb_ops; i++) {
1011 if (op_type[i] & (OP_REG | OP_ST)) {
1012 v += ops[i].reg;
1013 break;
1017 if (pa->instr_type & OPC_B)
1018 v += s >= 1;
1019 if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
1020 Sym *sym;
1021 int jmp_disp;
1023 /* see if we can really generate the jump with a byte offset */
1024 sym = ops[0].e.sym;
1025 if (!sym)
1026 goto no_short_jump;
1027 if (sym->r != cur_text_section->sh_num)
1028 goto no_short_jump;
1029 jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
1030 if (jmp_disp == (int8_t)jmp_disp) {
1031 /* OK to generate jump */
1032 ops[0].e.sym = 0;
1033 ops[0].e.v = jmp_disp;
1034 op_type[0] = OP_IM8S;
1035 } else {
1036 no_short_jump:
1037 /* long jump will be allowed. need to modify the
1038 opcode slightly */
1039 if (v == 0xeb) /* jmp */
1040 v = 0xe9;
1041 else if (v == 0x70) /* jcc */
1042 v += 0x0f10;
1043 else
1044 tcc_error("invalid displacement");
1047 if (OPCT_IS(pa->instr_type, OPC_TEST))
1048 v += test_bits[opcode - pa->sym];
1049 op1 = v >> 16;
1050 if (op1)
1051 g(op1);
1052 op1 = (v >> 8) & 0xff;
1053 if (op1)
1054 g(op1);
1055 g(v);
1057 if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
1058 reg = (opcode - pa->sym) / NBWLX;
1059 if (reg == 6)
1060 reg = 7;
1061 } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
1062 reg = (opcode - pa->sym) / NBWLX;
1063 } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
1064 reg = (opcode - pa->sym) / 6;
1065 } else {
1066 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
1069 pc = 0;
1070 if (pa->instr_type & OPC_MODRM) {
1071 /* if a register is used in another operand then it is
1072 used instead of group */
1073 if (modreg_index >= 0)
1074 reg = ops[modreg_index].reg;
1075 pc = asm_modrm(reg, &ops[modrm_index]);
1078 /* emit constants */
1079 #ifndef TCC_TARGET_X86_64
1080 if (!(pa->instr_type & OPC_0F)
1081 && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
1082 /* ljmp or lcall kludge */
1083 gen_expr32(&ops[1].e);
1084 if (ops[0].e.sym)
1085 tcc_error("cannot relocate");
1086 gen_le16(ops[0].e.v);
1087 return;
1089 #endif
1090 for(i = 0;i < nb_ops; i++) {
1091 v = op_type[i];
1092 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
1093 /* if multiple sizes are given it means we must look
1094 at the op size */
1095 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
1096 if (s == 0)
1097 v = OP_IM8;
1098 else if (s == 1)
1099 v = OP_IM16;
1100 else if (s == 2 || (v & OP_IM64) == 0)
1101 v = OP_IM32;
1102 else
1103 v = OP_IM64;
1106 if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
1107 tcc_error("cannot relocate");
1109 if (v & (OP_IM8 | OP_IM8S)) {
1110 g(ops[i].e.v);
1111 } else if (v & OP_IM16) {
1112 gen_le16(ops[i].e.v);
1113 #ifdef TCC_TARGET_X86_64
1114 } else if (v & OP_IM64) {
1115 gen_expr64(&ops[i].e);
1116 #endif
1117 } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
1118 gen_disp32(&ops[i].e);
1119 } else {
1120 gen_expr32(&ops[i].e);
1125 /* after immediate operands, adjust pc-relative address */
1126 if (pc)
1127 add32le(cur_text_section->data + pc - 4, pc - ind);
/* Return the constraint priority of constraint string STR (we allocate
   first the lowest numbered constraints).  Every letter of the string
   has a priority number, and the largest one found is returned; an
   unknown letter is reported through tcc_error(). */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* keep the largest per-letter priority seen */
    priority = 0;
    for (;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch (c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'R':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'e':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}

/* Return a pointer to the first character of P that is not one of the
   leading constraint modifiers '=', '&', '+' or '%'. */
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}

1189 /* If T (a token) is of the form "%reg" returns the register
1190 number and type, otherwise return -1. */
1191 ST_FUNC int asm_parse_regvar (int t)
1193 const char *s;
1194 Operand op;
1195 if (t < TOK_IDENT)
1196 return -1;
1197 s = table_ident[t - TOK_IDENT]->str;
1198 if (s[0] != '%')
1199 return -1;
1200 t = tok_alloc(s+1, strlen(s)-1)->tok;
1201 unget_tok(t);
1202 unget_tok('%');
1203 parse_operand(tcc_state, &op);
1204 /* Accept only integer regs for now. */
1205 if (op.type & OP_REG)
1206 return op.reg;
1207 else
1208 return -1;
1211 #define REG_OUT_MASK 0x01
1212 #define REG_IN_MASK 0x02
1214 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
1216 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1217 int nb_operands, int nb_outputs,
1218 const uint8_t *clobber_regs,
1219 int *pout_reg)
1221 ASMOperand *op;
1222 int sorted_op[MAX_ASM_OPERANDS];
1223 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1224 const char *str;
1225 uint8_t regs_allocated[NB_ASM_REGS];
1227 /* init fields */
1228 for(i=0;i<nb_operands;i++) {
1229 op = &operands[i];
1230 op->input_index = -1;
1231 op->ref_index = -1;
1232 op->reg = -1;
1233 op->is_memory = 0;
1234 op->is_rw = 0;
1236 /* compute constraint priority and evaluate references to output
1237 constraints if input constraints */
1238 for(i=0;i<nb_operands;i++) {
1239 op = &operands[i];
1240 str = op->constraint;
1241 str = skip_constraint_modifiers(str);
1242 if (isnum(*str) || *str == '[') {
1243 /* this is a reference to another constraint */
1244 k = find_constraint(operands, nb_operands, str, NULL);
1245 if ((unsigned)k >= i || i < nb_outputs)
1246 tcc_error("invalid reference in constraint %d ('%s')",
1247 i, str);
1248 op->ref_index = k;
1249 if (operands[k].input_index >= 0)
1250 tcc_error("cannot reference twice the same operand");
1251 operands[k].input_index = i;
1252 op->priority = 5;
1253 } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
1254 && op->vt->sym
1255 && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
1256 op->priority = 1;
1257 op->reg = reg;
1258 } else {
1259 op->priority = constraint_priority(str);
1263 /* sort operands according to their priority */
1264 for(i=0;i<nb_operands;i++)
1265 sorted_op[i] = i;
1266 for(i=0;i<nb_operands - 1;i++) {
1267 for(j=i+1;j<nb_operands;j++) {
1268 p1 = operands[sorted_op[i]].priority;
1269 p2 = operands[sorted_op[j]].priority;
1270 if (p2 < p1) {
1271 tmp = sorted_op[i];
1272 sorted_op[i] = sorted_op[j];
1273 sorted_op[j] = tmp;
1278 for(i = 0;i < NB_ASM_REGS; i++) {
1279 if (clobber_regs[i])
1280 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1281 else
1282 regs_allocated[i] = 0;
1284 /* esp cannot be used */
1285 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1286 /* ebp cannot be used yet */
1287 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1289 /* allocate registers and generate corresponding asm moves */
1290 for(i=0;i<nb_operands;i++) {
1291 j = sorted_op[i];
1292 op = &operands[j];
1293 str = op->constraint;
1294 /* no need to allocate references */
1295 if (op->ref_index >= 0)
1296 continue;
1297 /* select if register is used for output, input or both */
1298 if (op->input_index >= 0) {
1299 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1300 } else if (j < nb_outputs) {
1301 reg_mask = REG_OUT_MASK;
1302 } else {
1303 reg_mask = REG_IN_MASK;
1305 if (op->reg >= 0) {
1306 if (is_reg_allocated(op->reg))
1307 tcc_error("asm regvar requests register that's taken already");
1308 reg = op->reg;
1309 goto reg_found;
1311 try_next:
1312 c = *str++;
1313 switch(c) {
1314 case '=':
1315 goto try_next;
1316 case '+':
1317 op->is_rw = 1;
1318 /* FALL THRU */
1319 case '&':
1320 if (j >= nb_outputs)
1321 tcc_error("'%c' modifier can only be applied to outputs", c);
1322 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1323 goto try_next;
1324 case 'A':
1325 /* allocate both eax and edx */
1326 if (is_reg_allocated(TREG_XAX) ||
1327 is_reg_allocated(TREG_XDX))
1328 goto try_next;
1329 op->is_llong = 1;
1330 op->reg = TREG_XAX;
1331 regs_allocated[TREG_XAX] |= reg_mask;
1332 regs_allocated[TREG_XDX] |= reg_mask;
1333 break;
1334 case 'a':
1335 reg = TREG_XAX;
1336 goto alloc_reg;
1337 case 'b':
1338 reg = 3;
1339 goto alloc_reg;
1340 case 'c':
1341 reg = TREG_XCX;
1342 goto alloc_reg;
1343 case 'd':
1344 reg = TREG_XDX;
1345 goto alloc_reg;
1346 case 'S':
1347 reg = 6;
1348 goto alloc_reg;
1349 case 'D':
1350 reg = 7;
1351 alloc_reg:
1352 if (is_reg_allocated(reg))
1353 goto try_next;
1354 goto reg_found;
1355 case 'q':
1356 /* eax, ebx, ecx or edx */
1357 for(reg = 0; reg < 4; reg++) {
1358 if (!is_reg_allocated(reg))
1359 goto reg_found;
1361 goto try_next;
1362 case 'r':
1363 case 'R':
1364 case 'p': /* A general address, for x86(64) any register is acceptable*/
1365 /* any general register */
1366 for(reg = 0; reg < 8; reg++) {
1367 if (!is_reg_allocated(reg))
1368 goto reg_found;
1370 goto try_next;
1371 reg_found:
1372 /* now we can reload in the register */
1373 op->is_llong = 0;
1374 op->reg = reg;
1375 regs_allocated[reg] |= reg_mask;
1376 break;
1377 case 'e':
1378 case 'i':
1379 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1380 goto try_next;
1381 break;
1382 case 'I':
1383 case 'N':
1384 case 'M':
1385 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1386 goto try_next;
1387 break;
1388 case 'm':
1389 case 'g':
1390 /* nothing special to do because the operand is already in
1391 memory, except if the pointer itself is stored in a
1392 memory variable (VT_LLOCAL case) */
1393 /* XXX: fix constant case */
1394 /* if it is a reference to a memory zone, it must lie
1395 in a register, so we reserve the register in the
1396 input registers and a load will be generated
1397 later */
1398 if (j < nb_outputs || c == 'm') {
1399 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1400 /* any general register */
1401 for(reg = 0; reg < 8; reg++) {
1402 if (!(regs_allocated[reg] & REG_IN_MASK))
1403 goto reg_found1;
1405 goto try_next;
1406 reg_found1:
1407 /* now we can reload in the register */
1408 regs_allocated[reg] |= REG_IN_MASK;
1409 op->reg = reg;
1410 op->is_memory = 1;
1413 break;
1414 default:
1415 tcc_error("asm constraint %d ('%s') could not be satisfied",
1416 j, op->constraint);
1417 break;
1419 /* if a reference is present for that operand, we assign it too */
1420 if (op->input_index >= 0) {
1421 operands[op->input_index].reg = op->reg;
1422 operands[op->input_index].is_llong = op->is_llong;
1426 /* compute out_reg. It is used to store outputs registers to memory
1427 locations references by pointers (VT_LLOCAL case) */
1428 *pout_reg = -1;
1429 for(i=0;i<nb_operands;i++) {
1430 op = &operands[i];
1431 if (op->reg >= 0 &&
1432 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1433 !op->is_memory) {
1434 for(reg = 0; reg < 8; reg++) {
1435 if (!(regs_allocated[reg] & REG_OUT_MASK))
1436 goto reg_found2;
1438 tcc_error("could not find free output register for reloading");
1439 reg_found2:
1440 *pout_reg = reg;
1441 break;
1445 /* print sorted constraints */
1446 #ifdef ASM_DEBUG
1447 for(i=0;i<nb_operands;i++) {
1448 j = sorted_op[i];
1449 op = &operands[j];
1450 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1452 op->id ? get_tok_str(op->id, NULL) : "",
1453 op->constraint,
1454 op->vt->r,
1455 op->reg);
1457 if (*pout_reg >= 0)
1458 printf("out_reg=%d\n", *pout_reg);
1459 #endif
1462 ST_FUNC void subst_asm_operand(CString *add_str,
1463 SValue *sv, int modifier)
1465 int r, reg, size, val;
1466 char buf[64];
1468 r = sv->r;
1469 if ((r & VT_VALMASK) == VT_CONST) {
1470 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
1471 modifier != 'P')
1472 cstr_ccat(add_str, '$');
1473 if (r & VT_SYM) {
1474 const char *name = get_tok_str(sv->sym->v, NULL);
1475 if (sv->sym->v >= SYM_FIRST_ANOM) {
1476 /* In case of anonymous symbols ("L.42", used
1477 for static data labels) we can't find them
1478 in the C symbol table when later looking up
1479 this name. So enter them now into the asm label
1480 list when we still know the symbol. */
1481 get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
1483 cstr_cat(add_str, name, -1);
1484 if ((uint32_t)sv->c.i == 0)
1485 goto no_offset;
1486 cstr_ccat(add_str, '+');
1488 val = sv->c.i;
1489 if (modifier == 'n')
1490 val = -val;
1491 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1492 cstr_cat(add_str, buf, -1);
1493 no_offset:;
1494 #ifdef TCC_TARGET_X86_64
1495 if (r & VT_LVAL)
1496 cstr_cat(add_str, "(%rip)", -1);
1497 #endif
1498 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1499 #ifdef TCC_TARGET_X86_64
1500 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1501 #else
1502 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1503 #endif
1504 cstr_cat(add_str, buf, -1);
1505 } else if (r & VT_LVAL) {
1506 reg = r & VT_VALMASK;
1507 if (reg >= VT_CONST)
1508 tcc_error("internal compiler error");
1509 snprintf(buf, sizeof(buf), "(%%%s)",
1510 #ifdef TCC_TARGET_X86_64
1511 get_tok_str(TOK_ASM_rax + reg, NULL)
1512 #else
1513 get_tok_str(TOK_ASM_eax + reg, NULL)
1514 #endif
1516 cstr_cat(add_str, buf, -1);
1517 } else {
1518 /* register case */
1519 reg = r & VT_VALMASK;
1520 if (reg >= VT_CONST)
1521 tcc_error("internal compiler error");
1523 /* choose register operand size */
1524 if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
1525 (sv->type.t & VT_BTYPE) == VT_BOOL)
1526 size = 1;
1527 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1528 size = 2;
1529 #ifdef TCC_TARGET_X86_64
1530 else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
1531 (sv->type.t & VT_BTYPE) == VT_PTR)
1532 size = 8;
1533 #endif
1534 else
1535 size = 4;
1536 if (size == 1 && reg >= 4)
1537 size = 4;
1539 if (modifier == 'b') {
1540 if (reg >= 4)
1541 tcc_error("cannot use byte register");
1542 size = 1;
1543 } else if (modifier == 'h') {
1544 if (reg >= 4)
1545 tcc_error("cannot use byte register");
1546 size = -1;
1547 } else if (modifier == 'w') {
1548 size = 2;
1549 } else if (modifier == 'k') {
1550 size = 4;
1551 #ifdef TCC_TARGET_X86_64
1552 } else if (modifier == 'q') {
1553 size = 8;
1554 #endif
1557 switch(size) {
1558 case -1:
1559 reg = TOK_ASM_ah + reg;
1560 break;
1561 case 1:
1562 reg = TOK_ASM_al + reg;
1563 break;
1564 case 2:
1565 reg = TOK_ASM_ax + reg;
1566 break;
1567 default:
1568 reg = TOK_ASM_eax + reg;
1569 break;
1570 #ifdef TCC_TARGET_X86_64
1571 case 8:
1572 reg = TOK_ASM_rax + reg;
1573 break;
1574 #endif
1576 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1577 cstr_cat(add_str, buf, -1);
1581 /* generate prolog and epilog code for asm statement */
1582 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1583 int nb_outputs, int is_output,
1584 uint8_t *clobber_regs,
1585 int out_reg)
1587 uint8_t regs_allocated[NB_ASM_REGS];
1588 ASMOperand *op;
1589 int i, reg;
1591 /* Strictly speaking %Xbp and %Xsp should be included in the
1592 call-preserved registers, but currently it doesn't matter. */
1593 #ifdef TCC_TARGET_X86_64
1594 #ifdef TCC_TARGET_PE
1595 static uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
1596 #else
1597 static uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
1598 #endif
1599 #else
1600 static uint8_t reg_saved[] = { 3, 6, 7 };
1601 #endif
1603 /* mark all used registers */
1604 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1605 for(i = 0; i < nb_operands;i++) {
1606 op = &operands[i];
1607 if (op->reg >= 0)
1608 regs_allocated[op->reg] = 1;
1610 if (!is_output) {
1611 /* generate reg save code */
1612 for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
1613 reg = reg_saved[i];
1614 if (regs_allocated[reg]) {
1615 if (reg >= 8)
1616 g(0x41), reg-=8;
1617 g(0x50 + reg);
1621 /* generate load code */
1622 for(i = 0; i < nb_operands; i++) {
1623 op = &operands[i];
1624 if (op->reg >= 0) {
1625 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1626 op->is_memory) {
1627 /* memory reference case (for both input and
1628 output cases) */
1629 SValue sv;
1630 sv = *op->vt;
1631 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
1632 sv.type.t = VT_PTR;
1633 load(op->reg, &sv);
1634 } else if (i >= nb_outputs || op->is_rw) {
1635 /* load value in register */
1636 load(op->reg, op->vt);
1637 if (op->is_llong) {
1638 SValue sv;
1639 sv = *op->vt;
1640 sv.c.i += 4;
1641 load(TREG_XDX, &sv);
1646 } else {
1647 /* generate save code */
1648 for(i = 0 ; i < nb_outputs; i++) {
1649 op = &operands[i];
1650 if (op->reg >= 0) {
1651 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1652 if (!op->is_memory) {
1653 SValue sv;
1654 sv = *op->vt;
1655 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1656 sv.type.t = VT_PTR;
1657 load(out_reg, &sv);
1659 sv = *op->vt;
1660 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1661 store(op->reg, &sv);
1663 } else {
1664 store(op->reg, op->vt);
1665 if (op->is_llong) {
1666 SValue sv;
1667 sv = *op->vt;
1668 sv.c.i += 4;
1669 store(TREG_XDX, &sv);
1674 /* generate reg restore code */
1675 for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
1676 reg = reg_saved[i];
1677 if (regs_allocated[reg]) {
1678 if (reg >= 8)
1679 g(0x41), reg-=8;
1680 g(0x58 + reg);
1686 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1688 int reg;
1689 TokenSym *ts;
1690 #ifdef TCC_TARGET_X86_64
1691 unsigned int type;
1692 #endif
1694 if (!strcmp(str, "memory") ||
1695 !strcmp(str, "cc") ||
1696 !strcmp(str, "flags"))
1697 return;
1698 ts = tok_alloc(str, strlen(str));
1699 reg = ts->tok;
1700 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1701 reg -= TOK_ASM_eax;
1702 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1703 reg -= TOK_ASM_ax;
1704 #ifdef TCC_TARGET_X86_64
1705 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1706 reg -= TOK_ASM_rax;
1707 } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
1709 #endif
1710 } else {
1711 tcc_error("invalid clobber register '%s'", str);
1713 clobber_regs[reg] = 1;