Fix more attribute placements
[tinycc.git] / i386-asm.c
blob55c95af95d567b1237b6d6a2d400ec01116d6459
/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
22 #include "tcc.h"
/* Maximum number of operands of an instruction template / parsed
   instruction. */
#define MAX_OPERANDS 3

/* Token ranges of the mnemonics.  Tokens in [first, last] index
   op0_codes[]; tokens up to alllast are treated as known mnemonics when
   reporting errors. */
#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

/* Flags stored in ASMInstr.instr_type (low bits). */
#define OPC_B          0x01  /* only used with OPC_WL */
#define OPC_WL         0x02  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04 /* register is added to opcode */
#define OPC_MODRM      0x08 /* modrm encoding */

/* Instruction class, an enumerated (not bit-flag) field selected with
   OPCT_MASK and compared with OPCT_IS(). */
#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10 /* add fwait opcode */
#define OPC_SHIFT      0x20 /* shift opcodes */
#define OPC_ARITH      0x30 /* arithmetic opcodes */
#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
#define OPC_TEST       0x50 /* test opcodes */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
#define OPC_48        0x200 /* Always has REX prefix */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

/* The 3-bit modrm "group" value is stored in instr_type starting at this
   bit (see the T() macro and the `>> OPC_GROUP_SHIFT) & 7` in asm_opcode). */
#define OPC_GROUP_SHIFT 13
/* In order to compress the operand type stored in the instruction
   templates, we use specific operand codes and only OR them with OPT_EA.
   The low 5 bits (`& 0x1f` in asm_opcode) hold one of the codes below;
   each non-composite code N corresponds to the operand mask bit (1 << N). */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
/* Operand type masks: one bit per non-composite OPT_xxx code.  They are
   stored in Operand.type and in the decoded op_type[] of asm_opcode. */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
/* Bit 31.  Shift as uint32_t: shifting the signed int 0x40000000 left by
   one is undefined behaviour in C. */
# define OP_EA32  ((uint32_t)OP_EA << 1)
#else
/* Not available on i386: defined as 0 so that tests against them are
   always false. */
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* Target-width names for the accumulator, counter and data registers. */
#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif
149 typedef struct ASMInstr {
150 uint16_t sym;
151 uint16_t opcode;
152 uint16_t instr_type;
153 uint8_t nb_ops;
154 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
155 } ASMInstr;
157 typedef struct Operand {
158 uint32_t type;
159 int8_t reg; /* register, -1 if none */
160 int8_t reg2; /* second register, -1 if none */
161 uint8_t shift;
162 ExprValue e;
163 } Operand;
/* Maps (Operand.type & OP_REG) to the operand size index used by
   asm_opcode (0 = byte, 1 = word, 2 = long, 3 = quad).  The positional
   initializer below is the spelled-out form of the designated one kept in
   the comment. */
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
/* Number of condition-code suffixes handled for OPC_TEST instructions. */
#define NB_TEST_OPCODES 30

/* Condition code added to the base opcode of an OPC_TEST instruction,
   indexed by (opcode token - template token).  Synonymous suffixes share
   the same value. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};
/* Segment override prefix bytes, indexed by (segment token - TOK_ASM_es),
   i.e. by Operand.reg of an OP_SEG operand. */
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};
221 static const ASMInstr asm_instrs[] = {
222 #define ALT(x) x
223 /* This removes a 0x0f in the second byte */
224 #define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
225 /* This constructs instr_type from opcode, type and group. */
226 #define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
227 #define DEF_ASM_OP0(name, opcode)
228 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
229 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
230 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
231 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
232 #ifdef TCC_TARGET_X86_64
233 # include "x86_64-asm.h"
234 #else
235 # include "i386-asm.h"
236 #endif
237 /* last operation */
238 { 0, },
241 static const uint16_t op0_codes[] = {
242 #define ALT(x)
243 #define DEF_ASM_OP0(x, opcode) opcode,
244 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
245 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
246 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
247 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
248 #ifdef TCC_TARGET_X86_64
249 # include "x86_64-asm.h"
250 #else
251 # include "i386-asm.h"
252 #endif
255 static inline int get_reg_shift(TCCState *s1)
257 int shift, v;
258 v = asm_int_expr(s1);
259 switch(v) {
260 case 1:
261 shift = 0;
262 break;
263 case 2:
264 shift = 1;
265 break;
266 case 4:
267 shift = 2;
268 break;
269 case 8:
270 shift = 3;
271 break;
272 default:
273 expect("1, 2, 4 or 8 constant");
274 shift = 0;
275 break;
277 return shift;
#ifdef TCC_TARGET_X86_64
/* Try to interpret identifier token T as a numbered register name of the
   form "r<N>", "r<N>b", "r<N>w", "r<N>d" or "cr<N>", with N a decimal
   number without leading zero and not above 15.

   Returns the register number and stores the operand class in *TYPE
   (OP_REG64, OP_REG8, OP_REG16, OP_REG32 or OP_CR).  Returns -1 if T does
   not name such a register; *TYPE may already have been overwritten in
   that case.

   NOTE(review): single digit numbers 1..9 are accepted as well, so "r1"
   parses as register 1 -- confirm whether that is intended. */
static int asm_parse_numeric_reg(int t, unsigned int *type)
{
    int reg = -1;

    if (t >= TOK_IDENT && t < tok_ident) {
        const char *s = table_ident[t - TOK_IDENT]->str;
        char c;

        *type = OP_REG64;
        if (*s == 'c') {
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
            return -1;
        /* Don't allow leading '0'.  */
        if ((c = *s++) >= '1' && c <= '9')
            reg = c - '0';
        else
            return -1;
        /* optional second digit */
        if ((c = *s) >= '0' && c <= '5')
            s++, reg = reg * 10 + c - '0';
        if (reg > 15)
            return -1;
        if ((c = *s) == 0)
            ; /* no suffix: keep the class chosen above */
        else if (*type != OP_REG64)
            return -1; /* size suffixes are only valid on "r<N>" */
        else if (c == 'b' && !s[1])
            *type = OP_REG8;
        else if (c == 'w' && !s[1])
            *type = OP_REG16;
        else if (c == 'd' && !s[1])
            *type = OP_REG32;
        else
            return -1;
    }
    return reg;
}
#endif
320 static int asm_parse_reg(unsigned int *type)
322 int reg = 0;
323 *type = 0;
324 if (tok != '%')
325 goto error_32;
326 next();
327 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
328 reg = tok - TOK_ASM_eax;
329 *type = OP_REG32;
330 #ifdef TCC_TARGET_X86_64
331 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
332 reg = tok - TOK_ASM_rax;
333 *type = OP_REG64;
334 } else if (tok == TOK_ASM_rip) {
335 reg = -2; /* Probably should use different escape code. */
336 *type = OP_REG64;
337 } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
338 && (*type == OP_REG32 || *type == OP_REG64)) {
340 #endif
341 } else {
342 error_32:
343 expect("register");
345 next();
346 return reg;
349 static void parse_operand(TCCState *s1, Operand *op)
351 ExprValue e;
352 int reg, indir;
353 const char *p;
355 indir = 0;
356 if (tok == '*') {
357 next();
358 indir = OP_INDIR;
361 if (tok == '%') {
362 next();
363 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
364 reg = tok - TOK_ASM_al;
365 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
366 op->reg = reg & 7;
367 if ((op->type & OP_REG) && op->reg == TREG_XAX)
368 op->type |= OP_EAX;
369 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
370 op->type |= OP_CL;
371 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
372 op->type |= OP_DX;
373 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
374 op->type = OP_DB;
375 op->reg = tok - TOK_ASM_dr0;
376 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
377 op->type = OP_SEG;
378 op->reg = tok - TOK_ASM_es;
379 } else if (tok == TOK_ASM_st) {
380 op->type = OP_ST;
381 op->reg = 0;
382 next();
383 if (tok == '(') {
384 next();
385 if (tok != TOK_PPNUM)
386 goto reg_error;
387 p = tokc.str.data;
388 reg = p[0] - '0';
389 if ((unsigned)reg >= 8 || p[1] != '\0')
390 goto reg_error;
391 op->reg = reg;
392 next();
393 skip(')');
395 if (op->reg == 0)
396 op->type |= OP_ST0;
397 goto no_skip;
398 #ifdef TCC_TARGET_X86_64
399 } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
400 op->type = OP_REG8 | OP_REG8_LOW;
401 op->reg = 4 + tok - TOK_ASM_spl;
402 } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
404 #endif
405 } else {
406 reg_error:
407 tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
409 next();
410 no_skip: ;
411 } else if (tok == '$') {
412 /* constant value */
413 next();
414 asm_expr(s1, &e);
415 op->type = OP_IM32;
416 op->e = e;
417 if (!op->e.sym) {
418 if (op->e.v == (uint8_t)op->e.v)
419 op->type |= OP_IM8;
420 if (op->e.v == (int8_t)op->e.v)
421 op->type |= OP_IM8S;
422 if (op->e.v == (uint16_t)op->e.v)
423 op->type |= OP_IM16;
424 #ifdef TCC_TARGET_X86_64
425 if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
426 op->type = OP_IM64;
427 #endif
429 } else {
430 /* address(reg,reg2,shift) with all variants */
431 op->type = OP_EA;
432 op->reg = -1;
433 op->reg2 = -1;
434 op->shift = 0;
435 if (tok != '(') {
436 asm_expr(s1, &e);
437 op->e = e;
438 } else {
439 next();
440 if (tok == '%') {
441 unget_tok('(');
442 op->e.v = 0;
443 op->e.sym = NULL;
444 } else {
445 /* bracketed offset expression */
446 asm_expr(s1, &e);
447 if (tok != ')')
448 expect(")");
449 next();
450 op->e.v = e.v;
451 op->e.sym = e.sym;
453 op->e.pcrel = 0;
455 if (tok == '(') {
456 unsigned int type = 0;
457 next();
458 if (tok != ',') {
459 op->reg = asm_parse_reg(&type);
461 if (tok == ',') {
462 next();
463 if (tok != ',') {
464 op->reg2 = asm_parse_reg(&type);
466 if (tok == ',') {
467 next();
468 op->shift = get_reg_shift(s1);
471 if (type & OP_REG32)
472 op->type |= OP_EA32;
473 skip(')');
475 if (op->reg == -1 && op->reg2 == -1)
476 op->type |= OP_ADDR;
478 op->type |= indir;
481 /* XXX: unify with C code output ? */
482 ST_FUNC void gen_expr32(ExprValue *pe)
484 if (pe->pcrel)
485 /* If PC-relative, always set VT_SYM, even without symbol,
486 so as to force a relocation to be emitted. */
487 gen_addrpc32(VT_SYM, pe->sym, pe->v);
488 else
489 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* Emit the 64-bit value of expression PE through gen_addr64(); VT_SYM is
   passed only when the expression refers to a symbol. */
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
499 /* XXX: unify with C code output ? */
500 static void gen_disp32(ExprValue *pe)
502 Sym *sym = pe->sym;
503 ElfSym *esym = elfsym(sym);
504 if (esym && esym->st_shndx == cur_text_section->sh_num) {
505 /* same section: we can output an absolute value. Note
506 that the TCC compiler behaves differently here because
507 it always outputs a relocation to ease (future) code
508 elimination in the linker */
509 gen_le32(pe->v + esym->st_value - ind - 4);
510 } else {
511 if (sym && sym->type.t == VT_VOID) {
512 sym->type.t = VT_FUNC;
513 sym->type.ref = NULL;
515 gen_addrpc32(VT_SYM, sym, pe->v);
519 /* generate the modrm operand */
520 static inline int asm_modrm(int reg, Operand *op)
522 int mod, reg1, reg2, sib_reg1;
524 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
525 g(0xc0 + (reg << 3) + op->reg);
526 } else if (op->reg == -1 && op->reg2 == -1) {
527 /* displacement only */
528 #ifdef TCC_TARGET_X86_64
529 g(0x04 + (reg << 3));
530 g(0x25);
531 #else
532 g(0x05 + (reg << 3));
533 #endif
534 gen_expr32(&op->e);
535 #ifdef TCC_TARGET_X86_64
536 } else if (op->reg == -2) {
537 ExprValue *pe = &op->e;
538 g(0x05 + (reg << 3));
539 gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
540 return ind;
541 #endif
542 } else {
543 sib_reg1 = op->reg;
544 /* fist compute displacement encoding */
545 if (sib_reg1 == -1) {
546 sib_reg1 = 5;
547 mod = 0x00;
548 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
549 mod = 0x00;
550 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
551 mod = 0x40;
552 } else {
553 mod = 0x80;
555 /* compute if sib byte needed */
556 reg1 = op->reg;
557 if (op->reg2 != -1)
558 reg1 = 4;
559 g(mod + (reg << 3) + reg1);
560 if (reg1 == 4) {
561 /* add sib byte */
562 reg2 = op->reg2;
563 if (reg2 == -1)
564 reg2 = 4; /* indicate no index */
565 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
567 /* add offset */
568 if (mod == 0x40) {
569 g(op->e.v);
570 } else if (mod == 0x80 || op->reg == -1) {
571 gen_expr32(&op->e);
574 return 0;
#ifdef TCC_TARGET_X86_64
/* REX prefix values; each one includes the fixed 0x40 part. */
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

/* Helper of asm_rex(): return the REX bits required by the register of
   OP when that register is encoded in the field extended by EXT_BIT.
   A register number >= 8 is reduced by 8 in place.  If OP is one of
   %ah/%ch/%dh/%bh its number is stored in *SAW_HIGH_8BIT. */
static int asm_rex_reg(Operand *op, int ext_bit, int *saw_high_8bit)
{
    if (op->reg >= 8) {
        op->reg -= 8;
        return ext_bit;
    }
    if (op->type & OP_REG8_LOW)
        return 0x40;
    if ((op->type & OP_REG8) && op->reg >= 4)
        /* An 8 bit reg >= 4 without REG8_LOW is ah/ch/dh/bh */
        *saw_high_8bit = op->reg;
    return 0;
}

/* Emit the REX prefix of the current instruction, if one is needed.

   width64  non-zero when REX.W is required
   ops      parsed operands; register numbers >= 8 are reduced by 8
   nb_ops   number of operands
   op_type  decoded template operand types
   regi     index of the operand encoded in the modrm reg field, or -1
   rmi      index of the operand encoded in the modrm r/m field, or -1 if
            the instruction has no modrm byte

   Using %ah/%ch/%dh/%bh in an instruction that needs a REX prefix is
   reported as an error. */
static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? REX_W : 0;
    int saw_high_8bit = 0;
    int i;

    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later).  */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                rex |= asm_rex_reg(&ops[i], REX_B, &saw_high_8bit);
                break;
            }
        }
    } else {
        if (regi != -1)
            rex |= asm_rex_reg(&ops[regi], REX_R, &saw_high_8bit);
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA))
            rex |= asm_rex_reg(&ops[rmi], REX_B, &saw_high_8bit);
        if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif
640 static void maybe_print_stats (void)
642 static int already = 1;
643 if (!already)
644 /* print stats about opcodes */
646 const struct ASMInstr *pa;
647 int freq[4];
648 int op_vals[500];
649 int nb_op_vals, i, j;
651 already = 1;
652 nb_op_vals = 0;
653 memset(freq, 0, sizeof(freq));
654 for(pa = asm_instrs; pa->sym != 0; pa++) {
655 freq[pa->nb_ops]++;
656 //for(i=0;i<pa->nb_ops;i++) {
657 for(j=0;j<nb_op_vals;j++) {
658 //if (pa->op_type[i] == op_vals[j])
659 if (pa->instr_type == op_vals[j])
660 goto found;
662 //op_vals[nb_op_vals++] = pa->op_type[i];
663 op_vals[nb_op_vals++] = pa->instr_type;
664 found: ;
667 for(i=0;i<nb_op_vals;i++) {
668 int v = op_vals[i];
669 //if ((v & (v - 1)) != 0)
670 printf("%3d: %08x\n", i, v);
672 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
673 (int)sizeof(asm_instrs),
674 (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
675 freq[0], freq[1], freq[2], freq[3]);
679 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
681 const ASMInstr *pa;
682 int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
683 int nb_ops, s;
684 Operand ops[MAX_OPERANDS], *pop;
685 int op_type[3]; /* decoded op type */
686 int alltypes; /* OR of all operand types */
687 int autosize;
688 int p66;
689 #ifdef TCC_TARGET_X86_64
690 int rex64;
691 #endif
693 maybe_print_stats();
694 /* force synthetic ';' after prefix instruction, so we can handle */
695 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
696 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
697 unget_tok(';');
699 /* get operands */
700 pop = ops;
701 nb_ops = 0;
702 seg_prefix = 0;
703 alltypes = 0;
704 for(;;) {
705 if (tok == ';' || tok == TOK_LINEFEED)
706 break;
707 if (nb_ops >= MAX_OPERANDS) {
708 tcc_error("incorrect number of operands");
710 parse_operand(s1, pop);
711 if (tok == ':') {
712 if (pop->type != OP_SEG || seg_prefix)
713 tcc_error("incorrect prefix");
714 seg_prefix = segment_prefixes[pop->reg];
715 next();
716 parse_operand(s1, pop);
717 if (!(pop->type & OP_EA)) {
718 tcc_error("segment prefix must be followed by memory reference");
721 pop++;
722 nb_ops++;
723 if (tok != ',')
724 break;
725 next();
728 s = 0; /* avoid warning */
730 again:
731 /* optimize matching by using a lookup table (no hashing is needed
732 !) */
733 for(pa = asm_instrs; pa->sym != 0; pa++) {
734 int it = pa->instr_type & OPCT_MASK;
735 s = 0;
736 if (it == OPC_FARITH) {
737 v = opcode - pa->sym;
738 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
739 continue;
740 } else if (it == OPC_ARITH) {
741 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
742 continue;
743 s = (opcode - pa->sym) % NBWLX;
744 if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
746 /* We need to reject the xxxb opcodes that we accepted above.
747 Note that pa->sym for WLX opcodes is the 'w' token,
748 to get the 'b' token subtract one. */
749 if (((opcode - pa->sym + 1) % NBWLX) == 0)
750 continue;
751 s++;
753 } else if (it == OPC_SHIFT) {
754 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
755 continue;
756 s = (opcode - pa->sym) % NBWLX;
757 } else if (it == OPC_TEST) {
758 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
759 continue;
760 /* cmovxx is a test opcode but accepts multiple sizes.
761 The suffixes aren't encoded in the table, instead we
762 simply force size autodetection always and deal with suffixed
763 variants below when we don't find e.g. "cmovzl". */
764 if (pa->instr_type & OPC_WLX)
765 s = NBWLX - 1;
766 } else if (pa->instr_type & OPC_B) {
767 #ifdef TCC_TARGET_X86_64
768 /* Some instructions don't have the full size but only
769 bwl form. insb e.g. */
770 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
771 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
772 continue;
773 #endif
774 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
775 continue;
776 s = opcode - pa->sym;
777 } else if (pa->instr_type & OPC_WLX) {
778 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
779 continue;
780 s = opcode - pa->sym + 1;
781 } else {
782 if (pa->sym != opcode)
783 continue;
785 if (pa->nb_ops != nb_ops)
786 continue;
787 #ifdef TCC_TARGET_X86_64
788 /* Special case for moves. Selecting the IM64->REG64 form
789 should only be done if we really have an >32bit imm64, and that
790 is hardcoded. Ignore it here. */
791 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
792 && (ops[1].type & OP_REG) == OP_REG64
793 && !(pa->instr_type & OPC_0F))
794 continue;
795 #endif
796 /* now decode and check each operand */
797 alltypes = 0;
798 for(i = 0; i < nb_ops; i++) {
799 int op1, op2;
800 op1 = pa->op_type[i];
801 op2 = op1 & 0x1f;
802 switch(op2) {
803 case OPT_IM:
804 v = OP_IM8 | OP_IM16 | OP_IM32;
805 break;
806 case OPT_REG:
807 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
808 break;
809 case OPT_REGW:
810 v = OP_REG16 | OP_REG32 | OP_REG64;
811 break;
812 case OPT_IMW:
813 v = OP_IM16 | OP_IM32;
814 break;
815 case OPT_MMXSSE:
816 v = OP_MMX | OP_SSE;
817 break;
818 case OPT_DISP:
819 case OPT_DISP8:
820 v = OP_ADDR;
821 break;
822 default:
823 v = 1 << op2;
824 break;
826 if (op1 & OPT_EA)
827 v |= OP_EA;
828 op_type[i] = v;
829 if ((ops[i].type & v) == 0)
830 goto next;
831 alltypes |= ops[i].type;
833 /* all is matching ! */
834 break;
835 next: ;
837 if (pa->sym == 0) {
838 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
839 int b;
840 b = op0_codes[opcode - TOK_ASM_first];
841 if (b & 0xff00)
842 g(b >> 8);
843 g(b);
844 return;
845 } else if (opcode <= TOK_ASM_alllast) {
846 tcc_error("bad operand with opcode '%s'",
847 get_tok_str(opcode, NULL));
848 } else {
849 /* Special case for cmovcc, we accept size suffixes but ignore
850 them, but we don't want them to blow up our tables. */
851 TokenSym *ts = table_ident[opcode - TOK_IDENT];
852 if (ts->len >= 6
853 && strchr("wlq", ts->str[ts->len-1])
854 && !memcmp(ts->str, "cmov", 4)) {
855 opcode = tok_alloc(ts->str, ts->len-1)->tok;
856 goto again;
858 tcc_error("unknown opcode '%s'", ts->str);
861 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
862 autosize = NBWLX-1;
863 #ifdef TCC_TARGET_X86_64
864 /* XXX the autosize should rather be zero, to not have to adjust this
865 all the time. */
866 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
867 autosize = NBWLX-2;
868 #endif
869 if (s == autosize) {
870 /* Check for register operands providing hints about the size.
871 Start from the end, i.e. destination operands. This matters
872 only for opcodes accepting different sized registers, lar and lsl
873 are such opcodes. */
874 for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
875 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
876 s = reg_to_size[ops[i].type & OP_REG];
878 if (s == autosize) {
879 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
880 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
881 s = 2;
882 else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
883 (ops[0].type & OP_EA))
884 s = NBWLX - 2;
885 else
886 tcc_error("cannot infer opcode suffix");
890 #ifdef TCC_TARGET_X86_64
891 /* Generate addr32 prefix if needed */
892 for(i = 0; i < nb_ops; i++) {
893 if (ops[i].type & OP_EA32) {
894 g(0x67);
895 break;
898 #endif
899 /* generate data16 prefix if needed */
900 p66 = 0;
901 if (s == 1)
902 p66 = 1;
903 else {
904 /* accepting mmx+sse in all operands --> needs 0x66 to
905 switch to sse mode. Accepting only sse in an operand --> is
906 already SSE insn and needs 0x66/f2/f3 handling. */
907 for (i = 0; i < nb_ops; i++)
908 if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
909 && ops[i].type & OP_SSE)
910 p66 = 1;
912 if (p66)
913 g(0x66);
914 #ifdef TCC_TARGET_X86_64
915 rex64 = 0;
916 if (pa->instr_type & OPC_48)
917 rex64 = 1;
918 else if (s == 3 || (alltypes & OP_REG64)) {
919 /* generate REX prefix */
920 int default64 = 0;
921 for(i = 0; i < nb_ops; i++) {
922 if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
923 /* If only 64bit regs are accepted in one operand
924 this is a default64 instruction without need for
925 REX prefixes, except for movabs(0xb8). */
926 default64 = 1;
927 break;
930 /* XXX find better encoding for the default64 instructions. */
931 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
932 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
933 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
934 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
935 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
936 && !default64)
937 rex64 = 1;
939 #endif
941 /* now generates the operation */
942 if (OPCT_IS(pa->instr_type, OPC_FWAIT))
943 g(0x9b);
944 if (seg_prefix)
945 g(seg_prefix);
947 v = pa->opcode;
948 if (pa->instr_type & OPC_0F)
949 v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
950 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
951 /* kludge for imul $im, %reg */
952 nb_ops = 3;
953 ops[2] = ops[1];
954 op_type[2] = op_type[1];
955 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
956 v--; /* int $3 case */
957 nb_ops = 0;
958 } else if ((v == 0x06 || v == 0x07)) {
959 if (ops[0].reg >= 4) {
960 /* push/pop %fs or %gs */
961 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
962 } else {
963 v += ops[0].reg << 3;
965 nb_ops = 0;
966 } else if (v <= 0x05) {
967 /* arith case */
968 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
969 } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
970 /* fpu arith case */
971 v += ((opcode - pa->sym) / 6) << 3;
974 /* search which operand will be used for modrm */
975 modrm_index = -1;
976 modreg_index = -1;
977 if (pa->instr_type & OPC_MODRM) {
978 if (!nb_ops) {
979 /* A modrm opcode without operands is a special case (e.g. mfence).
980 It has a group and acts as if there's an register operand 0
981 (ax). */
982 i = 0;
983 ops[i].type = OP_REG;
984 ops[i].reg = 0;
985 goto modrm_found;
987 /* first look for an ea operand */
988 for(i = 0;i < nb_ops; i++) {
989 if (op_type[i] & OP_EA)
990 goto modrm_found;
992 /* then if not found, a register or indirection (shift instructions) */
993 for(i = 0;i < nb_ops; i++) {
994 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
995 goto modrm_found;
997 #ifdef ASM_DEBUG
998 tcc_error("bad op table");
999 #endif
1000 modrm_found:
1001 modrm_index = i;
1002 /* if a register is used in another operand then it is
1003 used instead of group */
1004 for(i = 0;i < nb_ops; i++) {
1005 int t = op_type[i];
1006 if (i != modrm_index &&
1007 (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
1008 modreg_index = i;
1009 break;
1013 #ifdef TCC_TARGET_X86_64
1014 asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
1015 #endif
1017 if (pa->instr_type & OPC_REG) {
1018 /* mov $im, %reg case */
1019 if (v == 0xb0 && s >= 1)
1020 v += 7;
1021 for(i = 0; i < nb_ops; i++) {
1022 if (op_type[i] & (OP_REG | OP_ST)) {
1023 v += ops[i].reg;
1024 break;
1028 if (pa->instr_type & OPC_B)
1029 v += s >= 1;
1030 if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
1031 ElfSym *esym;
1032 int jmp_disp;
1034 /* see if we can really generate the jump with a byte offset */
1035 esym = elfsym(ops[0].e.sym);
1036 if (!esym || esym->st_shndx != cur_text_section->sh_num)
1037 goto no_short_jump;
1038 jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff);
1039 if (jmp_disp == (int8_t)jmp_disp) {
1040 /* OK to generate jump */
1041 ops[0].e.sym = 0;
1042 ops[0].e.v = jmp_disp;
1043 op_type[0] = OP_IM8S;
1044 } else {
1045 no_short_jump:
1046 /* long jump will be allowed. need to modify the
1047 opcode slightly */
1048 if (v == 0xeb) /* jmp */
1049 v = 0xe9;
1050 else if (v == 0x70) /* jcc */
1051 v += 0x0f10;
1052 else
1053 tcc_error("invalid displacement");
1056 if (OPCT_IS(pa->instr_type, OPC_TEST))
1057 v += test_bits[opcode - pa->sym];
1058 op1 = v >> 16;
1059 if (op1)
1060 g(op1);
1061 op1 = (v >> 8) & 0xff;
1062 if (op1)
1063 g(op1);
1064 g(v);
1066 if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
1067 reg = (opcode - pa->sym) / NBWLX;
1068 if (reg == 6)
1069 reg = 7;
1070 } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
1071 reg = (opcode - pa->sym) / NBWLX;
1072 } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
1073 reg = (opcode - pa->sym) / 6;
1074 } else {
1075 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
1078 pc = 0;
1079 if (pa->instr_type & OPC_MODRM) {
1080 /* if a register is used in another operand then it is
1081 used instead of group */
1082 if (modreg_index >= 0)
1083 reg = ops[modreg_index].reg;
1084 pc = asm_modrm(reg, &ops[modrm_index]);
1087 /* emit constants */
1088 #ifndef TCC_TARGET_X86_64
1089 if (!(pa->instr_type & OPC_0F)
1090 && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
1091 /* ljmp or lcall kludge */
1092 gen_expr32(&ops[1].e);
1093 if (ops[0].e.sym)
1094 tcc_error("cannot relocate");
1095 gen_le16(ops[0].e.v);
1096 return;
1098 #endif
1099 for(i = 0;i < nb_ops; i++) {
1100 v = op_type[i];
1101 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
1102 /* if multiple sizes are given it means we must look
1103 at the op size */
1104 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
1105 if (s == 0)
1106 v = OP_IM8;
1107 else if (s == 1)
1108 v = OP_IM16;
1109 else if (s == 2 || (v & OP_IM64) == 0)
1110 v = OP_IM32;
1111 else
1112 v = OP_IM64;
1115 if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
1116 tcc_error("cannot relocate");
1118 if (v & (OP_IM8 | OP_IM8S)) {
1119 g(ops[i].e.v);
1120 } else if (v & OP_IM16) {
1121 gen_le16(ops[i].e.v);
1122 #ifdef TCC_TARGET_X86_64
1123 } else if (v & OP_IM64) {
1124 gen_expr64(&ops[i].e);
1125 #endif
1126 } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
1127 gen_disp32(&ops[i].e);
1128 } else {
1129 gen_expr32(&ops[i].e);
1134 /* after immediate operands, adjust pc-relative address */
1135 if (pc)
1136 add32le(cur_text_section->data + pc - 4, pc - ind);
/* Return the constraint priority of constraint string STR (we allocate
   first the lowest numbered constraints).

   Every constraint letter belongs to a class numbered 0 to 4; the result
   is the largest class number found in STR, and 0 for an empty string.
   An unknown letter is reported with tcc_error(). */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* we take the lowest priority, i.e. the largest class number */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'R':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'e':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}
/* Return a pointer to the first character of P that is not one of the
   constraint modifiers '=', '&', '+' or '%'. */
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}
1198 /* If T (a token) is of the form "%reg" returns the register
1199 number and type, otherwise return -1. */
1200 ST_FUNC int asm_parse_regvar (int t)
1202 const char *s;
1203 Operand op;
1204 if (t < TOK_IDENT)
1205 return -1;
1206 s = table_ident[t - TOK_IDENT]->str;
1207 if (s[0] != '%')
1208 return -1;
1209 t = tok_alloc(s+1, strlen(s)-1)->tok;
1210 unget_tok(t);
1211 unget_tok('%');
1212 parse_operand(tcc_state, &op);
1213 /* Accept only integer regs for now. */
1214 if (op.type & OP_REG)
1215 return op.reg;
1216 else
1217 return -1;
1220 #define REG_OUT_MASK 0x01
1221 #define REG_IN_MASK 0x02
1223 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
1225 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1226 int nb_operands, int nb_outputs,
1227 const uint8_t *clobber_regs,
1228 int *pout_reg)
1230 ASMOperand *op;
1231 int sorted_op[MAX_ASM_OPERANDS];
1232 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1233 const char *str;
1234 uint8_t regs_allocated[NB_ASM_REGS];
1236 /* init fields */
1237 for(i=0;i<nb_operands;i++) {
1238 op = &operands[i];
1239 op->input_index = -1;
1240 op->ref_index = -1;
1241 op->reg = -1;
1242 op->is_memory = 0;
1243 op->is_rw = 0;
1245 /* compute constraint priority and evaluate references to output
1246 constraints if input constraints */
1247 for(i=0;i<nb_operands;i++) {
1248 op = &operands[i];
1249 str = op->constraint;
1250 str = skip_constraint_modifiers(str);
1251 if (isnum(*str) || *str == '[') {
1252 /* this is a reference to another constraint */
1253 k = find_constraint(operands, nb_operands, str, NULL);
1254 if ((unsigned)k >= i || i < nb_outputs)
1255 tcc_error("invalid reference in constraint %d ('%s')",
1256 i, str);
1257 op->ref_index = k;
1258 if (operands[k].input_index >= 0)
1259 tcc_error("cannot reference twice the same operand");
1260 operands[k].input_index = i;
1261 op->priority = 5;
1262 } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
1263 && op->vt->sym
1264 && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
1265 op->priority = 1;
1266 op->reg = reg;
1267 } else {
1268 op->priority = constraint_priority(str);
1272 /* sort operands according to their priority */
1273 for(i=0;i<nb_operands;i++)
1274 sorted_op[i] = i;
1275 for(i=0;i<nb_operands - 1;i++) {
1276 for(j=i+1;j<nb_operands;j++) {
1277 p1 = operands[sorted_op[i]].priority;
1278 p2 = operands[sorted_op[j]].priority;
1279 if (p2 < p1) {
1280 tmp = sorted_op[i];
1281 sorted_op[i] = sorted_op[j];
1282 sorted_op[j] = tmp;
1287 for(i = 0;i < NB_ASM_REGS; i++) {
1288 if (clobber_regs[i])
1289 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1290 else
1291 regs_allocated[i] = 0;
1293 /* esp cannot be used */
1294 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1295 /* ebp cannot be used yet */
1296 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1298 /* allocate registers and generate corresponding asm moves */
1299 for(i=0;i<nb_operands;i++) {
1300 j = sorted_op[i];
1301 op = &operands[j];
1302 str = op->constraint;
1303 /* no need to allocate references */
1304 if (op->ref_index >= 0)
1305 continue;
1306 /* select if register is used for output, input or both */
1307 if (op->input_index >= 0) {
1308 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1309 } else if (j < nb_outputs) {
1310 reg_mask = REG_OUT_MASK;
1311 } else {
1312 reg_mask = REG_IN_MASK;
1314 if (op->reg >= 0) {
1315 if (is_reg_allocated(op->reg))
1316 tcc_error("asm regvar requests register that's taken already");
1317 reg = op->reg;
1318 goto reg_found;
1320 try_next:
1321 c = *str++;
1322 switch(c) {
1323 case '=':
1324 goto try_next;
1325 case '+':
1326 op->is_rw = 1;
1327 /* FALL THRU */
1328 case '&':
1329 if (j >= nb_outputs)
1330 tcc_error("'%c' modifier can only be applied to outputs", c);
1331 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1332 goto try_next;
1333 case 'A':
1334 /* allocate both eax and edx */
1335 if (is_reg_allocated(TREG_XAX) ||
1336 is_reg_allocated(TREG_XDX))
1337 goto try_next;
1338 op->is_llong = 1;
1339 op->reg = TREG_XAX;
1340 regs_allocated[TREG_XAX] |= reg_mask;
1341 regs_allocated[TREG_XDX] |= reg_mask;
1342 break;
1343 case 'a':
1344 reg = TREG_XAX;
1345 goto alloc_reg;
1346 case 'b':
1347 reg = 3;
1348 goto alloc_reg;
1349 case 'c':
1350 reg = TREG_XCX;
1351 goto alloc_reg;
1352 case 'd':
1353 reg = TREG_XDX;
1354 goto alloc_reg;
1355 case 'S':
1356 reg = 6;
1357 goto alloc_reg;
1358 case 'D':
1359 reg = 7;
1360 alloc_reg:
1361 if (is_reg_allocated(reg))
1362 goto try_next;
1363 goto reg_found;
1364 case 'q':
1365 /* eax, ebx, ecx or edx */
1366 for(reg = 0; reg < 4; reg++) {
1367 if (!is_reg_allocated(reg))
1368 goto reg_found;
1370 goto try_next;
1371 case 'r':
1372 case 'R':
1373 case 'p': /* A general address, for x86(64) any register is acceptable*/
1374 /* any general register */
1375 for(reg = 0; reg < 8; reg++) {
1376 if (!is_reg_allocated(reg))
1377 goto reg_found;
1379 goto try_next;
1380 reg_found:
1381 /* now we can reload in the register */
1382 op->is_llong = 0;
1383 op->reg = reg;
1384 regs_allocated[reg] |= reg_mask;
1385 break;
1386 case 'e':
1387 case 'i':
1388 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1389 goto try_next;
1390 break;
1391 case 'I':
1392 case 'N':
1393 case 'M':
1394 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1395 goto try_next;
1396 break;
1397 case 'm':
1398 case 'g':
1399 /* nothing special to do because the operand is already in
1400 memory, except if the pointer itself is stored in a
1401 memory variable (VT_LLOCAL case) */
1402 /* XXX: fix constant case */
1403 /* if it is a reference to a memory zone, it must lie
1404 in a register, so we reserve the register in the
1405 input registers and a load will be generated
1406 later */
1407 if (j < nb_outputs || c == 'm') {
1408 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1409 /* any general register */
1410 for(reg = 0; reg < 8; reg++) {
1411 if (!(regs_allocated[reg] & REG_IN_MASK))
1412 goto reg_found1;
1414 goto try_next;
1415 reg_found1:
1416 /* now we can reload in the register */
1417 regs_allocated[reg] |= REG_IN_MASK;
1418 op->reg = reg;
1419 op->is_memory = 1;
1422 break;
1423 default:
1424 tcc_error("asm constraint %d ('%s') could not be satisfied",
1425 j, op->constraint);
1426 break;
1428 /* if a reference is present for that operand, we assign it too */
1429 if (op->input_index >= 0) {
1430 operands[op->input_index].reg = op->reg;
1431 operands[op->input_index].is_llong = op->is_llong;
1435 /* compute out_reg. It is used to store outputs registers to memory
1436 locations references by pointers (VT_LLOCAL case) */
1437 *pout_reg = -1;
1438 for(i=0;i<nb_operands;i++) {
1439 op = &operands[i];
1440 if (op->reg >= 0 &&
1441 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1442 !op->is_memory) {
1443 for(reg = 0; reg < 8; reg++) {
1444 if (!(regs_allocated[reg] & REG_OUT_MASK))
1445 goto reg_found2;
1447 tcc_error("could not find free output register for reloading");
1448 reg_found2:
1449 *pout_reg = reg;
1450 break;
1454 /* print sorted constraints */
1455 #ifdef ASM_DEBUG
1456 for(i=0;i<nb_operands;i++) {
1457 j = sorted_op[i];
1458 op = &operands[j];
1459 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1461 op->id ? get_tok_str(op->id, NULL) : "",
1462 op->constraint,
1463 op->vt->r,
1464 op->reg);
1466 if (*pout_reg >= 0)
1467 printf("out_reg=%d\n", *pout_reg);
1468 #endif
1471 ST_FUNC void subst_asm_operand(CString *add_str,
1472 SValue *sv, int modifier)
1474 int r, reg, size, val;
1475 char buf[64];
1477 r = sv->r;
1478 if ((r & VT_VALMASK) == VT_CONST) {
1479 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
1480 modifier != 'P')
1481 cstr_ccat(add_str, '$');
1482 if (r & VT_SYM) {
1483 const char *name = get_tok_str(sv->sym->v, NULL);
1484 if (sv->sym->v >= SYM_FIRST_ANOM) {
1485 /* In case of anonymous symbols ("L.42", used
1486 for static data labels) we can't find them
1487 in the C symbol table when later looking up
1488 this name. So enter them now into the asm label
1489 list when we still know the symbol. */
1490 get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
1492 cstr_cat(add_str, name, -1);
1493 if ((uint32_t)sv->c.i == 0)
1494 goto no_offset;
1495 cstr_ccat(add_str, '+');
1497 val = sv->c.i;
1498 if (modifier == 'n')
1499 val = -val;
1500 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1501 cstr_cat(add_str, buf, -1);
1502 no_offset:;
1503 #ifdef TCC_TARGET_X86_64
1504 if (r & VT_LVAL)
1505 cstr_cat(add_str, "(%rip)", -1);
1506 #endif
1507 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1508 #ifdef TCC_TARGET_X86_64
1509 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1510 #else
1511 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1512 #endif
1513 cstr_cat(add_str, buf, -1);
1514 } else if (r & VT_LVAL) {
1515 reg = r & VT_VALMASK;
1516 if (reg >= VT_CONST)
1517 tcc_error("internal compiler error");
1518 snprintf(buf, sizeof(buf), "(%%%s)",
1519 #ifdef TCC_TARGET_X86_64
1520 get_tok_str(TOK_ASM_rax + reg, NULL)
1521 #else
1522 get_tok_str(TOK_ASM_eax + reg, NULL)
1523 #endif
1525 cstr_cat(add_str, buf, -1);
1526 } else {
1527 /* register case */
1528 reg = r & VT_VALMASK;
1529 if (reg >= VT_CONST)
1530 tcc_error("internal compiler error");
1532 /* choose register operand size */
1533 if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
1534 (sv->type.t & VT_BTYPE) == VT_BOOL)
1535 size = 1;
1536 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1537 size = 2;
1538 #ifdef TCC_TARGET_X86_64
1539 else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
1540 (sv->type.t & VT_BTYPE) == VT_PTR)
1541 size = 8;
1542 #endif
1543 else
1544 size = 4;
1545 if (size == 1 && reg >= 4)
1546 size = 4;
1548 if (modifier == 'b') {
1549 if (reg >= 4)
1550 tcc_error("cannot use byte register");
1551 size = 1;
1552 } else if (modifier == 'h') {
1553 if (reg >= 4)
1554 tcc_error("cannot use byte register");
1555 size = -1;
1556 } else if (modifier == 'w') {
1557 size = 2;
1558 } else if (modifier == 'k') {
1559 size = 4;
1560 #ifdef TCC_TARGET_X86_64
1561 } else if (modifier == 'q') {
1562 size = 8;
1563 #endif
1566 switch(size) {
1567 case -1:
1568 reg = TOK_ASM_ah + reg;
1569 break;
1570 case 1:
1571 reg = TOK_ASM_al + reg;
1572 break;
1573 case 2:
1574 reg = TOK_ASM_ax + reg;
1575 break;
1576 default:
1577 reg = TOK_ASM_eax + reg;
1578 break;
1579 #ifdef TCC_TARGET_X86_64
1580 case 8:
1581 reg = TOK_ASM_rax + reg;
1582 break;
1583 #endif
1585 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1586 cstr_cat(add_str, buf, -1);
1590 /* generate prolog and epilog code for asm statement */
1591 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1592 int nb_outputs, int is_output,
1593 uint8_t *clobber_regs,
1594 int out_reg)
1596 uint8_t regs_allocated[NB_ASM_REGS];
1597 ASMOperand *op;
1598 int i, reg;
1600 /* Strictly speaking %Xbp and %Xsp should be included in the
1601 call-preserved registers, but currently it doesn't matter. */
1602 #ifdef TCC_TARGET_X86_64
1603 #ifdef TCC_TARGET_PE
1604 static uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
1605 #else
1606 static uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
1607 #endif
1608 #else
1609 static uint8_t reg_saved[] = { 3, 6, 7 };
1610 #endif
1612 /* mark all used registers */
1613 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1614 for(i = 0; i < nb_operands;i++) {
1615 op = &operands[i];
1616 if (op->reg >= 0)
1617 regs_allocated[op->reg] = 1;
1619 if (!is_output) {
1620 /* generate reg save code */
1621 for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
1622 reg = reg_saved[i];
1623 if (regs_allocated[reg]) {
1624 if (reg >= 8)
1625 g(0x41), reg-=8;
1626 g(0x50 + reg);
1630 /* generate load code */
1631 for(i = 0; i < nb_operands; i++) {
1632 op = &operands[i];
1633 if (op->reg >= 0) {
1634 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1635 op->is_memory) {
1636 /* memory reference case (for both input and
1637 output cases) */
1638 SValue sv;
1639 sv = *op->vt;
1640 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
1641 sv.type.t = VT_PTR;
1642 load(op->reg, &sv);
1643 } else if (i >= nb_outputs || op->is_rw) {
1644 /* load value in register */
1645 load(op->reg, op->vt);
1646 if (op->is_llong) {
1647 SValue sv;
1648 sv = *op->vt;
1649 sv.c.i += 4;
1650 load(TREG_XDX, &sv);
1655 } else {
1656 /* generate save code */
1657 for(i = 0 ; i < nb_outputs; i++) {
1658 op = &operands[i];
1659 if (op->reg >= 0) {
1660 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1661 if (!op->is_memory) {
1662 SValue sv;
1663 sv = *op->vt;
1664 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1665 sv.type.t = VT_PTR;
1666 load(out_reg, &sv);
1668 sv = *op->vt;
1669 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1670 store(op->reg, &sv);
1672 } else {
1673 store(op->reg, op->vt);
1674 if (op->is_llong) {
1675 SValue sv;
1676 sv = *op->vt;
1677 sv.c.i += 4;
1678 store(TREG_XDX, &sv);
1683 /* generate reg restore code */
1684 for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
1685 reg = reg_saved[i];
1686 if (regs_allocated[reg]) {
1687 if (reg >= 8)
1688 g(0x41), reg-=8;
1689 g(0x58 + reg);
1695 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1697 int reg;
1698 TokenSym *ts;
1699 #ifdef TCC_TARGET_X86_64
1700 unsigned int type;
1701 #endif
1703 if (!strcmp(str, "memory") ||
1704 !strcmp(str, "cc") ||
1705 !strcmp(str, "flags"))
1706 return;
1707 ts = tok_alloc(str, strlen(str));
1708 reg = ts->tok;
1709 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1710 reg -= TOK_ASM_eax;
1711 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1712 reg -= TOK_ASM_ax;
1713 #ifdef TCC_TARGET_X86_64
1714 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1715 reg -= TOK_ASM_rax;
1716 } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
1718 #endif
1719 } else {
1720 tcc_error("invalid clobber register '%s'", str);
1722 clobber_regs[reg] = 1;