/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "tcc.h"
#define MAX_OPERANDS 3

#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

#define OPC_B        0x01  /* only used with OPC_WL */
#define OPC_WL       0x02  /* accepts w, l or no suffix */
#define OPC_BWL      (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG      0x04  /* register is added to opcode */
#define OPC_MODRM    0x08  /* modrm encoding */

#define OPCT_MASK    0x70
#define OPC_FWAIT    0x10  /* add fwait opcode */
#define OPC_SHIFT    0x20  /* shift opcodes */
#define OPC_ARITH    0x30  /* arithmetic opcodes */
#define OPC_FARITH   0x40  /* FPU arithmetic opcodes */
#define OPC_TEST     0x50  /* test opcodes */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F       0x100 /* Is secondary map (0x0f prefix) */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

#define OPC_GROUP_SHIFT 13
/* in order to compress the operand type, we use specific operands and
   we OR only with EA */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ORed with any OPT_xxx */
    OPT_EA = 0x80
};
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
# define OP_EA32  (OP_EA << 1)
#else
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif
typedef struct ASMInstr {
    uint16_t sym;
    uint16_t opcode;
    uint16_t instr_type;
    uint8_t nb_ops;
    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
} ASMInstr;

typedef struct Operand {
    uint32_t type;
    int8_t  reg; /* register, -1 if none */
    int8_t  reg2; /* second register, -1 if none */
    uint8_t shift;
    ExprValue e;
} Operand;
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};

#define NB_TEST_OPCODES 30
static const uint8_t test_bits[NB_TEST_OPCODES] = {
    0x00, /* o */
    0x01, /* no */
    0x02, /* b */
    0x02, /* c */
    0x02, /* nae */
    0x03, /* nb */
    0x03, /* nc */
    0x03, /* ae */
    0x04, /* e */
    0x04, /* z */
    0x05, /* ne */
    0x05, /* nz */
    0x06, /* be */
    0x06, /* na */
    0x07, /* nbe */
    0x07, /* a */
    0x08, /* s */
    0x09, /* ns */
    0x0a, /* p */
    0x0a, /* pe */
    0x0b, /* np */
    0x0b, /* po */
    0x0c, /* l */
    0x0c, /* nge */
    0x0d, /* nl */
    0x0d, /* ge */
    0x0e, /* le */
    0x0e, /* ng */
    0x0f, /* nle */
    0x0f, /* g */
};

static const uint8_t segment_prefixes[] = {
    0x26, /* es */
    0x2e, /* cs */
    0x36, /* ss */
    0x3e, /* ds */
    0x64, /* fs */
    0x65  /* gs */
};
static const ASMInstr asm_instrs[] = {
#define ALT(x) x
/* This removes a 0x0f in the second byte */
#define O(o) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o))
/* This constructs instr_type from opcode, type and group. */
#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
#define DEF_ASM_OP0(name, opcode)
#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0 },
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
    /* last operation */
    { 0, },
};

static const uint16_t op0_codes[] = {
#define ALT(x)
#define DEF_ASM_OP0(x, opcode) opcode,
#define DEF_ASM_OP0L(name, opcode, group, instr_type)
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
};
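/* Note: the same instruction description header (i386-asm.h or
   x86_64-asm.h) is included twice above with different definitions of
   the DEF_ASM_OPx macros: once to build asm_instrs[] (the full operand
   templates) and once to build op0_codes[] (only the opcode bytes of
   the zero-operand mnemonics).  Both tables therefore stay in sync
   from a single description. */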
static inline int get_reg_shift(TCCState *s1)
{
    int shift, v;
    v = asm_int_expr(s1);
    switch(v) {
    case 1:
        shift = 0;
        break;
    case 2:
        shift = 1;
        break;
    case 4:
        shift = 2;
        break;
    case 8:
        shift = 3;
        break;
    default:
        expect("1, 2, 4 or 8 constant");
        shift = 0;
        break;
    }
    return shift;
}
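/* (x86-64 only) Parse a numeric register name given as token T, i.e. a
   name of the form "rN" (N = 1..15, with an optional b/w/d size suffix)
   or "crN" for a control register.  On success the register number is
   returned and *type is set to OP_REG8/16/32/64 or OP_CR; on failure
   -1 is returned.  For example "r10d" yields reg 10 with *type set to
   OP_REG32. */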
#ifdef TCC_TARGET_X86_64
static int asm_parse_numeric_reg(int t, int *type)
{
    int reg = -1;
    if (t >= TOK_IDENT && t < tok_ident) {
        const char *s = table_ident[t - TOK_IDENT]->str;
        char c;
        *type = OP_REG64;
        if (*s == 'c') {
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
            return -1;
        /* Don't allow leading '0'. */
        if ((c = *s++) >= '1' && c <= '9')
            reg = c - '0';
        else
            return -1;
        if ((c = *s) >= '0' && c <= '5')
            s++, reg = reg * 10 + c - '0';
        if (reg > 15)
            return -1;
        if ((c = *s) == 0)
            ;
        else if (*type != OP_REG64)
            return -1;
        else if (c == 'b' && !s[1])
            *type = OP_REG8;
        else if (c == 'w' && !s[1])
            *type = OP_REG16;
        else if (c == 'd' && !s[1])
            *type = OP_REG32;
        else
            return -1;
    }
    return reg;
}
#endif
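/* Parse a base or index register following '%' ("%eax".."%edi", and on
   x86-64 also "%rax".."%rdi", "%rip" or a numeric "%rN" form); returns
   the register number and sets *type to OP_REG32 or OP_REG64. */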
static int asm_parse_reg(int *type)
{
    int reg = 0;
    *type = 0;
    if (tok != '%')
        goto error_32;
    next();
    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
        reg = tok - TOK_ASM_eax;
        *type = OP_REG32;
#ifdef TCC_TARGET_X86_64
    } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
        reg = tok - TOK_ASM_rax;
        *type = OP_REG64;
    } else if (tok == TOK_ASM_rip) {
        reg = -2; /* Probably should use different escape code. */
        *type = OP_REG64;
    } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
               && (*type == OP_REG32 || *type == OP_REG64)) {
        ;
#endif
    } else {
    error_32:
        expect("register");
    }
    next();
    return reg;
}
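/* Parse one assembler operand into *op: a register ("%eax", "%st(1)",
   segment, control or debug registers), an immediate ("$expr"), an
   indirect target ("*expr"), or a memory reference of the form
   displacement(base, index, scale). */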
static void parse_operand(TCCState *s1, Operand *op)
{
    ExprValue e;
    int reg, indir;
    const char *p;

    indir = 0;
    if (tok == '*') {
        next();
        indir = OP_INDIR;
    }

    if (tok == '%') {
        next();
        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
            reg = tok - TOK_ASM_al;
            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
            op->reg = reg & 7;
            if ((op->type & OP_REG) && op->reg == TREG_XAX)
                op->type |= OP_EAX;
            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
                op->type |= OP_CL;
            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
                op->type |= OP_DX;
        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
            op->type = OP_DB;
            op->reg = tok - TOK_ASM_dr0;
        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
            op->type = OP_SEG;
            op->reg = tok - TOK_ASM_es;
        } else if (tok == TOK_ASM_st) {
            op->type = OP_ST;
            op->reg = 0;
            next();
            if (tok == '(') {
                next();
                if (tok != TOK_PPNUM)
                    goto reg_error;
                p = tokc.str.data;
                reg = p[0] - '0';
                if ((unsigned)reg >= 8 || p[1] != '\0')
                    goto reg_error;
                op->reg = reg;
                next();
                skip(')');
            }
            if (op->reg == 0)
                op->type |= OP_ST0;
            goto no_skip;
#ifdef TCC_TARGET_X86_64
        } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
            op->type = OP_REG8 | OP_REG8_LOW;
            op->reg = 4 + tok - TOK_ASM_spl;
        } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
            ;
#endif
        } else {
        reg_error:
            tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
        }
        next();
    no_skip: ;
    } else if (tok == '$') {
        /* constant value */
        next();
        asm_expr(s1, &e);
        op->type = OP_IM32;
        op->e = e;
        if (!op->e.sym) {
            if (op->e.v == (uint8_t)op->e.v)
                op->type |= OP_IM8;
            if (op->e.v == (int8_t)op->e.v)
                op->type |= OP_IM8S;
            if (op->e.v == (uint16_t)op->e.v)
                op->type |= OP_IM16;
#ifdef TCC_TARGET_X86_64
            if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
                op->type = OP_IM64;
#endif
        }
    } else {
        /* address(reg,reg2,shift) with all variants */
        op->type = OP_EA;
        op->reg = -1;
        op->reg2 = -1;
        op->shift = 0;
        if (tok != '(') {
            asm_expr(s1, &e);
            op->e = e;
        } else {
            next();
            if (tok == '%') {
                unget_tok('(');
                op->e.v = 0;
                op->e.sym = NULL;
            } else {
                /* bracketed offset expression */
                asm_expr(s1, &e);
                if (tok != ')')
                    expect(")");
                next();
                op->e.v = e.v;
                op->e.sym = e.sym;
            }
            op->e.pcrel = 0;
        }
        if (tok == '(') {
            int type = 0;
            next();
            if (tok != ',') {
                op->reg = asm_parse_reg(&type);
            }
            if (tok == ',') {
                next();
                if (tok != ',') {
                    op->reg2 = asm_parse_reg(&type);
                }
                if (tok == ',') {
                    next();
                    op->shift = get_reg_shift(s1);
                }
            }
            if (type & OP_REG32)
                op->type |= OP_EA32;
            skip(')');
        }
        if (op->reg == -1 && op->reg2 == -1)
            op->type |= OP_ADDR;
    }
    op->type |= indir;
}
/* XXX: unify with C code output ? */
ST_FUNC void gen_expr32(ExprValue *pe)
{
    if (pe->pcrel)
        /* If PC-relative, always set VT_SYM, even without symbol,
           so as to force a relocation to be emitted. */
        gen_addrpc32(VT_SYM, pe->sym, pe->v);
    else
        gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}

#ifdef TCC_TARGET_X86_64
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
/* XXX: unify with C code output ? */
static void gen_disp32(ExprValue *pe)
{
    Sym *sym = pe->sym;
    if (sym && sym->r == cur_text_section->sh_num) {
        /* same section: we can output an absolute value. Note
           that the TCC compiler behaves differently here because
           it always outputs a relocation to ease (future) code
           elimination in the linker */
        gen_le32(pe->v + sym->jnext - ind - 4);
    } else {
        if (sym && sym->type.t == VT_VOID) {
            sym->type.t = VT_FUNC;
            sym->type.ref = NULL;
        }
        gen_addrpc32(VT_SYM, sym, pe->v);
    }
}
/* generate the modrm operand */
static inline int asm_modrm(int reg, Operand *op)
{
    int mod, reg1, reg2, sib_reg1;

    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
        g(0xc0 + (reg << 3) + op->reg);
    } else if (op->reg == -1 && op->reg2 == -1) {
        /* displacement only */
#ifdef TCC_TARGET_X86_64
        g(0x04 + (reg << 3));
        g(0x25);
#else
        g(0x05 + (reg << 3));
#endif
        gen_expr32(&op->e);
#ifdef TCC_TARGET_X86_64
    } else if (op->reg == -2) {
        ExprValue *pe = &op->e;
        g(0x05 + (reg << 3));
        gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
        return ind;
#endif
    } else {
        sib_reg1 = op->reg;
        /* first compute the displacement encoding */
        if (sib_reg1 == -1) {
            sib_reg1 = 5;
            mod = 0x00;
        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
            mod = 0x00;
        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
            mod = 0x40;
        } else {
            mod = 0x80;
        }
        /* compute if sib byte needed */
        reg1 = op->reg;
        if (op->reg2 != -1)
            reg1 = 4;
        g(mod + (reg << 3) + reg1);
        if (reg1 == 4) {
            /* add sib byte */
            reg2 = op->reg2;
            if (reg2 == -1)
                reg2 = 4; /* indicate no index */
            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
        }
        /* add offset */
        if (mod == 0x40) {
            g(op->e.v);
        } else if (mod == 0x80 || op->reg == -1) {
            gen_expr32(&op->e);
        }
    }
    return 0;
}
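/* The REX prefix byte has the layout 0100WRXB.  The constants below
   already include the 0x40 base value so they can simply be ORed
   together: REX_W selects 64 bit operand size, while REX_R, REX_X and
   REX_B extend the modrm reg field, the SIB index and the rm/base
   field respectively. */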
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? 0x48 : 0;
    int saw_high_8bit = 0;
    int i;
    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later). */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                if (ops[i].reg >= 8) {
                    rex |= REX_B;
                    ops[i].reg -= 8;
                } else if (ops[i].type & OP_REG8_LOW)
                    rex |= 0x40;
                else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
                    /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                    saw_high_8bit = ops[i].reg;
                break;
            }
        }
    } else {
        if (regi != -1) {
            if (ops[regi].reg >= 8) {
                rex |= REX_R;
                ops[regi].reg -= 8;
            } else if (ops[regi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[regi].reg;
        }
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
            if (ops[rmi].reg >= 8) {
                rex |= REX_B;
                ops[rmi].reg -= 8;
            } else if (ops[rmi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[rmi].reg;
        }
        if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif
static void maybe_print_stats (void)
{
    static int already = 1;

    if (!already)
    /* print stats about opcodes */
    {
        const struct ASMInstr *pa;
        int freq[4];
        int op_vals[500];
        int nb_op_vals, i, j;

        already = 1;
        nb_op_vals = 0;
        memset(freq, 0, sizeof(freq));
        for(pa = asm_instrs; pa->sym != 0; pa++) {
            freq[pa->nb_ops]++;
            //for(i=0;i<pa->nb_ops;i++) {
            for(j=0;j<nb_op_vals;j++) {
                //if (pa->op_type[i] == op_vals[j])
                if (pa->instr_type == op_vals[j])
                    goto found;
            }
            //op_vals[nb_op_vals++] = pa->op_type[i];
            op_vals[nb_op_vals++] = pa->instr_type;
        found: ;
            //}
        }
        for(i=0;i<nb_op_vals;i++) {
            int v = op_vals[i];
            //if ((v & (v - 1)) != 0)
                printf("%3d: %08x\n", i, v);
        }
        printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
               (int)sizeof(asm_instrs),
               (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
               freq[0], freq[1], freq[2], freq[3]);
    }
}
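/* Assemble one instruction: parse its operands, look up a matching
   template in asm_instrs[], infer the operand size from the mnemonic
   suffix or from register operands, then emit prefixes (0x66/0x67,
   segment, REX), the opcode bytes, the modrm/sib encoding and finally
   any immediate or displacement constants. */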
ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{
    const ASMInstr *pa;
    int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
    int nb_ops, s;
    Operand ops[MAX_OPERANDS], *pop;
    int op_type[3]; /* decoded op type */
    int alltypes; /* OR of all operand types */
    int autosize;
    int p66;
#ifdef TCC_TARGET_X86_64
    int rex64;
#endif

    maybe_print_stats();
    /* force synthetic ';' after prefix instruction, so we can handle */
    /* one-line things like "rep stosb" instead of only "rep\nstosb" */
    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
        unget_tok(';');

    /* get operands */
    pop = ops;
    nb_ops = 0;
    seg_prefix = 0;
    alltypes = 0;
    for(;;) {
        if (tok == ';' || tok == TOK_LINEFEED)
            break;
        if (nb_ops >= MAX_OPERANDS) {
            tcc_error("incorrect number of operands");
        }
        parse_operand(s1, pop);
        if (tok == ':') {
            if (pop->type != OP_SEG || seg_prefix)
                tcc_error("incorrect prefix");
            seg_prefix = segment_prefixes[pop->reg];
            next();
            parse_operand(s1, pop);
            if (!(pop->type & OP_EA)) {
                tcc_error("segment prefix must be followed by memory reference");
            }
        }
        pop++;
        nb_ops++;
        if (tok != ',')
            break;
        next();
    }

    s = 0; /* avoid warning */

    /* optimize matching by using a lookup table (no hashing is needed
       !) */
    for(pa = asm_instrs; pa->sym != 0; pa++) {
        int it = pa->instr_type & OPCT_MASK;
        s = 0;
        if (it == OPC_FARITH) {
            v = opcode - pa->sym;
            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
                continue;
        } else if (it == OPC_ARITH) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
            if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
              {
                /* We need to reject the xxxb opcodes that we accepted above.
                   Note that pa->sym for WLX opcodes is the 'w' token,
                   to get the 'b' token subtract one. */
                if (((opcode - pa->sym + 1) % NBWLX) == 0)
                    continue;
                s++;
              }
        } else if (it == OPC_SHIFT) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
        } else if (it == OPC_TEST) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
                continue;
            /* cmovxx is a test opcode but accepts multiple sizes.
               TCC doesn't accept the suffixed mnemonic, instead we
               simply force size autodetection always. */
            if (pa->instr_type & OPC_WLX)
                s = NBWLX - 1;
        } else if (pa->instr_type & OPC_B) {
#ifdef TCC_TARGET_X86_64
            /* Some instructions don't have the full size but only
               bwl form.  insb e.g. */
            if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
                && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
#endif
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
                continue;
            s = opcode - pa->sym;
        } else if (pa->instr_type & OPC_WLX) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
            s = opcode - pa->sym + 1;
        } else {
            if (pa->sym != opcode)
                continue;
        }
        if (pa->nb_ops != nb_ops)
            continue;
#ifdef TCC_TARGET_X86_64
        /* Special case for moves.  Selecting the IM64->REG64 form
           should only be done if we really have an >32bit imm64, and that
           is hardcoded.  Ignore it here. */
        if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
            && ops[1].type == OP_REG64
            && !(pa->instr_type & OPC_0F))
            continue;
#endif
        /* now decode and check each operand */
        alltypes = 0;
        for(i = 0; i < nb_ops; i++) {
            int op1, op2;
            op1 = pa->op_type[i];
            op2 = op1 & 0x1f;
            switch(op2) {
            case OPT_IM:
                v = OP_IM8 | OP_IM16 | OP_IM32;
                break;
            case OPT_REG:
                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_REGW:
                v = OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_IMW:
                v = OP_IM16 | OP_IM32;
                break;
            case OPT_MMXSSE:
                v = OP_MMX | OP_SSE;
                break;
            case OPT_DISP:
            case OPT_DISP8:
                v = OP_ADDR;
                break;
            default:
                v = 1 << op2;
                break;
            }
            if (op1 & OPT_EA)
                v |= OP_EA;
            op_type[i] = v;
            if ((ops[i].type & v) == 0)
                goto next;
            alltypes |= ops[i].type;
        }
        /* all is matching ! */
        break;
    next: ;
    }
    if (pa->sym == 0) {
        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
            int b;
            b = op0_codes[opcode - TOK_ASM_first];
            if (b & 0xff00)
                g(b >> 8);
            g(b);
            return;
        } else if (opcode <= TOK_ASM_alllast) {
            tcc_error("bad operand with opcode '%s'",
                      get_tok_str(opcode, NULL));
        } else {
            tcc_error("unknown opcode '%s'",
                      get_tok_str(opcode, NULL));
        }
    }
    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
    autosize = NBWLX-1;
#ifdef TCC_TARGET_X86_64
    /* XXX the autosize should rather be zero, to not have to adjust this
       all the time. */
    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
        autosize = NBWLX-2;
#endif
    if (s == autosize) {
        /* Check for register operands providing hints about the size.
           Start from the end, i.e. destination operands.  This matters
           only for opcodes accepting different sized registers, lar and lsl
           are such opcodes. */
        for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
                s = reg_to_size[ops[i].type & OP_REG];
        }
        if (s == autosize) {
            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                s = 2;
            else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                     (ops[0].type & OP_EA))
                s = NBWLX - 2;
            else
                tcc_error("cannot infer opcode suffix");
        }
    }

#ifdef TCC_TARGET_X86_64
    /* Generate addr32 prefix if needed */
    for(i = 0; i < nb_ops; i++) {
        if (ops[i].type & OP_EA32) {
            g(0x67);
            break;
        }
    }
#endif
    /* generate data16 prefix if needed */
    p66 = 0;
    if (s == 1)
        p66 = 1;
    else {
        /* accepting mmx+sse in all operands --> needs 0x66 to
           switch to sse mode.  Accepting only sse in an operand --> is
           already SSE insn and needs 0x66/f2/f3 handling. */
        for (i = 0; i < nb_ops; i++)
            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
                && ops[i].type & OP_SSE)
                p66 = 1;
    }
    if (p66)
        g(0x66);
#ifdef TCC_TARGET_X86_64
    rex64 = 0;
    if (s == 3 || (alltypes & OP_REG64)) {
        /* generate REX prefix */
        int default64 = 0;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] == OP_REG64) {
                /* If only 64bit regs are accepted in one operand
                   this is a default64 instruction without need for
                   REX prefixes. */
                default64 = 1;
                break;
            }
        }
        /* XXX find better encoding for the default64 instructions. */
        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
              && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
              && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
              && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
              && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
            && !default64)
            rex64 = 1;
    }
#endif
    /* now generates the operation */
    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
        g(0x9b);
    if (seg_prefix)
        g(seg_prefix);

    v = pa->opcode;
    if (pa->instr_type & OPC_0F)
        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
        /* kludge for imul $im, %reg */
        nb_ops = 3;
        ops[2] = ops[1];
        op_type[2] = op_type[1];
    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
        v--; /* int $3 case */
        nb_ops = 0;
    } else if ((v == 0x06 || v == 0x07)) {
        if (ops[0].reg >= 4) {
            /* push/pop %fs or %gs */
            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
        } else {
            v += ops[0].reg << 3;
        }
        nb_ops = 0;
    } else if (v <= 0x05) {
        /* arith case */
        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
        /* fpu arith case */
        v += ((opcode - pa->sym) / 6) << 3;
    }

    /* search which operand will be used for modrm */
    modrm_index = -1;
    modreg_index = -1;
    if (pa->instr_type & OPC_MODRM) {
        if (!nb_ops) {
            /* A modrm opcode without operands is a special case (e.g. mfence).
               It has a group and acts as if there's a register operand 0
               (ax). */
            i = 0;
            ops[i].type = OP_REG;
            ops[i].reg = 0;
            goto modrm_found;
        }
        /* first look for an ea operand */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & OP_EA)
                goto modrm_found;
        }
        /* then if not found, a register or indirection (shift instructions) */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
                goto modrm_found;
        }
#ifdef ASM_DEBUG
        tcc_error("bad op table");
#endif
    modrm_found:
        modrm_index = i;
        /* if a register is used in another operand then it is
           used instead of group */
        for(i = 0;i < nb_ops; i++) {
            int t = op_type[i];
            if (i != modrm_index &&
                (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
                modreg_index = i;
                break;
            }
        }
    }
#ifdef TCC_TARGET_X86_64
    asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
#endif

    if (pa->instr_type & OPC_REG) {
        /* mov $im, %reg case */
        if (v == 0xb0 && s >= 1)
            v += 7;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                v += ops[i].reg;
                break;
            }
        }
    }
    if (pa->instr_type & OPC_B)
        v += s >= 1;
    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
        Sym *sym;
        int jmp_disp;

        /* see if we can really generate the jump with a byte offset */
        sym = ops[0].e.sym;
        if (!sym)
            goto no_short_jump;
        if (sym->r != cur_text_section->sh_num)
            goto no_short_jump;
        jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
        if (jmp_disp == (int8_t)jmp_disp) {
            /* OK to generate jump */
            ops[0].e.sym = 0;
            ops[0].e.v = jmp_disp;
            op_type[0] = OP_IM8S;
        } else {
        no_short_jump:
            /* long jump will be allowed. need to modify the
               opcode slightly */
            if (v == 0xeb) /* jmp */
                v = 0xe9;
            else if (v == 0x70) /* jcc */
                v += 0x0f10;
            else
                tcc_error("invalid displacement");
        }
    }
    if (OPCT_IS(pa->instr_type, OPC_TEST))
        v += test_bits[opcode - pa->sym];
    op1 = v >> 16;
    if (op1)
        g(op1);
    op1 = (v >> 8) & 0xff;
    if (op1)
        g(op1);
    g(v);

    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
        reg = (opcode - pa->sym) / NBWLX;
        if (reg == 6)
            reg = 7;
    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
        reg = (opcode - pa->sym) / NBWLX;
    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
        reg = (opcode - pa->sym) / 6;
    } else {
        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
    }

    pc = 0;
    if (pa->instr_type & OPC_MODRM) {
        /* if a register is used in another operand then it is
           used instead of group */
        if (modreg_index >= 0)
            reg = ops[modreg_index].reg;
        pc = asm_modrm(reg, &ops[modrm_index]);
    }

    /* emit constants */
#ifndef TCC_TARGET_X86_64
    if (!(pa->instr_type & OPC_0F)
        && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
        /* ljmp or lcall kludge */
        gen_expr32(&ops[1].e);
        if (ops[0].e.sym)
            tcc_error("cannot relocate");
        gen_le16(ops[0].e.v);
        return;
    }
#endif
    for(i = 0;i < nb_ops; i++) {
        v = op_type[i];
        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
            /* if multiple sizes are given it means we must look
               at the op size */
            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
                if (s == 0)
                    v = OP_IM8;
                else if (s == 1)
                    v = OP_IM16;
                else if (s == 2 || (v & OP_IM64) == 0)
                    v = OP_IM32;
                else
                    v = OP_IM64;
            }

            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
                tcc_error("cannot relocate");

            if (v & (OP_IM8 | OP_IM8S)) {
                g(ops[i].e.v);
            } else if (v & OP_IM16) {
                gen_le16(ops[i].e.v);
#ifdef TCC_TARGET_X86_64
            } else if (v & OP_IM64) {
                gen_expr64(&ops[i].e);
#endif
            } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
                gen_disp32(&ops[i].e);
            } else {
                gen_expr32(&ops[i].e);
            }
        }
    }

    /* after immediate operands, adjust pc-relative address */
    if (pc)
        add32le(cur_text_section->data + pc - 4, pc - ind);
}
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* we take the lowest priority */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'R':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'e':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}
/* If T (a token) is of the form "%reg" returns the register
   number and type, otherwise return -1. */
ST_FUNC int asm_parse_regvar (int t)
{
    const char *s;
    Operand op;
    if (t < TOK_IDENT)
        return -1;
    s = table_ident[t - TOK_IDENT]->str;
    if (s[0] != '%')
        return -1;
    t = tok_alloc(s+1, strlen(s)-1)->tok;
    unget_tok(t);
    unget_tok('%');
    parse_operand(tcc_state, &op);
    /* Accept only integer regs for now. */
    if (op.type & OP_REG)
        return op.reg;
    else
        return -1;
}
#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
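/* Allocate registers for the operands of a GCC-style inline asm
   statement.  Operands are sorted by constraint priority, clobbered
   registers and the stack/frame pointer are excluded, and each
   constraint letter ('a', 'r', 'q', 'm', ...) is satisfied in turn;
   *pout_reg receives a scratch register used for outputs that live in
   memory referenced through a pointer (VT_LLOCAL case). */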
ST_FUNC void asm_compute_constraints(ASMOperand *operands,
                                     int nb_operands, int nb_outputs,
                                     const uint8_t *clobber_regs,
                                     int *pout_reg)
{
    ASMOperand *op;
    int sorted_op[MAX_ASM_OPERANDS];
    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
    const char *str;
    uint8_t regs_allocated[NB_ASM_REGS];

    /* init fields */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        op->input_index = -1;
        op->ref_index = -1;
        op->reg = -1;
        op->is_memory = 0;
        op->is_rw = 0;
    }
    /* compute constraint priority and evaluate references to output
       constraints if input constraints */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        str = op->constraint;
        str = skip_constraint_modifiers(str);
        if (isnum(*str) || *str == '[') {
            /* this is a reference to another constraint */
            k = find_constraint(operands, nb_operands, str, NULL);
            if ((unsigned)k >= i || i < nb_outputs)
                tcc_error("invalid reference in constraint %d ('%s')",
                          i, str);
            op->ref_index = k;
            if (operands[k].input_index >= 0)
                tcc_error("cannot reference twice the same operand");
            operands[k].input_index = i;
            op->priority = 5;
        } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
                   && op->vt->sym
                   && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
            op->priority = 1;
            op->reg = reg;
        } else {
            op->priority = constraint_priority(str);
        }
    }

    /* sort operands according to their priority */
    for(i=0;i<nb_operands;i++)
        sorted_op[i] = i;
    for(i=0;i<nb_operands - 1;i++) {
        for(j=i+1;j<nb_operands;j++) {
            p1 = operands[sorted_op[i]].priority;
            p2 = operands[sorted_op[j]].priority;
            if (p2 < p1) {
                tmp = sorted_op[i];
                sorted_op[i] = sorted_op[j];
                sorted_op[j] = tmp;
            }
        }
    }

    for(i = 0;i < NB_ASM_REGS; i++) {
        if (clobber_regs[i])
            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
        else
            regs_allocated[i] = 0;
    }
    /* esp cannot be used */
    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
    /* ebp cannot be used yet */
    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
    /* allocate registers and generate corresponding asm moves */
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        str = op->constraint;
        /* no need to allocate references */
        if (op->ref_index >= 0)
            continue;
        /* select if register is used for output, input or both */
        if (op->input_index >= 0) {
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
        } else if (j < nb_outputs) {
            reg_mask = REG_OUT_MASK;
        } else {
            reg_mask = REG_IN_MASK;
        }
        if (op->reg >= 0) {
            if (is_reg_allocated(op->reg))
                tcc_error("asm regvar requests register that's taken already");
            reg = op->reg;
            goto reg_found;
        }
    try_next:
        c = *str++;
        switch(c) {
        case '=':
            goto try_next;
        case '+':
            op->is_rw = 1;
            /* FALL THRU */
        case '&':
            if (j >= nb_outputs)
                tcc_error("'%c' modifier can only be applied to outputs", c);
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
            goto try_next;
        case 'A':
            /* allocate both eax and edx */
            if (is_reg_allocated(TREG_XAX) ||
                is_reg_allocated(TREG_XDX))
                goto try_next;
            op->is_llong = 1;
            op->reg = TREG_XAX;
            regs_allocated[TREG_XAX] |= reg_mask;
            regs_allocated[TREG_XDX] |= reg_mask;
            break;
        case 'a':
            reg = TREG_XAX;
            goto alloc_reg;
        case 'b':
            reg = 3;
            goto alloc_reg;
        case 'c':
            reg = TREG_XCX;
            goto alloc_reg;
        case 'd':
            reg = TREG_XDX;
            goto alloc_reg;
        case 'S':
            reg = 6;
            goto alloc_reg;
        case 'D':
            reg = 7;
        alloc_reg:
            if (is_reg_allocated(reg))
                goto try_next;
            goto reg_found;
        case 'q':
            /* eax, ebx, ecx or edx */
            for(reg = 0; reg < 4; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        case 'r':
        case 'R':
        case 'p': /* A general address, for x86(64) any register is acceptable */
            /* any general register */
            for(reg = 0; reg < 8; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        reg_found:
            /* now we can reload in the register */
            op->is_llong = 0;
            op->reg = reg;
            regs_allocated[reg] |= reg_mask;
            break;
        case 'e':
        case 'i':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
                goto try_next;
            break;
        case 'I':
        case 'N':
        case 'M':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
                goto try_next;
            break;
        case 'm':
        case 'g':
            /* nothing special to do because the operand is already in
               memory, except if the pointer itself is stored in a
               memory variable (VT_LLOCAL case) */
            /* XXX: fix constant case */
            /* if it is a reference to a memory zone, it must lie
               in a register, so we reserve the register in the
               input registers and a load will be generated
               later */
            if (j < nb_outputs || c == 'm') {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    /* any general register */
                    for(reg = 0; reg < 8; reg++) {
                        if (!(regs_allocated[reg] & REG_IN_MASK))
                            goto reg_found1;
                    }
                    goto try_next;
                reg_found1:
                    /* now we can reload in the register */
                    regs_allocated[reg] |= REG_IN_MASK;
                    op->reg = reg;
                    op->is_memory = 1;
                }
            }
            break;
        default:
            tcc_error("asm constraint %d ('%s') could not be satisfied",
                      j, op->constraint);
            break;
        }
        /* if a reference is present for that operand, we assign it too */
        if (op->input_index >= 0) {
            operands[op->input_index].reg = op->reg;
            operands[op->input_index].is_llong = op->is_llong;
        }
    }

    /* compute out_reg. It is used to store output registers to memory
       locations referenced by pointers (VT_LLOCAL case) */
    *pout_reg = -1;
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0 &&
            (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
            !op->is_memory) {
            for(reg = 0; reg < 8; reg++) {
                if (!(regs_allocated[reg] & REG_OUT_MASK))
                    goto reg_found2;
            }
            tcc_error("could not find free output register for reloading");
        reg_found2:
            *pout_reg = reg;
            break;
        }
    }
    /* print sorted constraints */
#ifdef ASM_DEBUG
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
               j,
               op->id ? get_tok_str(op->id, NULL) : "",
               op->constraint,
               op->vt->r,
               op->reg);
    }
    if (*pout_reg >= 0)
        printf("out_reg=%d\n", *pout_reg);
#endif
}
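/* Substitute one %-operand of an asm template: emit the textual form
   of SV into add_str, i.e. an immediate prefixed with '$', a stack slot
   as N(%ebp)/N(%rbp), a memory reference through a register, or a
   register name sized according to the operand type and the
   'b'/'h'/'w'/'k'/'q' modifier. */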
ST_FUNC void subst_asm_operand(CString *add_str,
                               SValue *sv, int modifier)
{
    int r, reg, size, val;
    char buf[64];

    r = sv->r;
    if ((r & VT_VALMASK) == VT_CONST) {
        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
            modifier != 'P')
            cstr_ccat(add_str, '$');
        if (r & VT_SYM) {
            const char *name = get_tok_str(sv->sym->v, NULL);
            if (sv->sym->v >= SYM_FIRST_ANOM) {
                /* In case of anonymous symbols ("L.42", used
                   for static data labels) we can't find them
                   in the C symbol table when later looking up
                   this name.  So enter them now into the asm label
                   list when we still know the symbol. */
                get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
            }
            cstr_cat(add_str, name, -1);
            if ((uint32_t)sv->c.i == 0)
                goto no_offset;
            cstr_ccat(add_str, '+');
        }
        val = sv->c.i;
        if (modifier == 'n')
            val = -val;
        snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
        cstr_cat(add_str, buf, -1);
    no_offset:;
#ifdef TCC_TARGET_X86_64
        if (r & VT_LVAL)
            cstr_cat(add_str, "(%rip)", -1);
#endif
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
#ifdef TCC_TARGET_X86_64
        snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
#else
        snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
#endif
        cstr_cat(add_str, buf, -1);
    } else if (r & VT_LVAL) {
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_error("internal compiler error");
        snprintf(buf, sizeof(buf), "(%%%s)",
#ifdef TCC_TARGET_X86_64
                 get_tok_str(TOK_ASM_rax + reg, NULL)
#else
                 get_tok_str(TOK_ASM_eax + reg, NULL)
#endif
                 );
        cstr_cat(add_str, buf, -1);
    } else {
        /* register case */
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_error("internal compiler error");

        /* choose register operand size */
        if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
            (sv->type.t & VT_BTYPE) == VT_BOOL)
            size = 1;
        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
            size = 2;
#ifdef TCC_TARGET_X86_64
        else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
                 (sv->type.t & VT_BTYPE) == VT_PTR)
            size = 8;
#endif
        else
            size = 4;
        if (size == 1 && reg >= 4)
            size = 4;

        if (modifier == 'b') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = 1;
        } else if (modifier == 'h') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = -1;
        } else if (modifier == 'w') {
            size = 2;
        } else if (modifier == 'k') {
            size = 4;
#ifdef TCC_TARGET_X86_64
        } else if (modifier == 'q') {
            size = 8;
#endif
        }

        switch(size) {
        case -1:
            reg = TOK_ASM_ah + reg;
            break;
        case 1:
            reg = TOK_ASM_al + reg;
            break;
        case 2:
            reg = TOK_ASM_ax + reg;
            break;
        default:
            reg = TOK_ASM_eax + reg;
            break;
#ifdef TCC_TARGET_X86_64
        case 8:
            reg = TOK_ASM_rax + reg;
            break;
#endif
        }
        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
        cstr_cat(add_str, buf, -1);
    }
}
/* generate prolog and epilog code for asm statement */
ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
                          int nb_outputs, int is_output,
                          uint8_t *clobber_regs,
                          int out_reg)
{
    uint8_t regs_allocated[NB_ASM_REGS];
    ASMOperand *op;
    int i, reg;

    /* Strictly speaking %Xbp and %Xsp should be included in the
       call-preserved registers, but currently it doesn't matter. */
#ifdef TCC_TARGET_X86_64
#ifdef TCC_TARGET_PE
    static uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
#else
    static uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
#endif
#else
    static uint8_t reg_saved[] = { 3, 6, 7 };
#endif

    /* mark all used registers */
    memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
    for(i = 0; i < nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0)
            regs_allocated[op->reg] = 1;
    }
    if (!is_output) {
        /* generate reg save code */
        for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                if (reg >= 8)
                    g(0x41), reg-=8;
                g(0x50 + reg);
            }
        }

        /* generate load code */
        for(i = 0; i < nb_operands; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
                    op->is_memory) {
                    /* memory reference case (for both input and
                       output cases) */
                    SValue sv;
                    sv = *op->vt;
                    sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
                    sv.type.t = VT_PTR;
                    load(op->reg, &sv);
                } else if (i >= nb_outputs || op->is_rw) {
                    /* load value in register */
                    load(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        load(TREG_XDX, &sv);
                    }
                }
            }
        }
    } else {
        /* generate save code */
        for(i = 0 ; i < nb_outputs; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    if (!op->is_memory) {
                        SValue sv;
                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
                        sv.type.t = VT_PTR;
                        load(out_reg, &sv);

                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | out_reg;
                        store(op->reg, &sv);
                    }
                } else {
                    store(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        store(TREG_XDX, &sv);
                    }
                }
            }
        }
    }
    /* generate reg restore code */
    for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
        reg = reg_saved[i];
        if (regs_allocated[reg]) {
            if (reg >= 8)
                g(0x41), reg-=8;
            g(0x58 + reg);
        }
    }
}
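/* Record a clobbered register given its name from the asm clobber
   list; the special names "memory", "cc" and "flags" are accepted but
   ignored here. */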
ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
{
    int reg;
    TokenSym *ts;
#ifdef TCC_TARGET_X86_64
    int type;
#endif

    if (!strcmp(str, "memory") ||
        !strcmp(str, "cc") ||
        !strcmp(str, "flags"))
        return;
    ts = tok_alloc(str, strlen(str));
    reg = ts->tok;
    if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
        reg -= TOK_ASM_eax;
    } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
        reg -= TOK_ASM_ax;
#ifdef TCC_TARGET_X86_64
    } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
        reg -= TOK_ASM_rax;
    } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
        ;
#endif
    } else {
        tcc_error("invalid clobber register '%s'", str);
    }
    clobber_regs[reg] = 1;
}