/*
 * x86-asm: Correct register size for pointer ops
 * tinycc.git / i386-asm.c
 */
1 /*
2 * i386 specific functions for TCC assembler
4 * Copyright (c) 2001, 2002 Fabrice Bellard
5 * Copyright (c) 2009 Frédéric Feret (x86_64 support)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "tcc.h"
24 /* #define NB_ASM_REGS 8 */
25 #define MAX_OPERANDS 3
26 #define NB_SAVED_REGS 3
28 #define TOK_ASM_first TOK_ASM_clc
29 #define TOK_ASM_last TOK_ASM_emms
30 #define TOK_ASM_alllast TOK_ASM_subps
32 #define OPC_B 0x01 /* only used with OPC_WL */
33 #define OPC_WL 0x02 /* accepts w, l or no suffix */
34 #define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
35 #define OPC_REG 0x04 /* register is added to opcode */
36 #define OPC_MODRM 0x08 /* modrm encoding */
38 #define OPCT_MASK 0x70
39 #define OPC_FWAIT 0x10 /* add fwait opcode */
40 #define OPC_SHIFT 0x20 /* shift opcodes */
41 #define OPC_ARITH 0x30 /* arithmetic opcodes */
42 #define OPC_FARITH 0x40 /* FPU arithmetic opcodes */
43 #define OPC_TEST 0x50 /* test opcodes */
44 #define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))
46 #define OPC_0F 0x100 /* Is secondary map (0x0f prefix) */
47 #ifdef TCC_TARGET_X86_64
48 # define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */
49 # define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
50 # define OPC_WLX OPC_WLQ
51 # define OPC_BWLX OPC_BWLQ
52 #else
53 # define OPC_WLX OPC_WL
54 # define OPC_BWLX OPC_BWL
55 #endif
57 #define OPC_GROUP_SHIFT 13
/* in order to compress the operand type, we use specific operands and
   we or only with EA */
/* Operand-class bit positions: each OPT_xxx below is used as a shift
   count to build the OP_xxx bit masks defined after this enum. */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
105 #define OP_REG8 (1 << OPT_REG8)
106 #define OP_REG16 (1 << OPT_REG16)
107 #define OP_REG32 (1 << OPT_REG32)
108 #define OP_MMX (1 << OPT_MMX)
109 #define OP_SSE (1 << OPT_SSE)
110 #define OP_CR (1 << OPT_CR)
111 #define OP_TR (1 << OPT_TR)
112 #define OP_DB (1 << OPT_DB)
113 #define OP_SEG (1 << OPT_SEG)
114 #define OP_ST (1 << OPT_ST)
115 #define OP_IM8 (1 << OPT_IM8)
116 #define OP_IM8S (1 << OPT_IM8S)
117 #define OP_IM16 (1 << OPT_IM16)
118 #define OP_IM32 (1 << OPT_IM32)
119 #define OP_EAX (1 << OPT_EAX)
120 #define OP_ST0 (1 << OPT_ST0)
121 #define OP_CL (1 << OPT_CL)
122 #define OP_DX (1 << OPT_DX)
123 #define OP_ADDR (1 << OPT_ADDR)
124 #define OP_INDIR (1 << OPT_INDIR)
125 #ifdef TCC_TARGET_X86_64
126 # define OP_REG64 (1 << OPT_REG64)
127 # define OP_REG8_LOW (1 << OPT_REG8_LOW)
128 # define OP_IM64 (1 << OPT_IM64)
129 # define OP_EA32 (OP_EA << 1)
130 #else
131 # define OP_REG64 0
132 # define OP_REG8_LOW 0
133 # define OP_IM64 0
134 # define OP_EA32 0
135 #endif
137 #define OP_EA 0x40000000
138 #define OP_REG (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
140 #ifdef TCC_TARGET_X86_64
141 # define TREG_XAX TREG_RAX
142 # define TREG_XCX TREG_RCX
143 # define TREG_XDX TREG_RDX
144 #else
145 # define TREG_XAX TREG_EAX
146 # define TREG_XCX TREG_ECX
147 # define TREG_XDX TREG_EDX
148 #endif
/* One instruction template, generated from the DEF_ASM_OPx macros in
   the per-target opcode headers (i386-asm.h / x86_64-asm.h). */
typedef struct ASMInstr {
    uint16_t sym;        /* mnemonic token (TOK_ASM_xxx) this entry matches */
    uint16_t opcode;     /* base opcode byte(s), 0x0f map compressed by O() */
    uint16_t instr_type; /* OPC_xxx flags | (group << OPC_GROUP_SHIFT) */
    uint8_t nb_ops;      /* number of operands in op_type[] */
    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
} ASMInstr;
/* A parsed assembler operand. */
typedef struct Operand {
    uint32_t type;  /* OP_xxx bit mask describing the operand class(es) */
    int8_t reg;     /* register, -1 if none */
    int8_t reg2;    /* second register, -1 if none */
    uint8_t shift;  /* index scale as log2 (0..3), for the SIB byte */
    ExprValue e;    /* displacement / immediate expression */
} Operand;
/* Map an isolated OP_REGxx bit (a power of two, used directly as the
   index) to the operand size code: 0=byte, 1=word, 2=long, 3=quad.
   The commented designated initializers document the layout. */
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
#define NB_TEST_OPCODES 30

/* Condition-code nibbles for the jcc/setcc/cmovcc families, indexed by
   (mnemonic token - pa->sym); aliases (e.g. b/c/nae) share a value. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
    0x00, /* o */
    0x01, /* no */
    0x02, /* b */
    0x02, /* c */
    0x02, /* nae */
    0x03, /* nb */
    0x03, /* nc */
    0x03, /* ae */
    0x04, /* e */
    0x04, /* z */
    0x05, /* ne */
    0x05, /* nz */
    0x06, /* be */
    0x06, /* na */
    0x07, /* nbe */
    0x07, /* a */
    0x08, /* s */
    0x09, /* ns */
    0x0a, /* p */
    0x0a, /* pe */
    0x0b, /* np */
    0x0b, /* po */
    0x0c, /* l */
    0x0c, /* nge */
    0x0d, /* nl */
    0x0d, /* ge */
    0x0e, /* le */
    0x0e, /* ng */
    0x0f, /* nle */
    0x0f, /* g */
};
/* Segment-override prefix bytes, indexed by segment register number
   (same order as the TOK_ASM_es..TOK_ASM_gs tokens). */
static const uint8_t segment_prefixes[] = {
    0x26, /* es */
    0x2e, /* cs */
    0x36, /* ss */
    0x3e, /* ds */
    0x64, /* fs */
    0x65  /* gs */
};
/* The instruction template table, expanded from the per-target opcode
   description header via the DEF_ASM_OPx macros; terminated by a zero
   sym entry. */
static const ASMInstr asm_instrs[] = {
#define ALT(x) x
/* This removes a 0x0f in the second byte */
#define O(o) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o))
/* This constructs instr_type from opcode, type and group. */
#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
#define DEF_ASM_OP0(name, opcode)
#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0 },
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
    /* last operation */
    { 0, },
};
/* Opcodes of the zero-operand instructions (DEF_ASM_OP0 entries only),
   indexed by (token - TOK_ASM_first); the other DEF_ASM_OPx macros are
   redefined empty so the same headers filter down to just these. */
static const uint16_t op0_codes[] = {
#define ALT(x)
#define DEF_ASM_OP0(x, opcode) opcode,
#define DEF_ASM_OP0L(name, opcode, group, instr_type)
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
};
256 static inline int get_reg_shift(TCCState *s1)
258 int shift, v;
259 v = asm_int_expr(s1);
260 switch(v) {
261 case 1:
262 shift = 0;
263 break;
264 case 2:
265 shift = 1;
266 break;
267 case 4:
268 shift = 2;
269 break;
270 case 8:
271 shift = 3;
272 break;
273 default:
274 expect("1, 2, 4 or 8 constant");
275 shift = 0;
276 break;
278 return shift;
#ifdef TCC_TARGET_X86_64
/* Parse a numeric register name: %rN (plain or with a b/w/d size
   suffix) or %crN.  Returns the register number (0..15) and sets
   *type to the matching OP_xxx class, or returns -1 if the current
   identifier token is not such a register. */
static int asm_parse_numeric_reg(int *type)
{
    int reg = -1;
    if (tok >= TOK_IDENT && tok < tok_ident) {
        const char *s = table_ident[tok - TOK_IDENT]->str;
        char c;
        *type = OP_REG64;
        if (*s == 'c') {            /* leading 'c' -> control register %crN */
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
          return -1;
        /* Don't allow leading '0'. */
        if ((c = *s++) >= '1' && c <= '9')
          reg = c - '0';
        else
          return -1;
        /* optional second digit; only r8..r15 exist */
        if ((c = *s) >= '0' && c <= '5')
          s++, reg = reg * 10 + c - '0';
        if (reg > 15)
          return -1;
        if ((c = *s) == 0)
          ;                          /* bare %rN: keep OP_REG64 */
        else if (*type != OP_REG64)
          return -1;                 /* size suffix invalid on %crN */
        else if (c == 'b' && !s[1])
          *type = OP_REG8;
        else if (c == 'w' && !s[1])
          *type = OP_REG16;
        else if (c == 'd' && !s[1])
          *type = OP_REG32;
        else
          return -1;
    }
    return reg;
}
#endif
/* Parse a '%reg' usable as a memory base/index: a 32-bit register, or
   on x86-64 also a 64-bit one (including numeric %rN forms and %rip,
   returned as the special value -2).  Sets *type to the OP_REGxx class
   and returns the register number; errors out otherwise. */
static int asm_parse_reg(int *type)
{
    int reg = 0;
    *type = 0;
    if (tok != '%')
        goto error_32;
    next();
    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
        reg = tok - TOK_ASM_eax;
        *type = OP_REG32;
#ifdef TCC_TARGET_X86_64
    } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
        reg = tok - TOK_ASM_rax;
        *type = OP_REG64;
    } else if (tok == TOK_ASM_rip) {
        reg = -2; /* Probably should use different escape code. */
        *type = OP_REG64;
    } else if ((reg = asm_parse_numeric_reg(type)) >= 0
               && (*type == OP_REG32 || *type == OP_REG64)) {
        ;
#endif
    } else {
    error_32:
        expect("register");
    }
    next();
    return reg;
}
/* Parse one AT&T-syntax operand into *op: an indirect call/jump target
   ('*'), a register ('%...'), an immediate ('$expr'), or a memory
   reference 'disp(base,index,scale)' in all its variants. */
static void parse_operand(TCCState *s1, Operand *op)
{
    ExprValue e;
    int reg, indir;
    const char *p;

    indir = 0;
    if (tok == '*') {
        next();
        indir = OP_INDIR;
    }

    if (tok == '%') {
        next();
        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
            /* the token order encodes class (high bits) and number (low 3) */
            reg = tok - TOK_ASM_al;
            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
            op->reg = reg & 7;
            /* flag the special registers some templates match explicitly */
            if ((op->type & OP_REG) && op->reg == TREG_XAX)
                op->type |= OP_EAX;
            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
                op->type |= OP_CL;
            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
                op->type |= OP_DX;
        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
            op->type = OP_DB;
            op->reg = tok - TOK_ASM_dr0;
        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
            op->type = OP_SEG;
            op->reg = tok - TOK_ASM_es;
        } else if (tok == TOK_ASM_st) {
            /* %st or %st(N) */
            op->type = OP_ST;
            op->reg = 0;
            next();
            if (tok == '(') {
                next();
                if (tok != TOK_PPNUM)
                    goto reg_error;
                p = tokc.str.data;
                reg = p[0] - '0';
                if ((unsigned)reg >= 8 || p[1] != '\0')
                    goto reg_error;
                op->reg = reg;
                next();
                skip(')');
            }
            if (op->reg == 0)
                op->type |= OP_ST0;
            goto no_skip;
#ifdef TCC_TARGET_X86_64
        } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
            op->type = OP_REG8 | OP_REG8_LOW;
            op->reg = 4 + tok - TOK_ASM_spl;
        } else if ((op->reg = asm_parse_numeric_reg(&op->type)) >= 0) {
            ;
#endif
        } else {
        reg_error:
            tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
        }
        next();
    no_skip: ;
    } else if (tok == '$') {
        /* constant value */
        next();
        asm_expr(s1, &e);
        op->type = OP_IM32;
        op->e = e;
        if (!op->e.sym) {
            /* also mark every narrower class the constant fits in */
            if (op->e.v == (uint8_t)op->e.v)
                op->type |= OP_IM8;
            if (op->e.v == (int8_t)op->e.v)
                op->type |= OP_IM8S;
            if (op->e.v == (uint16_t)op->e.v)
                op->type |= OP_IM16;
#ifdef TCC_TARGET_X86_64
            if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
                op->type = OP_IM64;
#endif
        }
    } else {
        /* address(reg,reg2,shift) with all variants */
        op->type = OP_EA;
        op->reg = -1;
        op->reg2 = -1;
        op->shift = 0;
        if (tok != '(') {
            asm_expr(s1, &e);
            op->e = e;
        } else {
            next();
            if (tok == '%') {
                /* no displacement: push '(' back and parse regs below */
                unget_tok('(');
                op->e.v = 0;
                op->e.sym = NULL;
            } else {
                /* bracketed offset expression */
                asm_expr(s1, &e);
                if (tok != ')')
                    expect(")");
                next();
                op->e.v = e.v;
                op->e.sym = e.sym;
            }
            op->e.pcrel = 0;
        }
        if (tok == '(') {
            int type = 0;
            next();
            if (tok != ',') {
                op->reg = asm_parse_reg(&type);
            }
            if (tok == ',') {
                next();
                if (tok != ',') {
                    op->reg2 = asm_parse_reg(&type);
                }
                if (tok == ',') {
                    next();
                    op->shift = get_reg_shift(s1);
                }
            }
            if (type & OP_REG32)
                op->type |= OP_EA32;
            skip(')');
        }
        if (op->reg == -1 && op->reg2 == -1)
            op->type |= OP_ADDR;
    }
    op->type |= indir;
}
482 /* XXX: unify with C code output ? */
483 ST_FUNC void gen_expr32(ExprValue *pe)
485 if (pe->pcrel)
486 /* If PC-relative, always set VT_SYM, even without symbol,
487 so as to force a relocation to be emitted. */
488 gen_addrpc32(VT_SYM, pe->sym, pe->v);
489 else
490 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* Emit a 64-bit value for an assembler expression, with a VT_SYM
   relocation when a symbol is attached. */
ST_FUNC void gen_expr64(ExprValue *pe)
{
    int vtflag = pe->sym ? VT_SYM : 0;
    gen_addr64(vtflag, pe->sym, pe->v);
}
#endif
/* XXX: unify with C code output ? */
/* Emit a 32-bit displacement for a jump/call target symbol. */
static void gen_disp32(ExprValue *pe)
{
    Sym *sym = pe->sym;
    if (sym && sym->r == cur_text_section->sh_num) {
        /* same section: we can output an absolute value. Note
           that the TCC compiler behaves differently here because
           it always outputs a relocation to ease (future) code
           elimination in the linker */
        gen_le32(pe->v + sym->jnext - ind - 4);
    } else {
        /* give undefined void symbols a function type before relocating */
        if (sym && sym->type.t == VT_VOID) {
            sym->type.t = VT_FUNC;
            sym->type.ref = NULL;
        }
        gen_addrpc32(VT_SYM, sym, pe->v);
    }
}
/* generate the modrm operand */
/* Encode 'reg' (the /r or /digit field) together with operand *op as a
   modrm byte, plus sib byte and displacement as required.  Returns the
   output position after a rip-relative displacement (so the caller can
   adjust it past the immediates), 0 otherwise. */
static inline int asm_modrm(int reg, Operand *op)
{
    int mod, reg1, reg2, sib_reg1;
    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
        /* register direct: mod=11 */
        g(0xc0 + (reg << 3) + op->reg);
    } else if (op->reg == -1 && op->reg2 == -1) {
        /* displacement only */
#ifdef TCC_TARGET_X86_64
        /* x86-64 needs sib (base=none) to get absolute, not rip-relative */
        g(0x04 + (reg << 3));
        g(0x25);
#else
        g(0x05 + (reg << 3));
#endif
        gen_expr32(&op->e);
#ifdef TCC_TARGET_X86_64
    } else if (op->reg == -2) {
        /* %rip-relative addressing */
        ExprValue *pe = &op->e;
        g(0x05 + (reg << 3));
        gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
        return ind;
#endif
    } else {
        sib_reg1 = op->reg;
        /* first compute displacement encoding */
        if (sib_reg1 == -1) {
            sib_reg1 = 5;       /* no base: disp32 form inside sib */
            mod = 0x00;
        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
            mod = 0x00;         /* no displacement (ebp/r13 can't use mod=00) */
        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
            mod = 0x40;         /* disp8 */
        } else {
            mod = 0x80;         /* disp32 */
        }
        /* compute if sib byte needed */
        reg1 = op->reg;
        if (op->reg2 != -1)
            reg1 = 4;           /* rm=100 selects the sib byte */
        g(mod + (reg << 3) + reg1);
        if (reg1 == 4) {
            /* add sib byte */
            reg2 = op->reg2;
            if (reg2 == -1)
                reg2 = 4; /* indicate no index */
            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
        }
        /* add offset */
        if (mod == 0x40) {
            g(op->e.v);
        } else if (mod == 0x80 || op->reg == -1) {
            gen_expr32(&op->e);
        }
    }
    return 0;
}
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

/* Emit a REX prefix when needed: width64 forces REX.W; registers >= 8
   set REX.R/X/B (and are renumbered down to 0..7); the low byte regs
   %spl..%dil need a bare 0x40.  Errors out if ah/ch/dh/bh would be
   combined with any REX prefix (unencodable). */
static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? 0x48 : 0;
    int saw_high_8bit = 0;
    int i;
    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later). */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                if (ops[i].reg >= 8) {
                    rex |= REX_B;
                    ops[i].reg -= 8;
                } else if (ops[i].type & OP_REG8_LOW)
                    rex |= 0x40;
                else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
                    /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                    saw_high_8bit = ops[i].reg;
                break;
            }
        }
    } else {
        if (regi != -1) {
            if (ops[regi].reg >= 8) {
                rex |= REX_R;
                ops[regi].reg -= 8;
            } else if (ops[regi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[regi].reg;
        }
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
            if (ops[rmi].reg >= 8) {
                rex |= REX_B;
                ops[rmi].reg -= 8;
            } else if (ops[rmi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[rmi].reg;
        }
        if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif
/* Debug helper: print statistics about the opcode table.  Disabled in
   normal builds because 'already' starts at 1, so the body never runs;
   set it to 0 to enable. */
static void maybe_print_stats (void)
{
    static int already = 1;
    if (!already)
        /* print stats about opcodes */
        {
            const struct ASMInstr *pa;
            int freq[4];
            int op_vals[500];
            int nb_op_vals, i, j;

            already = 1;
            nb_op_vals = 0;
            memset(freq, 0, sizeof(freq));
            for(pa = asm_instrs; pa->sym != 0; pa++) {
                freq[pa->nb_ops]++;
                /* collect the distinct instr_type values */
                //for(i=0;i<pa->nb_ops;i++) {
                for(j=0;j<nb_op_vals;j++) {
                    //if (pa->op_type[i] == op_vals[j])
                    if (pa->instr_type == op_vals[j])
                        goto found;
                }
                //op_vals[nb_op_vals++] = pa->op_type[i];
                op_vals[nb_op_vals++] = pa->instr_type;
            found: ;
            }
            for(i=0;i<nb_op_vals;i++) {
                int v = op_vals[i];
                //if ((v & (v - 1)) != 0)
                printf("%3d: %08x\n", i, v);
            }
            printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
                   (int)sizeof(asm_instrs),
                   (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
                   freq[0], freq[1], freq[2], freq[3]);
        }
}
/* Assemble one instruction whose mnemonic token is 'opcode': parse the
   operands, find a matching template in asm_instrs[], infer the size
   suffix when absent, then emit prefixes, opcode bytes, modrm/sib and
   immediate/displacement operands. */
ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{
    const ASMInstr *pa;
    int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc;
    int nb_ops, s;
    Operand ops[MAX_OPERANDS], *pop;
    int op_type[3]; /* decoded op type */
    int alltypes; /* OR of all operand types */
    int autosize;
    int p66;
#ifdef TCC_TARGET_X86_64
    int rex64;
#endif

    maybe_print_stats();
    /* force synthetic ';' after prefix instruction, so we can handle */
    /* one-line things like "rep stosb" instead of only "rep\nstosb" */
    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
        unget_tok(';');

    /* get operands */
    pop = ops;
    nb_ops = 0;
    seg_prefix = 0;
    alltypes = 0;
    for(;;) {
        if (tok == ';' || tok == TOK_LINEFEED)
            break;
        if (nb_ops >= MAX_OPERANDS) {
            tcc_error("incorrect number of operands");
        }
        parse_operand(s1, pop);
        if (tok == ':') {
            /* 'seg:mem' form: remember the override prefix byte */
            if (pop->type != OP_SEG || seg_prefix)
                tcc_error("incorrect prefix");
            seg_prefix = segment_prefixes[pop->reg];
            next();
            parse_operand(s1, pop);
            if (!(pop->type & OP_EA)) {
                tcc_error("segment prefix must be followed by memory reference");
            }
        }
        pop++;
        nb_ops++;
        if (tok != ',')
            break;
        next();
    }

    s = 0; /* avoid warning */

    /* optimize matching by using a lookup table (no hashing is needed
       !) */
    for(pa = asm_instrs; pa->sym != 0; pa++) {
        int it = pa->instr_type & OPCT_MASK;
        s = 0;
        if (it == OPC_FARITH) {
            v = opcode - pa->sym;
            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
                continue;
        } else if (it == OPC_ARITH) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
            if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
              {
                /* We need to reject the xxxb opcodes that we accepted above.
                   Note that pa->sym for WLX opcodes is the 'w' token,
                   to get the 'b' token subtract one.  */
                if (((opcode - pa->sym + 1) % NBWLX) == 0)
                    continue;
                s++;
              }
        } else if (it == OPC_SHIFT) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
        } else if (it == OPC_TEST) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
                continue;
            /* cmovxx is a test opcode but accepts multiple sizes.
               TCC doesn't accept the suffixed mnemonic, instead we
               simply force size autodetection always.  */
            if (pa->instr_type & OPC_WLX)
                s = NBWLX - 1;
        } else if (pa->instr_type & OPC_B) {
#ifdef TCC_TARGET_X86_64
            /* Some instructions don't have the full size but only
               bwl form.  insb e.g. */
            if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
                && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
#endif
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
                continue;
            s = opcode - pa->sym;
        } else if (pa->instr_type & OPC_WLX) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
            s = opcode - pa->sym + 1;
        } else {
            if (pa->sym != opcode)
                continue;
        }
        if (pa->nb_ops != nb_ops)
            continue;
#ifdef TCC_TARGET_X86_64
        /* Special case for moves.  Selecting the IM64->REG64 form
           should only be done if we really have an >32bit imm64, and that
           is hardcoded.  Ignore it here.  */
        if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
            && ops[1].type == OP_REG64
            && !(pa->instr_type & OPC_0F))
            continue;
#endif
        /* now decode and check each operand */
        alltypes = 0;
        for(i = 0; i < nb_ops; i++) {
            int op1, op2;
            op1 = pa->op_type[i];
            op2 = op1 & 0x1f;
            /* expand composite operand classes into their OP_xxx mask */
            switch(op2) {
            case OPT_IM:
                v = OP_IM8 | OP_IM16 | OP_IM32;
                break;
            case OPT_REG:
                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_REGW:
                v = OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_IMW:
                v = OP_IM16 | OP_IM32;
                break;
            case OPT_MMXSSE:
                v = OP_MMX | OP_SSE;
                break;
            case OPT_DISP:
            case OPT_DISP8:
                v = OP_ADDR;
                break;
            default:
                v = 1 << op2;
                break;
            }
            if (op1 & OPT_EA)
                v |= OP_EA;
            op_type[i] = v;
            if ((ops[i].type & v) == 0)
                goto next;
            alltypes |= ops[i].type;
        }
        /* all is matching ! */
        break;
    next: ;
    }
    if (pa->sym == 0) {
        /* no template matched */
        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
            int b;
            b = op0_codes[opcode - TOK_ASM_first];
            if (b & 0xff00)
                g(b >> 8);
            g(b);
            return;
        } else if (opcode <= TOK_ASM_alllast) {
            tcc_error("bad operand with opcode '%s'",
                  get_tok_str(opcode, NULL));
        } else {
            tcc_error("unknown opcode '%s'",
                  get_tok_str(opcode, NULL));
        }
    }
    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
    autosize = NBWLX-1;
#ifdef TCC_TARGET_X86_64
    /* XXX the autosize should rather be zero, to not have to adjust this
       all the time. */
    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
        autosize = NBWLX-2;
#endif
    if (s == autosize) {
        /* Check for register operands providing hints about the size.
           Start from the end, i.e. destination operands.  This matters
           only for opcodes accepting different sized registers, lar and lsl
           are such opcodes.  */
        for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
                s = reg_to_size[ops[i].type & OP_REG];
        }
        if (s == autosize) {
            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                s = 2;
            else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                     (ops[0].type & OP_EA))
                s = NBWLX - 2;
            else
                tcc_error("cannot infer opcode suffix");
        }
    }

#ifdef TCC_TARGET_X86_64
    /* Generate addr32 prefix if needed */
    for(i = 0; i < nb_ops; i++) {
        if (ops[i].type & OP_EA32) {
            g(0x67);
            break;
        }
    }
#endif
    /* generate data16 prefix if needed */
    p66 = 0;
    if (s == 1)
        p66 = 1;
    else {
        /* accepting mmx+sse in all operands --> needs 0x66 to
           switch to sse mode.  Accepting only sse in an operand --> is
           already SSE insn and needs 0x66/f2/f3 handling.  */
        for (i = 0; i < nb_ops; i++)
            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
                && ops[i].type & OP_SSE)
                p66 = 1;
    }
    if (p66)
        g(0x66);
#ifdef TCC_TARGET_X86_64
    rex64 = 0;
    if (s == 3 || (alltypes & OP_REG64)) {
        /* generate REX prefix */
        int default64 = 0;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] == OP_REG64) {
                /* If only 64bit regs are accepted in one operand
                   this is a default64 instruction without need for
                   REX prefixes.  */
                default64 = 1;
                break;
            }
        }
        /* XXX find better encoding for the default64 instructions.  */
        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
              && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
              && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
              && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
              && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
            && !default64)
            rex64 = 1;
    }
#endif

    /* now generates the operation */
    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
        g(0x9b);
    if (seg_prefix)
        g(seg_prefix);

    v = pa->opcode;
    if (pa->instr_type & OPC_0F)
        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
        /* kludge for imul $im, %reg */
        nb_ops = 3;
        ops[2] = ops[1];
        op_type[2] = op_type[1];
    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
        v--; /* int $3 case */
        nb_ops = 0;
    } else if ((v == 0x06 || v == 0x07)) {
        if (ops[0].reg >= 4) {
            /* push/pop %fs or %gs */
            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
        } else {
            v += ops[0].reg << 3;
        }
        nb_ops = 0;
    } else if (v <= 0x05) {
        /* arith case */
        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
        /* fpu arith case */
        v += ((opcode - pa->sym) / 6) << 3;
    }

    /* search which operand will be used for modrm */
    modrm_index = -1;
    modreg_index = -1;
    if (pa->instr_type & OPC_MODRM) {
        if (!nb_ops) {
            /* A modrm opcode without operands is a special case (e.g. mfence).
               It has a group and acts as if there's an register operand 0
               (ax).  */
            i = 0;
            ops[i].type = OP_REG;
            ops[i].reg = 0;
            goto modrm_found;
        }
        /* first look for an ea operand */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & OP_EA)
                goto modrm_found;
        }
        /* then if not found, a register or indirection (shift instructions) */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
                goto modrm_found;
        }
#ifdef ASM_DEBUG
        tcc_error("bad op table");
#endif
    modrm_found:
        modrm_index = i;
        /* if a register is used in another operand then it is
           used instead of group */
        for(i = 0;i < nb_ops; i++) {
            int t = op_type[i];
            if (i != modrm_index &&
                (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
                modreg_index = i;
                break;
            }
        }
    }
#ifdef TCC_TARGET_X86_64
    asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
#endif

    if (pa->instr_type & OPC_REG) {
        /* mov $im, %reg case */
        if (v == 0xb0 && s >= 1)
            v += 7;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                v += ops[i].reg;
                break;
            }
        }
    }
    if (pa->instr_type & OPC_B)
        v += s >= 1;
    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
        Sym *sym;
        int jmp_disp;

        /* see if we can really generate the jump with a byte offset */
        sym = ops[0].e.sym;
        if (!sym)
            goto no_short_jump;
        if (sym->r != cur_text_section->sh_num)
            goto no_short_jump;
        jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
        if (jmp_disp == (int8_t)jmp_disp) {
            /* OK to generate jump */
            ops[0].e.sym = 0;
            ops[0].e.v = jmp_disp;
            op_type[0] = OP_IM8S;
        } else {
        no_short_jump:
            /* long jump will be allowed. need to modify the
               opcode slightly */
            if (v == 0xeb) /* jmp */
                v = 0xe9;
            else if (v == 0x70) /* jcc */
                v += 0x0f10;
            else
                tcc_error("invalid displacement");
        }
    }
    if (OPCT_IS(pa->instr_type, OPC_TEST))
        v += test_bits[opcode - pa->sym];
    /* emit up to three opcode bytes, high bytes first */
    op1 = v >> 16;
    if (op1)
        g(op1);
    op1 = (v >> 8) & 0xff;
    if (op1)
        g(op1);
    g(v);

    /* compute the group (/digit) for the modrm reg field */
    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
        reg = (opcode - pa->sym) / NBWLX;
        if (reg == 6)
            reg = 7;
    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
        reg = (opcode - pa->sym) / NBWLX;
    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
        reg = (opcode - pa->sym) / 6;
    } else {
        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
    }

    pc = 0;
    if (pa->instr_type & OPC_MODRM) {
        /* if a register is used in another operand then it is
           used instead of group */
        if (modreg_index >= 0)
            reg = ops[modreg_index].reg;
        pc = asm_modrm(reg, &ops[modrm_index]);
    }

    /* emit constants */
#ifndef TCC_TARGET_X86_64
    if (!(pa->instr_type & OPC_0F)
        && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
        /* ljmp or lcall kludge */
        gen_expr32(&ops[1].e);
        if (ops[0].e.sym)
            tcc_error("cannot relocate");
        gen_le16(ops[0].e.v);
        return;
    }
#endif
    for(i = 0;i < nb_ops; i++) {
        v = op_type[i];
        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
            /* if multiple sizes are given it means we must look
               at the op size */
            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
                if (s == 0)
                    v = OP_IM8;
                else if (s == 1)
                    v = OP_IM16;
                else if (s == 2 || (v & OP_IM64) == 0)
                    v = OP_IM32;
                else
                    v = OP_IM64;
            }

            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
                tcc_error("cannot relocate");

            if (v & (OP_IM8 | OP_IM8S)) {
                g(ops[i].e.v);
            } else if (v & OP_IM16) {
                gen_le16(ops[i].e.v);
#ifdef TCC_TARGET_X86_64
            } else if (v & OP_IM64) {
                gen_expr64(&ops[i].e);
#endif
            } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
                gen_disp32(&ops[i].e);
            } else {
                gen_expr32(&ops[i].e);
            }
        }
    }

    /* after immediate operands, adjust pc-relative address */
    if (pc)
        add32le(text_section->data + pc - 4, pc - ind);
}
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
/* The priority of a constraint string is the highest priority among
   its letters; specific-register constraints come first, generic
   register/memory/immediate classes last. */
static inline int constraint_priority(const char *str)
{
    int best = 0;

    while (*str) {
        int c = *str++;
        int pr;

        if (c == 'A') {
            pr = 0;
        } else if (c == 'a' || c == 'b' || c == 'c' || c == 'd'
                   || c == 'S' || c == 'D') {
            pr = 1;
        } else if (c == 'q') {
            pr = 2;
        } else if (c == 'r' || c == 'R' || c == 'p') {
            pr = 3;
        } else if (c == 'N' || c == 'M' || c == 'I' || c == 'e'
                   || c == 'i' || c == 'm' || c == 'g') {
            pr = 4;
        } else {
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > best)
            best = pr;
    }
    return best;
}
/* Advance past the constraint modifier characters ('=', '&', '+', '%')
   at the start of an asm constraint string and return a pointer to the
   first real constraint letter. */
static const char *skip_constraint_modifiers(const char *p)
{
    for (;;) {
        char c = *p;
        if (c != '=' && c != '&' && c != '+' && c != '%')
            return p;
        p++;
    }
}
1188 #define REG_OUT_MASK 0x01
1189 #define REG_IN_MASK 0x02
1191 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
/* Resolve the GCC-style constraints of an asm statement's operands:
   assign a concrete i386 register (or memory/immediate class) to each
   operand in OPERANDS[0..NB_OPERANDS-1], of which the first NB_OUTPUTS
   are outputs.  CLOBBER_REGS marks registers unavailable because they
   appear in the clobber list.  On return *POUT_REG is a scratch
   register for storing outputs through pointers (VT_LLOCAL case), or
   -1 if none is needed.  Errors out via tcc_error() when a constraint
   cannot be satisfied. */
ST_FUNC void asm_compute_constraints(ASMOperand *operands,
                                     int nb_operands, int nb_outputs,
                                     const uint8_t *clobber_regs,
                                     int *pout_reg)
{
    ASMOperand *op;
    int sorted_op[MAX_ASM_OPERANDS];
    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
    const char *str;
    /* per-register bitmask of REG_IN_MASK / REG_OUT_MASK usage */
    uint8_t regs_allocated[NB_ASM_REGS];

    /* init fields */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        op->input_index = -1;
        op->ref_index = -1;
        op->reg = -1;
        op->is_memory = 0;
        op->is_rw = 0;
    }
    /* compute constraint priority and evaluate references to output
       constraints if input constraints */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        str = op->constraint;
        str = skip_constraint_modifiers(str);
        if (isnum(*str) || *str == '[') {
            /* this is a reference to another constraint: a matching
               constraint ("0".."9") or a named one ("[name]") */
            k = find_constraint(operands, nb_operands, str, NULL);
            /* only an input may reference an earlier output */
            if ((unsigned)k >= i || i < nb_outputs)
                tcc_error("invalid reference in constraint %d ('%s')",
                      i, str);
            op->ref_index = k;
            if (operands[k].input_index >= 0)
                tcc_error("cannot reference twice the same operand");
            operands[k].input_index = i;
            op->priority = 5;   /* references are allocated last */
        } else {
            op->priority = constraint_priority(str);
        }
    }

    /* sort operands according to their priority (simple selection
       sort: operand counts are tiny) */
    for(i=0;i<nb_operands;i++)
        sorted_op[i] = i;
    for(i=0;i<nb_operands - 1;i++) {
        for(j=i+1;j<nb_operands;j++) {
            p1 = operands[sorted_op[i]].priority;
            p2 = operands[sorted_op[j]].priority;
            if (p2 < p1) {
                tmp = sorted_op[i];
                sorted_op[i] = sorted_op[j];
                sorted_op[j] = tmp;
            }
        }
    }

    /* clobbered registers are unavailable for both input and output */
    for(i = 0;i < NB_ASM_REGS; i++) {
        if (clobber_regs[i])
            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
        else
            regs_allocated[i] = 0;
    }
    /* esp cannot be used */
    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
    /* ebp cannot be used yet */
    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;

    /* allocate registers and generate corresponding asm moves */
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        str = op->constraint;
        /* no need to allocate references */
        if (op->ref_index >= 0)
            continue;
        /* select if register is used for output, input or both */
        if (op->input_index >= 0) {
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
        } else if (j < nb_outputs) {
            reg_mask = REG_OUT_MASK;
        } else {
            reg_mask = REG_IN_MASK;
        }
        /* try each constraint letter in turn until one can be
           satisfied; modifiers just adjust state and retry */
    try_next:
        c = *str++;
        switch(c) {
        case '=':
            goto try_next;
        case '+':
            op->is_rw = 1;
            /* FALL THRU */
        case '&':
            if (j >= nb_outputs)
                tcc_error("'%c' modifier can only be applied to outputs", c);
            /* early-clobber / read-write: reserve for input too */
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
            goto try_next;
        case 'A':
            /* allocate both eax and edx */
            if (is_reg_allocated(TREG_XAX) ||
                is_reg_allocated(TREG_XDX))
                goto try_next;
            op->is_llong = 1;
            op->reg = TREG_XAX;
            regs_allocated[TREG_XAX] |= reg_mask;
            regs_allocated[TREG_XDX] |= reg_mask;
            break;
        case 'a':
            reg = TREG_XAX;
            goto alloc_reg;
        case 'b':
            reg = 3;    /* ebx */
            goto alloc_reg;
        case 'c':
            reg = TREG_XCX;
            goto alloc_reg;
        case 'd':
            reg = TREG_XDX;
            goto alloc_reg;
        case 'S':
            reg = 6;    /* esi */
            goto alloc_reg;
        case 'D':
            reg = 7;    /* edi */
        alloc_reg:
            if (is_reg_allocated(reg))
                goto try_next;
            goto reg_found;
        case 'q':
            /* eax, ebx, ecx or edx */
            for(reg = 0; reg < 4; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        case 'r':
        case 'R':
        case 'p': /* A general address, for x86(64) any register is acceptable*/
            /* any general register */
            for(reg = 0; reg < 8; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        reg_found:
            /* now we can reload in the register */
            op->is_llong = 0;
            op->reg = reg;
            regs_allocated[reg] |= reg_mask;
            break;
        case 'e':
        case 'i':
            /* immediate (possibly symbolic) constant required */
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
                goto try_next;
            break;
        case 'I':
        case 'N':
        case 'M':
            /* numeric constant required (no symbol allowed) */
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
                goto try_next;
            break;
        case 'm':
        case 'g':
            /* nothing special to do because the operand is already in
               memory, except if the pointer itself is stored in a
               memory variable (VT_LLOCAL case) */
            /* XXX: fix constant case */
            /* if it is a reference to a memory zone, it must lie
               in a register, so we reserve the register in the
               input registers and a load will be generated
               later */
            if (j < nb_outputs || c == 'm') {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    /* any general register */
                    for(reg = 0; reg < 8; reg++) {
                        if (!(regs_allocated[reg] & REG_IN_MASK))
                            goto reg_found1;
                    }
                    goto try_next;
                reg_found1:
                    /* now we can reload in the register */
                    regs_allocated[reg] |= REG_IN_MASK;
                    op->reg = reg;
                    op->is_memory = 1;
                }
            }
            break;
        default:
            tcc_error("asm constraint %d ('%s') could not be satisfied",
                  j, op->constraint);
            break;
        }
        /* if a reference is present for that operand, we assign it too */
        if (op->input_index >= 0) {
            operands[op->input_index].reg = op->reg;
            operands[op->input_index].is_llong = op->is_llong;
        }
    }

    /* compute out_reg. It is used to store output registers to memory
       locations referenced by pointers (VT_LLOCAL case) */
    *pout_reg = -1;
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0 &&
            (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
            !op->is_memory) {
            for(reg = 0; reg < 8; reg++) {
                if (!(regs_allocated[reg] & REG_OUT_MASK))
                    goto reg_found2;
            }
            tcc_error("could not find free output register for reloading");
        reg_found2:
            *pout_reg = reg;
            break;
        }
    }

    /* print sorted constraints */
#ifdef ASM_DEBUG
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
               i,
               op->id ? get_tok_str(op->id, NULL) : "",
               op->constraint,
               op->vt->r,
               op->reg);
    }
    if (*pout_reg >= 0)
        printf("out_reg=%d\n", *pout_reg);
#endif
}
1428 ST_FUNC void subst_asm_operand(CString *add_str,
1429 SValue *sv, int modifier)
1431 int r, reg, size, val;
1432 char buf[64];
1434 r = sv->r;
1435 if ((r & VT_VALMASK) == VT_CONST) {
1436 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
1437 modifier != 'P')
1438 cstr_ccat(add_str, '$');
1439 if (r & VT_SYM) {
1440 const char *name = get_tok_str(sv->sym->v, NULL);
1441 if (sv->sym->v >= SYM_FIRST_ANOM) {
1442 /* In case of anonymuous symbols ("L.42", used
1443 for static data labels) we can't find them
1444 in the C symbol table when later looking up
1445 this name. So enter them now into the asm label
1446 list when we still know the symbol. */
1447 get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym);
1449 cstr_cat(add_str, name, -1);
1450 if ((uint32_t)sv->c.i == 0)
1451 goto no_offset;
1452 cstr_ccat(add_str, '+');
1454 val = sv->c.i;
1455 if (modifier == 'n')
1456 val = -val;
1457 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1458 cstr_cat(add_str, buf, -1);
1459 no_offset:;
1460 #ifdef TCC_TARGET_X86_64
1461 if (r & VT_LVAL)
1462 cstr_cat(add_str, "(%rip)", -1);
1463 #endif
1464 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1465 #ifdef TCC_TARGET_X86_64
1466 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1467 #else
1468 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1469 #endif
1470 cstr_cat(add_str, buf, -1);
1471 } else if (r & VT_LVAL) {
1472 reg = r & VT_VALMASK;
1473 if (reg >= VT_CONST)
1474 tcc_error("internal compiler error");
1475 snprintf(buf, sizeof(buf), "(%%%s)",
1476 #ifdef TCC_TARGET_X86_64
1477 get_tok_str(TOK_ASM_rax + reg, NULL)
1478 #else
1479 get_tok_str(TOK_ASM_eax + reg, NULL)
1480 #endif
1482 cstr_cat(add_str, buf, -1);
1483 } else {
1484 /* register case */
1485 reg = r & VT_VALMASK;
1486 if (reg >= VT_CONST)
1487 tcc_error("internal compiler error");
1489 /* choose register operand size */
1490 if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
1491 (sv->type.t & VT_BTYPE) == VT_BOOL)
1492 size = 1;
1493 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1494 size = 2;
1495 #ifdef TCC_TARGET_X86_64
1496 else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
1497 (sv->type.t & VT_BTYPE) == VT_PTR)
1498 size = 8;
1499 #endif
1500 else
1501 size = 4;
1502 if (size == 1 && reg >= 4)
1503 size = 4;
1505 if (modifier == 'b') {
1506 if (reg >= 4)
1507 tcc_error("cannot use byte register");
1508 size = 1;
1509 } else if (modifier == 'h') {
1510 if (reg >= 4)
1511 tcc_error("cannot use byte register");
1512 size = -1;
1513 } else if (modifier == 'w') {
1514 size = 2;
1515 } else if (modifier == 'k') {
1516 size = 4;
1517 #ifdef TCC_TARGET_X86_64
1518 } else if (modifier == 'q') {
1519 size = 8;
1520 #endif
1523 switch(size) {
1524 case -1:
1525 reg = TOK_ASM_ah + reg;
1526 break;
1527 case 1:
1528 reg = TOK_ASM_al + reg;
1529 break;
1530 case 2:
1531 reg = TOK_ASM_ax + reg;
1532 break;
1533 default:
1534 reg = TOK_ASM_eax + reg;
1535 break;
1536 #ifdef TCC_TARGET_X86_64
1537 case 8:
1538 reg = TOK_ASM_rax + reg;
1539 break;
1540 #endif
1542 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1543 cstr_cat(add_str, buf, -1);
/* generate prolog and epilog code for asm statement */
/* Called twice per asm statement: once with IS_OUTPUT == 0 before the
   inline code (save callee-used registers, load inputs) and once with
   IS_OUTPUT != 0 after it (store outputs, restore saved registers).
   OUT_REG is the scratch register chosen by asm_compute_constraints()
   for storing outputs through pointers, or -1. */
ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
                          int nb_outputs, int is_output,
                          uint8_t *clobber_regs,
                          int out_reg)
{
    uint8_t regs_allocated[NB_ASM_REGS];
    ASMOperand *op;
    int i, reg;
    /* ebx, esi, edi must be preserved across the asm statement */
    static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };

    /* mark all used registers */
    memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
    for(i = 0; i < nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0)
            regs_allocated[op->reg] = 1;
    }
    if (!is_output) {
        /* generate reg save code (push) */
        for(i = 0; i < NB_SAVED_REGS; i++) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                g(0x50 + reg);  /* push reg */
            }
        }

        /* generate load code */
        for(i = 0; i < nb_operands; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
                    op->is_memory) {
                    /* memory reference case (for both input and
                       output cases): load the pointer itself */
                    SValue sv;
                    sv = *op->vt;
                    sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
                    sv.type.t = VT_PTR;
                    load(op->reg, &sv);
                } else if (i >= nb_outputs || op->is_rw) {
                    /* load value in register (inputs and '+' outputs) */
                    load(op->reg, op->vt);
                    if (op->is_llong) {
                        /* 'A' constraint: high 32 bits go into edx */
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        load(TREG_XDX, &sv);
                    }
                }
            }
        }
    } else {
        /* generate save code */
        for(i = 0 ; i < nb_outputs; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    if (!op->is_memory) {
                        /* the destination is reached through a stored
                           pointer: load it into out_reg, then store
                           the result through it */
                        SValue sv;
                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
                        sv.type.t = VT_PTR;
                        load(out_reg, &sv);

                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | out_reg;
                        store(op->reg, &sv);
                    }
                } else {
                    store(op->reg, op->vt);
                    if (op->is_llong) {
                        /* 'A' constraint: edx holds the high 32 bits */
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        store(TREG_XDX, &sv);
                    }
                }
            }
        }
        /* generate reg restore code (pop, reverse order) */
        for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                g(0x58 + reg);  /* pop reg */
            }
        }
    }
}
1637 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1639 int reg;
1640 TokenSym *ts;
1642 if (!strcmp(str, "memory") ||
1643 !strcmp(str, "cc") ||
1644 !strcmp(str, "flags"))
1645 return;
1646 ts = tok_alloc(str, strlen(str));
1647 reg = ts->tok;
1648 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1649 reg -= TOK_ASM_eax;
1650 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1651 reg -= TOK_ASM_ax;
1652 #ifdef TCC_TARGET_X86_64
1653 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1654 reg -= TOK_ASM_rax;
1655 } else if (1 && str[0] == 'r' &&
1656 (((str[1] == '8' || str[1] == '9') && str[2] == 0) ||
1657 (str[1] == '1' && str[2] >= '0' && str[2] <= '5' &&
1658 str[3] == 0))) {
1659 /* Do nothing for now. We can't parse the high registers. */
1660 goto end;
1661 #endif
1662 } else {
1663 tcc_error("invalid clobber register '%s'", str);
1665 clobber_regs[reg] = 1;
1666 end:;