/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#define USING_GLOBALS
#include "tcc.h"

#define MAX_OPERANDS 3

#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

#define OPC_B          0x01 /* only used with OPC_WL */
#define OPC_WL         0x02 /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04 /* register is added to opcode */
#define OPC_MODRM      0x08 /* modrm encoding */

#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10 /* add fwait opcode */
#define OPC_SHIFT      0x20 /* shift opcodes */
#define OPC_ARITH      0x30 /* arithmetic opcodes */
#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
#define OPC_TEST       0x50 /* test opcodes */
#define OPC_0F01       0x60 /* 0x0f01XX (group 7, XX is 2nd opcode,
                               no operands and unstructured mod/rm) */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
#define OPC_48        0x200 /* Always has REX prefix */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000 /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

#define OPC_GROUP_SHIFT 13

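/* The instr_type field of an ASMInstr thus packs the opcode class
   (OPCT_MASK bits), the flag bits above and a 3-bit ModRM "group"
   (the /digit of the opcode) starting at bit 13.  Illustrative sketch,
   not a verbatim table entry: a "negl" template would combine
   OPC_MODRM with group 3, since NEG is encoded as 0xf7 /3.  */
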
/* in order to compress the operand type, we use specific operands and
   we OR them only with EA */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};

#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
# define OP_EA32  (OP_EA << 1)
#else
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif

typedef struct ASMInstr {
    uint16_t sym;
    uint16_t opcode;
    uint16_t instr_type;
    uint8_t nb_ops;
    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
} ASMInstr;

typedef struct Operand {
    uint32_t type;
    int8_t  reg; /* register, -1 if none */
    int8_t  reg2; /* second register, -1 if none */
    uint8_t shift;
    ExprValue e;
} Operand;

static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};

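/* The table is indexed by (op->type & OP_REG), i.e. a power of two:
   OP_REG8 == 1 -> 0 (b suffix), OP_REG16 == 2 -> 1 (w), OP_REG32 == 4
   -> 2 (l) and, on x86-64, OP_REG64 == 8 -> 3 (q), matching the
   commented-out designated initializers above.  */
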
#define NB_TEST_OPCODES 30

static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};

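/* These are the standard x86 condition-code nibbles; asm_opcode() adds
   them to the base opcode of the jcc/setcc/cmovcc families.  Worked
   example (for reference, not from the table itself): "sete %al" is
   SETcc with cc == 0x04, i.e. bytes 0x0f 0x94 plus ModRM 0xc0 for %al. */
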
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};

static const ASMInstr asm_instrs[] = {
#define ALT(x) x
/* This removes a 0x0f in the second byte */
#define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
/* This constructs instr_type from opcode, type and group. */
#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
#define DEF_ASM_OP0(name, opcode)
#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
    /* last operation */
    { 0, },
};

static const uint16_t op0_codes[] = {
#define ALT(x)
#define DEF_ASM_OP0(x, opcode) opcode,
#define DEF_ASM_OP0L(name, opcode, group, instr_type)
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
};

static inline int get_reg_shift(TCCState *s1)
{
    int shift, v;
    v = asm_int_expr(s1);
    switch(v) {
    case 1:
        shift = 0;
        break;
    case 2:
        shift = 1;
        break;
    case 4:
        shift = 2;
        break;
    case 8:
        shift = 3;
        break;
    default:
        expect("1, 2, 4 or 8 constant");
        shift = 0;
        break;
    }
    return shift;
}

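/* The returned shift is the 2-bit "scale" field of a SIB byte, i.e.
   log2 of the scale factor.  Example: in "leal 0(,%esi,4), %eax" the
   scale 4 is parsed here as shift == 2, which asm_modrm() later places
   in the top two bits of the SIB byte.  */
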
#ifdef TCC_TARGET_X86_64
static int asm_parse_numeric_reg(int t, unsigned int *type)
{
    int reg = -1;
    if (t >= TOK_IDENT && t < tok_ident) {
        const char *s = table_ident[t - TOK_IDENT]->str;
        char c;
        *type = OP_REG64;
        if (*s == 'c') {
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
            return -1;
        /* Don't allow leading '0'. */
        if ((c = *s++) >= '1' && c <= '9')
            reg = c - '0';
        else
            return -1;
        if ((c = *s) >= '0' && c <= '5')
            s++, reg = reg * 10 + c - '0';
        if (reg > 15)
            return -1;
        if ((c = *s) == 0)
            ;
        else if (*type != OP_REG64)
            return -1;
        else if (c == 'b' && !s[1])
            *type = OP_REG8;
        else if (c == 'w' && !s[1])
            *type = OP_REG16;
        else if (c == 'd' && !s[1])
            *type = OP_REG32;
        else
            return -1;
    }
    return reg;
}
#endif

static int asm_parse_reg(unsigned int *type)
{
    int reg = 0;
    *type = 0;
    if (tok != '%')
        goto error_32;
    next();
    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
        reg = tok - TOK_ASM_eax;
        *type = OP_REG32;
#ifdef TCC_TARGET_X86_64
    } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
        reg = tok - TOK_ASM_rax;
        *type = OP_REG64;
    } else if (tok == TOK_ASM_rip) {
        reg = -2; /* Probably should use different escape code. */
        *type = OP_REG64;
    } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
               && (*type == OP_REG32 || *type == OP_REG64)) {
        ;
#endif
    } else {
    error_32:
        expect("register");
    }
    next();
    return reg;
}

static void parse_operand(TCCState *s1, Operand *op)
{
    ExprValue e;
    int reg, indir;
    const char *p;

    indir = 0;
    if (tok == '*') {
        next();
        indir = OP_INDIR;
    }

    if (tok == '%') {
        next();
        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
            reg = tok - TOK_ASM_al;
            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
            op->reg = reg & 7;
            if ((op->type & OP_REG) && op->reg == TREG_XAX)
                op->type |= OP_EAX;
            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
                op->type |= OP_CL;
            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
                op->type |= OP_DX;
        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
            op->type = OP_DB;
            op->reg = tok - TOK_ASM_dr0;
        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
            op->type = OP_SEG;
            op->reg = tok - TOK_ASM_es;
        } else if (tok == TOK_ASM_st) {
            op->type = OP_ST;
            op->reg = 0;
            next();
            if (tok == '(') {
                next();
                if (tok != TOK_PPNUM)
                    goto reg_error;
                p = tokc.str.data;
                reg = p[0] - '0';
                if ((unsigned)reg >= 8 || p[1] != '\0')
                    goto reg_error;
                op->reg = reg;
                next();
                skip(')');
            }
            if (op->reg == 0)
                op->type |= OP_ST0;
            goto no_skip;
#ifdef TCC_TARGET_X86_64
        } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
            op->type = OP_REG8 | OP_REG8_LOW;
            op->reg = 4 + tok - TOK_ASM_spl;
        } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
            ;
#endif
        } else {
        reg_error:
            tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
        }
        next();
    no_skip: ;
    } else if (tok == '$') {
        /* constant value */
        next();
        asm_expr(s1, &e);
        op->type = OP_IM32;
        op->e = e;
        if (!op->e.sym) {
            if (op->e.v == (uint8_t)op->e.v)
                op->type |= OP_IM8;
            if (op->e.v == (int8_t)op->e.v)
                op->type |= OP_IM8S;
            if (op->e.v == (uint16_t)op->e.v)
                op->type |= OP_IM16;
#ifdef TCC_TARGET_X86_64
            if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
                op->type = OP_IM64;
#endif
        }
    } else {
        /* address(reg,reg2,shift) with all variants */
        op->type = OP_EA;
        op->reg = -1;
        op->reg2 = -1;
        op->shift = 0;
        if (tok != '(') {
            asm_expr(s1, &e);
            op->e = e;
        } else {
            next();
            if (tok == '%') {
                unget_tok('(');
                op->e.v = 0;
                op->e.sym = NULL;
            } else {
                /* bracketed offset expression */
                asm_expr(s1, &e);
                if (tok != ')')
                    expect(")");
                next();
                op->e.v = e.v;
                op->e.sym = e.sym;
            }
            op->e.pcrel = 0;
        }
        if (tok == '(') {
            unsigned int type = 0;
            next();
            if (tok != ',') {
                op->reg = asm_parse_reg(&type);
            }
            if (tok == ',') {
                next();
                if (tok != ',') {
                    op->reg2 = asm_parse_reg(&type);
                }
                if (tok == ',') {
                    next();
                    op->shift = get_reg_shift(s1);
                }
            }
            if (type & OP_REG32)
                op->type |= OP_EA32;
            skip(')');
        }
        if (op->reg == -1 && op->reg2 == -1)
            op->type |= OP_ADDR;
    }
    op->type |= indir;
}

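/* Examples of what parse_operand() produces (derived from the code
   above, shown for illustration):
     "$42"            -> OP_IM32 | OP_IM8 | OP_IM8S | OP_IM16 (fits all)
     "%al"            -> OP_REG8 | OP_EAX
     "%st(3)"         -> OP_ST, reg == 3
     "4(%ebx,%esi,2)" -> OP_EA, reg == ebx, reg2 == esi, shift == 1
     "*%eax"          -> OP_REG32 | OP_EAX | OP_INDIR  */
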
/* XXX: unify with C code output ? */
ST_FUNC void gen_expr32(ExprValue *pe)
{
    if (pe->pcrel)
        /* If PC-relative, always set VT_SYM, even without symbol,
           so as to force a relocation to be emitted.  */
        gen_addrpc32(VT_SYM, pe->sym, pe->v);
    else
        gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}

#ifdef TCC_TARGET_X86_64
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif

/* XXX: unify with C code output ? */
static void gen_disp32(ExprValue *pe)
{
    Sym *sym = pe->sym;
    ElfSym *esym = elfsym(sym);
    if (esym && esym->st_shndx == cur_text_section->sh_num) {
        /* same section: we can output an absolute value. Note
           that the TCC compiler behaves differently here because
           it always outputs a relocation to ease (future) code
           elimination in the linker */
        gen_le32(pe->v + esym->st_value - ind - 4);
    } else {
        if (sym && sym->type.t == VT_VOID) {
            sym->type.t = VT_FUNC;
            sym->type.ref = NULL;
        }
        gen_addrpc32(VT_SYM, sym, pe->v);
    }
}

/* generate the modrm operand */
static inline int asm_modrm(int reg, Operand *op)
{
    int mod, reg1, reg2, sib_reg1;

    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
        g(0xc0 + (reg << 3) + op->reg);
    } else if (op->reg == -1 && op->reg2 == -1) {
        /* displacement only */
#ifdef TCC_TARGET_X86_64
        g(0x04 + (reg << 3));
        g(0x25);
#else
        g(0x05 + (reg << 3));
#endif
        gen_expr32(&op->e);
#ifdef TCC_TARGET_X86_64
    } else if (op->reg == -2) {
        ExprValue *pe = &op->e;
        g(0x05 + (reg << 3));
        gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
        return ind;
#endif
    } else {
        sib_reg1 = op->reg;
        /* first compute displacement encoding */
        if (sib_reg1 == -1) {
            sib_reg1 = 5;
            mod = 0x00;
        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
            mod = 0x00;
        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
            mod = 0x40;
        } else {
            mod = 0x80;
        }
        /* compute if sib byte needed */
        reg1 = op->reg;
        if (op->reg2 != -1)
            reg1 = 4;
        g(mod + (reg << 3) + reg1);
        if (reg1 == 4) {
            /* add sib byte */
            reg2 = op->reg2;
            if (reg2 == -1)
                reg2 = 4; /* indicate no index */
            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
        }
        /* add offset */
        if (mod == 0x40) {
            g(op->e.v);
        } else if (mod == 0x80 || op->reg == -1) {
            gen_expr32(&op->e);
        }
    }
    return 0;
}

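/* ModRM layout recap (standard x86 encoding, for reference): bits 7-6
   are "mod" (00 no disp, 01 disp8, 10 disp32, 11 register direct),
   bits 5-3 the reg/group field, bits 2-0 the r/m field; r/m == 4
   redirects to a SIB byte (scale<<6 | index<<3 | base).  Worked
   example: "movl %eax, 4(%ebx)" -> 0x89, ModRM 0x43 (mod 01, reg 000
   = %eax, r/m 011 = %ebx), disp8 0x04.  */
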
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? 0x48 : 0;
    int saw_high_8bit = 0;
    int i;
    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later).  */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                if (ops[i].reg >= 8) {
                    rex |= REX_B;
                    ops[i].reg -= 8;
                } else if (ops[i].type & OP_REG8_LOW)
                    rex |= 0x40;
                else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
                    /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                    saw_high_8bit = ops[i].reg;
                break;
            }
        }
    } else {
        if (regi != -1) {
            if (ops[regi].reg >= 8) {
                rex |= REX_R;
                ops[regi].reg -= 8;
            } else if (ops[regi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[regi].reg;
        }
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
            if (ops[rmi].reg >= 8) {
                rex |= REX_B;
                ops[rmi].reg -= 8;
            } else if (ops[rmi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[rmi].reg;
        }
        if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif

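/* REX recap (standard x86-64 encoding): the prefix byte is 0100WRXB,
   so REX_W (0x48) selects 64-bit operand size, while REX_R/X/B supply
   the 4th bit of the ModRM reg field, the SIB index and the ModRM r/m
   (or opcode-embedded register) respectively.  Worked example:
   "movq %rax, %r8" -> REX 0x49 (W+B), 0x89, ModRM 0xc0 (reg 000 =
   %rax, r/m 000 = %r8 with its high bit carried in REX.B).  */
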
static void maybe_print_stats (void)
{
    static int already;

    if (0 && !already)
    /* print stats about opcodes */
    {
        const struct ASMInstr *pa;
        int freq[4];
        int op_vals[500];
        int nb_op_vals, i, j;

        already = 1;
        nb_op_vals = 0;
        memset(freq, 0, sizeof(freq));
        for(pa = asm_instrs; pa->sym != 0; pa++) {
            freq[pa->nb_ops]++;
            //for(i=0;i<pa->nb_ops;i++) {
            for(j=0;j<nb_op_vals;j++) {
                //if (pa->op_type[i] == op_vals[j])
                if (pa->instr_type == op_vals[j])
                    goto found;
            }
            //op_vals[nb_op_vals++] = pa->op_type[i];
            op_vals[nb_op_vals++] = pa->instr_type;
        found: ;
            //}
        }
        for(i=0;i<nb_op_vals;i++) {
            int v = op_vals[i];
            //if ((v & (v - 1)) != 0)
                printf("%3d: %08x\n", i, v);
        }
        printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
               (int)sizeof(asm_instrs),
               (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
               freq[0], freq[1], freq[2], freq[3]);
    }
}

ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{
    const ASMInstr *pa;
    int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p;
    int nb_ops, s;
    Operand ops[MAX_OPERANDS], *pop;
    int op_type[3]; /* decoded op type */
    int alltypes; /* OR of all operand types */
    int autosize;
    int p66;
#ifdef TCC_TARGET_X86_64
    int rex64;
#endif

    maybe_print_stats();
    /* force synthetic ';' after prefix instruction, so we can handle */
    /* one-line things like "rep stosb" instead of only "rep\nstosb" */
    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
        unget_tok(';');

    /* get operands */
    pop = ops;
    nb_ops = 0;
    seg_prefix = 0;
    alltypes = 0;
    for(;;) {
        if (tok == ';' || tok == TOK_LINEFEED)
            break;
        if (nb_ops >= MAX_OPERANDS) {
            tcc_error("incorrect number of operands");
        }
        parse_operand(s1, pop);
        if (tok == ':') {
            if (pop->type != OP_SEG || seg_prefix)
                tcc_error("incorrect prefix");
            seg_prefix = segment_prefixes[pop->reg];
            next();
            parse_operand(s1, pop);
            if (!(pop->type & OP_EA)) {
                tcc_error("segment prefix must be followed by memory reference");
            }
        }
        pop++;
        nb_ops++;
        if (tok != ',')
            break;
        next();
    }

    s = 0; /* avoid warning */

again:
    /* optimize matching by using a lookup table (no hashing is needed
       !) */
    for(pa = asm_instrs; pa->sym != 0; pa++) {
        int it = pa->instr_type & OPCT_MASK;
        s = 0;
        if (it == OPC_FARITH) {
            v = opcode - pa->sym;
            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
                continue;
        } else if (it == OPC_ARITH) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
            if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
              {
                /* We need to reject the xxxb opcodes that we accepted above.
                   Note that pa->sym for WLX opcodes is the 'w' token,
                   to get the 'b' token subtract one.  */
                if (((opcode - pa->sym + 1) % NBWLX) == 0)
                    continue;
                s++;
              }
        } else if (it == OPC_SHIFT) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
        } else if (it == OPC_TEST) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
                continue;
            /* cmovxx is a test opcode but accepts multiple sizes.
               The suffixes aren't encoded in the table, instead we
               simply force size autodetection always and deal with suffixed
               variants below when we don't find e.g. "cmovzl".  */
            if (pa->instr_type & OPC_WLX)
                s = NBWLX - 1;
        } else if (pa->instr_type & OPC_B) {
#ifdef TCC_TARGET_X86_64
            /* Some instructions don't have the full size but only
               bwl form.  insb e.g. */
            if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
                && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
#endif
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
                continue;
            s = opcode - pa->sym;
        } else if (pa->instr_type & OPC_WLX) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
            s = opcode - pa->sym + 1;
        } else {
            if (pa->sym != opcode)
                continue;
        }
        if (pa->nb_ops != nb_ops)
            continue;
#ifdef TCC_TARGET_X86_64
        /* Special case for moves.  Selecting the IM64->REG64 form
           should only be done if we really have an >32bit imm64, and that
           is hardcoded.  Ignore it here.  */
        if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
            && (ops[1].type & OP_REG) == OP_REG64
            && !(pa->instr_type & OPC_0F))
            continue;
#endif
        /* now decode and check each operand */
        alltypes = 0;
        for(i = 0; i < nb_ops; i++) {
            int op1, op2;
            op1 = pa->op_type[i];
            op2 = op1 & 0x1f;
            switch(op2) {
            case OPT_IM:
                v = OP_IM8 | OP_IM16 | OP_IM32;
                break;
            case OPT_REG:
                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_REGW:
                v = OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_IMW:
                v = OP_IM16 | OP_IM32;
                break;
            case OPT_MMXSSE:
                v = OP_MMX | OP_SSE;
                break;
            case OPT_DISP:
            case OPT_DISP8:
                v = OP_ADDR;
                break;
            default:
                v = 1 << op2;
                break;
            }
            if (op1 & OPT_EA)
                v |= OP_EA;
            op_type[i] = v;
            if ((ops[i].type & v) == 0)
                goto next;
            alltypes |= ops[i].type;
        }
        (void)alltypes; /* maybe unused */
        /* all is matching ! */
        break;
    next: ;
    }
    if (pa->sym == 0) {
        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
            int b;
            b = op0_codes[opcode - TOK_ASM_first];
            if (b & 0xff00)
                g(b >> 8);
            g(b);
            return;
        } else if (opcode <= TOK_ASM_alllast) {
            tcc_error("bad operand with opcode '%s'",
                      get_tok_str(opcode, NULL));
        } else {
            /* Special case for cmovcc, we accept size suffixes but ignore
               them, but we don't want them to blow up our tables.  */
            TokenSym *ts = table_ident[opcode - TOK_IDENT];
            if (ts->len >= 6
                && strchr("wlq", ts->str[ts->len-1])
                && !memcmp(ts->str, "cmov", 4)) {
                opcode = tok_alloc(ts->str, ts->len-1)->tok;
                goto again;
            }
            tcc_error("unknown opcode '%s'", ts->str);
        }
    }
    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
    autosize = NBWLX-1;
#ifdef TCC_TARGET_X86_64
    /* XXX the autosize should rather be zero, to not have to adjust this
       all the time. */
    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
        autosize = NBWLX-2;
#endif
    if (s == autosize) {
        /* Check for register operands providing hints about the size.
           Start from the end, i.e. destination operands.  This matters
           only for opcodes accepting different sized registers, lar and lsl
           are such opcodes.  */
        for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
                s = reg_to_size[ops[i].type & OP_REG];
        }
        if (s == autosize) {
            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                s = 2;
            else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                     (ops[0].type & OP_EA))
                s = NBWLX - 2;
            else
                tcc_error("cannot infer opcode suffix");
        }
    }

#ifdef TCC_TARGET_X86_64
    rex64 = 0;
    if (pa->instr_type & OPC_48)
        rex64 = 1;
    else if (s == 3 || (alltypes & OP_REG64)) {
        /* generate REX prefix */
        int default64 = 0;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
                /* If only 64bit regs are accepted in one operand
                   this is a default64 instruction without need for
                   REX prefixes, except for movabs(0xb8).  */
                default64 = 1;
                break;
            }
        }
        /* XXX find better encoding for the default64 instructions.  */
        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
              && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
              && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
              && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
              && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
            && !default64)
            rex64 = 1;
    }
#endif

    /* now generate the operation */
    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
        g(0x9b);
    if (seg_prefix)
        g(seg_prefix);
#ifdef TCC_TARGET_X86_64
    /* Generate addr32 prefix if needed */
    for(i = 0; i < nb_ops; i++) {
        if (ops[i].type & OP_EA32) {
            g(0x67);
            break;
        }
    }
#endif
    /* generate data16 prefix if needed */
    p66 = 0;
    if (s == 1)
        p66 = 1;
    else {
        /* accepting mmx+sse in all operands --> needs 0x66 to
           switch to sse mode.  Accepting only sse in an operand --> is
           already SSE insn and needs 0x66/f2/f3 handling.  */
        for (i = 0; i < nb_ops; i++)
            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
                && ops[i].type & OP_SSE)
                p66 = 1;
    }
    if (p66)
        g(0x66);

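    /* 0x66 is the operand-size override prefix: with it, an opcode that
       defaults to 32-bit operands works on 16-bit ones.  Example (for
       illustration): "addl $1, %eax" and "addw $1, %ax" share opcode
       0x83 /0; the latter (s == 1) is simply emitted with a leading
       0x66 byte.  */
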
    v = pa->opcode;
    p = v >> 8;  /* possibly prefix byte(s) */
    switch (p) {
        case 0: break;  /* no prefix */
        case 0x48: break; /* REX, handled elsewhere */
        case 0x66:
        case 0x67:
        case 0xf2:
        case 0xf3: v = v & 0xff; g(p); break;
        case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */
        case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */
        case 0xdc: case 0xdd: case 0xde: case 0xdf: break;
        default: tcc_error("bad prefix 0x%2x in opcode table", p); break;
    }
    if (pa->instr_type & OPC_0F)
        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
        /* kludge for imul $im, %reg */
        nb_ops = 3;
        ops[2] = ops[1];
        op_type[2] = op_type[1];
    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
        v--; /* int $3 case */
        nb_ops = 0;
    } else if ((v == 0x06 || v == 0x07)) {
        if (ops[0].reg >= 4) {
            /* push/pop %fs or %gs */
            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
        } else {
            v += ops[0].reg << 3;
        }
        nb_ops = 0;
    } else if (v <= 0x05) {
        /* arith case */
        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
        /* fpu arith case */
        v += ((opcode - pa->sym) / 6) << 3;
    }

    /* search which operand will be used for modrm */
    modrm_index = -1;
    modreg_index = -1;
    if (pa->instr_type & OPC_MODRM) {
        if (!nb_ops) {
            /* A modrm opcode without operands is a special case (e.g. mfence).
               It has a group and acts as if there's a register operand 0
               (ax). */
            i = 0;
            ops[i].type = OP_REG;
            ops[i].reg = 0;
            goto modrm_found;
        }
        /* first look for an ea operand */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & OP_EA)
                goto modrm_found;
        }
        /* then if not found, a register or indirection (shift instructions) */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
                goto modrm_found;
        }
#ifdef ASM_DEBUG
        tcc_error("bad op table");
#endif
    modrm_found:
        modrm_index = i;
        /* if a register is used in another operand then it is
           used instead of group */
        for(i = 0;i < nb_ops; i++) {
            int t = op_type[i];
            if (i != modrm_index &&
                (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
                modreg_index = i;
                break;
            }
        }
    }
#ifdef TCC_TARGET_X86_64
    asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
#endif

    if (pa->instr_type & OPC_REG) {
        /* mov $im, %reg case */
        if (v == 0xb0 && s >= 1)
            v += 7;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                v += ops[i].reg;
                break;
            }
        }
    }
    if (pa->instr_type & OPC_B)
        v += s >= 1;
    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
        ElfSym *esym;
        int jmp_disp;

        /* see if we can really generate the jump with a byte offset */
        esym = elfsym(ops[0].e.sym);
        if (!esym || esym->st_shndx != cur_text_section->sh_num)
            goto no_short_jump;
        jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff);
        if (jmp_disp == (int8_t)jmp_disp) {
            /* OK to generate jump */
            ops[0].e.sym = 0;
            ops[0].e.v = jmp_disp;
            op_type[0] = OP_IM8S;
        } else {
        no_short_jump:
            /* long jump will be allowed. need to modify the
               opcode slightly */
            if (v == 0xeb) /* jmp */
                v = 0xe9;
            else if (v == 0x70) /* jcc */
                v += 0x0f10;
            else
                tcc_error("invalid displacement");
        }
    }
    if (OPCT_IS(pa->instr_type, OPC_TEST))
        v += test_bits[opcode - pa->sym];
    else if (OPCT_IS(pa->instr_type, OPC_0F01))
        v |= 0x0f0100;
    op1 = v >> 16;
    if (op1)
        g(op1);
    op1 = (v >> 8) & 0xff;
    if (op1)
        g(op1);
    g(v);

    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
        reg = (opcode - pa->sym) / NBWLX;
        if (reg == 6)
            reg = 7;
    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
        reg = (opcode - pa->sym) / NBWLX;
    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
        reg = (opcode - pa->sym) / 6;
    } else {
        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
    }

    pc = 0;
    if (pa->instr_type & OPC_MODRM) {
        /* if a register is used in another operand then it is
           used instead of group */
        if (modreg_index >= 0)
            reg = ops[modreg_index].reg;
        pc = asm_modrm(reg, &ops[modrm_index]);
    }

    /* emit constants */
#ifndef TCC_TARGET_X86_64
    if (!(pa->instr_type & OPC_0F)
        && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
        /* ljmp or lcall kludge */
        gen_expr32(&ops[1].e);
        if (ops[0].e.sym)
            tcc_error("cannot relocate");
        gen_le16(ops[0].e.v);
        return;
    }
#endif
    for(i = 0;i < nb_ops; i++) {
        v = op_type[i];
        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
            /* if multiple sizes are given it means we must look
               at the op size */
            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
                if (s == 0)
                    v = OP_IM8;
                else if (s == 1)
                    v = OP_IM16;
                else if (s == 2 || (v & OP_IM64) == 0)
                    v = OP_IM32;
                else
                    v = OP_IM64;
            }

            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
                tcc_error("cannot relocate");

            if (v & (OP_IM8 | OP_IM8S)) {
                g(ops[i].e.v);
            } else if (v & OP_IM16) {
                gen_le16(ops[i].e.v);
#ifdef TCC_TARGET_X86_64
            } else if (v & OP_IM64) {
                gen_expr64(&ops[i].e);
#endif
            } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
                gen_disp32(&ops[i].e);
            } else {
                gen_expr32(&ops[i].e);
            }
        }
    }

    /* after immediate operands, adjust pc-relative address */
    if (pc)
        add32le(cur_text_section->data + pc - 4, pc - ind);
}

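/* End-to-end illustration of the encoding pass above (derived by hand,
   for reference): "movl $1, 4(%esp)" matches the 0xc7 /0 template, so
   asm_opcode() emits 0xc7, asm_modrm() emits ModRM 0x44 (mod 01, group
   0, r/m 100 -> SIB), SIB 0x24 (base %esp, no index) and disp8 0x04,
   and the immediate loop finally appends 01 00 00 00.  */
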
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* we take the lowest priority */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'R':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'e':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}

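/* Example (derived from the code above): for the constraint string
   "qm" the loop sees 'q' (pr 2) and 'm' (pr 4) and returns 4; plain
   "a" returns 1.  Since asm_compute_constraints() allocates in order
   of ascending priority, "a" operands get their register before "r"
   (3) or "g" (4) operands do.  */
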
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}

/* If T (a token) is of the form "%reg" returns the register
   number and type, otherwise return -1.  */
ST_FUNC int asm_parse_regvar (int t)
{
    const char *s;
    Operand op;
    if (t < TOK_IDENT || (t & SYM_FIELD))
        return -1;
    s = table_ident[t - TOK_IDENT]->str;
    if (s[0] != '%')
        return -1;
    t = tok_alloc_const(s + 1);
    unget_tok(t);
    unget_tok('%');
    parse_operand(tcc_state, &op);
    /* Accept only integer regs for now.  */
    if (op.type & OP_REG)
        return op.reg;
    else
        return -1;
}

#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)

ST_FUNC void asm_compute_constraints(ASMOperand *operands,
                                     int nb_operands, int nb_outputs,
                                     const uint8_t *clobber_regs,
                                     int *pout_reg)
{
    ASMOperand *op;
    int sorted_op[MAX_ASM_OPERANDS];
    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
    const char *str;
    uint8_t regs_allocated[NB_ASM_REGS];

    /* init fields */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        op->input_index = -1;
        op->ref_index = -1;
        op->reg = -1;
        op->is_memory = 0;
        op->is_rw = 0;
    }
    /* compute constraint priority and evaluate references to output
       constraints if input constraints */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        str = op->constraint;
        str = skip_constraint_modifiers(str);
        if (isnum(*str) || *str == '[') {
            /* this is a reference to another constraint */
            k = find_constraint(operands, nb_operands, str, NULL);
            if ((unsigned)k >= i || i < nb_outputs)
                tcc_error("invalid reference in constraint %d ('%s')",
                          i, str);
            op->ref_index = k;
            if (operands[k].input_index >= 0)
                tcc_error("cannot reference twice the same operand");
            operands[k].input_index = i;
            op->priority = 5;
        } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
                   && op->vt->sym
                   && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
            op->priority = 1;
            op->reg = reg;
        } else {
            op->priority = constraint_priority(str);
        }
    }

    /* sort operands according to their priority */
    for(i=0;i<nb_operands;i++)
        sorted_op[i] = i;
    for(i=0;i<nb_operands - 1;i++) {
        for(j=i+1;j<nb_operands;j++) {
            p1 = operands[sorted_op[i]].priority;
            p2 = operands[sorted_op[j]].priority;
            if (p2 < p1) {
                tmp = sorted_op[i];
                sorted_op[i] = sorted_op[j];
                sorted_op[j] = tmp;
            }
        }
    }

    for(i = 0;i < NB_ASM_REGS; i++) {
        if (clobber_regs[i])
            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
        else
            regs_allocated[i] = 0;
    }
    /* esp cannot be used */
    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
    /* ebp cannot be used yet */
    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;

    /* allocate registers and generate corresponding asm moves */
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        str = op->constraint;
        /* no need to allocate references */
        if (op->ref_index >= 0)
            continue;
        /* select if register is used for output, input or both */
        if (op->input_index >= 0) {
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
        } else if (j < nb_outputs) {
            reg_mask = REG_OUT_MASK;
        } else {
            reg_mask = REG_IN_MASK;
        }
        if (op->reg >= 0) {
            if (is_reg_allocated(op->reg))
                tcc_error("asm regvar requests register that's taken already");
            reg = op->reg;
            goto reg_found;
        }
    try_next:
        c = *str++;
        switch(c) {
        case '=':
            goto try_next;
        case '+':
            op->is_rw = 1;
            /* FALL THRU */
        case '&':
            if (j >= nb_outputs)
                tcc_error("'%c' modifier can only be applied to outputs", c);
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
            goto try_next;
        case 'A':
            /* allocate both eax and edx */
            if (is_reg_allocated(TREG_XAX) ||
                is_reg_allocated(TREG_XDX))
                goto try_next;
            op->is_llong = 1;
            op->reg = TREG_XAX;
            regs_allocated[TREG_XAX] |= reg_mask;
            regs_allocated[TREG_XDX] |= reg_mask;
            break;
        case 'a':
            reg = TREG_XAX;
            goto alloc_reg;
        case 'b':
            reg = 3;
            goto alloc_reg;
        case 'c':
            reg = TREG_XCX;
            goto alloc_reg;
        case 'd':
            reg = TREG_XDX;
            goto alloc_reg;
        case 'S':
            reg = 6;
            goto alloc_reg;
        case 'D':
            reg = 7;
        alloc_reg:
            if (is_reg_allocated(reg))
                goto try_next;
            goto reg_found;
        case 'q':
            /* eax, ebx, ecx or edx */
            for(reg = 0; reg < 4; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        case 'r':
        case 'R':
        case 'p': /* A general address, for x86(64) any register is acceptable*/
            /* any general register */
            for(reg = 0; reg < 8; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        reg_found:
            /* now we can reload in the register */
            op->is_llong = 0;
            op->reg = reg;
            regs_allocated[reg] |= reg_mask;
            break;
        case 'e':
        case 'i':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
                goto try_next;
            break;
        case 'I':
        case 'N':
        case 'M':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
                goto try_next;
            break;
        case 'm':
        case 'g':
            /* nothing special to do because the operand is already in
               memory, except if the pointer itself is stored in a
               memory variable (VT_LLOCAL case) */
            /* XXX: fix constant case */
            /* if it is a reference to a memory zone, it must lie
               in a register, so we reserve the register in the
               input registers and a load will be generated
               later */
            if (j < nb_outputs || c == 'm') {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    /* any general register */
                    for(reg = 0; reg < 8; reg++) {
                        if (!(regs_allocated[reg] & REG_IN_MASK))
                            goto reg_found1;
                    }
                    goto try_next;
                reg_found1:
                    /* now we can reload in the register */
                    regs_allocated[reg] |= REG_IN_MASK;
                    op->reg = reg;
                    op->is_memory = 1;
                }
            }
            break;
        default:
            tcc_error("asm constraint %d ('%s') could not be satisfied",
                      j, op->constraint);
            break;
        }
        /* if a reference is present for that operand, we assign it too */
        if (op->input_index >= 0) {
            operands[op->input_index].reg = op->reg;
            operands[op->input_index].is_llong = op->is_llong;
        }
    }

    /* compute out_reg.  It is used to store output registers to memory
       locations referenced by pointers (VT_LLOCAL case) */
    *pout_reg = -1;
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0 &&
            (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
            !op->is_memory) {
            for(reg = 0; reg < 8; reg++) {
                if (!(regs_allocated[reg] & REG_OUT_MASK))
                    goto reg_found2;
            }
            tcc_error("could not find free output register for reloading");
        reg_found2:
            *pout_reg = reg;
            break;
        }
    }

    /* print sorted constraints */
#ifdef ASM_DEBUG
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
               i,
               op->id ? get_tok_str(op->id, NULL) : "",
               op->constraint,
               op->vt->r,
               op->reg);
    }
    if (*pout_reg >= 0)
        printf("out_reg=%d\n", *pout_reg);
#endif
}

ST_FUNC void subst_asm_operand(CString *add_str,
                               SValue *sv, int modifier)
{
    int r, reg, size, val;
    char buf[64];

    r = sv->r;
    if ((r & VT_VALMASK) == VT_CONST) {
        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
            modifier != 'P')
            cstr_ccat(add_str, '$');
        if (r & VT_SYM) {
            const char *name = get_tok_str(sv->sym->v, NULL);
            if (sv->sym->v >= SYM_FIRST_ANOM) {
                /* In case of anonymous symbols ("L.42", used
                   for static data labels) we can't find them
                   in the C symbol table when later looking up
                   this name.  So enter them now into the asm label
                   list when we still know the symbol.  */
                get_asm_sym(tok_alloc_const(name), sv->sym);
            }
            if (tcc_state->leading_underscore)
                cstr_ccat(add_str, '_');
            cstr_cat(add_str, name, -1);
            if ((uint32_t)sv->c.i == 0)
                goto no_offset;
            cstr_ccat(add_str, '+');
        }
        val = sv->c.i;
        if (modifier == 'n')
            val = -val;
        snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
        cstr_cat(add_str, buf, -1);
    no_offset:;
#ifdef TCC_TARGET_X86_64
        if (r & VT_LVAL)
            cstr_cat(add_str, "(%rip)", -1);
#endif
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
#ifdef TCC_TARGET_X86_64
        snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
#else
        snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
#endif
        cstr_cat(add_str, buf, -1);
    } else if (r & VT_LVAL) {
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_internal_error("");
        snprintf(buf, sizeof(buf), "(%%%s)",
#ifdef TCC_TARGET_X86_64
                 get_tok_str(TOK_ASM_rax + reg, NULL)
#else
                 get_tok_str(TOK_ASM_eax + reg, NULL)
#endif
                 );
        cstr_cat(add_str, buf, -1);
    } else {
        /* register case */
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_internal_error("");

        /* choose register operand size */
        if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
            (sv->type.t & VT_BTYPE) == VT_BOOL)
            size = 1;
        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
            size = 2;
#ifdef TCC_TARGET_X86_64
        else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
                 (sv->type.t & VT_BTYPE) == VT_PTR)
            size = 8;
#endif
        else
            size = 4;
        if (size == 1 && reg >= 4)
            size = 4;

        if (modifier == 'b') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = 1;
        } else if (modifier == 'h') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = -1;
        } else if (modifier == 'w') {
            size = 2;
        } else if (modifier == 'k') {
            size = 4;
#ifdef TCC_TARGET_X86_64
        } else if (modifier == 'q') {
            size = 8;
#endif
        }

        switch(size) {
        case -1:
            reg = TOK_ASM_ah + reg;
            break;
        case 1:
            reg = TOK_ASM_al + reg;
            break;
        case 2:
            reg = TOK_ASM_ax + reg;
            break;
        default:
            reg = TOK_ASM_eax + reg;
            break;
#ifdef TCC_TARGET_X86_64
        case 8:
            reg = TOK_ASM_rax + reg;
            break;
#endif
        }
        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
        cstr_cat(add_str, buf, -1);
    }
}

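/* Modifier examples (as used in extended asm templates, derived from
   the code above): with operand 0 allocated to %eax, "%0" substitutes
   %eax (size taken from the C type), "%b0" -> %al, "%h0" -> %ah,
   "%w0" -> %ax and, on x86-64, "%q0" -> %rax.  */
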
/* generate prolog and epilog code for asm statement */
ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
                          int nb_outputs, int is_output,
                          uint8_t *clobber_regs,
                          int out_reg)
{
    uint8_t regs_allocated[NB_ASM_REGS];
    ASMOperand *op;
    int i, reg;

    /* Strictly speaking %Xbp and %Xsp should be included in the
       call-preserved registers, but currently it doesn't matter.  */
#ifdef TCC_TARGET_X86_64
#ifdef TCC_TARGET_PE
    static const uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
#else
    static const uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
#endif
#else
    static const uint8_t reg_saved[] = { 3, 6, 7 };
#endif

    /* mark all used registers */
    memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
    for(i = 0; i < nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0)
            regs_allocated[op->reg] = 1;
    }
    if (!is_output) {
        /* generate reg save code */
        for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                if (reg >= 8)
                    g(0x41), reg-=8;
                g(0x50 + reg);
            }
        }

        /* generate load code */
        for(i = 0; i < nb_operands; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
                    op->is_memory) {
                    /* memory reference case (for both input and
                       output cases) */
                    SValue sv;
                    sv = *op->vt;
                    sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
                    sv.type.t = VT_PTR;
                    load(op->reg, &sv);
                } else if (i >= nb_outputs || op->is_rw) {
                    /* load value in register */
                    load(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        load(TREG_XDX, &sv);
                    }
                }
            }
        }
    } else {
        /* generate save code */
        for(i = 0 ; i < nb_outputs; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    if (!op->is_memory) {
                        SValue sv;
                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
                        sv.type.t = VT_PTR;
                        load(out_reg, &sv);

                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | out_reg;
                        store(op->reg, &sv);
                    }
                } else {
                    store(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        store(TREG_XDX, &sv);
                    }
                }
            }
        }
        /* generate reg restore code */
        for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                if (reg >= 8)
                    g(0x41), reg-=8;
                g(0x58 + reg);
            }
        }
    }
}

ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
{
    int reg;
#ifdef TCC_TARGET_X86_64
    unsigned int type;
#endif

    if (!strcmp(str, "memory") ||
        !strcmp(str, "cc") ||
        !strcmp(str, "flags"))
        return;
    reg = tok_alloc_const(str);
    if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
        reg -= TOK_ASM_eax;
    } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
        reg -= TOK_ASM_ax;
#ifdef TCC_TARGET_X86_64
    } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
        reg -= TOK_ASM_rax;
    } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
        ;
#endif
    } else {
        tcc_error("invalid clobber register '%s'", str);
    }
    clobber_regs[reg] = 1;
}