/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#define USING_GLOBALS
#include "tcc.h"

#define MAX_OPERANDS 3

#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps
#define OPC_B          0x01  /* only used with OPC_WL */
#define OPC_WL         0x02  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04 /* register is added to opcode */
#define OPC_MODRM      0x08 /* modrm encoding */

#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10 /* add fwait opcode */
#define OPC_SHIFT      0x20 /* shift opcodes */
#define OPC_ARITH      0x30 /* arithmetic opcodes */
#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
#define OPC_TEST       0x50 /* test opcodes */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
#define OPC_48        0x200 /* Always has REX prefix */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

#define OPC_GROUP_SHIFT 13
/* in order to compress the operand type, we use specific operands and
   we OR only with EA */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
# define OP_EA32  (OP_EA << 1)
#else
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif
typedef struct ASMInstr {
    uint16_t sym;
    uint16_t opcode;
    uint16_t instr_type;
    uint8_t nb_ops;
    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
} ASMInstr;

typedef struct Operand {
    uint32_t type;
    int8_t  reg; /* register, -1 if none */
    int8_t  reg2; /* second register, -1 if none */
    uint8_t shift;
    ExprValue e;
} Operand;
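
/* Illustration (not part of the original source): for the AT&T operand
   "8(%ebx,%esi,2)", parse_operand() below fills in type = OP_EA,
   e.v = 8, reg = 3 (%ebx), reg2 = 6 (%esi) and shift = 1 (scale 2). */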
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
#define NB_TEST_OPCODES 30

static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};
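
/* Illustration (not part of the original source): these are the standard
   x86 4-bit condition codes; asm_opcode() adds them to the base opcode of
   jcc (0x70), setcc (0x0f90) and cmovcc (0x0f40) templates, so e.g.
   "jne" encodes as 0x70 + 0x05 = 0x75 in its short form. */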
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};
static const ASMInstr asm_instrs[] = {
#define ALT(x) x
/* This removes a 0x0f in the second byte */
#define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
/* This constructs instr_type from opcode, type and group. */
#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
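/* Illustration (not part of the original source): for a two-byte opcode
   such as bswap (0x0fc8), O() folds the 0x0f away so the value fits the
   16-bit opcode field (O(0x0fc8) == 0xc8), while T() sets OPC_0F in
   instr_type; asm_opcode() re-expands the 0x0f prefix when emitting. */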
#define DEF_ASM_OP0(name, opcode)
#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
    /* last operation */
    { 0, },
};
static const uint16_t op0_codes[] = {
#define ALT(x)
#define DEF_ASM_OP0(x, opcode) opcode,
#define DEF_ASM_OP0L(name, opcode, group, instr_type)
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
};
static inline int get_reg_shift(TCCState *s1)
{
    int shift, v;
    v = asm_int_expr(s1);
    switch(v) {
    case 1:
        shift = 0;
        break;
    case 2:
        shift = 1;
        break;
    case 4:
        shift = 2;
        break;
    case 8:
        shift = 3;
        break;
    default:
        expect("1, 2, 4 or 8 constant");
        shift = 0;
        break;
    }
    return shift;
}
#ifdef TCC_TARGET_X86_64
static int asm_parse_numeric_reg(int t, unsigned int *type)
{
    int reg = -1;
    if (t >= TOK_IDENT && t < tok_ident) {
        const char *s = table_ident[t - TOK_IDENT]->str;
        char c;
        *type = OP_REG64;
        if (*s == 'c') {
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
            return -1;
        /* Don't allow leading '0'. */
        if ((c = *s++) >= '1' && c <= '9')
            reg = c - '0';
        else
            return -1;
        if ((c = *s) >= '0' && c <= '5')
            s++, reg = reg * 10 + c - '0';
        if (reg > 15)
            return -1;
        if ((c = *s) == 0)
            ;
        else if (*type != OP_REG64)
            return -1;
        else if (c == 'b' && !s[1])
            *type = OP_REG8;
        else if (c == 'w' && !s[1])
            *type = OP_REG16;
        else if (c == 'd' && !s[1])
            *type = OP_REG32;
        else
            return -1;
    }
    return reg;
}
#endif
static int asm_parse_reg(unsigned int *type)
{
    int reg = 0;
    *type = 0;
    if (tok != '%')
        goto error_32;
    next();
    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
        reg = tok - TOK_ASM_eax;
        *type = OP_REG32;
#ifdef TCC_TARGET_X86_64
    } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
        reg = tok - TOK_ASM_rax;
        *type = OP_REG64;
    } else if (tok == TOK_ASM_rip) {
        reg = -2; /* Probably should use different escape code. */
        *type = OP_REG64;
    } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
               && (*type == OP_REG32 || *type == OP_REG64)) {
        ;
#endif
    } else {
    error_32:
        expect("register");
    }
    next();
    return reg;
}
static void parse_operand(TCCState *s1, Operand *op)
{
    ExprValue e;
    int reg, indir;
    const char *p;

    indir = 0;
    if (tok == '*') {
        next();
        indir = OP_INDIR;
    }

    if (tok == '%') {
        next();
        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
            reg = tok - TOK_ASM_al;
            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
            op->reg = reg & 7;
            if ((op->type & OP_REG) && op->reg == TREG_XAX)
                op->type |= OP_EAX;
            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
                op->type |= OP_CL;
            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
                op->type |= OP_DX;
        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
            op->type = OP_DB;
            op->reg = tok - TOK_ASM_dr0;
        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
            op->type = OP_SEG;
            op->reg = tok - TOK_ASM_es;
        } else if (tok == TOK_ASM_st) {
            op->type = OP_ST;
            op->reg = 0;
            next();
            if (tok == '(') {
                next();
                if (tok != TOK_PPNUM)
                    goto reg_error;
                p = tokc.str.data;
                reg = p[0] - '0';
                if ((unsigned)reg >= 8 || p[1] != '\0')
                    goto reg_error;
                op->reg = reg;
                next();
                skip(')');
            }
            if (op->reg == 0)
                op->type |= OP_ST0;
            goto no_skip;
#ifdef TCC_TARGET_X86_64
        } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
            op->type = OP_REG8 | OP_REG8_LOW;
            op->reg = 4 + tok - TOK_ASM_spl;
        } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
            ;
#endif
        } else {
        reg_error:
            tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
        }
        next();
    no_skip: ;
    } else if (tok == '$') {
        /* constant value */
        next();
        asm_expr(s1, &e);
        op->type = OP_IM32;
        op->e = e;
        if (!op->e.sym) {
            if (op->e.v == (uint8_t)op->e.v)
                op->type |= OP_IM8;
            if (op->e.v == (int8_t)op->e.v)
                op->type |= OP_IM8S;
            if (op->e.v == (uint16_t)op->e.v)
                op->type |= OP_IM16;
#ifdef TCC_TARGET_X86_64
            if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
                op->type = OP_IM64;
#endif
        }
    } else {
        /* address(reg,reg2,shift) with all variants */
        op->type = OP_EA;
        op->reg = -1;
        op->reg2 = -1;
        op->shift = 0;
        if (tok != '(') {
            asm_expr(s1, &e);
            op->e = e;
        } else {
            next();
            if (tok == '%') {
                unget_tok('(');
                op->e.v = 0;
                op->e.sym = NULL;
            } else {
                /* bracketed offset expression */
                asm_expr(s1, &e);
                if (tok != ')')
                    expect(")");
                next();
                op->e.v = e.v;
                op->e.sym = e.sym;
            }
            op->e.pcrel = 0;
        }
        if (tok == '(') {
            unsigned int type = 0;
            next();
            if (tok != ',') {
                op->reg = asm_parse_reg(&type);
            }
            if (tok == ',') {
                next();
                if (tok != ',') {
                    op->reg2 = asm_parse_reg(&type);
                }
                if (tok == ',') {
                    next();
                    op->shift = get_reg_shift(s1);
                }
            }
            if (type & OP_REG32)
                op->type |= OP_EA32;
            skip(')');
        }
        if (op->reg == -1 && op->reg2 == -1)
            op->type |= OP_ADDR;
    }
    op->type |= indir;
}
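
/* Illustration (not part of the original source): parse_operand() accepts
   the usual AT&T forms: "%eax" (register), "$42" or "$sym" (immediate),
   "sym" (address), "(%ebx)" and "8(%ebx,%esi,2)" (effective address), and
   "*%eax" / "*(%eax)" (indirection for indirect jumps and calls). */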
/* XXX: unify with C code output ? */
ST_FUNC void gen_expr32(ExprValue *pe)
{
    if (pe->pcrel)
        /* If PC-relative, always set VT_SYM, even without symbol,
           so as to force a relocation to be emitted. */
        gen_addrpc32(VT_SYM, pe->sym, pe->v);
    else
        gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}

#ifdef TCC_TARGET_X86_64
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
/* XXX: unify with C code output ? */
static void gen_disp32(ExprValue *pe)
{
    Sym *sym = pe->sym;
    ElfSym *esym = elfsym(sym);
    if (esym && esym->st_shndx == cur_text_section->sh_num) {
        /* same section: we can output an absolute value. Note
           that the TCC compiler behaves differently here because
           it always outputs a relocation to ease (future) code
           elimination in the linker */
        gen_le32(pe->v + esym->st_value - ind - 4);
    } else {
        if (sym && sym->type.t == VT_VOID) {
            sym->type.t = VT_FUNC;
            sym->type.ref = NULL;
        }
        gen_addrpc32(VT_SYM, sym, pe->v);
    }
}
/* generate the modrm operand */
static inline int asm_modrm(int reg, Operand *op)
{
    int mod, reg1, reg2, sib_reg1;

    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
        g(0xc0 + (reg << 3) + op->reg);
    } else if (op->reg == -1 && op->reg2 == -1) {
        /* displacement only */
#ifdef TCC_TARGET_X86_64
        g(0x04 + (reg << 3));
        g(0x25);
#else
        g(0x05 + (reg << 3));
#endif
        gen_expr32(&op->e);
#ifdef TCC_TARGET_X86_64
    } else if (op->reg == -2) {
        ExprValue *pe = &op->e;
        g(0x05 + (reg << 3));
        gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
        return ind;
#endif
    } else {
        sib_reg1 = op->reg;
        /* first compute displacement encoding */
        if (sib_reg1 == -1) {
            sib_reg1 = 5;
            mod = 0x00;
        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
            mod = 0x00;
        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
            mod = 0x40;
        } else {
            mod = 0x80;
        }
        /* compute if sib byte needed */
        reg1 = op->reg;
        if (op->reg2 != -1)
            reg1 = 4;
        g(mod + (reg << 3) + reg1);
        if (reg1 == 4) {
            /* add sib byte */
            reg2 = op->reg2;
            if (reg2 == -1)
                reg2 = 4; /* indicate no index */
            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
        }
        /* add offset */
        if (mod == 0x40) {
            g(op->e.v);
        } else if (mod == 0x80 || op->reg == -1) {
            gen_expr32(&op->e);
        }
    }
    return 0;
}
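
/* Illustration (not part of the original source): for
   "movl %eax, 8(%ebx,%esi,2)" the bytes are 0x89 (opcode), then modrm
   0x44 (mod=01 disp8, reg=000 %eax, rm=100 -> SIB follows), sib 0x73
   (scale=01, index=110 %esi, base=011 %ebx) and the disp8 byte 0x08. */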
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? 0x48 : 0;
    int saw_high_8bit = 0;
    int i;
    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later). */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                if (ops[i].reg >= 8) {
                    rex |= REX_B;
                    ops[i].reg -= 8;
                } else if (ops[i].type & OP_REG8_LOW)
                    rex |= 0x40;
                else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
                    /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                    saw_high_8bit = ops[i].reg;
                break;
            }
        }
    } else {
        if (regi != -1) {
            if (ops[regi].reg >= 8) {
                rex |= REX_R;
                ops[regi].reg -= 8;
            } else if (ops[regi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[regi].reg;
        }
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
            if (ops[rmi].reg >= 8) {
                rex |= REX_B;
                ops[rmi].reg -= 8;
            } else if (ops[rmi].type & OP_REG8_LOW)
                rex |= 0x40;
            else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
                saw_high_8bit = ops[rmi].reg;
        }
        if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif
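
/* Illustration (not part of the original source): "movq %rax, %r8" needs
   REX.W for the 64-bit operand size and REX.B for the extended mod/rm
   register, so asm_rex() emits 0x48 | 0x01 = 0x49 and the instruction
   encodes as 49 89 c0 (%r8 is reduced to register number 0 beforehand). */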
static void maybe_print_stats (void)
{
    static int already;

    if (0 && !already)
    /* print stats about opcodes */
    {
        const struct ASMInstr *pa;
        int freq[4];
        int op_vals[500];
        int nb_op_vals, i, j;

        already = 1;
        nb_op_vals = 0;
        memset(freq, 0, sizeof(freq));
        for(pa = asm_instrs; pa->sym != 0; pa++) {
            freq[pa->nb_ops]++;
            //for(i=0;i<pa->nb_ops;i++) {
            for(j=0;j<nb_op_vals;j++) {
                //if (pa->op_type[i] == op_vals[j])
                if (pa->instr_type == op_vals[j])
                    goto found;
            }
            //op_vals[nb_op_vals++] = pa->op_type[i];
            op_vals[nb_op_vals++] = pa->instr_type;
        found: ;
            //}
        }
        for(i=0;i<nb_op_vals;i++) {
            int v = op_vals[i];
            //if ((v & (v - 1)) != 0)
                printf("%3d: %08x\n", i, v);
        }
        printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
               (int)sizeof(asm_instrs),
               (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
               freq[0], freq[1], freq[2], freq[3]);
    }
}
ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{
    const ASMInstr *pa;
    int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p;
    int nb_ops, s;
    Operand ops[MAX_OPERANDS], *pop;
    int op_type[3]; /* decoded op type */
    int alltypes; /* OR of all operand types */
    int autosize;
    int p66;
#ifdef TCC_TARGET_X86_64
    int rex64;
#endif

    maybe_print_stats();
    /* force synthetic ';' after prefix instruction, so we can handle */
    /* one-line things like "rep stosb" instead of only "rep\nstosb" */
    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
        unget_tok(';');

    /* get operands */
    pop = ops;
    nb_ops = 0;
    seg_prefix = 0;
    alltypes = 0;
    for(;;) {
        if (tok == ';' || tok == TOK_LINEFEED)
            break;
        if (nb_ops >= MAX_OPERANDS) {
            tcc_error("incorrect number of operands");
        }
        parse_operand(s1, pop);
        if (tok == ':') {
            if (pop->type != OP_SEG || seg_prefix)
                tcc_error("incorrect prefix");
            seg_prefix = segment_prefixes[pop->reg];
            next();
            parse_operand(s1, pop);
            if (!(pop->type & OP_EA)) {
                tcc_error("segment prefix must be followed by memory reference");
            }
        }
        pop++;
        nb_ops++;
        if (tok != ',')
            break;
        next();
    }

    s = 0; /* avoid warning */
again:
    /* optimize matching by using a lookup table (no hashing is needed
       !) */
    for(pa = asm_instrs; pa->sym != 0; pa++) {
        int it = pa->instr_type & OPCT_MASK;
        s = 0;
        if (it == OPC_FARITH) {
            v = opcode - pa->sym;
            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
                continue;
        } else if (it == OPC_ARITH) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
            if ((pa->instr_type & OPC_BWLX) == OPC_WLX) {
                /* We need to reject the xxxb opcodes that we accepted above.
                   Note that pa->sym for WLX opcodes is the 'w' token,
                   to get the 'b' token subtract one.  */
                if (((opcode - pa->sym + 1) % NBWLX) == 0)
                    continue;
                s++;
            }
        } else if (it == OPC_SHIFT) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
        } else if (it == OPC_TEST) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
                continue;
            /* cmovxx is a test opcode but accepts multiple sizes.
               The suffixes aren't encoded in the table, instead we
               simply force size autodetection always and deal with suffixed
               variants below when we don't find e.g. "cmovzl". */
            if (pa->instr_type & OPC_WLX)
                s = NBWLX - 1;
        } else if (pa->instr_type & OPC_B) {
#ifdef TCC_TARGET_X86_64
            /* Some instructions don't have the full size but only
               bwl form.  insb e.g. */
            if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
                && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
#endif
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
                continue;
            s = opcode - pa->sym;
        } else if (pa->instr_type & OPC_WLX) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
            s = opcode - pa->sym + 1;
        } else {
            if (pa->sym != opcode)
                continue;
        }
        if (pa->nb_ops != nb_ops)
            continue;
#ifdef TCC_TARGET_X86_64
        /* Special case for moves.  Selecting the IM64->REG64 form
           should only be done if we really have an >32bit imm64, and that
           is hardcoded.  Ignore it here.  */
        if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
            && (ops[1].type & OP_REG) == OP_REG64
            && !(pa->instr_type & OPC_0F))
            continue;
#endif
        /* now decode and check each operand */
        alltypes = 0;
        for(i = 0; i < nb_ops; i++) {
            int op1, op2;
            op1 = pa->op_type[i];
            op2 = op1 & 0x1f;
            switch(op2) {
            case OPT_IM:
                v = OP_IM8 | OP_IM16 | OP_IM32;
                break;
            case OPT_REG:
                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_REGW:
                v = OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_IMW:
                v = OP_IM16 | OP_IM32;
                break;
            case OPT_MMXSSE:
                v = OP_MMX | OP_SSE;
                break;
            case OPT_DISP:
            case OPT_DISP8:
                v = OP_ADDR;
                break;
            default:
                v = 1 << op2;
                break;
            }
            if (op1 & OPT_EA)
                v |= OP_EA;
            op_type[i] = v;
            if ((ops[i].type & v) == 0)
                goto next;
            alltypes |= ops[i].type;
        }
        (void)alltypes; /* maybe unused */
        /* all is matching ! */
        break;
    next: ;
    }
    if (pa->sym == 0) {
        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
            int b;
            b = op0_codes[opcode - TOK_ASM_first];
            if (b & 0xff00)
                g(b >> 8);
            g(b);
            return;
        } else if (opcode <= TOK_ASM_alllast) {
            tcc_error("bad operand with opcode '%s'",
                      get_tok_str(opcode, NULL));
        } else {
            /* Special case for cmovcc, we accept size suffixes but ignore
               them, but we don't want them to blow up our tables.  */
            TokenSym *ts = table_ident[opcode - TOK_IDENT];
            if (ts->len >= 6
                && strchr("wlq", ts->str[ts->len-1])
                && !memcmp(ts->str, "cmov", 4)) {
                opcode = tok_alloc(ts->str, ts->len-1)->tok;
                goto again;
            }
            tcc_error("unknown opcode '%s'", ts->str);
        }
    }
    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
    autosize = NBWLX-1;
#ifdef TCC_TARGET_X86_64
    /* XXX the autosize should rather be zero, to not have to adjust this
       all the time. */
    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
        autosize = NBWLX-2;
#endif
    if (s == autosize) {
        /* Check for register operands providing hints about the size.
           Start from the end, i.e. destination operands.  This matters
           only for opcodes accepting different sized registers, lar and lsl
           are such opcodes. */
        for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
                s = reg_to_size[ops[i].type & OP_REG];
        }
        if (s == autosize) {
            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                s = 2;
            else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                     (ops[0].type & OP_EA))
                s = NBWLX - 2;
            else
                tcc_error("cannot infer opcode suffix");
        }
    }

#ifdef TCC_TARGET_X86_64
    rex64 = 0;
    if (pa->instr_type & OPC_48)
        rex64 = 1;
    else if (s == 3 || (alltypes & OP_REG64)) {
        /* generate REX prefix */
        int default64 = 0;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
                /* If only 64bit regs are accepted in one operand
                   this is a default64 instruction without need for
                   REX prefixes, except for movabs(0xb8). */
                default64 = 1;
                break;
            }
        }
        /* XXX find better encoding for the default64 instructions. */
        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
              && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
              && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
              && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
              && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
            && !default64)
            rex64 = 1;
    }
#endif
    /* now generates the operation */
    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
        g(0x9b);
    if (seg_prefix)
        g(seg_prefix);
#ifdef TCC_TARGET_X86_64
    /* Generate addr32 prefix if needed */
    for(i = 0; i < nb_ops; i++) {
        if (ops[i].type & OP_EA32) {
            g(0x67);
            break;
        }
    }
#endif
    /* generate data16 prefix if needed */
    p66 = 0;
    if (s == 1)
        p66 = 1;
    else {
        /* accepting mmx+sse in all operands --> needs 0x66 to
           switch to sse mode.  Accepting only sse in an operand --> is
           already SSE insn and needs 0x66/f2/f3 handling. */
        for (i = 0; i < nb_ops; i++)
            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
                && ops[i].type & OP_SSE)
                p66 = 1;
    }
    if (p66)
        g(0x66);

    v = pa->opcode;
    p = v >> 8;  /* possibly prefix byte(s) */
    switch (p) {
        case 0: break;  /* no prefix */
        case 0x48: break; /* REX, handled elsewhere */
        case 0x66:
        case 0x67:
        case 0xf2:
        case 0xf3: v = v & 0xff; g(p); break;
        case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */
        case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */
        case 0xdc: case 0xdd: case 0xde: case 0xdf: break;
        default: tcc_error("bad prefix 0x%02x in opcode table", p); break;
    }
    if (pa->instr_type & OPC_0F)
        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
        /* kludge for imul $im, %reg */
        nb_ops = 3;
        ops[2] = ops[1];
        op_type[2] = op_type[1];
    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
        v--; /* int $3 case */
        nb_ops = 0;
    } else if ((v == 0x06 || v == 0x07)) {
        if (ops[0].reg >= 4) {
            /* push/pop %fs or %gs */
            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
        } else {
            v += ops[0].reg << 3;
        }
        nb_ops = 0;
    } else if (v <= 0x05) {
        /* arith case */
        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
        /* fpu arith case */
        v += ((opcode - pa->sym) / 6) << 3;
    }
    /* search which operand will be used for modrm */
    modrm_index = -1;
    modreg_index = -1;
    if (pa->instr_type & OPC_MODRM) {
        if (!nb_ops) {
            /* A modrm opcode without operands is a special case (e.g. mfence).
               It has a group and acts as if there's a register operand 0
               (ax). */
            i = 0;
            ops[i].type = OP_REG;
            ops[i].reg = 0;
            goto modrm_found;
        }
        /* first look for an ea operand */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & OP_EA)
                goto modrm_found;
        }
        /* then if not found, a register or indirection (shift instructions) */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
                goto modrm_found;
        }
#ifdef ASM_DEBUG
        tcc_error("bad op table");
#endif
    modrm_found:
        modrm_index = i;
        /* if a register is used in another operand then it is
           used instead of group */
        for(i = 0;i < nb_ops; i++) {
            int t = op_type[i];
            if (i != modrm_index &&
                (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
                modreg_index = i;
                break;
            }
        }
    }
#ifdef TCC_TARGET_X86_64
    asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
#endif

    if (pa->instr_type & OPC_REG) {
        /* mov $im, %reg case */
        if (v == 0xb0 && s >= 1)
            v += 7;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                v += ops[i].reg;
                break;
            }
        }
    }
    if (pa->instr_type & OPC_B)
        v += s >= 1;
    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
        ElfSym *esym;
        int jmp_disp;

        /* see if we can really generate the jump with a byte offset */
        esym = elfsym(ops[0].e.sym);
        if (!esym || esym->st_shndx != cur_text_section->sh_num)
            goto no_short_jump;
        jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff);
        if (jmp_disp == (int8_t)jmp_disp) {
            /* OK to generate jump */
            ops[0].e.sym = 0;
            ops[0].e.v = jmp_disp;
            op_type[0] = OP_IM8S;
        } else {
        no_short_jump:
            /* long jump will be allowed. need to modify the
               opcode slightly */
            if (v == 0xeb) /* jmp */
                v = 0xe9;
            else if (v == 0x70) /* jcc */
                v += 0x0f10;
            else
                tcc_error("invalid displacement");
        }
    }
    if (OPCT_IS(pa->instr_type, OPC_TEST))
        v += test_bits[opcode - pa->sym];
    op1 = v >> 16;
    if (op1)
        g(op1);
    op1 = (v >> 8) & 0xff;
    if (op1)
        g(op1);
    g(v);

    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
        reg = (opcode - pa->sym) / NBWLX;
        if (reg == 6)
            reg = 7;
    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
        reg = (opcode - pa->sym) / NBWLX;
    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
        reg = (opcode - pa->sym) / 6;
    } else {
        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
    }

    pc = 0;
    if (pa->instr_type & OPC_MODRM) {
        /* if a register is used in another operand then it is
           used instead of group */
        if (modreg_index >= 0)
            reg = ops[modreg_index].reg;
        pc = asm_modrm(reg, &ops[modrm_index]);
    }
    /* emit constants */
#ifndef TCC_TARGET_X86_64
    if (!(pa->instr_type & OPC_0F)
        && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
        /* ljmp or lcall kludge */
        gen_expr32(&ops[1].e);
        if (ops[0].e.sym)
            tcc_error("cannot relocate");
        gen_le16(ops[0].e.v);
        return;
    }
#endif
    for(i = 0;i < nb_ops; i++) {
        v = op_type[i];
        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
            /* if multiple sizes are given it means we must look
               at the op size */
            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
                if (s == 0)
                    v = OP_IM8;
                else if (s == 1)
                    v = OP_IM16;
                else if (s == 2 || (v & OP_IM64) == 0)
                    v = OP_IM32;
                else
                    v = OP_IM64;
            }

            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
                tcc_error("cannot relocate");

            if (v & (OP_IM8 | OP_IM8S)) {
                g(ops[i].e.v);
            } else if (v & OP_IM16) {
                gen_le16(ops[i].e.v);
#ifdef TCC_TARGET_X86_64
            } else if (v & OP_IM64) {
                gen_expr64(&ops[i].e);
#endif
            } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
                gen_disp32(&ops[i].e);
            } else {
                gen_expr32(&ops[i].e);
            }
        }
    }

    /* after immediate operands, adjust pc-relative address */
    if (pc)
        add32le(cur_text_section->data + pc - 4, pc - ind);
}
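
/* Illustration (not part of the original source): asm_opcode() emits in
   hardware order: fwait, segment override, the 0x67/0x66 size prefixes,
   any prefix byte from the opcode table (0x66/0x67/0xf2/0xf3), the REX
   byte on x86-64, the (possibly 0x0f-prefixed) opcode, modrm/sib plus
   displacement, and finally the immediate operands. */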
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* we take the lowest priority */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'R':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'e':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}

static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}
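
/* Illustration (not part of the original source): for a constraint such
   as "=&r", skip_constraint_modifiers() steps past '=' (output), '&'
   (early clobber), '+' (read-write) and '%' (commutative), leaving "r"
   for constraint_priority() and the allocator below. */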
/* If T (a token) is of the form "%reg" returns the register
   number and type, otherwise return -1. */
ST_FUNC int asm_parse_regvar (int t)
{
    const char *s;
    Operand op;
    if (t < TOK_IDENT || (t & SYM_FIELD))
        return -1;
    s = table_ident[t - TOK_IDENT]->str;
    if (s[0] != '%')
        return -1;
    t = tok_alloc_const(s + 1);
    unget_tok(t);
    unget_tok('%');
    parse_operand(tcc_state, &op);
    /* Accept only integer regs for now. */
    if (op.type & OP_REG)
        return op.reg;
    else
        return -1;
}
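
/* Illustration (not part of the original source): this services register
   variable declarations such as
       register int x __asm__("%eax");
   the quoted name must start with '%' here; anything else yields -1. */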
#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)

ST_FUNC void asm_compute_constraints(ASMOperand *operands,
                                     int nb_operands, int nb_outputs,
                                     const uint8_t *clobber_regs,
                                     int *pout_reg)
{
    ASMOperand *op;
    int sorted_op[MAX_ASM_OPERANDS];
    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
    const char *str;
    uint8_t regs_allocated[NB_ASM_REGS];

    /* init fields */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        op->input_index = -1;
        op->ref_index = -1;
        op->reg = -1;
        op->is_memory = 0;
        op->is_rw = 0;
    }
    /* compute constraint priority and evaluate references to output
       constraints from input constraints */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        str = op->constraint;
        str = skip_constraint_modifiers(str);
        if (isnum(*str) || *str == '[') {
            /* this is a reference to another constraint */
            k = find_constraint(operands, nb_operands, str, NULL);
            if ((unsigned)k >= i || i < nb_outputs)
                tcc_error("invalid reference in constraint %d ('%s')",
                          i, str);
            op->ref_index = k;
            if (operands[k].input_index >= 0)
                tcc_error("cannot reference twice the same operand");
            operands[k].input_index = i;
            op->priority = 5;
        } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
                   && op->vt->sym
                   && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
            op->priority = 1;
            op->reg = reg;
        } else {
            op->priority = constraint_priority(str);
        }
    }
    /* sort operands according to their priority */
    for(i=0;i<nb_operands;i++)
        sorted_op[i] = i;
    for(i=0;i<nb_operands - 1;i++) {
        for(j=i+1;j<nb_operands;j++) {
            p1 = operands[sorted_op[i]].priority;
            p2 = operands[sorted_op[j]].priority;
            if (p2 < p1) {
                tmp = sorted_op[i];
                sorted_op[i] = sorted_op[j];
                sorted_op[j] = tmp;
            }
        }
    }

    for(i = 0;i < NB_ASM_REGS; i++) {
        if (clobber_regs[i])
            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
        else
            regs_allocated[i] = 0;
    }
    /* esp cannot be used */
    regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
    /* ebp cannot be used yet */
    regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
    /* allocate registers and generate corresponding asm moves */
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        str = op->constraint;
        /* no need to allocate references */
        if (op->ref_index >= 0)
            continue;
        /* select if register is used for output, input or both */
        if (op->input_index >= 0) {
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
        } else if (j < nb_outputs) {
            reg_mask = REG_OUT_MASK;
        } else {
            reg_mask = REG_IN_MASK;
        }
        if (op->reg >= 0) {
            if (is_reg_allocated(op->reg))
                tcc_error("asm regvar requests register that's taken already");
            reg = op->reg;
            goto reg_found;
        }
    try_next:
        c = *str++;
        switch(c) {
        case '=':
            goto try_next;
        case '+':
            op->is_rw = 1;
            /* FALL THRU */
        case '&':
            if (j >= nb_outputs)
                tcc_error("'%c' modifier can only be applied to outputs", c);
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
            goto try_next;
        case 'A':
            /* allocate both eax and edx */
            if (is_reg_allocated(TREG_XAX) ||
                is_reg_allocated(TREG_XDX))
                goto try_next;
            op->is_llong = 1;
            op->reg = TREG_XAX;
            regs_allocated[TREG_XAX] |= reg_mask;
            regs_allocated[TREG_XDX] |= reg_mask;
            break;
        case 'a':
            reg = TREG_XAX;
            goto alloc_reg;
        case 'b':
            reg = 3;
            goto alloc_reg;
        case 'c':
            reg = TREG_XCX;
            goto alloc_reg;
        case 'd':
            reg = TREG_XDX;
            goto alloc_reg;
        case 'S':
            reg = 6;
            goto alloc_reg;
        case 'D':
            reg = 7;
        alloc_reg:
            if (is_reg_allocated(reg))
                goto try_next;
            goto reg_found;
        case 'q':
            /* eax, ebx, ecx or edx */
            for(reg = 0; reg < 4; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        case 'r':
        case 'R':
        case 'p': /* A general address, for x86(64) any register is acceptable*/
            /* any general register */
            for(reg = 0; reg < 8; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        reg_found:
            /* now we can reload in the register */
            op->is_llong = 0;
            op->reg = reg;
            regs_allocated[reg] |= reg_mask;
            break;
        case 'e':
        case 'i':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
                goto try_next;
            break;
        case 'I':
        case 'N':
        case 'M':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
                goto try_next;
            break;
        case 'm':
        case 'g':
            /* nothing special to do because the operand is already in
               memory, except if the pointer itself is stored in a
               memory variable (VT_LLOCAL case) */
            /* XXX: fix constant case */
            /* if it is a reference to a memory zone, it must lie
               in a register, so we reserve the register in the
               input registers and a load will be generated
               later */
            if (j < nb_outputs || c == 'm') {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    /* any general register */
                    for(reg = 0; reg < 8; reg++) {
                        if (!(regs_allocated[reg] & REG_IN_MASK))
                            goto reg_found1;
                    }
                    goto try_next;
                reg_found1:
                    /* now we can reload in the register */
                    regs_allocated[reg] |= REG_IN_MASK;
                    op->reg = reg;
                    op->is_memory = 1;
                }
            }
            break;
        default:
            tcc_error("asm constraint %d ('%s') could not be satisfied",
                      j, op->constraint);
            break;
        }
        /* if a reference is present for that operand, we assign it too */
        if (op->input_index >= 0) {
            operands[op->input_index].reg = op->reg;
            operands[op->input_index].is_llong = op->is_llong;
        }
    }
    /* compute out_reg. It is used to store output registers to memory
       locations referenced by pointers (VT_LLOCAL case) */
    *pout_reg = -1;
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0 &&
            (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
            !op->is_memory) {
            for(reg = 0; reg < 8; reg++) {
                if (!(regs_allocated[reg] & REG_OUT_MASK))
                    goto reg_found2;
            }
            tcc_error("could not find free output register for reloading");
        reg_found2:
            *pout_reg = reg;
            break;
        }
    }

    /* print sorted constraints */
#ifdef ASM_DEBUG
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
               j,
               op->id ? get_tok_str(op->id, NULL) : "",
               op->constraint,
               op->vt->r,
               op->reg);
    }
    if (*pout_reg >= 0)
        printf("out_reg=%d\n", *pout_reg);
#endif
}
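
/* Illustration (not part of the original source): for a statement like
       __asm__("incl %0" : "=r"(dst) : "0"(src));
   operand 0 ("=r") gets a free general register via the 'r' case above,
   and operand 1 ("0") is a reference constraint, so it inherits operand
   0's register through ref_index/input_index instead of allocating. */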
ST_FUNC void subst_asm_operand(CString *add_str,
                               SValue *sv, int modifier)
{
    int r, reg, size, val;
    char buf[64];

    r = sv->r;
    if ((r & VT_VALMASK) == VT_CONST) {
        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
            modifier != 'P')
            cstr_ccat(add_str, '$');
        if (r & VT_SYM) {
            const char *name = get_tok_str(sv->sym->v, NULL);
            if (sv->sym->v >= SYM_FIRST_ANOM) {
                /* In case of anonymous symbols ("L.42", used
                   for static data labels) we can't find them
                   in the C symbol table when later looking up
                   this name.  So enter them now into the asm label
                   list when we still know the symbol. */
                get_asm_sym(tok_alloc_const(name), sv->sym);
            }
            if (tcc_state->leading_underscore)
                cstr_ccat(add_str, '_');
            cstr_cat(add_str, name, -1);
            if ((uint32_t)sv->c.i == 0)
                goto no_offset;
            cstr_ccat(add_str, '+');
        }
        val = sv->c.i;
        if (modifier == 'n')
            val = -val;
        snprintf(buf, sizeof(buf), "%d", val);
        cstr_cat(add_str, buf, -1);
    no_offset:;
#ifdef TCC_TARGET_X86_64
        if (r & VT_LVAL)
            cstr_cat(add_str, "(%rip)", -1);
#endif
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
#ifdef TCC_TARGET_X86_64
        snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
#else
        snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
#endif
        cstr_cat(add_str, buf, -1);
    } else if (r & VT_LVAL) {
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_internal_error("");
        snprintf(buf, sizeof(buf), "(%%%s)",
#ifdef TCC_TARGET_X86_64
                 get_tok_str(TOK_ASM_rax + reg, NULL)
#else
                 get_tok_str(TOK_ASM_eax + reg, NULL)
#endif
                 );
        cstr_cat(add_str, buf, -1);
    } else {
        /* register case */
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_internal_error("");

        /* choose register operand size */
        if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
            (sv->type.t & VT_BTYPE) == VT_BOOL)
            size = 1;
        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
            size = 2;
#ifdef TCC_TARGET_X86_64
        else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
                 (sv->type.t & VT_BTYPE) == VT_PTR)
            size = 8;
#endif
        else
            size = 4;
        if (size == 1 && reg >= 4)
            size = 4;

        if (modifier == 'b') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = 1;
        } else if (modifier == 'h') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = -1;
        } else if (modifier == 'w') {
            size = 2;
        } else if (modifier == 'k') {
            size = 4;
#ifdef TCC_TARGET_X86_64
        } else if (modifier == 'q') {
            size = 8;
#endif
        }

        switch(size) {
        case -1:
            reg = TOK_ASM_ah + reg;
            break;
        case 1:
            reg = TOK_ASM_al + reg;
            break;
        case 2:
            reg = TOK_ASM_ax + reg;
            break;
        default:
            reg = TOK_ASM_eax + reg;
            break;
#ifdef TCC_TARGET_X86_64
        case 8:
            reg = TOK_ASM_rax + reg;
            break;
#endif
        }
        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
        cstr_cat(add_str, buf, -1);
    }
}
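
/* Illustration (not part of the original source): with the operand in
   %eax, the substitution modifiers select the printed width: "%b0" ->
   %al, "%h0" -> %ah, "%w0" -> %ax, "%k0" -> %eax and, on x86-64,
   "%q0" -> %rax; 'c' and 'n' print a constant without the '$' ('n'
   negates it as well). */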
/* generate prolog and epilog code for asm statement */
ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
                          int nb_outputs, int is_output,
                          uint8_t *clobber_regs,
                          int out_reg)
{
    uint8_t regs_allocated[NB_ASM_REGS];
    ASMOperand *op;
    int i, reg;

    /* Strictly speaking %Xbp and %Xsp should be included in the
       call-preserved registers, but currently it doesn't matter. */
#ifdef TCC_TARGET_X86_64
#ifdef TCC_TARGET_PE
    static const uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
#else
    static const uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
#endif
#else
    static const uint8_t reg_saved[] = { 3, 6, 7 };
#endif

    /* mark all used registers */
    memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
    for(i = 0; i < nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0)
            regs_allocated[op->reg] = 1;
    }
    if (!is_output) {
        /* generate reg save code */
        for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                if (reg >= 8)
                    g(0x41), reg-=8;
                g(0x50 + reg);
            }
        }

        /* generate load code */
        for(i = 0; i < nb_operands; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
                    op->is_memory) {
                    /* memory reference case (for both input and
                       output cases) */
                    SValue sv;
                    sv = *op->vt;
                    sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
                    sv.type.t = VT_PTR;
                    load(op->reg, &sv);
                } else if (i >= nb_outputs || op->is_rw) {
                    /* load value in register */
                    load(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        load(TREG_XDX, &sv);
                    }
                }
            }
        }
    } else {
        /* generate save code */
        for(i = 0 ; i < nb_outputs; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    if (!op->is_memory) {
                        SValue sv;
                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
                        sv.type.t = VT_PTR;
                        load(out_reg, &sv);

                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | out_reg;
                        store(op->reg, &sv);
                    }
                } else {
                    store(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        store(TREG_XDX, &sv);
                    }
                }
            }
        }
        /* generate reg restore code */
        for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                if (reg >= 8)
                    g(0x41), reg-=8;
                g(0x58 + reg);
            }
        }
    }
}
ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
{
    int reg;
#ifdef TCC_TARGET_X86_64
    unsigned int type;
#endif

    if (!strcmp(str, "memory") ||
        !strcmp(str, "cc") ||
        !strcmp(str, "flags"))
        return;
    reg = tok_alloc_const(str);
    if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
        reg -= TOK_ASM_eax;
    } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
        reg -= TOK_ASM_ax;
#ifdef TCC_TARGET_X86_64
    } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
        reg -= TOK_ASM_rax;
    } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
        ;
#endif
    } else {
        tcc_error("invalid clobber register '%s'", str);
    }
    clobber_regs[reg] = 1;
}
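
/* Illustration (not part of the original source): a clobber list such as
       __asm__("cpuid" : : : "ebx", "memory");
   reaches this function once per string; "memory", "cc" and "flags" need
   no register reservation and return early, while "ebx" marks
   clobber_regs[3] so asm_gen_code() saves/restores it around the asm. */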