riscv: asm: Add branch to label
[tinycc.git] / i386-asm.c
blobea734ae830ec70c375031917adf7d17e664e6c50
/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
22 #define USING_GLOBALS
23 #include "tcc.h"
/* Upper bound on the number of operands parsed for one instruction. */
#define MAX_OPERANDS 3

/* Opcode token bounds used by asm_opcode(): tokens within
   [TOK_ASM_first, TOK_ASM_last] index op0_codes[]; anything up to
   TOK_ASM_alllast is still reported as a known opcode. */
#define TOK_ASM_first   TOK_ASM_clc
#define TOK_ASM_last    TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

/* instr_type, bits 0..3: size suffix handling and encoding style */
#define OPC_B     (1 << 0)          /* only used with OPC_WL */
#define OPC_WL    (1 << 1)          /* accepts w, l or no suffix */
#define OPC_BWL   (OPC_B | OPC_WL)  /* accepts b, w, l or no suffix */
#define OPC_REG   (1 << 2)          /* register is added to opcode */
#define OPC_MODRM (1 << 3)          /* modrm encoding */

/* instr_type, bits 4..6: an enumerated instruction class (not a bit set) */
#define OPCT_MASK  (7 << 4)
#define OPC_FWAIT  (1 << 4)  /* add fwait opcode */
#define OPC_SHIFT  (2 << 4)  /* shift opcodes */
#define OPC_ARITH  (3 << 4)  /* arithmetic opcodes */
#define OPC_FARITH (4 << 4)  /* FPU arithmetic opcodes */
#define OPC_TEST   (5 << 4)  /* test opcodes */
#define OPC_0F01   (6 << 4)  /* 0x0f01XX (group 7, XX is 2nd opcode,
                                no operands and unstructured mod/rm) */
/* True when the class field of instr_type V equals class I. */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F (1 << 8)  /* Is secondary map (0x0f prefix) */
#define OPC_48 (1 << 9)  /* Always has REX prefix */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ  (1 << 12)          /* accepts w, l, q or no suffix */
# define OPC_BWLQ (OPC_B | OPC_WLQ)  /* accepts b, w, l, q or no suffix */
# define OPC_WLX  OPC_WLQ
# define OPC_BWLX OPC_BWLQ
#else
# define OPC_WLX  OPC_WL
# define OPC_BWLX OPC_BWL
#endif

/* Bit position of the 3-bit opcode group stored in instr_type. */
#define OPC_GROUP_SHIFT 13
/* Compact operand-type codes stored in ASMInstr.op_type[].  Each template
   operand is one OPT_xxx value (low 5 bits), optionally or-ed with OPT_EA.
   asm_opcode() expands a code into an OP_xxx bit mask: plain codes become
   (1 << code), composite codes become a union of several OP_xxx bits. */
enum {
    /* The leading register classes must stay in this order:
       parse_operand() derives them as (token - TOK_ASM_al) >> 3. */
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
#ifdef TCC_TARGET_X86_64
    OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
                     with REX prefix, not used in insn templates */
#endif
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
/* Operand type bit masks: one bit per non-composite OPT_xxx code.  These
   are the values held in Operand.type and in the decoded op_type[] array
   of asm_opcode(). */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_REG8_LOW (1 << OPT_REG8_LOW)
# define OP_IM64  (1 << OPT_IM64)
/* Bit 31.  The shift is done on an unsigned value: shifting the signed
   int 0x40000000 left by one would overflow into the sign bit, which is
   undefined behaviour. */
# define OP_EA32  ((unsigned)OP_EA << 1)
#else
/* Classes that do not exist on i386 collapse to 0 so that tests against
   them are always false. */
# define OP_REG64 0
# define OP_REG8_LOW 0
# define OP_IM64  0
# define OP_EA32  0
#endif

/* Memory (effective address) operand. */
#define OP_EA     0x40000000
/* Any general purpose integer register. */
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
/* Target-neutral names for the accumulator, counter and data registers:
   they resolve to the 32-bit register numbers on i386 and to the 64-bit
   ones on x86_64. */
#ifndef TCC_TARGET_X86_64
# define TREG_XAX TREG_EAX
# define TREG_XCX TREG_ECX
# define TREG_XDX TREG_EDX
#else
# define TREG_XAX TREG_RAX
# define TREG_XCX TREG_RCX
# define TREG_XDX TREG_RDX
#endif
/* One instruction template of the asm_instrs[] table. */
typedef struct ASMInstr {
    uint16_t sym;        /* mnemonic token (TOK_ASM_xxx); 0 terminates the table */
    uint16_t opcode;     /* opcode as packed by the O() macro */
    uint16_t instr_type; /* OPC_xxx flags plus group, as built by the T() macro */
    uint8_t nb_ops;      /* number of operands this template takes */
    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
} ASMInstr;
/* One parsed instruction operand, filled in by parse_operand(). */
typedef struct Operand {
    uint32_t type; /* OR of the OP_xxx classes this operand can satisfy */
    int8_t  reg; /* register, -1 if none */
    int8_t  reg2; /* second register, -1 if none */
    uint8_t shift; /* index scale as a shift count, see get_reg_shift() */
    ExprValue e; /* immediate value or displacement expression */
} Operand;
/* Maps (Operand.type & OP_REG) to an operand size index
   (0 = byte, 1 = word, 2 = long, 3 = quad).  The table is indexed by the
   OP_REGxx bit value itself, hence the sparse layout. */
static const uint8_t reg_to_size[9] = {
/*
    Equivalent designated form, kept for reference only:
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
/* Number of condition-code suffixes accepted by OPC_TEST opcodes. */
#define NB_TEST_OPCODES 30

/* Condition-code nibble added to the base opcode of an OPC_TEST
   instruction.  Indexed by (opcode token - template token), so the order
   must match the order of the suffixed mnemonic tokens.  Synonyms share
   the same value. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};
/* Segment override prefix bytes, indexed by (token - TOK_ASM_es) as
   stored in Operand.reg for an OP_SEG operand. */
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};
224 static const ASMInstr asm_instrs[] = {
225 #define ALT(x) x
226 /* This removes a 0x0f in the second byte */
227 #define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
228 /* This constructs instr_type from opcode, type and group. */
229 #define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
230 #define DEF_ASM_OP0(name, opcode)
231 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
232 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
233 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
234 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
235 #ifdef TCC_TARGET_X86_64
236 # include "x86_64-asm.h"
237 #else
238 # include "i386-asm.h"
239 #endif
240 /* last operation */
241 { 0, },
244 static const uint16_t op0_codes[] = {
245 #define ALT(x)
246 #define DEF_ASM_OP0(x, opcode) opcode,
247 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
248 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
249 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
250 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
251 #ifdef TCC_TARGET_X86_64
252 # include "x86_64-asm.h"
253 #else
254 # include "i386-asm.h"
255 #endif
258 static inline int get_reg_shift(TCCState *s1)
260 int shift, v;
261 v = asm_int_expr(s1);
262 switch(v) {
263 case 1:
264 shift = 0;
265 break;
266 case 2:
267 shift = 1;
268 break;
269 case 4:
270 shift = 2;
271 break;
272 case 8:
273 shift = 3;
274 break;
275 default:
276 expect("1, 2, 4 or 8 constant");
277 shift = 0;
278 break;
280 return shift;
#ifdef TCC_TARGET_X86_64
/* Try to read identifier token T as a numbered register name:
       ["c"] "r" <number> [ "b" | "w" | "d" ]
   The number has no leading zero and is at most 15.  A leading 'c'
   selects a control register (OP_CR) and then no size suffix is allowed;
   otherwise the suffix picks OP_REG8/OP_REG16/OP_REG32, and no suffix
   means OP_REG64.
   Returns the register number and stores its class in *TYPE, or returns
   -1 when T does not have that form.  Note that *TYPE may already have
   been written when -1 is returned. */
static int asm_parse_numeric_reg(int t, unsigned int *type)
{
    int reg = -1;

    if (t >= TOK_IDENT && t < tok_ident) {
        const char *s = table_ident[t - TOK_IDENT]->str;
        char c;

        *type = OP_REG64;
        if (*s == 'c') {
            s++;
            *type = OP_CR;
        }
        if (*s++ != 'r')
            return -1;
        /* Don't allow leading '0'.  */
        if ((c = *s++) >= '1' && c <= '9')
            reg = c - '0';
        else
            return -1;
        /* optional second digit; anything above 15 is rejected below */
        if ((c = *s) >= '0' && c <= '5')
            s++, reg = reg * 10 + c - '0';
        if (reg > 15)
            return -1;
        if ((c = *s) == 0)
            ; /* no suffix: keep the type chosen above */
        else if (*type != OP_REG64)
            return -1; /* control registers take no size suffix */
        else if (c == 'b' && !s[1])
            *type = OP_REG8;
        else if (c == 'w' && !s[1])
            *type = OP_REG16;
        else if (c == 'd' && !s[1])
            *type = OP_REG32;
        else
            return -1;
    }
    return reg;
}
#endif
323 static int asm_parse_reg(unsigned int *type)
325 int reg = 0;
326 *type = 0;
327 if (tok != '%')
328 goto error_32;
329 next();
330 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
331 reg = tok - TOK_ASM_eax;
332 *type = OP_REG32;
333 #ifdef TCC_TARGET_X86_64
334 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
335 reg = tok - TOK_ASM_rax;
336 *type = OP_REG64;
337 } else if (tok == TOK_ASM_rip) {
338 reg = -2; /* Probably should use different escape code. */
339 *type = OP_REG64;
340 } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
341 && (*type == OP_REG32 || *type == OP_REG64)) {
343 #endif
344 } else {
345 error_32:
346 expect("register");
348 next();
349 return reg;
352 static void parse_operand(TCCState *s1, Operand *op)
354 ExprValue e;
355 int reg, indir;
356 const char *p;
358 indir = 0;
359 if (tok == '*') {
360 next();
361 indir = OP_INDIR;
364 if (tok == '%') {
365 next();
366 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
367 reg = tok - TOK_ASM_al;
368 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
369 op->reg = reg & 7;
370 if ((op->type & OP_REG) && op->reg == TREG_XAX)
371 op->type |= OP_EAX;
372 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
373 op->type |= OP_CL;
374 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
375 op->type |= OP_DX;
376 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
377 op->type = OP_DB;
378 op->reg = tok - TOK_ASM_dr0;
379 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
380 op->type = OP_SEG;
381 op->reg = tok - TOK_ASM_es;
382 } else if (tok == TOK_ASM_st) {
383 op->type = OP_ST;
384 op->reg = 0;
385 next();
386 if (tok == '(') {
387 next();
388 if (tok != TOK_PPNUM)
389 goto reg_error;
390 p = tokc.str.data;
391 reg = p[0] - '0';
392 if ((unsigned)reg >= 8 || p[1] != '\0')
393 goto reg_error;
394 op->reg = reg;
395 next();
396 skip(')');
398 if (op->reg == 0)
399 op->type |= OP_ST0;
400 goto no_skip;
401 #ifdef TCC_TARGET_X86_64
402 } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
403 op->type = OP_REG8 | OP_REG8_LOW;
404 op->reg = 4 + tok - TOK_ASM_spl;
405 } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
407 #endif
408 } else {
409 reg_error:
410 tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
412 next();
413 no_skip: ;
414 } else if (tok == '$') {
415 /* constant value */
416 next();
417 asm_expr(s1, &e);
418 op->type = OP_IM32;
419 op->e = e;
420 if (!op->e.sym) {
421 if (op->e.v == (uint8_t)op->e.v)
422 op->type |= OP_IM8;
423 if (op->e.v == (int8_t)op->e.v)
424 op->type |= OP_IM8S;
425 if (op->e.v == (uint16_t)op->e.v)
426 op->type |= OP_IM16;
427 #ifdef TCC_TARGET_X86_64
428 if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
429 op->type = OP_IM64;
430 #endif
432 } else {
433 /* address(reg,reg2,shift) with all variants */
434 op->type = OP_EA;
435 op->reg = -1;
436 op->reg2 = -1;
437 op->shift = 0;
438 if (tok != '(') {
439 asm_expr(s1, &e);
440 op->e = e;
441 } else {
442 next();
443 if (tok == '%') {
444 unget_tok('(');
445 op->e.v = 0;
446 op->e.sym = NULL;
447 } else {
448 /* bracketed offset expression */
449 asm_expr(s1, &e);
450 if (tok != ')')
451 expect(")");
452 next();
453 op->e.v = e.v;
454 op->e.sym = e.sym;
456 op->e.pcrel = 0;
458 if (tok == '(') {
459 unsigned int type = 0;
460 next();
461 if (tok != ',') {
462 op->reg = asm_parse_reg(&type);
464 if (tok == ',') {
465 next();
466 if (tok != ',') {
467 op->reg2 = asm_parse_reg(&type);
469 if (tok == ',') {
470 next();
471 op->shift = get_reg_shift(s1);
474 if (type & OP_REG32)
475 op->type |= OP_EA32;
476 skip(')');
478 if (op->reg == -1 && op->reg2 == -1)
479 op->type |= OP_ADDR;
481 op->type |= indir;
484 /* XXX: unify with C code output ? */
485 ST_FUNC void gen_expr32(ExprValue *pe)
487 if (pe->pcrel)
488 /* If PC-relative, always set VT_SYM, even without symbol,
489 so as to force a relocation to be emitted. */
490 gen_addrpc32(VT_SYM, pe->sym, pe->v + (ind + 4));
491 else
492 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* Emit the 64-bit value of expression *PE at the current output
   position; VT_SYM is passed only when the expression has a symbol. */
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
502 /* XXX: unify with C code output ? */
503 static void gen_disp32(ExprValue *pe)
505 Sym *sym = pe->sym;
506 ElfSym *esym = elfsym(sym);
507 if (esym && esym->st_shndx == cur_text_section->sh_num) {
508 /* same section: we can output an absolute value. Note
509 that the TCC compiler behaves differently here because
510 it always outputs a relocation to ease (future) code
511 elimination in the linker */
512 gen_le32(pe->v + esym->st_value - ind - 4);
513 } else {
514 if (sym && sym->type.t == VT_VOID) {
515 sym->type.t = VT_FUNC;
516 sym->type.ref = NULL;
518 #ifdef TCC_TARGET_X86_64
519 greloca(cur_text_section, sym, ind, R_X86_64_PLT32, pe->v - 4);
520 gen_le32(0);
521 #else
522 gen_addrpc32(VT_SYM, sym, pe->v);
523 #endif
528 /* generate the modrm operand */
529 static inline int asm_modrm(int reg, Operand *op)
531 int mod, reg1, reg2, sib_reg1;
533 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
534 g(0xc0 + (reg << 3) + op->reg);
535 } else if (op->reg == -1 && op->reg2 == -1) {
536 /* displacement only */
537 #ifdef TCC_TARGET_X86_64
538 g(0x04 + (reg << 3));
539 g(0x25);
540 #else
541 g(0x05 + (reg << 3));
542 #endif
543 gen_expr32(&op->e);
544 #ifdef TCC_TARGET_X86_64
545 } else if (op->reg == -2) {
546 ExprValue *pe = &op->e;
547 g(0x05 + (reg << 3));
548 gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
549 return ind;
550 #endif
551 } else {
552 sib_reg1 = op->reg;
553 /* fist compute displacement encoding */
554 if (sib_reg1 == -1) {
555 sib_reg1 = 5;
556 mod = 0x00;
557 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
558 mod = 0x00;
559 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
560 mod = 0x40;
561 } else {
562 mod = 0x80;
564 /* compute if sib byte needed */
565 reg1 = op->reg;
566 if (op->reg2 != -1)
567 reg1 = 4;
568 g(mod + (reg << 3) + reg1);
569 if (reg1 == 4) {
570 /* add sib byte */
571 reg2 = op->reg2;
572 if (reg2 == -1)
573 reg2 = 4; /* indicate no index */
574 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
576 /* add offset */
577 if (mod == 0x40) {
578 g(op->e.v);
579 } else if (mod == 0x80 || op->reg == -1) {
580 gen_expr32(&op->e);
583 return 0;
#ifdef TCC_TARGET_X86_64
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

/* Emit the REX prefix of an instruction if one is needed.
     width64  non-zero to request REX.W
     ops      parsed operands; register numbers >= 8 are reduced by 8 in
              place once the matching REX bit has been recorded
     nb_ops   number of operands
     op_type  decoded template type of each operand
     regi     index of the operand going into the modrm reg field, or -1
     rmi      index of the operand going into the modrm r/m field, or -1
              when the instruction has no modrm byte
   Raises an error when a REX prefix is required together with one of
   %ah, %ch, %dh or %bh. */
static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
                    int regi, int rmi)
{
    unsigned char rex = width64 ? REX_W : 0;
    int saw_high_8bit = 0;
    int i;

    if (rmi == -1) {
        /* No mod/rm byte, but we might have a register op nevertheless
           (we will add it to the opcode later).  */
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                if (ops[i].reg >= 8) {
                    rex |= REX_B;
                    ops[i].reg -= 8;
                } else if (ops[i].type & OP_REG8_LOW)
                    rex |= 0x40; /* bare REX prefix */
                else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
                    /* An 8 bit reg >= 4 without REG8_LOW is ah/ch/dh/bh */
                    saw_high_8bit = ops[i].reg;
                break;
            }
        }
    } else {
        if (regi != -1) {
            if (ops[regi].reg >= 8) {
                rex |= REX_R;
                ops[regi].reg -= 8;
            } else if (ops[regi].type & OP_REG8_LOW)
                rex |= 0x40; /* bare REX prefix */
            else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8_LOW is ah/ch/dh/bh */
                saw_high_8bit = ops[regi].reg;
        }
        if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
            if (ops[rmi].reg >= 8) {
                rex |= REX_B;
                ops[rmi].reg -= 8;
            } else if (ops[rmi].type & OP_REG8_LOW)
                rex |= 0x40; /* bare REX prefix */
            else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
                /* An 8 bit reg >= 4 without REG8_LOW is ah/ch/dh/bh */
                saw_high_8bit = ops[rmi].reg;
        }
        if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
            rex |= REX_X;
            ops[rmi].reg2 -= 8;
        }
    }
    if (rex) {
        if (saw_high_8bit)
            tcc_error("can't encode register %%%ch when REX prefix is required",
                      "acdb"[saw_high_8bit-4]);
        g(rex);
    }
}
#endif
650 static void maybe_print_stats (void)
652 static int already;
654 if (0 && !already)
655 /* print stats about opcodes */
657 const struct ASMInstr *pa;
658 int freq[4];
659 int op_vals[500];
660 int nb_op_vals, i, j;
662 already = 1;
663 nb_op_vals = 0;
664 memset(freq, 0, sizeof(freq));
665 for(pa = asm_instrs; pa->sym != 0; pa++) {
666 freq[pa->nb_ops]++;
667 //for(i=0;i<pa->nb_ops;i++) {
668 for(j=0;j<nb_op_vals;j++) {
669 //if (pa->op_type[i] == op_vals[j])
670 if (pa->instr_type == op_vals[j])
671 goto found;
673 //op_vals[nb_op_vals++] = pa->op_type[i];
674 op_vals[nb_op_vals++] = pa->instr_type;
675 found: ;
678 for(i=0;i<nb_op_vals;i++) {
679 int v = op_vals[i];
680 //if ((v & (v - 1)) != 0)
681 printf("%3d: %08x\n", i, v);
683 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
684 (int)sizeof(asm_instrs),
685 (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
686 freq[0], freq[1], freq[2], freq[3]);
690 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
692 const ASMInstr *pa;
693 int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p;
694 int nb_ops, s;
695 Operand ops[MAX_OPERANDS], *pop;
696 int op_type[3]; /* decoded op type */
697 int alltypes; /* OR of all operand types */
698 int autosize;
699 int p66;
700 #ifdef TCC_TARGET_X86_64
701 int rex64;
702 #endif
704 maybe_print_stats();
705 /* force synthetic ';' after prefix instruction, so we can handle */
706 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
707 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
708 unget_tok(';');
710 /* get operands */
711 pop = ops;
712 nb_ops = 0;
713 seg_prefix = 0;
714 alltypes = 0;
715 for(;;) {
716 if (tok == ';' || tok == TOK_LINEFEED)
717 break;
718 if (nb_ops >= MAX_OPERANDS) {
719 tcc_error("incorrect number of operands");
721 parse_operand(s1, pop);
722 if (tok == ':') {
723 if (pop->type != OP_SEG || seg_prefix)
724 tcc_error("incorrect prefix");
725 seg_prefix = segment_prefixes[pop->reg];
726 next();
727 parse_operand(s1, pop);
728 if (!(pop->type & OP_EA)) {
729 tcc_error("segment prefix must be followed by memory reference");
732 pop++;
733 nb_ops++;
734 if (tok != ',')
735 break;
736 next();
739 s = 0; /* avoid warning */
741 again:
742 /* optimize matching by using a lookup table (no hashing is needed
743 !) */
744 for(pa = asm_instrs; pa->sym != 0; pa++) {
745 int it = pa->instr_type & OPCT_MASK;
746 s = 0;
747 if (it == OPC_FARITH) {
748 v = opcode - pa->sym;
749 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
750 continue;
751 } else if (it == OPC_ARITH) {
752 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
753 continue;
754 s = (opcode - pa->sym) % NBWLX;
755 if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
757 /* We need to reject the xxxb opcodes that we accepted above.
758 Note that pa->sym for WLX opcodes is the 'w' token,
759 to get the 'b' token subtract one. */
760 if (((opcode - pa->sym + 1) % NBWLX) == 0)
761 continue;
762 s++;
764 } else if (it == OPC_SHIFT) {
765 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
766 continue;
767 s = (opcode - pa->sym) % NBWLX;
768 } else if (it == OPC_TEST) {
769 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
770 continue;
771 /* cmovxx is a test opcode but accepts multiple sizes.
772 The suffixes aren't encoded in the table, instead we
773 simply force size autodetection always and deal with suffixed
774 variants below when we don't find e.g. "cmovzl". */
775 if (pa->instr_type & OPC_WLX)
776 s = NBWLX - 1;
777 } else if (pa->instr_type & OPC_B) {
778 #ifdef TCC_TARGET_X86_64
779 /* Some instructions don't have the full size but only
780 bwl form. insb e.g. */
781 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
782 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
783 continue;
784 #endif
785 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
786 continue;
787 s = opcode - pa->sym;
788 } else if (pa->instr_type & OPC_WLX) {
789 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
790 continue;
791 s = opcode - pa->sym + 1;
792 } else {
793 if (pa->sym != opcode)
794 continue;
796 if (pa->nb_ops != nb_ops)
797 continue;
798 #ifdef TCC_TARGET_X86_64
799 /* Special case for moves. Selecting the IM64->REG64 form
800 should only be done if we really have an >32bit imm64, and that
801 is hardcoded. Ignore it here. */
802 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
803 && (ops[1].type & OP_REG) == OP_REG64
804 && !(pa->instr_type & OPC_0F))
805 continue;
806 #endif
807 /* now decode and check each operand */
808 alltypes = 0;
809 for(i = 0; i < nb_ops; i++) {
810 int op1, op2;
811 op1 = pa->op_type[i];
812 op2 = op1 & 0x1f;
813 switch(op2) {
814 case OPT_IM:
815 v = OP_IM8 | OP_IM16 | OP_IM32;
816 break;
817 case OPT_REG:
818 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
819 break;
820 case OPT_REGW:
821 v = OP_REG16 | OP_REG32 | OP_REG64;
822 break;
823 case OPT_IMW:
824 v = OP_IM16 | OP_IM32;
825 break;
826 case OPT_MMXSSE:
827 v = OP_MMX | OP_SSE;
828 break;
829 case OPT_DISP:
830 case OPT_DISP8:
831 v = OP_ADDR;
832 break;
833 default:
834 v = 1 << op2;
835 break;
837 if (op1 & OPT_EA)
838 v |= OP_EA;
839 op_type[i] = v;
840 if ((ops[i].type & v) == 0)
841 goto next;
842 alltypes |= ops[i].type;
844 (void)alltypes; /* maybe unused */
845 /* all is matching ! */
846 break;
847 next: ;
849 if (pa->sym == 0) {
850 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
851 int b;
852 b = op0_codes[opcode - TOK_ASM_first];
853 if (b & 0xff00)
854 g(b >> 8);
855 g(b);
856 return;
857 } else if (opcode <= TOK_ASM_alllast) {
858 tcc_error("bad operand with opcode '%s'",
859 get_tok_str(opcode, NULL));
860 } else {
861 /* Special case for cmovcc, we accept size suffixes but ignore
862 them, but we don't want them to blow up our tables. */
863 TokenSym *ts = table_ident[opcode - TOK_IDENT];
864 if (ts->len >= 6
865 && strchr("wlq", ts->str[ts->len-1])
866 && !memcmp(ts->str, "cmov", 4)) {
867 opcode = tok_alloc(ts->str, ts->len-1)->tok;
868 goto again;
870 tcc_error("unknown opcode '%s'", ts->str);
873 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
874 autosize = NBWLX-1;
875 #ifdef TCC_TARGET_X86_64
876 /* XXX the autosize should rather be zero, to not have to adjust this
877 all the time. */
878 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
879 autosize = NBWLX-2;
880 #endif
881 if (s == autosize) {
882 /* Check for register operands providing hints about the size.
883 Start from the end, i.e. destination operands. This matters
884 only for opcodes accepting different sized registers, lar and lsl
885 are such opcodes. */
886 for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
887 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
888 s = reg_to_size[ops[i].type & OP_REG];
890 if (s == autosize) {
891 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
892 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
893 s = 2;
894 else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
895 (ops[0].type & OP_EA))
896 s = NBWLX - 2;
897 else
898 tcc_error("cannot infer opcode suffix");
902 #ifdef TCC_TARGET_X86_64
903 rex64 = 0;
904 if (pa->instr_type & OPC_48)
905 rex64 = 1;
906 else if (s == 3 || (alltypes & OP_REG64)) {
907 /* generate REX prefix */
908 int default64 = 0;
909 for(i = 0; i < nb_ops; i++) {
910 if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
911 /* If only 64bit regs are accepted in one operand
912 this is a default64 instruction without need for
913 REX prefixes, except for movabs(0xb8). */
914 default64 = 1;
915 break;
918 /* XXX find better encoding for the default64 instructions. */
919 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
920 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
921 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
922 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
923 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
924 && !default64)
925 rex64 = 1;
927 #endif
929 /* now generates the operation */
930 if (OPCT_IS(pa->instr_type, OPC_FWAIT))
931 g(0x9b);
932 if (seg_prefix)
933 g(seg_prefix);
934 #ifdef TCC_TARGET_X86_64
935 /* Generate addr32 prefix if needed */
936 for(i = 0; i < nb_ops; i++) {
937 if (ops[i].type & OP_EA32) {
938 g(0x67);
939 break;
942 #endif
943 /* generate data16 prefix if needed */
944 p66 = 0;
945 if (s == 1)
946 p66 = 1;
947 else {
948 /* accepting mmx+sse in all operands --> needs 0x66 to
949 switch to sse mode. Accepting only sse in an operand --> is
950 already SSE insn and needs 0x66/f2/f3 handling. */
951 for (i = 0; i < nb_ops; i++)
952 if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
953 && ops[i].type & OP_SSE)
954 p66 = 1;
956 if (p66)
957 g(0x66);
959 v = pa->opcode;
960 p = v >> 8; /* possibly prefix byte(s) */
961 switch (p) {
962 case 0: break; /* no prefix */
963 case 0x48: break; /* REX, handled elsewhere */
964 case 0x66:
965 case 0x67:
966 case 0xf2:
967 case 0xf3: v = v & 0xff; g(p); break;
968 case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */
969 case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */
970 case 0xdc: case 0xdd: case 0xde: case 0xdf: break;
971 default: tcc_error("bad prefix 0x%2x in opcode table", p); break;
973 if (pa->instr_type & OPC_0F)
974 v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
975 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
976 /* kludge for imul $im, %reg */
977 nb_ops = 3;
978 ops[2] = ops[1];
979 op_type[2] = op_type[1];
980 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
981 v--; /* int $3 case */
982 nb_ops = 0;
983 } else if ((v == 0x06 || v == 0x07)) {
984 if (ops[0].reg >= 4) {
985 /* push/pop %fs or %gs */
986 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
987 } else {
988 v += ops[0].reg << 3;
990 nb_ops = 0;
991 } else if (v <= 0x05) {
992 /* arith case */
993 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
994 } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
995 /* fpu arith case */
996 v += ((opcode - pa->sym) / 6) << 3;
999 /* search which operand will be used for modrm */
1000 modrm_index = -1;
1001 modreg_index = -1;
1002 if (pa->instr_type & OPC_MODRM) {
1003 if (!nb_ops) {
1004 /* A modrm opcode without operands is a special case (e.g. mfence).
1005 It has a group and acts as if there's an register operand 0
1006 (ax). */
1007 i = 0;
1008 ops[i].type = OP_REG;
1009 ops[i].reg = 0;
1010 goto modrm_found;
1012 /* first look for an ea operand */
1013 for(i = 0;i < nb_ops; i++) {
1014 if (op_type[i] & OP_EA)
1015 goto modrm_found;
1017 /* then if not found, a register or indirection (shift instructions) */
1018 for(i = 0;i < nb_ops; i++) {
1019 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
1020 goto modrm_found;
1022 #ifdef ASM_DEBUG
1023 tcc_error("bad op table");
1024 #endif
1025 modrm_found:
1026 modrm_index = i;
1027 /* if a register is used in another operand then it is
1028 used instead of group */
1029 for(i = 0;i < nb_ops; i++) {
1030 int t = op_type[i];
1031 if (i != modrm_index &&
1032 (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
1033 modreg_index = i;
1034 break;
1038 #ifdef TCC_TARGET_X86_64
1039 asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
1040 #endif
1042 if (pa->instr_type & OPC_REG) {
1043 /* mov $im, %reg case */
1044 if (v == 0xb0 && s >= 1)
1045 v += 7;
1046 for(i = 0; i < nb_ops; i++) {
1047 if (op_type[i] & (OP_REG | OP_ST)) {
1048 v += ops[i].reg;
1049 break;
1053 if (pa->instr_type & OPC_B)
1054 v += s >= 1;
1055 if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
1056 ElfSym *esym;
1057 int jmp_disp;
1059 /* see if we can really generate the jump with a byte offset */
1060 esym = elfsym(ops[0].e.sym);
1061 if (!esym || esym->st_shndx != cur_text_section->sh_num)
1062 goto no_short_jump;
1063 jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff);
1064 if (jmp_disp == (int8_t)jmp_disp) {
1065 /* OK to generate jump */
1066 ops[0].e.sym = 0;
1067 ops[0].e.v = jmp_disp;
1068 op_type[0] = OP_IM8S;
1069 } else {
1070 no_short_jump:
1071 /* long jump will be allowed. need to modify the
1072 opcode slightly */
1073 if (v == 0xeb) /* jmp */
1074 v = 0xe9;
1075 else if (v == 0x70) /* jcc */
1076 v += 0x0f10;
1077 else
1078 tcc_error("invalid displacement");
1081 if (OPCT_IS(pa->instr_type, OPC_TEST))
1082 v += test_bits[opcode - pa->sym];
1083 else if (OPCT_IS(pa->instr_type, OPC_0F01))
1084 v |= 0x0f0100;
1085 op1 = v >> 16;
1086 if (op1)
1087 g(op1);
1088 op1 = (v >> 8) & 0xff;
1089 if (op1)
1090 g(op1);
1091 g(v);
1093 if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
1094 reg = (opcode - pa->sym) / NBWLX;
1095 if (reg == 6)
1096 reg = 7;
1097 } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
1098 reg = (opcode - pa->sym) / NBWLX;
1099 } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
1100 reg = (opcode - pa->sym) / 6;
1101 } else {
1102 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
1105 pc = 0;
1106 if (pa->instr_type & OPC_MODRM) {
1107 /* if a register is used in another operand then it is
1108 used instead of group */
1109 if (modreg_index >= 0)
1110 reg = ops[modreg_index].reg;
1111 pc = asm_modrm(reg, &ops[modrm_index]);
1114 /* emit constants */
1115 #ifndef TCC_TARGET_X86_64
1116 if (!(pa->instr_type & OPC_0F)
1117 && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
1118 /* ljmp or lcall kludge */
1119 gen_expr32(&ops[1].e);
1120 if (ops[0].e.sym)
1121 tcc_error("cannot relocate");
1122 gen_le16(ops[0].e.v);
1123 return;
1125 #endif
1126 for(i = 0;i < nb_ops; i++) {
1127 v = op_type[i];
1128 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
1129 /* if multiple sizes are given it means we must look
1130 at the op size */
1131 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
1132 if (s == 0)
1133 v = OP_IM8;
1134 else if (s == 1)
1135 v = OP_IM16;
1136 else if (s == 2 || (v & OP_IM64) == 0)
1137 v = OP_IM32;
1138 else
1139 v = OP_IM64;
1142 if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
1143 tcc_error("cannot relocate");
1145 if (v & (OP_IM8 | OP_IM8S)) {
1146 g(ops[i].e.v);
1147 } else if (v & OP_IM16) {
1148 gen_le16(ops[i].e.v);
1149 #ifdef TCC_TARGET_X86_64
1150 } else if (v & OP_IM64) {
1151 gen_expr64(&ops[i].e);
1152 #endif
1153 } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
1154 gen_disp32(&ops[i].e);
1155 } else {
1156 gen_expr32(&ops[i].e);
1161 /* after immediate operands, adjust pc-relative address */
1162 if (pc)
1163 add32le(cur_text_section->data + pc - 4, pc - ind);
/* Rank a constraint string for register-allocation ordering.
 *
 * Each constraint letter belongs to a class numbered 0..4:
 *   0: 'A'
 *   1: 'a' 'b' 'c' 'd' 'S' 'D'
 *   2: 'q'
 *   3: 'r' 'R' 'p'
 *   4: 'N' 'M' 'I' 'e' 'i' 'm' 'g'
 * The result is the LARGEST class number found among the letters of STR
 * (0 for an empty string).  Any other character is reported through
 * tcc_error("unknown constraint ...").
 */
static inline int constraint_priority(const char *str)
{
    int highest = 0;

    for (; *str != '\0'; str++) {
        int ch = *str;
        int rank;

        switch (ch) {
        case 'A':
            rank = 0;
            break;
        case 'a': case 'b': case 'c': case 'd':
        case 'S': case 'D':
            rank = 1;
            break;
        case 'q':
            rank = 2;
            break;
        case 'r': case 'R': case 'p':
            rank = 3;
            break;
        case 'N': case 'M': case 'I':
        case 'e': case 'i':
        case 'm': case 'g':
            rank = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", ch);
            rank = 0;
            break;
        }
        /* keep the maximum class seen so far */
        if (highest < rank)
            highest = rank;
    }
    return highest;
}
/* Return a pointer to the first character of P that is not one of the
   constraint modifier characters '=', '&', '+' or '%'.  The result
   points into the same string (possibly at its terminating NUL). */
static const char *skip_constraint_modifiers(const char *p)
{
    for (;;) {
        switch (*p) {
        case '=':
        case '&':
        case '+':
        case '%':
            p++;
            continue;
        default:
            return p;
        }
    }
}
1225 /* If T (a token) is of the form "%reg" returns the register
1226 number and type, otherwise return -1. */
1227 ST_FUNC int asm_parse_regvar (int t)
1229 const char *s;
1230 Operand op;
1231 if (t < TOK_IDENT || (t & SYM_FIELD))
1232 return -1;
1233 s = table_ident[t - TOK_IDENT]->str;
1234 if (s[0] != '%')
1235 return -1;
1236 t = tok_alloc_const(s + 1);
1237 unget_tok(t);
1238 unget_tok('%');
1239 parse_operand(tcc_state, &op);
1240 /* Accept only integer regs for now. */
1241 if (op.type & OP_REG)
1242 return op.reg;
1243 else
1244 return -1;
/* Bits stored per register in a regs_allocated[] array: a register can be
   reserved independently for the output side and for the input side. */
1247 #define REG_OUT_MASK 0x01
1248 #define REG_IN_MASK 0x02
/* Non-zero when REG is already reserved on any side selected by reg_mask.
   NOTE: expands to the identifiers regs_allocated and reg_mask, so both
   must be visible (and named exactly so) where the macro is used. */
1250 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
/* Decide, for every inline-asm operand, which register (if any) it uses.
 *
 * operands     - nb_operands entries; indices below nb_outputs are treated
 *                as outputs, the others as inputs.  input_index,
 *                ref_index, reg, is_memory and is_rw are reset first, then
 *                priority, ref_index/input_index, reg, is_llong, is_rw and
 *                is_memory are filled in as constraints are resolved.
 * clobber_regs - NB_ASM_REGS flags; a non-zero entry reserves that register
 *                for both the input and the output side.
 * pout_reg     - set to -1, or to a register in 0..7 that is not reserved
 *                on the output side when some operand holds a register,
 *                is VT_LLOCAL and is not flagged is_memory.
 *
 * Invalid references and unsatisfiable constraints are reported with
 * tcc_error().  The is_reg_allocated() macro used throughout reads the
 * locals regs_allocated and reg_mask declared below.
 */
1252 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1253 int nb_operands, int nb_outputs,
1254 const uint8_t *clobber_regs,
1255 int *pout_reg)
1257 ASMOperand *op;
1258 int sorted_op[MAX_ASM_OPERANDS];
1259 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1260 const char *str;
1261 uint8_t regs_allocated[NB_ASM_REGS];
1263 /* init fields */
1264 for(i=0;i<nb_operands;i++) {
1265 op = &operands[i];
1266 op->input_index = -1;
1267 op->ref_index = -1;
1268 op->reg = -1;
1269 op->is_memory = 0;
1270 op->is_rw = 0;
1272 /* compute constraint priority and evaluate references to output
1273 constraints if input constraints */
1274 for(i=0;i<nb_operands;i++) {
1275 op = &operands[i];
1276 str = op->constraint;
1277 str = skip_constraint_modifiers(str);
1278 if (isnum(*str) || *str == '[') {
1279 /* this is a reference to another constraint */
1280 k = find_constraint(operands, nb_operands, str, NULL);
/* the unsigned cast makes a negative k fail the test too; a reference
   must point at an earlier operand and may not be made by an output */
1281 if ((unsigned)k >= i || i < nb_outputs)
1282 tcc_error("invalid reference in constraint %d ('%s')",
1283 i, str);
1284 op->ref_index = k;
1285 if (operands[k].input_index >= 0)
1286 tcc_error("cannot reference twice the same operand");
1287 operands[k].input_index = i;
/* references get the largest priority value used here (5) */
1288 op->priority = 5;
/* operand value is VT_LOCAL and its symbol's r field holds a value below
   VT_CONST: pin the operand to that register (presumably a register
   variable, cf. the "asm regvar" error further down) */
1289 } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
1290 && op->vt->sym
1291 && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
1292 op->priority = 1;
1293 op->reg = reg;
1294 } else {
1295 op->priority = constraint_priority(str);
1299 /* sort operands according to their priority */
/* sorted_op[] holds operand indices, ordered by ascending priority */
1300 for(i=0;i<nb_operands;i++)
1301 sorted_op[i] = i;
1302 for(i=0;i<nb_operands - 1;i++) {
1303 for(j=i+1;j<nb_operands;j++) {
1304 p1 = operands[sorted_op[i]].priority;
1305 p2 = operands[sorted_op[j]].priority;
1306 if (p2 < p1) {
1307 tmp = sorted_op[i];
1308 sorted_op[i] = sorted_op[j];
1309 sorted_op[j] = tmp;
/* clobbered registers are unavailable on both sides */
1314 for(i = 0;i < NB_ASM_REGS; i++) {
1315 if (clobber_regs[i])
1316 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1317 else
1318 regs_allocated[i] = 0;
1320 /* esp cannot be used */
1321 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1322 /* ebp cannot be used yet */
1323 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1325 /* allocate registers and generate corresponding asm moves */
1326 for(i=0;i<nb_operands;i++) {
1327 j = sorted_op[i];
1328 op = &operands[j];
1329 str = op->constraint;
1330 /* no need to allocate references */
1331 if (op->ref_index >= 0)
1332 continue;
1333 /* select if register is used for output, input or both */
1334 if (op->input_index >= 0) {
1335 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1336 } else if (j < nb_outputs) {
1337 reg_mask = REG_OUT_MASK;
1338 } else {
1339 reg_mask = REG_IN_MASK;
/* a register pinned in the priority pass must still be free */
1341 if (op->reg >= 0) {
1342 if (is_reg_allocated(op->reg))
1343 tcc_error("asm regvar requests register that's taken already");
1344 reg = op->reg;
/* Each constraint character is tried in turn; an alternative that cannot
   be honoured jumps back to try_next.  Running into the terminating NUL
   (or any unhandled character) lands in default: and raises the error. */
1346 try_next:
1347 c = *str++;
1348 switch(c) {
1349 case '=':
1350 goto try_next;
1351 case '+':
1352 op->is_rw = 1;
1353 /* FALL THRU */
1354 case '&':
1355 if (j >= nb_outputs)
1356 tcc_error("'%c' modifier can only be applied to outputs", c);
1357 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1358 goto try_next;
1359 case 'A':
1360 /* allocate both eax and edx */
1361 if (is_reg_allocated(TREG_XAX) ||
1362 is_reg_allocated(TREG_XDX))
1363 goto try_next;
1364 op->is_llong = 1;
1365 op->reg = TREG_XAX;
1366 regs_allocated[TREG_XAX] |= reg_mask;
1367 regs_allocated[TREG_XDX] |= reg_mask;
1368 break;
1369 case 'a':
1370 reg = TREG_XAX;
1371 goto alloc_reg;
1372 case 'b':
1373 reg = 3;
1374 goto alloc_reg;
1375 case 'c':
1376 reg = TREG_XCX;
1377 goto alloc_reg;
1378 case 'd':
1379 reg = TREG_XDX;
1380 goto alloc_reg;
1381 case 'S':
1382 reg = 6;
1383 goto alloc_reg;
1384 case 'D':
1385 reg = 7;
/* shared tail of the single-register letters: the letter must agree with
   a pinned register, and the register must still be free */
1386 alloc_reg:
1387 if (op->reg >= 0 && reg != op->reg)
1388 goto try_next;
1389 if (is_reg_allocated(reg))
1390 goto try_next;
1391 goto reg_found;
1392 case 'q':
1393 /* eax, ebx, ecx or edx */
1394 if (op->reg >= 0) {
1395 if ((reg = op->reg) < 4)
1396 goto reg_found;
1397 } else for(reg = 0; reg < 4; reg++) {
1398 if (!is_reg_allocated(reg))
1399 goto reg_found;
1401 goto try_next;
1402 case 'r':
1403 case 'R':
1404 case 'p': /* A general address, for x86(64) any register is acceptable*/
1405 /* any general register */
1406 if ((reg = op->reg) >= 0)
1407 goto reg_found;
1408 else for(reg = 0; reg < 8; reg++) {
1409 if (!is_reg_allocated(reg))
1410 goto reg_found;
1412 goto try_next;
1413 reg_found:
1414 /* now we can reload in the register */
1415 op->is_llong = 0;
1416 op->reg = reg;
1417 regs_allocated[reg] |= reg_mask;
1418 break;
1419 case 'e':
1420 case 'i':
1421 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1422 goto try_next;
1423 break;
/* same test as 'e'/'i' but VT_SYM must be clear as well */
1424 case 'I':
1425 case 'N':
1426 case 'M':
1427 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1428 goto try_next;
1429 break;
1430 case 'm':
1431 case 'g':
1432 /* nothing special to do because the operand is already in
1433 memory, except if the pointer itself is stored in a
1434 memory variable (VT_LLOCAL case) */
1435 /* XXX: fix constant case */
1436 /* if it is a reference to a memory zone, it must lie
1437 in a register, so we reserve the register in the
1438 input registers and a load will be generated
1439 later */
1440 if (j < nb_outputs || c == 'm') {
1441 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1442 /* any general register */
/* note: only REG_IN_MASK is tested and set here, independent of reg_mask */
1443 for(reg = 0; reg < 8; reg++) {
1444 if (!(regs_allocated[reg] & REG_IN_MASK))
1445 goto reg_found1;
1447 goto try_next;
1448 reg_found1:
1449 /* now we can reload in the register */
1450 regs_allocated[reg] |= REG_IN_MASK;
1451 op->reg = reg;
1452 op->is_memory = 1;
1455 break;
1456 default:
1457 tcc_error("asm constraint %d ('%s') could not be satisfied",
1458 j, op->constraint);
1459 break;
1461 /* if a reference is present for that operand, we assign it too */
1462 if (op->input_index >= 0) {
1463 operands[op->input_index].reg = op->reg;
1464 operands[op->input_index].is_llong = op->is_llong;
1468 /* compute out_reg. It is used to store outputs registers to memory
1469 locations references by pointers (VT_LLOCAL case) */
1470 *pout_reg = -1;
/* only the first matching operand is considered: the loop stops as soon
   as one register has been picked */
1471 for(i=0;i<nb_operands;i++) {
1472 op = &operands[i];
1473 if (op->reg >= 0 &&
1474 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1475 !op->is_memory) {
1476 for(reg = 0; reg < 8; reg++) {
1477 if (!(regs_allocated[reg] & REG_OUT_MASK))
1478 goto reg_found2;
1480 tcc_error("could not find free output register for reloading");
1481 reg_found2:
1482 *pout_reg = reg;
1483 break;
1487 /* print sorted constraints */
1488 #ifdef ASM_DEBUG
1489 for(i=0;i<nb_operands;i++) {
1490 j = sorted_op[i];
1491 op = &operands[j];
1492 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1494 op->id ? get_tok_str(op->id, NULL) : "",
1495 op->constraint,
1496 op->vt->r,
1497 op->reg);
1499 if (*pout_reg >= 0)
1500 printf("out_reg=%d\n", *pout_reg);
1501 #endif
1504 ST_FUNC void subst_asm_operand(CString *add_str,
1505 SValue *sv, int modifier)
1507 int r, reg, size, val;
1508 char buf[64];
1510 r = sv->r;
1511 if ((r & VT_VALMASK) == VT_CONST) {
1512 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
1513 modifier != 'P')
1514 cstr_ccat(add_str, '$');
1515 if (r & VT_SYM) {
1516 const char *name = get_tok_str(sv->sym->v, NULL);
1517 if (sv->sym->v >= SYM_FIRST_ANOM) {
1518 /* In case of anonymous symbols ("L.42", used
1519 for static data labels) we can't find them
1520 in the C symbol table when later looking up
1521 this name. So enter them now into the asm label
1522 list when we still know the symbol. */
1523 get_asm_sym(tok_alloc_const(name), sv->sym);
1525 if (tcc_state->leading_underscore)
1526 cstr_ccat(add_str, '_');
1527 cstr_cat(add_str, name, -1);
1528 if ((uint32_t)sv->c.i == 0)
1529 goto no_offset;
1530 cstr_ccat(add_str, '+');
1532 val = sv->c.i;
1533 if (modifier == 'n')
1534 val = -val;
1535 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1536 cstr_cat(add_str, buf, -1);
1537 no_offset:;
1538 #ifdef TCC_TARGET_X86_64
1539 if (r & VT_LVAL)
1540 cstr_cat(add_str, "(%rip)", -1);
1541 #endif
1542 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1543 #ifdef TCC_TARGET_X86_64
1544 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1545 #else
1546 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1547 #endif
1548 cstr_cat(add_str, buf, -1);
1549 } else if (r & VT_LVAL) {
1550 reg = r & VT_VALMASK;
1551 if (reg >= VT_CONST)
1552 tcc_internal_error("");
1553 snprintf(buf, sizeof(buf), "(%%%s)",
1554 #ifdef TCC_TARGET_X86_64
1555 get_tok_str(TOK_ASM_rax + reg, NULL)
1556 #else
1557 get_tok_str(TOK_ASM_eax + reg, NULL)
1558 #endif
1560 cstr_cat(add_str, buf, -1);
1561 } else {
1562 /* register case */
1563 reg = r & VT_VALMASK;
1564 if (reg >= VT_CONST)
1565 tcc_internal_error("");
1567 /* choose register operand size */
1568 if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
1569 (sv->type.t & VT_BTYPE) == VT_BOOL)
1570 size = 1;
1571 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1572 size = 2;
1573 #ifdef TCC_TARGET_X86_64
1574 else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
1575 (sv->type.t & VT_BTYPE) == VT_PTR)
1576 size = 8;
1577 #endif
1578 else
1579 size = 4;
1580 if (size == 1 && reg >= 4)
1581 size = 4;
1583 if (modifier == 'b') {
1584 if (reg >= 4)
1585 tcc_error("cannot use byte register");
1586 size = 1;
1587 } else if (modifier == 'h') {
1588 if (reg >= 4)
1589 tcc_error("cannot use byte register");
1590 size = -1;
1591 } else if (modifier == 'w') {
1592 size = 2;
1593 } else if (modifier == 'k') {
1594 size = 4;
1595 #ifdef TCC_TARGET_X86_64
1596 } else if (modifier == 'q') {
1597 size = 8;
1598 #endif
1601 switch(size) {
1602 case -1:
1603 reg = TOK_ASM_ah + reg;
1604 break;
1605 case 1:
1606 reg = TOK_ASM_al + reg;
1607 break;
1608 case 2:
1609 reg = TOK_ASM_ax + reg;
1610 break;
1611 default:
1612 reg = TOK_ASM_eax + reg;
1613 break;
1614 #ifdef TCC_TARGET_X86_64
1615 case 8:
1616 reg = TOK_ASM_rax + reg;
1617 break;
1618 #endif
1620 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1621 cstr_cat(add_str, buf, -1);
1625 /* generate prolog and epilog code for asm statement */
1626 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1627 int nb_outputs, int is_output,
1628 uint8_t *clobber_regs,
1629 int out_reg)
1631 uint8_t regs_allocated[NB_ASM_REGS];
1632 ASMOperand *op;
1633 int i, reg;
1635 /* Strictly speaking %Xbp and %Xsp should be included in the
1636 call-preserved registers, but currently it doesn't matter. */
1637 #ifdef TCC_TARGET_X86_64
1638 #ifdef TCC_TARGET_PE
1639 static const uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
1640 #else
1641 static const uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
1642 #endif
1643 #else
1644 static const uint8_t reg_saved[] = { 3, 6, 7 };
1645 #endif
1647 /* mark all used registers */
1648 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1649 for(i = 0; i < nb_operands;i++) {
1650 op = &operands[i];
1651 if (op->reg >= 0)
1652 regs_allocated[op->reg] = 1;
1654 if (!is_output) {
1655 /* generate reg save code */
1656 for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
1657 reg = reg_saved[i];
1658 if (regs_allocated[reg]) {
1659 if (reg >= 8)
1660 g(0x41), reg-=8;
1661 g(0x50 + reg);
1665 /* generate load code */
1666 for(i = 0; i < nb_operands; i++) {
1667 op = &operands[i];
1668 if (op->reg >= 0) {
1669 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1670 op->is_memory) {
1671 /* memory reference case (for both input and
1672 output cases) */
1673 SValue sv;
1674 sv = *op->vt;
1675 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
1676 sv.type.t = VT_PTR;
1677 load(op->reg, &sv);
1678 } else if (i >= nb_outputs || op->is_rw) {
1679 /* load value in register */
1680 load(op->reg, op->vt);
1681 if (op->is_llong) {
1682 SValue sv;
1683 sv = *op->vt;
1684 sv.c.i += 4;
1685 load(TREG_XDX, &sv);
1690 } else {
1691 /* generate save code */
1692 for(i = 0 ; i < nb_outputs; i++) {
1693 op = &operands[i];
1694 if (op->reg >= 0) {
1695 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1696 if (!op->is_memory) {
1697 SValue sv;
1698 sv = *op->vt;
1699 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1700 sv.type.t = VT_PTR;
1701 load(out_reg, &sv);
1703 sv = *op->vt;
1704 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1705 store(op->reg, &sv);
1707 } else {
1708 store(op->reg, op->vt);
1709 if (op->is_llong) {
1710 SValue sv;
1711 sv = *op->vt;
1712 sv.c.i += 4;
1713 store(TREG_XDX, &sv);
1718 /* generate reg restore code */
1719 for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
1720 reg = reg_saved[i];
1721 if (regs_allocated[reg]) {
1722 if (reg >= 8)
1723 g(0x41), reg-=8;
1724 g(0x58 + reg);
1730 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1732 int reg;
1733 #ifdef TCC_TARGET_X86_64
1734 unsigned int type;
1735 #endif
1737 if (!strcmp(str, "memory") ||
1738 !strcmp(str, "cc") ||
1739 !strcmp(str, "flags"))
1740 return;
1741 reg = tok_alloc_const(str);
1742 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1743 reg -= TOK_ASM_eax;
1744 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1745 reg -= TOK_ASM_ax;
1746 #ifdef TCC_TARGET_X86_64
1747 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1748 reg -= TOK_ASM_rax;
1749 } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
1751 #endif
1752 } else {
1753 tcc_error("invalid clobber register '%s'", str);
1755 clobber_regs[reg] = 1;