inline asm: Accept 'p' constraint and 'P' template mod
[tinycc.git] / i386-asm.c
blobe532911408d1df9a2e1e3b785ace25744cf34f2b
/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
22 #include "tcc.h"
/* #define NB_ASM_REGS 8 */
#define MAX_OPERANDS 3
#define NB_SAVED_REGS 3

/* Token ranges used by asm_opcode() to classify an opcode token. */
#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

/* Flags stored in ASMInstr.instr_type. */
#define OPC_B          0x01  /* only used with OPC_WL */
#define OPC_WL         0x02  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04  /* register is added to opcode */
#define OPC_MODRM      0x08  /* modrm encoding */

/* Instruction class, stored in the bits selected by OPCT_MASK. */
#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10  /* add fwait opcode */
#define OPC_SHIFT      0x20  /* shift opcodes */
#define OPC_ARITH      0x30  /* arithmetic opcodes */
#define OPC_FARITH     0x40  /* FPU arithmetic opcodes */
#define OPC_TEST       0x50  /* test opcodes */
/* True when the class bits of instr_type 'v' equal class 'i'. */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F         0x100 /* Is secondary map (0x0f prefix) */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

/* Bit position of the 3-bit group field inside instr_type
   (see the T() macro and asm_opcode()). */
#define OPC_GROUP_SHIFT 13
/* in order to compress the operand type, we use specific operands and
   we or only with EA.  The values below are bit indices: the OP_xxx
   masks are built as (1 << OPT_xxx), and asm_opcode() extracts the
   index from a table entry with "& 0x1f", so every index must stay
   below 32. */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
/* Operand type masks: one bit per OPT_xxx index.  These are the values
   stored in Operand.type and in the decoded op_type[] of asm_opcode(). */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)

/* Effective address (memory) operand. */
#define OP_EA     0x40000000

#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_IM64  (1 << OPT_IM64)
/* Memory operand using 32-bit address registers.  The shift is done
   in uint32_t: shifting the int constant 0x40000000 left by one would
   overflow into the sign bit, which is undefined behaviour. */
# define OP_EA32  ((uint32_t)OP_EA << 1)
#else
# define OP_REG64 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* Target-width aliases for the accumulator, counter and data registers. */
#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif
144 typedef struct ASMInstr {
145 uint16_t sym;
146 uint16_t opcode;
147 uint16_t instr_type;
148 uint8_t nb_ops;
149 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
150 } ASMInstr;
152 typedef struct Operand {
153 uint32_t type;
154 int8_t reg; /* register, -1 if none */
155 int8_t reg2; /* second register, -1 if none */
156 uint8_t shift;
157 ExprValue e;
158 } Operand;
/* Maps a single register-size mask (OP_REG8 = 1, OP_REG16 = 2,
   OP_REG32 = 4, OP_REG64 = 8) to the operand size index used by
   asm_opcode() (0 = byte, 1 = word, 2 = long, 3 = quad).  The table is
   written positionally; the designated form is kept for reference. */
static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
#define NB_TEST_OPCODES 30

/* Condition code added to the base opcode of OPC_TEST instructions.
   asm_opcode() indexes this table with (opcode - pa->sym), so the
   entries must follow the order of the condition-suffixed tokens. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};
/* Segment override prefix bytes, indexed by (token - TOK_ASM_es) as
   stored in Operand.reg for OP_SEG operands. */
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};
216 static const ASMInstr asm_instrs[] = {
217 #define ALT(x) x
218 /* This removes a 0x0f in the second byte */
219 #define O(o) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o))
220 /* This constructs instr_type from opcode, type and group. */
221 #define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
222 #define DEF_ASM_OP0(name, opcode)
223 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0 },
224 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
225 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
226 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
227 #ifdef TCC_TARGET_X86_64
228 # include "x86_64-asm.h"
229 #else
230 # include "i386-asm.h"
231 #endif
232 /* last operation */
233 { 0, },
236 static const uint16_t op0_codes[] = {
237 #define ALT(x)
238 #define DEF_ASM_OP0(x, opcode) opcode,
239 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
240 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
241 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
242 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
243 #ifdef TCC_TARGET_X86_64
244 # include "x86_64-asm.h"
245 #else
246 # include "i386-asm.h"
247 #endif
250 static inline int get_reg_shift(TCCState *s1)
252 int shift, v;
253 v = asm_int_expr(s1);
254 switch(v) {
255 case 1:
256 shift = 0;
257 break;
258 case 2:
259 shift = 1;
260 break;
261 case 4:
262 shift = 2;
263 break;
264 case 8:
265 shift = 3;
266 break;
267 default:
268 expect("1, 2, 4 or 8 constant");
269 shift = 0;
270 break;
272 return shift;
275 static int asm_parse_reg(int *type)
277 int reg = 0;
278 *type = 0;
279 if (tok != '%')
280 goto error_32;
281 next();
282 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
283 reg = tok - TOK_ASM_eax;
284 #ifdef TCC_TARGET_X86_64
285 *type = OP_EA32;
286 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
287 reg = tok - TOK_ASM_rax;
288 } else if (tok == TOK_ASM_rip) {
289 reg = 8;
290 #endif
291 } else {
292 error_32:
293 expect("register");
295 next();
296 return reg;
299 static void parse_operand(TCCState *s1, Operand *op)
301 ExprValue e;
302 int reg, indir;
303 const char *p;
305 indir = 0;
306 if (tok == '*') {
307 next();
308 indir = OP_INDIR;
311 if (tok == '%') {
312 next();
313 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
314 reg = tok - TOK_ASM_al;
315 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
316 op->reg = reg & 7;
317 if ((op->type & OP_REG) && op->reg == TREG_XAX)
318 op->type |= OP_EAX;
319 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
320 op->type |= OP_CL;
321 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
322 op->type |= OP_DX;
323 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
324 op->type = OP_DB;
325 op->reg = tok - TOK_ASM_dr0;
326 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
327 op->type = OP_SEG;
328 op->reg = tok - TOK_ASM_es;
329 } else if (tok == TOK_ASM_st) {
330 op->type = OP_ST;
331 op->reg = 0;
332 next();
333 if (tok == '(') {
334 next();
335 if (tok != TOK_PPNUM)
336 goto reg_error;
337 p = tokc.str.data;
338 reg = p[0] - '0';
339 if ((unsigned)reg >= 8 || p[1] != '\0')
340 goto reg_error;
341 op->reg = reg;
342 next();
343 skip(')');
345 if (op->reg == 0)
346 op->type |= OP_ST0;
347 goto no_skip;
348 } else {
349 reg_error:
350 tcc_error("unknown register");
352 next();
353 no_skip: ;
354 } else if (tok == '$') {
355 /* constant value */
356 next();
357 asm_expr(s1, &e);
358 op->type = OP_IM32;
359 op->e = e;
360 if (!op->e.sym) {
361 if (op->e.v == (uint8_t)op->e.v)
362 op->type |= OP_IM8;
363 if (op->e.v == (int8_t)op->e.v)
364 op->type |= OP_IM8S;
365 if (op->e.v == (uint16_t)op->e.v)
366 op->type |= OP_IM16;
367 #ifdef TCC_TARGET_X86_64
368 if (op->e.v != (int32_t)op->e.v)
369 op->type = OP_IM64;
370 #endif
372 } else {
373 /* address(reg,reg2,shift) with all variants */
374 op->type = OP_EA;
375 op->reg = -1;
376 op->reg2 = -1;
377 op->shift = 0;
378 if (tok != '(') {
379 asm_expr(s1, &e);
380 op->e = e;
381 } else {
382 next();
383 if (tok == '%') {
384 unget_tok('(');
385 op->e.v = 0;
386 op->e.sym = NULL;
387 } else {
388 /* bracketed offset expression */
389 asm_expr(s1, &e);
390 if (tok != ')')
391 expect(")");
392 next();
393 op->e.v = e.v;
394 op->e.sym = e.sym;
396 op->e.pcrel = 0;
398 if (tok == '(') {
399 int type = 0;
400 next();
401 if (tok != ',') {
402 op->reg = asm_parse_reg(&type);
404 if (tok == ',') {
405 next();
406 if (tok != ',') {
407 op->reg2 = asm_parse_reg(&type);
409 if (tok == ',') {
410 next();
411 op->shift = get_reg_shift(s1);
414 if (type & OP_EA32)
415 op->type |= OP_EA32;
416 skip(')');
418 if (op->reg == -1 && op->reg2 == -1)
419 op->type |= OP_ADDR;
421 op->type |= indir;
424 /* XXX: unify with C code output ? */
425 ST_FUNC void gen_expr32(ExprValue *pe)
427 if (pe->pcrel)
428 /* If PC-relative, always set VT_SYM, even without symbol,
429 so as to force a relocation to be emitted. */
430 gen_addrpc32(VT_SYM, pe->sym, pe->v);
431 else
432 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* Emit the 64-bit value of expression *pe; VT_SYM is passed only when
   the expression refers to a symbol. */
static void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
442 /* XXX: unify with C code output ? */
443 static void gen_disp32(ExprValue *pe)
445 Sym *sym = pe->sym;
446 if (sym && sym->r == cur_text_section->sh_num) {
447 /* same section: we can output an absolute value. Note
448 that the TCC compiler behaves differently here because
449 it always outputs a relocation to ease (future) code
450 elimination in the linker */
451 gen_le32(pe->v + sym->jnext - ind - 4);
452 } else {
453 if (sym && sym->type.t == VT_VOID) {
454 sym->type.t = VT_FUNC;
455 sym->type.ref = NULL;
457 gen_addrpc32(VT_SYM, sym, pe->v);
461 /* generate the modrm operand */
462 static inline int asm_modrm(int reg, Operand *op)
464 int mod, reg1, reg2, sib_reg1;
466 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
467 g(0xc0 + (reg << 3) + op->reg);
468 } else if (op->reg == -1 && op->reg2 == -1) {
469 /* displacement only */
470 #ifdef TCC_TARGET_X86_64
471 g(0x04 + (reg << 3));
472 g(0x25);
473 #else
474 g(0x05 + (reg << 3));
475 #endif
476 gen_expr32(&op->e);
477 #ifdef TCC_TARGET_X86_64
478 } else if (op->reg == 8) {
479 ExprValue *pe = &op->e;
480 g(0x05 + (reg << 3));
481 gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
482 return ind;
483 #endif
484 } else {
485 sib_reg1 = op->reg;
486 /* fist compute displacement encoding */
487 if (sib_reg1 == -1) {
488 sib_reg1 = 5;
489 mod = 0x00;
490 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
491 mod = 0x00;
492 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
493 mod = 0x40;
494 } else {
495 mod = 0x80;
497 /* compute if sib byte needed */
498 reg1 = op->reg;
499 if (op->reg2 != -1)
500 reg1 = 4;
501 g(mod + (reg << 3) + reg1);
502 if (reg1 == 4) {
503 /* add sib byte */
504 reg2 = op->reg2;
505 if (reg2 == -1)
506 reg2 = 4; /* indicate no index */
507 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
509 /* add offset */
510 if (mod == 0x40) {
511 g(op->e.v);
512 } else if (mod == 0x80 || op->reg == -1) {
513 gen_expr32(&op->e);
516 return 0;
519 static void maybe_print_stats (void)
521 static int already = 1;
522 if (!already)
523 /* print stats about opcodes */
525 const struct ASMInstr *pa;
526 int freq[4];
527 int op_vals[500];
528 int nb_op_vals, i, j;
530 already = 1;
531 nb_op_vals = 0;
532 memset(freq, 0, sizeof(freq));
533 for(pa = asm_instrs; pa->sym != 0; pa++) {
534 freq[pa->nb_ops]++;
535 //for(i=0;i<pa->nb_ops;i++) {
536 for(j=0;j<nb_op_vals;j++) {
537 //if (pa->op_type[i] == op_vals[j])
538 if (pa->instr_type == op_vals[j])
539 goto found;
541 //op_vals[nb_op_vals++] = pa->op_type[i];
542 op_vals[nb_op_vals++] = pa->instr_type;
543 found: ;
546 for(i=0;i<nb_op_vals;i++) {
547 int v = op_vals[i];
548 //if ((v & (v - 1)) != 0)
549 printf("%3d: %08x\n", i, v);
551 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
552 (int)sizeof(asm_instrs),
553 (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
554 freq[0], freq[1], freq[2], freq[3]);
558 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
560 const ASMInstr *pa;
561 int i, modrm_index, reg, v, op1, seg_prefix, pc;
562 int nb_ops, s;
563 Operand ops[MAX_OPERANDS], *pop;
564 int op_type[3]; /* decoded op type */
565 int alltypes; /* OR of all operand types */
566 int autosize;
567 int p66;
569 maybe_print_stats();
570 /* force synthetic ';' after prefix instruction, so we can handle */
571 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
572 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
573 unget_tok(';');
575 /* get operands */
576 pop = ops;
577 nb_ops = 0;
578 seg_prefix = 0;
579 alltypes = 0;
580 for(;;) {
581 if (tok == ';' || tok == TOK_LINEFEED)
582 break;
583 if (nb_ops >= MAX_OPERANDS) {
584 tcc_error("incorrect number of operands");
586 parse_operand(s1, pop);
587 if (tok == ':') {
588 if (pop->type != OP_SEG || seg_prefix)
589 tcc_error("incorrect prefix");
590 seg_prefix = segment_prefixes[pop->reg];
591 next();
592 parse_operand(s1, pop);
593 if (!(pop->type & OP_EA)) {
594 tcc_error("segment prefix must be followed by memory reference");
597 pop++;
598 nb_ops++;
599 if (tok != ',')
600 break;
601 next();
604 s = 0; /* avoid warning */
606 /* optimize matching by using a lookup table (no hashing is needed
607 !) */
608 for(pa = asm_instrs; pa->sym != 0; pa++) {
609 int it = pa->instr_type & OPCT_MASK;
610 s = 0;
611 if (it == OPC_FARITH) {
612 v = opcode - pa->sym;
613 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
614 continue;
615 } else if (it == OPC_ARITH) {
616 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
617 continue;
618 s = (opcode - pa->sym) % NBWLX;
619 if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
621 /* We need to reject the xxxb opcodes that we accepted above.
622 Note that pa->sym for WLX opcodes is the 'w' token,
623 to get the 'b' token subtract one. */
624 if (((opcode - pa->sym + 1) % NBWLX) == 0)
625 continue;
626 s++;
628 } else if (it == OPC_SHIFT) {
629 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
630 continue;
631 s = (opcode - pa->sym) % NBWLX;
632 } else if (it == OPC_TEST) {
633 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
634 continue;
635 /* cmovxx is a test opcode but accepts multiple sizes.
636 TCC doesn't accept the suffixed mnemonic, instead we
637 simply force size autodetection always. */
638 if (pa->instr_type & OPC_WLX)
639 s = NBWLX - 1;
640 } else if (pa->instr_type & OPC_B) {
641 #ifdef TCC_TARGET_X86_64
642 /* Some instructions don't have the full size but only
643 bwl form. insb e.g. */
644 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
645 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
646 continue;
647 #endif
648 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
649 continue;
650 s = opcode - pa->sym;
651 } else if (pa->instr_type & OPC_WLX) {
652 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
653 continue;
654 s = opcode - pa->sym + 1;
655 } else {
656 if (pa->sym != opcode)
657 continue;
659 if (pa->nb_ops != nb_ops)
660 continue;
661 #ifdef TCC_TARGET_X86_64
662 /* Special case for moves. Selecting the IM64->REG64 form
663 should only be done if we really have an >32bit imm64, and that
664 is hardcoded. Ignore it here. */
665 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
666 && ops[1].type == OP_REG64
667 && !(pa->instr_type & OPC_0F))
668 continue;
669 #endif
670 /* now decode and check each operand */
671 alltypes = 0;
672 for(i = 0; i < nb_ops; i++) {
673 int op1, op2;
674 op1 = pa->op_type[i];
675 op2 = op1 & 0x1f;
676 switch(op2) {
677 case OPT_IM:
678 v = OP_IM8 | OP_IM16 | OP_IM32;
679 break;
680 case OPT_REG:
681 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
682 break;
683 case OPT_REGW:
684 v = OP_REG16 | OP_REG32 | OP_REG64;
685 break;
686 case OPT_IMW:
687 v = OP_IM16 | OP_IM32;
688 break;
689 case OPT_MMXSSE:
690 v = OP_MMX | OP_SSE;
691 break;
692 case OPT_DISP:
693 case OPT_DISP8:
694 v = OP_ADDR;
695 break;
696 default:
697 v = 1 << op2;
698 break;
700 if (op1 & OPT_EA)
701 v |= OP_EA;
702 op_type[i] = v;
703 if ((ops[i].type & v) == 0)
704 goto next;
705 alltypes |= ops[i].type;
707 /* all is matching ! */
708 break;
709 next: ;
711 if (pa->sym == 0) {
712 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
713 int b;
714 b = op0_codes[opcode - TOK_ASM_first];
715 if (b & 0xff00)
716 g(b >> 8);
717 g(b);
718 return;
719 } else if (opcode <= TOK_ASM_alllast) {
720 tcc_error("bad operand with opcode '%s'",
721 get_tok_str(opcode, NULL));
722 } else {
723 tcc_error("unknown opcode '%s'",
724 get_tok_str(opcode, NULL));
727 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
728 autosize = NBWLX-1;
729 #ifdef TCC_TARGET_X86_64
730 /* XXX the autosize should rather be zero, to not have to adjust this
731 all the time. */
732 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
733 autosize = NBWLX-2;
734 #endif
735 if (s == autosize) {
736 for(i = 0; s == autosize && i < nb_ops; i++) {
737 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
738 s = reg_to_size[ops[i].type & OP_REG];
740 if (s == autosize) {
741 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
742 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
743 s = 2;
744 else
745 tcc_error("cannot infer opcode suffix");
749 #ifdef TCC_TARGET_X86_64
750 /* Generate addr32 prefix if needed */
751 for(i = 0; i < nb_ops; i++) {
752 if (ops[i].type & OP_EA32) {
753 g(0x67);
754 break;
757 #endif
758 /* generate data16 prefix if needed */
759 p66 = 0;
760 if (s == 1)
761 p66 = 1;
762 else {
763 /* accepting mmx+sse in all operands --> needs 0x66 to
764 switch to sse mode. Accepting only sse in an operand --> is
765 already SSE insn and needs 0x66/f2/f3 handling. */
766 for (i = 0; i < nb_ops; i++)
767 if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
768 && ops[i].type & OP_SSE)
769 p66 = 1;
771 if (p66)
772 g(0x66);
773 #ifdef TCC_TARGET_X86_64
774 if (s == 3 || (alltypes & OP_REG64)) {
775 /* generate REX prefix */
776 int default64 = 0;
777 for(i = 0; i < nb_ops; i++) {
778 if (op_type[i] == OP_REG64) {
779 /* If only 64bit regs are accepted in one operand
780 this is a default64 instruction without need for
781 REX prefixes. */
782 default64 = 1;
783 break;
786 /* XXX find better encoding for the default64 instructions. */
787 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
788 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
789 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
790 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
791 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
792 && !default64)
793 g(0x48);
795 #endif
797 /* now generates the operation */
798 if (OPCT_IS(pa->instr_type, OPC_FWAIT))
799 g(0x9b);
800 if (seg_prefix)
801 g(seg_prefix);
803 v = pa->opcode;
804 if (pa->instr_type & OPC_0F)
805 v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
806 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
807 /* kludge for imul $im, %reg */
808 nb_ops = 3;
809 ops[2] = ops[1];
810 op_type[2] = op_type[1];
811 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
812 v--; /* int $3 case */
813 nb_ops = 0;
814 } else if ((v == 0x06 || v == 0x07)) {
815 if (ops[0].reg >= 4) {
816 /* push/pop %fs or %gs */
817 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
818 } else {
819 v += ops[0].reg << 3;
821 nb_ops = 0;
822 } else if (v <= 0x05) {
823 /* arith case */
824 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
825 } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
826 /* fpu arith case */
827 v += ((opcode - pa->sym) / 6) << 3;
829 if (pa->instr_type & OPC_REG) {
830 /* mov $im, %reg case */
831 if (v == 0xb0 && s >= 1)
832 v += 7;
833 for(i = 0; i < nb_ops; i++) {
834 if (op_type[i] & (OP_REG | OP_ST)) {
835 v += ops[i].reg;
836 break;
840 if (pa->instr_type & OPC_B)
841 v += s >= 1;
842 if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
843 Sym *sym;
844 int jmp_disp;
846 /* see if we can really generate the jump with a byte offset */
847 sym = ops[0].e.sym;
848 if (!sym)
849 goto no_short_jump;
850 if (sym->r != cur_text_section->sh_num)
851 goto no_short_jump;
852 jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
853 if (jmp_disp == (int8_t)jmp_disp) {
854 /* OK to generate jump */
855 ops[0].e.sym = 0;
856 ops[0].e.v = jmp_disp;
857 op_type[0] = OP_IM8S;
858 } else {
859 no_short_jump:
860 /* long jump will be allowed. need to modify the
861 opcode slightly */
862 if (v == 0xeb) /* jmp */
863 v = 0xe9;
864 else if (v == 0x70) /* jcc */
865 v += 0x0f10;
866 else
867 tcc_error("invalid displacement");
870 if (OPCT_IS(pa->instr_type, OPC_TEST))
871 v += test_bits[opcode - pa->sym];
872 op1 = v >> 16;
873 if (op1)
874 g(op1);
875 op1 = (v >> 8) & 0xff;
876 if (op1)
877 g(op1);
878 g(v);
880 /* search which operand will used for modrm */
881 modrm_index = 0;
882 if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
883 reg = (opcode - pa->sym) / NBWLX;
884 if (reg == 6)
885 reg = 7;
886 } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
887 reg = (opcode - pa->sym) / NBWLX;
888 } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
889 reg = (opcode - pa->sym) / 6;
890 } else {
891 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
894 pc = 0;
895 if (pa->instr_type & OPC_MODRM) {
896 /* first look for an ea operand */
897 for(i = 0;i < nb_ops; i++) {
898 if (op_type[i] & OP_EA)
899 goto modrm_found;
901 /* then if not found, a register or indirection (shift instructions) */
902 for(i = 0;i < nb_ops; i++) {
903 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
904 goto modrm_found;
906 #ifdef ASM_DEBUG
907 tcc_error("bad op table");
908 #endif
909 modrm_found:
910 modrm_index = i;
911 /* if a register is used in another operand then it is
912 used instead of group */
913 for(i = 0;i < nb_ops; i++) {
914 v = op_type[i];
915 if (i != modrm_index &&
916 (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
917 reg = ops[i].reg;
918 break;
921 pc = asm_modrm(reg, &ops[modrm_index]);
924 /* emit constants */
925 #ifndef TCC_TARGET_X86_64
926 if (!(pa->instr_type & OPC_0F)
927 && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
928 /* ljmp or lcall kludge */
929 gen_expr32(&ops[1].e);
930 if (ops[0].e.sym)
931 tcc_error("cannot relocate");
932 gen_le16(ops[0].e.v);
933 return;
935 #endif
936 for(i = 0;i < nb_ops; i++) {
937 v = op_type[i];
938 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
939 /* if multiple sizes are given it means we must look
940 at the op size */
941 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
942 if (s == 0)
943 v = OP_IM8;
944 else if (s == 1)
945 v = OP_IM16;
946 else if (s == 2 || (v & OP_IM64) == 0)
947 v = OP_IM32;
948 else
949 v = OP_IM64;
952 if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
953 tcc_error("cannot relocate");
955 if (v & (OP_IM8 | OP_IM8S)) {
956 g(ops[i].e.v);
957 } else if (v & OP_IM16) {
958 gen_le16(ops[i].e.v);
959 #ifdef TCC_TARGET_X86_64
960 } else if (v & OP_IM64) {
961 gen_expr64(&ops[i].e);
962 #endif
963 } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
964 gen_disp32(&ops[i].e);
965 } else {
966 gen_expr32(&ops[i].e);
971 /* after immediate operands, adjust pc-relative address */
972 if (pc)
973 add32le(text_section->data + pc - 4, pc - ind);
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
/* 'str' is a constraint string with its leading modifiers already
   skipped.  Each letter has a fixed value (0 for 'A' up to 4 for the
   immediate/memory letters); the result is the largest value found
   among the letters.  An unknown letter is reported through
   tcc_error(). */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* keep the largest value among all alternatives */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}
/* Return a pointer to the first character of 'p' that is not one of
   the leading constraint modifiers '=', '&', '+' or '%'. */
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}
/* Per-register usage bits kept in regs_allocated[] by
   asm_compute_constraints(). */
#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

/* Non-zero when 'reg' is already taken for the roles selected by the
   caller's local 'reg_mask'; relies on 'regs_allocated' and 'reg_mask'
   being in scope at the point of use. */
#define is_reg_allocated(reg) (regs_allocated[(reg)] & reg_mask)
1038 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1039 int nb_operands, int nb_outputs,
1040 const uint8_t *clobber_regs,
1041 int *pout_reg)
1043 ASMOperand *op;
1044 int sorted_op[MAX_ASM_OPERANDS];
1045 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1046 const char *str;
1047 uint8_t regs_allocated[NB_ASM_REGS];
1049 /* init fields */
1050 for(i=0;i<nb_operands;i++) {
1051 op = &operands[i];
1052 op->input_index = -1;
1053 op->ref_index = -1;
1054 op->reg = -1;
1055 op->is_memory = 0;
1056 op->is_rw = 0;
1058 /* compute constraint priority and evaluate references to output
1059 constraints if input constraints */
1060 for(i=0;i<nb_operands;i++) {
1061 op = &operands[i];
1062 str = op->constraint;
1063 str = skip_constraint_modifiers(str);
1064 if (isnum(*str) || *str == '[') {
1065 /* this is a reference to another constraint */
1066 k = find_constraint(operands, nb_operands, str, NULL);
1067 if ((unsigned)k >= i || i < nb_outputs)
1068 tcc_error("invalid reference in constraint %d ('%s')",
1069 i, str);
1070 op->ref_index = k;
1071 if (operands[k].input_index >= 0)
1072 tcc_error("cannot reference twice the same operand");
1073 operands[k].input_index = i;
1074 op->priority = 5;
1075 } else {
1076 op->priority = constraint_priority(str);
1080 /* sort operands according to their priority */
1081 for(i=0;i<nb_operands;i++)
1082 sorted_op[i] = i;
1083 for(i=0;i<nb_operands - 1;i++) {
1084 for(j=i+1;j<nb_operands;j++) {
1085 p1 = operands[sorted_op[i]].priority;
1086 p2 = operands[sorted_op[j]].priority;
1087 if (p2 < p1) {
1088 tmp = sorted_op[i];
1089 sorted_op[i] = sorted_op[j];
1090 sorted_op[j] = tmp;
1095 for(i = 0;i < NB_ASM_REGS; i++) {
1096 if (clobber_regs[i])
1097 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1098 else
1099 regs_allocated[i] = 0;
1101 /* esp cannot be used */
1102 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1103 /* ebp cannot be used yet */
1104 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1106 /* allocate registers and generate corresponding asm moves */
1107 for(i=0;i<nb_operands;i++) {
1108 j = sorted_op[i];
1109 op = &operands[j];
1110 str = op->constraint;
1111 /* no need to allocate references */
1112 if (op->ref_index >= 0)
1113 continue;
1114 /* select if register is used for output, input or both */
1115 if (op->input_index >= 0) {
1116 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1117 } else if (j < nb_outputs) {
1118 reg_mask = REG_OUT_MASK;
1119 } else {
1120 reg_mask = REG_IN_MASK;
1122 try_next:
1123 c = *str++;
1124 switch(c) {
1125 case '=':
1126 goto try_next;
1127 case '+':
1128 op->is_rw = 1;
1129 /* FALL THRU */
1130 case '&':
1131 if (j >= nb_outputs)
1132 tcc_error("'%c' modifier can only be applied to outputs", c);
1133 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1134 goto try_next;
1135 case 'A':
1136 /* allocate both eax and edx */
1137 if (is_reg_allocated(TREG_XAX) ||
1138 is_reg_allocated(TREG_XDX))
1139 goto try_next;
1140 op->is_llong = 1;
1141 op->reg = TREG_XAX;
1142 regs_allocated[TREG_XAX] |= reg_mask;
1143 regs_allocated[TREG_XDX] |= reg_mask;
1144 break;
1145 case 'a':
1146 reg = TREG_XAX;
1147 goto alloc_reg;
1148 case 'b':
1149 reg = 3;
1150 goto alloc_reg;
1151 case 'c':
1152 reg = TREG_XCX;
1153 goto alloc_reg;
1154 case 'd':
1155 reg = TREG_XDX;
1156 goto alloc_reg;
1157 case 'S':
1158 reg = 6;
1159 goto alloc_reg;
1160 case 'D':
1161 reg = 7;
1162 alloc_reg:
1163 if (is_reg_allocated(reg))
1164 goto try_next;
1165 goto reg_found;
1166 case 'q':
1167 /* eax, ebx, ecx or edx */
1168 for(reg = 0; reg < 4; reg++) {
1169 if (!is_reg_allocated(reg))
1170 goto reg_found;
1172 goto try_next;
1173 case 'r':
1174 case 'p': /* A general address, for x86(64) any register is acceptable*/
1175 /* any general register */
1176 for(reg = 0; reg < 8; reg++) {
1177 if (!is_reg_allocated(reg))
1178 goto reg_found;
1180 goto try_next;
1181 reg_found:
1182 /* now we can reload in the register */
1183 op->is_llong = 0;
1184 op->reg = reg;
1185 regs_allocated[reg] |= reg_mask;
1186 break;
1187 case 'i':
1188 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1189 goto try_next;
1190 break;
1191 case 'I':
1192 case 'N':
1193 case 'M':
1194 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1195 goto try_next;
1196 break;
1197 case 'm':
1198 case 'g':
1199 /* nothing special to do because the operand is already in
1200 memory, except if the pointer itself is stored in a
1201 memory variable (VT_LLOCAL case) */
1202 /* XXX: fix constant case */
1203 /* if it is a reference to a memory zone, it must lie
1204 in a register, so we reserve the register in the
1205 input registers and a load will be generated
1206 later */
1207 if (j < nb_outputs || c == 'm') {
1208 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1209 /* any general register */
1210 for(reg = 0; reg < 8; reg++) {
1211 if (!(regs_allocated[reg] & REG_IN_MASK))
1212 goto reg_found1;
1214 goto try_next;
1215 reg_found1:
1216 /* now we can reload in the register */
1217 regs_allocated[reg] |= REG_IN_MASK;
1218 op->reg = reg;
1219 op->is_memory = 1;
1222 break;
1223 default:
1224 tcc_error("asm constraint %d ('%s') could not be satisfied",
1225 j, op->constraint);
1226 break;
1228 /* if a reference is present for that operand, we assign it too */
1229 if (op->input_index >= 0) {
1230 operands[op->input_index].reg = op->reg;
1231 operands[op->input_index].is_llong = op->is_llong;
1235 /* compute out_reg. It is used to store outputs registers to memory
1236 locations references by pointers (VT_LLOCAL case) */
1237 *pout_reg = -1;
1238 for(i=0;i<nb_operands;i++) {
1239 op = &operands[i];
1240 if (op->reg >= 0 &&
1241 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1242 !op->is_memory) {
1243 for(reg = 0; reg < 8; reg++) {
1244 if (!(regs_allocated[reg] & REG_OUT_MASK))
1245 goto reg_found2;
1247 tcc_error("could not find free output register for reloading");
1248 reg_found2:
1249 *pout_reg = reg;
1250 break;
1254 /* print sorted constraints */
1255 #ifdef ASM_DEBUG
1256 for(i=0;i<nb_operands;i++) {
1257 j = sorted_op[i];
1258 op = &operands[j];
1259 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1261 op->id ? get_tok_str(op->id, NULL) : "",
1262 op->constraint,
1263 op->vt->r,
1264 op->reg);
1266 if (*pout_reg >= 0)
1267 printf("out_reg=%d\n", *pout_reg);
1268 #endif
1271 ST_FUNC void subst_asm_operand(CString *add_str,
1272 SValue *sv, int modifier)
1274 int r, reg, size, val;
1275 char buf[64];
1277 r = sv->r;
1278 if ((r & VT_VALMASK) == VT_CONST) {
1279 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
1280 cstr_ccat(add_str, '$');
1281 if (r & VT_SYM) {
1282 cstr_cat(add_str, get_tok_str(sv->sym->v, NULL), -1);
1283 if ((uint32_t)sv->c.i == 0)
1284 goto no_offset;
1285 cstr_ccat(add_str, '+');
1287 val = sv->c.i;
1288 if (modifier == 'n')
1289 val = -val;
1290 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1291 cstr_cat(add_str, buf, -1);
1292 no_offset:;
1293 #ifdef TCC_TARGET_X86_64
1294 if (r & VT_LVAL)
1295 cstr_cat(add_str, "(%rip)", -1);
1296 #endif
1297 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1298 #ifdef TCC_TARGET_X86_64
1299 snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
1300 #else
1301 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1302 #endif
1303 cstr_cat(add_str, buf, -1);
1304 } else if (r & VT_LVAL) {
1305 reg = r & VT_VALMASK;
1306 if (reg >= VT_CONST)
1307 tcc_error("internal compiler error");
1308 snprintf(buf, sizeof(buf), "(%%%s)",
1309 #ifdef TCC_TARGET_X86_64
1310 get_tok_str(TOK_ASM_rax + reg, NULL)
1311 #else
1312 get_tok_str(TOK_ASM_eax + reg, NULL)
1313 #endif
1315 cstr_cat(add_str, buf, -1);
1316 } else {
1317 /* register case */
1318 reg = r & VT_VALMASK;
1319 if (reg >= VT_CONST)
1320 tcc_error("internal compiler error");
1322 /* choose register operand size */
1323 if ((sv->type.t & VT_BTYPE) == VT_BYTE)
1324 size = 1;
1325 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1326 size = 2;
1327 #ifdef TCC_TARGET_X86_64
1328 else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
1329 size = 8;
1330 #endif
1331 else
1332 size = 4;
1333 if (size == 1 && reg >= 4)
1334 size = 4;
1336 if (modifier == 'b') {
1337 if (reg >= 4)
1338 tcc_error("cannot use byte register");
1339 size = 1;
1340 } else if (modifier == 'h') {
1341 if (reg >= 4)
1342 tcc_error("cannot use byte register");
1343 size = -1;
1344 } else if (modifier == 'w') {
1345 size = 2;
1346 } else if (modifier == 'k') {
1347 size = 4;
1348 #ifdef TCC_TARGET_X86_64
1349 } else if (modifier == 'q') {
1350 size = 8;
1351 #endif
1354 switch(size) {
1355 case -1:
1356 reg = TOK_ASM_ah + reg;
1357 break;
1358 case 1:
1359 reg = TOK_ASM_al + reg;
1360 break;
1361 case 2:
1362 reg = TOK_ASM_ax + reg;
1363 break;
1364 default:
1365 reg = TOK_ASM_eax + reg;
1366 break;
1367 #ifdef TCC_TARGET_X86_64
1368 case 8:
1369 reg = TOK_ASM_rax + reg;
1370 break;
1371 #endif
1373 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1374 cstr_cat(add_str, buf, -1);
1378 /* generate prolog and epilog code for asm statement */
1379 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1380 int nb_outputs, int is_output,
1381 uint8_t *clobber_regs,
1382 int out_reg)
1384 uint8_t regs_allocated[NB_ASM_REGS];
1385 ASMOperand *op;
1386 int i, reg;
1387 static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
1389 /* mark all used registers */
1390 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1391 for(i = 0; i < nb_operands;i++) {
1392 op = &operands[i];
1393 if (op->reg >= 0)
1394 regs_allocated[op->reg] = 1;
1396 if (!is_output) {
1397 /* generate reg save code */
1398 for(i = 0; i < NB_SAVED_REGS; i++) {
1399 reg = reg_saved[i];
1400 if (regs_allocated[reg]) {
1401 g(0x50 + reg);
1405 /* generate load code */
1406 for(i = 0; i < nb_operands; i++) {
1407 op = &operands[i];
1408 if (op->reg >= 0) {
1409 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1410 op->is_memory) {
1411 /* memory reference case (for both input and
1412 output cases) */
1413 SValue sv;
1414 sv = *op->vt;
1415 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
1416 sv.type.t = VT_PTR;
1417 load(op->reg, &sv);
1418 } else if (i >= nb_outputs || op->is_rw) {
1419 /* load value in register */
1420 load(op->reg, op->vt);
1421 if (op->is_llong) {
1422 SValue sv;
1423 sv = *op->vt;
1424 sv.c.i += 4;
1425 load(TREG_XDX, &sv);
1430 } else {
1431 /* generate save code */
1432 for(i = 0 ; i < nb_outputs; i++) {
1433 op = &operands[i];
1434 if (op->reg >= 0) {
1435 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1436 if (!op->is_memory) {
1437 SValue sv;
1438 sv = *op->vt;
1439 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1440 load(out_reg, &sv);
1442 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1443 store(op->reg, &sv);
1445 } else {
1446 store(op->reg, op->vt);
1447 if (op->is_llong) {
1448 SValue sv;
1449 sv = *op->vt;
1450 sv.c.i += 4;
1451 store(TREG_XDX, &sv);
1456 /* generate reg restore code */
1457 for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
1458 reg = reg_saved[i];
1459 if (regs_allocated[reg]) {
1460 g(0x58 + reg);
1466 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1468 int reg;
1469 TokenSym *ts;
1471 if (!strcmp(str, "memory") ||
1472 !strcmp(str, "cc"))
1473 return;
1474 ts = tok_alloc(str, strlen(str));
1475 reg = ts->tok;
1476 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1477 reg -= TOK_ASM_eax;
1478 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1479 reg -= TOK_ASM_ax;
1480 #ifdef TCC_TARGET_X86_64
1481 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1482 reg -= TOK_ASM_rax;
1483 #endif
1484 } else {
1485 tcc_error("invalid clobber register '%s'", str);
1487 clobber_regs[reg] = 1;