Optimize vswap()
[tinycc.git] / i386-asm.c
blob664aadebdb6c16cbc31de7012b5dcdc4bc2a22f7
/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
22 #include "tcc.h"
/* Limits used by the instruction tables and the operand parser. */
// #define NB_ASM_REGS 8
#define MAX_OPERANDS 3
#define NB_SAVED_REGS 3

/* First and last tokens of the operand-less instructions: asm_opcode()
   indexes op0_codes[] with (opcode - TOK_ASM_first). */
#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms

/* Flags kept in ASMInstr.instr_type (low bits). */
#define OPC_JMP        0x01  /* jmp operand */
#define OPC_B          0x02  /* only used with OPC_WL */
#define OPC_WL         0x04  /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x08 /* register is added to opcode */
#define OPC_MODRM      0x10 /* modrm encoding */
#define OPC_FWAIT      0x20 /* add fwait opcode */
#define OPC_TEST       0x40 /* test opcodes */
#define OPC_SHIFT      0x80 /* shift opcodes */
#define OPC_D16      0x0100 /* generate data16 prefix */
#define OPC_ARITH    0x0200 /* arithmetic opcodes */
#define OPC_SHORTJMP 0x0400 /* short jmp operand */
#define OPC_FARITH   0x0800 /* FPU arithmetic opcodes */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000  /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
#else
# define OPC_WLX     OPC_WL
#endif

/* Bit position of the instruction group inside ASMInstr.instr_type
   (the table macros store "instr_type | group << OPC_GROUP_SHIFT"). */
#define OPC_GROUP_SHIFT 13
/* in order to compress the operand type, we use specific operands and
   we or only with EA.  These are the per-operand codes stored in
   ASMInstr.op_type[]; asm_opcode() expands each one into an OP_xxx
   bit mask (1 << OPT_xxx for the simple codes). */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 | IM64 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 | IM64 */
#ifdef TCC_TARGET_X86_64
    OPT_IMNO64, /* IM16 | IM32 */
#endif
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};
/* Bit masks used in Operand.type, one bit per simple OPT_xxx code. */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
/* The 64-bit masks collapse to 0 on i386 so they can be or-ed in
   unconditionally. */
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_IM64  (1 << OPT_IM64)
#else
# define OP_REG64 0
# define OP_IM64  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* Target-width aliases: widest immediate mask and the a/c/d registers. */
#ifdef TCC_TARGET_X86_64
# define OP_IM      OP_IM64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define OP_IM      OP_IM32
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif
139 typedef struct ASMInstr {
140 uint16_t sym;
141 uint16_t opcode;
142 uint16_t instr_type;
143 uint8_t nb_ops;
144 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
145 } ASMInstr;
147 typedef struct Operand {
148 uint32_t type;
149 int8_t reg; /* register, -1 if none */
150 int8_t reg2; /* second register, -1 if none */
151 uint8_t shift;
152 ExprValue e;
153 } Operand;
/* Size index for a register operand, looked up by asm_opcode() as
   reg_to_size[type & OP_REG].  Only the slots whose index is a single
   OP_REGxx bit are meaningful; written with designators the table is
       [OP_REG8] = 0, [OP_REG16] = 1, [OP_REG32] = 2,
       [OP_REG64] = 3   (x86_64 only)
   The positional form is the live one: mixing both forms would place
   the positional values after the last designated slot and overflow
   the array. */
static const uint8_t reg_to_size[9] = {
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
#define NB_TEST_OPCODES 30

/* Value added to the base opcode of an OPC_TEST instruction, indexed by
   (opcode - pa->sym), i.e. by the position of the condition suffix.
   Synonymous suffixes share the same value. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};
/* Segment-override prefix bytes, indexed by the segment register number
   that parse_operand() stores (tok - TOK_ASM_es). */
static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};
211 static const ASMInstr asm_instrs[] = {
212 #define ALT(x) x
213 #define DEF_ASM_OP0(name, opcode)
214 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
215 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
216 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
217 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
218 #ifdef TCC_TARGET_X86_64
219 # include "x86_64-asm.h"
220 #else
221 # include "i386-asm.h"
222 #endif
223 /* last operation */
224 { 0, },
227 static const uint16_t op0_codes[] = {
228 #define ALT(x)
229 #define DEF_ASM_OP0(x, opcode) opcode,
230 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
231 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
232 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
233 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
234 #ifdef TCC_TARGET_X86_64
235 # include "x86_64-asm.h"
236 #else
237 # include "i386-asm.h"
238 #endif
241 static inline int get_reg_shift(TCCState *s1)
243 int shift, v;
244 #ifdef I386_ASM_16
245 if (s1->seg_size == 16)
246 tcc_error("invalid effective address");
247 #endif
248 v = asm_int_expr(s1);
249 switch(v) {
250 case 1:
251 shift = 0;
252 break;
253 case 2:
254 shift = 1;
255 break;
256 case 4:
257 shift = 2;
258 break;
259 case 8:
260 shift = 3;
261 break;
262 default:
263 expect("1, 2, 4 or 8 constant");
264 shift = 0;
265 break;
267 return shift;
270 static int asm_parse_reg(void)
272 int reg = 0;
273 if (tok != '%')
274 goto error_32;
275 next();
276 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
277 reg = tok - TOK_ASM_eax;
278 #ifdef TCC_TARGET_X86_64
279 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
280 reg = tok - TOK_ASM_rax;
281 #endif
282 #ifdef I386_ASM_16
283 } else if (tok >= TOK_ASM_ax && tok <= TOK_ASM_di) {
284 reg = tok - TOK_ASM_ax;
285 #endif
286 } else {
287 error_32:
288 expect("register");
290 next();
291 return reg;
294 static void parse_operand(TCCState *s1, Operand *op)
296 ExprValue e;
297 int reg, indir;
298 const char *p;
300 indir = 0;
301 if (tok == '*') {
302 next();
303 indir = OP_INDIR;
306 if (tok == '%') {
307 next();
308 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
309 reg = tok - TOK_ASM_al;
310 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
311 op->reg = reg & 7;
312 if ((op->type & OP_REG) && op->reg == TREG_XAX)
313 op->type |= OP_EAX;
314 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
315 op->type |= OP_CL;
316 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
317 op->type |= OP_DX;
318 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
319 op->type = OP_DB;
320 op->reg = tok - TOK_ASM_dr0;
321 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
322 op->type = OP_SEG;
323 op->reg = tok - TOK_ASM_es;
324 } else if (tok == TOK_ASM_st) {
325 op->type = OP_ST;
326 op->reg = 0;
327 next();
328 if (tok == '(') {
329 next();
330 if (tok != TOK_PPNUM)
331 goto reg_error;
332 p = tokc.cstr->data;
333 reg = p[0] - '0';
334 if ((unsigned)reg >= 8 || p[1] != '\0')
335 goto reg_error;
336 op->reg = reg;
337 next();
338 skip(')');
340 if (op->reg == 0)
341 op->type |= OP_ST0;
342 goto no_skip;
343 } else {
344 reg_error:
345 tcc_error("unknown register");
347 next();
348 no_skip: ;
349 } else if (tok == '$') {
350 /* constant value */
351 next();
352 asm_expr(s1, &e);
353 op->type = OP_IM;
354 op->e.v = e.v;
355 op->e.sym = e.sym;
356 if (!op->e.sym) {
357 if (op->e.v == (uint8_t)op->e.v)
358 op->type |= OP_IM8;
359 if (op->e.v == (int8_t)op->e.v)
360 op->type |= OP_IM8S;
361 if (op->e.v == (uint16_t)op->e.v)
362 op->type |= OP_IM16;
363 #ifdef TCC_TARGET_X86_64
364 if (op->e.v == (uint32_t)op->e.v)
365 op->type |= OP_IM32;
366 #endif
368 } else {
369 /* address(reg,reg2,shift) with all variants */
370 op->type = OP_EA;
371 op->reg = -1;
372 op->reg2 = -1;
373 op->shift = 0;
374 if (tok != '(') {
375 asm_expr(s1, &e);
376 op->e.v = e.v;
377 op->e.sym = e.sym;
378 } else {
379 next();
380 if (tok == '%') {
381 unget_tok('(');
382 op->e.v = 0;
383 op->e.sym = NULL;
384 } else {
385 /* bracketed offset expression */
386 asm_expr(s1, &e);
387 if (tok != ')')
388 expect(")");
389 next();
390 op->e.v = e.v;
391 op->e.sym = e.sym;
394 if (tok == '(') {
395 next();
396 if (tok != ',') {
397 op->reg = asm_parse_reg();
399 if (tok == ',') {
400 next();
401 if (tok != ',') {
402 op->reg2 = asm_parse_reg();
404 if (tok == ',') {
405 next();
406 op->shift = get_reg_shift(s1);
409 skip(')');
411 if (op->reg == -1 && op->reg2 == -1)
412 op->type |= OP_ADDR;
414 op->type |= indir;
417 /* XXX: unify with C code output ? */
418 ST_FUNC void gen_expr32(ExprValue *pe)
420 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* 64-bit counterpart of gen_expr32(), emitting through gen_addr64(). */
static void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif
430 /* XXX: unify with C code output ? */
431 static void gen_disp32(ExprValue *pe)
433 Sym *sym = pe->sym;
434 if (sym && sym->r == cur_text_section->sh_num) {
435 /* same section: we can output an absolute value. Note
436 that the TCC compiler behaves differently here because
437 it always outputs a relocation to ease (future) code
438 elimination in the linker */
439 gen_le32(pe->v + sym->jnext - ind - 4);
440 } else {
441 if (sym && sym->type.t == VT_VOID) {
442 sym->type.t = VT_FUNC;
443 sym->type.ref = NULL;
445 gen_addrpc32(VT_SYM, sym, pe->v);
#ifdef I386_ASM_16
/* Emit the 16-bit value of an expression, with an R_386_16 relocation
   when it refers to a symbol. */
static void gen_expr16(ExprValue *pe)
{
    if (pe->sym)
        greloc(cur_text_section, pe->sym, ind, R_386_16);
    gen_le16(pe->v);
}

/* Emit a 16-bit pc-relative displacement to the target of *pe. */
static void gen_disp16(ExprValue *pe)
{
    Sym *sym;
    sym = pe->sym;
    if (sym) {
        if (sym->r == cur_text_section->sh_num) {
            /* same section: we can output an absolute value. Note
               that the TCC compiler behaves differently here because
               it always outputs a relocation to ease (future) code
               elimination in the linker */
            gen_le16(pe->v + sym->jnext - ind - 2);
        } else {
            greloc(cur_text_section, sym, ind, R_386_PC16);
            gen_le16(pe->v - 2);
        }
    } else {
        /* put an empty PC16 relocation */
        put_elf_reloc(symtab_section, cur_text_section,
                      ind, R_386_PC16, 0);
        gen_le16(pe->v - 2);
    }
}
#endif
480 /* generate the modrm operand */
481 static inline void asm_modrm(int reg, Operand *op)
483 int mod, reg1, reg2, sib_reg1;
485 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
486 g(0xc0 + (reg << 3) + op->reg);
487 } else if (op->reg == -1 && op->reg2 == -1) {
488 /* displacement only */
489 #ifdef I386_ASM_16
490 if (tcc_state->seg_size == 16) {
491 g(0x06 + (reg << 3));
492 gen_expr16(&op->e);
493 } else if (tcc_state->seg_size == 32)
494 #endif
496 g(0x05 + (reg << 3));
497 gen_expr32(&op->e);
499 } else {
500 sib_reg1 = op->reg;
501 /* fist compute displacement encoding */
502 if (sib_reg1 == -1) {
503 sib_reg1 = 5;
504 mod = 0x00;
505 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
506 mod = 0x00;
507 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
508 mod = 0x40;
509 } else {
510 mod = 0x80;
512 /* compute if sib byte needed */
513 reg1 = op->reg;
514 if (op->reg2 != -1)
515 reg1 = 4;
516 #ifdef I386_ASM_16
517 if (tcc_state->seg_size == 32) {
518 #endif
519 g(mod + (reg << 3) + reg1);
520 if (reg1 == 4) {
521 /* add sib byte */
522 reg2 = op->reg2;
523 if (reg2 == -1)
524 reg2 = 4; /* indicate no index */
525 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
527 #ifdef I386_ASM_16
528 } else if (tcc_state->seg_size == 16) {
529 /* edi = 7, esi = 6 --> di = 5, si = 4 */
530 if ((reg1 == 6) || (reg1 == 7)) {
531 reg1 -= 2;
532 /* ebx = 3 --> bx = 7 */
533 } else if (reg1 == 3) {
534 reg1 = 7;
535 /* o32 = 5 --> o16 = 6 */
536 } else if (reg1 == 5) {
537 reg1 = 6;
538 /* sib not valid in 16-bit mode */
539 } else if (reg1 == 4) {
540 reg2 = op->reg2;
541 /* bp + si + offset */
542 if ((sib_reg1 == 5) && (reg2 == 6)) {
543 reg1 = 2;
544 /* bp + di + offset */
545 } else if ((sib_reg1 == 5) && (reg2 == 7)) {
546 reg1 = 3;
547 /* bx + si + offset */
548 } else if ((sib_reg1 == 3) && (reg2 == 6)) {
549 reg1 = 0;
550 /* bx + di + offset */
551 } else if ((sib_reg1 == 3) && (reg2 == 7)) {
552 reg1 = 1;
553 } else {
554 tcc_error("invalid effective address");
556 if (op->e.v == 0)
557 mod = 0;
558 } else {
559 tcc_error("invalid register");
561 g(mod + (reg << 3) + reg1);
563 #endif
564 /* add offset */
565 if (mod == 0x40) {
566 g(op->e.v);
567 } else if (mod == 0x80 || op->reg == -1) {
568 #ifdef I386_ASM_16
569 if (tcc_state->seg_size == 16)
570 gen_expr16(&op->e);
571 else if (tcc_state->seg_size == 32)
572 #endif
573 gen_expr32(&op->e);
578 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
580 const ASMInstr *pa;
581 int i, modrm_index, reg, v, op1, is_short_jmp, seg_prefix;
582 int nb_ops, s;
583 Operand ops[MAX_OPERANDS], *pop;
584 int op_type[3]; /* decoded op type */
585 #ifdef I386_ASM_16
586 static int a32 = 0, o32 = 0, addr32 = 0, data32 = 0;
587 #endif
589 /* force synthetic ';' after prefix instruction, so we can handle */
590 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
591 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
592 unget_tok(';');
594 /* get operands */
595 pop = ops;
596 nb_ops = 0;
597 seg_prefix = 0;
598 for(;;) {
599 if (tok == ';' || tok == TOK_LINEFEED)
600 break;
601 if (nb_ops >= MAX_OPERANDS) {
602 tcc_error("incorrect number of operands");
604 parse_operand(s1, pop);
605 if (tok == ':') {
606 if (pop->type != OP_SEG || seg_prefix)
607 tcc_error("incorrect prefix");
608 seg_prefix = segment_prefixes[pop->reg];
609 next();
610 parse_operand(s1, pop);
611 #ifndef I386_ASM_16
612 if (!(pop->type & OP_EA)) {
613 tcc_error("segment prefix must be followed by memory reference");
615 #endif
617 pop++;
618 nb_ops++;
619 if (tok != ',')
620 break;
621 next();
624 is_short_jmp = 0;
625 s = 0; /* avoid warning */
627 /* optimize matching by using a lookup table (no hashing is needed
628 !) */
629 for(pa = asm_instrs; pa->sym != 0; pa++) {
630 s = 0;
631 if (pa->instr_type & OPC_FARITH) {
632 v = opcode - pa->sym;
633 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
634 continue;
635 } else if (pa->instr_type & OPC_ARITH) {
636 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
637 continue;
638 s = (opcode - pa->sym) % NBWLX;
639 } else if (pa->instr_type & OPC_SHIFT) {
640 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
641 continue;
642 s = (opcode - pa->sym) % NBWLX;
643 } else if (pa->instr_type & OPC_TEST) {
644 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
645 continue;
646 } else if (pa->instr_type & OPC_B) {
647 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
648 continue;
649 s = opcode - pa->sym;
650 } else if (pa->instr_type & OPC_WLX) {
651 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
652 continue;
653 s = opcode - pa->sym + 1;
654 } else {
655 if (pa->sym != opcode)
656 continue;
658 if (pa->nb_ops != nb_ops)
659 continue;
660 /* now decode and check each operand */
661 for(i = 0; i < nb_ops; i++) {
662 int op1, op2;
663 op1 = pa->op_type[i];
664 op2 = op1 & 0x1f;
665 switch(op2) {
666 case OPT_IM:
667 v = OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64;
668 break;
669 case OPT_REG:
670 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
671 break;
672 case OPT_REGW:
673 v = OP_REG16 | OP_REG32 | OP_REG64;
674 break;
675 case OPT_IMW:
676 v = OP_IM16 | OP_IM32 | OP_IM64;
677 break;
678 #ifdef TCC_TARGET_X86_64
679 case OPT_IMNO64:
680 v = OP_IM16 | OP_IM32;
681 break;
682 #endif
683 default:
684 v = 1 << op2;
685 break;
687 if (op1 & OPT_EA)
688 v |= OP_EA;
689 op_type[i] = v;
690 if ((ops[i].type & v) == 0)
691 goto next;
693 /* all is matching ! */
694 break;
695 next: ;
697 if (pa->sym == 0) {
698 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
699 int b;
700 b = op0_codes[opcode - TOK_ASM_first];
701 #ifdef I386_ASM_16
702 if (opcode == TOK_ASM_o32) {
703 if (s1->seg_size == 32)
704 tcc_error("incorrect prefix");
705 else
706 o32 = data32 = 1;
707 } else if (opcode == TOK_ASM_a32) {
708 if (s1->seg_size == 32)
709 tcc_error("incorrect prefix");
710 else
711 a32 = addr32 = 1;
713 #endif
714 if (b & 0xff00)
715 g(b >> 8);
716 g(b);
717 return;
718 } else {
719 tcc_error("unknown opcode '%s'",
720 get_tok_str(opcode, NULL));
723 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
724 if (s == NBWLX-1) {
725 for(i = 0; s == NBWLX-1 && i < nb_ops; i++) {
726 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
727 s = reg_to_size[ops[i].type & OP_REG];
729 if (s == NBWLX-1) {
730 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
731 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32 | OP_IM64)))
732 s = 2;
733 else
734 tcc_error("cannot infer opcode suffix");
738 #ifdef I386_ASM_16
739 for(i = 0; i < nb_ops; i++) {
740 if (ops[i].type & OP_REG32) {
741 if (s1->seg_size == 16)
742 o32 = 1;
743 } else if (!(ops[i].type & OP_REG32)) {
744 if (s1->seg_size == 32)
745 o32 = 1;
750 if (s == 1 || (pa->instr_type & OPC_D16)) {
751 if (s1->seg_size == 32)
752 o32 = 1;
753 } else if (s == 2) {
754 if (s1->seg_size == 16) {
755 if (!(pa->instr_type & OPC_D16))
756 o32 = 1;
760 /* generate a16/a32 prefix if needed */
761 if ((a32 == 1) && (addr32 == 0))
762 g(0x67);
763 /* generate o16/o32 prefix if needed */
764 if ((o32 == 1) && (data32 == 0))
765 g(0x66);
767 addr32 = data32 = 0;
768 #else
769 /* generate data16 prefix if needed */
770 if (s == 1 || (pa->instr_type & OPC_D16))
771 g(0x66);
772 #ifdef TCC_TARGET_X86_64
773 else if (s == 3) {
774 /* generate REX prefix */
775 if ((opcode != TOK_ASM_push && opcode != TOK_ASM_pop)
776 || !(ops[0].type & OP_REG64))
777 g(0x48);
779 #endif
780 #endif
782 /* now generates the operation */
783 if (pa->instr_type & OPC_FWAIT)
784 g(0x9b);
785 if (seg_prefix)
786 g(seg_prefix);
788 v = pa->opcode;
789 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
790 /* kludge for imul $im, %reg */
791 nb_ops = 3;
792 ops[2] = ops[1];
793 op_type[2] = op_type[1];
794 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
795 v--; /* int $3 case */
796 nb_ops = 0;
797 } else if ((v == 0x06 || v == 0x07)) {
798 if (ops[0].reg >= 4) {
799 /* push/pop %fs or %gs */
800 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
801 } else {
802 v += ops[0].reg << 3;
804 nb_ops = 0;
805 } else if (v <= 0x05) {
806 /* arith case */
807 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
808 } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
809 /* fpu arith case */
810 v += ((opcode - pa->sym) / 6) << 3;
812 if (pa->instr_type & OPC_REG) {
813 for(i = 0; i < nb_ops; i++) {
814 if (op_type[i] & (OP_REG | OP_ST)) {
815 v += ops[i].reg;
816 break;
819 /* mov $im, %reg case */
820 if (pa->opcode == 0xb0 && s >= 1)
821 v += 7;
823 if (pa->instr_type & OPC_B)
824 v += s >= 1;
825 if (pa->instr_type & OPC_TEST)
826 v += test_bits[opcode - pa->sym];
827 if (pa->instr_type & OPC_SHORTJMP) {
828 Sym *sym;
829 int jmp_disp;
831 /* see if we can really generate the jump with a byte offset */
832 sym = ops[0].e.sym;
833 if (!sym)
834 goto no_short_jump;
835 if (sym->r != cur_text_section->sh_num)
836 goto no_short_jump;
837 jmp_disp = ops[0].e.v + sym->jnext - ind - 2;
838 if (jmp_disp == (int8_t)jmp_disp) {
839 /* OK to generate jump */
840 is_short_jmp = 1;
841 ops[0].e.v = jmp_disp;
842 } else {
843 no_short_jump:
844 if (pa->instr_type & OPC_JMP) {
845 /* long jump will be allowed. need to modify the
846 opcode slightly */
847 if (v == 0xeb)
848 v = 0xe9;
849 else
850 v += 0x0f10;
851 } else {
852 tcc_error("invalid displacement");
856 op1 = v >> 8;
857 if (op1)
858 g(op1);
859 g(v);
861 /* search which operand will used for modrm */
862 modrm_index = 0;
863 if (pa->instr_type & OPC_SHIFT) {
864 reg = (opcode - pa->sym) / NBWLX;
865 if (reg == 6)
866 reg = 7;
867 } else if (pa->instr_type & OPC_ARITH) {
868 reg = (opcode - pa->sym) / NBWLX;
869 } else if (pa->instr_type & OPC_FARITH) {
870 reg = (opcode - pa->sym) / 6;
871 } else {
872 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
874 if (pa->instr_type & OPC_MODRM) {
875 /* first look for an ea operand */
876 for(i = 0;i < nb_ops; i++) {
877 if (op_type[i] & OP_EA)
878 goto modrm_found;
880 /* then if not found, a register or indirection (shift instructions) */
881 for(i = 0;i < nb_ops; i++) {
882 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
883 goto modrm_found;
885 #ifdef ASM_DEBUG
886 tcc_error("bad op table");
887 #endif
888 modrm_found:
889 modrm_index = i;
890 /* if a register is used in another operand then it is
891 used instead of group */
892 for(i = 0;i < nb_ops; i++) {
893 v = op_type[i];
894 if (i != modrm_index &&
895 (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
896 reg = ops[i].reg;
897 break;
901 asm_modrm(reg, &ops[modrm_index]);
904 /* emit constants */
905 #ifndef TCC_TARGET_X86_64
906 if (pa->opcode == 0x9a || pa->opcode == 0xea) {
907 /* ljmp or lcall kludge */
908 #ifdef I386_ASM_16
909 if (s1->seg_size == 16 && o32 == 0)
910 gen_expr16(&ops[1].e);
911 else
912 #endif
913 gen_expr32(&ops[1].e);
914 if (ops[0].e.sym)
915 tcc_error("cannot relocate");
916 gen_le16(ops[0].e.v);
917 return;
919 #endif
920 for(i = 0;i < nb_ops; i++) {
921 v = op_type[i];
922 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
923 /* if multiple sizes are given it means we must look
924 at the op size */
925 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
926 if (s == 0)
927 v = OP_IM8;
928 else if (s == 1)
929 v = OP_IM16;
930 else if (s == 2 || (v & OP_IM64) == 0)
931 v = OP_IM32;
932 else
933 v = OP_IM64;
935 if (v & (OP_IM8 | OP_IM8S)) {
936 if (ops[i].e.sym)
937 goto error_relocate;
938 g(ops[i].e.v);
939 } else if (v & OP_IM16) {
940 #ifdef I386_ASM_16
941 if (s1->seg_size == 16)
942 gen_expr16(&ops[i].e);
943 else
944 #endif
945 if (ops[i].e.sym)
946 error_relocate:
947 tcc_error("cannot relocate");
948 else
949 gen_le16(ops[i].e.v);
950 } else {
951 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
952 if (is_short_jmp)
953 g(ops[i].e.v);
954 #ifdef I386_ASM_16
955 else if (s1->seg_size == 16)
956 gen_disp16(&ops[i].e);
957 #endif
958 else
959 gen_disp32(&ops[i].e);
960 } else {
961 #ifdef I386_ASM_16
962 if (s1->seg_size == 16 && !((o32 == 1) && (v & OP_IM32)))
963 gen_expr16(&ops[i].e);
964 else
965 #endif
966 #ifdef TCC_TARGET_X86_64
967 if (v & OP_IM64)
968 gen_expr64(&ops[i].e);
969 else
970 #endif
971 gen_expr32(&ops[i].e);
974 #ifdef I386_ASM_16
975 } else if (v & (OP_REG16 | OP_REG32)) {
976 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
977 /* jmp $r */
978 g(0xE0 + ops[i].reg);
980 #endif
981 #ifdef TCC_TARGET_X86_64
982 } else if (v & (OP_REG32 | OP_REG64)) {
983 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
984 /* jmp $r */
985 g(0xE0 + ops[i].reg);
987 #endif
990 #ifdef I386_ASM_16
991 a32 = o32 = 0;
992 #endif
/* return the constraint priority (we allocate first the lowest
   numbered constraints).  'str' is a constraint string without its
   leading modifiers; an unknown letter is reported through
   tcc_error(). */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* the result is the largest per-letter value found in the string,
       so one permissive letter is enough to allocate the operand late */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}
/* Return a pointer to the first character of 'p' that is not one of the
   constraint modifiers '=', '&', '+' or '%'.  Only leading modifiers
   are skipped. */
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}
/* Bits kept per register in regs_allocated[]: busy as output / input. */
#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

/* True when 'reg' is already taken for the role(s) in the caller's
   local 'reg_mask'; relies on locals named regs_allocated and reg_mask. */
#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
1056 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1057 int nb_operands, int nb_outputs,
1058 const uint8_t *clobber_regs,
1059 int *pout_reg)
1061 ASMOperand *op;
1062 int sorted_op[MAX_ASM_OPERANDS];
1063 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1064 const char *str;
1065 uint8_t regs_allocated[NB_ASM_REGS];
1067 /* init fields */
1068 for(i=0;i<nb_operands;i++) {
1069 op = &operands[i];
1070 op->input_index = -1;
1071 op->ref_index = -1;
1072 op->reg = -1;
1073 op->is_memory = 0;
1074 op->is_rw = 0;
1076 /* compute constraint priority and evaluate references to output
1077 constraints if input constraints */
1078 for(i=0;i<nb_operands;i++) {
1079 op = &operands[i];
1080 str = op->constraint;
1081 str = skip_constraint_modifiers(str);
1082 if (isnum(*str) || *str == '[') {
1083 /* this is a reference to another constraint */
1084 k = find_constraint(operands, nb_operands, str, NULL);
1085 if ((unsigned)k >= i || i < nb_outputs)
1086 tcc_error("invalid reference in constraint %d ('%s')",
1087 i, str);
1088 op->ref_index = k;
1089 if (operands[k].input_index >= 0)
1090 tcc_error("cannot reference twice the same operand");
1091 operands[k].input_index = i;
1092 op->priority = 5;
1093 } else {
1094 op->priority = constraint_priority(str);
1098 /* sort operands according to their priority */
1099 for(i=0;i<nb_operands;i++)
1100 sorted_op[i] = i;
1101 for(i=0;i<nb_operands - 1;i++) {
1102 for(j=i+1;j<nb_operands;j++) {
1103 p1 = operands[sorted_op[i]].priority;
1104 p2 = operands[sorted_op[j]].priority;
1105 if (p2 < p1) {
1106 tmp = sorted_op[i];
1107 sorted_op[i] = sorted_op[j];
1108 sorted_op[j] = tmp;
1113 for(i = 0;i < NB_ASM_REGS; i++) {
1114 if (clobber_regs[i])
1115 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1116 else
1117 regs_allocated[i] = 0;
1119 /* esp cannot be used */
1120 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1121 /* ebp cannot be used yet */
1122 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1124 /* allocate registers and generate corresponding asm moves */
1125 for(i=0;i<nb_operands;i++) {
1126 j = sorted_op[i];
1127 op = &operands[j];
1128 str = op->constraint;
1129 /* no need to allocate references */
1130 if (op->ref_index >= 0)
1131 continue;
1132 /* select if register is used for output, input or both */
1133 if (op->input_index >= 0) {
1134 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1135 } else if (j < nb_outputs) {
1136 reg_mask = REG_OUT_MASK;
1137 } else {
1138 reg_mask = REG_IN_MASK;
1140 try_next:
1141 c = *str++;
1142 switch(c) {
1143 case '=':
1144 goto try_next;
1145 case '+':
1146 op->is_rw = 1;
1147 /* FALL THRU */
1148 case '&':
1149 if (j >= nb_outputs)
1150 tcc_error("'%c' modifier can only be applied to outputs", c);
1151 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1152 goto try_next;
1153 case 'A':
1154 /* allocate both eax and edx */
1155 if (is_reg_allocated(TREG_XAX) ||
1156 is_reg_allocated(TREG_XDX))
1157 goto try_next;
1158 op->is_llong = 1;
1159 op->reg = TREG_XAX;
1160 regs_allocated[TREG_XAX] |= reg_mask;
1161 regs_allocated[TREG_XDX] |= reg_mask;
1162 break;
1163 case 'a':
1164 reg = TREG_XAX;
1165 goto alloc_reg;
1166 case 'b':
1167 reg = 3;
1168 goto alloc_reg;
1169 case 'c':
1170 reg = TREG_XCX;
1171 goto alloc_reg;
1172 case 'd':
1173 reg = TREG_XDX;
1174 goto alloc_reg;
1175 case 'S':
1176 reg = 6;
1177 goto alloc_reg;
1178 case 'D':
1179 reg = 7;
1180 alloc_reg:
1181 if (is_reg_allocated(reg))
1182 goto try_next;
1183 goto reg_found;
1184 case 'q':
1185 /* eax, ebx, ecx or edx */
1186 for(reg = 0; reg < 4; reg++) {
1187 if (!is_reg_allocated(reg))
1188 goto reg_found;
1190 goto try_next;
1191 case 'r':
1192 /* any general register */
1193 for(reg = 0; reg < 8; reg++) {
1194 if (!is_reg_allocated(reg))
1195 goto reg_found;
1197 goto try_next;
1198 reg_found:
1199 /* now we can reload in the register */
1200 op->is_llong = 0;
1201 op->reg = reg;
1202 regs_allocated[reg] |= reg_mask;
1203 break;
1204 case 'i':
1205 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1206 goto try_next;
1207 break;
1208 case 'I':
1209 case 'N':
1210 case 'M':
1211 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1212 goto try_next;
1213 break;
1214 case 'm':
1215 case 'g':
1216 /* nothing special to do because the operand is already in
1217 memory, except if the pointer itself is stored in a
1218 memory variable (VT_LLOCAL case) */
1219 /* XXX: fix constant case */
1220 /* if it is a reference to a memory zone, it must lie
1221 in a register, so we reserve the register in the
1222 input registers and a load will be generated
1223 later */
1224 if (j < nb_outputs || c == 'm') {
1225 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1226 /* any general register */
1227 for(reg = 0; reg < 8; reg++) {
1228 if (!(regs_allocated[reg] & REG_IN_MASK))
1229 goto reg_found1;
1231 goto try_next;
1232 reg_found1:
1233 /* now we can reload in the register */
1234 regs_allocated[reg] |= REG_IN_MASK;
1235 op->reg = reg;
1236 op->is_memory = 1;
1239 break;
1240 default:
1241 tcc_error("asm constraint %d ('%s') could not be satisfied",
1242 j, op->constraint);
1243 break;
1245 /* if a reference is present for that operand, we assign it too */
1246 if (op->input_index >= 0) {
1247 operands[op->input_index].reg = op->reg;
1248 operands[op->input_index].is_llong = op->is_llong;
1252 /* compute out_reg. It is used to store outputs registers to memory
1253 locations references by pointers (VT_LLOCAL case) */
1254 *pout_reg = -1;
1255 for(i=0;i<nb_operands;i++) {
1256 op = &operands[i];
1257 if (op->reg >= 0 &&
1258 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1259 !op->is_memory) {
1260 for(reg = 0; reg < 8; reg++) {
1261 if (!(regs_allocated[reg] & REG_OUT_MASK))
1262 goto reg_found2;
1264 tcc_error("could not find free output register for reloading");
1265 reg_found2:
1266 *pout_reg = reg;
1267 break;
1271 /* print sorted constraints */
1272 #ifdef ASM_DEBUG
1273 for(i=0;i<nb_operands;i++) {
1274 j = sorted_op[i];
1275 op = &operands[j];
1276 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1278 op->id ? get_tok_str(op->id, NULL) : "",
1279 op->constraint,
1280 op->vt->r,
1281 op->reg);
1283 if (*pout_reg >= 0)
1284 printf("out_reg=%d\n", *pout_reg);
1285 #endif
1288 ST_FUNC void subst_asm_operand(CString *add_str,
1289 SValue *sv, int modifier)
1291 int r, reg, size, val;
1292 char buf[64];
1294 r = sv->r;
1295 if ((r & VT_VALMASK) == VT_CONST) {
1296 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
1297 cstr_ccat(add_str, '$');
1298 if (r & VT_SYM) {
1299 cstr_cat(add_str, get_tok_str(sv->sym->v, NULL));
1300 if (sv->c.i != 0) {
1301 cstr_ccat(add_str, '+');
1302 } else {
1303 return;
1306 val = sv->c.i;
1307 if (modifier == 'n')
1308 val = -val;
1309 snprintf(buf, sizeof(buf), "%d", sv->c.i);
1310 cstr_cat(add_str, buf);
1311 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1312 snprintf(buf, sizeof(buf), "%d(%%ebp)", sv->c.i);
1313 cstr_cat(add_str, buf);
1314 } else if (r & VT_LVAL) {
1315 reg = r & VT_VALMASK;
1316 if (reg >= VT_CONST)
1317 tcc_error("internal compiler error");
1318 snprintf(buf, sizeof(buf), "(%%%s)",
1319 get_tok_str(TOK_ASM_eax + reg, NULL));
1320 cstr_cat(add_str, buf);
1321 } else {
1322 /* register case */
1323 reg = r & VT_VALMASK;
1324 if (reg >= VT_CONST)
1325 tcc_error("internal compiler error");
1327 /* choose register operand size */
1328 if ((sv->type.t & VT_BTYPE) == VT_BYTE)
1329 size = 1;
1330 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1331 size = 2;
1332 #ifdef TCC_TARGET_X86_64
1333 else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
1334 size = 8;
1335 #endif
1336 else
1337 size = 4;
1338 if (size == 1 && reg >= 4)
1339 size = 4;
1341 if (modifier == 'b') {
1342 if (reg >= 4)
1343 tcc_error("cannot use byte register");
1344 size = 1;
1345 } else if (modifier == 'h') {
1346 if (reg >= 4)
1347 tcc_error("cannot use byte register");
1348 size = -1;
1349 } else if (modifier == 'w') {
1350 size = 2;
1351 #ifdef TCC_TARGET_X86_64
1352 } else if (modifier == 'q') {
1353 size = 8;
1354 #endif
1357 switch(size) {
1358 case -1:
1359 reg = TOK_ASM_ah + reg;
1360 break;
1361 case 1:
1362 reg = TOK_ASM_al + reg;
1363 break;
1364 case 2:
1365 reg = TOK_ASM_ax + reg;
1366 break;
1367 default:
1368 reg = TOK_ASM_eax + reg;
1369 break;
1370 #ifdef TCC_TARGET_X86_64
1371 case 8:
1372 reg = TOK_ASM_rax + reg;
1373 break;
1374 #endif
1376 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1377 cstr_cat(add_str, buf);
1381 /* generate prolog and epilog code for asm statment */
1382 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1383 int nb_outputs, int is_output,
1384 uint8_t *clobber_regs,
1385 int out_reg)
1387 uint8_t regs_allocated[NB_ASM_REGS];
1388 ASMOperand *op;
1389 int i, reg;
1390 static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
1392 /* mark all used registers */
1393 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1394 for(i = 0; i < nb_operands;i++) {
1395 op = &operands[i];
1396 if (op->reg >= 0)
1397 regs_allocated[op->reg] = 1;
1399 if (!is_output) {
1400 /* generate reg save code */
1401 for(i = 0; i < NB_SAVED_REGS; i++) {
1402 reg = reg_saved[i];
1403 if (regs_allocated[reg]) {
1404 #ifdef I386_ASM_16
1405 if (tcc_state->seg_size == 16)
1406 g(0x66);
1407 #endif
1408 g(0x50 + reg);
1412 /* generate load code */
1413 for(i = 0; i < nb_operands; i++) {
1414 op = &operands[i];
1415 if (op->reg >= 0) {
1416 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1417 op->is_memory) {
1418 /* memory reference case (for both input and
1419 output cases) */
1420 SValue sv;
1421 sv = *op->vt;
1422 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1423 load(op->reg, &sv);
1424 } else if (i >= nb_outputs || op->is_rw) {
1425 /* load value in register */
1426 load(op->reg, op->vt);
1427 if (op->is_llong) {
1428 SValue sv;
1429 sv = *op->vt;
1430 sv.c.ul += 4;
1431 load(TREG_XDX, &sv);
1436 } else {
1437 /* generate save code */
1438 for(i = 0 ; i < nb_outputs; i++) {
1439 op = &operands[i];
1440 if (op->reg >= 0) {
1441 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1442 if (!op->is_memory) {
1443 SValue sv;
1444 sv = *op->vt;
1445 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1446 load(out_reg, &sv);
1448 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1449 store(op->reg, &sv);
1451 } else {
1452 store(op->reg, op->vt);
1453 if (op->is_llong) {
1454 SValue sv;
1455 sv = *op->vt;
1456 sv.c.ul += 4;
1457 store(TREG_XDX, &sv);
1462 /* generate reg restore code */
1463 for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
1464 reg = reg_saved[i];
1465 if (regs_allocated[reg]) {
1466 #ifdef I386_ASM_16
1467 if (tcc_state->seg_size == 16)
1468 g(0x66);
1469 #endif
1470 g(0x58 + reg);
1476 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1478 int reg;
1479 TokenSym *ts;
1481 if (!strcmp(str, "memory") ||
1482 !strcmp(str, "cc"))
1483 return;
1484 ts = tok_alloc(str, strlen(str));
1485 reg = ts->tok;
1486 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1487 reg -= TOK_ASM_eax;
1488 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1489 reg -= TOK_ASM_ax;
1490 #ifdef TCC_TARGET_X86_64
1491 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1492 reg -= TOK_ASM_rax;
1493 #endif
1494 } else {
1495 tcc_error("invalid clobber register '%s'", str);
1497 clobber_regs[reg] = 1;