x86: Improve cmov handling
[tinycc.git] / i386-asm.c
blob69514c08acff12587c05bdc0d6849c93f2e595ed
1 /*
2 * i386 specific functions for TCC assembler
4 * Copyright (c) 2001, 2002 Fabrice Bellard
5 * Copyright (c) 2009 Frédéric Feret (x86_64 support)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "tcc.h"
24 /* #define NB_ASM_REGS 8 */
/* Upper bound on the number of operands of one instruction; it sizes
   ASMInstr.op_type[] and the ops[] array in asm_opcode(). */
25 #define MAX_OPERANDS 3
26 #define NB_SAVED_REGS 3
/* Token ranges consulted by asm_opcode(): tokens in
   [TOK_ASM_first, TOK_ASM_last] index op0_codes[]; any other token up to
   TOK_ASM_alllast is reported as "bad operand" instead of "unknown opcode". */
28 #define TOK_ASM_first TOK_ASM_clc
29 #define TOK_ASM_last TOK_ASM_emms
30 #define TOK_ASM_alllast TOK_ASM_pxor
/* Flags stored in the low bits of ASMInstr.instr_type. */
32 #define OPC_JMP 0x01 /* jmp operand */
33 #define OPC_B 0x02 /* only used with OPC_WL */
34 #define OPC_WL 0x04 /* accepts w, l or no suffix */
35 #define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
36 #define OPC_REG 0x08 /* register is added to opcode */
37 #define OPC_MODRM 0x10 /* modrm encoding */
38 #define OPC_FWAIT 0x20 /* add fwait opcode */
39 #define OPC_TEST 0x40 /* test opcodes */
40 #define OPC_SHIFT 0x80 /* shift opcodes */
41 #define OPC_D16 0x0100 /* generate data16 prefix */
42 #define OPC_ARITH 0x0200 /* arithmetic opcodes */
43 #define OPC_SHORTJMP 0x0400 /* short jmp operand */
44 #define OPC_FARITH 0x0800 /* FPU arithmetic opcodes */
/* OPC_WLX is the widest "word-and-larger" suffix class of the target:
   it includes the q suffix only when building for x86_64. */
45 #ifdef TCC_TARGET_X86_64
46 # define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */
47 # define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
48 # define OPC_WLX OPC_WLQ
49 #else
50 # define OPC_WLX OPC_WL
51 #endif
/* The instruction "group" is stored in instr_type above the flag bits:
   tables build it as (group << OPC_GROUP_SHIFT) and asm_opcode() reads it
   back with ((instr_type >> OPC_GROUP_SHIFT) & 7). */
53 #define OPC_GROUP_SHIFT 13
55 /* in order to compress the operand type, we use specific operands and
56 we or only with EA */
/* Each OPT_xxx below OPT_COMPOSITE_FIRST is a bit position: the matching
   OP_xxx mask is (1 << OPT_xxx).  The composite codes are not bit
   positions; asm_opcode() expands them into a union of OP_xxx masks.
   Instruction tables store one of these codes per operand, optionally
   ORed with OPT_EA. */
57 enum {
58 OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
59 OPT_REG16, /* warning: value is hardcoded from TOK_ASM_xxx */
60 OPT_REG32, /* warning: value is hardcoded from TOK_ASM_xxx */
61 #ifdef TCC_TARGET_X86_64
62 OPT_REG64, /* warning: value is hardcoded from TOK_ASM_xxx */
63 #endif
64 OPT_MMX, /* warning: value is hardcoded from TOK_ASM_xxx */
65 OPT_SSE, /* warning: value is hardcoded from TOK_ASM_xxx */
66 OPT_CR, /* warning: value is hardcoded from TOK_ASM_xxx */
67 OPT_TR, /* warning: value is hardcoded from TOK_ASM_xxx */
68 OPT_DB, /* warning: value is hardcoded from TOK_ASM_xxx */
69 OPT_SEG,
70 OPT_ST,
71 OPT_IM8,
72 OPT_IM8S,
73 OPT_IM16,
74 OPT_IM32,
75 #ifdef TCC_TARGET_X86_64
76 OPT_IM64,
77 #endif
78 OPT_EAX, /* %al, %ax, %eax or %rax register */
79 OPT_ST0, /* %st(0) register */
80 OPT_CL, /* %cl register */
81 OPT_DX, /* %dx register */
82 OPT_ADDR, /* OP_EA with only offset */
83 OPT_INDIR, /* *(expr) */
84 /* composite types */
85 OPT_COMPOSITE_FIRST,
86 OPT_IM, /* IM8 | IM16 | IM32 | IM64 */
87 OPT_REG, /* REG8 | REG16 | REG32 | REG64 */
88 OPT_REGW, /* REG16 | REG32 | REG64 */
89 OPT_IMW, /* IM16 | IM32 | IM64 */
90 #ifdef TCC_TARGET_X86_64
91 OPT_IMNO64, /* IM16 | IM32 */
92 #endif
93 /* can be ored with any OPT_xxx */
/* asm_opcode() extracts the OPT code with (op_type & 0x1f) and tests this
   flag separately, so it must stay outside the low five bits. */
94 OPT_EA = 0x80
/* Operand type bit masks, one bit per OPT_xxx enumerator.  Operand.type and
   the decoded op_type[] entries in asm_opcode() are unions of these. */
#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
/* 64-bit only masks collapse to 0 on i386 so that shared code can OR and
   test them unconditionally. */
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_IM64  (1 << OPT_IM64)
/* Bit just above OP_EA.  Written as an unsigned constant: the previous
   spelling (OP_EA << 1) shifted a signed int into its sign bit, which is
   undefined behaviour in C. */
# define OP_EA32  0x80000000U
#else
# define OP_REG64 0
# define OP_IM64  0
# define OP_EA32  0
#endif

/* Memory operand (effective address); kept far above the OPT_xxx bits. */
#define OP_EA     0x40000000
/* Any general purpose register, whatever its width. */
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)

/* OP_IM is the mask parse_operand() always gives a '$' immediate, and
   TREG_XAX/XCX/XDX are the native-width a/c/d registers of the target. */
#ifdef TCC_TARGET_X86_64
# define OP_IM    OP_IM64
# define TREG_XAX TREG_RAX
# define TREG_XCX TREG_RCX
# define TREG_XDX TREG_RDX
#else
# define OP_IM    OP_IM32
# define TREG_XAX TREG_EAX
# define TREG_XCX TREG_ECX
# define TREG_XDX TREG_EDX
#endif
/* One row of the instruction table asm_instrs[]. */
142 typedef struct ASMInstr {
/* mnemonic token (TOK_ASM_xxx); for suffixed or grouped instructions
   asm_opcode() matches a range of tokens starting at this one */
143 uint16_t sym;
/* base opcode; asm_opcode() emits the high byte first when it is non-zero */
144 uint16_t opcode;
/* OPC_xxx flags ORed with (group << OPC_GROUP_SHIFT) */
145 uint16_t instr_type;
/* number of operands this form takes */
146 uint8_t nb_ops;
147 uint8_t op_type[MAX_OPERANDS]; /* compressed OPT_xxx code per operand, possibly ORed with OPT_EA */
148 } ASMInstr;
/* One parsed instruction operand, filled in by parse_operand(). */
150 typedef struct Operand {
/* union of OP_xxx masks describing every class the operand belongs to */
151 uint32_t type;
152 int8_t reg; /* register, -1 if none */
153 int8_t reg2; /* second register, -1 if none */
/* index scale as a shift count (0..3), as returned by get_reg_shift() */
154 uint8_t shift;
/* immediate value or memory displacement (constant plus optional symbol) */
155 ExprValue e;
156 } Operand;
/* Maps the single OP_REGxx bit of a register operand (type & OP_REG) to the
   operand-size index `s` used by asm_opcode().

   The table is written positionally; the equivalent designated form is
   shown here for documentation only and must stay inside this comment
   (mixing it with the positional list would initialise past the end of
   the array):

       [OP_REG8]  = 0,
       [OP_REG16] = 1,
       [OP_REG32] = 2,
   #ifdef TCC_TARGET_X86_64
       [OP_REG64] = 3,
   #endif
*/
static const uint8_t reg_to_size[9] = {
    0, 0, 1, 0, 2, 0, 0, 0, 3
};
/* Number of condition suffixes recognised for OPC_TEST instructions. */
#define NB_TEST_OPCODES 30

/* Condition code added to the base opcode of an OPC_TEST instruction,
   indexed by (opcode token - first token of the family).  Synonymous
   suffixes share one code and are grouped on a single row. */
static const uint8_t test_bits[NB_TEST_OPCODES] = {
    0x00,             /* o */
    0x01,             /* no */
    0x02, 0x02, 0x02, /* b, c, nae */
    0x03, 0x03, 0x03, /* nb, nc, ae */
    0x04, 0x04,       /* e, z */
    0x05, 0x05,       /* ne, nz */
    0x06, 0x06,       /* be, na */
    0x07, 0x07,       /* nbe, a */
    0x08,             /* s */
    0x09,             /* ns */
    0x0a, 0x0a,       /* p, pe */
    0x0b, 0x0b,       /* np, po */
    0x0c, 0x0c,       /* l, nge */
    0x0d, 0x0d,       /* nl, ge */
    0x0e, 0x0e,       /* le, ng */
    0x0f, 0x0f,       /* nle, g */
};
/* Segment override prefix byte for each segment register, indexed by the
   register number parse_operand() stores for an OP_SEG operand
   (token - TOK_ASM_es). */
static const uint8_t segment_prefixes[] = {
    [0] = 0x26, /* es */
    [1] = 0x2e, /* cs */
    [2] = 0x36, /* ss */
    [3] = 0x3e, /* ds */
    [4] = 0x64, /* fs */
    [5] = 0x65, /* gs */
};
/* Table of every instruction form that takes operands or flags.  It is
   generated by including the target's instruction list with the DEF_ASM_xxx
   macros defined so that each entry expands to one ASMInstr initialiser.
   DEF_ASM_OP0 expands to nothing here: those plain zero-operand opcodes
   live in op0_codes[] instead.  The table ends with an entry whose sym is
   0, which is what the lookup loop in asm_opcode() stops on. */
214 static const ASMInstr asm_instrs[] = {
215 #define ALT(x) x
216 #define DEF_ASM_OP0(name, opcode)
217 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 0 },
218 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 1, { op0 }},
219 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 2, { op0, op1 }},
220 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, opcode, (instr_type | group << OPC_GROUP_SHIFT), 3, { op0, op1, op2 }},
221 #ifdef TCC_TARGET_X86_64
222 # include "x86_64-asm.h"
223 #else
224 # include "i386-asm.h"
225 #endif
226 /* last operation */
227 { 0, },
/* Opcodes of the plain zero-operand instructions.  The same instruction
   list is included again, this time with only DEF_ASM_OP0 producing output,
   so the table holds one opcode per DEF_ASM_OP0 entry in list order.
   asm_opcode() indexes it with (opcode - TOK_ASM_first).
   NOTE(review): this relies on the DEF_ASM_OP0 entries being declared in
   the same order as their tokens - confirm against the included header. */
230 static const uint16_t op0_codes[] = {
231 #define ALT(x)
232 #define DEF_ASM_OP0(x, opcode) opcode,
233 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
234 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
235 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
236 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
237 #ifdef TCC_TARGET_X86_64
238 # include "x86_64-asm.h"
239 #else
240 # include "i386-asm.h"
241 #endif
244 static inline int get_reg_shift(TCCState *s1)
246 int shift, v;
247 #ifdef I386_ASM_16
248 if (s1->seg_size == 16)
249 tcc_error("invalid effective address");
250 #endif
251 v = asm_int_expr(s1);
252 switch(v) {
253 case 1:
254 shift = 0;
255 break;
256 case 2:
257 shift = 1;
258 break;
259 case 4:
260 shift = 2;
261 break;
262 case 8:
263 shift = 3;
264 break;
265 default:
266 expect("1, 2, 4 or 8 constant");
267 shift = 0;
268 break;
270 return shift;
/* Parse a '%'-prefixed base or index register inside a memory operand.
   Returns the register number relative to the first register of its class
   (tok - TOK_ASM_eax, - TOK_ASM_rax or - TOK_ASM_ax) and consumes the
   register token.  *type is cleared on entry and, in an x86_64 build, set
   to OP_EA32 when a 32-bit register was given.  Anything else is reported
   through expect("register").
   NOTE(review): because *type is reset on every call, a caller that parses
   base then index only sees the flag of the last register parsed. */
273 static int asm_parse_reg(int *type)
275 int reg = 0;
276 *type = 0;
277 if (tok != '%')
278 goto error_32;
279 next();
280 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
281 reg = tok - TOK_ASM_eax;
/* The preprocessor conditionals below splice extra "else if" arms into the
   same if-chain, which is why the braces straddle the #ifdef blocks. */
282 #ifdef TCC_TARGET_X86_64
283 *type = OP_EA32;
284 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
285 reg = tok - TOK_ASM_rax;
286 #endif
287 #ifdef I386_ASM_16
288 } else if (tok >= TOK_ASM_ax && tok <= TOK_ASM_di) {
289 reg = tok - TOK_ASM_ax;
290 #endif
291 } else {
/* also the target of the goto taken when no '%' introduces the register */
292 error_32:
293 expect("register");
295 next();
296 return reg;
/* Parse one instruction operand from the current token stream into *op.
   Three syntaxes are recognised:
     %reg                   register (general, mmx/sse, control/test/debug,
                            segment, or x87 %st / %st(n))
     $expr                  immediate
     expr(base,index,scale) memory reference, every part optional
   A leading '*' adds OP_INDIR to the resulting type.  op->type receives the
   union of every OP_xxx class the operand fits; asm_opcode() later matches
   it against the instruction table.  Malformed operands are reported through
   tcc_error()/expect(). */
299 static void parse_operand(TCCState *s1, Operand *op)
301 ExprValue e;
302 int reg, indir;
303 const char *p;
305 indir = 0;
306 if (tok == '*') {
307 next();
308 indir = OP_INDIR;
311 if (tok == '%') {
312 next();
313 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
/* (reg >> 3) selects the register class bit and (reg & 7) the register
   number, so the arithmetic assumes eight consecutive tokens per class in
   the same order as the OPT_xxx enumerators. */
314 reg = tok - TOK_ASM_al;
315 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
316 op->reg = reg & 7;
/* tag the accumulator, %cl and %dx, which some instruction forms require */
317 if ((op->type & OP_REG) && op->reg == TREG_XAX)
318 op->type |= OP_EAX;
319 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
320 op->type |= OP_CL;
321 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
322 op->type |= OP_DX;
/* %dr0..%dr7 are accepted as another spelling of the debug registers */
323 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
324 op->type = OP_DB;
325 op->reg = tok - TOK_ASM_dr0;
326 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
327 op->type = OP_SEG;
328 op->reg = tok - TOK_ASM_es;
329 } else if (tok == TOK_ASM_st) {
/* %st alone means %st(0); %st(n) takes a single digit 0..7 */
330 op->type = OP_ST;
331 op->reg = 0;
332 next();
333 if (tok == '(') {
334 next();
335 if (tok != TOK_PPNUM)
336 goto reg_error;
337 p = tokc.str.data;
338 reg = p[0] - '0';
339 if ((unsigned)reg >= 8 || p[1] != '\0')
340 goto reg_error;
341 op->reg = reg;
342 next();
343 skip(')');
345 if (op->reg == 0)
346 op->type |= OP_ST0;
/* the token after the operand has already been fetched on this path */
347 goto no_skip;
348 } else {
349 reg_error:
350 tcc_error("unknown register");
352 next();
353 no_skip: ;
354 } else if (tok == '$') {
355 /* constant value */
356 next();
357 asm_expr(s1, &e);
358 op->type = OP_IM;
359 op->e.v = e.v;
360 op->e.sym = e.sym;
/* only a symbol-free constant can be classified by the widths it fits in */
361 if (!op->e.sym) {
362 if (op->e.v == (uint8_t)op->e.v)
363 op->type |= OP_IM8;
364 if (op->e.v == (int8_t)op->e.v)
365 op->type |= OP_IM8S;
366 if (op->e.v == (uint16_t)op->e.v)
367 op->type |= OP_IM16;
368 #ifdef TCC_TARGET_X86_64
369 if (op->e.v == (uint32_t)op->e.v)
370 op->type |= OP_IM32;
371 #endif
373 } else {
374 /* address(reg,reg2,shift) with all variants */
375 op->type = OP_EA;
376 op->reg = -1;
377 op->reg2 = -1;
378 op->shift = 0;
379 if (tok != '(') {
380 asm_expr(s1, &e);
381 op->e.v = e.v;
382 op->e.sym = e.sym;
383 } else {
/* A '(' may open either the register list or a parenthesised displacement;
   look one token ahead and push the '(' back when it is the register list. */
384 next();
385 if (tok == '%') {
386 unget_tok('(');
387 op->e.v = 0;
388 op->e.sym = NULL;
389 } else {
390 /* bracketed offset expression */
391 asm_expr(s1, &e);
392 if (tok != ')')
393 expect(")");
394 next();
395 op->e.v = e.v;
396 op->e.sym = e.sym;
399 if (tok == '(') {
/* register list: (base), (base,index), (base,index,scale), (,index,scale) */
400 int type = 0;
401 next();
402 if (tok != ',') {
403 op->reg = asm_parse_reg(&type);
405 if (tok == ',') {
406 next();
407 if (tok != ',') {
408 op->reg2 = asm_parse_reg(&type);
410 if (tok == ',') {
411 next();
412 op->shift = get_reg_shift(s1);
/* OP_EA32 makes asm_opcode() emit the 0x67 address-size prefix */
415 if (type & OP_EA32)
416 op->type |= OP_EA32;
417 skip(')');
/* no base and no index: the operand is a bare address */
419 if (op->reg == -1 && op->reg2 == -1)
420 op->type |= OP_ADDR;
422 op->type |= indir;
425 /* XXX: unify with C code output ? */
426 ST_FUNC void gen_expr32(ExprValue *pe)
428 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
#ifdef TCC_TARGET_X86_64
/* 64-bit counterpart of gen_expr32(): emit the value of *pe through
   gen_addr64(), passing VT_SYM when the expression carries a symbol. */
static void gen_expr64(ExprValue *pe)
{
    Sym *sym = pe->sym;
    int r = 0;

    if (sym)
        r = VT_SYM;
    gen_addr64(r, sym, pe->v);
}
#endif
438 /* XXX: unify with C code output ? */
439 static void gen_disp32(ExprValue *pe)
441 Sym *sym = pe->sym;
442 if (sym && sym->r == cur_text_section->sh_num) {
443 /* same section: we can output an absolute value. Note
444 that the TCC compiler behaves differently here because
445 it always outputs a relocation to ease (future) code
446 elimination in the linker */
447 gen_le32(pe->v + sym->jnext - ind - 4);
448 } else {
449 if (sym && sym->type.t == VT_VOID) {
450 sym->type.t = VT_FUNC;
451 sym->type.ref = NULL;
453 gen_addrpc32(VT_SYM, sym, pe->v);
#ifdef I386_ASM_16
/* Emit the 16-bit value of *pe, preceded by an R_386_16 relocation when the
   expression refers to a symbol. */
static void gen_expr16(ExprValue *pe)
{
    Sym *sym = pe->sym;

    if (sym != NULL)
        greloc(cur_text_section, sym, ind, R_386_16);
    gen_le16(pe->v);
}

/* Emit a 16-bit pc-relative displacement to the target described by *pe. */
static void gen_disp16(ExprValue *pe)
{
    Sym *target = pe->sym;

    if (target != NULL && target->r == cur_text_section->sh_num) {
        /* Target lives in the section being assembled: the distance is
           already known, so write it directly.  The TCC compiler proper
           behaves differently and always outputs a relocation, to ease
           (future) code elimination in the linker. */
        gen_le16(pe->v + target->jnext - ind - 2);
        return;
    }

    if (target != NULL) {
        greloc(cur_text_section, target, ind, R_386_PC16);
    } else {
        /* no symbol: put an empty PC16 relocation */
        put_elf_reloc(symtab_section, cur_text_section,
                      ind, R_386_PC16, 0);
    }
    gen_le16(pe->v - 2);
}
#endif
488 /* generate the modrm operand */
/* Emits the ModRM byte for operand *op with `reg` in its middle (reg/opcode)
   field, followed when needed by a SIB byte and the displacement.
   Three cases:
     - register operand: mod = 11 (0xc0) with the register number;
     - displacement only (no base, no index): absolute address form;
     - base and/or index: mod chosen from the displacement size, SIB byte
       added when an index register is present.
   With I386_ASM_16 the 16-bit addressing forms are produced instead when
   tcc_state->seg_size == 16. */
489 static inline void asm_modrm(int reg, Operand *op)
491 int mod, reg1, reg2, sib_reg1;
493 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
494 g(0xc0 + (reg << 3) + op->reg);
495 } else if (op->reg == -1 && op->reg2 == -1) {
496 /* displacement only */
497 #ifdef I386_ASM_16
498 if (tcc_state->seg_size == 16) {
499 g(0x06 + (reg << 3));
500 gen_expr16(&op->e);
501 } else if (tcc_state->seg_size == 32)
502 #endif
/* x86_64 emits rm=4 plus SIB byte 0x25 (no base, no index) for the
   absolute address; i386 uses the plain rm=5 disp32 form */
504 #ifdef TCC_TARGET_X86_64
505 g(0x04 + (reg << 3));
506 g(0x25);
507 #else
508 g(0x05 + (reg << 3));
509 #endif
510 gen_expr32(&op->e);
512 } else {
513 sib_reg1 = op->reg;
514 /* first compute displacement encoding */
/* no base: SIB base field 5 with mod 00, the displacement is still emitted
   below through the op->reg == -1 test.  Base register 5 cannot use mod 00,
   so it always gets a displacement even when the offset is zero. */
515 if (sib_reg1 == -1) {
516 sib_reg1 = 5;
517 mod = 0x00;
518 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
519 mod = 0x00;
520 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
521 mod = 0x40;
522 } else {
523 mod = 0x80;
525 /* compute if sib byte needed */
526 reg1 = op->reg;
527 if (op->reg2 != -1)
528 reg1 = 4;
529 #ifdef I386_ASM_16
530 if (tcc_state->seg_size == 32) {
531 #endif
532 g(mod + (reg << 3) + reg1);
533 if (reg1 == 4) {
534 /* add sib byte */
535 reg2 = op->reg2;
536 if (reg2 == -1)
537 reg2 = 4; /* indicate no index */
538 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
540 #ifdef I386_ASM_16
541 } else if (tcc_state->seg_size == 16) {
/* translate the 32-bit register numbering into the fixed 16-bit rm codes */
542 /* edi = 7, esi = 6 --> di = 5, si = 4 */
543 if ((reg1 == 6) || (reg1 == 7)) {
544 reg1 -= 2;
545 /* ebx = 3 --> bx = 7 */
546 } else if (reg1 == 3) {
547 reg1 = 7;
548 /* o32 = 5 --> o16 = 6 */
549 } else if (reg1 == 5) {
550 reg1 = 6;
551 /* sib not valid in 16-bit mode */
552 } else if (reg1 == 4) {
553 reg2 = op->reg2;
554 /* bp + si + offset */
555 if ((sib_reg1 == 5) && (reg2 == 6)) {
556 reg1 = 2;
557 /* bp + di + offset */
558 } else if ((sib_reg1 == 5) && (reg2 == 7)) {
559 reg1 = 3;
560 /* bx + si + offset */
561 } else if ((sib_reg1 == 3) && (reg2 == 6)) {
562 reg1 = 0;
563 /* bx + di + offset */
564 } else if ((sib_reg1 == 3) && (reg2 == 7)) {
565 reg1 = 1;
566 } else {
567 tcc_error("invalid effective address");
569 if (op->e.v == 0)
570 mod = 0;
571 } else {
572 tcc_error("invalid register");
574 g(mod + (reg << 3) + reg1);
576 #endif
577 /* add offset */
/* mod 01: one displacement byte; mod 10 or "no base": full-size displacement */
578 if (mod == 0x40) {
579 g(op->e.v);
580 } else if (mod == 0x80 || op->reg == -1) {
581 #ifdef I386_ASM_16
582 if (tcc_state->seg_size == 16)
583 gen_expr16(&op->e);
584 else if (tcc_state->seg_size == 32)
585 #endif
586 gen_expr32(&op->e);
/* Assemble one instruction whose mnemonic token is `opcode`.
   Steps, in order:
     1. parse up to MAX_OPERANDS comma separated operands (with an optional
        segment override "%seg:" in front of an operand);
     2. scan asm_instrs[] for the first entry whose mnemonic range, operand
        count and operand classes all match;
     3. if nothing matches, fall back to op0_codes[] for plain opcodes or
        report an error;
     4. infer the operand size when the mnemonic carries no suffix;
     5. emit prefixes, the (adjusted) opcode, the ModRM/SIB bytes and finally
        immediates or jump displacements.
   Output goes through g()/gen_le16()/gen_expr*()/gen_disp*(); errors are
   reported through tcc_error(). */
591 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
593 const ASMInstr *pa;
594 int i, modrm_index, reg, v, op1, is_short_jmp, seg_prefix;
/* s is the operand size index taken from the mnemonic suffix; the value
   `autosize` (computed further down) means "no suffix, infer it" */
595 int nb_ops, s;
596 Operand ops[MAX_OPERANDS], *pop;
/* NOTE(review): sized with a literal 3, which equals MAX_OPERANDS today */
597 int op_type[3]; /* decoded op type */
598 int alltypes; /* OR of all operand types */
599 int autosize;
600 #ifdef I386_ASM_16
/* static: the effect of an a32/o32 prefix pseudo-instruction is carried over
   to the next call, i.e. to the instruction that follows the prefix */
601 static int a32 = 0, o32 = 0, addr32 = 0, data32 = 0;
602 #endif
604 /* force synthetic ';' after prefix instruction, so we can handle */
605 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
606 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
607 unget_tok(';');
609 /* get operands */
610 pop = ops;
611 nb_ops = 0;
612 seg_prefix = 0;
613 alltypes = 0;
614 for(;;) {
615 if (tok == ';' || tok == TOK_LINEFEED)
616 break;
617 if (nb_ops >= MAX_OPERANDS) {
618 tcc_error("incorrect number of operands");
620 parse_operand(s1, pop);
/* "%seg:operand": the segment register just parsed is a prefix, and the real
   operand follows the colon; only one override per instruction is allowed */
621 if (tok == ':') {
622 if (pop->type != OP_SEG || seg_prefix)
623 tcc_error("incorrect prefix");
624 seg_prefix = segment_prefixes[pop->reg];
625 next();
626 parse_operand(s1, pop);
627 #ifndef I386_ASM_16
628 if (!(pop->type & OP_EA)) {
629 tcc_error("segment prefix must be followed by memory reference");
631 #endif
633 pop++;
634 nb_ops++;
635 if (tok != ',')
636 break;
637 next();
640 is_short_jmp = 0;
641 s = 0; /* avoid warning */
643 /* optimize matching by using a lookup table (no hashing is needed
644 !) */
/* Each table entry stands for a family of consecutive mnemonic tokens; the
   instr_type flags tell how the family is laid out and therefore how to test
   whether `opcode` belongs to it and which size suffix it carries. */
645 for(pa = asm_instrs; pa->sym != 0; pa++) {
646 s = 0;
647 if (pa->instr_type & OPC_FARITH) {
648 v = opcode - pa->sym;
649 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
650 continue;
651 } else if (pa->instr_type & OPC_ARITH) {
652 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
653 continue;
654 s = (opcode - pa->sym) % NBWLX;
655 } else if (pa->instr_type & OPC_SHIFT) {
656 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
657 continue;
658 s = (opcode - pa->sym) % NBWLX;
659 } else if (pa->instr_type & OPC_TEST) {
660 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
661 continue;
662 /* cmovxx is a test opcode but accepts multiple sizes.
663 TCC doesn't accept the suffixed mnemonic, instead we
664 simply force size autodetection always. */
665 if (pa->instr_type & OPC_WLX)
666 s = NBWLX - 1;
667 } else if (pa->instr_type & OPC_B) {
668 #ifdef TCC_TARGET_X86_64
669 /* Some instructions don't have the full size but only
670 bwl form. insb e.g. */
671 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
672 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
673 continue;
674 #endif
675 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
676 continue;
677 s = opcode - pa->sym;
678 } else if (pa->instr_type & OPC_WLX) {
679 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
680 continue;
/* no b form in this family, so the first token already means size index 1 */
681 s = opcode - pa->sym + 1;
682 } else {
683 if (pa->sym != opcode)
684 continue;
686 if (pa->nb_ops != nb_ops)
687 continue;
688 /* now decode and check each operand */
689 alltypes = 0;
690 for(i = 0; i < nb_ops; i++) {
/* NOTE(review): this op1 shadows the function-level op1 declared above */
691 int op1, op2;
692 op1 = pa->op_type[i];
693 op2 = op1 & 0x1f;
/* expand the compressed OPT_xxx code into the mask of accepted OP_xxx bits */
694 switch(op2) {
695 case OPT_IM:
696 v = OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64;
697 break;
698 case OPT_REG:
699 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
700 break;
701 case OPT_REGW:
702 v = OP_REG16 | OP_REG32 | OP_REG64;
703 break;
704 case OPT_IMW:
705 v = OP_IM16 | OP_IM32 | OP_IM64;
706 break;
707 #ifdef TCC_TARGET_X86_64
708 case OPT_IMNO64:
709 v = OP_IM16 | OP_IM32;
710 break;
711 #endif
712 default:
713 v = 1 << op2;
714 break;
716 if (op1 & OPT_EA)
717 v |= OP_EA;
718 op_type[i] = v;
719 if ((ops[i].type & v) == 0)
720 goto next;
721 alltypes |= ops[i].type;
723 /* all is matching ! */
724 break;
725 next: ;
/* The loop ran into the terminating entry: no table form matched.  Plain
   zero-operand opcodes are emitted straight from op0_codes[]; anything else
   is an error. */
727 if (pa->sym == 0) {
728 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
729 int b;
730 b = op0_codes[opcode - TOK_ASM_first];
731 #ifdef I386_ASM_16
732 if (opcode == TOK_ASM_o32) {
733 if (s1->seg_size == 32)
734 tcc_error("incorrect prefix");
735 else
736 o32 = data32 = 1;
737 } else if (opcode == TOK_ASM_a32) {
738 if (s1->seg_size == 32)
739 tcc_error("incorrect prefix");
740 else
741 a32 = addr32 = 1;
743 #endif
744 if (b & 0xff00)
745 g(b >> 8);
746 g(b);
747 return;
748 } else if (opcode <= TOK_ASM_alllast) {
749 tcc_error("bad operand with opcode '%s'",
750 get_tok_str(opcode, NULL));
751 } else {
752 tcc_error("unknown opcode '%s'",
753 get_tok_str(opcode, NULL));
756 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
757 autosize = NBWLX-1;
758 #ifdef TCC_TARGET_X86_64
759 /* XXX the autosize should rather be zero, to not have to adjust this
760 all the time. */
761 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ)
762 autosize = NBWLX-2;
763 #endif
764 if (s == autosize) {
/* take the size of the first register operand, ignoring operands whose
   table entry pins them to %cl or %dx */
765 for(i = 0; s == autosize && i < nb_ops; i++) {
766 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
767 s = reg_to_size[ops[i].type & OP_REG];
769 if (s == autosize) {
770 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
771 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32 | OP_IM64)))
772 s = 2;
773 else
774 tcc_error("cannot infer opcode suffix");
778 #ifdef I386_ASM_16
779 for(i = 0; i < nb_ops; i++) {
780 if (ops[i].type & OP_REG32) {
781 if (s1->seg_size == 16)
782 o32 = 1;
/* NOTE(review): this condition is always true when reached, it is the plain
   "else" of the test above */
783 } else if (!(ops[i].type & OP_REG32)) {
784 if (s1->seg_size == 32)
785 o32 = 1;
790 if (s == 1 || (pa->instr_type & OPC_D16)) {
791 if (s1->seg_size == 32)
792 o32 = 1;
793 } else if (s == 2) {
794 if (s1->seg_size == 16) {
795 if (!(pa->instr_type & OPC_D16))
796 o32 = 1;
800 /* generate a16/a32 prefix if needed */
801 if ((a32 == 1) && (addr32 == 0))
802 g(0x67);
803 /* generate o16/o32 prefix if needed */
804 if ((o32 == 1) && (data32 == 0))
805 g(0x66);
807 addr32 = data32 = 0;
808 #else
809 #ifdef TCC_TARGET_X86_64
810 /* Generate addr32 prefix if needed */
811 for(i = 0; i < nb_ops; i++) {
812 if (ops[i].type & OP_EA32) {
813 g(0x67);
814 break;
817 #endif
818 /* generate data16 prefix if needed */
819 if (s == 1 || (pa->instr_type & OPC_D16))
820 g(0x66);
821 #ifdef TCC_TARGET_X86_64
822 if (s == 3 || (alltypes & OP_REG64)) {
823 /* generate REX prefix */
824 int default64 = 0;
825 for(i = 0; i < nb_ops; i++) {
826 if (op_type[i] == OP_REG64) {
827 /* If only 64bit regs are accepted in one operand
828 this is a default64 instruction without need for
829 REX prefixes. */
830 default64 = 1;
831 break;
834 /* XXX find better encoding for the default64 instructions. */
835 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
836 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
837 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
838 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
839 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
840 && !default64)
841 g(0x48);
843 #endif
844 #endif
846 /* now generate the operation */
847 if (pa->instr_type & OPC_FWAIT)
848 g(0x9b);
849 if (seg_prefix)
850 g(seg_prefix);
/* From here on v is the opcode being built from the table's base opcode;
   the tests below key on specific base opcode values. */
852 v = pa->opcode;
853 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
854 /* kludge for imul $im, %reg */
855 nb_ops = 3;
856 ops[2] = ops[1];
857 op_type[2] = op_type[1];
858 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
859 v--; /* int $3 case */
860 nb_ops = 0;
861 } else if ((v == 0x06 || v == 0x07)) {
862 if (ops[0].reg >= 4) {
863 /* push/pop %fs or %gs */
864 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
865 } else {
866 v += ops[0].reg << 3;
868 nb_ops = 0;
869 } else if (v <= 0x05) {
870 /* arith case */
871 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
872 } else if ((pa->instr_type & (OPC_FARITH | OPC_MODRM)) == OPC_FARITH) {
873 /* fpu arith case */
874 v += ((opcode - pa->sym) / 6) << 3;
876 if (pa->instr_type & OPC_REG) {
/* register encoded in the opcode byte itself: add the first register or
   x87 stack operand */
877 for(i = 0; i < nb_ops; i++) {
878 if (op_type[i] & (OP_REG | OP_ST)) {
879 v += ops[i].reg;
880 break;
883 /* mov $im, %reg case */
884 if (pa->opcode == 0xb0 && s >= 1)
885 v += 7;
/* byte-capable families: every non-byte size uses opcode + 1 */
887 if (pa->instr_type & OPC_B)
888 v += s >= 1;
889 if (pa->instr_type & OPC_TEST)
890 v += test_bits[opcode - pa->sym];
891 if (pa->instr_type & OPC_SHORTJMP) {
892 Sym *sym;
893 int jmp_disp;
895 /* see if we can really generate the jump with a byte offset */
896 sym = ops[0].e.sym;
897 if (!sym)
898 goto no_short_jump;
899 if (sym->r != cur_text_section->sh_num)
900 goto no_short_jump;
901 jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
902 if (jmp_disp == (int8_t)jmp_disp) {
903 /* OK to generate jump */
904 is_short_jmp = 1;
905 ops[0].e.v = jmp_disp;
906 } else {
907 no_short_jump:
908 if (pa->instr_type & OPC_JMP) {
909 /* long jump will be allowed. need to modify the
910 opcode slightly */
911 if (v == 0xeb)
912 v = 0xe9;
913 else
914 v += 0x0f10;
915 } else {
916 tcc_error("invalid displacement");
/* emit the opcode, high byte first when it is a two-byte opcode */
920 op1 = v >> 8;
921 if (op1)
922 g(op1);
923 g(v);
925 /* search which operand will be used for modrm */
926 modrm_index = 0;
/* default value of the ModRM reg field: the sub-operation number for
   shift/arith/fpu families (shift slot 6 is emitted as 7), otherwise the
   group stored in instr_type */
927 if (pa->instr_type & OPC_SHIFT) {
928 reg = (opcode - pa->sym) / NBWLX;
929 if (reg == 6)
930 reg = 7;
931 } else if (pa->instr_type & OPC_ARITH) {
932 reg = (opcode - pa->sym) / NBWLX;
933 } else if (pa->instr_type & OPC_FARITH) {
934 reg = (opcode - pa->sym) / 6;
935 } else {
936 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
938 if (pa->instr_type & OPC_MODRM) {
939 /* first look for an ea operand */
940 for(i = 0;i < nb_ops; i++) {
941 if (op_type[i] & OP_EA)
942 goto modrm_found;
944 /* then if not found, a register or indirection (shift instructions) */
945 for(i = 0;i < nb_ops; i++) {
946 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
947 goto modrm_found;
949 #ifdef ASM_DEBUG
950 tcc_error("bad op table");
951 #endif
952 modrm_found:
953 modrm_index = i;
954 /* if a register is used in another operand then it is
955 used instead of group */
956 for(i = 0;i < nb_ops; i++) {
957 v = op_type[i];
958 if (i != modrm_index &&
959 (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
960 reg = ops[i].reg;
961 break;
965 asm_modrm(reg, &ops[modrm_index]);
968 /* emit constants */
969 #ifndef TCC_TARGET_X86_64
970 if (pa->opcode == 0x9a || pa->opcode == 0xea) {
971 /* ljmp or lcall kludge */
/* the offset (second operand) is written before the 16-bit segment
   (first operand) */
972 #ifdef I386_ASM_16
973 if (s1->seg_size == 16 && o32 == 0)
974 gen_expr16(&ops[1].e);
975 else
976 #endif
977 gen_expr32(&ops[1].e);
978 if (ops[0].e.sym)
979 tcc_error("cannot relocate");
980 gen_le16(ops[0].e.v);
981 return;
983 #endif
984 for(i = 0;i < nb_ops; i++) {
985 v = op_type[i];
986 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
987 /* if multiple sizes are given it means we must look
988 at the op size */
989 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
990 if (s == 0)
991 v = OP_IM8;
992 else if (s == 1)
993 v = OP_IM16;
994 else if (s == 2 || (v & OP_IM64) == 0)
995 v = OP_IM32;
996 else
997 v = OP_IM64;
/* 8-bit and (outside 16-bit mode) 16-bit immediates cannot carry a
   relocation, so a symbolic value is rejected */
999 if (v & (OP_IM8 | OP_IM8S)) {
1000 if (ops[i].e.sym)
1001 goto error_relocate;
1002 g(ops[i].e.v);
1003 } else if (v & OP_IM16) {
1004 #ifdef I386_ASM_16
1005 if (s1->seg_size == 16)
1006 gen_expr16(&ops[i].e);
1007 else
1008 #endif
1009 if (ops[i].e.sym)
1010 error_relocate:
1011 tcc_error("cannot relocate");
1012 else
1013 gen_le16(ops[i].e.v);
1014 } else {
/* jump targets are emitted pc-relative (one byte when the short form was
   selected above); everything else is an absolute value */
1015 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
1016 if (is_short_jmp)
1017 g(ops[i].e.v);
1018 #ifdef I386_ASM_16
1019 else if (s1->seg_size == 16)
1020 gen_disp16(&ops[i].e);
1021 #endif
1022 else
1023 gen_disp32(&ops[i].e);
1024 } else {
1025 #ifdef I386_ASM_16
1026 if (s1->seg_size == 16 && !((o32 == 1) && (v & OP_IM32)))
1027 gen_expr16(&ops[i].e);
1028 else
1029 #endif
1030 #ifdef TCC_TARGET_X86_64
1031 if (v & OP_IM64)
1032 gen_expr64(&ops[i].e);
1033 else
1034 #endif
1035 gen_expr32(&ops[i].e);
1038 #ifdef I386_ASM_16
1039 } else if (v & (OP_REG16 | OP_REG32)) {
1040 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
1041 /* jmp $r */
1042 g(0xE0 + ops[i].reg);
1044 #endif
1045 #ifdef TCC_TARGET_X86_64
1046 } else if (v & (OP_REG32 | OP_REG64)) {
1047 if (pa->instr_type & (OPC_JMP | OPC_SHORTJMP)) {
1048 /* jmp $r */
1049 g(0xE0 + ops[i].reg);
1051 #endif
/* the a32/o32 prefix state has been consumed by this instruction */
1054 #ifdef I386_ASM_16
1055 a32 = o32 = 0;
1056 #endif
/* Return the allocation priority of an inline-asm constraint string.
   Operands with the lowest number are allocated first, so the most
   restrictive constraint letters get the smallest values:
     'A'                        -> 0
     'a' 'b' 'c' 'd' 'S' 'D'    -> 1
     'q'                        -> 2
     'r'                        -> 3
     'N' 'M' 'I' 'i' 'm' 'g'    -> 4
   A string with several alternatives gets the largest value among its
   letters.  An unknown letter is reported through tcc_error(). */
static inline int constraint_priority(const char *str)
{
    const char *p;
    int worst = 0;

    for (p = str; *p != '\0'; p++) {
        int ch = *p;
        int rank;

        if (ch == 'A') {
            rank = 0;
        } else if (ch == 'a' || ch == 'b' || ch == 'c' ||
                   ch == 'd' || ch == 'S' || ch == 'D') {
            rank = 1;
        } else if (ch == 'q') {
            rank = 2;
        } else if (ch == 'r') {
            rank = 3;
        } else if (ch == 'N' || ch == 'M' || ch == 'I' ||
                   ch == 'i' || ch == 'm' || ch == 'g') {
            rank = 4;
        } else {
            tcc_error("unknown constraint '%c'", ch);
            rank = 0;
        }
        if (rank > worst)
            worst = rank;
    }
    return worst;
}
/* Return a pointer to the first character of `p` that is not one of the
   constraint modifier characters '=', '&', '+' or '%'. */
static const char *skip_constraint_modifiers(const char *p)
{
    for (;;) {
        switch (*p) {
        case '=':
        case '&':
        case '+':
        case '%':
            p++;
            continue;
        default:
            return p;
        }
    }
}
/* Per-register usage bits kept in regs_allocated[] by
   asm_compute_constraints(): a register may be taken for outputs, for
   inputs, or for both. */
1115 #define REG_OUT_MASK 0x01
1116 #define REG_IN_MASK 0x02
/* True when `reg` is already taken for any of the roles in the caller's
   current reg_mask.  Relies on locals named regs_allocated and reg_mask
   being in scope at the point of use. */
1118 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
1120 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1121 int nb_operands, int nb_outputs,
1122 const uint8_t *clobber_regs,
1123 int *pout_reg)
1125 ASMOperand *op;
1126 int sorted_op[MAX_ASM_OPERANDS];
1127 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1128 const char *str;
1129 uint8_t regs_allocated[NB_ASM_REGS];
1131 /* init fields */
1132 for(i=0;i<nb_operands;i++) {
1133 op = &operands[i];
1134 op->input_index = -1;
1135 op->ref_index = -1;
1136 op->reg = -1;
1137 op->is_memory = 0;
1138 op->is_rw = 0;
1140 /* compute constraint priority and evaluate references to output
1141 constraints if input constraints */
1142 for(i=0;i<nb_operands;i++) {
1143 op = &operands[i];
1144 str = op->constraint;
1145 str = skip_constraint_modifiers(str);
1146 if (isnum(*str) || *str == '[') {
1147 /* this is a reference to another constraint */
1148 k = find_constraint(operands, nb_operands, str, NULL);
1149 if ((unsigned)k >= i || i < nb_outputs)
1150 tcc_error("invalid reference in constraint %d ('%s')",
1151 i, str);
1152 op->ref_index = k;
1153 if (operands[k].input_index >= 0)
1154 tcc_error("cannot reference twice the same operand");
1155 operands[k].input_index = i;
1156 op->priority = 5;
1157 } else {
1158 op->priority = constraint_priority(str);
1162 /* sort operands according to their priority */
1163 for(i=0;i<nb_operands;i++)
1164 sorted_op[i] = i;
1165 for(i=0;i<nb_operands - 1;i++) {
1166 for(j=i+1;j<nb_operands;j++) {
1167 p1 = operands[sorted_op[i]].priority;
1168 p2 = operands[sorted_op[j]].priority;
1169 if (p2 < p1) {
1170 tmp = sorted_op[i];
1171 sorted_op[i] = sorted_op[j];
1172 sorted_op[j] = tmp;
1177 for(i = 0;i < NB_ASM_REGS; i++) {
1178 if (clobber_regs[i])
1179 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1180 else
1181 regs_allocated[i] = 0;
1183 /* esp cannot be used */
1184 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1185 /* ebp cannot be used yet */
1186 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1188 /* allocate registers and generate corresponding asm moves */
1189 for(i=0;i<nb_operands;i++) {
1190 j = sorted_op[i];
1191 op = &operands[j];
1192 str = op->constraint;
1193 /* no need to allocate references */
1194 if (op->ref_index >= 0)
1195 continue;
1196 /* select if register is used for output, input or both */
1197 if (op->input_index >= 0) {
1198 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1199 } else if (j < nb_outputs) {
1200 reg_mask = REG_OUT_MASK;
1201 } else {
1202 reg_mask = REG_IN_MASK;
1204 try_next:
1205 c = *str++;
1206 switch(c) {
1207 case '=':
1208 goto try_next;
1209 case '+':
1210 op->is_rw = 1;
1211 /* FALL THRU */
1212 case '&':
1213 if (j >= nb_outputs)
1214 tcc_error("'%c' modifier can only be applied to outputs", c);
1215 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1216 goto try_next;
1217 case 'A':
1218 /* allocate both eax and edx */
1219 if (is_reg_allocated(TREG_XAX) ||
1220 is_reg_allocated(TREG_XDX))
1221 goto try_next;
1222 op->is_llong = 1;
1223 op->reg = TREG_XAX;
1224 regs_allocated[TREG_XAX] |= reg_mask;
1225 regs_allocated[TREG_XDX] |= reg_mask;
1226 break;
1227 case 'a':
1228 reg = TREG_XAX;
1229 goto alloc_reg;
1230 case 'b':
1231 reg = 3;
1232 goto alloc_reg;
1233 case 'c':
1234 reg = TREG_XCX;
1235 goto alloc_reg;
1236 case 'd':
1237 reg = TREG_XDX;
1238 goto alloc_reg;
1239 case 'S':
1240 reg = 6;
1241 goto alloc_reg;
1242 case 'D':
1243 reg = 7;
1244 alloc_reg:
1245 if (is_reg_allocated(reg))
1246 goto try_next;
1247 goto reg_found;
1248 case 'q':
1249 /* eax, ebx, ecx or edx */
1250 for(reg = 0; reg < 4; reg++) {
1251 if (!is_reg_allocated(reg))
1252 goto reg_found;
1254 goto try_next;
1255 case 'r':
1256 /* any general register */
1257 for(reg = 0; reg < 8; reg++) {
1258 if (!is_reg_allocated(reg))
1259 goto reg_found;
1261 goto try_next;
1262 reg_found:
1263 /* now we can reload in the register */
1264 op->is_llong = 0;
1265 op->reg = reg;
1266 regs_allocated[reg] |= reg_mask;
1267 break;
1268 case 'i':
1269 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1270 goto try_next;
1271 break;
1272 case 'I':
1273 case 'N':
1274 case 'M':
1275 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1276 goto try_next;
1277 break;
1278 case 'm':
1279 case 'g':
1280 /* nothing special to do because the operand is already in
1281 memory, except if the pointer itself is stored in a
1282 memory variable (VT_LLOCAL case) */
1283 /* XXX: fix constant case */
1284 /* if it is a reference to a memory zone, it must lie
1285 in a register, so we reserve the register in the
1286 input registers and a load will be generated
1287 later */
1288 if (j < nb_outputs || c == 'm') {
1289 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1290 /* any general register */
1291 for(reg = 0; reg < 8; reg++) {
1292 if (!(regs_allocated[reg] & REG_IN_MASK))
1293 goto reg_found1;
1295 goto try_next;
1296 reg_found1:
1297 /* now we can reload in the register */
1298 regs_allocated[reg] |= REG_IN_MASK;
1299 op->reg = reg;
1300 op->is_memory = 1;
1303 break;
1304 default:
1305 tcc_error("asm constraint %d ('%s') could not be satisfied",
1306 j, op->constraint);
1307 break;
1309 /* if a reference is present for that operand, we assign it too */
1310 if (op->input_index >= 0) {
1311 operands[op->input_index].reg = op->reg;
1312 operands[op->input_index].is_llong = op->is_llong;
1316 /* compute out_reg. It is used to store outputs registers to memory
1317 locations references by pointers (VT_LLOCAL case) */
1318 *pout_reg = -1;
1319 for(i=0;i<nb_operands;i++) {
1320 op = &operands[i];
1321 if (op->reg >= 0 &&
1322 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1323 !op->is_memory) {
1324 for(reg = 0; reg < 8; reg++) {
1325 if (!(regs_allocated[reg] & REG_OUT_MASK))
1326 goto reg_found2;
1328 tcc_error("could not find free output register for reloading");
1329 reg_found2:
1330 *pout_reg = reg;
1331 break;
1335 /* print sorted constraints */
1336 #ifdef ASM_DEBUG
1337 for(i=0;i<nb_operands;i++) {
1338 j = sorted_op[i];
1339 op = &operands[j];
1340 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1342 op->id ? get_tok_str(op->id, NULL) : "",
1343 op->constraint,
1344 op->vt->r,
1345 op->reg);
1347 if (*pout_reg >= 0)
1348 printf("out_reg=%d\n", *pout_reg);
1349 #endif
1352 ST_FUNC void subst_asm_operand(CString *add_str,
1353 SValue *sv, int modifier)
1355 int r, reg, size, val;
1356 char buf[64];
1358 r = sv->r;
1359 if ((r & VT_VALMASK) == VT_CONST) {
1360 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n')
1361 cstr_ccat(add_str, '$');
1362 if (r & VT_SYM) {
1363 cstr_cat(add_str, get_tok_str(sv->sym->v, NULL), -1);
1364 if ((uint32_t)sv->c.i != 0) {
1365 cstr_ccat(add_str, '+');
1366 } else {
1367 return;
1370 val = sv->c.i;
1371 if (modifier == 'n')
1372 val = -val;
1373 snprintf(buf, sizeof(buf), "%d", (int)sv->c.i);
1374 cstr_cat(add_str, buf, -1);
1375 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1376 snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
1377 cstr_cat(add_str, buf, -1);
1378 } else if (r & VT_LVAL) {
1379 reg = r & VT_VALMASK;
1380 if (reg >= VT_CONST)
1381 tcc_error("internal compiler error");
1382 snprintf(buf, sizeof(buf), "(%%%s)",
1383 get_tok_str(TOK_ASM_eax + reg, NULL));
1384 cstr_cat(add_str, buf, -1);
1385 } else {
1386 /* register case */
1387 reg = r & VT_VALMASK;
1388 if (reg >= VT_CONST)
1389 tcc_error("internal compiler error");
1391 /* choose register operand size */
1392 if ((sv->type.t & VT_BTYPE) == VT_BYTE)
1393 size = 1;
1394 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1395 size = 2;
1396 #ifdef TCC_TARGET_X86_64
1397 else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
1398 size = 8;
1399 #endif
1400 else
1401 size = 4;
1402 if (size == 1 && reg >= 4)
1403 size = 4;
1405 if (modifier == 'b') {
1406 if (reg >= 4)
1407 tcc_error("cannot use byte register");
1408 size = 1;
1409 } else if (modifier == 'h') {
1410 if (reg >= 4)
1411 tcc_error("cannot use byte register");
1412 size = -1;
1413 } else if (modifier == 'w') {
1414 size = 2;
1415 #ifdef TCC_TARGET_X86_64
1416 } else if (modifier == 'q') {
1417 size = 8;
1418 #endif
1421 switch(size) {
1422 case -1:
1423 reg = TOK_ASM_ah + reg;
1424 break;
1425 case 1:
1426 reg = TOK_ASM_al + reg;
1427 break;
1428 case 2:
1429 reg = TOK_ASM_ax + reg;
1430 break;
1431 default:
1432 reg = TOK_ASM_eax + reg;
1433 break;
1434 #ifdef TCC_TARGET_X86_64
1435 case 8:
1436 reg = TOK_ASM_rax + reg;
1437 break;
1438 #endif
1440 snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
1441 cstr_cat(add_str, buf, -1);
1445 /* generate prolog and epilog code for asm statement */
1446 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1447 int nb_outputs, int is_output,
1448 uint8_t *clobber_regs,
1449 int out_reg)
1451 uint8_t regs_allocated[NB_ASM_REGS];
1452 ASMOperand *op;
1453 int i, reg;
1454 static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 };
1456 /* mark all used registers */
1457 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1458 for(i = 0; i < nb_operands;i++) {
1459 op = &operands[i];
1460 if (op->reg >= 0)
1461 regs_allocated[op->reg] = 1;
1463 if (!is_output) {
1464 /* generate reg save code */
1465 for(i = 0; i < NB_SAVED_REGS; i++) {
1466 reg = reg_saved[i];
1467 if (regs_allocated[reg]) {
1468 #ifdef I386_ASM_16
1469 if (tcc_state->seg_size == 16)
1470 g(0x66);
1471 #endif
1472 g(0x50 + reg);
1476 /* generate load code */
1477 for(i = 0; i < nb_operands; i++) {
1478 op = &operands[i];
1479 if (op->reg >= 0) {
1480 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1481 op->is_memory) {
1482 /* memory reference case (for both input and
1483 output cases) */
1484 SValue sv;
1485 sv = *op->vt;
1486 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1487 load(op->reg, &sv);
1488 } else if (i >= nb_outputs || op->is_rw) {
1489 /* load value in register */
1490 load(op->reg, op->vt);
1491 if (op->is_llong) {
1492 SValue sv;
1493 sv = *op->vt;
1494 sv.c.i += 4;
1495 load(TREG_XDX, &sv);
1500 } else {
1501 /* generate save code */
1502 for(i = 0 ; i < nb_outputs; i++) {
1503 op = &operands[i];
1504 if (op->reg >= 0) {
1505 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1506 if (!op->is_memory) {
1507 SValue sv;
1508 sv = *op->vt;
1509 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1510 load(out_reg, &sv);
1512 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1513 store(op->reg, &sv);
1515 } else {
1516 store(op->reg, op->vt);
1517 if (op->is_llong) {
1518 SValue sv;
1519 sv = *op->vt;
1520 sv.c.i += 4;
1521 store(TREG_XDX, &sv);
1526 /* generate reg restore code */
1527 for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
1528 reg = reg_saved[i];
1529 if (regs_allocated[reg]) {
1530 #ifdef I386_ASM_16
1531 if (tcc_state->seg_size == 16)
1532 g(0x66);
1533 #endif
1534 g(0x58 + reg);
1540 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1542 int reg;
1543 TokenSym *ts;
1545 if (!strcmp(str, "memory") ||
1546 !strcmp(str, "cc"))
1547 return;
1548 ts = tok_alloc(str, strlen(str));
1549 reg = ts->tok;
1550 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1551 reg -= TOK_ASM_eax;
1552 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1553 reg -= TOK_ASM_ax;
1554 #ifdef TCC_TARGET_X86_64
1555 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1556 reg -= TOK_ASM_rax;
1557 #endif
1558 } else {
1559 tcc_error("invalid clobber register '%s'", str);
1561 clobber_regs[reg] = 1;