fix UB in constant folding of double -> signed integer conversion
[tinycc.git] / i386-asm.c
blob33783d5f5b41938b98038e97c32e7de7ecffce0b
1 /*
2 * i386 specific functions for TCC assembler
4 * Copyright (c) 2001, 2002 Fabrice Bellard
5 * Copyright (c) 2009 Frédéric Feret (x86_64 support)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #define USING_GLOBALS
23 #include "tcc.h"
25 #define MAX_OPERANDS 3
27 #define TOK_ASM_first TOK_ASM_clc
28 #define TOK_ASM_last TOK_ASM_emms
29 #define TOK_ASM_alllast TOK_ASM_subps
31 #define OPC_B 0x01 /* only used with OPC_WL */
32 #define OPC_WL 0x02 /* accepts w, l or no suffix */
33 #define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
34 #define OPC_REG 0x04 /* register is added to opcode */
35 #define OPC_MODRM 0x08 /* modrm encoding */
37 #define OPCT_MASK 0x70
38 #define OPC_FWAIT 0x10 /* add fwait opcode */
39 #define OPC_SHIFT 0x20 /* shift opcodes */
40 #define OPC_ARITH 0x30 /* arithmetic opcodes */
41 #define OPC_FARITH 0x40 /* FPU arithmetic opcodes */
42 #define OPC_TEST 0x50 /* test opcodes */
43 #define OPC_0F01 0x60 /* 0x0f01XX (group 7, XX is 2nd opcode,
44 no operands and unstructured mod/rm) */
45 #define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))
47 #define OPC_0F 0x100 /* Is secondary map (0x0f prefix) */
48 #define OPC_48 0x200 /* Always has REX prefix */
49 #ifdef TCC_TARGET_X86_64
50 # define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */
51 # define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
52 # define OPC_WLX OPC_WLQ
53 # define OPC_BWLX OPC_BWLQ
54 #else
55 # define OPC_WLX OPC_WL
56 # define OPC_BWLX OPC_BWL
57 #endif
59 #define OPC_GROUP_SHIFT 13
61 /* in order to compress the operand type, we use specific operands and
62 we or only with EA */
63 enum {
64 OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
65 OPT_REG16, /* warning: value is hardcoded from TOK_ASM_xxx */
66 OPT_REG32, /* warning: value is hardcoded from TOK_ASM_xxx */
67 #ifdef TCC_TARGET_X86_64
68 OPT_REG64, /* warning: value is hardcoded from TOK_ASM_xxx */
69 #endif
70 OPT_MMX, /* warning: value is hardcoded from TOK_ASM_xxx */
71 OPT_SSE, /* warning: value is hardcoded from TOK_ASM_xxx */
72 OPT_CR, /* warning: value is hardcoded from TOK_ASM_xxx */
73 OPT_TR, /* warning: value is hardcoded from TOK_ASM_xxx */
74 OPT_DB, /* warning: value is hardcoded from TOK_ASM_xxx */
75 OPT_SEG,
76 OPT_ST,
77 #ifdef TCC_TARGET_X86_64
78 OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but
79 with REX prefix, not used in insn templates */
80 #endif
81 OPT_IM8,
82 OPT_IM8S,
83 OPT_IM16,
84 OPT_IM32,
85 #ifdef TCC_TARGET_X86_64
86 OPT_IM64,
87 #endif
88 OPT_EAX, /* %al, %ax, %eax or %rax register */
89 OPT_ST0, /* %st(0) register */
90 OPT_CL, /* %cl register */
91 OPT_DX, /* %dx register */
92 OPT_ADDR, /* OP_EA with only offset */
93 OPT_INDIR, /* *(expr) */
94 /* composite types */
95 OPT_COMPOSITE_FIRST,
96 OPT_IM, /* IM8 | IM16 | IM32 */
97 OPT_REG, /* REG8 | REG16 | REG32 | REG64 */
98 OPT_REGW, /* REG16 | REG32 | REG64 */
99 OPT_IMW, /* IM16 | IM32 */
100 OPT_MMXSSE, /* MMX | SSE */
101 OPT_DISP, /* Like OPT_ADDR, but emitted as displacement (for jumps) */
102 OPT_DISP8, /* Like OPT_ADDR, but only 8bit (short jumps) */
103 /* can be ored with any OPT_xxx */
104 OPT_EA = 0x80
107 #define OP_REG8 (1 << OPT_REG8)
108 #define OP_REG16 (1 << OPT_REG16)
109 #define OP_REG32 (1 << OPT_REG32)
110 #define OP_MMX (1 << OPT_MMX)
111 #define OP_SSE (1 << OPT_SSE)
112 #define OP_CR (1 << OPT_CR)
113 #define OP_TR (1 << OPT_TR)
114 #define OP_DB (1 << OPT_DB)
115 #define OP_SEG (1 << OPT_SEG)
116 #define OP_ST (1 << OPT_ST)
117 #define OP_IM8 (1 << OPT_IM8)
118 #define OP_IM8S (1 << OPT_IM8S)
119 #define OP_IM16 (1 << OPT_IM16)
120 #define OP_IM32 (1 << OPT_IM32)
121 #define OP_EAX (1 << OPT_EAX)
122 #define OP_ST0 (1 << OPT_ST0)
123 #define OP_CL (1 << OPT_CL)
124 #define OP_DX (1 << OPT_DX)
125 #define OP_ADDR (1 << OPT_ADDR)
126 #define OP_INDIR (1 << OPT_INDIR)
127 #ifdef TCC_TARGET_X86_64
128 # define OP_REG64 (1 << OPT_REG64)
129 # define OP_REG8_LOW (1 << OPT_REG8_LOW)
130 # define OP_IM64 (1 << OPT_IM64)
131 # define OP_EA32 (OP_EA << 1)
132 #else
133 # define OP_REG64 0
134 # define OP_REG8_LOW 0
135 # define OP_IM64 0
136 # define OP_EA32 0
137 #endif
139 #define OP_EA 0x40000000
140 #define OP_REG (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
142 #ifdef TCC_TARGET_X86_64
143 # define TREG_XAX TREG_RAX
144 # define TREG_XCX TREG_RCX
145 # define TREG_XDX TREG_RDX
146 # define TOK_ASM_xax TOK_ASM_rax
147 #else
148 # define TREG_XAX TREG_EAX
149 # define TREG_XCX TREG_ECX
150 # define TREG_XDX TREG_EDX
151 # define TOK_ASM_xax TOK_ASM_eax
152 #endif
154 typedef struct ASMInstr {
155 uint16_t sym;
156 uint16_t opcode;
157 uint16_t instr_type;
158 uint8_t nb_ops;
159 uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
160 } ASMInstr;
162 typedef struct Operand {
163 uint32_t type;
164 int8_t reg; /* register, -1 if none */
165 int8_t reg2; /* second register, -1 if none */
166 uint8_t shift;
167 ExprValue e;
168 } Operand;
170 static const uint8_t reg_to_size[9] = {
172 [OP_REG8] = 0,
173 [OP_REG16] = 1,
174 [OP_REG32] = 2,
175 #ifdef TCC_TARGET_X86_64
176 [OP_REG64] = 3,
177 #endif
179 0, 0, 1, 0, 2, 0, 0, 0, 3
182 #define NB_TEST_OPCODES 30
184 static const uint8_t test_bits[NB_TEST_OPCODES] = {
185 0x00, /* o */
186 0x01, /* no */
187 0x02, /* b */
188 0x02, /* c */
189 0x02, /* nae */
190 0x03, /* nb */
191 0x03, /* nc */
192 0x03, /* ae */
193 0x04, /* e */
194 0x04, /* z */
195 0x05, /* ne */
196 0x05, /* nz */
197 0x06, /* be */
198 0x06, /* na */
199 0x07, /* nbe */
200 0x07, /* a */
201 0x08, /* s */
202 0x09, /* ns */
203 0x0a, /* p */
204 0x0a, /* pe */
205 0x0b, /* np */
206 0x0b, /* po */
207 0x0c, /* l */
208 0x0c, /* nge */
209 0x0d, /* nl */
210 0x0d, /* ge */
211 0x0e, /* le */
212 0x0e, /* ng */
213 0x0f, /* nle */
214 0x0f, /* g */
217 static const uint8_t segment_prefixes[] = {
218 0x26, /* es */
219 0x2e, /* cs */
220 0x36, /* ss */
221 0x3e, /* ds */
222 0x64, /* fs */
223 0x65 /* gs */
226 static const ASMInstr asm_instrs[] = {
227 #define ALT(x) x
228 /* This removes a 0x0f in the second byte */
229 #define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o)))
230 /* This constructs instr_type from opcode, type and group. */
231 #define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
232 #define DEF_ASM_OP0(name, opcode)
233 #define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } },
234 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
235 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
236 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
237 #ifdef TCC_TARGET_X86_64
238 # include "x86_64-asm.h"
239 #else
240 # include "i386-asm.h"
241 #endif
242 /* last operation */
243 { 0, },
246 static const uint16_t op0_codes[] = {
247 #define ALT(x)
248 #define DEF_ASM_OP0(x, opcode) opcode,
249 #define DEF_ASM_OP0L(name, opcode, group, instr_type)
250 #define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
251 #define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
252 #define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
253 #ifdef TCC_TARGET_X86_64
254 # include "x86_64-asm.h"
255 #else
256 # include "i386-asm.h"
257 #endif
260 static inline int get_reg_shift(TCCState *s1)
262 int shift, v;
263 v = asm_int_expr(s1);
264 switch(v) {
265 case 1:
266 shift = 0;
267 break;
268 case 2:
269 shift = 1;
270 break;
271 case 4:
272 shift = 2;
273 break;
274 case 8:
275 shift = 3;
276 break;
277 default:
278 expect("1, 2, 4 or 8 constant");
279 shift = 0;
280 break;
282 return shift;
285 #ifdef TCC_TARGET_X86_64
286 static int asm_parse_numeric_reg(int t, unsigned int *type)
288 int reg = -1;
289 if (t >= TOK_IDENT && t < tok_ident) {
290 const char *s = table_ident[t - TOK_IDENT]->str;
291 char c;
292 *type = OP_REG64;
293 if (*s == 'c') {
294 s++;
295 *type = OP_CR;
297 if (*s++ != 'r')
298 return -1;
299 /* Don't allow leading '0'. */
300 if ((c = *s++) >= '1' && c <= '9')
301 reg = c - '0';
302 else
303 return -1;
304 if ((c = *s) >= '0' && c <= '5')
305 s++, reg = reg * 10 + c - '0';
306 if (reg > 15)
307 return -1;
308 if ((c = *s) == 0)
310 else if (*type != OP_REG64)
311 return -1;
312 else if (c == 'b' && !s[1])
313 *type = OP_REG8;
314 else if (c == 'w' && !s[1])
315 *type = OP_REG16;
316 else if (c == 'd' && !s[1])
317 *type = OP_REG32;
318 else
319 return -1;
321 return reg;
323 #endif
325 static int asm_parse_reg(unsigned int *type)
327 int reg = 0;
328 *type = 0;
329 if (tok != '%')
330 goto error_32;
331 next();
332 if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
333 reg = tok - TOK_ASM_eax;
334 *type = OP_REG32;
335 #ifdef TCC_TARGET_X86_64
336 } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
337 reg = tok - TOK_ASM_rax;
338 *type = OP_REG64;
339 } else if (tok == TOK_ASM_rip) {
340 reg = -2; /* Probably should use different escape code. */
341 *type = OP_REG64;
342 } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0
343 && (*type == OP_REG32 || *type == OP_REG64)) {
345 #endif
346 } else {
347 error_32:
348 expect("register");
350 next();
351 return reg;
354 static void parse_operand(TCCState *s1, Operand *op)
356 ExprValue e;
357 int reg, indir;
358 const char *p;
360 indir = 0;
361 if (tok == '*') {
362 next();
363 indir = OP_INDIR;
366 if (tok == '%') {
367 next();
368 if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
369 reg = tok - TOK_ASM_al;
370 op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
371 op->reg = reg & 7;
372 if ((op->type & OP_REG) && op->reg == TREG_XAX)
373 op->type |= OP_EAX;
374 else if (op->type == OP_REG8 && op->reg == TREG_XCX)
375 op->type |= OP_CL;
376 else if (op->type == OP_REG16 && op->reg == TREG_XDX)
377 op->type |= OP_DX;
378 } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
379 op->type = OP_DB;
380 op->reg = tok - TOK_ASM_dr0;
381 } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
382 op->type = OP_SEG;
383 op->reg = tok - TOK_ASM_es;
384 } else if (tok == TOK_ASM_st) {
385 op->type = OP_ST;
386 op->reg = 0;
387 next();
388 if (tok == '(') {
389 next();
390 if (tok != TOK_PPNUM)
391 goto reg_error;
392 p = tokc.str.data;
393 reg = p[0] - '0';
394 if ((unsigned)reg >= 8 || p[1] != '\0')
395 goto reg_error;
396 op->reg = reg;
397 next();
398 skip(')');
400 if (op->reg == 0)
401 op->type |= OP_ST0;
402 goto no_skip;
403 #ifdef TCC_TARGET_X86_64
404 } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) {
405 op->type = OP_REG8 | OP_REG8_LOW;
406 op->reg = 4 + tok - TOK_ASM_spl;
407 } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) {
409 #endif
410 } else {
411 reg_error:
412 tcc_error("unknown register %%%s", get_tok_str(tok, &tokc));
414 next();
415 no_skip: ;
416 } else if (tok == '$') {
417 /* constant value */
418 next();
419 asm_expr(s1, &e);
420 op->type = OP_IM32;
421 op->e = e;
422 if (!op->e.sym) {
423 if (op->e.v == (uint8_t)op->e.v)
424 op->type |= OP_IM8;
425 if (op->e.v == (int8_t)op->e.v)
426 op->type |= OP_IM8S;
427 if (op->e.v == (uint16_t)op->e.v)
428 op->type |= OP_IM16;
429 #ifdef TCC_TARGET_X86_64
430 if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v)
431 op->type = OP_IM64;
432 #endif
434 } else {
435 /* address(reg,reg2,shift) with all variants */
436 op->type = OP_EA;
437 op->reg = -1;
438 op->reg2 = -1;
439 op->shift = 0;
440 if (tok != '(') {
441 asm_expr(s1, &e);
442 op->e = e;
443 } else {
444 next();
445 if (tok == '%') {
446 unget_tok('(');
447 op->e.v = 0;
448 op->e.sym = NULL;
449 } else {
450 /* bracketed offset expression */
451 asm_expr(s1, &e);
452 if (tok != ')')
453 expect(")");
454 next();
455 op->e.v = e.v;
456 op->e.sym = e.sym;
458 op->e.pcrel = 0;
460 if (tok == '(') {
461 unsigned int type = 0;
462 next();
463 if (tok != ',') {
464 op->reg = asm_parse_reg(&type);
466 if (tok == ',') {
467 next();
468 if (tok != ',') {
469 op->reg2 = asm_parse_reg(&type);
471 if (tok == ',') {
472 next();
473 op->shift = get_reg_shift(s1);
476 if (type & OP_REG32)
477 op->type |= OP_EA32;
478 skip(')');
480 if (op->reg == -1 && op->reg2 == -1)
481 op->type |= OP_ADDR;
483 op->type |= indir;
486 /* XXX: unify with C code output ? */
487 ST_FUNC void gen_expr32(ExprValue *pe)
489 if (pe->pcrel)
490 /* If PC-relative, always set VT_SYM, even without symbol,
491 so as to force a relocation to be emitted. */
492 gen_addrpc32(VT_SYM, pe->sym, pe->v + (ind + 4));
493 else
494 gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
497 #ifdef TCC_TARGET_X86_64
498 ST_FUNC void gen_expr64(ExprValue *pe)
500 gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
502 #endif
504 /* XXX: unify with C code output ? */
505 static void gen_disp32(ExprValue *pe)
507 Sym *sym = pe->sym;
508 ElfSym *esym = elfsym(sym);
509 if (esym && esym->st_shndx == cur_text_section->sh_num) {
510 /* same section: we can output an absolute value. Note
511 that the TCC compiler behaves differently here because
512 it always outputs a relocation to ease (future) code
513 elimination in the linker */
514 gen_le32(pe->v + esym->st_value - ind - 4);
515 } else {
516 if (sym && sym->type.t == VT_VOID) {
517 sym->type.t = VT_FUNC;
518 sym->type.ref = NULL;
520 #ifdef TCC_TARGET_X86_64
521 greloca(cur_text_section, sym, ind, R_X86_64_PLT32, pe->v - 4);
522 gen_le32(0);
523 #else
524 gen_addrpc32(VT_SYM, sym, pe->v);
525 #endif
530 /* generate the modrm operand */
531 static inline int asm_modrm(int reg, Operand *op)
533 int mod, reg1, reg2, sib_reg1;
535 if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
536 g(0xc0 + (reg << 3) + op->reg);
537 } else if (op->reg == -1 && op->reg2 == -1) {
538 /* displacement only */
539 #ifdef TCC_TARGET_X86_64
540 g(0x04 + (reg << 3));
541 g(0x25);
542 #else
543 g(0x05 + (reg << 3));
544 #endif
545 gen_expr32(&op->e);
546 #ifdef TCC_TARGET_X86_64
547 } else if (op->reg == -2) {
548 ExprValue *pe = &op->e;
549 g(0x05 + (reg << 3));
550 gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
551 return ind;
552 #endif
553 } else {
554 sib_reg1 = op->reg;
555 /* fist compute displacement encoding */
556 if (sib_reg1 == -1) {
557 sib_reg1 = 5;
558 mod = 0x00;
559 } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
560 mod = 0x00;
561 } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
562 mod = 0x40;
563 } else {
564 mod = 0x80;
566 /* compute if sib byte needed */
567 reg1 = op->reg;
568 if (op->reg2 != -1)
569 reg1 = 4;
570 g(mod + (reg << 3) + reg1);
571 if (reg1 == 4) {
572 /* add sib byte */
573 reg2 = op->reg2;
574 if (reg2 == -1)
575 reg2 = 4; /* indicate no index */
576 g((op->shift << 6) + (reg2 << 3) + sib_reg1);
578 /* add offset */
579 if (mod == 0x40) {
580 g(op->e.v);
581 } else if (mod == 0x80 || op->reg == -1) {
582 gen_expr32(&op->e);
585 return 0;
588 #ifdef TCC_TARGET_X86_64
589 #define REX_W 0x48
590 #define REX_R 0x44
591 #define REX_X 0x42
592 #define REX_B 0x41
594 static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type,
595 int regi, int rmi)
597 unsigned char rex = width64 ? 0x48 : 0;
598 int saw_high_8bit = 0;
599 int i;
600 if (rmi == -1) {
601 /* No mod/rm byte, but we might have a register op nevertheless
602 (we will add it to the opcode later). */
603 for(i = 0; i < nb_ops; i++) {
604 if (op_type[i] & (OP_REG | OP_ST)) {
605 if (ops[i].reg >= 8) {
606 rex |= REX_B;
607 ops[i].reg -= 8;
608 } else if (ops[i].type & OP_REG8_LOW)
609 rex |= 0x40;
610 else if (ops[i].type & OP_REG8 && ops[i].reg >= 4)
611 /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
612 saw_high_8bit = ops[i].reg;
613 break;
616 } else {
617 if (regi != -1) {
618 if (ops[regi].reg >= 8) {
619 rex |= REX_R;
620 ops[regi].reg -= 8;
621 } else if (ops[regi].type & OP_REG8_LOW)
622 rex |= 0x40;
623 else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4)
624 /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
625 saw_high_8bit = ops[regi].reg;
627 if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) {
628 if (ops[rmi].reg >= 8) {
629 rex |= REX_B;
630 ops[rmi].reg -= 8;
631 } else if (ops[rmi].type & OP_REG8_LOW)
632 rex |= 0x40;
633 else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4)
634 /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */
635 saw_high_8bit = ops[rmi].reg;
637 if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) {
638 rex |= REX_X;
639 ops[rmi].reg2 -= 8;
642 if (rex) {
643 if (saw_high_8bit)
644 tcc_error("can't encode register %%%ch when REX prefix is required",
645 "acdb"[saw_high_8bit-4]);
646 g(rex);
649 #endif
652 static void maybe_print_stats (void)
654 static int already;
656 if (0 && !already)
657 /* print stats about opcodes */
659 const struct ASMInstr *pa;
660 int freq[4];
661 int op_vals[500];
662 int nb_op_vals, i, j;
664 already = 1;
665 nb_op_vals = 0;
666 memset(freq, 0, sizeof(freq));
667 for(pa = asm_instrs; pa->sym != 0; pa++) {
668 freq[pa->nb_ops]++;
669 //for(i=0;i<pa->nb_ops;i++) {
670 for(j=0;j<nb_op_vals;j++) {
671 //if (pa->op_type[i] == op_vals[j])
672 if (pa->instr_type == op_vals[j])
673 goto found;
675 //op_vals[nb_op_vals++] = pa->op_type[i];
676 op_vals[nb_op_vals++] = pa->instr_type;
677 found: ;
680 for(i=0;i<nb_op_vals;i++) {
681 int v = op_vals[i];
682 //if ((v & (v - 1)) != 0)
683 printf("%3d: %08x\n", i, v);
685 printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
686 (int)sizeof(asm_instrs),
687 (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
688 freq[0], freq[1], freq[2], freq[3]);
692 ST_FUNC void asm_opcode(TCCState *s1, int opcode)
694 const ASMInstr *pa;
695 int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p;
696 int nb_ops, s;
697 Operand ops[MAX_OPERANDS], *pop;
698 int op_type[3]; /* decoded op type */
699 int alltypes; /* OR of all operand types */
700 int autosize;
701 int p66;
702 #ifdef TCC_TARGET_X86_64
703 int rex64;
704 #endif
706 maybe_print_stats();
707 /* force synthetic ';' after prefix instruction, so we can handle */
708 /* one-line things like "rep stosb" instead of only "rep\nstosb" */
709 if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
710 unget_tok(';');
712 /* get operands */
713 pop = ops;
714 nb_ops = 0;
715 seg_prefix = 0;
716 alltypes = 0;
717 for(;;) {
718 if (tok == ';' || tok == TOK_LINEFEED)
719 break;
720 if (nb_ops >= MAX_OPERANDS) {
721 tcc_error("incorrect number of operands");
723 parse_operand(s1, pop);
724 if (tok == ':') {
725 if (pop->type != OP_SEG || seg_prefix)
726 tcc_error("incorrect prefix");
727 seg_prefix = segment_prefixes[pop->reg];
728 next();
729 parse_operand(s1, pop);
730 if (!(pop->type & OP_EA)) {
731 tcc_error("segment prefix must be followed by memory reference");
734 pop++;
735 nb_ops++;
736 if (tok != ',')
737 break;
738 next();
741 s = 0; /* avoid warning */
743 again:
744 /* optimize matching by using a lookup table (no hashing is needed
745 !) */
746 for(pa = asm_instrs; pa->sym != 0; pa++) {
747 int it = pa->instr_type & OPCT_MASK;
748 s = 0;
749 if (it == OPC_FARITH) {
750 v = opcode - pa->sym;
751 if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
752 continue;
753 } else if (it == OPC_ARITH) {
754 if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
755 continue;
756 s = (opcode - pa->sym) % NBWLX;
757 if ((pa->instr_type & OPC_BWLX) == OPC_WLX)
759 /* We need to reject the xxxb opcodes that we accepted above.
760 Note that pa->sym for WLX opcodes is the 'w' token,
761 to get the 'b' token subtract one. */
762 if (((opcode - pa->sym + 1) % NBWLX) == 0)
763 continue;
764 s++;
766 } else if (it == OPC_SHIFT) {
767 if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
768 continue;
769 s = (opcode - pa->sym) % NBWLX;
770 } else if (it == OPC_TEST) {
771 if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
772 continue;
773 /* cmovxx is a test opcode but accepts multiple sizes.
774 The suffixes aren't encoded in the table, instead we
775 simply force size autodetection always and deal with suffixed
776 variants below when we don't find e.g. "cmovzl". */
777 if (pa->instr_type & OPC_WLX)
778 s = NBWLX - 1;
779 } else if (pa->instr_type & OPC_B) {
780 #ifdef TCC_TARGET_X86_64
781 /* Some instructions don't have the full size but only
782 bwl form. insb e.g. */
783 if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
784 && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
785 continue;
786 #endif
787 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
788 continue;
789 s = opcode - pa->sym;
790 } else if (pa->instr_type & OPC_WLX) {
791 if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
792 continue;
793 s = opcode - pa->sym + 1;
794 } else {
795 if (pa->sym != opcode)
796 continue;
798 if (pa->nb_ops != nb_ops)
799 continue;
800 #ifdef TCC_TARGET_X86_64
801 /* Special case for moves. Selecting the IM64->REG64 form
802 should only be done if we really have an >32bit imm64, and that
803 is hardcoded. Ignore it here. */
804 if (pa->opcode == 0xb0 && ops[0].type != OP_IM64
805 && (ops[1].type & OP_REG) == OP_REG64
806 && !(pa->instr_type & OPC_0F))
807 continue;
808 #endif
809 /* now decode and check each operand */
810 alltypes = 0;
811 for(i = 0; i < nb_ops; i++) {
812 int op1, op2;
813 op1 = pa->op_type[i];
814 op2 = op1 & 0x1f;
815 switch(op2) {
816 case OPT_IM:
817 v = OP_IM8 | OP_IM16 | OP_IM32;
818 break;
819 case OPT_REG:
820 v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
821 break;
822 case OPT_REGW:
823 v = OP_REG16 | OP_REG32 | OP_REG64;
824 break;
825 case OPT_IMW:
826 v = OP_IM16 | OP_IM32;
827 break;
828 case OPT_MMXSSE:
829 v = OP_MMX | OP_SSE;
830 break;
831 case OPT_DISP:
832 case OPT_DISP8:
833 v = OP_ADDR;
834 break;
835 default:
836 v = 1 << op2;
837 break;
839 if (op1 & OPT_EA)
840 v |= OP_EA;
841 op_type[i] = v;
842 if ((ops[i].type & v) == 0)
843 goto next;
844 alltypes |= ops[i].type;
846 (void)alltypes; /* maybe unused */
847 /* all is matching ! */
848 break;
849 next: ;
851 if (pa->sym == 0) {
852 if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
853 int b;
854 b = op0_codes[opcode - TOK_ASM_first];
855 if (b & 0xff00)
856 g(b >> 8);
857 g(b);
858 return;
859 } else if (opcode <= TOK_ASM_alllast) {
860 tcc_error("bad operand with opcode '%s'",
861 get_tok_str(opcode, NULL));
862 } else {
863 /* Special case for cmovcc, we accept size suffixes but ignore
864 them, but we don't want them to blow up our tables. */
865 TokenSym *ts = table_ident[opcode - TOK_IDENT];
866 if (ts->len >= 6
867 && strchr("wlq", ts->str[ts->len-1])
868 && !memcmp(ts->str, "cmov", 4)) {
869 opcode = tok_alloc(ts->str, ts->len-1)->tok;
870 goto again;
872 tcc_error("unknown opcode '%s'", ts->str);
875 /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
876 autosize = NBWLX-1;
877 #ifdef TCC_TARGET_X86_64
878 /* XXX the autosize should rather be zero, to not have to adjust this
879 all the time. */
880 if ((pa->instr_type & OPC_BWLQ) == OPC_B)
881 autosize = NBWLX-2;
882 #endif
883 if (s == autosize) {
884 /* Check for register operands providing hints about the size.
885 Start from the end, i.e. destination operands. This matters
886 only for opcodes accepting different sized registers, lar and lsl
887 are such opcodes. */
888 for(i = nb_ops - 1; s == autosize && i >= 0; i--) {
889 if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
890 s = reg_to_size[ops[i].type & OP_REG];
892 if (s == autosize) {
893 if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
894 (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
895 s = 2;
896 else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
897 (ops[0].type & OP_EA))
898 s = NBWLX - 2;
899 else
900 tcc_error("cannot infer opcode suffix");
904 #ifdef TCC_TARGET_X86_64
905 rex64 = 0;
906 if (pa->instr_type & OPC_48)
907 rex64 = 1;
908 else if (s == 3 || (alltypes & OP_REG64)) {
909 /* generate REX prefix */
910 int default64 = 0;
911 for(i = 0; i < nb_ops; i++) {
912 if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) {
913 /* If only 64bit regs are accepted in one operand
914 this is a default64 instruction without need for
915 REX prefixes, except for movabs(0xb8). */
916 default64 = 1;
917 break;
920 /* XXX find better encoding for the default64 instructions. */
921 if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
922 && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
923 && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
924 && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
925 && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
926 && !default64)
927 rex64 = 1;
929 #endif
931 /* now generates the operation */
932 if (OPCT_IS(pa->instr_type, OPC_FWAIT))
933 g(0x9b);
934 if (seg_prefix)
935 g(seg_prefix);
936 #ifdef TCC_TARGET_X86_64
937 /* Generate addr32 prefix if needed */
938 for(i = 0; i < nb_ops; i++) {
939 if (ops[i].type & OP_EA32) {
940 g(0x67);
941 break;
944 #endif
945 /* generate data16 prefix if needed */
946 p66 = 0;
947 if (s == 1)
948 p66 = 1;
949 else {
950 /* accepting mmx+sse in all operands --> needs 0x66 to
951 switch to sse mode. Accepting only sse in an operand --> is
952 already SSE insn and needs 0x66/f2/f3 handling. */
953 for (i = 0; i < nb_ops; i++)
954 if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
955 && ops[i].type & OP_SSE)
956 p66 = 1;
958 if (p66)
959 g(0x66);
961 v = pa->opcode;
962 p = v >> 8; /* possibly prefix byte(s) */
963 switch (p) {
964 case 0: break; /* no prefix */
965 case 0x48: break; /* REX, handled elsewhere */
966 case 0x66:
967 case 0x67:
968 case 0xf2:
969 case 0xf3: v = v & 0xff; g(p); break;
970 case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */
971 case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */
972 case 0xdc: case 0xdd: case 0xde: case 0xdf: break;
973 default: tcc_error("bad prefix 0x%2x in opcode table", p); break;
975 if (pa->instr_type & OPC_0F)
976 v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
977 if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
978 /* kludge for imul $im, %reg */
979 nb_ops = 3;
980 ops[2] = ops[1];
981 op_type[2] = op_type[1];
982 } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
983 v--; /* int $3 case */
984 nb_ops = 0;
985 } else if ((v == 0x06 || v == 0x07)) {
986 if (ops[0].reg >= 4) {
987 /* push/pop %fs or %gs */
988 v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
989 } else {
990 v += ops[0].reg << 3;
992 nb_ops = 0;
993 } else if (v <= 0x05) {
994 /* arith case */
995 v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
996 } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
997 /* fpu arith case */
998 v += ((opcode - pa->sym) / 6) << 3;
1001 /* search which operand will be used for modrm */
1002 modrm_index = -1;
1003 modreg_index = -1;
1004 if (pa->instr_type & OPC_MODRM) {
1005 if (!nb_ops) {
1006 /* A modrm opcode without operands is a special case (e.g. mfence).
1007 It has a group and acts as if there's an register operand 0
1008 (ax). */
1009 i = 0;
1010 ops[i].type = OP_REG;
1011 ops[i].reg = 0;
1012 goto modrm_found;
1014 /* first look for an ea operand */
1015 for(i = 0;i < nb_ops; i++) {
1016 if (op_type[i] & OP_EA)
1017 goto modrm_found;
1019 /* then if not found, a register or indirection (shift instructions) */
1020 for(i = 0;i < nb_ops; i++) {
1021 if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
1022 goto modrm_found;
1024 #ifdef ASM_DEBUG
1025 tcc_error("bad op table");
1026 #endif
1027 modrm_found:
1028 modrm_index = i;
1029 /* if a register is used in another operand then it is
1030 used instead of group */
1031 for(i = 0;i < nb_ops; i++) {
1032 int t = op_type[i];
1033 if (i != modrm_index &&
1034 (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
1035 modreg_index = i;
1036 break;
1040 #ifdef TCC_TARGET_X86_64
1041 asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index);
1042 #endif
1044 if (pa->instr_type & OPC_REG) {
1045 /* mov $im, %reg case */
1046 if (v == 0xb0 && s >= 1)
1047 v += 7;
1048 for(i = 0; i < nb_ops; i++) {
1049 if (op_type[i] & (OP_REG | OP_ST)) {
1050 v += ops[i].reg;
1051 break;
1055 if (pa->instr_type & OPC_B)
1056 v += s >= 1;
1057 if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
1058 ElfSym *esym;
1059 int jmp_disp;
1061 /* see if we can really generate the jump with a byte offset */
1062 esym = elfsym(ops[0].e.sym);
1063 if (!esym || esym->st_shndx != cur_text_section->sh_num)
1064 goto no_short_jump;
1065 jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff);
1066 if (jmp_disp == (int8_t)jmp_disp) {
1067 /* OK to generate jump */
1068 ops[0].e.sym = 0;
1069 ops[0].e.v = jmp_disp;
1070 op_type[0] = OP_IM8S;
1071 } else {
1072 no_short_jump:
1073 /* long jump will be allowed. need to modify the
1074 opcode slightly */
1075 if (v == 0xeb) /* jmp */
1076 v = 0xe9;
1077 else if (v == 0x70) /* jcc */
1078 v += 0x0f10;
1079 else
1080 tcc_error("invalid displacement");
1083 if (OPCT_IS(pa->instr_type, OPC_TEST))
1084 v += test_bits[opcode - pa->sym];
1085 else if (OPCT_IS(pa->instr_type, OPC_0F01))
1086 v |= 0x0f0100;
1087 op1 = v >> 16;
1088 if (op1)
1089 g(op1);
1090 op1 = (v >> 8) & 0xff;
1091 if (op1)
1092 g(op1);
1093 g(v);
1095 if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
1096 reg = (opcode - pa->sym) / NBWLX;
1097 if (reg == 6)
1098 reg = 7;
1099 } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
1100 reg = (opcode - pa->sym) / NBWLX;
1101 } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
1102 reg = (opcode - pa->sym) / 6;
1103 } else {
1104 reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
1107 pc = 0;
1108 if (pa->instr_type & OPC_MODRM) {
1109 /* if a register is used in another operand then it is
1110 used instead of group */
1111 if (modreg_index >= 0)
1112 reg = ops[modreg_index].reg;
1113 pc = asm_modrm(reg, &ops[modrm_index]);
1116 /* emit constants */
1117 #ifndef TCC_TARGET_X86_64
1118 if (!(pa->instr_type & OPC_0F)
1119 && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
1120 /* ljmp or lcall kludge */
1121 gen_expr32(&ops[1].e);
1122 if (ops[0].e.sym)
1123 tcc_error("cannot relocate");
1124 gen_le16(ops[0].e.v);
1125 return;
1127 #endif
1128 for(i = 0;i < nb_ops; i++) {
1129 v = op_type[i];
1130 if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
1131 /* if multiple sizes are given it means we must look
1132 at the op size */
1133 if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
1134 if (s == 0)
1135 v = OP_IM8;
1136 else if (s == 1)
1137 v = OP_IM16;
1138 else if (s == 2 || (v & OP_IM64) == 0)
1139 v = OP_IM32;
1140 else
1141 v = OP_IM64;
1144 if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
1145 tcc_error("cannot relocate");
1147 if (v & (OP_IM8 | OP_IM8S)) {
1148 g(ops[i].e.v);
1149 } else if (v & OP_IM16) {
1150 gen_le16(ops[i].e.v);
1151 #ifdef TCC_TARGET_X86_64
1152 } else if (v & OP_IM64) {
1153 gen_expr64(&ops[i].e);
1154 #endif
1155 } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
1156 gen_disp32(&ops[i].e);
1157 } else {
1158 gen_expr32(&ops[i].e);
1163 /* after immediate operands, adjust pc-relative address */
1164 if (pc)
1165 add32le(cur_text_section->data + pc - 4, pc - ind);
/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
static inline int constraint_priority(const char *str)
{
    int best = 0;
    int c;

    /* a multi-letter constraint gets the priority of its most
       general (highest numbered) alternative */
    while ((c = *str++) != '\0') {
        int pr;
        switch (c) {
        case 'A':
            pr = 0;
            break;
        case 'a': case 'b': case 'c': case 'd':
        case 'S': case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r': case 'R': case 'p':
            pr = 3;
            break;
        case 'N': case 'M': case 'I':
        case 'e': case 'i': case 'm': case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
            break;
        }
        if (pr > best)
            best = pr;
    }
    return best;
}
/* Advance P past any leading constraint modifier characters
   ('=', '&', '+', '%') and return a pointer to the constraint
   letters proper. */
static const char *skip_constraint_modifiers(const char *p)
{
    for (;;) {
        switch (*p) {
        case '=': case '&': case '+': case '%':
            p++;
            continue;
        default:
            return p;
        }
    }
}
1227 /* If T (a token) is of the form "%reg" returns the register
1228 number and type, otherwise return -1. */
1229 ST_FUNC int asm_parse_regvar (int t)
1231 const char *s;
1232 Operand op;
1233 if (t < TOK_IDENT || (t & SYM_FIELD))
1234 return -1;
1235 s = table_ident[t - TOK_IDENT]->str;
1236 if (s[0] != '%')
1237 return -1;
1238 t = tok_alloc_const(s + 1);
1239 unget_tok(t);
1240 unget_tok('%');
1241 parse_operand(tcc_state, &op);
1242 /* Accept only integer regs for now. */
1243 if (op.type & OP_REG)
1244 return op.reg;
1245 else
1246 return -1;
1249 #define REG_OUT_MASK 0x01
1250 #define REG_IN_MASK 0x02
1252 #define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)
/* Allocate hard registers for the operands of an inline asm statement.
   On return, every operand that needs a register has op->reg set, and
   *pout_reg is a scratch register for storing outputs whose lvalue is a
   pointer spilled to memory (VT_LLOCAL), or -1 if none is needed.
   clobber_regs[i] != 0 marks register i as unusable. */
1254 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
1255 int nb_operands, int nb_outputs,
1256 const uint8_t *clobber_regs,
1257 int *pout_reg)
1259 ASMOperand *op;
1260 int sorted_op[MAX_ASM_OPERANDS];
1261 int i, j, k, p1, p2, tmp, reg, c, reg_mask;
1262 const char *str;
1263 uint8_t regs_allocated[NB_ASM_REGS];
1265 /* init fields */
1266 for(i=0;i<nb_operands;i++) {
1267 op = &operands[i];
1268 op->input_index = -1;
1269 op->ref_index = -1;
1270 op->reg = -1;
1271 op->is_memory = 0;
1272 op->is_rw = 0;
1274 /* compute constraint priority and evaluate references to output
1275 constraints if input constraints */
1276 for(i=0;i<nb_operands;i++) {
1277 op = &operands[i];
1278 str = op->constraint;
1279 str = skip_constraint_modifiers(str);
1280 if (isnum(*str) || *str == '[') {
1281 /* this is a reference to another constraint */
1282 k = find_constraint(operands, nb_operands, str, NULL);
/* a reference must name an earlier operand and may only appear
   on inputs (i >= nb_outputs) */
1283 if ((unsigned)k >= i || i < nb_outputs)
1284 tcc_error("invalid reference in constraint %d ('%s')",
1285 i, str);
1286 op->ref_index = k;
1287 if (operands[k].input_index >= 0)
1288 tcc_error("cannot reference twice the same operand");
1289 operands[k].input_index = i;
1290 op->priority = 5;
1291 } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL
1292 && op->vt->sym
1293 && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) {
/* operand is a register variable: prefer its own register */
1294 op->priority = 1;
1295 op->reg = reg;
1296 } else {
1297 op->priority = constraint_priority(str);
1301 /* sort operands according to their priority */
1302 for(i=0;i<nb_operands;i++)
1303 sorted_op[i] = i;
/* O(n^2) selection sort; MAX_ASM_OPERANDS is small */
1304 for(i=0;i<nb_operands - 1;i++) {
1305 for(j=i+1;j<nb_operands;j++) {
1306 p1 = operands[sorted_op[i]].priority;
1307 p2 = operands[sorted_op[j]].priority;
1308 if (p2 < p1) {
1309 tmp = sorted_op[i];
1310 sorted_op[i] = sorted_op[j];
1311 sorted_op[j] = tmp;
/* clobbered registers can serve neither as inputs nor as outputs */
1316 for(i = 0;i < NB_ASM_REGS; i++) {
1317 if (clobber_regs[i])
1318 regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
1319 else
1320 regs_allocated[i] = 0;
1322 /* esp cannot be used */
1323 regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK;
1324 /* ebp cannot be used yet */
1325 regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK;
1327 /* allocate registers and generate corresponding asm moves */
1328 for(i=0;i<nb_operands;i++) {
1329 j = sorted_op[i];
1330 op = &operands[j];
1331 str = op->constraint;
1332 /* no need to allocate references */
1333 if (op->ref_index >= 0)
1334 continue;
1335 /* select if register is used for output, input or both */
1336 if (op->input_index >= 0) {
1337 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1338 } else if (j < nb_outputs) {
1339 reg_mask = REG_OUT_MASK;
1340 } else {
1341 reg_mask = REG_IN_MASK;
1343 if (op->reg >= 0) {
1344 if (is_reg_allocated(op->reg))
1345 tcc_error("asm regvar requests register that's taken already");
1346 reg = op->reg;
/* pre-bound register variable: remembered here; the constraint
   scan below validates it (see the op->reg checks in the cases) */
1348 try_next:
/* each iteration consumes one constraint letter; unsatisfiable
   alternatives jump back here to try the next one */
1349 c = *str++;
1350 switch(c) {
1351 case '=':
1352 goto try_next;
1353 case '+':
1354 op->is_rw = 1;
1355 /* FALL THRU */
1356 case '&':
1357 if (j >= nb_outputs)
1358 tcc_error("'%c' modifier can only be applied to outputs", c);
1359 reg_mask = REG_IN_MASK | REG_OUT_MASK;
1360 goto try_next;
1361 case 'A':
1362 /* allocate both eax and edx */
1363 if (is_reg_allocated(TREG_XAX) ||
1364 is_reg_allocated(TREG_XDX))
1365 goto try_next;
1366 op->is_llong = 1;
1367 op->reg = TREG_XAX;
1368 regs_allocated[TREG_XAX] |= reg_mask;
1369 regs_allocated[TREG_XDX] |= reg_mask;
1370 break;
1371 case 'a':
1372 reg = TREG_XAX;
1373 goto alloc_reg;
1374 case 'b':
1375 reg = 3;
1376 goto alloc_reg;
1377 case 'c':
1378 reg = TREG_XCX;
1379 goto alloc_reg;
1380 case 'd':
1381 reg = TREG_XDX;
1382 goto alloc_reg;
1383 case 'S':
1384 reg = 6;
1385 goto alloc_reg;
1386 case 'D':
1387 reg = 7;
1388 alloc_reg:
/* a specific register was requested: it must agree with any
   earlier regvar binding and still be free */
1389 if (op->reg >= 0 && reg != op->reg)
1390 goto try_next;
1391 if (is_reg_allocated(reg))
1392 goto try_next;
1393 goto reg_found;
1394 case 'q':
1395 /* eax, ebx, ecx or edx */
1396 if (op->reg >= 0) {
1397 if ((reg = op->reg) < 4)
1398 goto reg_found;
1399 } else for(reg = 0; reg < 4; reg++) {
1400 if (!is_reg_allocated(reg))
1401 goto reg_found;
1403 goto try_next;
1404 case 'r':
1405 case 'R':
1406 case 'p': /* A general address, for x86(64) any register is acceptable*/
1407 /* any general register */
1408 if ((reg = op->reg) >= 0)
1409 goto reg_found;
1410 else for(reg = 0; reg < 8; reg++) {
1411 if (!is_reg_allocated(reg))
1412 goto reg_found;
1414 goto try_next;
1415 reg_found:
1416 /* now we can reload in the register */
1417 op->is_llong = 0;
1418 op->reg = reg;
1419 regs_allocated[reg] |= reg_mask;
1420 break;
1421 case 'e':
1422 case 'i':
/* immediate constant; a symbolic address is acceptable here */
1423 if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
1424 goto try_next;
1425 break;
1426 case 'I':
1427 case 'N':
1428 case 'M':
/* numeric immediate only: VT_SYM must not be set (no relocation) */
1429 if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
1430 goto try_next;
1431 break;
1432 case 'm':
1433 case 'g':
1434 /* nothing special to do because the operand is already in
1435 memory, except if the pointer itself is stored in a
1436 memory variable (VT_LLOCAL case) */
1437 /* XXX: fix constant case */
1438 /* if it is a reference to a memory zone, it must lie
1439 in a register, so we reserve the register in the
1440 input registers and a load will be generated
1441 later */
1442 if (j < nb_outputs || c == 'm') {
1443 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1444 /* any general register */
1445 for(reg = 0; reg < 8; reg++) {
1446 if (!(regs_allocated[reg] & REG_IN_MASK))
1447 goto reg_found1;
1449 goto try_next;
1450 reg_found1:
1451 /* now we can reload in the register */
1452 regs_allocated[reg] |= REG_IN_MASK;
1453 op->reg = reg;
1454 op->is_memory = 1;
1457 break;
1458 default:
1459 tcc_error("asm constraint %d ('%s') could not be satisfied",
1460 j, op->constraint);
1461 break;
1463 /* if a reference is present for that operand, we assign it too */
1464 if (op->input_index >= 0) {
1465 operands[op->input_index].reg = op->reg;
1466 operands[op->input_index].is_llong = op->is_llong;
1470 /* compute out_reg. It is used to store outputs registers to memory
1471 locations references by pointers (VT_LLOCAL case) */
1472 *pout_reg = -1;
1473 for(i=0;i<nb_operands;i++) {
1474 op = &operands[i];
1475 if (op->reg >= 0 &&
1476 (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1477 !op->is_memory) {
1478 for(reg = 0; reg < 8; reg++) {
1479 if (!(regs_allocated[reg] & REG_OUT_MASK))
1480 goto reg_found2;
1482 tcc_error("could not find free output register for reloading");
1483 reg_found2:
1484 *pout_reg = reg;
1485 break;
1489 /* print sorted constraints */
1490 #ifdef ASM_DEBUG
1491 for(i=0;i<nb_operands;i++) {
1492 j = sorted_op[i];
1493 op = &operands[j];
1494 printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
1496 op->id ? get_tok_str(op->id, NULL) : "",
1497 op->constraint,
1498 op->vt->r,
1499 op->reg);
1501 if (*pout_reg >= 0)
1502 printf("out_reg=%d\n", *pout_reg);
1503 #endif
1506 ST_FUNC void subst_asm_operand(CString *add_str,
1507 SValue *sv, int modifier)
1509 int r, reg, size, val;
1511 r = sv->r;
1512 if ((r & VT_VALMASK) == VT_CONST) {
1513 if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
1514 modifier != 'P')
1515 cstr_ccat(add_str, '$');
1516 if (r & VT_SYM) {
1517 const char *name = get_tok_str(sv->sym->v, NULL);
1518 if (sv->sym->v >= SYM_FIRST_ANOM) {
1519 /* In case of anonymous symbols ("L.42", used
1520 for static data labels) we can't find them
1521 in the C symbol table when later looking up
1522 this name. So enter them now into the asm label
1523 list when we still know the symbol. */
1524 get_asm_sym(tok_alloc_const(name), sv->sym);
1526 if (tcc_state->leading_underscore)
1527 cstr_ccat(add_str, '_');
1528 cstr_cat(add_str, name, -1);
1529 if ((uint32_t)sv->c.i == 0)
1530 goto no_offset;
1531 cstr_ccat(add_str, '+');
1533 val = sv->c.i;
1534 if (modifier == 'n')
1535 val = -val;
1536 cstr_printf(add_str, "%d", (int)sv->c.i);
1537 no_offset:;
1538 #ifdef TCC_TARGET_X86_64
1539 if (r & VT_LVAL)
1540 cstr_cat(add_str, "(%rip)", -1);
1541 #endif
1542 } else if ((r & VT_VALMASK) == VT_LOCAL) {
1543 cstr_printf(add_str, "%d(%%%s)", (int)sv->c.i, get_tok_str(TOK_ASM_xax + 5, NULL));
1544 } else if (r & VT_LVAL) {
1545 reg = r & VT_VALMASK;
1546 if (reg >= VT_CONST)
1547 tcc_internal_error("");
1548 cstr_printf(add_str, "(%%%s)", get_tok_str(TOK_ASM_xax + reg, NULL));
1549 } else {
1550 /* register case */
1551 reg = r & VT_VALMASK;
1552 if (reg >= VT_CONST)
1553 tcc_internal_error("");
1555 /* choose register operand size */
1556 if ((sv->type.t & VT_BTYPE) == VT_BYTE ||
1557 (sv->type.t & VT_BTYPE) == VT_BOOL)
1558 size = 1;
1559 else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
1560 size = 2;
1561 #ifdef TCC_TARGET_X86_64
1562 else if ((sv->type.t & VT_BTYPE) == VT_LLONG ||
1563 (sv->type.t & VT_BTYPE) == VT_PTR)
1564 size = 8;
1565 #endif
1566 else
1567 size = 4;
1568 if (size == 1 && reg >= 4)
1569 size = 4;
1571 if (modifier == 'b') {
1572 if (reg >= 4)
1573 tcc_error("cannot use byte register");
1574 size = 1;
1575 } else if (modifier == 'h') {
1576 if (reg >= 4)
1577 tcc_error("cannot use byte register");
1578 size = -1;
1579 } else if (modifier == 'w') {
1580 size = 2;
1581 } else if (modifier == 'k') {
1582 size = 4;
1583 #ifdef TCC_TARGET_X86_64
1584 } else if (modifier == 'q') {
1585 size = 8;
1586 #endif
1589 switch(size) {
1590 case -1:
1591 reg = TOK_ASM_ah + reg;
1592 break;
1593 case 1:
1594 reg = TOK_ASM_al + reg;
1595 break;
1596 case 2:
1597 reg = TOK_ASM_ax + reg;
1598 break;
1599 default:
1600 reg = TOK_ASM_eax + reg;
1601 break;
1602 #ifdef TCC_TARGET_X86_64
1603 case 8:
1604 reg = TOK_ASM_rax + reg;
1605 break;
1606 #endif
1608 cstr_printf(add_str, "%%%s", get_tok_str(reg, NULL));
1612 /* generate prolog and epilog code for asm statement */
/* Called twice per asm statement: with is_output == 0 before the asm
   body (push the call-saved registers that will be clobbered, then load
   input operands into their registers), and with is_output != 0 after
   it (store output registers back, then pop the saved registers in
   reverse order).  out_reg is the scratch register chosen by
   asm_compute_constraints for VT_LLOCAL outputs. */
1613 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
1614 int nb_outputs, int is_output,
1615 uint8_t *clobber_regs,
1616 int out_reg)
1618 uint8_t regs_allocated[NB_ASM_REGS];
1619 ASMOperand *op;
1620 int i, reg;
1622 /* Strictly speaking %Xbp and %Xsp should be included in the
1623 call-preserved registers, but currently it doesn't matter. */
1624 #ifdef TCC_TARGET_X86_64
1625 #ifdef TCC_TARGET_PE
1626 static const uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 };
1627 #else
1628 static const uint8_t reg_saved[] = { 3, 12, 13, 14, 15 };
1629 #endif
1630 #else
1631 static const uint8_t reg_saved[] = { 3, 6, 7 };
1632 #endif
1634 /* mark all used registers */
1635 memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
1636 for(i = 0; i < nb_operands;i++) {
1637 op = &operands[i];
1638 if (op->reg >= 0)
1639 regs_allocated[op->reg] = 1;
1641 if (!is_output) {
1642 /* generate reg save code */
1643 for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) {
1644 reg = reg_saved[i];
1645 if (regs_allocated[reg]) {
/* 0x41 is the REX.B prefix needed for r8..r15; 0x50+reg = push */
1646 if (reg >= 8)
1647 g(0x41), reg-=8;
1648 g(0x50 + reg);
1652 /* generate load code */
1653 for(i = 0; i < nb_operands; i++) {
1654 op = &operands[i];
1655 if (op->reg >= 0) {
1656 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
1657 op->is_memory) {
1658 /* memory reference case (for both input and
1659 output cases) */
1660 SValue sv;
1661 sv = *op->vt;
1662 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
1663 sv.type.t = VT_PTR;
1664 load(op->reg, &sv);
1665 } else if (i >= nb_outputs || op->is_rw) {
1666 /* load value in register */
1667 load(op->reg, op->vt);
1668 if (op->is_llong) {
/* high half of a long long is 4 bytes further; it goes to XDX */
1669 SValue sv;
1670 sv = *op->vt;
1671 sv.c.i += 4;
1672 load(TREG_XDX, &sv);
1677 } else {
1678 /* generate save code */
1679 for(i = 0 ; i < nb_outputs; i++) {
1680 op = &operands[i];
1681 if (op->reg >= 0) {
1682 if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
1683 if (!op->is_memory) {
/* reload the destination pointer into out_reg, then store
   the output register through it */
1684 SValue sv;
1685 sv = *op->vt;
1686 sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
1687 sv.type.t = VT_PTR;
1688 load(out_reg, &sv);
1690 sv = *op->vt;
1691 sv.r = (sv.r & ~VT_VALMASK) | out_reg;
1692 store(op->reg, &sv);
1694 } else {
1695 store(op->reg, op->vt);
1696 if (op->is_llong) {
1697 SValue sv;
1698 sv = *op->vt;
1699 sv.c.i += 4;
1700 store(TREG_XDX, &sv);
1705 /* generate reg restore code */
/* pops in reverse order of the pushes above; 0x58+reg = pop */
1706 for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) {
1707 reg = reg_saved[i];
1708 if (regs_allocated[reg]) {
1709 if (reg >= 8)
1710 g(0x41), reg-=8;
1711 g(0x58 + reg);
1717 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
1719 int reg;
1720 #ifdef TCC_TARGET_X86_64
1721 unsigned int type;
1722 #endif
1724 if (!strcmp(str, "memory") ||
1725 !strcmp(str, "cc") ||
1726 !strcmp(str, "flags"))
1727 return;
1728 reg = tok_alloc_const(str);
1729 if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
1730 reg -= TOK_ASM_eax;
1731 } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
1732 reg -= TOK_ASM_ax;
1733 #ifdef TCC_TARGET_X86_64
1734 } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
1735 reg -= TOK_ASM_rax;
1736 } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) {
1738 #endif
1739 } else {
1740 tcc_error("invalid clobber register '%s'", str);
1742 clobber_regs[reg] = 1;