i386-gen.c

   1 /*
   2  *  X86 code generator for TCC
   3  *
   4  *  Copyright (c) 2001, 2002 Fabrice Bellard
   5  *
   6  *  This program is free software; you can redistribute it and/or modify
   7  *  it under the terms of the GNU General Public License as published by
   8  *  the Free Software Foundation; either version 2 of the License, or
   9  *  (at your option) any later version.
  10  *
  11  *  This program is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  *  GNU General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU General Public License
  17  *  along with this program; if not, write to the Free Software
  18  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19  */
  20
/* number of available registers (this is also the length of the
   reg_classes[] table) */
#define NB_REGS             4

/* Register classes. Each class is one bit so that a register can
   belong to several classes at once (see reg_classes[]). The classes
   must be sorted from more general to more precise (see the gv2() code,
   which makes assumptions about this ordering). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_EAX     0x0004
#define RC_ST0     0x0008
#define RC_ECX     0x0010
#define RC_EDX     0x0020
#define RC_IRET    RC_EAX /* function return: integer register */
#define RC_LRET    RC_EDX /* function return: second integer register */
#define RC_FRET    RC_ST0 /* function return: float register */
  36
/* pretty names for the registers. The integer register values are
   added directly to opcode bytes by the generator (for example
   'o(0xb8 + r)' in load()), and every value is an index into
   reg_classes[]. */
enum {
    REG_EAX = 0,
    REG_ECX,
    REG_EDX,
    REG_ST0,
};

/* classes of each register, indexed by the REG_xxx values above: a
   generic class (RC_INT or RC_FLOAT) plus the register's own precise
   class */
int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_EAX,
    /* ecx */ RC_INT | RC_ECX,
    /* edx */ RC_INT | RC_EDX,
    /* st0 */ RC_FLOAT | RC_ST0,
};
  51
/* return registers for function (the register numbers matching the
   RC_IRET, RC_LRET and RC_FRET classes) */
#define REG_IRET REG_EAX /* single word int return register */
#define REG_LRET REG_EDX /* second word return register (for long long) */
#define REG_FRET REG_ST0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. Currently left undefined, so
   gfunc_param() copies structures onto the stack. */
//#define FUNC_STRUCT_PARAM_AS_PTR

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes (gfunc_param() reserves the
   same 12 bytes when it pushes a long double argument) */
#define LDOUBLE_SIZE  12
#define LDOUBLE_ALIGN 4

/* relocation type for 32 bit data relocation */
#define R_DATA_32 R_386_32
  73
/* function call context: initialised by gfunc_start(), updated by each
   gfunc_param() and consumed by gfunc_call() */
typedef struct GFuncContext {
    int args_size; /* bytes of arguments pushed on the stack so far */
    int func_call; /* func call type (FUNC_STDCALL or FUNC_CDECL) */
} GFuncContext;
  79
  80 /******************************************************/
  81
/* state shared between gfunc_prolog() and gfunc_epilog() for the
   function currently being generated */

/* address of the immediate of the prolog's 'sub $xxx, %esp'; the
   epilog writes the final local variable size there */
static int *func_sub_sp_ptr;
/* value of lbounds_section->data_ptr when the prolog was generated
   (only set when bound checking is enabled) */
static unsigned char *func_bound_ptr;
/* operand of the final 'ret n' (0 means a plain 'ret' is emitted) */
static int func_ret_sub;
  85
  86 void g(int c)
  87 {
  88     *(char *)ind++ = c;
  89 }
  90
  91 void o(int c)
  92 {
  93     while (c) {
  94         g(c);
  95         c = c / 256;
  96     }
  97 }
  98
  99 void gen_le32(int c)
 100 {
 101     g(c);
 102     g(c >> 8);
 103     g(c >> 16);
 104     g(c >> 24);
 105 }
 106
 107 void greloc_patch(unsigned char *ptr,
 108                   unsigned long addr, unsigned long val, int type)
 109 {
 110     switch(type) {
 111     case R_386_32:
 112         *(int *)ptr += val;
 113         break;
 114     case R_386_PC32:
 115         *(int *)ptr += val - addr;
 116         break;
 117     case R_386_GOTPC:
 118         *(int *)ptr += val - addr; /* XXX: use GOT address directly
 119                                       instead of relying on
 120                                       _GLOBAL_OFFSET_TABLE symbol ? */
 121         break;
 122     }
 123 }
 124
 125 /* output a symbol and patch all calls to it */
 126 void gsym_addr(int t, int a)
 127 {
 128     int n;
 129     while (t) {
 130         n = *(int *)t; /* next value */
 131         *(int *)t = a - t - 4;
 132         t = n;
 133     }
 134 }
 135
 136 void gsym(int t)
 137 {
 138     gsym_addr(t, ind);
 139 }
 140
 141 /* psym is used to put an instruction with a data field which is a
 142    reference to a symbol. It is in fact the same as oad ! */
 143 #define psym oad
 144
 145 /* instruction + 4 bytes data. Return the address of the data */
 146 int oad(int c, int s)
 147 {
 148     o(c);
 149     *(int *)ind = s;
 150     s = ind;
 151     ind = ind + 4;
 152     return s;
 153 }
 154
 155 /* output constant with relocation if 'r & VT_SYM' is true */
 156 void gen_addr32(int r, int c)
 157 {
 158     if (!(r & VT_SYM)) {
 159         gen_le32(c);
 160     } else {
 161         greloc(cur_text_section,
 162                (Sym *)c, ind - (int)cur_text_section->data, R_386_32);
 163         gen_le32(0);
 164     }
 165 }
 166
 167 /* generate a modrm reference. 'op_reg' contains the addtionnal 3
 168    opcode bits */
 169 void gen_modrm(int op_reg, int r, int c)
 170 {
 171     op_reg = op_reg << 3;
 172     if ((r & VT_VALMASK) == VT_CONST) {
 173         /* constant memory reference */
 174         o(0x05 | op_reg);
 175         gen_addr32(r, c);
 176     } else if ((r & VT_VALMASK) == VT_LOCAL) {
 177         /* currently, we use only ebp as base */
 178         if (c == (char)c) {
 179             /* short reference */
 180             o(0x45 | op_reg);
 181             g(c);
 182         } else {
 183             oad(0x85 | op_reg, c);
 184         }
 185     } else {
 186         g(0x00 | op_reg | (r & VT_VALMASK));
 187     }
 188 }
 189
 190
 191 /* load 'r' from value 'sv' */
 192 void load(int r, SValue *sv)
 193 {
 194     int v, t, ft, fc, fr;
 195     SValue v1;
 196
 197     fr = sv->r;
 198     ft = sv->t;
 199     fc = sv->c.ul;
 200
 201     v = fr & VT_VALMASK;
 202     if (fr & VT_LVAL) {
 203         if (v == VT_LLOCAL) {
 204             v1.t = VT_INT;
 205             v1.r = VT_LOCAL | VT_LVAL;
 206             v1.c.ul = fc;
 207             load(r, &v1);
 208             fr = r;
 209         }
 210         if ((ft & VT_BTYPE) == VT_FLOAT) {
 211             o(0xd9); /* flds */
 212             r = 0;
 213         } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
 214             o(0xdd); /* fldl */
 215             r = 0;
 216         } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
 217             o(0xdb); /* fldt */
 218             r = 5;
 219         } else if ((ft & VT_TYPE) == VT_BYTE) {
 220             o(0xbe0f);   /* movsbl */
 221         } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
 222             o(0xb60f);   /* movzbl */
 223         } else if ((ft & VT_TYPE) == VT_SHORT) {
 224             o(0xbf0f);   /* movswl */
 225         } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
 226             o(0xb70f);   /* movzwl */
 227         } else {
 228             o(0x8b);     /* movl */
 229         }
 230         gen_modrm(r, fr, fc);
 231     } else {
 232         if (v == VT_CONST) {
 233             o(0xb8 + r); /* mov $xx, r */
 234             gen_addr32(fr, fc);
 235         } else if (v == VT_LOCAL) {
 236             o(0x8d); /* lea xxx(%ebp), r */
 237             gen_modrm(r, VT_LOCAL, fc);
 238         } else if (v == VT_CMP) {
 239             oad(0xb8 + r, 0); /* mov $0, r */
 240             o(0x0f); /* setxx %br */
 241             o(fc);
 242             o(0xc0 + r);
 243         } else if (v == VT_JMP || v == VT_JMPI) {
 244             t = v & 1;
 245             oad(0xb8 + r, t); /* mov $1, r */
 246             oad(0xe9, 5); /* jmp after */
 247             gsym(fc);
 248             oad(0xb8 + r, t ^ 1); /* mov $0, r */
 249         } else if (v != r) {
 250             o(0x89);
 251             o(0xc0 + r + v * 8); /* mov v, r */
 252         }
 253     }
 254 }
 255
 256 /* store register 'r' in lvalue 'v' */
 257 void store(int r, SValue *v)
 258 {
 259     int fr, bt, ft, fc;
 260
 261     ft = v->t;
 262     fc = v->c.ul;
 263     fr = v->r & VT_VALMASK;
 264     bt = ft & VT_BTYPE;
 265     /* XXX: incorrect if float reg to reg */
 266     if (bt == VT_FLOAT) {
 267         o(0xd9); /* fsts */
 268         r = 2;
 269     } else if (bt == VT_DOUBLE) {
 270         o(0xdd); /* fstpl */
 271         r = 2;
 272     } else if (bt == VT_LDOUBLE) {
 273         o(0xc0d9); /* fld %st(0) */
 274         o(0xdb); /* fstpt */
 275         r = 7;
 276     } else {
 277         if (bt == VT_SHORT)
 278             o(0x66);
 279         if (bt == VT_BYTE)
 280             o(0x88);
 281         else
 282             o(0x89);
 283     }
 284     if (fr == VT_CONST ||
 285         fr == VT_LOCAL ||
 286         (v->r & VT_LVAL)) {
 287         gen_modrm(r, v->r, fc);
 288     } else if (fr != r) {
 289         o(0xc0 + fr + r * 8); /* mov r, fr */
 290     }
 291 }
 292
 293 /* start function call and return function call context */
 294 void gfunc_start(GFuncContext *c, int func_call)
 295 {
 296     c->args_size = 0;
 297     c->func_call = func_call;
 298 }
 299
 300 /* push function parameter which is in (vtop->t, vtop->c). Stack entry
 301    is then popped. */
 302 void gfunc_param(GFuncContext *c)
 303 {
 304     int size, align, r;
 305
 306     if ((vtop->t & VT_BTYPE) == VT_STRUCT) {
 307         size = type_size(vtop->t, &align);
 308         /* align to stack align size */
 309         size = (size + 3) & ~3;
 310         /* allocate the necessary size on stack */
 311         oad(0xec81, size); /* sub $xxx, %esp */
 312         /* generate structure store */
 313         r = get_reg(RC_INT);
 314         o(0x89); /* mov %esp, r */
 315         o(0xe0 + r);
 316         vset(vtop->t, r | VT_LVAL, 0);
 317         vswap();
 318         vstore();
 319         c->args_size += size;
 320     } else if (is_float(vtop->t)) {
 321         gv(RC_FLOAT); /* only one float register */
 322         if ((vtop->t & VT_BTYPE) == VT_FLOAT)
 323             size = 4;
 324         else if ((vtop->t & VT_BTYPE) == VT_DOUBLE)
 325             size = 8;
 326         else
 327             size = 12;
 328         oad(0xec81, size); /* sub $xxx, %esp */
 329         if (size == 12)
 330             o(0x7cdb);
 331         else
 332             o(0x5cd9 + size - 4); /* fstp[s|l] 0(%esp) */
 333         g(0x24);
 334         g(0x00);
 335         c->args_size += size;
 336     } else {
 337         /* simple type (currently always same size) */
 338         /* XXX: implicit cast ? */
 339         r = gv(RC_INT);
 340         if ((vtop->t & VT_BTYPE) == VT_LLONG) {
 341             size = 8;
 342             o(0x50 + vtop->r2); /* push r */
 343         } else {
 344             size = 4;
 345         }
 346         o(0x50 + r); /* push r */
 347         c->args_size += size;
 348     }
 349     vtop--;
 350 }
 351
 352 /* generate function call with address in (vtop->t, vtop->c) and free function
 353    context. Stack entry is popped */
 354 void gfunc_call(GFuncContext *c)
 355 {
 356     int r;
 357     if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 358         /* constant case */
 359         if (vtop->r & VT_SYM) {
 360             /* relocation case */
 361             greloc(cur_text_section, vtop->c.sym,
 362                    ind + 1 - (int)cur_text_section->data, R_386_PC32);
 363             oad(0xe8, -4);
 364         } else {
 365             oad(0xe8, vtop->c.ul - ind - 5);
 366         }
 367     } else {
 368         /* otherwise, indirect call */
 369         r = gv(RC_INT);
 370         o(0xff); /* call *r */
 371         o(0xd0 + r);
 372     }
 373     if (c->args_size && c->func_call == FUNC_CDECL)
 374         oad(0xc481, c->args_size); /* add $xxx, %esp */
 375     vtop--;
 376 }
 377
 378 /* generate function prolog of type 't' */
 379 void gfunc_prolog(int t)
 380 {
 381     int addr, align, size, u, func_call;
 382     Sym *sym;
 383
 384     sym = sym_find((unsigned)t >> VT_STRUCT_SHIFT);
 385     func_call = sym->r;
 386     addr = 8;
 387     /* if the function returns a structure, then add an
 388        implicit pointer parameter */
 389     func_vt = sym->t;
 390     if ((func_vt & VT_BTYPE) == VT_STRUCT) {
 391         func_vc = addr;
 392         addr += 4;
 393     }
 394     /* define parameters */
 395     while ((sym = sym->next) != NULL) {
 396         u = sym->t;
 397         sym_push(sym->v & ~SYM_FIELD, u,
 398                  VT_LOCAL | VT_LVAL, addr);
 399         size = type_size(u, &align);
 400         size = (size + 3) & ~3;
 401 #ifdef FUNC_STRUCT_PARAM_AS_PTR
 402         /* structs are passed as pointer */
 403         if ((u & VT_BTYPE) == VT_STRUCT) {
 404             size = 4;
 405         }
 406 #endif
 407         addr += size;
 408     }
 409     func_ret_sub = 0;
 410     /* pascal type call ? */
 411     if (func_call == FUNC_STDCALL)
 412         func_ret_sub = addr - 8;
 413     o(0xe58955); /* push   %ebp, mov    %esp, %ebp */
 414     func_sub_sp_ptr = (int *)oad(0xec81, 0); /* sub $xxx, %esp */
 415     /* leave some room for bound checking code */
 416     if (do_bounds_check) {
 417         oad(0xb8, 0); /* lbound section pointer */
 418         oad(0xb8, 0); /* call to function */
 419         func_bound_ptr = lbounds_section->data_ptr;
 420     }
 421 }
 422
 423 /* generate function epilog */
 424 void gfunc_epilog(void)
 425 {
 426 #ifdef CONFIG_TCC_BCHECK
 427     if (do_bounds_check && func_bound_ptr != lbounds_section->data_ptr) {
 428         int saved_ind;
 429         int *bounds_ptr;
 430         /* add end of table info */
 431         bounds_ptr = (int *)lbounds_section->data_ptr;
 432         *bounds_ptr++ = 0;
 433         lbounds_section->data_ptr = (unsigned char *)bounds_ptr;
 434         /* generate bound local allocation */
 435         saved_ind = ind;
 436         ind = (int)func_sub_sp_ptr + 4;
 437         oad(0xb8, (int)func_bound_ptr); /* mov %eax, xxx */
 438         oad(0xe8, (int)__bound_local_new - ind - 5);
 439         ind = saved_ind;
 440         /* generate bound check local freeing */
 441         o(0x5250); /* save returned value, if any */
 442         oad(0xb8, (int)func_bound_ptr); /* mov %eax, xxx */
 443         oad(0xe8, (int)__bound_local_delete - ind - 5);
 444         o(0x585a); /* restore returned value, if any */
 445     }
 446 #endif
 447     o(0xc9); /* leave */
 448     if (func_ret_sub == 0) {
 449         o(0xc3); /* ret */
 450     } else {
 451         o(0xc2); /* ret n */
 452         g(func_ret_sub);
 453         g(func_ret_sub >> 8);
 454     }
 455     /* align local size to word & save local variables */
 456     *func_sub_sp_ptr = (-loc + 3) & -4;
 457 }
 458
 459 /* generate a jump to a label */
 460 int gjmp(int t)
 461 {
 462     return psym(0xe9, t);
 463 }
 464
 465 /* generate a jump to a fixed address */
 466 void gjmp_addr(int a)
 467 {
 468     oad(0xe9, a - ind - 5);
 469 }
 470
 471 /* generate a test. set 'inv' to invert test. Stack entry is popped */
 472 int gtst(int inv, int t)
 473 {
 474     int v, *p;
 475     v = vtop->r & VT_VALMASK;
 476     if (v == VT_CMP) {
 477         /* fast case : can jump directly since flags are set */
 478         g(0x0f);
 479         t = psym((vtop->c.i - 16) ^ inv, t);
 480     } else if (v == VT_JMP || v == VT_JMPI) {
 481         /* && or || optimization */
 482         if ((v & 1) == inv) {
 483             /* insert vtop->c jump list in t */
 484             p = &vtop->c.i;
 485             while (*p != 0)
 486                 p = (int *)*p;
 487             *p = t;
 488             t = vtop->c.i;
 489         } else {
 490             t = gjmp(t);
 491             gsym(vtop->c.i);
 492         }
 493     } else {
 494         if (is_float(vtop->t)) {
 495             vpushi(0);
 496             gen_op(TOK_NE);
 497         }
 498         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 499             /* constant jmp optimization */
 500             if ((vtop->c.i != 0) != inv)
 501                 t = gjmp(t);
 502         } else {
 503             v = gv(RC_INT);
 504             o(0x85);
 505             o(0xc0 + v * 9);
 506             g(0x0f);
 507             t = psym(0x85 ^ inv, t);
 508         }
 509     }
 510     vtop--;
 511     return t;
 512 }
 513
 514 /* generate an integer binary operation */
 515 void gen_opi(int op)
 516 {
 517     int r, fr, opc, c;
 518
 519     switch(op) {
 520     case '+':
 521     case TOK_ADDC1: /* add with carry generation */
 522         opc = 0;
 523     gen_op8:
 524         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 525             /* constant case */
 526             vswap();
 527             r = gv(RC_INT);
 528             vswap();
 529             c = vtop->c.i;
 530             if (c == (char)c) {
 531                 /* XXX: generate inc and dec for smaller code ? */
 532                 o(0x83);
 533                 o(0xc0 | (opc << 3) | r);
 534                 g(c);
 535             } else {
 536                 o(0x81);
 537                 oad(0xc0 | (opc << 3) | r, c);
 538             }
 539         } else {
 540             gv2(RC_INT, RC_INT);
 541             r = vtop[-1].r;
 542             fr = vtop[0].r;
 543             o((opc << 3) | 0x01);
 544             o(0xc0 + r + fr * 8);
 545         }
 546         vtop--;
 547         if (op >= TOK_ULT && op <= TOK_GT) {
 548             vtop--;
 549             vset(VT_INT, VT_CMP, op);
 550         }
 551         break;
 552     case '-':
 553     case TOK_SUBC1: /* sub with carry generation */
 554         opc = 5;
 555         goto gen_op8;
 556     case TOK_ADDC2: /* add with carry use */
 557         opc = 2;
 558         goto gen_op8;
 559     case TOK_SUBC2: /* sub with carry use */
 560         opc = 3;
 561         goto gen_op8;
 562     case '&':
 563         opc = 4;
 564         goto gen_op8;
 565     case '^':
 566         opc = 6;
 567         goto gen_op8;
 568     case '|':
 569         opc = 1;
 570         goto gen_op8;
 571     case '*':
 572         gv2(RC_INT, RC_INT);
 573         r = vtop[-1].r;
 574         fr = vtop[0].r;
 575         vtop--;
 576         o(0xaf0f); /* imul fr, r */
 577         o(0xc0 + fr + r * 8);
 578         break;
 579     case TOK_SHL:
 580         opc = 4;
 581         goto gen_shift;
 582     case TOK_SHR:
 583         opc = 5;
 584         goto gen_shift;
 585     case TOK_SAR:
 586         opc = 7;
 587     gen_shift:
 588         opc = 0xc0 | (opc << 3);
 589         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 590             /* constant case */
 591             vswap();
 592             r = gv(RC_INT);
 593             vswap();
 594             c = vtop->c.i & 0x1f;
 595             o(0xc1); /* shl/shr/sar $xxx, r */
 596             o(opc | r);
 597             g(c);
 598         } else {
 599             /* we generate the shift in ecx */
 600             gv2(RC_INT, RC_ECX);
 601             r = vtop[-1].r;
 602             o(0xd3); /* shl/shr/sar %cl, r */
 603             o(opc | r);
 604         }
 605         vtop--;
 606         break;
 607     case '/':
 608     case TOK_UDIV:
 609     case TOK_PDIV:
 610     case '%':
 611     case TOK_UMOD:
 612     case TOK_UMULL:
 613         /* first operand must be in eax */
 614         /* XXX: need better constraint for second operand */
 615         gv2(RC_EAX, RC_ECX);
 616         r = vtop[-1].r;
 617         fr = vtop[0].r;
 618         vtop--;
 619         save_reg(REG_EDX);
 620         if (op == TOK_UMULL) {
 621             o(0xf7); /* mul fr */
 622             o(0xe0 + fr);
 623             vtop->r2 = REG_EDX;
 624             r = REG_EAX;
 625         } else {
 626             if (op == TOK_UDIV || op == TOK_UMOD) {
 627                 o(0xf7d231); /* xor %edx, %edx, div fr, %eax */
 628                 o(0xf0 + fr);
 629             } else {
 630                 o(0xf799); /* cltd, idiv fr, %eax */
 631                 o(0xf8 + fr);
 632             }
 633             if (op == '%' || op == TOK_UMOD)
 634                 r = REG_EDX;
 635             else
 636                 r = REG_EAX;
 637         }
 638         vtop->r = r;
 639         break;
 640     default:
 641         opc = 7;
 642         goto gen_op8;
 643     }
 644 }
 645
 646 /* generate a floating point operation 'v = t1 op t2' instruction. The
 647    two operands are guaranted to have the same floating point type */
 648 /* XXX: need to use ST1 too */
 649 void gen_opf(int op)
 650 {
 651     int a, ft, fc, swapped, r;
 652
 653     /* convert constants to memory references */
 654     if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 655         vswap();
 656         gv(RC_FLOAT);
 657         vswap();
 658     }
 659     if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
 660         gv(RC_FLOAT);
 661
 662     /* must put at least one value in the floating point register */
 663     if ((vtop[-1].r & VT_LVAL) &&
 664         (vtop[0].r & VT_LVAL)) {
 665         vswap();
 666         gv(RC_FLOAT);
 667         vswap();
 668     }
 669     swapped = 0;
 670     /* swap the stack if needed so that t1 is the register and t2 is
 671        the memory reference */
 672     if (vtop[-1].r & VT_LVAL) {
 673         vswap();
 674         swapped = 1;
 675     }
 676     if (op >= TOK_ULT && op <= TOK_GT) {
 677         /* load on stack second operand */
 678         load(REG_ST0, vtop);
 679         save_reg(REG_EAX); /* eax is used by FP comparison code */
 680         if (op == TOK_GE || op == TOK_GT)
 681             swapped = !swapped;
 682         else if (op == TOK_EQ || op == TOK_NE)
 683             swapped = 0;
 684         if (swapped)
 685             o(0xc9d9); /* fxch %st(1) */
 686         o(0xe9da); /* fucompp */
 687         o(0xe0df); /* fnstsw %ax */
 688         if (op == TOK_EQ) {
 689             o(0x45e480); /* and $0x45, %ah */
 690             o(0x40fC80); /* cmp $0x40, %ah */
 691         } else if (op == TOK_NE) {
 692             o(0x45e480); /* and $0x45, %ah */
 693             o(0x40f480); /* xor $0x40, %ah */
 694             op = TOK_NE;
 695         } else if (op == TOK_GE || op == TOK_LE) {
 696             o(0x05c4f6); /* test $0x05, %ah */
 697             op = TOK_EQ;
 698         } else {
 699             o(0x45c4f6); /* test $0x45, %ah */
 700             op = TOK_EQ;
 701         }
 702         vtop--;
 703         vtop->r = VT_CMP;
 704         vtop->c.i = op;
 705     } else {
 706         /* no memory reference possible for long double operations */
 707         if ((vtop->t & VT_BTYPE) == VT_LDOUBLE) {
 708             load(REG_ST0, vtop);
 709             swapped = !swapped;
 710         }
 711
 712         switch(op) {
 713         default:
 714         case '+':
 715             a = 0;
 716             break;
 717         case '-':
 718             a = 4;
 719             if (swapped)
 720                 a++;
 721             break;
 722         case '*':
 723             a = 1;
 724             break;
 725         case '/':
 726             a = 6;
 727             if (swapped)
 728                 a++;
 729             break;
 730         }
 731         ft = vtop->t;
 732         fc = vtop->c.ul;
 733         if ((ft & VT_BTYPE) == VT_LDOUBLE) {
 734             o(0xde); /* fxxxp %st, %st(1) */
 735             o(0xc1 + (a << 3));
 736         } else {
 737             /* if saved lvalue, then we must reload it */
 738             r = vtop->r;
 739             if ((r & VT_VALMASK) == VT_LLOCAL) {
 740                 SValue v1;
 741                 r = get_reg(RC_INT);
 742                 v1.t = VT_INT;
 743                 v1.r = VT_LOCAL | VT_LVAL;
 744                 v1.c.ul = fc;
 745                 load(r, &v1);
 746                 fc = 0;
 747             }
 748
 749             if ((ft & VT_BTYPE) == VT_DOUBLE)
 750                 o(0xdc);
 751             else
 752                 o(0xd8);
 753             gen_modrm(a, r, fc);
 754         }
 755         vtop--;
 756     }
 757 }
 758
/* FPU control word for rounding to nearest mode; gen_cvt_ftoi() emits
   code that reloads it after a conversion */
/* XXX: should move that into tcc lib support code ! */
static unsigned short __tcc_fpu_control = 0x137f;
/* FPU control word for round to zero mode for int conversion: the same
   value with the 0x0c00 bits set. gen_cvt_ftoi() emits code that loads
   it before storing the integer. */
static unsigned short __tcc_int_fpu_control = 0x137f | 0x0c00;
 764
 765 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
 766    and 'long long' cases. */
 767 void gen_cvt_itof(int t)
 768 {
 769     save_reg(REG_ST0);
 770     gv(RC_INT);
 771     if ((vtop->t & VT_BTYPE) == VT_LLONG) {
 772         /* signed long long to float/double/long double (unsigned case
 773            is handled generically) */
 774         o(0x50 + vtop->r2); /* push r2 */
 775         o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
 776         o(0x242cdf); /* fildll (%esp) */
 777         o(0x08c483); /* add $8, %esp */
 778     } else if ((vtop->t & (VT_BTYPE | VT_UNSIGNED)) ==
 779                (VT_INT | VT_UNSIGNED)) {
 780         /* unsigned int to float/double/long double */
 781         o(0x6a); /* push $0 */
 782         g(0x00);
 783         o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
 784         o(0x242cdf); /* fildll (%esp) */
 785         o(0x08c483); /* add $8, %esp */
 786     } else {
 787         /* int to float/double/long double */
 788         o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
 789         o(0x2404db); /* fildl (%esp) */
 790         o(0x04c483); /* add $4, %esp */
 791     }
 792     vtop->r = REG_ST0;
 793 }
 794
 795 /* convert fp to int 't' type */
 796 /* XXX: handle long long case */
 797 void gen_cvt_ftoi(int t)
 798 {
 799     int r, r2, size;
 800
 801     gv(RC_FLOAT);
 802     if (t != VT_INT)
 803         size = 8;
 804     else
 805         size = 4;
 806
 807     oad(0x2dd9, (int)&__tcc_int_fpu_control); /* ldcw xxx */
 808     oad(0xec81, size); /* sub $xxx, %esp */
 809     if (size == 4)
 810         o(0x1cdb); /* fistpl */
 811     else
 812         o(0x3cdf); /* fistpll */
 813     o(0x24);
 814     oad(0x2dd9, (int)&__tcc_fpu_control); /* ldcw xxx */
 815     r = get_reg(RC_INT);
 816     o(0x58 + r); /* pop r */
 817     if (size == 8) {
 818         if (t == VT_LLONG) {
 819             vtop->r = r; /* mark reg as used */
 820             r2 = get_reg(RC_INT);
 821             o(0x58 + r2); /* pop r2 */
 822             vtop->r2 = r2;
 823         } else {
 824             o(0x04c483); /* add $4, %esp */
 825         }
 826     }
 827     vtop->r = r;
 828 }
 829
 830 /* convert from one floating point type to another */
 831 void gen_cvt_ftof(int t)
 832 {
 833     /* all we have to do on i386 is to put the float in a register */
 834     gv(RC_FLOAT);
 835 }
 836
 837 /* bound check support functions */
 838 #ifdef CONFIG_TCC_BCHECK
 839
 840 /* generate a bounded pointer addition */
 841 void gen_bounded_ptr_add(void)
 842 {
 843     int addr;
 844     /* prepare fast i386 function call (args in eax and edx) */
 845     gv2(RC_EAX, RC_EDX);
 846     /* save all temporary registers */
 847     vtop -= 2;
 848     save_regs(0);
 849     /* do a fast function call */
 850     addr = ind;
 851     oad(0xe8, (int)__bound_ptr_add - ind - 5);
 852     /* returned pointer is in eax */
 853     vtop++;
 854     vtop->r = REG_EAX | VT_BOUNDED;
 855     vtop->c.ul = addr; /* address of bounding function call point */
 856 }
 857
 858 /* patch pointer addition in vtop so that pointer dereferencing is
 859    also tested */
 860 void gen_bounded_ptr_deref(void)
 861 {
 862     void *func;
 863     int size, align, addr;
 864
 865     size = 0;
 866     /* XXX: put that code in generic part of tcc */
 867     if (!is_float(vtop->t)) {
 868         if (vtop->r & VT_LVAL_BYTE)
 869             size = 1;
 870         else if (vtop->r & VT_LVAL_SHORT)
 871             size = 2;
 872     }
 873     if (!size)
 874         size = type_size(vtop->t, &align);
 875     switch(size) {
 876     case  1: func = __bound_ptr_indir1; break;
 877     case  2: func = __bound_ptr_indir2; break;
 878     case  4: func = __bound_ptr_indir4; break;
 879     case  8: func = __bound_ptr_indir8; break;
 880     case 12: func = __bound_ptr_indir12; break;
 881     case 16: func = __bound_ptr_indir16; break;
 882     default:
 883         error("unhandled size when derefencing bounded pointer");
 884         func = NULL;
 885         break;
 886     }
 887
 888     addr = vtop->c.ul;
 889     *(int *)(addr + 1) = (int)func - addr - 5;
 890 }
 891 #endif
 892
 893 /* end of X86 code generator */
 894 /*************************************************************/
 895