arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #ifdef TCC_ARM_EABI
     /* an EABI build always uses the VFP configuration selected below */
  24 #define TCC_ARM_VFP
  25 #endif
  26
  27
  28 /* number of available registers: five integer registers (r0-r3, r12)
     plus four float registers, or eight float registers when
     TCC_ARM_VFP is defined (see the TREG_* enum) */
  29 #ifdef TCC_ARM_VFP
  30 #define NB_REGS            13
  31 #else
  32 #define NB_REGS             9
  33 #endif
  34
  35 /* A register can belong to several classes. The classes must be
  36    sorted from more general to more precise (see the gv2() code, which
  37    makes assumptions about this ordering). */
  38 #define RC_INT     0x0001 /* generic integer register */
  39 #define RC_FLOAT   0x0002 /* generic float register */
     /* one bit per individual register, so a class mask can name exactly
        one register (see two2mask() and regmask()) */
  40 #define RC_R0      0x0004
  41 #define RC_R1      0x0008
  42 #define RC_R2      0x0010
  43 #define RC_R3      0x0020
  44 #define RC_R12     0x0040
  45 #define RC_F0      0x0080
  46 #define RC_F1      0x0100
  47 #define RC_F2      0x0200
  48 #define RC_F3      0x0400
  49 #ifdef TCC_ARM_VFP
  50 #define RC_F4      0x0800
  51 #define RC_F5      0x1000
  52 #define RC_F6      0x2000
  53 #define RC_F7      0x4000
  54 #endif
  55 #define RC_IRET    RC_R0  /* function return: integer register */
  56 #define RC_LRET    RC_R1  /* function return: second integer register */
  57 #define RC_FRET    RC_F0  /* function return: float register */
  58
  59 /* pretty names for the registers */
  60 enum {
     /* The values are indices into reg_classes[]. TREG_R12 is 4 and
        TREG_F0 is 5; intr(), vfpr() and fpr() rely on these values when
        translating to hardware register numbers. */
  61     TREG_R0 = 0,
  62     TREG_R1,
  63     TREG_R2,
  64     TREG_R3,
  65     TREG_R12,
  66     TREG_F0,
  67     TREG_F1,
  68     TREG_F2,
  69     TREG_F3,
  70 #ifdef TCC_ARM_VFP
  71     TREG_F4,
  72     TREG_F5,
  73     TREG_F6,
  74     TREG_F7,
  75 #endif
  76 };
  77
  78 const int reg_classes[NB_REGS] = {
     /* Class mask of every register, indexed by its TREG_* number: the
        generic class (RC_INT or RC_FLOAT) or'ed with the bit that names
        this one register. */
  79     /* r0 */ RC_INT | RC_R0,
  80     /* r1 */ RC_INT | RC_R1,
  81     /* r2 */ RC_INT | RC_R2,
  82     /* r3 */ RC_INT | RC_R3,
  83     /* r12 */ RC_INT | RC_R12,
  84     /* f0 */ RC_FLOAT | RC_F0,
  85     /* f1 */ RC_FLOAT | RC_F1,
  86     /* f2 */ RC_FLOAT | RC_F2,
  87     /* f3 */ RC_FLOAT | RC_F3,
  88 #ifdef TCC_ARM_VFP
  89  /* d4/s8 */ RC_FLOAT | RC_F4,
  90 /* d5/s10 */ RC_FLOAT | RC_F5,
  91 /* d6/s12 */ RC_FLOAT | RC_F6,
  92 /* d7/s14 */ RC_FLOAT | RC_F7,
  93 #endif
  94 };
  95
  96 static int two2mask(int a,int b) {
  97   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
  98 }
  99
 100 static int regmask(int r) {
 101   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 102 }
 103
 104 #ifdef TCC_ARM_VFP
     /* 0x100 for every basic type other than VT_FLOAT, 0 for VT_FLOAT;
        or'ed into the VFP copy/compare encodings emitted by load() and
        gtst() */
 105 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
 106 #endif
 107
 108 /* return registers for function */
 109 #define REG_IRET TREG_R0 /* single word int return register */
 110 #define REG_LRET TREG_R1 /* second word return register (for long long) */
 111 #define REG_FRET TREG_F0 /* float return register */
 112
     /* under EABI the 64-bit division and modulo helper tokens are mapped
        onto the combined __aeabi_(u)ldivmod tokens */
 113 #ifdef TCC_ARM_EABI
 114 #define TOK___divdi3 TOK___aeabi_ldivmod
 115 #define TOK___moddi3 TOK___aeabi_ldivmod
 116 #define TOK___udivdi3 TOK___aeabi_uldivmod
 117 #define TOK___umoddi3 TOK___aeabi_uldivmod
 118 #endif
 119
 120 /* defined if function parameters must be evaluated in reverse order */
 121 #define INVERT_FUNC_PARAMS
 122
 123 /* defined if structures are passed as pointers. Otherwise structures
 124    are directly pushed on stack. */
 125 //#define FUNC_STRUCT_PARAM_AS_PTR
 126
     /* EABI+VFP builds declare dedicated float/double (function) types in
        this file; otherwise the func_*_type names alias func_old_type */
 127 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 128 static CType float_type, double_type, func_float_type, func_double_type;
 129 #define func_ldouble_type func_double_type
 130 #else
 131 #define func_float_type func_old_type
 132 #define func_double_type func_old_type
 133 #define func_ldouble_type func_old_type
 134 #endif
 135
 136 /* pointer size, in bytes */
 137 #define PTR_SIZE 4
 138
 139 /* long double size and alignment, in bytes */
     /* both branches below currently yield 8, so long double has the
        size of double in every configuration visible here */
 140 #ifdef TCC_ARM_VFP
 141 #define LDOUBLE_SIZE  8
 142 #endif
 143
 144 #ifndef LDOUBLE_SIZE
 145 #define LDOUBLE_SIZE  8
 146 #endif
 147
 148 #ifdef TCC_ARM_EABI
 149 #define LDOUBLE_ALIGN 8
 150 #else
 151 #define LDOUBLE_ALIGN 4
 152 #endif
 153
 154 /* maximum alignment (for aligned attribute support) */
 155 #define MAX_ALIGN     8
 156
     /* plain 'char' is treated as unsigned on this target */
 157 #define CHAR_IS_UNSIGNED
 158
 159 /******************************************************/
 160 /* ELF defines */
 161
 162 #define EM_TCC_TARGET EM_ARM
 163
 164 /* relocation type for 32 bit data relocation */
 165 #define R_DATA_32   R_ARM_ABS32
 166 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 167 #define R_COPY      R_ARM_COPY
 168
     /* NOTE(review): presumably the default load address and page size
        of generated executables - confirm against the ELF writer */
 169 #define ELF_START_ADDR 0x00008000
 170 #define ELF_PAGE_SIZE  0x1000
 171
 172 /******************************************************/
 173 static unsigned long func_sub_sp_offset,last_itod_magic;
     /* func_sub_sp_offset: text offset of the placeholder nop emitted by
        gfunc_prolog(); gfunc_epilog() patches the stack adjustment there.
        last_itod_magic: reset to 0 by gfunc_prolog(); its users are not
        in this part of the file. */
 174 static int leaffunc;
     /* leaffunc: set to 1 by gfunc_prolog() and cleared by gfunc_call();
        gfunc_epilog() reads it (EABI only) to decide whether the frame
        size must be rounded up to 8 bytes. */
 175
 176 void o(unsigned long i)
 177 {
 178   /* this is a good place to start adding big-endian support*/
 179   int ind1;
 180
 181   ind1 = ind + 4;
 182   if (!cur_text_section)
 183     error("compiler error! This happens f.ex. if the compiler\n"
 184          "can't evaluate constant expressions outside of a function.");
 185   if (ind1 > cur_text_section->data_allocated)
 186     section_realloc(cur_text_section, ind1);
 187   cur_text_section->data[ind++] = i&255;
 188   i>>=8;
 189   cur_text_section->data[ind++] = i&255;
 190   i>>=8;
 191   cur_text_section->data[ind++] = i&255;
 192   i>>=8;
 193   cur_text_section->data[ind++] = i;
 194 }
 195
 196 static unsigned long stuff_const(unsigned long op,unsigned long c)
 197 {
 198   int try_neg=0;
 199   unsigned long nc = 0,negop = 0;
 200
 201   switch(op&0x1F00000)
 202   {
 203     case 0x800000: //add
 204     case 0x400000: //sub
 205       try_neg=1;
 206       negop=op^0xC00000;
 207       nc=-c;
 208       break;
 209     case 0x1A00000: //mov
 210     case 0x1E00000: //mvn
 211       try_neg=1;
 212       negop=op^0x400000;
 213       nc=~c;
 214       break;
 215     case 0x200000: //xor
 216       if(c==~0)
 217         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 218       break;
 219     case 0x0: //and
 220       if(c==~0)
 221         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 222     case 0x1C00000: //bic
 223       try_neg=1;
 224       negop=op^0x1C00000;
 225       nc=~c;
 226       break;
 227     case 0x1800000: //orr
 228       if(c==~0)
 229         return (op&0xFFF0FFFF)|0x1E00000;
 230       break;
 231   }
 232   do {
 233     unsigned long m;
 234     int i;
 235     if(c<256) /* catch undefined <<32 */
 236       return op|c;
 237     for(i=2;i<32;i+=2) {
 238       m=(0xff>>i)|(0xff<<(32-i));
 239       if(!(c&~m))
 240         return op|(i<<7)|(c<<i)|(c>>(32-i));
 241     }
 242     op=negop;
 243     c=nc;
 244   } while(try_neg--);
 245   return 0;
 246 }
 247
 248
 249 //only add,sub
 250 void stuff_const_harder(unsigned long op,unsigned long v) {
 251   unsigned long x;
 252   x=stuff_const(op,v);
 253   if(x)
 254     o(x);
 255   else {
 256     unsigned long a[16],nv,no,o2,n2;
 257     int i,j,k;
 258     a[0]=0xff;
 259     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 260     for(i=1;i<16;i++)
 261       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 262     for(i=0;i<12;i++)
 263       for(j=i<4?i+12:15;j>=i+4;j--)
 264         if((v&(a[i]|a[j]))==v) {
 265           o(stuff_const(op,v&a[i]));
 266           o(stuff_const(o2,v&a[j]));
 267           return;
 268         }
 269     no=op^0xC00000;
 270     n2=o2^0xC00000;
 271     nv=-v;
 272     for(i=0;i<12;i++)
 273       for(j=i<4?i+12:15;j>=i+4;j--)
 274         if((nv&(a[i]|a[j]))==nv) {
 275           o(stuff_const(no,nv&a[i]));
 276           o(stuff_const(n2,nv&a[j]));
 277           return;
 278         }
 279     for(i=0;i<8;i++)
 280       for(j=i+4;j<12;j++)
 281         for(k=i<4?i+12:15;k>=j+4;k--)
 282           if((v&(a[i]|a[j]|a[k]))==v) {
 283             o(stuff_const(op,v&a[i]));
 284             o(stuff_const(o2,v&a[j]));
 285             o(stuff_const(o2,v&a[k]));
 286             return;
 287           }
 288     no=op^0xC00000;
 289     nv=-v;
 290     for(i=0;i<8;i++)
 291       for(j=i+4;j<12;j++)
 292         for(k=i<4?i+12:15;k>=j+4;k--)
 293           if((nv&(a[i]|a[j]|a[k]))==nv) {
 294             o(stuff_const(no,nv&a[i]));
 295             o(stuff_const(n2,nv&a[j]));
 296             o(stuff_const(n2,nv&a[k]));
 297             return;
 298           }
 299     o(stuff_const(op,v&a[0]));
 300     o(stuff_const(o2,v&a[4]));
 301     o(stuff_const(o2,v&a[8]));
 302     o(stuff_const(o2,v&a[12]));
 303   }
 304 }
 305
 306 unsigned long encbranch(int pos,int addr,int fail)
 307 {
 308   addr-=pos+8;
 309   addr/=4;
 310   if(addr>=0x1000000 || addr<-0x1000000) {
 311     if(fail)
 312       error("FIXME: function bigger than 32MB");
 313     return 0;
 314   }
 315   return 0x0A000000|(addr&0xffffff);
 316 }
 317
 318 int decbranch(int pos)
 319 {
 320   int x;
 321   x=*(int *)(cur_text_section->data + pos);
 322   x&=0x00ffffff;
 323   if(x&0x800000)
 324     x-=0x1000000;
 325   return x*4+pos+8;
 326 }
 327
 328 /* output a symbol and patch all calls to it */
 329 void gsym_addr(int t, int a)
 330 {
 331   unsigned long *x;
 332   int lt;
 333   while(t) {
 334     x=(unsigned long *)(cur_text_section->data + t);
 335     t=decbranch(lt=t);
 336     if(a==lt+4)
 337       *x=0xE1A00000; // nop
 338     else {
 339       *x &= 0xff000000;
 340       *x |= encbranch(lt,a,1);
 341     }
 342   }
 343 }
 344
 345 void gsym(int t)
 346 {
 347   gsym_addr(t, ind);
 348 }
 349
 350 #ifdef TCC_ARM_VFP
 351 static unsigned long vfpr(int r)
 352 {
 353   if(r<TREG_F0 || r>TREG_F7)
 354     error("compiler error! register %i is no vfp register",r);
 355   return r-5;
 356 }
 357 #else
 358 static unsigned long fpr(int r)
 359 {
 360   if(r<TREG_F0 || r>TREG_F3)
 361     error("compiler error! register %i is no fpa register",r);
 362   return r-5;
 363 }
 364 #endif
 365
 366 static unsigned long intr(int r)
 367 {
 368   if(r==4)
 369     return 12;
 370   if((r<0 || r>4) && r!=14)
 371     error("compiler error! register %i is no int register",r);
 372   return r;
 373 }
 374
 375 static void calcaddr(unsigned long *base,int *off,int *sgn,int maxoff,unsigned shift)
 376 {
       /* Make base/offset/sign usable by a load or store whose offset
          field can hold at most 'maxoff' and must be a multiple of
          1<<shift.  *off is the offset magnitude and *sgn is non-zero
          when it is subtracted.  If the offset does not fit, code is
          emitted that computes an intermediate address into lr; *base
          becomes 14 (lr) and *off/*sgn describe the remaining part. */
 377   if(*off>maxoff || *off&((1<<shift)-1)) {
 378     unsigned long x,y;
         /* 0xE280E000 / 0xE240E000: add / sub lr,<base>,#imm */
 379     x=0xE280E000;
 380     if(*sgn)
 381       x=0xE240E000;
 382     x|=(*base)<<16;
 383     *base=14; // lr
         /* first choice: move the part above maxoff into lr with one
            instruction and keep the low part as the offset */
 384     y=stuff_const(x,*off&~maxoff);
 385     if(y) {
 386       o(y);
 387       *off&=maxoff;
 388       return;
 389     }
         /* second choice: overshoot to the rounded-up constant and come
            back with an offset of the opposite sign */
 390     y=stuff_const(x,(*off+maxoff)&~maxoff);
 391     if(y) {
 392       o(y);
 393       *sgn=!*sgn;
 394       *off=((*off+maxoff)&~maxoff)-*off;
 395       return;
 396     }
         /* otherwise build the high part with several instructions */
 397     stuff_const_harder(x,*off&~maxoff);
 398     *off&=maxoff;
 399   }
 400 }
 401
 402 static unsigned long mapcc(int cc)
 403 {
 404   switch(cc)
 405   {
 406     case TOK_ULT:
 407       return 0x30000000; /* CC/LO */
 408     case TOK_UGE:
 409       return 0x20000000; /* CS/HS */
 410     case TOK_EQ:
 411       return 0x00000000; /* EQ */
 412     case TOK_NE:
 413       return 0x10000000; /* NE */
 414     case TOK_ULE:
 415       return 0x90000000; /* LS */
 416     case TOK_UGT:
 417       return 0x80000000; /* HI */
 418     case TOK_Nset:
 419       return 0x40000000; /* MI */
 420     case TOK_Nclear:
 421       return 0x50000000; /* PL */
 422     case TOK_LT:
 423       return 0xB0000000; /* LT */
 424     case TOK_GE:
 425       return 0xA0000000; /* GE */
 426     case TOK_LE:
 427       return 0xD0000000; /* LE */
 428     case TOK_GT:
 429       return 0xC0000000; /* GT */
 430   }
 431   error("unexpected condition code");
 432   return 0xE0000000; /* AL */
 433 }
 434
 435 static int negcc(int cc)
 436 {
 437   switch(cc)
 438   {
 439     case TOK_ULT:
 440       return TOK_UGE;
 441     case TOK_UGE:
 442       return TOK_ULT;
 443     case TOK_EQ:
 444       return TOK_NE;
 445     case TOK_NE:
 446       return TOK_EQ;
 447     case TOK_ULE:
 448       return TOK_UGT;
 449     case TOK_UGT:
 450       return TOK_ULE;
 451     case TOK_Nset:
 452       return TOK_Nclear;
 453     case TOK_Nclear:
 454       return TOK_Nset;
 455     case TOK_LT:
 456       return TOK_GE;
 457     case TOK_GE:
 458       return TOK_LT;
 459     case TOK_LE:
 460       return TOK_GT;
 461     case TOK_GT:
 462       return TOK_LE;
 463   }
 464   error("unexpected condition code");
 465   return TOK_NE;
 466 }
 467
 468 /* load 'r' from value 'sv' */
 469 void load(int r, SValue *sv)
 470 {
       /* Emit code that loads the value described by 'sv' into register
          'r'.  Lvalues are read from memory (frame-relative, through a
          pointer register, or through an address first materialised in
          lr); other values are constants, frame addresses, comparison or
          jump results, or plain register copies.  Anything else ends in
          the "load unimplemented!" error. */
 471   int v, ft, fc, fr, sign;
 472   unsigned long op;
 473   SValue v1;
 474
 475   fr = sv->r;
 476   ft = sv->type.t;
 477   fc = sv->c.ul;
 478
       /* split the offset into magnitude (fc) and direction (sign) */
 479   if(fc>=0)
 480     sign=0;
 481   else {
 482     sign=1;
 483     fc=-fc;
 484   }
 485
 486   v = fr & VT_VALMASK;
 487   if (fr & VT_LVAL) {
 488     unsigned long base=0xB; // fp
 489     if(v == VT_LLOCAL) {
           /* the local slot holds a pointer: fetch it into lr first */
 490       v1.type.t = VT_PTR;
 491       v1.r = VT_LOCAL | VT_LVAL;
 492       v1.c.ul = sv->c.ul;
 493       load(base=14 /* lr */, &v1);
 494       fc=sign=0;
 495       v=VT_LOCAL;
 496     } else if(v == VT_CONST) {
           /* constant (possibly symbolic) address: materialise it in lr */
 497       v1.type.t = VT_PTR;
 498       v1.r = fr&~VT_LVAL;
 499       v1.c.ul = sv->c.ul;
 500       v1.sym=sv->sym;
 501       load(base=14, &v1);
 502       fc=sign=0;
 503       v=VT_LOCAL;
 504     } else if(v < VT_CONST) {
           /* the address is already in a register */
 505       base=intr(v);
 506       fc=sign=0;
 507       v=VT_LOCAL;
 508     }
 509     if(v == VT_LOCAL) {
           /* from here on: load from [base +/- fc]; 0x800000 is set in
              every form when the offset is added rather than subtracted */
 510       if(is_float(ft)) {
             /* offset field holds fc>>2, limited to 1020 bytes */
 511         calcaddr(&base,&fc,&sign,1020,2);
 512 #ifdef TCC_ARM_VFP
 513         op=0xED100A00; /* flds */
 514         if(!sign)
 515           op|=0x800000;
 516         if ((ft & VT_BTYPE) != VT_FLOAT)
 517           op|=0x100;   /* flds -> fldd */
 518         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 519 #else
 520         op=0xED100100;
 521         if(!sign)
 522           op|=0x800000;
 523 #if LDOUBLE_SIZE == 8
 524         if ((ft & VT_BTYPE) != VT_FLOAT)
 525           op|=0x8000;
 526 #else
 527         if ((ft & VT_BTYPE) == VT_DOUBLE)
 528           op|=0x8000;
 529         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 530           op|=0x400000;
 531 #endif
 532         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 533 #endif
 534       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 535                 || (ft & VT_BTYPE) == VT_SHORT) {
             /* signed byte and (un)signed halfword loads: 8-bit offset
                split into two nibbles of the instruction */
 536         calcaddr(&base,&fc,&sign,255,0);
 537         op=0xE1500090;
 538         if ((ft & VT_BTYPE) == VT_SHORT)
 539           op|=0x20;
 540         if ((ft & VT_UNSIGNED) == 0)
 541           op|=0x40;
 542         if(!sign)
 543           op|=0x800000;
 544         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 545       } else {
             /* word or unsigned byte load with a 12-bit offset; 0x400000
                selects the byte form */
 546         calcaddr(&base,&fc,&sign,4095,0);
 547         op=0xE5100000;
 548         if(!sign)
 549           op|=0x800000;
 550         if ((ft & VT_BTYPE) == VT_BYTE)
 551           op|=0x400000;
 552         o(op|(intr(r)<<12)|fc|(base<<16));
 553       }
 554       return;
 555     }
 556   } else {
 557     if (v == VT_CONST) {
           /* try a single mov/mvn; symbols and unencodable constants are
              loaded from a literal word placed inline in the code, with
              a branch emitted in front of it (ldr r,[pc]; b; .word) */
 558       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 559       if (fr & VT_SYM || !op) {
 560         o(0xE59F0000|(intr(r)<<12));
 561         o(0xEA000000);
 562         if(fr & VT_SYM)
 563           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 564         o(sv->c.ul);
 565       } else
 566         o(op);
 567       return;
 568     } else if (v == VT_LOCAL) {
           /* address of a local: add r,fp,#offset, or load the offset
              from an inline literal and add fp afterwards */
 569       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 570       if (fr & VT_SYM || !op) {
 571         o(0xE59F0000|(intr(r)<<12));
 572         o(0xEA000000);
 573         if(fr & VT_SYM) // needed ?
 574           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 575         o(sv->c.ul);
 576         o(0xE08B0000|(intr(r)<<12)|intr(r));
 577       } else
 578         o(op);
 579       return;
 580     } else if(v == VT_CMP) {
           /* turn the flags into 0/1: a mov #1 under the condition and a
              mov #0 under the negated condition */
 581       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 582       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 583       return;
 584     } else if (v == VT_JMP || v == VT_JMPI) {
           /* fall-through path yields t, the resolved jump chain yields
              the opposite value */
 585       int t;
 586       t = v & 1;
 587       o(0xE3A00000|(intr(r)<<12)|t);
 588       o(0xEA000000);
 589       gsym(sv->c.ul);
 590       o(0xE3A00000|(intr(r)<<12)|(t^1));
 591       return;
 592     } else if (v < VT_CONST) {
           /* register to register copy */
 593       if(is_float(ft))
 594 #ifdef TCC_ARM_VFP
 595         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 596 #else
 597         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 598 #endif
 599       else
 600         o(0xE1A00000|(intr(r)<<12)|intr(v));
 601       return;
 602     }
 603   }
 604   error("load unimplemented!");
 605 }
 606
 607 /* store register 'r' in lvalue 'v' */
 608 void store(int r, SValue *sv)
 609 {
       /* Emit code that stores register 'r' into the lvalue described by
          'sv'.  The destination is frame-relative, addressed through a
          register, or a constant address first loaded into lr.  Anything
          else ends in the "store unimplemented" error. */
 610   SValue v1;
 611   int v, ft, fc, fr, sign;
 612   unsigned long op;
 613
 614   fr = sv->r;
 615   ft = sv->type.t;
 616   fc = sv->c.ul;
 617
       /* split the offset into magnitude (fc) and direction (sign) */
 618   if(fc>=0)
 619     sign=0;
 620   else {
 621     sign=1;
 622     fc=-fc;
 623   }
 624
 625   v = fr & VT_VALMASK;
 626   if (fr & VT_LVAL || fr == VT_LOCAL) {
 627     unsigned long base=0xb;
         /* base register defaults to fp (r11) */
 628     if(v < VT_CONST) {
           /* the address is already in a register */
 629       base=intr(v);
 630       v=VT_LOCAL;
 631       fc=sign=0;
 632     } else if(v == VT_CONST) {
           /* constant (possibly symbolic) address: materialise it in lr */
 633       v1.type.t = ft;
 634       v1.r = fr&~VT_LVAL;
 635       v1.c.ul = sv->c.ul;
 636       v1.sym=sv->sym;
 637       load(base=14, &v1);
 638       fc=sign=0;
 639       v=VT_LOCAL;
 640     }
 641     if(v == VT_LOCAL) {
           /* store to [base +/- fc]; 0x800000 is set in every form when
              the offset is added rather than subtracted */
 642        if(is_float(ft)) {
             /* offset field holds fc>>2, limited to 1020 bytes */
 643         calcaddr(&base,&fc,&sign,1020,2);
 644 #ifdef TCC_ARM_VFP
 645         op=0xED000A00; /* fsts */
 646         if(!sign)
 647           op|=0x800000;
 648         if ((ft & VT_BTYPE) != VT_FLOAT)
 649           op|=0x100;   /* fsts -> fstd */
 650         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 651 #else
 652         op=0xED000100;
 653         if(!sign)
 654           op|=0x800000;
 655 #if LDOUBLE_SIZE == 8
 656         if ((ft & VT_BTYPE) != VT_FLOAT)
 657           op|=0x8000;
 658 #else
 659         if ((ft & VT_BTYPE) == VT_DOUBLE)
 660           op|=0x8000;
 661         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 662           op|=0x400000;
 663 #endif
 664         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 665 #endif
 666         return;
 667       } else if((ft & VT_BTYPE) == VT_SHORT) {
             /* halfword store: 8-bit offset split into two nibbles */
 668         calcaddr(&base,&fc,&sign,255,0);
 669         op=0xE14000B0;
 670         if(!sign)
 671           op|=0x800000;
 672         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 673       } else {
             /* word or byte store with a 12-bit offset; 0x400000 selects
                the byte form */
 674         calcaddr(&base,&fc,&sign,4095,0);
 675         op=0xE5000000;
 676         if(!sign)
 677           op|=0x800000;
 678         if ((ft & VT_BTYPE) == VT_BYTE)
 679           op|=0x400000;
 680         o(op|(intr(r)<<12)|fc|(base<<16));
 681       }
 682       return;
 683     }
 684   }
 685   error("store unimplemented");
 686 }
 687
 688 static void gadd_sp(int val)
 689 {
 690   stuff_const_harder(0xE28DD000,val);
 691 }
 692
 693 /* 'is_jmp' is '1' if it is a jump */
 694 static void gcall_or_jmp(int is_jmp)
 695 {
 696   int r;
 697   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 698     unsigned long x;
 699     /* constant case */
 700     x=encbranch(ind,ind+vtop->c.ul,0);
 701     if(x) {
 702       if (vtop->r & VT_SYM) {
 703         /* relocation case */
 704         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 705       } else
 706         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 707       o(x|(is_jmp?0xE0000000:0xE1000000));
 708     } else {
 709       if(!is_jmp)
 710         o(0xE28FE004); // add lr,pc,#4
 711       o(0xE51FF004);   // ldr pc,[pc,#-4]
 712       if (vtop->r & VT_SYM)
 713         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 714       o(vtop->c.ul);
 715     }
 716   } else {
 717     /* otherwise, indirect call */
 718     r = gv(RC_INT);
 719     if(!is_jmp)
 720       o(0xE1A0E00F);       // mov lr,pc
 721     o(0xE1A0F000|intr(r)); // mov pc,r
 722   }
 723 }
 724
 725 /* Generate function call. The function address is pushed first, then
 726    all the parameters in call order. This functions pops all the
 727    parameters and the function address. */
 728 void gfunc_call(int nb_args)
 729 {
       /* Emit a call with 'nb_args' arguments taken from the value stack
          (the function address lies below them).  The first 16 bytes of
          arguments travel in r0-r3, the rest on the stack.  Arguments
          are either pushed on the stack or kept on the value stack for a
          specific register; stack words that belong in registers are
          popped into them right before the call. */
 730   int size, align, r, args_size, i;
 731   Sym *func_sym;
       /* plan[k][0..1]: argument-register index (0-3) for the first and
          second word of the k-th integer argument, -1 if none.
          todo: bit mask of r0-r3 still to be filled by popping the stack.
          plan2[]: register class for each value kept on the value stack. */
 732   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 733   int todo=0xf, keep, plan2[4]={0,0,0,0};
 734
       /* a pending comparison or jump result must become a real value */
 735   r = vtop->r & VT_VALMASK;
 736   if (r == VT_CMP || (r & ~1) == VT_JMP)
 737     gv(RC_INT);
 738 #ifdef TCC_ARM_EABI
       /* function returning a struct of at most 4 bytes: swap the two
          lowest entries of the call frame and drop one from the argument
          count; the result is stored through that entry after the call */
 739   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 740      && type_size(&vtop[-nb_args].type, &align) <= 4) {
 741     SValue tmp;
 742     tmp=vtop[-nb_args];
 743     vtop[-nb_args]=vtop[-nb_args+1];
 744     vtop[-nb_args+1]=tmp;
 745     --nb_args;
 746   }
 747
       /* insert VT_VOID padding entries wherever an argument would start
          at an offset that violates its alignment; the temporary
          VT_LLONG entry is included in the walk and popped afterwards */
 748   vpushi(0);
 749   vtop->type.t = VT_LLONG;
 750   args_size = 0;
 751   for(i = nb_args + 1 ; i-- ;) {
 752     size = type_size(&vtop[-i].type, &align);
 753     if(args_size & (align-1)) {
 754       vpushi(0);
 755       vtop->type.t = VT_VOID; /* padding */
 756       vrott(i+2);
 757       args_size += 4;
 758       ++nb_args;
 759     }
 760     args_size += (size + 3) & -4;
 761   }
 762   vtop--;
 763 #endif
       /* first pass: decide which integer argument words fall into the
          first 16 bytes and therefore into r0-r3 */
 764   args_size = 0;
 765   for(i = nb_args ; i-- && args_size < 16 ;) {
 766     switch(vtop[-i].type.t & VT_BTYPE) {
 767       case VT_STRUCT:
 768       case VT_FLOAT:
 769       case VT_DOUBLE:
 770       case VT_LDOUBLE:
           /* these are always pushed; they only consume argument space */
 771       size = type_size(&vtop[-i].type, &align);
 772         size = (size + 3) & -4;
 773       args_size += size;
 774         break;
 775       default:
 776       plan[nb_args-1-i][0]=args_size/4;
 777       args_size += 4;
 778       if ((vtop[-i].type.t & VT_BTYPE) == VT_LLONG && args_size < 16) {
 779         plan[nb_args-1-i][1]=args_size/4;
 780         args_size += 4;
 781       }
 782     }
 783   }
       /* second pass: push arguments or keep them for their register */
 784   args_size = keep = 0;
 785   for(i = 0;i < nb_args; i++) {
 786     vnrott(keep+1);
 787     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 788       size = type_size(&vtop->type, &align);
 789       /* align to stack align size */
 790       size = (size + 3) & -4;
 791       /* allocate the necessary size on stack */
 792       gadd_sp(-size);
 793       /* generate structure store */
 794       r = get_reg(RC_INT);
           /* mov r,sp: the struct is copied to the new stack area */
 795       o(0xE1A0000D|(intr(r)<<12));
 796       vset(&vtop->type, r | VT_LVAL, 0);
 797       vswap();
 798       vstore();
 799       vtop--;
 800       args_size += size;
 801     } else if (is_float(vtop->type.t)) {
           /* floating point arguments are pushed from their float
              register */
 802 #ifdef TCC_ARM_VFP
 803       r=vfpr(gv(RC_FLOAT))<<12;
 804       size=4;
 805       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 806       {
 807         size=8;
 808         r|=0x101; /* fstms -> fstmd */
 809       }
 810       o(0xED2D0A01+r);
 811 #else
 812       r=fpr(gv(RC_FLOAT))<<12;
 813       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
 814         size = 4;
 815       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 816         size = 8;
 817       else
 818         size = LDOUBLE_SIZE;
 819
 820       if (size == 12)
 821         r|=0x400000;
 822       else if(size == 8)
 823         r|=0x8000;
 824
 825       o(0xED2D0100|r|(size>>2));
 826 #endif
 827       vtop--;
 828       args_size += size;
 829     } else {
 830       int s;
 831       /* simple type (currently always same size) */
 832       /* XXX: implicit cast ? */
 833       size=4;
 834       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
             /* split the 64-bit value and handle its second word first */
 835         lexpand_nr();
 836         s=RC_INT;
 837         if(nb_args-i<5 && plan[nb_args-i-1][1]!=-1) {
 838           s=regmask(plan[nb_args-i-1][1]);
 839           todo&=~(1<<plan[nb_args-i-1][1]);
 840         }
 841         if(s==RC_INT) {
 842           r = gv(s);
 843           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
 844           vtop--;
 845         } else {
 846           plan2[keep]=s;
 847           keep++;
 848           vswap();
 849         }
 850         size = 8;
 851       }
           /* s stays RC_INT when the word goes to the stack, otherwise it
              names the argument register the value must end up in */
 852       s=RC_INT;
 853       if(nb_args-i<5 && plan[nb_args-i-1][0]!=-1) {
 854         s=regmask(plan[nb_args-i-1][0]);
 855         todo&=~(1<<plan[nb_args-i-1][0]);
 856       }
 857 #ifdef TCC_ARM_EABI
 858       if(vtop->type.t == VT_VOID) {
             /* alignment padding: reserve a stack word unless the slot
                is a register slot */
 859         if(s == RC_INT)
 860           o(0xE24DD004); /* sub sp,sp,#4 */
 861         vtop--;
 862       } else
 863 #endif
 864       if(s == RC_INT) {
 865         r = gv(s);
 866         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
 867         vtop--;
 868       } else {
 869         plan2[keep]=s;
 870         keep++;
 871       }
 872       args_size += size;
 873     }
 874   }
       /* load the kept values into the registers planned for them */
 875   for(i=keep;i--;) {
 876     gv(plan2[i]);
 877     vrott(keep);
 878   }
 879 save_regs(keep); /* save used temporary registers */
 880   keep++;
 881   if(args_size) {
 882     int n;
 883     n=args_size/4;
 884     if(n>4)
 885       n=4;
         /* registers among the first n argument words that were pushed
            instead of kept are popped with one load-multiple from sp */
 886     todo&=((1<<n)-1);
 887     if(todo) {
 888       int i;
 889       o(0xE8BD0000|todo);
 890       for(i=0;i<4;i++)
 891         if(todo&(1<<i)) {
               /* mark the register as occupied on the value stack */
 892           vpushi(0);
 893           vtop->r=i;
 894           keep++;
 895         }
 896     }
 897     args_size-=n*4;
 898   }
       /* bring the function address to the top and call it */
 899   vnrott(keep);
 900   func_sym = vtop->type.ref;
 901   gcall_or_jmp(0);
       /* drop the stack arguments that were not popped into registers */
 902   if (args_size)
 903       gadd_sp(args_size);
 904 #ifdef TCC_ARM_EABI
 905   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
 906      && type_size(&vtop->type.ref->type, &align) <= 4)
 907   {
         /* small struct result arrives in r0: store it through the
            destination entry set aside at the start */
 908     store(REG_IRET,vtop-keep);
 909     ++keep;
 910   }
 911 #ifdef TCC_ARM_VFP
 912   else if(is_float(vtop->type.ref->type.t)) {
         /* float results arrive in r0 (and r1): move them to s0/d0 */
 913     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
 914       o(0xEE000A10); /* fmsr s0,r0 */
 915     } else {
 916       o(0xEE000B10); /* fmdlr d0,r0 */
 917       o(0xEE201B10); /* fmdhr d0,r1 */
 918     }
 919   }
 920 #endif
 921 #endif
       /* pop the function and every entry kept for the call */
 922   vtop-=keep;
 923   leaffunc = 0;
 924 }
 925
 926 /* generate function prolog of type 't' */
 927 void gfunc_prolog(CType *func_type)
 928 {
       /* Emit the function entry code and declare the parameters as
          locals.  Argument registers are pushed so that all parameters
          form one contiguous block above the saved fp/ip/lr; a nop is
          left for the stack adjustment, which gfunc_epilog() patches in
          once the frame size is known. */
 929   Sym *sym,*sym2;
 930   int n,addr,size,align;
 931
 932   sym = func_type->ref;
 933   func_vt = sym->type;
 934
       /* n counts the argument words that arrive in registers */
 935   n = 0;
 936   addr = 0;
 937   if((func_vt.t & VT_BTYPE) == VT_STRUCT
 938      && type_size(&func_vt,&align) > 4)
 939   {
         /* a struct result larger than 4 bytes uses the first argument
            word; its frame offset is recorded in func_vc */
 940     func_vc = addr;
 941     addr += 4;
 942     n++;
 943   }
 944   for(sym2=sym->next;sym2 && n<4;sym2=sym2->next) {
 945     size = type_size(&sym2->type, &align);
 946     n += (size + 3) / 4;
 947   }
 948   o(0xE1A0C00D); /* mov ip,sp */
       /* variadic functions always save all four argument registers */
 949   if(func_type->ref->c == FUNC_ELLIPSIS)
 950     n=4;
 951   if(n) {
 952     if(n>4)
 953       n=4;
 954 #ifdef TCC_ARM_EABI
         /* round up to an even number of saved registers */
 955     n=(n+1)&-2;
 956 #endif
 957     o(0xE92D0000|((1<<n)-1)); /* save r0-r3 on stack if needed */
 958   }
 959   o(0xE92D5800); /* save fp, ip, lr */
 960   o(0xE28DB00C); /* add fp, sp, #12 */
 961   func_sub_sp_offset = ind;
 962   o(0xE1A00000); /* nop, leave space for stack adjustment */
       /* assign every parameter its offset from fp, in declaration order */
 963   while ((sym = sym->next)) {
 964     CType *type;
 965     type = &sym->type;
 966     size = type_size(type, &align);
 967     size = (size + 3) & -4;
 968 #ifdef TCC_ARM_EABI
 969     addr = (addr + align - 1) & -align;
 970 #endif
 971     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr);
 972     addr += size;
 973   }
 974   last_itod_magic=0;
 975   leaffunc = 1;
       /* locals start below the three saved registers */
 976   loc = -12;
 977 }
 978
 979 /* generate function epilog */
 980 void gfunc_epilog(void)
 981 {
 982   unsigned long x;
 983   int diff;
 984 #ifdef TCC_ARM_EABI
 985   if(is_float(func_vt.t)) {
 986     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
 987       o(0xEE100A10); /* fmrs r0, s0 */
 988     else {
 989       o(0xEE100B10); /* fmrdl r0, d0 */
 990       o(0xEE301B10); /* fmrdh r1, d0 */
 991     }
 992   }
 993 #endif
 994   o(0xE91BA800); /* restore fp, sp, pc */
 995   diff = (-loc + 3) & -4;
 996 #ifdef TCC_ARM_EABI
 997   if(!leaffunc)
 998     diff = (diff + 7) & -8;
 999 #endif
1000   if(diff > 12) {
1001     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1002     if(x)
1003       *(unsigned long *)(cur_text_section->data + func_sub_sp_offset) = x;
1004     else {
1005       unsigned long addr;
1006       addr=ind;
1007       o(0xE59FC004); /* ldr ip,[pc+4] */
1008       o(0xE04BD00C); /* sub sp,fp,ip  */
1009       o(0xE1A0F00E); /* mov pc,lr */
1010       o(diff);
1011       *(unsigned long *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1012     }
1013   }
1014 }
1015
1016 /* generate a jump to a label */
1017 int gjmp(int t)
1018 {
1019   int r;
1020   r=ind;
1021   o(0xE0000000|encbranch(r,t,1));
1022   return r;
1023 }
1024
1025 /* generate a jump to a fixed address */
/* Emit an unconditional branch to the fixed text offset 'a'; the
   position of the branch is not needed by the caller. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1030
1031 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1032 int gtst(int inv, int t)
1033 {
1034   int v, r;
1035   unsigned long op;
1036   v = vtop->r & VT_VALMASK;
1037   r=ind;
1038   if (v == VT_CMP) {
1039     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1040     op|=encbranch(r,t,1);
1041     o(op);
1042     t=r;
1043   } else if (v == VT_JMP || v == VT_JMPI) {
1044     if ((v & 1) == inv) {
1045       if(!vtop->c.i)
1046         vtop->c.i=t;
1047       else {
1048         unsigned long *x;
1049         int p,lp;
1050         if(t) {
1051           p = vtop->c.i;
1052           do {
1053             p = decbranch(lp=p);
1054           } while(p);
1055           x = (unsigned long *)(cur_text_section->data + lp);
1056           *x &= 0xff000000;
1057           *x |= encbranch(lp,t,1);
1058         }
1059         t = vtop->c.i;
1060       }
1061     } else {
1062       t = gjmp(t);
1063       gsym(vtop->c.i);
1064     }
1065   } else {
1066     if (is_float(vtop->type.t)) {
1067       r=gv(RC_FLOAT);
1068 #ifdef TCC_ARM_VFP
1069       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1070       o(0xEEF1FA10); /* fmstat */
1071 #else
1072       o(0xEE90F118|(fpr(r)<<16));
1073 #endif
1074       vtop->r = VT_CMP;
1075       vtop->c.i = TOK_NE;
1076       return gtst(inv, t);
1077     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1078       /* constant jmp optimization */
1079       if ((vtop->c.i != 0) != inv)
1080         t = gjmp(t);
1081     } else {
1082       v = gv(RC_INT);
1083       o(0xE3300000|(intr(v)<<16));
1084       vtop->r = VT_CMP;
1085       vtop->c.i = TOK_NE;
1086       return gtst(inv, t);
1087     }
1088   }
1089   vtop--;
1090   return t;
1091 }
1092
1093 /* generate an integer binary operation */
1094 void gen_opi(int op)
1095 {
1096   int c, func = 0;
1097   unsigned long opc = 0,r,fr;
1098   unsigned short retreg = REG_IRET;
1099
1100   c=0;
1101   switch(op) {
1102     case '+':
1103       opc = 0x8;
1104       c=1;
1105       break;
1106     case TOK_ADDC1: /* add with carry generation */
1107       opc = 0x9;
1108       c=1;
1109       break;
1110     case '-':
1111       opc = 0x4;
1112       c=1;
1113       break;
1114     case TOK_SUBC1: /* sub with carry generation */
1115       opc = 0x5;
1116       c=1;
1117       break;
1118     case TOK_ADDC2: /* add with carry use */
1119       opc = 0xA;
1120       c=1;
1121       break;
1122     case TOK_SUBC2: /* sub with carry use */
1123       opc = 0xC;
1124       c=1;
1125       break;
1126     case '&':
1127       opc = 0x0;
1128       c=1;
1129       break;
1130     case '^':
1131       opc = 0x2;
1132       c=1;
1133       break;
1134     case '|':
1135       opc = 0x18;
1136       c=1;
1137       break;
1138     case '*':
1139       gv2(RC_INT, RC_INT);
1140       r = vtop[-1].r;
1141       fr = vtop[0].r;
1142       vtop--;
1143       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1144       return;
1145     case TOK_SHL:
1146       opc = 0;
1147       c=2;
1148       break;
1149     case TOK_SHR:
1150       opc = 1;
1151       c=2;
1152       break;
1153     case TOK_SAR:
1154       opc = 2;
1155       c=2;
1156       break;
1157     case '/':
1158     case TOK_PDIV:
1159       func=TOK___divsi3;
1160       c=3;
1161       break;
1162     case TOK_UDIV:
1163       func=TOK___udivsi3;
1164       c=3;
1165       break;
1166     case '%':
1167 #ifdef TCC_ARM_EABI
1168       func=TOK___aeabi_idivmod;
1169       retreg=REG_LRET;
1170 #else
1171       func=TOK___modsi3;
1172 #endif
1173       c=3;
1174       break;
1175     case TOK_UMOD:
1176 #ifdef TCC_ARM_EABI
1177       func=TOK___aeabi_uidivmod;
1178       retreg=REG_LRET;
1179 #else
1180       func=TOK___umodsi3;
1181 #endif
1182       c=3;
1183       break;
1184     case TOK_UMULL:
1185       gv2(RC_INT, RC_INT);
1186       r=intr(vtop[-1].r2=get_reg(RC_INT));
1187       c=vtop[-1].r;
1188       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1189       vtop--;
1190       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1191       return;
1192     default:
1193       opc = 0x15;
1194       c=1;
1195       break;
1196   }
1197   switch(c) {
1198     case 1:
1199       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1200         if(opc == 4 || opc == 5 || opc == 0xc) {
1201           vswap();
1202           opc|=2; // sub -> rsb
1203         }
1204       }
1205       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1206           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1207         gv(RC_INT);
1208       vswap();
1209       c=intr(gv(RC_INT));
1210       vswap();
1211       opc=0xE0000000|(opc<<20)|(c<<16);
1212       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1213         unsigned long x;
1214         x=stuff_const(opc|0x2000000,vtop->c.i);
1215         if(x) {
1216           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1217           o(x|(r<<12));
1218           goto done;
1219         }
1220       }
1221       fr=intr(gv(RC_INT));
1222       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1223       o(opc|(r<<12)|fr);
1224 done:
1225       vtop--;
1226       if (op >= TOK_ULT && op <= TOK_GT) {
1227         vtop->r = VT_CMP;
1228         vtop->c.i = op;
1229       }
1230       break;
1231     case 2:
1232       opc=0xE1A00000|(opc<<5);
1233       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1234           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1235         gv(RC_INT);
1236       vswap();
1237       r=intr(gv(RC_INT));
1238       vswap();
1239       opc|=r;
1240       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1241         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1242         c = vtop->c.i & 0x1f;
1243         o(opc|(c<<7)|(fr<<12));
1244       } else {
1245         fr=intr(gv(RC_INT));
1246         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1247         o(opc|(c<<12)|(fr<<8)|0x10);
1248       }
1249       vtop--;
1250       break;
1251     case 3:
1252       vpush_global_sym(&func_old_type, func);
1253       vrott(3);
1254       gfunc_call(2);
1255       vpushi(0);
1256       vtop->r = retreg;
1257       break;
1258     default:
1259       error("gen_opi %i unimplemented!",op);
1260   }
1261 }
1262
1263 #ifdef TCC_ARM_VFP
1264 static int is_zero(int i)
1265 {
1266   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1267     return 0;
1268   if (vtop[i].type.t == VT_FLOAT)
1269     return (vtop[i].c.f == 0.f);
1270   else if (vtop[i].type.t == VT_DOUBLE)
1271     return (vtop[i].c.d == 0.0);
1272   return (vtop[i].c.ld == 0.l);
1273 }
1274
1275 /* generate a floating point operation 'v = t1 op t2' instruction. The
1276  *    two operands are guaranted to have the same floating point type */
1277 void gen_opf(int op)
1278 {
1279   unsigned long x;
1280   int fneg=0,r;
1281   x=0xEE000A00|T2CPR(vtop->type.t);
1282   switch(op) {
1283     case '+':
1284       if(is_zero(-1))
1285         vswap();
1286       if(is_zero(0)) {
1287         vtop--;
1288         return;
1289       }
1290       x|=0x300000;
1291       break;
1292     case '-':
1293       x|=0x300040;
1294       if(is_zero(0)) {
1295         vtop--;
1296         return;
1297       }
1298       if(is_zero(-1)) {
1299         x|=0x810000; /* fsubX -> fnegX */
1300         vswap();
1301         vtop--;
1302         fneg=1;
1303       }
1304       break;
1305     case '*':
1306       x|=0x200000;
1307       break;
1308     case '/':
1309       x|=0x800000;
1310       break;
1311     default:
1312       if(op < TOK_ULT && op > TOK_GT) {
1313         error("unknown fp op %x!",op);
1314         return;
1315       }
1316       if(is_zero(-1)) {
1317         vswap();
1318         switch(op) {
1319           case TOK_LT: op=TOK_GT; break;
1320           case TOK_GE: op=TOK_ULE; break;
1321           case TOK_LE: op=TOK_GE; break;
1322           case TOK_GT: op=TOK_ULT; break;
1323         }
1324       }
1325       x|=0xB40040; /* fcmpX */
1326       if(op!=TOK_EQ && op!=TOK_NE)
1327         x|=0x80; /* fcmpX -> fcmpeX */
1328       if(is_zero(0)) {
1329         vtop--;
1330         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1331       } else {
1332         x|=vfpr(gv(RC_FLOAT));
1333         vswap();
1334         o(x|(vfpr(gv(RC_FLOAT))<<12));
1335         vtop--;
1336       }
1337       o(0xEEF1FA10); /* fmstat */
1338
1339       switch(op) {
1340         case TOK_LE: op=TOK_ULE; break;
1341         case TOK_LT: op=TOK_ULT; break;
1342         case TOK_UGE: op=TOK_GE; break;
1343         case TOK_UGT: op=TOK_GT; break;
1344       }
1345
1346       vtop->r = VT_CMP;
1347       vtop->c.i = op;
1348       return;
1349   }
1350   r=gv(RC_FLOAT);
1351   x|=vfpr(r);
1352   r=regmask(r);
1353   if(!fneg) {
1354     int r2;
1355     vswap();
1356     r2=gv(RC_FLOAT);
1357     x|=vfpr(r2)<<16;
1358     r|=regmask(r2);
1359   }
1360   vtop->r=get_reg_ex(RC_FLOAT,r);
1361   if(!fneg)
1362     vtop--;
1363   o(x|(vfpr(vtop->r)<<12));
1364 }
1365
1366 #else
1367 static int is_fconst()
1368 {
1369   long double f;
1370   int r;
1371   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1372     return 0;
1373   if (vtop->type.t == VT_FLOAT)
1374     f = vtop->c.f;
1375   else if (vtop->type.t == VT_DOUBLE)
1376     f = vtop->c.d;
1377   else
1378     f = vtop->c.ld;
1379   if(!ieee_finite(f))
1380     return 0;
1381   r=0x8;
1382   if(f<0.0) {
1383     r=0x18;
1384     f=-f;
1385   }
1386   if(f==0.0)
1387     return r;
1388   if(f==1.0)
1389     return r|1;
1390   if(f==2.0)
1391     return r|2;
1392   if(f==3.0)
1393     return r|3;
1394   if(f==4.0)
1395     return r|4;
1396   if(f==5.0)
1397     return r|5;
1398   if(f==0.5)
1399     return r|6;
1400   if(f==10.0)
1401     return r|7;
1402   return 0;
1403 }
1404
1405 /* generate a floating point operation 'v = t1 op t2' instruction. The
1406    two operands are guaranted to have the same floating point type */
1407 void gen_opf(int op)
1408 {
1409   unsigned long x;
1410   int r,r2,c1,c2;
1411   //fputs("gen_opf\n",stderr);
1412   vswap();
1413   c1 = is_fconst();
1414   vswap();
1415   c2 = is_fconst();
1416   x=0xEE000100;
1417 #if LDOUBLE_SIZE == 8
1418   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1419     x|=0x80;
1420 #else
1421   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1422     x|=0x80;
1423   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1424     x|=0x80000;
1425 #endif
1426   switch(op)
1427   {
1428     case '+':
1429       if(!c2) {
1430         vswap();
1431         c2=c1;
1432       }
1433       vswap();
1434       r=fpr(gv(RC_FLOAT));
1435       vswap();
1436       if(c2) {
1437         if(c2>0xf)
1438           x|=0x200000; // suf
1439         r2=c2&0xf;
1440       } else {
1441         r2=fpr(gv(RC_FLOAT));
1442       }
1443       break;
1444     case '-':
1445       if(c2) {
1446         if(c2<=0xf)
1447           x|=0x200000; // suf
1448         r2=c2&0xf;
1449         vswap();
1450         r=fpr(gv(RC_FLOAT));
1451         vswap();
1452       } else if(c1 && c1<=0xf) {
1453         x|=0x300000; // rsf
1454         r2=c1;
1455         r=fpr(gv(RC_FLOAT));
1456         vswap();
1457       } else {
1458         x|=0x200000; // suf
1459         vswap();
1460         r=fpr(gv(RC_FLOAT));
1461         vswap();
1462         r2=fpr(gv(RC_FLOAT));
1463       }
1464       break;
1465     case '*':
1466       if(!c2 || c2>0xf) {
1467         vswap();
1468         c2=c1;
1469       }
1470       vswap();
1471       r=fpr(gv(RC_FLOAT));
1472       vswap();
1473       if(c2 && c2<=0xf)
1474         r2=c2;
1475       else
1476         r2=fpr(gv(RC_FLOAT));
1477       x|=0x100000; // muf
1478       break;
1479     case '/':
1480       if(c2 && c2<=0xf) {
1481         x|=0x400000; // dvf
1482         r2=c2;
1483         vswap();
1484         r=fpr(gv(RC_FLOAT));
1485         vswap();
1486       } else if(c1 && c1<=0xf) {
1487         x|=0x500000; // rdf
1488         r2=c1;
1489         r=fpr(gv(RC_FLOAT));
1490         vswap();
1491       } else {
1492         x|=0x400000; // dvf
1493         vswap();
1494         r=fpr(gv(RC_FLOAT));
1495         vswap();
1496         r2=fpr(gv(RC_FLOAT));
1497       }
1498       break;
1499     default:
1500       if(op >= TOK_ULT && op <= TOK_GT) {
1501         x|=0xd0f110; // cmfe
1502 /* bug (intention?) in Linux FPU emulator
1503    doesn't set carry if equal */
1504         switch(op) {
1505           case TOK_ULT:
1506           case TOK_UGE:
1507           case TOK_ULE:
1508           case TOK_UGT:
1509             error("unsigned comparision on floats?");
1510             break;
1511           case TOK_LT:
1512             op=TOK_Nset;
1513             break;
1514           case TOK_LE:
1515             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1516             break;
1517           case TOK_EQ:
1518           case TOK_NE:
1519             x&=~0x400000; // cmfe -> cmf
1520             break;
1521         }
1522         if(c1 && !c2) {
1523           c2=c1;
1524           vswap();
1525           switch(op) {
1526             case TOK_Nset:
1527               op=TOK_GT;
1528               break;
1529             case TOK_GE:
1530               op=TOK_ULE;
1531               break;
1532             case TOK_ULE:
1533               op=TOK_GE;
1534               break;
1535             case TOK_GT:
1536               op=TOK_Nset;
1537               break;
1538           }
1539         }
1540         vswap();
1541         r=fpr(gv(RC_FLOAT));
1542         vswap();
1543         if(c2) {
1544           if(c2>0xf)
1545             x|=0x200000;
1546           r2=c2&0xf;
1547         } else {
1548           r2=fpr(gv(RC_FLOAT));
1549         }
1550         vtop[-1].r = VT_CMP;
1551         vtop[-1].c.i = op;
1552       } else {
1553         error("unknown fp op %x!",op);
1554         return;
1555       }
1556   }
1557   if(vtop[-1].r == VT_CMP)
1558     c1=15;
1559   else {
1560     c1=vtop->r;
1561     if(r2&0x8)
1562       c1=vtop[-1].r;
1563     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1564     c1=fpr(vtop[-1].r);
1565   }
1566   vtop--;
1567   o(x|(r<<16)|(c1<<12)|r2);
1568 }
1569 #endif
1570
1571 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1572    and 'long long' cases. */
1573 void gen_cvt_itof1(int t)
1574 {
1575   int r,r2,bt;
1576   bt=vtop->type.t & VT_BTYPE;
1577   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1578 #ifndef TCC_ARM_VFP
1579     unsigned int dsize=0;
1580 #endif
1581     r=intr(gv(RC_INT));
1582 #ifdef TCC_ARM_VFP
1583     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1584     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1585     r2<<=12;
1586     if(!(vtop->type.t & VT_UNSIGNED))
1587       r2|=0x80;                /* fuitoX -> fsituX */
1588     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1589 #else
1590     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1591     if((t & VT_BTYPE) != VT_FLOAT)
1592       dsize=0x80;    /* flts -> fltd */
1593     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1594     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1595       unsigned int off=0;
1596       o(0xE3500000|(r<<12));        /* cmp */
1597       r=fpr(get_reg(RC_FLOAT));
1598       if(last_itod_magic) {
1599         off=ind+8-last_itod_magic;
1600         off/=4;
1601         if(off>255)
1602           off=0;
1603       }
1604       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1605       if(!off) {
1606         o(0xEA000000);              /* b */
1607         last_itod_magic=ind;
1608         o(0x4F800000);              /* 4294967296.0f */
1609       }
1610       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1611     }
1612 #endif
1613     return;
1614   } else if(bt == VT_LLONG) {
1615     int func;
1616     CType *func_type = 0;
1617     if((t & VT_BTYPE) == VT_FLOAT) {
1618       func_type = &func_float_type;
1619       if(vtop->type.t & VT_UNSIGNED)
1620         func=TOK___floatundisf;
1621       else
1622         func=TOK___floatdisf;
1623 #if LDOUBLE_SIZE != 8
1624     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1625       func_type = &func_ldouble_type;
1626       if(vtop->type.t & VT_UNSIGNED)
1627         func=TOK___floatundixf;
1628       else
1629         func=TOK___floatdixf;
1630     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1631 #else
1632     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1633 #endif
1634       func_type = &func_double_type;
1635       if(vtop->type.t & VT_UNSIGNED)
1636         func=TOK___floatundidf;
1637       else
1638         func=TOK___floatdidf;
1639     }
1640     if(func_type) {
1641       vpush_global_sym(func_type, func);
1642       vswap();
1643       gfunc_call(1);
1644       vpushi(0);
1645       vtop->r=TREG_F0;
1646       return;
1647     }
1648   }
1649   error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1650 }
1651
1652 /* convert fp to int 't' type */
1653 void gen_cvt_ftoi(int t)
1654 {
1655   int r,r2,u,func=0;
1656   u=t&VT_UNSIGNED;
1657   t&=VT_BTYPE;
1658   r2=vtop->type.t & VT_BTYPE;
1659   if(t==VT_INT) {
1660 #ifdef TCC_ARM_VFP
1661     r=vfpr(gv(RC_FLOAT));
1662     u=u?0:0x10000;
1663     o(0xEEBC0A40|(r<<12)|r|T2CPR(r2)); /* ftoXiY */
1664     r2=intr(vtop->r=get_reg(RC_INT));
1665     o(0xEE100A10|(r<<16)|(r2<<12));
1666     return;
1667 #else
1668     if(u) {
1669       if(r2 == VT_FLOAT)
1670         func=TOK___fixunssfsi;
1671 #if LDOUBLE_SIZE != 8
1672       else if(r2 == VT_LDOUBLE)
1673         func=TOK___fixunsxfsi;
1674       else if(r2 == VT_DOUBLE)
1675 #else
1676       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1677 #endif
1678         func=TOK___fixunsdfsi;
1679     } else {
1680       r=fpr(gv(RC_FLOAT));
1681       r2=intr(vtop->r=get_reg(RC_INT));
1682       o(0xEE100170|(r2<<12)|r);
1683       return;
1684     }
1685 #endif
1686   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1687     if(r2 == VT_FLOAT)
1688       func=TOK___fixsfdi;
1689 #if LDOUBLE_SIZE != 8
1690     else if(r2 == VT_LDOUBLE)
1691       func=TOK___fixxfdi;
1692     else if(r2 == VT_DOUBLE)
1693 #else
1694     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1695 #endif
1696       func=TOK___fixdfdi;
1697   }
1698   if(func) {
1699     vpush_global_sym(&func_old_type, func);
1700     vswap();
1701     gfunc_call(1);
1702     vpushi(0);
1703     if(t == VT_LLONG)
1704       vtop->r2 = REG_LRET;
1705     vtop->r = REG_IRET;
1706     return;
1707   }
1708   error("unimplemented gen_cvt_ftoi!");
1709 }
1710
1711 /* convert from one floating point type to another */
1712 void gen_cvt_ftof(int t)
1713 {
1714 #ifdef TCC_ARM_VFP
1715   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1716     int r=vfpr(gv(RC_FLOAT));
1717     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1718   }
1719 #else
1720   /* all we have to do on i386 and FPA ARM is to put the float in a register */
1721   gv(RC_FLOAT);
1722 #endif
1723 }
1724
1725 /* computed goto support */
1726 void ggoto(void)
1727 {
1728   gcall_or_jmp(1);
1729   vtop--;
1730 }
1731
1732 /* end of ARM code generator */
1733 /*************************************************************/
1734