arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
     /* an EABI build without VFP support is rejected at compile time */
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
     /* 13 = r0-r3, r12 and f0-f7 (VFP build); 9 = r0-r3, r12 and f0-f3.
        The value is the size of the reg_classes[] table. */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
     /* default used when the build does not provide TCC_CPU_VERSION */
  37 #ifndef TCC_CPU_VERSION
  38 # define TCC_CPU_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which makes
  43    assumptions about it). */
     /* RC_INT and RC_FLOAT are the generic classes; every other bit selects
        exactly one register (see reg_classes[]). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
     /* the four extra float registers exist only in VFP builds */
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_IRE2    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
     /* The values TREG_R0..TREG_F3 (..TREG_F7 with VFP) are indices into
        reg_classes[]. TREG_SP is pinned to 13 so that TREG_SP/TREG_LR are 13
        and 14 in both configurations; both lie outside the NB_REGS range and
        are translated to hardware register numbers by intr(). */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
  86 #ifdef TCC_ARM_VFP
     /* Opcode bit for the operand type 't': 0 for VT_FLOAT, 0x100 for any
        other basic type. load() ORs it into the VFP register copy opcode;
        the same 0x100 bit turns flds/fsts into fldd/fstd in load()/store(). */
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_IRE2 TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
     /* In EABI builds the 64-bit division and modulo helper tokens are
        aliased to the combined divmod helpers: the signed division and
        modulo tokens both become __aeabi_ldivmod, the unsigned ones
        __aeabi_uldivmod. */
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
     /* (kept commented out: the macro is not defined for this target) */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
     /* LDOUBLE_SIZE ends up as 8 in every configuration: VFP builds set it
        here and the #ifndef fallback below supplies the same value. */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
     /* long double is aligned on 8 bytes under EABI and on 4 bytes otherwise */
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #define USING_GLOBALS
 136 #include "tcc.h"
 137
 138 enum float_abi float_abi; /* float ABI consulted by gfunc_sret(); arm_init() copies it from the TCCState in EABI+VFP builds */
 139
 140 ST_DATA const int reg_classes[NB_REGS] = {
     /* Indexed by TREG_xxx. Each entry combines the generic class (RC_INT or
        RC_FLOAT) with the bit naming that one register; two2mask() and
        regmask() strip the generic bits again. */
 141     /* r0 */ RC_INT | RC_R0,
 142     /* r1 */ RC_INT | RC_R1,
 143     /* r2 */ RC_INT | RC_R2,
 144     /* r3 */ RC_INT | RC_R3,
 145     /* r12 */ RC_INT | RC_R12,
 146     /* f0 */ RC_FLOAT | RC_F0,
 147     /* f1 */ RC_FLOAT | RC_F1,
 148     /* f2 */ RC_FLOAT | RC_F2,
 149     /* f3 */ RC_FLOAT | RC_F3,
 150 #ifdef TCC_ARM_VFP
 151  /* d4/s8 */ RC_FLOAT | RC_F4,
 152 /* d5/s10 */ RC_FLOAT | RC_F5,
 153 /* d6/s12 */ RC_FLOAT | RC_F6,
 154 /* d7/s14 */ RC_FLOAT | RC_F7,
 155 #endif
 156 };
 157
 158 static int func_sub_sp_offset, last_itod_magic; /* file-local code generator state */
 159 static int leaffunc; /* NOTE(review): presumably set while the current function makes no calls -- confirm against its users */
 160
 161 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
     /* EABI + VFP build: dedicated function types whose reference symbols are
        created from float_type / double_type with FUNC_CDECL and FUNC_OLD. */
 162 static CType float_type, double_type, func_float_type, func_double_type;
     /* Target initialisation: builds the two function types above and copies
        the float ABI from the compilation state 's' into the global
        'float_abi'. */
 163 ST_FUNC void arm_init(struct TCCState *s)
 164 {
 165     float_type.t = VT_FLOAT;
 166     double_type.t = VT_DOUBLE;
 167     func_float_type.t = VT_FUNC;
 168     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 169     func_double_type.t = VT_FUNC;
 170     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 171
 172     float_abi = s->float_abi;
     /* the warning is issued on every initialisation of a compiler that was
        built without TCC_ARM_HARDFLOAT */
 173 #ifndef TCC_ARM_HARDFLOAT
 174     tcc_warning("soft float ABI currently not supported: default to softfp");
 175 #endif
 176 }
 177 #else
     /* Other builds: the float/double/long double function types are plain
        aliases of func_old_type and arm_init() does nothing (the deprecation
        warnings below are compiled out by the #if 0). */
 178 #define func_float_type func_old_type
 179 #define func_double_type func_old_type
 180 #define func_ldouble_type func_old_type
 181 ST_FUNC void arm_init(struct TCCState *s)
 182 {
 183 #if 0
 184 #if !defined (TCC_ARM_VFP)
 185     tcc_warning("Support for FPA is deprecated and will be removed in next"
 186                 " release");
 187 #endif
 188 #if !defined (TCC_ARM_EABI)
 189     tcc_warning("Support for OABI is deprecated and will be removed in next"
 190                 " release");
 191 #endif
 192 #endif
 193 }
 194 #endif
 195
 196 static int two2mask(int a,int b) {
 197   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 198 }
 199
 200 static int regmask(int r) {
 201   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 202 }
 203
 204 /******************************************************/
 205
 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 /* Path of the ELF interpreter matching the float ABI recorded in 's':
    the "armhf" loader for ARM_HARD_FLOAT, the generic loader otherwise. */
 const char *default_elfinterp(struct TCCState *s)
 {
     return s->float_abi == ARM_HARD_FLOAT ? "/lib/ld-linux-armhf.so.3"
                                           : "/lib/ld-linux.so.3";
 }
 #endif
 215
 216 void o(uint32_t i)
 217 {
 218   /* this is a good place to start adding big-endian support*/
 219   int ind1;
 220   if (nocode_wanted)
 221     return;
 222   ind1 = ind + 4;
 223   if (!cur_text_section)
 224     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 225          "can't evaluate constant expressions outside of a function.");
 226   if (ind1 > cur_text_section->data_allocated)
 227     section_realloc(cur_text_section, ind1);
 228   cur_text_section->data[ind++] = i&255;
 229   i>>=8;
 230   cur_text_section->data[ind++] = i&255;
 231   i>>=8;
 232   cur_text_section->data[ind++] = i&255;
 233   i>>=8;
 234   cur_text_section->data[ind++] = i;
 235 }
 236
 /* Try to encode the constant 'c' as the immediate operand of the ARM
    data-processing instruction 'op' (an 8-bit value rotated right by an even
    amount).

    When 'c' itself cannot be encoded, the complementary instruction is tried
    with the matching constant: add <-> sub with -c, mov <-> mvn and
    and <-> bic with ~c. An all-ones operand of xor/and/orr is turned into a
    mvn/mov that needs no immediate.

    Returns the complete instruction word, or 0 if no encoding was found. */
 static uint32_t stuff_const(uint32_t op, uint32_t c)
 {
   int try_neg=0;
   uint32_t nc = 0, negop = 0;

   switch(op&0x1F00000)
   {
     case 0x800000: //add
     case 0x400000: //sub
       try_neg=1;
       negop=op^0xC00000;
       nc=-c;
       break;
     case 0x1A00000: //mov
     case 0x1E00000: //mvn
       try_neg=1;
       negop=op^0x400000;
       nc=~c;
       break;
     case 0x200000: //xor
       if(c==~0u) /* x ^ ~0 == ~x: register-form mvn of the first operand */
         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
       break;
     case 0x0: //and
       if(c==~0u) /* x & ~0 == x: register-form mov of the first operand */
         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
       /* fall through: and/bic are complementary like mov/mvn */
     case 0x1C00000: //bic
       try_neg=1;
       negop=op^0x1C00000;
       nc=~c;
       break;
     case 0x1800000: //orr
       if(c==~0u) /* x | ~0 == ~0: mvn with a zero immediate */
         return (op&0xFFF0FFFF)|0x1E00000;
       break;
   }
   do {
     uint32_t m;
     int i;
     if(c<256) /* catch undefined <<32 */
       return op|c;
     for(i=2;i<32;i+=2) {
       /* 0xff rotated right by i; the constant is unsigned so that the
          left shift cannot overflow a signed int */
       m=(0xffu>>i)|(0xffu<<(32-i));
       if(!(c&~m))
         return op|(i<<7)|(c<<i)|(c>>(32-i));
     }
     /* second round (if any): complementary instruction and constant */
     op=negop;
     c=nc;
   } while(try_neg--);
   return 0;
 }
 288
 289
 /* Emit 'first' with one byte window of 'val' and 'next' with a second one
    when two of the windows in 'win' cover every set bit of 'val'.
    Returns 1 if the two instructions were emitted, 0 otherwise. */
 static int stuff_const_pair(uint32_t first, uint32_t next, uint32_t val,
                             const uint32_t *win)
 {
   int lo, hi;

   for (lo = 0; lo < 12; lo++)
     for (hi = lo < 4 ? lo + 12 : 15; hi >= lo + 4; hi--)
       if ((val & (win[lo] | win[hi])) == val) {
         o(stuff_const(first, val & win[lo]));
         o(stuff_const(next, val & win[hi]));
         return 1;
       }
   return 0;
 }

 /* Same as stuff_const_pair() with three byte windows and three
    instructions ('first' once, 'next' twice). */
 static int stuff_const_triple(uint32_t first, uint32_t next, uint32_t val,
                               const uint32_t *win)
 {
   int lo, mid, hi;

   for (lo = 0; lo < 8; lo++)
     for (mid = lo + 4; mid < 12; mid++)
       for (hi = lo < 4 ? lo + 12 : 15; hi >= mid + 4; hi--)
         if ((val & (win[lo] | win[mid] | win[hi])) == val) {
           o(stuff_const(first, val & win[lo]));
           o(stuff_const(next, val & win[mid]));
           o(stuff_const(next, val & win[hi]));
           return 1;
         }
   return 0;
 }

 //only add,sub
 /* Emit 'op' with the constant 'v' even when 'v' has no single immediate
    encoding: the constant is split over up to four instructions, trying in
    turn two pieces of v, two pieces of -v with the opposite operation,
    three pieces of v, three pieces of -v, and finally the four bytes of v.
    Every instruction after the first uses the destination register of 'op'
    as its first operand. */
 void stuff_const_harder(uint32_t op, uint32_t v) {
   uint32_t win[16], direct, chained, neg_op, neg_chained, neg_v;
   int n;

   direct = stuff_const(op, v);
   if (direct) {
     o(direct);
     return;
   }

   /* follow-up form: operand register field replaced by the destination */
   chained = (op & 0xfff0ffff) | ((op & 0xf000) << 4);
   /* win[n] is 0xff rotated right by 2*n bits */
   win[0] = 0xff;
   for (n = 1; n < 16; n++)
     win[n] = (win[n-1] >> 2) | (win[n-1] << 30);

   /* add <-> sub with the negated constant */
   neg_op = op ^ 0xC00000;
   neg_chained = chained ^ 0xC00000;
   neg_v = -v;

   if (stuff_const_pair(op, chained, v, win)
       || stuff_const_pair(neg_op, neg_chained, neg_v, win)
       || stuff_const_triple(op, chained, v, win)
       || stuff_const_triple(neg_op, neg_chained, neg_v, win))
     return;

   /* last resort: one instruction per byte of v */
   o(stuff_const(op, v & win[0]));
   o(stuff_const(chained, v & win[4]));
   o(stuff_const(chained, v & win[8]));
   o(stuff_const(chained, v & win[12]));
 }
 346
 /* Encode a branch located at text offset 'pos' that jumps to text offset
    'addr'. The displacement is counted in words from pos+8 and must fit the
    signed 24-bit offset field of the instruction (+/-32MB), which is also
    what decbranch() assumes when it sign-extends from bit 23.

    Returns the opcode and offset bits without a condition code (the caller
    ORs it in). If the target is out of range, returns 0, after reporting an
    error when 'fail' is non-zero. */
 uint32_t encbranch(int pos, int addr, int fail)
 {
   addr-=pos+8;
   addr/=4;
   /* signed 24-bit word offset: -0x800000 .. 0x7fffff */
   if(addr>=0x800000 || addr<-0x800000) {
     if(fail)
       tcc_error("FIXME: function bigger than 32MB");
     return 0;
   }
   return 0x0A000000|(addr&0xffffff);
 }
 358
 359 int decbranch(int pos)
 360 {
 361   int x;
 362   x=*(uint32_t *)(cur_text_section->data + pos);
 363   x&=0x00ffffff;
 364   if(x&0x800000)
 365     x-=0x1000000;
 366   return x*4+pos+8;
 367 }
 368
 369 /* output a symbol and patch all calls to it */
 370 void gsym_addr(int t, int a)
 371 {
 372   uint32_t *x;
 373   int lt;
 374   while(t) {
 375     x=(uint32_t *)(cur_text_section->data + t);
 376     t=decbranch(lt=t);
 377     if(a==lt+4)
 378       *x=0xE1A00000; // nop
 379     else {
 380       *x &= 0xff000000;
 381       *x |= encbranch(lt,a,1);
 382     }
 383   }
 384 }
 385
 386 #ifdef TCC_ARM_VFP
 387 static uint32_t vfpr(int r)
 388 {
 389   if(r<TREG_F0 || r>TREG_F7)
 390     tcc_error("compiler error! register %i is no vfp register",r);
 391   return r - TREG_F0;
 392 }
 393 #else
 394 static uint32_t fpr(int r)
 395 {
 396   if(r<TREG_F0 || r>TREG_F3)
 397     tcc_error("compiler error! register %i is no fpa register",r);
 398   return r - TREG_F0;
 399 }
 400 #endif
 401
 402 static uint32_t intr(int r)
 403 {
 404   if(r == TREG_R12)
 405     return 12;
 406   if(r >= TREG_R0 && r <= TREG_R3)
 407     return r - TREG_R0;
 408   if (!(r >= TREG_SP && r <= TREG_LR))
 409     tcc_error("compiler error! register %i is no int register",r);
 410   return r + (13 - TREG_SP);
 411 }
 412
 /* Make the address base +/- offset usable by a load/store whose offset
    field holds at most 'maxoff' and requires the low 'shift' bits to be 0.

    If the offset already fits, nothing happens. Otherwise the excess is
    added to (or subtracted from) the base into lr, *base becomes 14 (lr) and
    *off keeps only the part the instruction can encode. When rounding the
    offset up gives a cheaper constant, the direction *sgn is flipped and
    *off becomes the distance back down.

    base: in/out, hardware number of the base register
    off:  in/out, offset magnitude
    sgn:  in/out, non-zero if the offset is subtracted */
 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 {
   uint32_t adjust, insn;
   int rounded;

   if (*off <= maxoff && !(*off & ((1<<shift)-1)))
     return;

   /* add lr,<base>,#imm  or  sub lr,<base>,#imm */
   adjust = (*sgn ? 0xE240E000 : 0xE280E000) | ((*base)<<16);
   *base = 14; // lr

   insn = stuff_const(adjust, *off & ~maxoff);
   if (insn) {
     o(insn);
     *off &= maxoff;
     return;
   }

   rounded = (*off + maxoff) & ~maxoff;
   insn = stuff_const(adjust, rounded);
   if (insn) {
     o(insn);
     *sgn = !*sgn;
     *off = rounded - *off;
     return;
   }

   stuff_const_harder(adjust, *off & ~maxoff);
   *off &= maxoff;
 }
 439
 440 static uint32_t mapcc(int cc)
 441 {
 442   switch(cc)
 443   {
 444     case TOK_ULT:
 445       return 0x30000000; /* CC/LO */
 446     case TOK_UGE:
 447       return 0x20000000; /* CS/HS */
 448     case TOK_EQ:
 449       return 0x00000000; /* EQ */
 450     case TOK_NE:
 451       return 0x10000000; /* NE */
 452     case TOK_ULE:
 453       return 0x90000000; /* LS */
 454     case TOK_UGT:
 455       return 0x80000000; /* HI */
 456     case TOK_Nset:
 457       return 0x40000000; /* MI */
 458     case TOK_Nclear:
 459       return 0x50000000; /* PL */
 460     case TOK_LT:
 461       return 0xB0000000; /* LT */
 462     case TOK_GE:
 463       return 0xA0000000; /* GE */
 464     case TOK_LE:
 465       return 0xD0000000; /* LE */
 466     case TOK_GT:
 467       return 0xC0000000; /* GT */
 468   }
 469   tcc_error("unexpected condition code");
 470   return 0xE0000000; /* AL */
 471 }
 472
 473 static int negcc(int cc)
 474 {
 475   switch(cc)
 476   {
 477     case TOK_ULT:
 478       return TOK_UGE;
 479     case TOK_UGE:
 480       return TOK_ULT;
 481     case TOK_EQ:
 482       return TOK_NE;
 483     case TOK_NE:
 484       return TOK_EQ;
 485     case TOK_ULE:
 486       return TOK_UGT;
 487     case TOK_UGT:
 488       return TOK_ULE;
 489     case TOK_Nset:
 490       return TOK_Nclear;
 491     case TOK_Nclear:
 492       return TOK_Nset;
 493     case TOK_LT:
 494       return TOK_GE;
 495     case TOK_GE:
 496       return TOK_LT;
 497     case TOK_LE:
 498       return TOK_GT;
 499     case TOK_GT:
 500       return TOK_LE;
 501   }
 502   tcc_error("unexpected condition code");
 503   return TOK_NE;
 504 }
 505
 506 /* load 'r' from value 'sv' */
     /* Emits the instruction(s) that bring the value described by 'sv' into
        register 'r' (a TREG_xxx number). Lvalues are read from memory with a
        load that matches the type; other values are materialised (constant,
        address of a local, comparison result, jump result) or copied from
        another register. Combinations not handled here end in
        "load unimplemented!". */
 507 void load(int r, SValue *sv)
 508 {
 509   int v, ft, fc, fr, sign;
 510   uint32_t op;
 511   SValue v1;
 512
 513   fr = sv->r;
 514   ft = sv->type.t;
 515   fc = sv->c.i;
 516
     /* keep the offset as magnitude (fc) plus direction (sign) */
 517   if(fc>=0)
 518     sign=0;
 519   else {
 520     sign=1;
 521     fc=-fc;
 522   }
 523
 524   v = fr & VT_VALMASK;
 525   if (fr & VT_LVAL) {
 526     uint32_t base = 0xB; // fp
 527     if(v == VT_LLOCAL) {
     /* the pointer is stored in a local slot: fetch it into lr first and
        then read through lr with a zero offset */
 528       v1.type.t = VT_PTR;
 529       v1.r = VT_LOCAL | VT_LVAL;
 530       v1.c.i = sv->c.i;
 531       load(TREG_LR, &v1);
 532       base = 14; /* lr */
 533       fc=sign=0;
 534       v=VT_LOCAL;
 535     } else if(v == VT_CONST) {
     /* constant or symbol address: materialise it in lr, then read
        through lr */
 536       v1.type.t = VT_PTR;
 537       v1.r = fr&~VT_LVAL;
 538       v1.c.i = sv->c.i;
 539       v1.sym=sv->sym;
 540       load(TREG_LR, &v1);
 541       base = 14; /* lr */
 542       fc=sign=0;
 543       v=VT_LOCAL;
 544     } else if(v < VT_CONST) {
     /* the address is already in an integer register */
 545       base=intr(v);
 546       fc=sign=0;
 547       v=VT_LOCAL;
 548     }
 549     if(v == VT_LOCAL) {
 550       if(is_float(ft)) {
     /* float load: offsets up to 1020, multiple of 4, encoded in words */
 551         calcaddr(&base,&fc,&sign,1020,2);
 552 #ifdef TCC_ARM_VFP
 553         op=0xED100A00; /* flds */
 554         if(!sign)
 555           op|=0x800000;
 556         if ((ft & VT_BTYPE) != VT_FLOAT)
 557           op|=0x100;   /* flds -> fldd */
 558         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 559 #else
 560         op=0xED100100;
 561         if(!sign)
 562           op|=0x800000;
 563 #if LDOUBLE_SIZE == 8
 564         if ((ft & VT_BTYPE) != VT_FLOAT)
 565           op|=0x8000;
 566 #else
 567         if ((ft & VT_BTYPE) == VT_DOUBLE)
 568           op|=0x8000;
 569         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 570           op|=0x400000;
 571 #endif
 572         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 573 #endif
 574       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 575                 || (ft & VT_BTYPE) == VT_SHORT) {
     /* signed byte and (un)signed halfword: 8-bit offset split into two
        nibbles; 0x20 selects halfword, 0x40 selects sign extension */
 576         calcaddr(&base,&fc,&sign,255,0);
 577         op=0xE1500090;
 578         if ((ft & VT_BTYPE) == VT_SHORT)
 579           op|=0x20;
 580         if ((ft & VT_UNSIGNED) == 0)
 581           op|=0x40;
 582         if(!sign)
 583           op|=0x800000;
 584         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 585       } else {
     /* word or unsigned byte/bool: 12-bit offset, 0x400000 selects byte */
 586         calcaddr(&base,&fc,&sign,4095,0);
 587         op=0xE5100000;
 588         if(!sign)
 589           op|=0x800000;
 590         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 591           op|=0x400000;
 592         o(op|(intr(r)<<12)|fc|(base<<16));
 593       }
 594       return;
 595     }
 596   } else {
 597     if (v == VT_CONST) {
     /* constant: single mov/mvn when it can be encoded and carries no
        symbol; otherwise a pc-relative load of a literal word emitted
        inline, with a branch skipping over that word */
 598       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
 599       if (fr & VT_SYM || !op) {
 600         o(0xE59F0000|(intr(r)<<12));
 601         o(0xEA000000);
 602         if(fr & VT_SYM)
 603           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 604         o(sv->c.i);
 605       } else
 606         o(op);
 607       return;
 608     } else if (v == VT_LOCAL) {
     /* address of a local: fp plus the offset, either as one add/sub
        immediate or as an inline literal that is then added to fp */
 609       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
 610       if (fr & VT_SYM || !op) {
 611         o(0xE59F0000|(intr(r)<<12));
 612         o(0xEA000000);
 613         if(fr & VT_SYM) // needed ?
 614           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 615         o(sv->c.i);
 616         o(0xE08B0000|(intr(r)<<12)|intr(r));
 617       } else
 618         o(op);
 619       return;
 620     } else if(v == VT_CMP) {
     /* comparison result: mov #1 under the condition, mov #0 under the
        opposite condition */
 621       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
 622       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
 623       return;
 624     } else if (v == VT_JMP || v == VT_JMPI) {
     /* jump result: the fall-through path sets r to (v & 1) and branches
        over the second mov; the pending jumps in sv->c.i are resolved to
        that second mov, which sets the opposite value */
 625       int t;
 626       t = v & 1;
 627       o(0xE3A00000|(intr(r)<<12)|t);
 628       o(0xEA000000);
 629       gsym(sv->c.i);
 630       o(0xE3A00000|(intr(r)<<12)|(t^1));
 631       return;
 632     } else if (v < VT_CONST) {
     /* register to register copy */
 633       if(is_float(ft))
 634 #ifdef TCC_ARM_VFP
 635         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 636 #else
 637         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 638 #endif
 639       else
 640         o(0xE1A00000|(intr(r)<<12)|intr(v));
 641       return;
 642     }
 643   }
 644   tcc_error("load unimplemented!");
 645 }
 646
 647 /* store register 'r' in lvalue 'sv' */
     /* Emits the store instruction that writes register 'r' (a TREG_xxx
        number) to the memory location described by 'sv', using the access
        width implied by sv->type. Destinations not handled here end in
        "store unimplemented". */
 648 void store(int r, SValue *sv)
 649 {
 650   SValue v1;
 651   int v, ft, fc, fr, sign;
 652   uint32_t op;
 653
 654   fr = sv->r;
 655   ft = sv->type.t;
 656   fc = sv->c.i;
 657
     /* keep the offset as magnitude (fc) plus direction (sign) */
 658   if(fc>=0)
 659     sign=0;
 660   else {
 661     sign=1;
 662     fc=-fc;
 663   }
 664
 665   v = fr & VT_VALMASK;
 666   if (fr & VT_LVAL || fr == VT_LOCAL) {
 667     uint32_t base = 0xb; /* fp */
 668     if(v < VT_CONST) {
     /* the address is held in an integer register */
 669       base=intr(v);
 670       v=VT_LOCAL;
 671       fc=sign=0;
 672     } else if(v == VT_CONST) {
     /* constant or symbol address: materialise it in lr, then store
        through lr */
 673       v1.type.t = ft;
 674       v1.r = fr&~VT_LVAL;
 675       v1.c.i = sv->c.i;
 676       v1.sym=sv->sym;
 677       load(TREG_LR, &v1);
 678       base = 14; /* lr */
 679       fc=sign=0;
 680       v=VT_LOCAL;
 681     }
 682     if(v == VT_LOCAL) {
 683        if(is_float(ft)) {
     /* float store: offsets up to 1020, multiple of 4, encoded in words */
 684         calcaddr(&base,&fc,&sign,1020,2);
 685 #ifdef TCC_ARM_VFP
 686         op=0xED000A00; /* fsts */
 687         if(!sign)
 688           op|=0x800000;
 689         if ((ft & VT_BTYPE) != VT_FLOAT)
 690           op|=0x100;   /* fsts -> fstd */
 691         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 692 #else
 693         op=0xED000100;
 694         if(!sign)
 695           op|=0x800000;
 696 #if LDOUBLE_SIZE == 8
 697         if ((ft & VT_BTYPE) != VT_FLOAT)
 698           op|=0x8000;
 699 #else
 700         if ((ft & VT_BTYPE) == VT_DOUBLE)
 701           op|=0x8000;
 702         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 703           op|=0x400000;
 704 #endif
 705         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 706 #endif
 707         return;
 708       } else if((ft & VT_BTYPE) == VT_SHORT) {
     /* halfword store: 8-bit offset split into two nibbles */
 709         calcaddr(&base,&fc,&sign,255,0);
 710         op=0xE14000B0;
 711         if(!sign)
 712           op|=0x800000;
 713         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 714       } else {
     /* word or byte/bool store: 12-bit offset, 0x400000 selects byte */
 715         calcaddr(&base,&fc,&sign,4095,0);
 716         op=0xE5000000;
 717         if(!sign)
 718           op|=0x800000;
 719         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 720           op|=0x400000;
 721         o(op|(intr(r)<<12)|fc|(base<<16));
 722       }
 723       return;
 724     }
 725   }
 726   tcc_error("store unimplemented");
 727 }
 728
 /* Emit the instruction(s) that add the constant 'val' to the stack
    pointer; stuff_const_harder() picks add or sub and splits the constant
    if a single immediate cannot hold it. */
 static void gadd_sp(int val)
 {
   const uint32_t add_sp_sp = 0xE28DD000; /* add sp,sp,#0 */

   stuff_const_harder(add_sp_sp, val);
 }
 733
 734 /* 'is_jmp' is '1' if it is a jump */
 735 static void gcall_or_jmp(int is_jmp)
 736 {
 737   int r;
 738   uint32_t x;
 739   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 740     /* constant case */
 741         if(vtop->r & VT_SYM){
 742                 x=encbranch(ind,ind+vtop->c.i,0);
 743                 if(x) {
 744                 /* relocation case */
 745                   greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 746                   o(x|(is_jmp?0xE0000000:0xE1000000));
 747                 } else {
 748                         if(!is_jmp)
 749                                 o(0xE28FE004); // add lr,pc,#4
 750                         o(0xE51FF004);   // ldr pc,[pc,#-4]
 751                         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 752                         o(vtop->c.i);
 753                 }
 754         }else{
 755                 if(!is_jmp)
 756                         o(0xE28FE004); // add lr,pc,#4
 757                 o(0xE51FF004);   // ldr pc,[pc,#-4]
 758                 o(vtop->c.i);
 759         }
 760   } else {
 761     /* otherwise, indirect call */
 762     r = gv(RC_INT);
 763     if(!is_jmp)
 764       o(0xE1A0E00F);       // mov lr,pc
 765     o(0xE1A0F000|intr(r)); // mov pc,r
 766   }
 767 }
 768
 /* Map VT_LDOUBLE to VT_DOUBLE when long double is 8 bytes wide
    (LDOUBLE_SIZE == 8); every other basic type is returned unchanged. */
 static int unalias_ldbl(int btype)
 {
 #if LDOUBLE_SIZE == 8
     return btype == VT_LDOUBLE ? VT_DOUBLE : btype;
 #else
     return btype;
 #endif
 }
 777
 778 /* Return whether a structure is an homogeneous float aggregate or not.
 779    The answer is true if all the elements of the structure are of the same
 780    primitive float type and there is less than 4 elements.
 781
 782    type: the type corresponding to the structure to be tested */
 783 static int is_hgen_float_aggr(CType *type)
 784 {
 785   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 786     struct Sym *ref;
 787     int btype, nb_fields = 0;
 788
 789     ref = type->ref->next;
 790     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 791     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 792       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 793       return !ref && nb_fields <= 4;
 794     }
 795   }
 796   return 0;
 797 }
 798
 /* Bookkeeping for the allocation of single precision VFP registers to
    parameters. */
 struct avail_regs {
   signed char avail[3]; /* 3 holes max with only float and double alignments */
   int first_hole; /* first available hole */
   int last_hole; /* last available hole (none if equal to first_hole) */
   int first_free_reg; /* next free register in the sequence, hole excluded */
 };

 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }

 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
    param) according to the rules described in the procedure call standard for
    the ARM architecture (AAPCS). Registers are handed out in sequence, except
    that a single float is placed in a previously skipped register (a hole)
    when one exists. Once a parameter does not fit any more, every later call
    fails as well.

    avregs: opaque structure to keep track of available VFP co-processor regs
    align: alignment constraints for the param, as returned by type_size()
    size: size of the parameter, as returned by type_size()

    Returns the number of the first register assigned, or -1 if none. */
 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 {
   int wants_double_align = align >> 3;
   int nregs = size / 4;
   int start;

   if (avregs->first_free_reg == -1)
     return -1;

   /* a lone float without double alignment reuses a hole if there is one */
   if (!wants_double_align && size == 4
       && avregs->first_hole != avregs->last_hole)
     return avregs->avail[avregs->first_hole++];

   start = avregs->first_free_reg;
   if (wants_double_align && (start & 1)) {
     /* odd register: skip it and remember it as a hole */
     avregs->avail[avregs->last_hole] = start;
     avregs->last_hole++;
     start++;
   }

   if (start + nregs > 16) {
     avregs->first_free_reg = -1;
     return -1;
   }
   avregs->first_free_reg = start + nregs;
   return start;
 }
 842
 843 /* Returns whether all params need to be passed in core registers or not.
 844    This is the case for function part of the runtime ABI. */
 845 int floats_in_core_regs(SValue *sval)
 846 {
 847   if (!sval->sym)
 848     return 0;
 849
 850   switch (sval->sym->v) {
 851     case TOK___floatundisf:
 852     case TOK___floatundidf:
 853     case TOK___fixunssfdi:
 854     case TOK___fixunsdfdi:
 855 #ifndef TCC_ARM_VFP
 856     case TOK___fixunsxfdi:
 857 #endif
 858     case TOK___floatdisf:
 859     case TOK___floatdidf:
 860     case TOK___fixsfdi:
 861     case TOK___fixdfdi:
 862       return 1;
 863
 864     default:
 865       return 0;
 866   }
 867 }
 868
 869 /* Return the number of registers needed to return the struct, or 0 if
 870    returning via struct pointer. */
 871 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 872 #ifdef TCC_ARM_EABI
 873     int size, align;
 874     size = type_size(vt, &align);
 875     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 876         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 877         *ret_align = 8;
 878         *regsize = 8;
 879         ret->ref = NULL;
 880         ret->t = VT_DOUBLE;
 881         return (size + 7) >> 3;
 882     } else if (size <= 4) {
 883         *ret_align = 4;
 884         *regsize = 4;
 885         ret->ref = NULL;
 886         ret->t = VT_INT;
 887         return 1;
 888     } else
 889         return 0;
 890 #else
 891     return 0;
 892 #endif
 893 }
 894
 895 /* Parameters are classified according to how they are copied to their final
 896    destination for the function call. Because the copying is performed class
 897    after class according to the order in the enum below, it is important that
 898    some constraints about the order of the members of this enum are respected:
 899    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 900    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 901      VFP_STRUCT_CLASS;
 902    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 903    See the comment for the main loop in copy_params() for the reason. */
 904 enum reg_class {
 905         STACK_CLASS = 0,
 906         CORE_STRUCT_CLASS,
 907         VFP_CLASS,
 908         VFP_STRUCT_CLASS,
 909         CORE_CLASS,
 910         NB_CLASSES /* number of classes: size of plan->clsplans[] */
 911 };
 912
 913 struct param_plan {
     /* Where one parameter goes; filled in by assign_regs(). */
 914     int start; /* first reg or addr used depending on the class */
 915     int end; /* last reg used or next free addr depending on the class */
 916     SValue *sval; /* pointer to SValue on the value stack */
 917     struct param_plan *prev; /*  previous element in this class */
 918 };
 919
 920 struct plan {
     /* pplans is allocated by assign_regs(); clsplans[c] points to the most
        recently added plan of class c (older ones follow through 'prev'),
        or is NULL when the class has none. */
 921     struct param_plan *pplans; /* array of all the param plans */
 922     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 923 };
 924
     /* Append 'pplan' to the plan array and make it the head of the list of
        class 'class'. The macro relies on a counter variable named after its
        first argument with a "_nb" suffix (plan_nb when called with 'plan')
        being in scope; it is used as the next free index and incremented. */
 925 #define add_param_plan(plan,pplan,class)                        \
 926     do {                                                        \
 927         pplan.prev = plan->clsplans[class];                     \
 928         plan->pplans[plan ## _nb] = pplan;                      \
 929         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
 930     } while(0)
 931
 932 /* Assign parameters to registers and stack with alignment according to the
 933    rules in the procedure call standard for the ARM architecture (AAPCS).
 934    The overall assignment is recorded in an array of per parameter structures
 935    called parameter plans. The parameter plans are also further organized in a
 936    number of linked lists, one per class of parameter (see the comment for the
 937    definition of enum reg_class).
 938
 939    nb_args: number of parameters of the function for which a call is generated
 940    float_abi: float ABI in use for this function call
 941    plan: the structure where the overall assignment is recorded
 942    todo: a bitmap that records which core registers hold a parameter
 943
 944    Returns the amount of stack space needed for parameter passing
 945
 946    Note: this function allocates an array in plan->pplans with tcc_malloc. It
 947    is the responsibility of the caller to free this array once used (ie not
 948    before copy_params). */
 949 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
 950 {
 951   int i, size, align;
 952   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 953   int plan_nb = 0;
 954   struct param_plan pplan;
 955   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 956
 957   ncrn = nsaa = 0;
 958   *todo = 0;
 959   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 960   memset(plan->clsplans, 0, sizeof(plan->clsplans));
     /* walk the arguments from vtop[-(nb_args-1)] up to vtop[0] */
 961   for(i = nb_args; i-- ;) {
 962     int j, start_vfpreg = 0;
 963     CType type = vtop[-i].type;
 964     type.t &= ~VT_ARRAY;
 965     size = type_size(&type, &align);
     /* size and alignment are rounded up to a multiple of 4 bytes */
 966     size = (size + 3) & ~3;
 967     align = (align + 3) & ~3;
 968     switch(vtop[-i].type.t & VT_BTYPE) {
 969       case VT_STRUCT:
 970       case VT_FLOAT:
 971       case VT_DOUBLE:
 972       case VT_LDOUBLE:
 973       if (float_abi == ARM_HARD_FLOAT) {
 974         int is_hfa = 0; /* Homogeneous float aggregate */
 975
 976         if (is_float(vtop[-i].type.t)
 977             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 978           int end_vfpreg;
 979
     /* VFP candidate: either it gets VFP registers, or it goes to the
        stack (the break leaves the switch for the stack code below) */
 980           start_vfpreg = assign_vfpreg(&avregs, align, size);
 981           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 982           if (start_vfpreg >= 0) {
 983             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 984             if (is_hfa)
 985               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 986             else
 987               add_param_plan(plan, pplan, VFP_CLASS);
 988             continue;
 989           } else
 990             break;
 991         }
 992       }
     /* structures and floats passed in core registers: round the next
        core register number up to the parameter's alignment */
 993       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
 994       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 995         /* The parameter is allocated both in core register and on stack. As
 996          * such, it can be of either class: it would either be the last of
 997          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
 998         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 999           *todo|=(1<<j);
1000         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1001         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1002         ncrn += size/4;
     /* the part that did not fit in r0-r3 already occupies stack space */
1003         if (ncrn > 4)
1004           nsaa = (ncrn - 4) * 4;
1005       } else {
     /* no core register is used for this parameter nor for later ones */
1006         ncrn = 4;
1007         break;
1008       }
1009       continue;
1010       default:
1011       if (ncrn < 4) {
1012         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1013
     /* a long long starts on an even core register; if that leaves no
        register, it goes to the stack */
1014         if (is_long) {
1015           ncrn = (ncrn + 1) & -2;
1016           if (ncrn == 4)
1017             break;
1018         }
1019         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1020         ncrn++;
1021         if (is_long)
1022           pplan.end = ncrn++;
1023         add_param_plan(plan, pplan, CORE_CLASS);
1024         continue;
1025       }
1026     }
     /* reached only for parameters that go to the stack */
1027     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1028     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1029     add_param_plan(plan, pplan, STACK_CLASS);
1030     nsaa += size; /* size already rounded up before */
1031   }
1032   return nsaa;
1033 }
1034
1035 #undef add_param_plan
1036
1037 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1038    function call.
1039
1040    nb_args: number of parameters the function take
1041    plan: the overall assignment plan for parameters
1042    todo: a bitmap indicating what core reg will hold a parameter
1043
1044    Returns the number of SValue added by this function on the value stack */
1045 static int copy_params(int nb_args, struct plan *plan, int todo)
1046 {
1047   int size, align, r, i, nb_extra_sval = 0;
1048   struct param_plan *pplan;
1049   int pass = 0;
1050
1051    /* Several constraints require parameters to be copied in a specific order:
1052       - structures are copied to the stack before being loaded in a reg;
1053       - floats loaded to an odd numbered VFP reg are first copied to the
1054         preceding even numbered VFP reg and then moved to the next VFP reg.
1055
1056       It is thus important that:
1057       - structures assigned to core regs must be copied after parameters
1058         assigned to the stack but before structures assigned to VFP regs because
1059         a structure can lie partly in core registers and partly on the stack;
1060       - parameters assigned to the stack and all structures be copied before
1061         parameters assigned to a core reg since copying a parameter to the stack
1062         require using a core reg;
1063       - parameters assigned to VFP regs be copied before structures assigned to
1064         VFP regs as the copy might use an even numbered VFP reg that already
1065         holds part of a structure. */
1066 again:
1067   for(i = 0; i < NB_CLASSES; i++) {
1068     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1069
1070       if (pass
1071           && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
1072         continue;
1073
1074       vpushv(pplan->sval);
1075       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1076       switch(i) {
1077         case STACK_CLASS:
1078         case CORE_STRUCT_CLASS:
1079         case VFP_STRUCT_CLASS:
1080           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1081             int padding = 0;
1082             size = type_size(&pplan->sval->type, &align);
1083             /* align to stack align size */
1084             size = (size + 3) & ~3;
1085             if (i == STACK_CLASS && pplan->prev)
1086               padding = pplan->start - pplan->prev->end;
1087             size += padding; /* Add padding if any */
1088             /* allocate the necessary size on stack */
1089             gadd_sp(-size);
1090             /* generate structure store */
1091             r = get_reg(RC_INT);
1092             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1093             vset(&vtop->type, r | VT_LVAL, 0);
1094             vswap();
1095             vstore(); /* memcpy to current sp + potential padding */
1096
1097             /* Homogeneous float aggregate are loaded to VFP registers
1098                immediately since there is no way of loading data in multiple
1099                non consecutive VFP registers as what is done for other
1100                structures (see the use of todo). */
1101             if (i == VFP_STRUCT_CLASS) {
1102               int first = pplan->start, nb = pplan->end - first + 1;
1103               /* vpop.32 {pplan->start, ..., pplan->end} */
1104               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1105               /* No need to write the register used to a SValue since VFP regs
1106                  cannot be used for gcall_or_jmp */
1107             }
1108           } else {
1109             if (is_float(pplan->sval->type.t)) {
1110 #ifdef TCC_ARM_VFP
1111               r = vfpr(gv(RC_FLOAT)) << 12;
1112               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1113                 size = 4;
1114               else {
1115                 size = 8;
1116                 r |= 0x101; /* vpush.32 -> vpush.64 */
1117               }
1118               o(0xED2D0A01 + r); /* vpush */
1119 #else
1120               r = fpr(gv(RC_FLOAT)) << 12;
1121               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1122                 size = 4;
1123               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1124                 size = 8;
1125               else
1126                 size = LDOUBLE_SIZE;
1127
1128               if (size == 12)
1129                 r |= 0x400000;
1130               else if(size == 8)
1131                 r|=0x8000;
1132
1133               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1134 #endif
1135             } else {
1136               /* simple type (currently always same size) */
1137               /* XXX: implicit cast ? */
1138               size=4;
1139               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1140                 lexpand();
1141                 size = 8;
1142                 r = gv(RC_INT);
1143                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1144                 vtop--;
1145               }
1146               r = gv(RC_INT);
1147               o(0xE52D0004|(intr(r)<<12)); /* push r */
1148             }
1149             if (i == STACK_CLASS && pplan->prev)
1150               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1151           }
1152           break;
1153
1154         case VFP_CLASS:
1155           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1156           if (pplan->start & 1) { /* Must be in upper part of double register */
1157             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1158             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1159           }
1160           break;
1161
1162         case CORE_CLASS:
1163           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1164             lexpand();
1165             gv(regmask(pplan->end));
1166             pplan->sval->r2 = vtop->r;
1167             vtop--;
1168           }
1169           gv(regmask(pplan->start));
1170           /* Mark register as used so that gcall_or_jmp use another one
1171              (regs >=4 are free as never used to pass parameters) */
1172           pplan->sval->r = vtop->r;
1173           break;
1174       }
1175       vtop--;
1176     }
1177   }
1178
1179   /* second pass to restore registers that were saved on stack by accident.
1180      Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1181   if (++pass < 2)
1182     goto again;
1183
1184   /* Manually free remaining registers since next parameters are loaded
1185    * manually, without the help of gv(int). */
1186   save_regs(nb_args);
1187
1188   if(todo) {
1189     o(0xE8BD0000|todo); /* pop {todo} */
1190     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1191       int r;
1192       pplan->sval->r = pplan->start;
1193       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1194          can occupy more than 2 registers. Thus, we need to push on the value
1195          stack some fake parameter to have on SValue for each registers used
1196          by a structure (r2 is not used). */
1197       for (r = pplan->start + 1; r <= pplan->end; r++) {
1198         if (todo & (1 << r)) {
1199           nb_extra_sval++;
1200           vpushi(0);
1201           vtop->r = r;
1202         }
1203       }
1204     }
1205   }
1206   return nb_extra_sval;
1207 }
1208
1209 /* Generate function call. The function address is pushed first, then
1210    all the parameters in call order. This functions pops all the
1211    parameters and the function address. */
1212 void gfunc_call(int nb_args)
1213 {
1214   int r, args_size;
1215   int def_float_abi = float_abi;
1216   int todo;
1217   struct plan plan;
1218
1219 #ifdef TCC_ARM_EABI
1220   int variadic;
1221
1222   if (float_abi == ARM_HARD_FLOAT) {
1223     variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
1224     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1225       float_abi = ARM_SOFTFP_FLOAT;
1226   }
1227 #endif
1228   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1229      VT_JMP anywhere except on the top of the stack because it would complicate
1230      the code generator. */
1231   r = vtop->r & VT_VALMASK;
1232   if (r == VT_CMP || (r & ~1) == VT_JMP)
1233     gv(RC_INT);
1234
1235   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1236
1237 #ifdef TCC_ARM_EABI
1238   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1239     args_size = (args_size + 7) & ~7;
1240     o(0xE24DD004); /* sub sp, sp, #4 */
1241   }
1242 #endif
1243
1244   nb_args += copy_params(nb_args, &plan, todo);
1245   tcc_free(plan.pplans);
1246
1247   /* Move fct SValue on top as required by gcall_or_jmp */
1248   vrotb(nb_args + 1);
1249   gcall_or_jmp(0);
1250   if (args_size)
1251       gadd_sp(args_size); /* pop all parameters passed on the stack */
1252 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1253   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1254     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1255       o(0xEE000A10); /*vmov s0, r0 */
1256     } else {
1257       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1258       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1259     }
1260   }
1261 #endif
1262   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1263   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1264   float_abi = def_float_abi;
1265 }
1266
1267 /* generate function prolog of type 't' */
1268 void gfunc_prolog(Sym *func_sym)
1269 {
1270   CType *func_type = &func_sym->type;
1271   Sym *sym,*sym2;
1272   int n, nf, size, align, rs, struct_ret = 0;
1273   int addr, pn, sn; /* pn=core, sn=stack */
1274   CType ret_type;
1275
1276 #ifdef TCC_ARM_EABI
1277   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1278 #endif
1279
1280   sym = func_type->ref;
1281   func_vt = sym->type;
1282   func_var = (func_type->ref->f.func_type == FUNC_ELLIPSIS);
1283
1284   n = nf = 0;
1285   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1286       !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1287   {
1288     n++;
1289     struct_ret = 1;
1290     func_vc = 12; /* Offset from fp of the place to store the result */
1291   }
1292   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1293     size = type_size(&sym2->type, &align);
1294 #ifdef TCC_ARM_EABI
1295     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1296         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1297       int tmpnf = assign_vfpreg(&avregs, align, size);
1298       tmpnf += (size + 3) / 4;
1299       nf = (tmpnf > nf) ? tmpnf : nf;
1300     } else
1301 #endif
1302     if (n < 4)
1303       n += (size + 3) / 4;
1304   }
1305   o(0xE1A0C00D); /* mov ip,sp */
1306   if (func_var)
1307     n=4;
1308   if (n) {
1309     if(n>4)
1310       n=4;
1311 #ifdef TCC_ARM_EABI
1312     n=(n+1)&-2;
1313 #endif
1314     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1315   }
1316   if (nf) {
1317     if (nf>16)
1318       nf=16;
1319     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1320     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1321   }
1322   o(0xE92D5800); /* save fp, ip, lr */
1323   o(0xE1A0B00D); /* mov fp, sp */
1324   func_sub_sp_offset = ind;
1325   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1326
1327 #ifdef TCC_ARM_EABI
1328   if (float_abi == ARM_HARD_FLOAT) {
1329     func_vc += nf * 4;
1330     avregs = AVAIL_REGS_INITIALIZER;
1331   }
1332 #endif
1333   pn = struct_ret, sn = 0;
1334   while ((sym = sym->next)) {
1335     CType *type;
1336     type = &sym->type;
1337     size = type_size(type, &align);
1338     size = (size + 3) >> 2;
1339     align = (align + 3) & ~3;
1340 #ifdef TCC_ARM_EABI
1341     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1342         || is_hgen_float_aggr(&sym->type))) {
1343       int fpn = assign_vfpreg(&avregs, align, size << 2);
1344       if (fpn >= 0)
1345         addr = fpn * 4;
1346       else
1347         goto from_stack;
1348     } else
1349 #endif
1350     if (pn < 4) {
1351 #ifdef TCC_ARM_EABI
1352         pn = (pn + (align-1)/4) & -(align/4);
1353 #endif
1354       addr = (nf + pn) * 4;
1355       pn += size;
1356       if (!sn && pn > 4)
1357         sn = (pn - 4);
1358     } else {
1359 #ifdef TCC_ARM_EABI
1360 from_stack:
1361         sn = (sn + (align-1)/4) & -(align/4);
1362 #endif
1363       addr = (n + nf + sn) * 4;
1364       sn += size;
1365     }
1366     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL,
1367              addr + 12);
1368   }
1369   last_itod_magic=0;
1370   leaffunc = 1;
1371   loc = 0;
1372 }
1373
1374 /* generate function epilog */
1375 void gfunc_epilog(void)
1376 {
1377   uint32_t x;
1378   int diff;
1379   /* Copy float return value to core register if base standard is used and
1380      float computation is made with VFP */
1381 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1382   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1383     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1384       o(0xEE100A10); /* fmrs r0, s0 */
1385     else {
1386       o(0xEE100B10); /* fmrdl r0, d0 */
1387       o(0xEE301B10); /* fmrdh r1, d0 */
1388     }
1389   }
1390 #endif
1391   o(0xE89BA800); /* restore fp, sp, pc */
1392   diff = (-loc + 3) & -4;
1393 #ifdef TCC_ARM_EABI
1394   if(!leaffunc)
1395     diff = ((diff + 11) & -8) - 4;
1396 #endif
1397   if(diff > 0) {
1398     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1399     if(x)
1400       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1401     else {
1402       int addr;
1403       addr=ind;
1404       o(0xE59FC004); /* ldr ip,[pc+4] */
1405       o(0xE04BD00C); /* sub sp,fp,ip  */
1406       o(0xE1A0F00E); /* mov pc,lr */
1407       o(diff);
1408       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1409     }
1410   }
1411 }
1412
1413 ST_FUNC void gen_fill_nops(int bytes)
1414 {
1415     if ((bytes & 3))
1416       tcc_error("alignment of code section not multiple of 4");
1417     while (bytes > 0) {
1418         o(0xE1A00000);
1419         bytes -= 4;
1420     }
1421 }
1422
1423 /* generate a jump to a label */
1424 ST_FUNC int gjmp(int t)
1425 {
1426   int r;
1427   if (nocode_wanted)
1428     return t;
1429   r=ind;
1430   o(0xE0000000|encbranch(r,t,1));
1431   return r;
1432 }
1433
1434 /* generate a jump to a fixed address */
1435 ST_FUNC void gjmp_addr(int a)
1436 {
1437   gjmp(a);
1438 }
1439
1440 ST_FUNC int gjmp_cond(int op, int t)
1441 {
1442   int r;
1443   if (nocode_wanted)
1444     return t;
1445   r=ind;
1446   op=mapcc(op);
1447   op|=encbranch(r,t,1);
1448   o(op);
1449   return r;
1450 }
1451
1452 ST_FUNC int gjmp_append(int n, int t)
1453 {
1454   uint32_t *x;
1455   int p,lp;
1456   if(n) {
1457     p = n;
1458     do {
1459       p = decbranch(lp=p);
1460     } while(p);
1461     x = (uint32_t *)(cur_text_section->data + lp);
1462     *x &= 0xff000000;
1463     *x |= encbranch(lp,t,1);
1464     t = n;
1465   }
1466   return t;
1467 }
1468
1469 /* generate an integer binary operation */
1470 void gen_opi(int op)
1471 {
1472   int c, func = 0;
1473   uint32_t opc = 0, r, fr;
1474   unsigned short retreg = REG_IRET;
1475
1476   c=0;
1477   switch(op) {
1478     case '+':
1479       opc = 0x8;
1480       c=1;
1481       break;
1482     case TOK_ADDC1: /* add with carry generation */
1483       opc = 0x9;
1484       c=1;
1485       break;
1486     case '-':
1487       opc = 0x4;
1488       c=1;
1489       break;
1490     case TOK_SUBC1: /* sub with carry generation */
1491       opc = 0x5;
1492       c=1;
1493       break;
1494     case TOK_ADDC2: /* add with carry use */
1495       opc = 0xA;
1496       c=1;
1497       break;
1498     case TOK_SUBC2: /* sub with carry use */
1499       opc = 0xC;
1500       c=1;
1501       break;
1502     case '&':
1503       opc = 0x0;
1504       c=1;
1505       break;
1506     case '^':
1507       opc = 0x2;
1508       c=1;
1509       break;
1510     case '|':
1511       opc = 0x18;
1512       c=1;
1513       break;
1514     case '*':
1515       gv2(RC_INT, RC_INT);
1516       r = vtop[-1].r;
1517       fr = vtop[0].r;
1518       vtop--;
1519       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1520       return;
1521     case TOK_SHL:
1522       opc = 0;
1523       c=2;
1524       break;
1525     case TOK_SHR:
1526       opc = 1;
1527       c=2;
1528       break;
1529     case TOK_SAR:
1530       opc = 2;
1531       c=2;
1532       break;
1533     case '/':
1534     case TOK_PDIV:
1535       func=TOK___divsi3;
1536       c=3;
1537       break;
1538     case TOK_UDIV:
1539       func=TOK___udivsi3;
1540       c=3;
1541       break;
1542     case '%':
1543 #ifdef TCC_ARM_EABI
1544       func=TOK___aeabi_idivmod;
1545       retreg=REG_IRE2;
1546 #else
1547       func=TOK___modsi3;
1548 #endif
1549       c=3;
1550       break;
1551     case TOK_UMOD:
1552 #ifdef TCC_ARM_EABI
1553       func=TOK___aeabi_uidivmod;
1554       retreg=REG_IRE2;
1555 #else
1556       func=TOK___umodsi3;
1557 #endif
1558       c=3;
1559       break;
1560     case TOK_UMULL:
1561       gv2(RC_INT, RC_INT);
1562       r=intr(vtop[-1].r2=get_reg(RC_INT));
1563       c=vtop[-1].r;
1564       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1565       vtop--;
1566       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1567       return;
1568     default:
1569       opc = 0x15;
1570       c=1;
1571       break;
1572   }
1573   switch(c) {
1574     case 1:
1575       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1576         if(opc == 4 || opc == 5 || opc == 0xc) {
1577           vswap();
1578           opc|=2; // sub -> rsb
1579         }
1580       }
1581       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1582           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1583         gv(RC_INT);
1584       vswap();
1585       c=intr(gv(RC_INT));
1586       vswap();
1587       opc=0xE0000000|(opc<<20)|(c<<16);
1588       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1589         uint32_t x;
1590         x=stuff_const(opc|0x2000000,vtop->c.i);
1591         if(x) {
1592           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1593           o(x|(r<<12));
1594           goto done;
1595         }
1596       }
1597       fr=intr(gv(RC_INT));
1598       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1599       o(opc|(r<<12)|fr);
1600 done:
1601       vtop--;
1602       if (op >= TOK_ULT && op <= TOK_GT)
1603         vset_VT_CMP(op);
1604       break;
1605     case 2:
1606       opc=0xE1A00000|(opc<<5);
1607       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1608           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1609         gv(RC_INT);
1610       vswap();
1611       r=intr(gv(RC_INT));
1612       vswap();
1613       opc|=r;
1614       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1615         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1616         c = vtop->c.i & 0x1f;
1617         o(opc|(c<<7)|(fr<<12));
1618       } else {
1619         fr=intr(gv(RC_INT));
1620         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1621         o(opc|(c<<12)|(fr<<8)|0x10);
1622       }
1623       vtop--;
1624       break;
1625     case 3:
1626       vpush_global_sym(&func_old_type, func);
1627       vrott(3);
1628       gfunc_call(2);
1629       vpushi(0);
1630       vtop->r = retreg;
1631       break;
1632     default:
1633       tcc_error("gen_opi %i unimplemented!",op);
1634   }
1635 }
1636
1637 #ifdef TCC_ARM_VFP
1638 static int is_zero(int i)
1639 {
1640   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1641     return 0;
1642   if (vtop[i].type.t == VT_FLOAT)
1643     return (vtop[i].c.f == 0.f);
1644   else if (vtop[i].type.t == VT_DOUBLE)
1645     return (vtop[i].c.d == 0.0);
1646   return (vtop[i].c.ld == 0.l);
1647 }
1648
1649 /* generate a floating point operation 'v = t1 op t2' instruction. The
1650  *    two operands are guaranteed to have the same floating point type */
1651 void gen_opf(int op)
1652 {
1653   uint32_t x;
1654   int fneg=0,r;
1655   x=0xEE000A00|T2CPR(vtop->type.t);
1656   switch(op) {
1657     case '+':
1658       if(is_zero(-1))
1659         vswap();
1660       if(is_zero(0)) {
1661         vtop--;
1662         return;
1663       }
1664       x|=0x300000;
1665       break;
1666     case '-':
1667       x|=0x300040;
1668       if(is_zero(0)) {
1669         vtop--;
1670         return;
1671       }
1672       if(is_zero(-1)) {
1673         x|=0x810000; /* fsubX -> fnegX */
1674         vswap();
1675         vtop--;
1676         fneg=1;
1677       }
1678       break;
1679     case '*':
1680       x|=0x200000;
1681       break;
1682     case '/':
1683       x|=0x800000;
1684       break;
1685     default:
1686       if(op < TOK_ULT || op > TOK_GT) {
1687         tcc_error("unknown fp op %x!",op);
1688         return;
1689       }
1690       if(is_zero(-1)) {
1691         vswap();
1692         switch(op) {
1693           case TOK_LT: op=TOK_GT; break;
1694           case TOK_GE: op=TOK_ULE; break;
1695           case TOK_LE: op=TOK_GE; break;
1696           case TOK_GT: op=TOK_ULT; break;
1697         }
1698       }
1699       x|=0xB40040; /* fcmpX */
1700       if(op!=TOK_EQ && op!=TOK_NE)
1701         x|=0x80; /* fcmpX -> fcmpeX */
1702       if(is_zero(0)) {
1703         vtop--;
1704         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1705       } else {
1706         x|=vfpr(gv(RC_FLOAT));
1707         vswap();
1708         o(x|(vfpr(gv(RC_FLOAT))<<12));
1709         vtop--;
1710       }
1711       o(0xEEF1FA10); /* fmstat */
1712
1713       switch(op) {
1714         case TOK_LE: op=TOK_ULE; break;
1715         case TOK_LT: op=TOK_ULT; break;
1716         case TOK_UGE: op=TOK_GE; break;
1717         case TOK_UGT: op=TOK_GT; break;
1718       }
1719       vset_VT_CMP(op);
1720       return;
1721   }
1722   r=gv(RC_FLOAT);
1723   x|=vfpr(r);
1724   r=regmask(r);
1725   if(!fneg) {
1726     int r2;
1727     vswap();
1728     r2=gv(RC_FLOAT);
1729     x|=vfpr(r2)<<16;
1730     r|=regmask(r2);
1731   }
1732   vtop->r=get_reg_ex(RC_FLOAT,r);
1733   if(!fneg)
1734     vtop--;
1735   o(x|(vfpr(vtop->r)<<12));
1736 }
1737
1738 #else
1739 static uint32_t is_fconst()
1740 {
1741   long double f;
1742   uint32_t r;
1743   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1744     return 0;
1745   if (vtop->type.t == VT_FLOAT)
1746     f = vtop->c.f;
1747   else if (vtop->type.t == VT_DOUBLE)
1748     f = vtop->c.d;
1749   else
1750     f = vtop->c.ld;
1751   if(!ieee_finite(f))
1752     return 0;
1753   r=0x8;
1754   if(f<0.0) {
1755     r=0x18;
1756     f=-f;
1757   }
1758   if(f==0.0)
1759     return r;
1760   if(f==1.0)
1761     return r|1;
1762   if(f==2.0)
1763     return r|2;
1764   if(f==3.0)
1765     return r|3;
1766   if(f==4.0)
1767     return r|4;
1768   if(f==5.0)
1769     return r|5;
1770   if(f==0.5)
1771     return r|6;
1772   if(f==10.0)
1773     return r|7;
1774   return 0;
1775 }
1776
1777 /* generate a floating point operation 'v = t1 op t2' instruction. The
1778    two operands are guaranteed to have the same floating point type */
1779 void gen_opf(int op)
1780 {
1781   uint32_t x, r, r2, c1, c2;
1782   //fputs("gen_opf\n",stderr);
1783   vswap();
1784   c1 = is_fconst();
1785   vswap();
1786   c2 = is_fconst();
1787   x=0xEE000100;
1788 #if LDOUBLE_SIZE == 8
1789   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1790     x|=0x80;
1791 #else
1792   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1793     x|=0x80;
1794   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1795     x|=0x80000;
1796 #endif
1797   switch(op)
1798   {
1799     case '+':
1800       if(!c2) {
1801         vswap();
1802         c2=c1;
1803       }
1804       vswap();
1805       r=fpr(gv(RC_FLOAT));
1806       vswap();
1807       if(c2) {
1808         if(c2>0xf)
1809           x|=0x200000; // suf
1810         r2=c2&0xf;
1811       } else {
1812         r2=fpr(gv(RC_FLOAT));
1813       }
1814       break;
1815     case '-':
1816       if(c2) {
1817         if(c2<=0xf)
1818           x|=0x200000; // suf
1819         r2=c2&0xf;
1820         vswap();
1821         r=fpr(gv(RC_FLOAT));
1822         vswap();
1823       } else if(c1 && c1<=0xf) {
1824         x|=0x300000; // rsf
1825         r2=c1;
1826         r=fpr(gv(RC_FLOAT));
1827         vswap();
1828       } else {
1829         x|=0x200000; // suf
1830         vswap();
1831         r=fpr(gv(RC_FLOAT));
1832         vswap();
1833         r2=fpr(gv(RC_FLOAT));
1834       }
1835       break;
1836     case '*':
1837       if(!c2 || c2>0xf) {
1838         vswap();
1839         c2=c1;
1840       }
1841       vswap();
1842       r=fpr(gv(RC_FLOAT));
1843       vswap();
1844       if(c2 && c2<=0xf)
1845         r2=c2;
1846       else
1847         r2=fpr(gv(RC_FLOAT));
1848       x|=0x100000; // muf
1849       break;
1850     case '/':
1851       if(c2 && c2<=0xf) {
1852         x|=0x400000; // dvf
1853         r2=c2;
1854         vswap();
1855         r=fpr(gv(RC_FLOAT));
1856         vswap();
1857       } else if(c1 && c1<=0xf) {
1858         x|=0x500000; // rdf
1859         r2=c1;
1860         r=fpr(gv(RC_FLOAT));
1861         vswap();
1862       } else {
1863         x|=0x400000; // dvf
1864         vswap();
1865         r=fpr(gv(RC_FLOAT));
1866         vswap();
1867         r2=fpr(gv(RC_FLOAT));
1868       }
1869       break;
1870     default:
1871       if(op >= TOK_ULT && op <= TOK_GT) {
1872         x|=0xd0f110; // cmfe
1873 /* bug (intention?) in Linux FPU emulator
1874    doesn't set carry if equal */
1875         switch(op) {
1876           case TOK_ULT:
1877           case TOK_UGE:
1878           case TOK_ULE:
1879           case TOK_UGT:
1880             tcc_error("unsigned comparison on floats?");
1881             break;
1882           case TOK_LT:
1883             op=TOK_Nset;
1884             break;
1885           case TOK_LE:
1886             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1887             break;
1888           case TOK_EQ:
1889           case TOK_NE:
1890             x&=~0x400000; // cmfe -> cmf
1891             break;
1892         }
1893         if(c1 && !c2) {
1894           c2=c1;
1895           vswap();
1896           switch(op) {
1897             case TOK_Nset:
1898               op=TOK_GT;
1899               break;
1900             case TOK_GE:
1901               op=TOK_ULE;
1902               break;
1903             case TOK_ULE:
1904               op=TOK_GE;
1905               break;
1906             case TOK_GT:
1907               op=TOK_Nset;
1908               break;
1909           }
1910         }
1911         vswap();
1912         r=fpr(gv(RC_FLOAT));
1913         vswap();
1914         if(c2) {
1915           if(c2>0xf)
1916             x|=0x200000;
1917           r2=c2&0xf;
1918         } else {
1919           r2=fpr(gv(RC_FLOAT));
1920         }
1921         --vtop;
1922         vset_VT_CMP(op);
1923         ++vtop;
1924       } else {
1925         tcc_error("unknown fp op %x!",op);
1926         return;
1927       }
1928   }
1929   if(vtop[-1].r == VT_CMP)
1930     c1=15;
1931   else {
1932     c1=vtop->r;
1933     if(r2&0x8)
1934       c1=vtop[-1].r;
1935     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1936     c1=fpr(vtop[-1].r);
1937   }
1938   vtop--;
1939   o(x|(r<<16)|(c1<<12)|r2);
1940 }
1941 #endif
1942
1943 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1944    and 'long long' cases. */
1945 ST_FUNC void gen_cvt_itof(int t)
1946 {
1947   uint32_t r, r2;
1948   int bt;
1949   bt=vtop->type.t & VT_BTYPE;
1950   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1951 #ifndef TCC_ARM_VFP
1952     uint32_t dsize = 0;
1953 #endif
1954     r=intr(gv(RC_INT));
1955 #ifdef TCC_ARM_VFP
1956     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1957     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1958     r2|=r2<<12;
1959     if(!(vtop->type.t & VT_UNSIGNED))
1960       r2|=0x80;                /* fuitoX -> fsituX */
1961     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1962 #else
1963     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1964     if((t & VT_BTYPE) != VT_FLOAT)
1965       dsize=0x80;    /* flts -> fltd */
1966     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1967     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1968       uint32_t off = 0;
1969       o(0xE3500000|(r<<12));        /* cmp */
1970       r=fpr(get_reg(RC_FLOAT));
1971       if(last_itod_magic) {
1972         off=ind+8-last_itod_magic;
1973         off/=4;
1974         if(off>255)
1975           off=0;
1976       }
1977       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1978       if(!off) {
1979         o(0xEA000000);              /* b */
1980         last_itod_magic=ind;
1981         o(0x4F800000);              /* 4294967296.0f */
1982       }
1983       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1984     }
1985 #endif
1986     return;
1987   } else if(bt == VT_LLONG) {
1988     int func;
1989     CType *func_type = 0;
1990     if((t & VT_BTYPE) == VT_FLOAT) {
1991       func_type = &func_float_type;
1992       if(vtop->type.t & VT_UNSIGNED)
1993         func=TOK___floatundisf;
1994       else
1995         func=TOK___floatdisf;
1996 #if LDOUBLE_SIZE != 8
1997     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1998       func_type = &func_ldouble_type;
1999       if(vtop->type.t & VT_UNSIGNED)
2000         func=TOK___floatundixf;
2001       else
2002         func=TOK___floatdixf;
2003     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2004 #else
2005     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2006 #endif
2007       func_type = &func_double_type;
2008       if(vtop->type.t & VT_UNSIGNED)
2009         func=TOK___floatundidf;
2010       else
2011         func=TOK___floatdidf;
2012     }
2013     if(func_type) {
2014       vpush_global_sym(func_type, func);
2015       vswap();
2016       gfunc_call(1);
2017       vpushi(0);
2018       vtop->r=TREG_F0;
2019       return;
2020     }
2021   }
2022   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2023 }
2024
2025 /* convert fp to int 't' type */
2026 void gen_cvt_ftoi(int t)
2027 {
2028   uint32_t r, r2;
2029   int u, func = 0;
2030   u=t&VT_UNSIGNED;
2031   t&=VT_BTYPE;
2032   r2=vtop->type.t & VT_BTYPE;
2033   if(t==VT_INT) {
2034 #ifdef TCC_ARM_VFP
2035     r=vfpr(gv(RC_FLOAT));
2036     u=u?0:0x10000;
2037     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2038     r2=intr(vtop->r=get_reg(RC_INT));
2039     o(0xEE100A10|(r<<16)|(r2<<12));
2040     return;
2041 #else
2042     if(u) {
2043       if(r2 == VT_FLOAT)
2044         func=TOK___fixunssfsi;
2045 #if LDOUBLE_SIZE != 8
2046       else if(r2 == VT_LDOUBLE)
2047         func=TOK___fixunsxfsi;
2048       else if(r2 == VT_DOUBLE)
2049 #else
2050       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2051 #endif
2052         func=TOK___fixunsdfsi;
2053     } else {
2054       r=fpr(gv(RC_FLOAT));
2055       r2=intr(vtop->r=get_reg(RC_INT));
2056       o(0xEE100170|(r2<<12)|r);
2057       return;
2058     }
2059 #endif
2060   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2061     if(r2 == VT_FLOAT)
2062       func=TOK___fixsfdi;
2063 #if LDOUBLE_SIZE != 8
2064     else if(r2 == VT_LDOUBLE)
2065       func=TOK___fixxfdi;
2066     else if(r2 == VT_DOUBLE)
2067 #else
2068     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2069 #endif
2070       func=TOK___fixdfdi;
2071   }
2072   if(func) {
2073     vpush_global_sym(&func_old_type, func);
2074     vswap();
2075     gfunc_call(1);
2076     vpushi(0);
2077     if(t == VT_LLONG)
2078       vtop->r2 = REG_IRE2;
2079     vtop->r = REG_IRET;
2080     return;
2081   }
2082   tcc_error("unimplemented gen_cvt_ftoi!");
2083 }
2084
2085 /* convert from one floating point type to another */
2086 void gen_cvt_ftof(int t)
2087 {
2088 #ifdef TCC_ARM_VFP
2089   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2090     uint32_t r = vfpr(gv(RC_FLOAT));
2091     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2092   }
2093 #else
2094   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2095   gv(RC_FLOAT);
2096 #endif
2097 }
2098
2099 /* computed goto support */
2100 void ggoto(void)
2101 {
2102   gcall_or_jmp(1);
2103   vtop--;
2104 }
2105
2106 /* Save the stack pointer onto the stack and return the location of its address */
2107 ST_FUNC void gen_vla_sp_save(int addr) {
2108     SValue v;
2109     v.type.t = VT_PTR;
2110     v.r = VT_LOCAL | VT_LVAL;
2111     v.c.i = addr;
2112     store(TREG_SP, &v);
2113 }
2114
2115 /* Restore the SP from a location on the stack */
2116 ST_FUNC void gen_vla_sp_restore(int addr) {
2117     SValue v;
2118     v.type.t = VT_PTR;
2119     v.r = VT_LOCAL | VT_LVAL;
2120     v.c.i = addr;
2121     load(TREG_SP, &v);
2122 }
2123
2124 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2125 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2126     int r = intr(gv(RC_INT));
2127     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2128 #ifdef TCC_ARM_EABI
2129     if (align < 8)
2130         align = 8;
2131 #else
2132     if (align < 4)
2133         align = 4;
2134 #endif
2135     if (align & (align - 1))
2136         tcc_error("alignment is not a power of 2: %i", align);
2137     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2138     vpop();
2139 }
2140
2141 /* end of ARM code generator */
2142 /*************************************************************/
2143 #endif
2144 /*************************************************************/