arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_ARM_VERSION
  38 # define TCC_ARM_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #include "tcc.h"
 136
 137 enum float_abi float_abi;
 138
 139 ST_DATA const int reg_classes[NB_REGS] = {
 140     /* r0 */ RC_INT | RC_R0,
 141     /* r1 */ RC_INT | RC_R1,
 142     /* r2 */ RC_INT | RC_R2,
 143     /* r3 */ RC_INT | RC_R3,
 144     /* r12 */ RC_INT | RC_R12,
 145     /* f0 */ RC_FLOAT | RC_F0,
 146     /* f1 */ RC_FLOAT | RC_F1,
 147     /* f2 */ RC_FLOAT | RC_F2,
 148     /* f3 */ RC_FLOAT | RC_F3,
 149 #ifdef TCC_ARM_VFP
 150  /* d4/s8 */ RC_FLOAT | RC_F4,
 151 /* d5/s10 */ RC_FLOAT | RC_F5,
 152 /* d6/s12 */ RC_FLOAT | RC_F6,
 153 /* d7/s14 */ RC_FLOAT | RC_F7,
 154 #endif
 155 };
 156
 157 static int func_sub_sp_offset, last_itod_magic;
 158 static int leaffunc;
 159
 160 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 161 static CType float_type, double_type, func_float_type, func_double_type;
 162 ST_FUNC void arm_init(struct TCCState *s)
 163 {
 164     float_type.t = VT_FLOAT;
 165     double_type.t = VT_DOUBLE;
 166     func_float_type.t = VT_FUNC;
 167     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 168     func_double_type.t = VT_FUNC;
 169     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 170
 171     float_abi = s->float_abi;
 172 #ifndef TCC_ARM_HARDFLOAT
 173     tcc_warning("soft float ABI currently not supported: default to softfp");
 174 #endif
 175 }
 176 #else
 177 #define func_float_type func_old_type
 178 #define func_double_type func_old_type
 179 #define func_ldouble_type func_old_type
 180 ST_FUNC void arm_init(struct TCCState *s)
 181 {
 182 #if !defined (TCC_ARM_VFP)
 183     tcc_warning("Support for FPA is deprecated and will be removed in next"
 184                 " release");
 185 #endif
 186 #if !defined (TCC_ARM_EABI)
 187     tcc_warning("Support for OABI is deprecated and will be removed in next"
 188                 " release");
 189 #endif
 190 }
 191 #endif
 192
 193 static int two2mask(int a,int b) {
 194   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 195 }
 196
 197 static int regmask(int r) {
 198   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 199 }
 200
 201 /******************************************************/
 202
 203 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 204 char *default_elfinterp(struct TCCState *s)
 205 {
 206     if (s->float_abi == ARM_HARD_FLOAT)
 207         return "/lib/ld-linux-armhf.so.3";
 208     else
 209         return "/lib/ld-linux.so.3";
 210 }
 211 #endif
 212
 213 void o(uint32_t i)
 214 {
 215   /* this is a good place to start adding big-endian support*/
 216   int ind1;
 217
 218   ind1 = ind + 4;
 219   if (!cur_text_section)
 220     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 221          "can't evaluate constant expressions outside of a function.");
 222   if (ind1 > cur_text_section->data_allocated)
 223     section_realloc(cur_text_section, ind1);
 224   cur_text_section->data[ind++] = i&255;
 225   i>>=8;
 226   cur_text_section->data[ind++] = i&255;
 227   i>>=8;
 228   cur_text_section->data[ind++] = i&255;
 229   i>>=8;
 230   cur_text_section->data[ind++] = i;
 231 }
 232
 233 static uint32_t stuff_const(uint32_t op, uint32_t c)
 234 {
 235   int try_neg=0;
 236   uint32_t nc = 0, negop = 0;
 237
 238   switch(op&0x1F00000)
 239   {
 240     case 0x800000: //add
 241     case 0x400000: //sub
 242       try_neg=1;
 243       negop=op^0xC00000;
 244       nc=-c;
 245       break;
 246     case 0x1A00000: //mov
 247     case 0x1E00000: //mvn
 248       try_neg=1;
 249       negop=op^0x400000;
 250       nc=~c;
 251       break;
 252     case 0x200000: //xor
 253       if(c==~0)
 254         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 255       break;
 256     case 0x0: //and
 257       if(c==~0)
 258         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 259     case 0x1C00000: //bic
 260       try_neg=1;
 261       negop=op^0x1C00000;
 262       nc=~c;
 263       break;
 264     case 0x1800000: //orr
 265       if(c==~0)
 266         return (op&0xFFF0FFFF)|0x1E00000;
 267       break;
 268   }
 269   do {
 270     uint32_t m;
 271     int i;
 272     if(c<256) /* catch undefined <<32 */
 273       return op|c;
 274     for(i=2;i<32;i+=2) {
 275       m=(0xff>>i)|(0xff<<(32-i));
 276       if(!(c&~m))
 277         return op|(i<<7)|(c<<i)|(c>>(32-i));
 278     }
 279     op=negop;
 280     c=nc;
 281   } while(try_neg--);
 282   return 0;
 283 }
 284
 285
 286 //only add,sub
 287 void stuff_const_harder(uint32_t op, uint32_t v) {
 288   uint32_t x;
 289   x=stuff_const(op,v);
 290   if(x)
 291     o(x);
 292   else {
 293     uint32_t a[16], nv, no, o2, n2;
 294     int i,j,k;
 295     a[0]=0xff;
 296     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 297     for(i=1;i<16;i++)
 298       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 299     for(i=0;i<12;i++)
 300       for(j=i<4?i+12:15;j>=i+4;j--)
 301         if((v&(a[i]|a[j]))==v) {
 302           o(stuff_const(op,v&a[i]));
 303           o(stuff_const(o2,v&a[j]));
 304           return;
 305         }
 306     no=op^0xC00000;
 307     n2=o2^0xC00000;
 308     nv=-v;
 309     for(i=0;i<12;i++)
 310       for(j=i<4?i+12:15;j>=i+4;j--)
 311         if((nv&(a[i]|a[j]))==nv) {
 312           o(stuff_const(no,nv&a[i]));
 313           o(stuff_const(n2,nv&a[j]));
 314           return;
 315         }
 316     for(i=0;i<8;i++)
 317       for(j=i+4;j<12;j++)
 318         for(k=i<4?i+12:15;k>=j+4;k--)
 319           if((v&(a[i]|a[j]|a[k]))==v) {
 320             o(stuff_const(op,v&a[i]));
 321             o(stuff_const(o2,v&a[j]));
 322             o(stuff_const(o2,v&a[k]));
 323             return;
 324           }
 325     no=op^0xC00000;
 326     nv=-v;
 327     for(i=0;i<8;i++)
 328       for(j=i+4;j<12;j++)
 329         for(k=i<4?i+12:15;k>=j+4;k--)
 330           if((nv&(a[i]|a[j]|a[k]))==nv) {
 331             o(stuff_const(no,nv&a[i]));
 332             o(stuff_const(n2,nv&a[j]));
 333             o(stuff_const(n2,nv&a[k]));
 334             return;
 335           }
 336     o(stuff_const(op,v&a[0]));
 337     o(stuff_const(o2,v&a[4]));
 338     o(stuff_const(o2,v&a[8]));
 339     o(stuff_const(o2,v&a[12]));
 340   }
 341 }
 342
 343 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 344 {
 345   addr-=pos+8;
 346   addr/=4;
 347   if(addr>=0x1000000 || addr<-0x1000000) {
 348     if(fail)
 349       tcc_error("FIXME: function bigger than 32MB");
 350     return 0;
 351   }
 352   return 0x0A000000|(addr&0xffffff);
 353 }
 354
 355 int decbranch(int pos)
 356 {
 357   int x;
 358   x=*(uint32_t *)(cur_text_section->data + pos);
 359   x&=0x00ffffff;
 360   if(x&0x800000)
 361     x-=0x1000000;
 362   return x*4+pos+8;
 363 }
 364
 365 /* output a symbol and patch all calls to it */
 366 void gsym_addr(int t, int a)
 367 {
 368   uint32_t *x;
 369   int lt;
 370   while(t) {
 371     x=(uint32_t *)(cur_text_section->data + t);
 372     t=decbranch(lt=t);
 373     if(a==lt+4)
 374       *x=0xE1A00000; // nop
 375     else {
 376       *x &= 0xff000000;
 377       *x |= encbranch(lt,a,1);
 378     }
 379   }
 380 }
 381
 382 void gsym(int t)
 383 {
 384   gsym_addr(t, ind);
 385 }
 386
 387 #ifdef TCC_ARM_VFP
 388 static uint32_t vfpr(int r)
 389 {
 390   if(r<TREG_F0 || r>TREG_F7)
 391     tcc_error("compiler error! register %i is no vfp register",r);
 392   return r - TREG_F0;
 393 }
 394 #else
 395 static uint32_t fpr(int r)
 396 {
 397   if(r<TREG_F0 || r>TREG_F3)
 398     tcc_error("compiler error! register %i is no fpa register",r);
 399   return r - TREG_F0;
 400 }
 401 #endif
 402
 403 static uint32_t intr(int r)
 404 {
 405   if(r == TREG_R12)
 406     return 12;
 407   if(r >= TREG_R0 && r <= TREG_R3)
 408     return r - TREG_R0;
 409   if (r >= TREG_SP && r <= TREG_LR)
 410     return r + (13 - TREG_SP);
 411   tcc_error("compiler error! register %i is no int register",r);
 412 }
 413
 414 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 415 {
 416   if(*off>maxoff || *off&((1<<shift)-1)) {
 417     uint32_t x, y;
 418     x=0xE280E000;
 419     if(*sgn)
 420       x=0xE240E000;
 421     x|=(*base)<<16;
 422     *base=14; // lr
 423     y=stuff_const(x,*off&~maxoff);
 424     if(y) {
 425       o(y);
 426       *off&=maxoff;
 427       return;
 428     }
 429     y=stuff_const(x,(*off+maxoff)&~maxoff);
 430     if(y) {
 431       o(y);
 432       *sgn=!*sgn;
 433       *off=((*off+maxoff)&~maxoff)-*off;
 434       return;
 435     }
 436     stuff_const_harder(x,*off&~maxoff);
 437     *off&=maxoff;
 438   }
 439 }
 440
 441 static uint32_t mapcc(int cc)
 442 {
 443   switch(cc)
 444   {
 445     case TOK_ULT:
 446       return 0x30000000; /* CC/LO */
 447     case TOK_UGE:
 448       return 0x20000000; /* CS/HS */
 449     case TOK_EQ:
 450       return 0x00000000; /* EQ */
 451     case TOK_NE:
 452       return 0x10000000; /* NE */
 453     case TOK_ULE:
 454       return 0x90000000; /* LS */
 455     case TOK_UGT:
 456       return 0x80000000; /* HI */
 457     case TOK_Nset:
 458       return 0x40000000; /* MI */
 459     case TOK_Nclear:
 460       return 0x50000000; /* PL */
 461     case TOK_LT:
 462       return 0xB0000000; /* LT */
 463     case TOK_GE:
 464       return 0xA0000000; /* GE */
 465     case TOK_LE:
 466       return 0xD0000000; /* LE */
 467     case TOK_GT:
 468       return 0xC0000000; /* GT */
 469   }
 470   tcc_error("unexpected condition code");
 471   return 0xE0000000; /* AL */
 472 }
 473
 474 static int negcc(int cc)
 475 {
 476   switch(cc)
 477   {
 478     case TOK_ULT:
 479       return TOK_UGE;
 480     case TOK_UGE:
 481       return TOK_ULT;
 482     case TOK_EQ:
 483       return TOK_NE;
 484     case TOK_NE:
 485       return TOK_EQ;
 486     case TOK_ULE:
 487       return TOK_UGT;
 488     case TOK_UGT:
 489       return TOK_ULE;
 490     case TOK_Nset:
 491       return TOK_Nclear;
 492     case TOK_Nclear:
 493       return TOK_Nset;
 494     case TOK_LT:
 495       return TOK_GE;
 496     case TOK_GE:
 497       return TOK_LT;
 498     case TOK_LE:
 499       return TOK_GT;
 500     case TOK_GT:
 501       return TOK_LE;
 502   }
 503   tcc_error("unexpected condition code");
 504   return TOK_NE;
 505 }
 506
 507 /* load 'r' from value 'sv' */
 508 void load(int r, SValue *sv)
 509 {
 510   int v, ft, fc, fr, sign;
 511   uint32_t op;
 512   SValue v1;
 513
 514   fr = sv->r;
 515   ft = sv->type.t;
 516   fc = sv->c.i;
 517
 518   if(fc>=0)
 519     sign=0;
 520   else {
 521     sign=1;
 522     fc=-fc;
 523   }
 524
 525   v = fr & VT_VALMASK;
 526   if (fr & VT_LVAL) {
 527     uint32_t base = 0xB; // fp
 528     if(v == VT_LLOCAL) {
 529       v1.type.t = VT_PTR;
 530       v1.r = VT_LOCAL | VT_LVAL;
 531       v1.c.i = sv->c.i;
 532       load(TREG_LR, &v1);
 533       base = 14; /* lr */
 534       fc=sign=0;
 535       v=VT_LOCAL;
 536     } else if(v == VT_CONST) {
 537       v1.type.t = VT_PTR;
 538       v1.r = fr&~VT_LVAL;
 539       v1.c.i = sv->c.i;
 540       v1.sym=sv->sym;
 541       load(TREG_LR, &v1);
 542       base = 14; /* lr */
 543       fc=sign=0;
 544       v=VT_LOCAL;
 545     } else if(v < VT_CONST) {
 546       base=intr(v);
 547       fc=sign=0;
 548       v=VT_LOCAL;
 549     }
 550     if(v == VT_LOCAL) {
 551       if(is_float(ft)) {
 552         calcaddr(&base,&fc,&sign,1020,2);
 553 #ifdef TCC_ARM_VFP
 554         op=0xED100A00; /* flds */
 555         if(!sign)
 556           op|=0x800000;
 557         if ((ft & VT_BTYPE) != VT_FLOAT)
 558           op|=0x100;   /* flds -> fldd */
 559         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 560 #else
 561         op=0xED100100;
 562         if(!sign)
 563           op|=0x800000;
 564 #if LDOUBLE_SIZE == 8
 565         if ((ft & VT_BTYPE) != VT_FLOAT)
 566           op|=0x8000;
 567 #else
 568         if ((ft & VT_BTYPE) == VT_DOUBLE)
 569           op|=0x8000;
 570         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 571           op|=0x400000;
 572 #endif
 573         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 574 #endif
 575       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 576                 || (ft & VT_BTYPE) == VT_SHORT) {
 577         calcaddr(&base,&fc,&sign,255,0);
 578         op=0xE1500090;
 579         if ((ft & VT_BTYPE) == VT_SHORT)
 580           op|=0x20;
 581         if ((ft & VT_UNSIGNED) == 0)
 582           op|=0x40;
 583         if(!sign)
 584           op|=0x800000;
 585         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 586       } else {
 587         calcaddr(&base,&fc,&sign,4095,0);
 588         op=0xE5100000;
 589         if(!sign)
 590           op|=0x800000;
 591         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 592           op|=0x400000;
 593         o(op|(intr(r)<<12)|fc|(base<<16));
 594       }
 595       return;
 596     }
 597   } else {
 598     if (v == VT_CONST) {
 599       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
 600       if (fr & VT_SYM || !op) {
 601         o(0xE59F0000|(intr(r)<<12));
 602         o(0xEA000000);
 603         if(fr & VT_SYM)
 604           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 605         o(sv->c.i);
 606       } else
 607         o(op);
 608       return;
 609     } else if (v == VT_LOCAL) {
 610       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
 611       if (fr & VT_SYM || !op) {
 612         o(0xE59F0000|(intr(r)<<12));
 613         o(0xEA000000);
 614         if(fr & VT_SYM) // needed ?
 615           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 616         o(sv->c.i);
 617         o(0xE08B0000|(intr(r)<<12)|intr(r));
 618       } else
 619         o(op);
 620       return;
 621     } else if(v == VT_CMP) {
 622       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
 623       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
 624       return;
 625     } else if (v == VT_JMP || v == VT_JMPI) {
 626       int t;
 627       t = v & 1;
 628       o(0xE3A00000|(intr(r)<<12)|t);
 629       o(0xEA000000);
 630       gsym(sv->c.i);
 631       o(0xE3A00000|(intr(r)<<12)|(t^1));
 632       return;
 633     } else if (v < VT_CONST) {
 634       if(is_float(ft))
 635 #ifdef TCC_ARM_VFP
 636         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 637 #else
 638         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 639 #endif
 640       else
 641         o(0xE1A00000|(intr(r)<<12)|intr(v));
 642       return;
 643     }
 644   }
 645   tcc_error("load unimplemented!");
 646 }
 647
 648 /* store register 'r' in lvalue 'v' */
 649 void store(int r, SValue *sv)
 650 {
 651   SValue v1;
 652   int v, ft, fc, fr, sign;
 653   uint32_t op;
 654
 655   fr = sv->r;
 656   ft = sv->type.t;
 657   fc = sv->c.i;
 658
 659   if(fc>=0)
 660     sign=0;
 661   else {
 662     sign=1;
 663     fc=-fc;
 664   }
 665
 666   v = fr & VT_VALMASK;
 667   if (fr & VT_LVAL || fr == VT_LOCAL) {
 668     uint32_t base = 0xb; /* fp */
 669     if(v < VT_CONST) {
 670       base=intr(v);
 671       v=VT_LOCAL;
 672       fc=sign=0;
 673     } else if(v == VT_CONST) {
 674       v1.type.t = ft;
 675       v1.r = fr&~VT_LVAL;
 676       v1.c.i = sv->c.i;
 677       v1.sym=sv->sym;
 678       load(TREG_LR, &v1);
 679       base = 14; /* lr */
 680       fc=sign=0;
 681       v=VT_LOCAL;
 682     }
 683     if(v == VT_LOCAL) {
 684        if(is_float(ft)) {
 685         calcaddr(&base,&fc,&sign,1020,2);
 686 #ifdef TCC_ARM_VFP
 687         op=0xED000A00; /* fsts */
 688         if(!sign)
 689           op|=0x800000;
 690         if ((ft & VT_BTYPE) != VT_FLOAT)
 691           op|=0x100;   /* fsts -> fstd */
 692         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 693 #else
 694         op=0xED000100;
 695         if(!sign)
 696           op|=0x800000;
 697 #if LDOUBLE_SIZE == 8
 698         if ((ft & VT_BTYPE) != VT_FLOAT)
 699           op|=0x8000;
 700 #else
 701         if ((ft & VT_BTYPE) == VT_DOUBLE)
 702           op|=0x8000;
 703         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 704           op|=0x400000;
 705 #endif
 706         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 707 #endif
 708         return;
 709       } else if((ft & VT_BTYPE) == VT_SHORT) {
 710         calcaddr(&base,&fc,&sign,255,0);
 711         op=0xE14000B0;
 712         if(!sign)
 713           op|=0x800000;
 714         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 715       } else {
 716         calcaddr(&base,&fc,&sign,4095,0);
 717         op=0xE5000000;
 718         if(!sign)
 719           op|=0x800000;
 720         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 721           op|=0x400000;
 722         o(op|(intr(r)<<12)|fc|(base<<16));
 723       }
 724       return;
 725     }
 726   }
 727   tcc_error("store unimplemented");
 728 }
 729
 730 static void gadd_sp(int val)
 731 {
 732   stuff_const_harder(0xE28DD000,val);
 733 }
 734
 735 /* 'is_jmp' is '1' if it is a jump */
 736 static void gcall_or_jmp(int is_jmp)
 737 {
 738   int r;
 739   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 740     uint32_t x;
 741     /* constant case */
 742     x=encbranch(ind,ind+vtop->c.i,0);
 743     if(x) {
 744       if (vtop->r & VT_SYM) {
 745         /* relocation case */
 746         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 747       } else
 748         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 749       o(x|(is_jmp?0xE0000000:0xE1000000));
 750     } else {
 751       if(!is_jmp)
 752         o(0xE28FE004); // add lr,pc,#4
 753       o(0xE51FF004);   // ldr pc,[pc,#-4]
 754       if (vtop->r & VT_SYM)
 755         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 756       o(vtop->c.i);
 757     }
 758   } else {
 759     /* otherwise, indirect call */
 760     r = gv(RC_INT);
 761     if(!is_jmp)
 762       o(0xE1A0E00F);       // mov lr,pc
 763     o(0xE1A0F000|intr(r)); // mov pc,r
 764   }
 765 }
 766
 767 static int unalias_ldbl(int btype)
 768 {
 769 #if LDOUBLE_SIZE == 8
 770     if (btype == VT_LDOUBLE)
 771       btype = VT_DOUBLE;
 772 #endif
 773     return btype;
 774 }
 775
 776 /* Return whether a structure is an homogeneous float aggregate or not.
 777    The answer is true if all the elements of the structure are of the same
 778    primitive float type and there is less than 4 elements.
 779
 780    type: the type corresponding to the structure to be tested */
 781 static int is_hgen_float_aggr(CType *type)
 782 {
 783   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 784     struct Sym *ref;
 785     int btype, nb_fields = 0;
 786
 787     ref = type->ref->next;
 788     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 789     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 790       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 791       return !ref && nb_fields <= 4;
 792     }
 793   }
 794   return 0;
 795 }
 796
 797 struct avail_regs {
 798   signed char avail[3]; /* 3 holes max with only float and double alignments */
 799   int first_hole; /* first available hole */
 800   int last_hole; /* last available hole (none if equal to first_hole) */
 801   int first_free_reg; /* next free register in the sequence, hole excluded */
 802 };
 803
 804 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 805
 806 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 807    param) according to the rules described in the procedure call standard for
 808    the ARM architecture (AAPCS). If found, the registers are assigned to this
 809    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 810    and the parameter is a single float.
 811
 812    avregs: opaque structure to keep track of available VFP co-processor regs
 813    align: alignment contraints for the param, as returned by type_size()
 814    size: size of the parameter, as returned by type_size() */
 815 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 816 {
 817   int first_reg = 0;
 818
 819   if (avregs->first_free_reg == -1)
 820     return -1;
 821   if (align >> 3) { /* double alignment */
 822     first_reg = avregs->first_free_reg;
 823     /* alignment contraint not respected so use next reg and record hole */
 824     if (first_reg & 1)
 825       avregs->avail[avregs->last_hole++] = first_reg++;
 826   } else { /* no special alignment (float or array of float) */
 827     /* if single float and a hole is available, assign the param to it */
 828     if (size == 4 && avregs->first_hole != avregs->last_hole)
 829       return avregs->avail[avregs->first_hole++];
 830     else
 831       first_reg = avregs->first_free_reg;
 832   }
 833   if (first_reg + size / 4 <= 16) {
 834     avregs->first_free_reg = first_reg + size / 4;
 835     return first_reg;
 836   }
 837   avregs->first_free_reg = -1;
 838   return -1;
 839 }
 840
 841 /* Returns whether all params need to be passed in core registers or not.
 842    This is the case for function part of the runtime ABI. */
 843 int floats_in_core_regs(SValue *sval)
 844 {
 845   if (!sval->sym)
 846     return 0;
 847
 848   switch (sval->sym->v) {
 849     case TOK___floatundisf:
 850     case TOK___floatundidf:
 851     case TOK___fixunssfdi:
 852     case TOK___fixunsdfdi:
 853 #ifndef TCC_ARM_VFP
 854     case TOK___fixunsxfdi:
 855 #endif
 856     case TOK___floatdisf:
 857     case TOK___floatdidf:
 858     case TOK___fixsfdi:
 859     case TOK___fixdfdi:
 860       return 1;
 861
 862     default:
 863       return 0;
 864   }
 865 }
 866
 867 /* Return the number of registers needed to return the struct, or 0 if
 868    returning via struct pointer. */
 869 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 870 #ifdef TCC_ARM_EABI
 871     int size, align;
 872     size = type_size(vt, &align);
 873     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 874         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 875         *ret_align = 8;
 876         *regsize = 8;
 877         ret->ref = NULL;
 878         ret->t = VT_DOUBLE;
 879         return (size + 7) >> 3;
 880     } else if (size <= 4) {
 881         *ret_align = 4;
 882         *regsize = 4;
 883         ret->ref = NULL;
 884         ret->t = VT_INT;
 885         return 1;
 886     } else
 887         return 0;
 888 #else
 889     return 0;
 890 #endif
 891 }
 892
 893 /* Parameters are classified according to how they are copied to their final
 894    destination for the function call. Because the copying is performed class
 895    after class according to the order in the union below, it is important that
 896    some constraints about the order of the members of this union are respected:
 897    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 898    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 899      VFP_STRUCT_CLASS;
 900    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 901    See the comment for the main loop in copy_params() for the reason. */
 902 enum reg_class {
 903         STACK_CLASS = 0,
 904         CORE_STRUCT_CLASS,
 905         VFP_CLASS,
 906         VFP_STRUCT_CLASS,
 907         CORE_CLASS,
 908         NB_CLASSES
 909 };
 910
 911 struct param_plan {
 912     int start; /* first reg or addr used depending on the class */
 913     int end; /* last reg used or next free addr depending on the class */
 914     SValue *sval; /* pointer to SValue on the value stack */
 915     struct param_plan *prev; /*  previous element in this class */
 916 };
 917
 918 struct plan {
 919     struct param_plan *pplans; /* array of all the param plans */
 920     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 921 };
 922
 923 #define add_param_plan(plan,pplan,class)                        \
 924     do {                                                        \
 925         pplan.prev = plan->clsplans[class];                     \
 926         plan->pplans[plan ## _nb] = pplan;                      \
 927         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
 928     } while(0)
 929
 930 /* Assign parameters to registers and stack with alignment according to the
 931    rules in the procedure call standard for the ARM architecture (AAPCS).
 932    The overall assignment is recorded in an array of per parameter structures
 933    called parameter plans. The parameter plans are also further organized in a
 934    number of linked lists, one per class of parameter (see the comment for the
 935    definition of union reg_class).
 936
 937    nb_args: number of parameters of the function for which a call is generated
 938    float_abi: float ABI in use for this function call
 939    plan: the structure where the overall assignment is recorded
 940    todo: a bitmap that record which core registers hold a parameter
 941
 942    Returns the amount of stack space needed for parameter passing
 943
 944    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 945    is the responsibility of the caller to free this array once used (ie not
 946    before copy_params). */
 947 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
 948 {
 949   int i, size, align;
 950   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 951   int plan_nb = 0;
 952   struct param_plan pplan;
 953   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 954
 955   ncrn = nsaa = 0;
 956   *todo = 0;
 957   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 958   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 959   for(i = nb_args; i-- ;) {
 960     int j, start_vfpreg = 0;
 961     CType type = vtop[-i].type;
 962     type.t &= ~VT_ARRAY;
 963     size = type_size(&type, &align);
 964     size = (size + 3) & ~3;
 965     align = (align + 3) & ~3;
 966     switch(vtop[-i].type.t & VT_BTYPE) {
 967       case VT_STRUCT:
 968       case VT_FLOAT:
 969       case VT_DOUBLE:
 970       case VT_LDOUBLE:
 971       if (float_abi == ARM_HARD_FLOAT) {
 972         int is_hfa = 0; /* Homogeneous float aggregate */
 973
 974         if (is_float(vtop[-i].type.t)
 975             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 976           int end_vfpreg;
 977
 978           start_vfpreg = assign_vfpreg(&avregs, align, size);
 979           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 980           if (start_vfpreg >= 0) {
 981             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 982             if (is_hfa)
 983               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 984             else
 985               add_param_plan(plan, pplan, VFP_CLASS);
 986             continue;
 987           } else
 988             break;
 989         }
 990       }
 991       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
 992       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 993         /* The parameter is allocated both in core register and on stack. As
 994          * such, it can be of either class: it would either be the last of
 995          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
 996         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 997           *todo|=(1<<j);
 998         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
 999         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1000         ncrn += size/4;
1001         if (ncrn > 4)
1002           nsaa = (ncrn - 4) * 4;
1003       } else {
1004         ncrn = 4;
1005         break;
1006       }
1007       continue;
1008       default:
1009       if (ncrn < 4) {
1010         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1011
1012         if (is_long) {
1013           ncrn = (ncrn + 1) & -2;
1014           if (ncrn == 4)
1015             break;
1016         }
1017         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1018         ncrn++;
1019         if (is_long)
1020           pplan.end = ncrn++;
1021         add_param_plan(plan, pplan, CORE_CLASS);
1022         continue;
1023       }
1024     }
1025     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1026     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1027     add_param_plan(plan, pplan, STACK_CLASS);
1028     nsaa += size; /* size already rounded up before */
1029   }
1030   return nsaa;
1031 }
1032
1033 #undef add_param_plan
1034
 1035 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
 1036    a function call, following the plan built beforehand.
 1037
 1038    nb_args: number of parameters the function takes
 1039    plan: the overall assignment plan for parameters
 1040    todo: a bitmap indicating which core regs will hold (part of) a structure
         parameter; these regs are loaded with a single pop at the end
 1041
 1042    Returns the number of SValue added by this function on the value stack */
 1043 static int copy_params(int nb_args, struct plan *plan, int todo)
 1044 {
 1045   int size, align, r, i, nb_extra_sval = 0;
 1046   struct param_plan *pplan;
 1047   int pass = 0; /* 0: copy every parameter, 1: reload pass (see 'again' loop) */
 1048
 1049    /* Several constraints require parameters to be copied in a specific order:
 1050       - structures are copied to the stack before being loaded in a reg;
 1051       - floats loaded to an odd numbered VFP reg are first copied to the
 1052         preceding even numbered VFP reg and then moved to the next VFP reg.
 1053
 1054       It is thus important that:
 1055       - structures assigned to core regs must be copied after parameters
 1056         assigned to the stack but before structures assigned to VFP regs because
 1057         a structure can lie partly in core registers and partly on the stack;
 1058       - parameters assigned to the stack and all structures be copied before
 1059         parameters assigned to a core reg since copying a parameter to the stack
 1060         requires using a core reg;
 1061       - parameters assigned to VFP regs be copied before structures assigned to
 1062         VFP regs as the copy might use an even numbered VFP reg that already
 1063         holds part of a structure. */
 1064 again:
        /* Classes are visited in increasing index order; within a class the
           per-class list is followed through its 'prev' links. */
 1065   for(i = 0; i < NB_CLASSES; i++) {
 1066     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
 1067
            /* On the second pass only CORE_CLASS parameters whose SValue 'r' is
               not below VT_CONST any more are handled again; everything else
               was fully dealt with by the first pass. */
 1068       if (pass
 1069           && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
 1070         continue;
 1071
            /* Work on a copy pushed on top of the value stack; it is dropped by
               the 'vtop--' at the end of this loop body. */
 1072       vpushv(pplan->sval);
 1073       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
 1074       switch(i) {
 1075         case STACK_CLASS:
 1076         case CORE_STRUCT_CLASS:
 1077         case VFP_STRUCT_CLASS:
 1078           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
 1079             int padding = 0;
 1080             size = type_size(&pplan->sval->type, &align);
 1081             /* align to stack align size */
 1082             size = (size + 3) & ~3;
                  /* For stack parameters, 'padding' is the gap between the end of
                     the previous entry of the list and the start of this one. */
 1083             if (i == STACK_CLASS && pplan->prev)
 1084               padding = pplan->start - pplan->prev->end;
 1085             size += padding; /* Add padding if any */
 1086             /* allocate the necessary size on stack */
 1087             gadd_sp(-size);
 1088             /* generate structure store */
 1089             r = get_reg(RC_INT);
 1090             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
 1091             vset(&vtop->type, r | VT_LVAL, 0);
 1092             vswap();
 1093             vstore(); /* memcpy to current sp + potential padding */
 1094
 1095             /* Homogeneous float aggregates are loaded to VFP registers
 1096                immediately since there is no way of loading data in multiple
 1097                non consecutive VFP registers as what is done for other
 1098                structures (see the use of todo). */
 1099             if (i == VFP_STRUCT_CLASS) {
 1100               int first = pplan->start, nb = pplan->end - first + 1;
 1101               /* vpop.32 {pplan->start, ..., pplan->end} */
 1102               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
 1103               /* No need to write the register used to a SValue since VFP regs
 1104                  cannot be used for gcall_or_jmp */
 1105             }
 1106           } else {
                  /* Scalar parameter passed on the stack: load it in a register
                     and push it; 'size' records the number of bytes pushed. */
 1107             if (is_float(pplan->sval->type.t)) {
 1108 #ifdef TCC_ARM_VFP
 1109               r = vfpr(gv(RC_FLOAT)) << 12;
 1110               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
 1111                 size = 4;
 1112               else {
 1113                 size = 8;
 1114                 r |= 0x101; /* vpush.32 -> vpush.64 */
 1115               }
 1116               o(0xED2D0A01 + r); /* vpush */
 1117 #else
 1118               r = fpr(gv(RC_FLOAT)) << 12;
 1119               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
 1120                 size = 4;
 1121               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
 1122                 size = 8;
 1123               else
 1124                 size = LDOUBLE_SIZE;
 1125
 1126               if (size == 12)
 1127                 r |= 0x400000;
 1128               else if(size == 8)
 1129                 r|=0x8000;
 1130
 1131               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
 1132 #endif
 1133             } else {
 1134               /* simple type (currently always same size) */
 1135               /* XXX: implicit cast ? */
 1136               size=4;
 1137               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                      /* long long: split in two words and push one of them here,
                         the other one is pushed by the common code below */
 1138                 lexpand_nr();
 1139                 size = 8;
 1140                 r = gv(RC_INT);
 1141                 o(0xE52D0004|(intr(r)<<12)); /* push r */
 1142                 vtop--;
 1143               }
 1144               r = gv(RC_INT);
 1145               o(0xE52D0004|(intr(r)<<12)); /* push r */
 1146             }
 1147             if (i == STACK_CLASS && pplan->prev)
 1148               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
 1149           }
 1150           break;
 1151
 1152         case VFP_CLASS:
                /* pplan->start is a single precision register number; the value
                   is loaded in the float register TREG_F0 + start / 2 */
 1153           gv(regmask(TREG_F0 + (pplan->start >> 1)));
 1154           if (pplan->start & 1) { /* Must be in upper part of double register */
 1155             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
 1156             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
 1157           }
 1158           break;
 1159
 1160         case CORE_CLASS:
 1161           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                  /* long long: one word goes to register pplan->end (recorded in
                     r2), the other one to pplan->start below */
 1162             lexpand_nr();
 1163             gv(regmask(pplan->end));
 1164             pplan->sval->r2 = vtop->r;
 1165             vtop--;
 1166           }
 1167           gv(regmask(pplan->start));
 1168           /* Mark register as used so that gcall_or_jmp use another one
 1169              (regs >=4 are free as never used to pass parameters) */
 1170           pplan->sval->r = vtop->r;
 1171           break;
 1172       }
 1173       vtop--; /* drop the working copy pushed by vpushv() above */
 1174     }
 1175   }
 1176
 1177   /* second pass to restore registers that were saved on stack by accident.
 1178      Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
 1179   if (++pass < 2)
 1180     goto again;
 1181
 1182   /* Manually free remaining registers since next parameters are loaded
 1183    * manually, without the help of gv(int). */
 1184   save_regs(nb_args);
 1185
 1186   if(todo) {
 1187     o(0xE8BD0000|todo); /* pop {todo} */
 1188     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
 1189       int r; /* note: shadows the function-level 'r' */
 1190       pplan->sval->r = pplan->start;
 1191       /* An SValue can only pin 2 registers at best (r and r2) but a structure
 1192          can occupy more than 2 registers. Thus, we need to push on the value
 1193          stack some fake parameter to have one SValue for each register used
 1194          by a structure (r2 is not used). */
 1195       for (r = pplan->start + 1; r <= pplan->end; r++) {
 1196         if (todo & (1 << r)) {
 1197           nb_extra_sval++;
 1198           vpushi(0);
 1199           vtop->r = r;
 1200         }
 1201       }
 1202     }
 1203   }
 1204   return nb_extra_sval;
 1205 }
1206
 1207 /* Generate function call. The function address is pushed first, then
 1208    all the parameters in call order. This function pops all the
 1209    parameters and the function address.

         nb_args: number of parameters on the value stack; the SValue of the
                  function itself is read at vtop[-nb_args] */
 1210 void gfunc_call(int nb_args)
 1211 {
 1212   int r, args_size;
 1213   int def_float_abi = float_abi; /* float_abi is restored from this on exit */
 1214   int todo;
 1215   struct plan plan;
 1216
 1217 #ifdef TCC_ARM_EABI
 1218   int variadic;
 1219
        /* With the hard float ABI, variadic callees and callees for which
           floats_in_core_regs() is true are called with the softfp convention:
           float_abi is switched for the duration of this call only. */
 1220   if (float_abi == ARM_HARD_FLOAT) {
 1221     variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
 1222     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
 1223       float_abi = ARM_SOFTFP_FLOAT;
 1224   }
 1225 #endif
 1226   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
 1227      VT_JMP anywhere except on the top of the stack because it would complicate
 1228      the code generator. */
 1229   r = vtop->r & VT_VALMASK;
 1230   if (r == VT_CMP || (r & ~1) == VT_JMP)
 1231     gv(RC_INT);
 1232
        /* Decide where every parameter goes; the value returned is the size in
           bytes of the parameters passed on the stack. */
 1233   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
 1234
 1235 #ifdef TCC_ARM_EABI
 1236   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
 1237     args_size = (args_size + 7) & ~7;
 1238     o(0xE24DD004); /* sub sp, sp, #4 */
 1239   }
 1240 #endif
 1241
        /* copy_params() may push extra SValues; they are counted in nb_args so
           that they are popped together with the real parameters below. */
 1242   nb_args += copy_params(nb_args, &plan, todo);
 1243   tcc_free(plan.pplans);
 1244
 1245   /* Move fct SValue on top as required by gcall_or_jmp */
 1246   vrotb(nb_args + 1);
 1247   gcall_or_jmp(0);
 1248   if (args_size)
 1249       gadd_sp(args_size); /* pop all parameters passed on the stack */
 1250 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
        /* softfp call returning a float/double: move the result from r0 (r0/r1)
           to s0 (d0). vtop is the function SValue here, so type.ref->type is
           its return type. */
 1251   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
 1252     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
 1253       o(0xEE000A10); /*vmov s0, r0 */
 1254     } else {
 1255       o(0xEE000B10); /* vmov.32 d0[0], r0 */
 1256       o(0xEE201B10); /* vmov.32 d0[1], r1 */
 1257     }
 1258   }
 1259 #endif
 1260   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
 1261   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
 1262   float_abi = def_float_abi;
 1263 }
1264
 1265 /* Generate the prolog of a function of type 'func_type': save the registers
         that carry parameters next to the frame, set up fp, and declare every
         parameter as a local variable addressed relative to fp. */
 1266 void gfunc_prolog(CType *func_type)
 1267 {
 1268   Sym *sym,*sym2;
 1269   int n, nf, size, align, rs, struct_ret = 0;
 1270   int addr, pn, sn; /* pn=core, sn=stack */
 1271   CType ret_type;
 1272
 1273 #ifdef TCC_ARM_EABI
 1274   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 1275 #endif
 1276
 1277   sym = func_type->ref;
 1278   func_vt = sym->type;
 1279   func_var = (func_type->ref->c == FUNC_ELLIPSIS);
 1280
        /* n: number of core register words used by parameters,
           nf: number of VFP single precision registers used by parameters */
 1281   n = nf = 0;
        /* A structure result for which gfunc_sret() returns 0 takes one core
           register slot (presumably for the address of the result -- confirm
           against gfunc_sret). */
 1282   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
 1283       !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
 1284   {
 1285     n++;
 1286     struct_ret = 1;
 1287     func_vc = 12; /* Offset from fp of the place to store the result */
 1288   }
        /* First walk over the parameters: only count how many registers have to
           be saved; stops once both register banks are known to be full. */
 1289   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
 1290     size = type_size(&sym2->type, &align);
 1291 #ifdef TCC_ARM_EABI
 1292     if (float_abi == ARM_HARD_FLOAT && !func_var &&
 1293         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
 1294       int tmpnf = assign_vfpreg(&avregs, align, size);
 1295       tmpnf += (size + 3) / 4;
 1296       nf = (tmpnf > nf) ? tmpnf : nf;
 1297     } else
 1298 #endif
 1299     if (n < 4)
 1300       n += (size + 3) / 4;
 1301   }
 1302   o(0xE1A0C00D); /* mov ip,sp */
 1303   if (func_var)
 1304     n=4;
 1305   if (n) {
 1306     if(n>4)
 1307       n=4;
 1308 #ifdef TCC_ARM_EABI
 1309     n=(n+1)&-2; /* round up to an even number of registers */
 1310 #endif
 1311     o(0xE92D0000|((1<<n)-1)); /* save the first n regs (at most r0-r3) on stack if needed */
 1312   }
 1313   if (nf) {
 1314     if (nf>16)
 1315       nf=16;
 1316     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
 1317     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
 1318   }
 1319   o(0xE92D5800); /* save fp, ip, lr */
 1320   o(0xE1A0B00D); /* mov fp, sp */
 1321   func_sub_sp_offset = ind;
 1322   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
 1323
 1324 #ifdef TCC_ARM_EABI
 1325   if (float_abi == ARM_HARD_FLOAT) {
 1326     func_vc += nf * 4;
 1327     avregs = AVAIL_REGS_INITIALIZER; /* restart VFP allocation for the second walk */
 1328   }
 1329 #endif
        /* Second walk: compute the offset 'addr' of each parameter in the save
           area. pn counts core register words, sn counts words taken from the
           caller's stack area. */
 1330   pn = struct_ret, sn = 0;
 1331   while ((sym = sym->next)) {
 1332     CType *type;
 1333     type = &sym->type;
 1334     size = type_size(type, &align);
 1335     size = (size + 3) >> 2; /* size in words from here on */
 1336     align = (align + 3) & ~3;
 1337 #ifdef TCC_ARM_EABI
 1338     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
 1339         || is_hgen_float_aggr(&sym->type))) {
 1340       int fpn = assign_vfpreg(&avregs, align, size << 2);
 1341       if (fpn >= 0)
 1342         addr = fpn * 4;
 1343       else
 1344         goto from_stack; /* no VFP register left: the parameter is on the stack */
 1345     } else
 1346 #endif
 1347     if (pn < 4) {
 1348 #ifdef TCC_ARM_EABI
 1349         pn = (pn + (align-1)/4) & -(align/4);
 1350 #endif
 1351       addr = (nf + pn) * 4;
 1352       pn += size;
            /* parameter split between core registers and stack: the words that
               do not fit in registers are accounted for in sn */
 1353       if (!sn && pn > 4)
 1354         sn = (pn - 4);
 1355     } else {
 1356 #ifdef TCC_ARM_EABI
 1357 from_stack:
 1358         sn = (sn + (align-1)/4) & -(align/4);
 1359 #endif
 1360       addr = (n + nf + sn) * 4;
 1361       sn += size;
 1362     }
          /* + 12: skip the three words (fp, ip, lr) saved right above fp */
 1363     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
 1364              addr + 12);
 1365   }
 1366   last_itod_magic=0;
 1367   leaffunc = 1; /* cleared by gfunc_call() as soon as a call is generated */
 1368   loc = 0;
 1369 }
1370
 1371 /* Generate the function epilog and patch the placeholder instruction left at
         func_sub_sp_offset by the prolog so that it reserves the stack space
         needed by the locals. */
 1372 void gfunc_epilog(void)
 1373 {
 1374   uint32_t x;
 1375   int diff;
 1376   /* Copy float return value to core register if base standard is used and
 1377      float computation is made with VFP */
 1378 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 1379   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
 1380     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
 1381       o(0xEE100A10); /* fmrs r0, s0 */
 1382     else {
 1383       o(0xEE100B10); /* fmrdl r0, d0 */
 1384       o(0xEE301B10); /* fmrdh r1, d0 */
 1385     }
 1386   }
 1387 #endif
 1388   o(0xE89BA800); /* restore fp, sp, pc */
        /* diff: -loc rounded up to a multiple of 4 */
 1389   diff = (-loc + 3) & -4;
 1390 #ifdef TCC_ARM_EABI
        /* non-leaf function: enlarge diff so that diff + 4 is a multiple of 8
           (presumably to keep sp 8-byte aligned at calls -- confirm) */
 1391   if(!leaffunc)
 1392     diff = ((diff + 11) & -8) - 4;
 1393 #endif
 1394   if(diff > 0) {
 1395     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
          /* non-zero x: the subtraction fits in one instruction, which simply
             replaces the placeholder */
 1396     if(x)
 1397       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
 1398     else {
            /* otherwise emit an out-of-line sequence after the epilog that loads
               diff from a literal word, and make the placeholder branch to it;
               the sequence returns with 'mov pc,lr' (so the patched instruction
               is presumably a branch with link -- confirm against encbranch) */
 1399       int addr;
 1400       addr=ind;
 1401       o(0xE59FC004); /* ldr ip,[pc+4] */
 1402       o(0xE04BD00C); /* sub sp,fp,ip  */
 1403       o(0xE1A0F00E); /* mov pc,lr */
 1404       o(diff);
 1405       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
 1406     }
 1407   }
 1408 }
1409
1410 /* generate a jump to a label */
1411 int gjmp(int t)
1412 {
1413   int r;
1414   r=ind;
1415   o(0xE0000000|encbranch(r,t,1));
1416   return r;
1417 }
1418
1419 /* generate a jump to a fixed address */
1420 void gjmp_addr(int a)
1421 {
1422   gjmp(a);
1423 }
1424
1425 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1426 int gtst(int inv, int t)
1427 {
1428   int v, r;
1429   uint32_t op;
1430   v = vtop->r & VT_VALMASK;
1431   r=ind;
1432   if (v == VT_CMP) {
1433     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1434     op|=encbranch(r,t,1);
1435     o(op);
1436     t=r;
1437   } else if (v == VT_JMP || v == VT_JMPI) {
1438     if ((v & 1) == inv) {
1439       if(!vtop->c.i)
1440         vtop->c.i=t;
1441       else {
1442         uint32_t *x;
1443         int p,lp;
1444         if(t) {
1445           p = vtop->c.i;
1446           do {
1447             p = decbranch(lp=p);
1448           } while(p);
1449           x = (uint32_t *)(cur_text_section->data + lp);
1450           *x &= 0xff000000;
1451           *x |= encbranch(lp,t,1);
1452         }
1453         t = vtop->c.i;
1454       }
1455     } else {
1456       t = gjmp(t);
1457       gsym(vtop->c.i);
1458     }
1459   }
1460   vtop--;
1461   return t;
1462 }
1463
 1464 /* Generate an integer binary operation 'op' on the two values on top of the
         value stack (vtop[-1] op vtop[0]); both are replaced by the result.
         Comparisons leave the result as VT_CMP instead of in a register. */
 1465 void gen_opi(int op)
 1466 {
 1467   int c, func = 0;
 1468   uint32_t opc = 0, r, fr;
 1469   unsigned short retreg = REG_IRET;
 1470
        /* First switch: select the opcode bits 'opc' and the way operands are
           handled 'c' (1: two operand instruction whose second operand may be an
           immediate, 2: shift, 3: call to a runtime library function 'func').
           '*' and TOK_UMULL emit their instruction directly and return. */
 1471   c=0;
 1472   switch(op) {
 1473     case '+':
 1474       opc = 0x8;
 1475       c=1;
 1476       break;
 1477     case TOK_ADDC1: /* add with carry generation */
 1478       opc = 0x9;
 1479       c=1;
 1480       break;
 1481     case '-':
 1482       opc = 0x4;
 1483       c=1;
 1484       break;
 1485     case TOK_SUBC1: /* sub with carry generation */
 1486       opc = 0x5;
 1487       c=1;
 1488       break;
 1489     case TOK_ADDC2: /* add with carry use */
 1490       opc = 0xA;
 1491       c=1;
 1492       break;
 1493     case TOK_SUBC2: /* sub with carry use */
 1494       opc = 0xC;
 1495       c=1;
 1496       break;
 1497     case '&':
 1498       opc = 0x0;
 1499       c=1;
 1500       break;
 1501     case '^':
 1502       opc = 0x2;
 1503       c=1;
 1504       break;
 1505     case '|':
 1506       opc = 0x18;
 1507       c=1;
 1508       break;
 1509     case '*':
 1510       gv2(RC_INT, RC_INT);
 1511       r = vtop[-1].r;
 1512       fr = vtop[0].r;
 1513       vtop--;
 1514       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
 1515       return;
 1516     case TOK_SHL:
 1517       opc = 0;
 1518       c=2;
 1519       break;
 1520     case TOK_SHR:
 1521       opc = 1;
 1522       c=2;
 1523       break;
 1524     case TOK_SAR:
 1525       opc = 2;
 1526       c=2;
 1527       break;
 1528     case '/':
 1529     case TOK_PDIV:
 1530       func=TOK___divsi3;
 1531       c=3;
 1532       break;
 1533     case TOK_UDIV:
 1534       func=TOK___udivsi3;
 1535       c=3;
 1536       break;
 1537     case '%':
 1538 #ifdef TCC_ARM_EABI
            /* with EABI the result of the helper is read from REG_LRET instead
               of REG_IRET */
 1539       func=TOK___aeabi_idivmod;
 1540       retreg=REG_LRET;
 1541 #else
 1542       func=TOK___modsi3;
 1543 #endif
 1544       c=3;
 1545       break;
 1546     case TOK_UMOD:
 1547 #ifdef TCC_ARM_EABI
 1548       func=TOK___aeabi_uidivmod;
 1549       retreg=REG_LRET;
 1550 #else
 1551       func=TOK___umodsi3;
 1552 #endif
 1553       c=3;
 1554       break;
 1555     case TOK_UMULL:
            /* the result uses two registers: vtop[-1].r and vtop[-1].r2 */
 1556       gv2(RC_INT, RC_INT);
 1557       r=intr(vtop[-1].r2=get_reg(RC_INT));
 1558       c=vtop[-1].r;
 1559       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
 1560       vtop--;
 1561       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
 1562       return;
 1563     default:
            /* everything else goes through the c == 1 path; comparison operators
               (TOK_ULT..TOK_GT) get their VT_CMP result after 'done' */
 1564       opc = 0x15;
 1565       c=1;
 1566       break;
 1567   }
 1568   switch(c) {
 1569     case 1:
            /* constant first operand of a subtraction: swap the operands and use
               the reversed form of the instruction */
 1570       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 1571         if(opc == 4 || opc == 5 || opc == 0xc) {
 1572           vswap();
 1573           opc|=2; // sub -> rsb
 1574         }
 1575       }
 1576       if ((vtop->r & VT_VALMASK) == VT_CMP ||
 1577           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
 1578         gv(RC_INT);
            /* load the first operand in a register */
 1579       vswap();
 1580       c=intr(gv(RC_INT));
 1581       vswap();
 1582       opc=0xE0000000|(opc<<20)|(c<<16);
            /* constant second operand: try the immediate form first */
 1583       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 1584         uint32_t x;
 1585         x=stuff_const(opc|0x2000000,vtop->c.i);
 1586         if(x) {
 1587           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
 1588           o(x|(r<<12));
 1589           goto done;
 1590         }
 1591       }
            /* register form */
 1592       fr=intr(gv(RC_INT));
 1593       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
 1594       o(opc|(r<<12)|fr);
 1595 done:
 1596       vtop--;
 1597       if (op >= TOK_ULT && op <= TOK_GT) {
 1598         vtop->r = VT_CMP;
 1599         vtop->c.i = op;
 1600       }
 1601       break;
 1602     case 2:
            /* shifts: the value shifted is vtop[-1], the amount is vtop[0] */
 1603       opc=0xE1A00000|(opc<<5);
 1604       if ((vtop->r & VT_VALMASK) == VT_CMP ||
 1605           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
 1606         gv(RC_INT);
 1607       vswap();
 1608       r=intr(gv(RC_INT));
 1609       vswap();
 1610       opc|=r;
 1611       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
              /* constant amount: only its low 5 bits are encoded */
 1612         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
 1613         c = vtop->c.i & 0x1f;
 1614         o(opc|(c<<7)|(fr<<12));
 1615       } else {
              /* amount held in register fr */
 1616         fr=intr(gv(RC_INT));
 1617         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
 1618         o(opc|(c<<12)|(fr<<8)|0x10);
 1619       }
 1620       vtop--;
 1621       break;
 1622     case 3:
            /* call func with the two operands as arguments; a fresh SValue
               bound to retreg stands for the result */
 1623       vpush_global_sym(&func_old_type, func);
 1624       vrott(3);
 1625       gfunc_call(2);
 1626       vpushi(0);
 1627       vtop->r = retreg;
 1628       break;
 1629     default:
 1630       tcc_error("gen_opi %i unimplemented!",op);
 1631   }
 1632 }
1633
1634 #ifdef TCC_ARM_VFP
1635 static int is_zero(int i)
1636 {
1637   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1638     return 0;
1639   if (vtop[i].type.t == VT_FLOAT)
1640     return (vtop[i].c.f == 0.f);
1641   else if (vtop[i].type.t == VT_DOUBLE)
1642     return (vtop[i].c.d == 0.0);
1643   return (vtop[i].c.ld == 0.l);
1644 }
1645
 1646 /* generate a floating point operation 'v = t1 op t2' instruction. The
 1647  *    two operands are guaranteed to have the same floating point type.
       *    (VFP version.) Arithmetic results end up in a float register;
       *    comparisons leave a VT_CMP value on the stack. */
 1648 void gen_opf(int op)
 1649 {
 1650   uint32_t x;
 1651   int fneg=0,r;
        /* x accumulates the instruction; T2CPR() adds the bits that depend on
           the operand type */
 1652   x=0xEE000A00|T2CPR(vtop->type.t);
 1653   switch(op) {
 1654     case '+':
            /* adding a constant zero: keep the other operand unchanged */
 1655       if(is_zero(-1))
 1656         vswap();
 1657       if(is_zero(0)) {
 1658         vtop--;
 1659         return;
 1660       }
 1661       x|=0x300000;
 1662       break;
 1663     case '-':
 1664       x|=0x300040;
            /* t1 - 0: nothing to do */
 1665       if(is_zero(0)) {
 1666         vtop--;
 1667         return;
 1668       }
            /* 0 - t2: negate t2 instead; only one operand is left on the stack */
 1669       if(is_zero(-1)) {
 1670         x|=0x810000; /* fsubX -> fnegX */
 1671         vswap();
 1672         vtop--;
 1673         fneg=1;
 1674       }
 1675       break;
 1676     case '*':
 1677       x|=0x200000;
 1678       break;
 1679     case '/':
 1680       x|=0x800000;
 1681       break;
 1682     default:
 1683       if(op < TOK_ULT || op > TOK_GT) {
 1684         tcc_error("unknown fp op %x!",op);
 1685         return;
 1686       }
            /* constant zero as first operand: swap the operands and mirror the
               comparison so that the compare-with-zero form below can be used */
 1687       if(is_zero(-1)) {
 1688         vswap();
 1689         switch(op) {
 1690           case TOK_LT: op=TOK_GT; break;
 1691           case TOK_GE: op=TOK_ULE; break;
 1692           case TOK_LE: op=TOK_GE; break;
 1693           case TOK_GT: op=TOK_ULT; break;
 1694         }
 1695       }
 1696       x|=0xB40040; /* fcmpX */
 1697       if(op!=TOK_EQ && op!=TOK_NE)
 1698         x|=0x80; /* fcmpX -> fcmpeX */
 1699       if(is_zero(0)) {
 1700         vtop--;
 1701         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
 1702       } else {
 1703         x|=vfpr(gv(RC_FLOAT));
 1704         vswap();
 1705         o(x|(vfpr(gv(RC_FLOAT))<<12));
 1706         vtop--;
 1707       }
 1708       o(0xEEF1FA10); /* fmstat */
 1709
            /* translate the operator into the condition that is recorded in the
               VT_CMP value below */
 1710       switch(op) {
 1711         case TOK_LE: op=TOK_ULE; break;
 1712         case TOK_LT: op=TOK_ULT; break;
 1713         case TOK_UGE: op=TOK_GE; break;
 1714         case TOK_UGT: op=TOK_GT; break;
 1715       }
 1716
 1717       vtop->r = VT_CMP;
 1718       vtop->c.i = op;
 1719       return;
 1720   }
        /* Arithmetic: load the operand on top (and, unless negating, the one
           below it), then pick a destination register that may reuse one of
           them. */
 1721   r=gv(RC_FLOAT);
 1722   x|=vfpr(r);
 1723   r=regmask(r);
 1724   if(!fneg) {
 1725     int r2;
 1726     vswap();
 1727     r2=gv(RC_FLOAT);
 1728     x|=vfpr(r2)<<16;
 1729     r|=regmask(r2);
 1730   }
 1731   vtop->r=get_reg_ex(RC_FLOAT,r);
 1732   if(!fneg)
 1733     vtop--;
 1734   o(x|(vfpr(vtop->r)<<12));
 1735 }
1736
1737 #else
1738 static uint32_t is_fconst()
1739 {
1740   long double f;
1741   uint32_t r;
1742   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1743     return 0;
1744   if (vtop->type.t == VT_FLOAT)
1745     f = vtop->c.f;
1746   else if (vtop->type.t == VT_DOUBLE)
1747     f = vtop->c.d;
1748   else
1749     f = vtop->c.ld;
1750   if(!ieee_finite(f))
1751     return 0;
1752   r=0x8;
1753   if(f<0.0) {
1754     r=0x18;
1755     f=-f;
1756   }
1757   if(f==0.0)
1758     return r;
1759   if(f==1.0)
1760     return r|1;
1761   if(f==2.0)
1762     return r|2;
1763   if(f==3.0)
1764     return r|3;
1765   if(f==4.0)
1766     return r|4;
1767   if(f==5.0)
1768     return r|5;
1769   if(f==0.5)
1770     return r|6;
1771   if(f==10.0)
1772     return r|7;
1773   return 0;
1774 }
1775
 1776 /* generate a floating point operation 'v = t1 op t2' instruction. The
 1777    two operands are guaranteed to have the same floating point type.
         (Version used when TCC_ARM_VFP is not defined.) c1 and c2 are the
         is_fconst() codes of t1 and t2: 0 when the operand is not a usable
         constant, a value <= 0xf for a positive one and > 0xf for a negative
         one. */
 1778 void gen_opf(int op)
 1779 {
 1780   uint32_t x, r, r2, c1, c2;
 1781   //fputs("gen_opf\n",stderr);
 1782   vswap();
 1783   c1 = is_fconst();
 1784   vswap();
 1785   c2 = is_fconst();
 1786   x=0xEE000100;
        /* select the precision bits of the instruction from the operand type */
 1787 #if LDOUBLE_SIZE == 8
 1788   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 1789     x|=0x80;
 1790 #else
 1791   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 1792     x|=0x80;
 1793   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
 1794     x|=0x80000;
 1795 #endif
        /* Each case leaves the first source register in r and either a register
           number or an immediate code (bit 3 set) in r2. */
 1796   switch(op)
 1797   {
 1798     case '+':
            /* commutative: make sure that a usable constant, if any, is the
               second operand */
 1799       if(!c2) {
 1800         vswap();
 1801         c2=c1;
 1802       }
 1803       vswap();
 1804       r=fpr(gv(RC_FLOAT));
 1805       vswap();
 1806       if(c2) {
              /* adding a negative constant: subtract its absolute value */
 1807         if(c2>0xf)
 1808           x|=0x200000; // suf
 1809         r2=c2&0xf;
 1810       } else {
 1811         r2=fpr(gv(RC_FLOAT));
 1812       }
 1813       break;
 1814     case '-':
 1815       if(c2) {
              /* with a negative constant no 'suf' bit is set: the absolute
                 value is added instead */
 1816         if(c2<=0xf)
 1817           x|=0x200000; // suf
 1818         r2=c2&0xf;
 1819         vswap();
 1820         r=fpr(gv(RC_FLOAT));
 1821         vswap();
 1822       } else if(c1 && c1<=0xf) {
              /* positive constant first operand: reverse subtract */
 1823         x|=0x300000; // rsf
 1824         r2=c1;
 1825         r=fpr(gv(RC_FLOAT));
 1826         vswap();
 1827       } else {
 1828         x|=0x200000; // suf
 1829         vswap();
 1830         r=fpr(gv(RC_FLOAT));
 1831         vswap();
 1832         r2=fpr(gv(RC_FLOAT));
 1833       }
 1834       break;
 1835     case '*':
            /* commutative: prefer a positive constant as second operand */
 1836       if(!c2 || c2>0xf) {
 1837         vswap();
 1838         c2=c1;
 1839       }
 1840       vswap();
 1841       r=fpr(gv(RC_FLOAT));
 1842       vswap();
 1843       if(c2 && c2<=0xf)
 1844         r2=c2;
 1845       else
 1846         r2=fpr(gv(RC_FLOAT));
 1847       x|=0x100000; // muf
 1848       break;
 1849     case '/':
 1850       if(c2 && c2<=0xf) {
 1851         x|=0x400000; // dvf
 1852         r2=c2;
 1853         vswap();
 1854         r=fpr(gv(RC_FLOAT));
 1855         vswap();
 1856       } else if(c1 && c1<=0xf) {
              /* positive constant first operand: reverse divide */
 1857         x|=0x500000; // rdf
 1858         r2=c1;
 1859         r=fpr(gv(RC_FLOAT));
 1860         vswap();
 1861       } else {
 1862         x|=0x400000; // dvf
 1863         vswap();
 1864         r=fpr(gv(RC_FLOAT));
 1865         vswap();
 1866         r2=fpr(gv(RC_FLOAT));
 1867       }
 1868       break;
 1869     default:
 1870       if(op >= TOK_ULT && op <= TOK_GT) {
 1871         x|=0xd0f110; // cmfe
 1872 /* bug (intention?) in Linux FPU emulator
 1873    doesn't set carry if equal */
              /* map the operator to the condition recorded in the VT_CMP result */
 1874         switch(op) {
 1875           case TOK_ULT:
 1876           case TOK_UGE:
 1877           case TOK_ULE:
 1878           case TOK_UGT:
 1879             tcc_error("unsigned comparison on floats?");
 1880             break;
 1881           case TOK_LT:
 1882             op=TOK_Nset;
 1883             break;
 1884           case TOK_LE:
 1885             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
 1886             break;
 1887           case TOK_EQ:
 1888           case TOK_NE:
 1889             x&=~0x400000; // cmfe -> cmf
 1890             break;
 1891         }
              /* only the first operand is a usable constant: swap the operands
                 and mirror the condition accordingly */
 1892         if(c1 && !c2) {
 1893           c2=c1;
 1894           vswap();
 1895           switch(op) {
 1896             case TOK_Nset:
 1897               op=TOK_GT;
 1898               break;
 1899             case TOK_GE:
 1900               op=TOK_ULE;
 1901               break;
 1902             case TOK_ULE:
 1903               op=TOK_GE;
 1904               break;
 1905             case TOK_GT:
 1906               op=TOK_Nset;
 1907               break;
 1908           }
 1909         }
 1910         vswap();
 1911         r=fpr(gv(RC_FLOAT));
 1912         vswap();
 1913         if(c2) {
 1914           if(c2>0xf)
 1915             x|=0x200000;
 1916           r2=c2&0xf;
 1917         } else {
 1918           r2=fpr(gv(RC_FLOAT));
 1919         }
 1920         vtop[-1].r = VT_CMP;
 1921         vtop[-1].c.i = op;
 1922       } else {
 1923         tcc_error("unknown fp op %x!",op);
 1924         return;
 1925       }
 1926   }
        /* From here on c1 is the destination field of the instruction: 15 for
           a comparison, otherwise a float register which may reuse a source
           register. */
 1927   if(vtop[-1].r == VT_CMP)
 1928     c1=15;
 1929   else {
 1930     c1=vtop->r;
          /* r2 is an immediate: vtop->r is not a register in use */
 1931     if(r2&0x8)
 1932       c1=vtop[-1].r;
 1933     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
 1934     c1=fpr(vtop[-1].r);
 1935   }
 1936   vtop--;
 1937   o(x|(r<<16)|(c1<<12)|r2);
 1938 }
1939 #endif
1940
 1941 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
 1942    and 'long long' cases. The integer is the value on top of the value
         stack; 'long long' values are converted by a call to a runtime library
         function. */
 1943 ST_FUNC void gen_cvt_itof1(int t)
 1944 {
 1945   uint32_t r, r2;
 1946   int bt;
 1947   bt=vtop->type.t & VT_BTYPE;
 1948   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
 1949 #ifndef TCC_ARM_VFP
 1950     uint32_t dsize = 0;
 1951 #endif
 1952     r=intr(gv(RC_INT));
 1953 #ifdef TCC_ARM_VFP
          /* move the integer to a float register, then convert it in place */
 1954     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
 1955     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
 1956     r2|=r2<<12;
 1957     if(!(vtop->type.t & VT_UNSIGNED))
 1958       r2|=0x80;                /* fuitoX -> fsitoX */
 1959     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
 1960 #else
 1961     r2=fpr(vtop->r=get_reg(RC_FLOAT));
 1962     if((t & VT_BTYPE) != VT_FLOAT)
 1963       dsize=0x80;    /* flts -> fltd */
 1964     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
          /* The conversion above treats the integer as signed. For an unsigned
             int, 4294967296.0 is added afterwards by instructions carrying a
             condition code (set by the cmp below). The constant is emitted in
             the code stream and reused while it is within reach
             (last_itod_magic). */
 1965     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
 1966       uint32_t off = 0;
 1967       o(0xE3500000|(r<<12));        /* cmp */
 1968       r=fpr(get_reg(RC_FLOAT));
 1969       if(last_itod_magic) {
 1970         off=ind+8-last_itod_magic;
 1971         off/=4;
 1972         if(off>255)
 1973           off=0;
 1974       }
 1975       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
 1976       if(!off) {
 1977         o(0xEA000000);              /* b */
 1978         last_itod_magic=ind;
 1979         o(0x4F800000);              /* 4294967296.0f */
 1980       }
 1981       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
 1982     }
 1983 #endif
 1984     return;
 1985   } else if(bt == VT_LLONG) {
          /* pick the library function from the destination type and the
             signedness of the source; func is only read when func_type is set */
 1986     int func;
 1987     CType *func_type = 0;
 1988     if((t & VT_BTYPE) == VT_FLOAT) {
 1989       func_type = &func_float_type;
 1990       if(vtop->type.t & VT_UNSIGNED)
 1991         func=TOK___floatundisf;
 1992       else
 1993         func=TOK___floatdisf;
 1994 #if LDOUBLE_SIZE != 8
 1995     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
 1996       func_type = &func_ldouble_type;
 1997       if(vtop->type.t & VT_UNSIGNED)
 1998         func=TOK___floatundixf;
 1999       else
 2000         func=TOK___floatdixf;
 2001     } else if((t & VT_BTYPE) == VT_DOUBLE) {
 2002 #else
 2003     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
 2004 #endif
 2005       func_type = &func_double_type;
 2006       if(vtop->type.t & VT_UNSIGNED)
 2007         func=TOK___floatundidf;
 2008       else
 2009         func=TOK___floatdidf;
 2010     }
 2011     if(func_type) {
            /* call func(value); a fresh SValue bound to TREG_F0 stands for the
               result */
 2012       vpush_global_sym(func_type, func);
 2013       vswap();
 2014       gfunc_call(1);
 2015       vpushi(0);
 2016       vtop->r=TREG_F0;
 2017       return;
 2018     }
 2019   }
 2020   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
 2021 }
2022
 2023 /* convert fp to int 't' type. The float is the value on top of the value
         stack. Conversions that are not done inline are done by a call to a
         runtime library function. */
 2024 void gen_cvt_ftoi(int t)
 2025 {
 2026   uint32_t r, r2;
 2027   int u, func = 0;
 2028   u=t&VT_UNSIGNED;
 2029   t&=VT_BTYPE;
 2030   r2=vtop->type.t & VT_BTYPE; /* basic type of the source */
 2031   if(t==VT_INT) {
 2032 #ifdef TCC_ARM_VFP
          /* convert inside the float register, then move the result to a core
             register */
 2033     r=vfpr(gv(RC_FLOAT));
 2034     u=u?0:0x10000;
 2035     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
 2036     r2=intr(vtop->r=get_reg(RC_INT));
 2037     o(0xEE100A10|(r<<16)|(r2<<12));
 2038     return;
 2039 #else
          /* unsigned results need a library call, signed ones are converted by
             a single instruction */
 2040     if(u) {
 2041       if(r2 == VT_FLOAT)
 2042         func=TOK___fixunssfsi;
 2043 #if LDOUBLE_SIZE != 8
 2044       else if(r2 == VT_LDOUBLE)
 2045         func=TOK___fixunsxfsi;
 2046       else if(r2 == VT_DOUBLE)
 2047 #else
 2048       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
 2049 #endif
 2050         func=TOK___fixunsdfsi;
 2051     } else {
 2052       r=fpr(gv(RC_FLOAT));
 2053       r2=intr(vtop->r=get_reg(RC_INT));
 2054       o(0xEE100170|(r2<<12)|r);
 2055       return;
 2056     }
 2057 #endif
 2058   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
 2059     if(r2 == VT_FLOAT)
 2060       func=TOK___fixsfdi;
 2061 #if LDOUBLE_SIZE != 8
 2062     else if(r2 == VT_LDOUBLE)
 2063       func=TOK___fixxfdi;
 2064     else if(r2 == VT_DOUBLE)
 2065 #else
 2066     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
 2067 #endif
 2068       func=TOK___fixdfdi;
 2069   }
 2070   if(func) {
          /* call func(value); a fresh SValue bound to REG_IRET (and REG_LRET
             for a long long result) stands for the result */
 2071     vpush_global_sym(&func_old_type, func);
 2072     vswap();
 2073     gfunc_call(1);
 2074     vpushi(0);
 2075     if(t == VT_LLONG)
 2076       vtop->r2 = REG_LRET;
 2077     vtop->r = REG_IRET;
 2078     return;
 2079   }
 2080   tcc_error("unimplemented gen_cvt_ftoi!");
 2081 }
2082
2083 /* convert from one floating point type to another */
2084 void gen_cvt_ftof(int t)
2085 {
2086 #ifdef TCC_ARM_VFP
2087   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2088     uint32_t r = vfpr(gv(RC_FLOAT));
2089     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2090   }
2091 #else
2092   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2093   gv(RC_FLOAT);
2094 #endif
2095 }
2096
2097 /* computed goto support */
2098 void ggoto(void)
2099 {
2100   gcall_or_jmp(1);
2101   vtop--;
2102 }
2103
2104 /* Save the stack pointer onto the stack and return the location of its address */
2105 ST_FUNC void gen_vla_sp_save(int addr) {
2106     SValue v;
2107     v.type.t = VT_PTR;
2108     v.r = VT_LOCAL | VT_LVAL;
2109     v.c.i = addr;
2110     store(TREG_SP, &v);
2111 }
2112
2113 /* Restore the SP from a location on the stack */
2114 ST_FUNC void gen_vla_sp_restore(int addr) {
2115     SValue v;
2116     v.type.t = VT_PTR;
2117     v.r = VT_LOCAL | VT_LVAL;
2118     v.c.i = addr;
2119     load(TREG_SP, &v);
2120 }
2121
2122 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2123 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2124     int r = intr(gv(RC_INT));
2125     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2126 #ifdef TCC_ARM_EABI
2127     if (align < 8)
2128         align = 8;
2129 #else
2130     if (align < 4)
2131         align = 4;
2132 #endif
2133     if (align & (align - 1))
2134         tcc_error("alignment is not a power of 2: %i", align);
2135     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2136     vpop();
2137 }
2138
2139 /* end of ARM code generator */
2140 /*************************************************************/
2141 #endif
2142 /*************************************************************/