arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  /* EABI builds require VFP float support. */
  #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  #error "Currently TinyCC only supports float computation with VFP instructions"
  #endif

  /* Number of registers handled by the register allocator (the size of
     reg_classes[]): 13 with VFP, 9 otherwise. */
  #if defined(TCC_ARM_VFP)
  # define NB_REGS 13
  #else
  # define NB_REGS 9
  #endif

  /* ARM architecture version; defaults to 5 when the build does not set it. */
  #if !defined(TCC_ARM_VERSION)
  # define TCC_ARM_VERSION 5
  #endif
  40
  /* A register can belong to several classes. The classes must be sorted
     from the most general to the most precise (the gv2() code makes
     assumptions about this ordering). */
  #define RC_INT     0x0001 /* any integer register */
  #define RC_FLOAT   0x0002 /* any float register */
  /* one class bit per individual integer register */
  #define RC_R0      0x0004
  #define RC_R1      0x0008
  #define RC_R2      0x0010
  #define RC_R3      0x0020
  #define RC_R12     0x0040
  /* one class bit per individual float register */
  #define RC_F0      0x0080
  #define RC_F1      0x0100
  #define RC_F2      0x0200
  #define RC_F3      0x0400
  #if defined(TCC_ARM_VFP)
  #define RC_F4      0x0800
  #define RC_F5      0x1000
  #define RC_F6      0x2000
  #define RC_F7      0x4000
  #endif
  /* classes of the registers carrying function return values */
  #define RC_IRET    RC_R0  /* integer result */
  #define RC_LRET    RC_R1  /* second word of an integer result */
  #define RC_FRET    RC_F0  /* float result */
  64
  /* Symbolic names for the registers. The allocatable registers come first
     and index reg_classes[]; TREG_SP and TREG_LR are numbered 13 and 14. */
  enum {
      /* integer registers */
      TREG_R0 = 0,
      TREG_R1,
      TREG_R2,
      TREG_R3,
      TREG_R12,
      /* float registers */
      TREG_F0,
      TREG_F1,
      TREG_F2,
      TREG_F3,
  #if defined(TCC_ARM_VFP)
      TREG_F4,
      TREG_F5,
      TREG_F6,
      TREG_F7,
  #endif
      /* not part of reg_classes[] */
      TREG_SP = 13,
      TREG_LR,
  };
  85
  #if defined(TCC_ARM_VFP)
  /* 0x100 when the basic type of 't' is anything but VT_FLOAT, 0 otherwise;
     OR-ed into VFP opcodes (see its use with the fcpy opcode in load()). */
  #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  #endif

  /* registers carrying function return values */
  #define REG_IRET TREG_R0 /* integer result (single word) */
  #define REG_LRET TREG_R1 /* second word of the result (for long long) */
  #define REG_FRET TREG_F0 /* float result */
  94
  #if defined(TCC_ARM_EABI)
  /* Under EABI, division and modulo tokens both map to the combined
     __aeabi_{u}ldivmod tokens. */
  #define TOK___divdi3  TOK___aeabi_ldivmod
  #define TOK___moddi3  TOK___aeabi_ldivmod
  #define TOK___udivdi3 TOK___aeabi_uldivmod
  #define TOK___umoddi3 TOK___aeabi_uldivmod
  #endif
 101
 /* Defined when function parameters must be evaluated in reverse order. */
 #define INVERT_FUNC_PARAMS

 /* Defined when structures are passed as pointers; when left undefined
    (as here), structures are pushed directly on the stack. */
 /* #define FUNC_STRUCT_PARAM_AS_PTR */

 /* Size of a pointer, in bytes. */
 #define PTR_SIZE 4

 /* Size of long double, in bytes: 8 with VFP, and 8 as well by default
    when nothing else has defined it. */
 #if defined(TCC_ARM_VFP)
 #define LDOUBLE_SIZE  8
 #endif

 #if !defined(LDOUBLE_SIZE)
 #define LDOUBLE_SIZE  8
 #endif

 /* Alignment of long double, in bytes. */
 #if defined(TCC_ARM_EABI)
 #define LDOUBLE_ALIGN 8
 #else
 #define LDOUBLE_ALIGN 4
 #endif

 /* Maximum alignment (for aligned attribute support). */
 #define MAX_ALIGN     8

 #define CHAR_IS_UNSIGNED
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #include "tcc.h"
 136
 137 enum float_abi float_abi;
 138
 139 ST_DATA const int reg_classes[NB_REGS] = {
 140     /* r0 */ RC_INT | RC_R0,
 141     /* r1 */ RC_INT | RC_R1,
 142     /* r2 */ RC_INT | RC_R2,
 143     /* r3 */ RC_INT | RC_R3,
 144     /* r12 */ RC_INT | RC_R12,
 145     /* f0 */ RC_FLOAT | RC_F0,
 146     /* f1 */ RC_FLOAT | RC_F1,
 147     /* f2 */ RC_FLOAT | RC_F2,
 148     /* f3 */ RC_FLOAT | RC_F3,
 149 #ifdef TCC_ARM_VFP
 150  /* d4/s8 */ RC_FLOAT | RC_F4,
 151 /* d5/s10 */ RC_FLOAT | RC_F5,
 152 /* d6/s12 */ RC_FLOAT | RC_F6,
 153 /* d7/s14 */ RC_FLOAT | RC_F7,
 154 #endif
 155 };
 156
 157 static int func_sub_sp_offset, last_itod_magic;
 158 static int leaffunc;
 159
 160 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 161 static CType float_type, double_type, func_float_type, func_double_type;
 162 ST_FUNC void arm_init(struct TCCState *s)
 163 {
 164     float_type.t = VT_FLOAT;
 165     double_type.t = VT_DOUBLE;
 166     func_float_type.t = VT_FUNC;
 167     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 168     func_double_type.t = VT_FUNC;
 169     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 170
 171     float_abi = s->float_abi;
 172 #ifndef TCC_ARM_HARDFLOAT
 173     tcc_warning("soft float ABI currently not supported: default to softfp");
 174 #endif
 175 }
 176 #else
 177 #define func_float_type func_old_type
 178 #define func_double_type func_old_type
 179 #define func_ldouble_type func_old_type
 180 ST_FUNC void arm_init(struct TCCState *s)
 181 {
 182 #if !defined (TCC_ARM_VFP)
 183     tcc_warning("Support for FPA is deprecated and will be removed in next"
 184                 " release");
 185 #endif
 186 #if !defined (TCC_ARM_EABI)
 187     tcc_warning("Support for OABI is deprecated and will be removed in next"
 188                 " release");
 189 #endif
 190 }
 191 #endif
 192
 193 static int two2mask(int a,int b) {
 194   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 195 }
 196
 197 static int regmask(int r) {
 198   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 199 }
 200
 201 /******************************************************/
 202
 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 /* Path of the default ELF interpreter, selected by the float ABI
    recorded in the compilation state 's'. */
 char *default_elfinterp(struct TCCState *s)
 {
     return s->float_abi == ARM_HARD_FLOAT
         ? "/lib/ld-linux-armhf.so.3"
         : "/lib/ld-linux.so.3";
 }
 #endif
 212
 213 void o(uint32_t i)
 214 {
 215   /* this is a good place to start adding big-endian support*/
 216   int ind1;
 217   if (nocode_wanted)
 218     return;
 219   ind1 = ind + 4;
 220   if (!cur_text_section)
 221     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 222          "can't evaluate constant expressions outside of a function.");
 223   if (ind1 > cur_text_section->data_allocated)
 224     section_realloc(cur_text_section, ind1);
 225   cur_text_section->data[ind++] = i&255;
 226   i>>=8;
 227   cur_text_section->data[ind++] = i&255;
 228   i>>=8;
 229   cur_text_section->data[ind++] = i&255;
 230   i>>=8;
 231   cur_text_section->data[ind++] = i;
 232 }
 233
 /* Try to encode constant 'c' as the immediate operand of the
    data-processing instruction 'op'.

    An immediate is encodable when it is an 8-bit value rotated by an even
    number of bits. When 'c' itself is not encodable, an equivalent
    instruction is tried where one exists:
    - add <-> sub with the negated constant;
    - mov <-> mvn and and <-> bic with the complemented constant.
    xor, and, orr with an all-ones constant are turned directly into the
    equivalent mvn / mov / mvn form.

    Returns the complete instruction word, or 0 if no single instruction
    can be found. */
 static uint32_t stuff_const(uint32_t op, uint32_t c)
 {
   int try_neg = 0;
   uint32_t nc = 0, negop = 0;

   switch (op & 0x1F00000)
   {
     case 0x800000: /* add */
     case 0x400000: /* sub */
       try_neg = 1;
       negop = op ^ 0xC00000;
       nc = -c;
       break;
     case 0x1A00000: /* mov */
     case 0x1E00000: /* mvn */
       try_neg = 1;
       negop = op ^ 0x400000;
       nc = ~c;
       break;
     case 0x200000: /* xor */
       if (c == 0xFFFFFFFFu)
         return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1E00000;
       break;
     case 0x0: /* and */
       if (c == 0xFFFFFFFFu)
         return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1A00000;
       /* fall through: and/bic are each other's complemented form */
     case 0x1C00000: /* bic */
       try_neg = 1;
       negop = op ^ 0x1C00000;
       nc = ~c;
       break;
     case 0x1800000: /* orr */
       if (c == 0xFFFFFFFFu)
         return (op & 0xFFF0FFFF) | 0x1E00000;
       break;
   }
   do {
     uint32_t m;
     int i;
     if (c < 256) /* catch undefined <<32 */
       return op | c;
     for (i = 2; i < 32; i += 2) {
       /* 0xff rotated right by i bits; unsigned operands so that the left
          shift cannot overflow a signed int */
       m = (0xffu >> i) | (0xffu << (32 - i));
       if (!(c & ~m))
         return op | ((uint32_t)i << 7) | (c << i) | (c >> (32 - i));
     }
     /* second round (if any): alternative opcode and constant */
     op = negop;
     c = nc;
   } while (try_neg--);
   return 0;
 }
 285
 286
 /* Emit 'op' (add or sub only) with the arbitrary 32-bit constant 'v',
    using as few instructions as this search finds: one instruction if
    stuff_const() succeeds, otherwise two or three instructions working on
    'v' or on '-v' with the opposite operation, and four instructions as a
    last resort. */
 void stuff_const_harder(uint32_t op, uint32_t v) {
   uint32_t win[16];   /* win[k] is 0xff rotated right by 2*k bits */
   uint32_t op_next;   /* 'op' with bits 16-19 replaced by its bits 12-15 */
   uint32_t neg_op, neg_op_next, neg_v;
   uint32_t single;
   int i, j, k;

   single = stuff_const(op, v);
   if (single) {
     o(single);
     return;
   }

   win[0] = 0xff;
   for (i = 1; i < 16; i++)
     win[i] = (win[i - 1] >> 2) | (win[i - 1] << 30);

   op_next = (op & 0xfff0ffff) | ((op & 0xf000) << 4);
   /* opposite operation (add <-> sub), used with the negated constant */
   neg_op = op ^ 0xC00000;
   neg_op_next = op_next ^ 0xC00000;
   neg_v = -v;

   /* two instructions on v */
   for (i = 0; i < 12; i++) {
     for (j = i < 4 ? i + 12 : 15; j >= i + 4; j--) {
       if ((v & (win[i] | win[j])) == v) {
         o(stuff_const(op, v & win[i]));
         o(stuff_const(op_next, v & win[j]));
         return;
       }
     }
   }

   /* two instructions on -v */
   for (i = 0; i < 12; i++) {
     for (j = i < 4 ? i + 12 : 15; j >= i + 4; j--) {
       if ((neg_v & (win[i] | win[j])) == neg_v) {
         o(stuff_const(neg_op, neg_v & win[i]));
         o(stuff_const(neg_op_next, neg_v & win[j]));
         return;
       }
     }
   }

   /* three instructions on v */
   for (i = 0; i < 8; i++) {
     for (j = i + 4; j < 12; j++) {
       for (k = i < 4 ? i + 12 : 15; k >= j + 4; k--) {
         if ((v & (win[i] | win[j] | win[k])) == v) {
           o(stuff_const(op, v & win[i]));
           o(stuff_const(op_next, v & win[j]));
           o(stuff_const(op_next, v & win[k]));
           return;
         }
       }
     }
   }

   /* three instructions on -v */
   for (i = 0; i < 8; i++) {
     for (j = i + 4; j < 12; j++) {
       for (k = i < 4 ? i + 12 : 15; k >= j + 4; k--) {
         if ((neg_v & (win[i] | win[j] | win[k])) == neg_v) {
           o(stuff_const(neg_op, neg_v & win[i]));
           o(stuff_const(neg_op_next, neg_v & win[j]));
           o(stuff_const(neg_op_next, neg_v & win[k]));
           return;
         }
       }
     }
   }

   /* last resort: one instruction per byte of v */
   o(stuff_const(op, v & win[0]));
   o(stuff_const(op_next, v & win[4]));
   o(stuff_const(op_next, v & win[8]));
   o(stuff_const(op_next, v & win[12]));
 }
 343
 /* Encode a branch located at 'pos' and targeting 'addr'.

    The offset is taken relative to pos + 8 and expressed in words; it must
    fit the 24-bit signed offset field of the instruction, i.e. lie in
    [-0x800000, 0x7fffff] words (+/- 32MB).

    Returns the opcode/offset bits 0x0A000000 | offset (no condition code).
    When the target is out of range, reports an error if 'fail' is non-zero
    and otherwise returns 0. */
 uint32_t encbranch(int pos, int addr, int fail)
 {
   addr -= pos + 8;
   addr /= 4;
   /* the field is signed: only half of the 24-bit range is available in
      each direction */
   if (addr >= 0x800000 || addr < -0x800000) {
     if (fail)
       tcc_error("FIXME: function bigger than 32MB");
     return 0;
   }
   return 0x0A000000 | (addr & 0xffffff);
 }
 355
 356 int decbranch(int pos)
 357 {
 358   int x;
 359   x=*(uint32_t *)(cur_text_section->data + pos);
 360   x&=0x00ffffff;
 361   if(x&0x800000)
 362     x-=0x1000000;
 363   return x*4+pos+8;
 364 }
 365
 366 /* output a symbol and patch all calls to it */
 367 void gsym_addr(int t, int a)
 368 {
 369   uint32_t *x;
 370   int lt;
 371   while(t) {
 372     x=(uint32_t *)(cur_text_section->data + t);
 373     t=decbranch(lt=t);
 374     if(a==lt+4)
 375       *x=0xE1A00000; // nop
 376     else {
 377       *x &= 0xff000000;
 378       *x |= encbranch(lt,a,1);
 379     }
 380   }
 381 }
 382
 383 void gsym(int t)
 384 {
 385   gsym_addr(t, ind);
 386 }
 387
 388 #ifdef TCC_ARM_VFP
 389 static uint32_t vfpr(int r)
 390 {
 391   if(r<TREG_F0 || r>TREG_F7)
 392     tcc_error("compiler error! register %i is no vfp register",r);
 393   return r - TREG_F0;
 394 }
 395 #else
 396 static uint32_t fpr(int r)
 397 {
 398   if(r<TREG_F0 || r>TREG_F3)
 399     tcc_error("compiler error! register %i is no fpa register",r);
 400   return r - TREG_F0;
 401 }
 402 #endif
 403
 404 static uint32_t intr(int r)
 405 {
 406   if(r == TREG_R12)
 407     return 12;
 408   if(r >= TREG_R0 && r <= TREG_R3)
 409     return r - TREG_R0;
 410   if (r >= TREG_SP && r <= TREG_LR)
 411     return r + (13 - TREG_SP);
 412   tcc_error("compiler error! register %i is no int register",r);
 413 }
 414
 /* Make the address base +/- offset usable by a load/store instruction
    whose offset field accepts at most 'maxoff' and only multiples of
    1 << shift.

    base: in/out, number of the base register
    off:  in/out, magnitude of the offset
    sgn:  in/out, non-zero if the offset is to be subtracted

    When the offset does not fit, an add/sub into lr is emitted to absorb
    the part that does not fit, *base becomes lr and *off (possibly *sgn)
    is updated to the remaining offset. */
 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 {
   uint32_t insn, encoded;
   int rounded_up;

   /* offset already usable as is */
   if (*off <= maxoff && !(*off & ((1 << shift) - 1)))
     return;

   /* add (or sub, for a negative offset) lr, <base>, #... */
   insn = 0xE280E000;
   if (*sgn)
     insn = 0xE240E000;
   insn |= (*base) << 16;
   *base = 14; // lr

   /* first choice: move the high part of the offset into lr */
   encoded = stuff_const(insn, *off & ~maxoff);
   if (encoded) {
     o(encoded);
     *off &= maxoff;
     return;
   }

   /* second choice: overshoot to the next boundary and come back with an
      offset of the opposite sign */
   rounded_up = (*off + maxoff) & ~maxoff;
   encoded = stuff_const(insn, rounded_up);
   if (encoded) {
     o(encoded);
     *sgn = !*sgn;
     *off = rounded_up - *off;
     return;
   }

   /* otherwise use as many instructions as needed for the high part */
   stuff_const_harder(insn, *off & ~maxoff);
   *off &= maxoff;
 }
 441
 442 static uint32_t mapcc(int cc)
 443 {
 444   switch(cc)
 445   {
 446     case TOK_ULT:
 447       return 0x30000000; /* CC/LO */
 448     case TOK_UGE:
 449       return 0x20000000; /* CS/HS */
 450     case TOK_EQ:
 451       return 0x00000000; /* EQ */
 452     case TOK_NE:
 453       return 0x10000000; /* NE */
 454     case TOK_ULE:
 455       return 0x90000000; /* LS */
 456     case TOK_UGT:
 457       return 0x80000000; /* HI */
 458     case TOK_Nset:
 459       return 0x40000000; /* MI */
 460     case TOK_Nclear:
 461       return 0x50000000; /* PL */
 462     case TOK_LT:
 463       return 0xB0000000; /* LT */
 464     case TOK_GE:
 465       return 0xA0000000; /* GE */
 466     case TOK_LE:
 467       return 0xD0000000; /* LE */
 468     case TOK_GT:
 469       return 0xC0000000; /* GT */
 470   }
 471   tcc_error("unexpected condition code");
 472   return 0xE0000000; /* AL */
 473 }
 474
 475 static int negcc(int cc)
 476 {
 477   switch(cc)
 478   {
 479     case TOK_ULT:
 480       return TOK_UGE;
 481     case TOK_UGE:
 482       return TOK_ULT;
 483     case TOK_EQ:
 484       return TOK_NE;
 485     case TOK_NE:
 486       return TOK_EQ;
 487     case TOK_ULE:
 488       return TOK_UGT;
 489     case TOK_UGT:
 490       return TOK_ULE;
 491     case TOK_Nset:
 492       return TOK_Nclear;
 493     case TOK_Nclear:
 494       return TOK_Nset;
 495     case TOK_LT:
 496       return TOK_GE;
 497     case TOK_GE:
 498       return TOK_LT;
 499     case TOK_LE:
 500       return TOK_GT;
 501     case TOK_GT:
 502       return TOK_LE;
 503   }
 504   tcc_error("unexpected condition code");
 505   return TOK_NE;
 506 }
 507
 508 /* load 'r' from value 'sv' */
 509 void load(int r, SValue *sv)
 510 {
 511   int v, ft, fc, fr, sign;
 512   uint32_t op;
 513   SValue v1;
 514
 515   fr = sv->r;
 516   ft = sv->type.t;
 517   fc = sv->c.i;
 518
 519   if(fc>=0)
 520     sign=0;
 521   else {
 522     sign=1;
 523     fc=-fc;
 524   }
 525
 526   v = fr & VT_VALMASK;
 527   if (fr & VT_LVAL) {
 528     uint32_t base = 0xB; // fp
 529     if(v == VT_LLOCAL) {
 530       v1.type.t = VT_PTR;
 531       v1.r = VT_LOCAL | VT_LVAL;
 532       v1.c.i = sv->c.i;
 533       load(TREG_LR, &v1);
 534       base = 14; /* lr */
 535       fc=sign=0;
 536       v=VT_LOCAL;
 537     } else if(v == VT_CONST) {
 538       v1.type.t = VT_PTR;
 539       v1.r = fr&~VT_LVAL;
 540       v1.c.i = sv->c.i;
 541       v1.sym=sv->sym;
 542       load(TREG_LR, &v1);
 543       base = 14; /* lr */
 544       fc=sign=0;
 545       v=VT_LOCAL;
 546     } else if(v < VT_CONST) {
 547       base=intr(v);
 548       fc=sign=0;
 549       v=VT_LOCAL;
 550     }
 551     if(v == VT_LOCAL) {
 552       if(is_float(ft)) {
 553         calcaddr(&base,&fc,&sign,1020,2);
 554 #ifdef TCC_ARM_VFP
 555         op=0xED100A00; /* flds */
 556         if(!sign)
 557           op|=0x800000;
 558         if ((ft & VT_BTYPE) != VT_FLOAT)
 559           op|=0x100;   /* flds -> fldd */
 560         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 561 #else
 562         op=0xED100100;
 563         if(!sign)
 564           op|=0x800000;
 565 #if LDOUBLE_SIZE == 8
 566         if ((ft & VT_BTYPE) != VT_FLOAT)
 567           op|=0x8000;
 568 #else
 569         if ((ft & VT_BTYPE) == VT_DOUBLE)
 570           op|=0x8000;
 571         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 572           op|=0x400000;
 573 #endif
 574         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 575 #endif
 576       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 577                 || (ft & VT_BTYPE) == VT_SHORT) {
 578         calcaddr(&base,&fc,&sign,255,0);
 579         op=0xE1500090;
 580         if ((ft & VT_BTYPE) == VT_SHORT)
 581           op|=0x20;
 582         if ((ft & VT_UNSIGNED) == 0)
 583           op|=0x40;
 584         if(!sign)
 585           op|=0x800000;
 586         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 587       } else {
 588         calcaddr(&base,&fc,&sign,4095,0);
 589         op=0xE5100000;
 590         if(!sign)
 591           op|=0x800000;
 592         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 593           op|=0x400000;
 594         o(op|(intr(r)<<12)|fc|(base<<16));
 595       }
 596       return;
 597     }
 598   } else {
 599     if (v == VT_CONST) {
 600       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
 601       if (fr & VT_SYM || !op) {
 602         o(0xE59F0000|(intr(r)<<12));
 603         o(0xEA000000);
 604         if(fr & VT_SYM)
 605           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 606         o(sv->c.i);
 607       } else
 608         o(op);
 609       return;
 610     } else if (v == VT_LOCAL) {
 611       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
 612       if (fr & VT_SYM || !op) {
 613         o(0xE59F0000|(intr(r)<<12));
 614         o(0xEA000000);
 615         if(fr & VT_SYM) // needed ?
 616           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 617         o(sv->c.i);
 618         o(0xE08B0000|(intr(r)<<12)|intr(r));
 619       } else
 620         o(op);
 621       return;
 622     } else if(v == VT_CMP) {
 623       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
 624       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
 625       return;
 626     } else if (v == VT_JMP || v == VT_JMPI) {
 627       int t;
 628       t = v & 1;
 629       o(0xE3A00000|(intr(r)<<12)|t);
 630       o(0xEA000000);
 631       gsym(sv->c.i);
 632       o(0xE3A00000|(intr(r)<<12)|(t^1));
 633       return;
 634     } else if (v < VT_CONST) {
 635       if(is_float(ft))
 636 #ifdef TCC_ARM_VFP
 637         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 638 #else
 639         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 640 #endif
 641       else
 642         o(0xE1A00000|(intr(r)<<12)|intr(v));
 643       return;
 644     }
 645   }
 646   tcc_error("load unimplemented!");
 647 }
 648
 649 /* store register 'r' in lvalue 'v' */
 650 void store(int r, SValue *sv)
 651 {
 652   SValue v1;
 653   int v, ft, fc, fr, sign;
 654   uint32_t op;
 655
 656   fr = sv->r;
 657   ft = sv->type.t;
 658   fc = sv->c.i;
 659
 660   if(fc>=0)
 661     sign=0;
 662   else {
 663     sign=1;
 664     fc=-fc;
 665   }
 666
 667   v = fr & VT_VALMASK;
 668   if (fr & VT_LVAL || fr == VT_LOCAL) {
 669     uint32_t base = 0xb; /* fp */
 670     if(v < VT_CONST) {
 671       base=intr(v);
 672       v=VT_LOCAL;
 673       fc=sign=0;
 674     } else if(v == VT_CONST) {
 675       v1.type.t = ft;
 676       v1.r = fr&~VT_LVAL;
 677       v1.c.i = sv->c.i;
 678       v1.sym=sv->sym;
 679       load(TREG_LR, &v1);
 680       base = 14; /* lr */
 681       fc=sign=0;
 682       v=VT_LOCAL;
 683     }
 684     if(v == VT_LOCAL) {
 685        if(is_float(ft)) {
 686         calcaddr(&base,&fc,&sign,1020,2);
 687 #ifdef TCC_ARM_VFP
 688         op=0xED000A00; /* fsts */
 689         if(!sign)
 690           op|=0x800000;
 691         if ((ft & VT_BTYPE) != VT_FLOAT)
 692           op|=0x100;   /* fsts -> fstd */
 693         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 694 #else
 695         op=0xED000100;
 696         if(!sign)
 697           op|=0x800000;
 698 #if LDOUBLE_SIZE == 8
 699         if ((ft & VT_BTYPE) != VT_FLOAT)
 700           op|=0x8000;
 701 #else
 702         if ((ft & VT_BTYPE) == VT_DOUBLE)
 703           op|=0x8000;
 704         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 705           op|=0x400000;
 706 #endif
 707         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 708 #endif
 709         return;
 710       } else if((ft & VT_BTYPE) == VT_SHORT) {
 711         calcaddr(&base,&fc,&sign,255,0);
 712         op=0xE14000B0;
 713         if(!sign)
 714           op|=0x800000;
 715         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 716       } else {
 717         calcaddr(&base,&fc,&sign,4095,0);
 718         op=0xE5000000;
 719         if(!sign)
 720           op|=0x800000;
 721         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 722           op|=0x400000;
 723         o(op|(intr(r)<<12)|fc|(base<<16));
 724       }
 725       return;
 726     }
 727   }
 728   tcc_error("store unimplemented");
 729 }
 730
 /* Emit the instruction(s) adding the constant 'val' to sp. */
 static void gadd_sp(int val)
 {
   const uint32_t add_sp_sp = 0xE28DD000; /* add sp, sp, #... */

   stuff_const_harder(add_sp_sp, val);
 }
 735
 736 /* 'is_jmp' is '1' if it is a jump */
 737 static void gcall_or_jmp(int is_jmp)
 738 {
 739   int r;
 740   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 741     uint32_t x;
 742     /* constant case */
 743     x=encbranch(ind,ind+vtop->c.i,0);
 744     if(x) {
 745       if (vtop->r & VT_SYM) {
 746         /* relocation case */
 747         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 748       } else
 749         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 750       o(x|(is_jmp?0xE0000000:0xE1000000));
 751     } else {
 752       if(!is_jmp)
 753         o(0xE28FE004); // add lr,pc,#4
 754       o(0xE51FF004);   // ldr pc,[pc,#-4]
 755       if (vtop->r & VT_SYM)
 756         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 757       o(vtop->c.i);
 758     }
 759   } else {
 760     /* otherwise, indirect call */
 761     r = gv(RC_INT);
 762     if(!is_jmp)
 763       o(0xE1A0E00F);       // mov lr,pc
 764     o(0xE1A0F000|intr(r)); // mov pc,r
 765   }
 766 }
 767
 /* Map VT_LDOUBLE to VT_DOUBLE when long double is 8 bytes wide; every
    other basic type (and VT_LDOUBLE otherwise) is returned unchanged. */
 static int unalias_ldbl(int btype)
 {
 #if LDOUBLE_SIZE == 8
     return btype == VT_LDOUBLE ? VT_DOUBLE : btype;
 #else
     return btype;
 #endif
 }
 776
 777 /* Return whether a structure is an homogeneous float aggregate or not.
 778    The answer is true if all the elements of the structure are of the same
 779    primitive float type and there is less than 4 elements.
 780
 781    type: the type corresponding to the structure to be tested */
 782 static int is_hgen_float_aggr(CType *type)
 783 {
 784   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 785     struct Sym *ref;
 786     int btype, nb_fields = 0;
 787
 788     ref = type->ref->next;
 789     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 790     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 791       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 792       return !ref && nb_fields <= 4;
 793     }
 794   }
 795   return 0;
 796 }
 797
 struct avail_regs {
   /* Holes left behind by double alignment, kept as a ring buffer indexed
      by first_hole / last_hole modulo its size (3 holes max at any time
      with only float and double alignments). */
   signed char avail[3];
   int first_hole; /* count of holes consumed so far */
   int last_hole; /* count of holes recorded so far (no hole available if
                     equal to first_hole) */
   int first_free_reg; /* next free register in the sequence, hole excluded;
                          -1 once an allocation has failed */
 };

 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }

 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
    param) according to the rules described in the procedure call standard for
    the ARM architecture (AAPCS). If found, the registers are assigned to this
    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
    and the parameter is a single float.

    avregs: opaque structure to keep track of available VFP co-processor regs
    align: alignment constraints for the param, as returned by type_size()
    size: size of the parameter, as returned by type_size()

    Returns the number of the first (single precision) register assigned, or
    -1 if the parameter does not fit in the 16 registers; after such a
    failure every later call returns -1 as well. */
 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 {
   const int nb_slots = sizeof(avregs->avail) / sizeof(avregs->avail[0]);
   int first_reg = 0;

   if (avregs->first_free_reg == -1)
     return -1;
   if (align >> 3) { /* double alignment */
     first_reg = avregs->first_free_reg;
     /* alignment constraint not respected so use next reg and record hole;
        the slot index wraps so that recording more holes than there are
        slots over a whole call cannot write past the array */
     if (first_reg & 1) {
       avregs->avail[avregs->last_hole % nb_slots] = first_reg;
       avregs->last_hole++;
       first_reg++;
     }
   } else { /* no special alignment (float or array of float) */
     /* if single float and a hole is available, assign the param to it */
     if (size == 4 && avregs->first_hole != avregs->last_hole) {
       int hole = avregs->avail[avregs->first_hole % nb_slots];

       avregs->first_hole++;
       return hole;
     }
     first_reg = avregs->first_free_reg;
   }
   if (first_reg + size / 4 <= 16) {
     avregs->first_free_reg = first_reg + size / 4;
     return first_reg;
   }
   avregs->first_free_reg = -1;
   return -1;
 }
 841
 842 /* Returns whether all params need to be passed in core registers or not.
 843    This is the case for function part of the runtime ABI. */
 844 int floats_in_core_regs(SValue *sval)
 845 {
 846   if (!sval->sym)
 847     return 0;
 848
 849   switch (sval->sym->v) {
 850     case TOK___floatundisf:
 851     case TOK___floatundidf:
 852     case TOK___fixunssfdi:
 853     case TOK___fixunsdfdi:
 854 #ifndef TCC_ARM_VFP
 855     case TOK___fixunsxfdi:
 856 #endif
 857     case TOK___floatdisf:
 858     case TOK___floatdidf:
 859     case TOK___fixsfdi:
 860     case TOK___fixdfdi:
 861       return 1;
 862
 863     default:
 864       return 0;
 865   }
 866 }
 867
 868 /* Return the number of registers needed to return the struct, or 0 if
 869    returning via struct pointer. */
 870 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 871 #ifdef TCC_ARM_EABI
 872     int size, align;
 873     size = type_size(vt, &align);
 874     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 875         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 876         *ret_align = 8;
 877         *regsize = 8;
 878         ret->ref = NULL;
 879         ret->t = VT_DOUBLE;
 880         return (size + 7) >> 3;
 881     } else if (size <= 4) {
 882         *ret_align = 4;
 883         *regsize = 4;
 884         ret->ref = NULL;
 885         ret->t = VT_INT;
 886         return 1;
 887     } else
 888         return 0;
 889 #else
 890     return 0;
 891 #endif
 892 }
 893
 /* Parameters are classified according to how they are copied to their final
    destination for the function call. Because the copying is performed class
    after class according to the order of the enum below, it is important that
    some constraints about the order of the members of this enum are respected:
    - CORE_STRUCT_CLASS must come after STACK_CLASS;
    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
      VFP_STRUCT_CLASS;
    - VFP_STRUCT_CLASS must come after VFP_CLASS.
    See the comment for the main loop in copy_params() for the reason. */
 enum reg_class {
     STACK_CLASS = 0,
     CORE_STRUCT_CLASS,
     VFP_CLASS,
     VFP_STRUCT_CLASS,
     CORE_CLASS,
     NB_CLASSES /* number of classes, not a class itself */
 };
 911
 912 struct param_plan {
 913     int start; /* first reg or addr used depending on the class */
 914     int end; /* last reg used or next free addr depending on the class */
 915     SValue *sval; /* pointer to SValue on the value stack */
 916     struct param_plan *prev; /*  previous element in this class */
 917 };
 918
 919 struct plan {
 920     struct param_plan *pplans; /* array of all the param plans */
 921     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 922 };
 923
 /* Append 'pplan' to the array of param plans of 'plan' and make it the
    head of the list of class 'class'. The number of plans already stored
    is kept in a variable named after the first argument with a "_nb"
    suffix, which must exist at the point of use. */
 #define add_param_plan(plan,pplan,class)                      \
     do {                                                      \
         pplan.prev = plan->clsplans[class];                   \
         plan->pplans[plan ## _nb] = pplan;                    \
         plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
     } while(0)
 930
 931 /* Assign parameters to registers and stack with alignment according to the
 932    rules in the procedure call standard for the ARM architecture (AAPCS).
 933    The overall assignment is recorded in an array of per parameter structures
 934    called parameter plans. The parameter plans are also further organized in a
 935    number of linked lists, one per class of parameter (see the comment for the
 936    definition of union reg_class).
 937
 938    nb_args: number of parameters of the function for which a call is generated
 939    float_abi: float ABI in use for this function call
 940    plan: the structure where the overall assignment is recorded
 941    todo: a bitmap that record which core registers hold a parameter
 942
 943    Returns the amount of stack space needed for parameter passing
 944
 945    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 946    is the responsibility of the caller to free this array once used (ie not
 947    before copy_params). */
 948 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
 949 {
 950   int i, size, align;
 951   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 952   int plan_nb = 0;
 953   struct param_plan pplan;
 954   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 955
 956   ncrn = nsaa = 0;
 957   *todo = 0;
 958   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 959   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 960   for(i = nb_args; i-- ;) {
 961     int j, start_vfpreg = 0;
 962     CType type = vtop[-i].type;
 963     type.t &= ~VT_ARRAY;
 964     size = type_size(&type, &align);
 965     size = (size + 3) & ~3;
 966     align = (align + 3) & ~3;
 967     switch(vtop[-i].type.t & VT_BTYPE) {
 968       case VT_STRUCT:
 969       case VT_FLOAT:
 970       case VT_DOUBLE:
 971       case VT_LDOUBLE:
 972       if (float_abi == ARM_HARD_FLOAT) {
 973         int is_hfa = 0; /* Homogeneous float aggregate */
 974
 975         if (is_float(vtop[-i].type.t)
 976             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 977           int end_vfpreg;
 978
 979           start_vfpreg = assign_vfpreg(&avregs, align, size);
 980           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 981           if (start_vfpreg >= 0) {
 982             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 983             if (is_hfa)
 984               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 985             else
 986               add_param_plan(plan, pplan, VFP_CLASS);
 987             continue;
 988           } else
 989             break;
 990         }
 991       }
 992       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
 993       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 994         /* The parameter is allocated both in core register and on stack. As
 995          * such, it can be of either class: it would either be the last of
 996          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
 997         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 998           *todo|=(1<<j);
 999         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1000         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1001         ncrn += size/4;
1002         if (ncrn > 4)
1003           nsaa = (ncrn - 4) * 4;
1004       } else {
1005         ncrn = 4;
1006         break;
1007       }
1008       continue;
1009       default:
1010       if (ncrn < 4) {
1011         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1012
1013         if (is_long) {
1014           ncrn = (ncrn + 1) & -2;
1015           if (ncrn == 4)
1016             break;
1017         }
1018         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1019         ncrn++;
1020         if (is_long)
1021           pplan.end = ncrn++;
1022         add_param_plan(plan, pplan, CORE_CLASS);
1023         continue;
1024       }
1025     }
1026     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1027     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1028     add_param_plan(plan, pplan, STACK_CLASS);
1029     nsaa += size; /* size already rounded up before */
1030   }
1031   return nsaa;
1032 }
1033
1034 #undef add_param_plan
1035
1036 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1037    function call.
1038
1039    nb_args: number of parameters the function take
1040    plan: the overall assignment plan for parameters
1041    todo: a bitmap indicating what core reg will hold a parameter
1042
1043    Returns the number of SValue added by this function on the value stack */
1044 static int copy_params(int nb_args, struct plan *plan, int todo)
1045 {
1046   int size, align, r, i, nb_extra_sval = 0;
1047   struct param_plan *pplan;
1048   int pass = 0;
1049
1050    /* Several constraints require parameters to be copied in a specific order:
1051       - structures are copied to the stack before being loaded in a reg;
1052       - floats loaded to an odd numbered VFP reg are first copied to the
1053         preceding even numbered VFP reg and then moved to the next VFP reg.
1054
1055       It is thus important that:
1056       - structures assigned to core regs must be copied after parameters
1057         assigned to the stack but before structures assigned to VFP regs because
1058         a structure can lie partly in core registers and partly on the stack;
1059       - parameters assigned to the stack and all structures be copied before
1060         parameters assigned to a core reg since copying a parameter to the stack
1061         require using a core reg;
1062       - parameters assigned to VFP regs be copied before structures assigned to
1063         VFP regs as the copy might use an even numbered VFP reg that already
1064         holds part of a structure. */
1065 again:
1066   for(i = 0; i < NB_CLASSES; i++) {
1067     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1068
1069       if (pass
1070           && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
1071         continue;
1072
1073       vpushv(pplan->sval);
1074       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1075       switch(i) {
1076         case STACK_CLASS:
1077         case CORE_STRUCT_CLASS:
1078         case VFP_STRUCT_CLASS:
1079           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1080             int padding = 0;
1081             size = type_size(&pplan->sval->type, &align);
1082             /* align to stack align size */
1083             size = (size + 3) & ~3;
1084             if (i == STACK_CLASS && pplan->prev)
1085               padding = pplan->start - pplan->prev->end;
1086             size += padding; /* Add padding if any */
1087             /* allocate the necessary size on stack */
1088             gadd_sp(-size);
1089             /* generate structure store */
1090             r = get_reg(RC_INT);
1091             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1092             vset(&vtop->type, r | VT_LVAL, 0);
1093             vswap();
1094             vstore(); /* memcpy to current sp + potential padding */
1095
1096             /* Homogeneous float aggregate are loaded to VFP registers
1097                immediately since there is no way of loading data in multiple
1098                non consecutive VFP registers as what is done for other
1099                structures (see the use of todo). */
1100             if (i == VFP_STRUCT_CLASS) {
1101               int first = pplan->start, nb = pplan->end - first + 1;
1102               /* vpop.32 {pplan->start, ..., pplan->end} */
1103               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1104               /* No need to write the register used to a SValue since VFP regs
1105                  cannot be used for gcall_or_jmp */
1106             }
1107           } else {
1108             if (is_float(pplan->sval->type.t)) {
1109 #ifdef TCC_ARM_VFP
1110               r = vfpr(gv(RC_FLOAT)) << 12;
1111               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1112                 size = 4;
1113               else {
1114                 size = 8;
1115                 r |= 0x101; /* vpush.32 -> vpush.64 */
1116               }
1117               o(0xED2D0A01 + r); /* vpush */
1118 #else
1119               r = fpr(gv(RC_FLOAT)) << 12;
1120               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1121                 size = 4;
1122               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1123                 size = 8;
1124               else
1125                 size = LDOUBLE_SIZE;
1126
1127               if (size == 12)
1128                 r |= 0x400000;
1129               else if(size == 8)
1130                 r|=0x8000;
1131
1132               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1133 #endif
1134             } else {
1135               /* simple type (currently always same size) */
1136               /* XXX: implicit cast ? */
1137               size=4;
1138               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1139                 lexpand_nr();
1140                 size = 8;
1141                 r = gv(RC_INT);
1142                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1143                 vtop--;
1144               }
1145               r = gv(RC_INT);
1146               o(0xE52D0004|(intr(r)<<12)); /* push r */
1147             }
1148             if (i == STACK_CLASS && pplan->prev)
1149               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1150           }
1151           break;
1152
1153         case VFP_CLASS:
1154           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1155           if (pplan->start & 1) { /* Must be in upper part of double register */
1156             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1157             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1158           }
1159           break;
1160
1161         case CORE_CLASS:
1162           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1163             lexpand_nr();
1164             gv(regmask(pplan->end));
1165             pplan->sval->r2 = vtop->r;
1166             vtop--;
1167           }
1168           gv(regmask(pplan->start));
1169           /* Mark register as used so that gcall_or_jmp use another one
1170              (regs >=4 are free as never used to pass parameters) */
1171           pplan->sval->r = vtop->r;
1172           break;
1173       }
1174       vtop--;
1175     }
1176   }
1177
1178   /* second pass to restore registers that were saved on stack by accident.
1179      Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1180   if (++pass < 2)
1181     goto again;
1182
1183   /* Manually free remaining registers since next parameters are loaded
1184    * manually, without the help of gv(int). */
1185   save_regs(nb_args);
1186
1187   if(todo) {
1188     o(0xE8BD0000|todo); /* pop {todo} */
1189     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1190       int r;
1191       pplan->sval->r = pplan->start;
1192       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1193          can occupy more than 2 registers. Thus, we need to push on the value
1194          stack some fake parameter to have on SValue for each registers used
1195          by a structure (r2 is not used). */
1196       for (r = pplan->start + 1; r <= pplan->end; r++) {
1197         if (todo & (1 << r)) {
1198           nb_extra_sval++;
1199           vpushi(0);
1200           vtop->r = r;
1201         }
1202       }
1203     }
1204   }
1205   return nb_extra_sval;
1206 }
1207
1208 /* Generate function call. The function address is pushed first, then
1209    all the parameters in call order. This functions pops all the
1210    parameters and the function address. */
1211 void gfunc_call(int nb_args)
1212 {
1213   int r, args_size;
1214   int def_float_abi = float_abi;
1215   int todo;
1216   struct plan plan;
1217
1218 #ifdef TCC_ARM_EABI
1219   int variadic;
1220
1221   if (float_abi == ARM_HARD_FLOAT) {
1222     variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1223     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1224       float_abi = ARM_SOFTFP_FLOAT;
1225   }
1226 #endif
1227   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1228      VT_JMP anywhere except on the top of the stack because it would complicate
1229      the code generator. */
1230   r = vtop->r & VT_VALMASK;
1231   if (r == VT_CMP || (r & ~1) == VT_JMP)
1232     gv(RC_INT);
1233
1234   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1235
1236 #ifdef TCC_ARM_EABI
1237   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1238     args_size = (args_size + 7) & ~7;
1239     o(0xE24DD004); /* sub sp, sp, #4 */
1240   }
1241 #endif
1242
1243   nb_args += copy_params(nb_args, &plan, todo);
1244   tcc_free(plan.pplans);
1245
1246   /* Move fct SValue on top as required by gcall_or_jmp */
1247   vrotb(nb_args + 1);
1248   gcall_or_jmp(0);
1249   if (args_size)
1250       gadd_sp(args_size); /* pop all parameters passed on the stack */
1251 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1252   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1253     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1254       o(0xEE000A10); /*vmov s0, r0 */
1255     } else {
1256       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1257       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1258     }
1259   }
1260 #endif
1261   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1262   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1263   float_abi = def_float_abi;
1264 }
1265
1266 /* generate function prolog of type 't' */
1267 void gfunc_prolog(CType *func_type)
1268 {
1269   Sym *sym,*sym2;
1270   int n, nf, size, align, rs, struct_ret = 0;
1271   int addr, pn, sn; /* pn=core, sn=stack */
1272   CType ret_type;
1273
1274 #ifdef TCC_ARM_EABI
1275   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1276 #endif
1277
1278   sym = func_type->ref;
1279   func_vt = sym->type;
1280   func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1281
1282   n = nf = 0;
1283   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1284       !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1285   {
1286     n++;
1287     struct_ret = 1;
1288     func_vc = 12; /* Offset from fp of the place to store the result */
1289   }
1290   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1291     size = type_size(&sym2->type, &align);
1292 #ifdef TCC_ARM_EABI
1293     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1294         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1295       int tmpnf = assign_vfpreg(&avregs, align, size);
1296       tmpnf += (size + 3) / 4;
1297       nf = (tmpnf > nf) ? tmpnf : nf;
1298     } else
1299 #endif
1300     if (n < 4)
1301       n += (size + 3) / 4;
1302   }
1303   o(0xE1A0C00D); /* mov ip,sp */
1304   if (func_var)
1305     n=4;
1306   if (n) {
1307     if(n>4)
1308       n=4;
1309 #ifdef TCC_ARM_EABI
1310     n=(n+1)&-2;
1311 #endif
1312     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1313   }
1314   if (nf) {
1315     if (nf>16)
1316       nf=16;
1317     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1318     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1319   }
1320   o(0xE92D5800); /* save fp, ip, lr */
1321   o(0xE1A0B00D); /* mov fp, sp */
1322   func_sub_sp_offset = ind;
1323   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1324
1325 #ifdef TCC_ARM_EABI
1326   if (float_abi == ARM_HARD_FLOAT) {
1327     func_vc += nf * 4;
1328     avregs = AVAIL_REGS_INITIALIZER;
1329   }
1330 #endif
1331   pn = struct_ret, sn = 0;
1332   while ((sym = sym->next)) {
1333     CType *type;
1334     type = &sym->type;
1335     size = type_size(type, &align);
1336     size = (size + 3) >> 2;
1337     align = (align + 3) & ~3;
1338 #ifdef TCC_ARM_EABI
1339     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1340         || is_hgen_float_aggr(&sym->type))) {
1341       int fpn = assign_vfpreg(&avregs, align, size << 2);
1342       if (fpn >= 0)
1343         addr = fpn * 4;
1344       else
1345         goto from_stack;
1346     } else
1347 #endif
1348     if (pn < 4) {
1349 #ifdef TCC_ARM_EABI
1350         pn = (pn + (align-1)/4) & -(align/4);
1351 #endif
1352       addr = (nf + pn) * 4;
1353       pn += size;
1354       if (!sn && pn > 4)
1355         sn = (pn - 4);
1356     } else {
1357 #ifdef TCC_ARM_EABI
1358 from_stack:
1359         sn = (sn + (align-1)/4) & -(align/4);
1360 #endif
1361       addr = (n + nf + sn) * 4;
1362       sn += size;
1363     }
1364     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1365              addr + 12);
1366   }
1367   last_itod_magic=0;
1368   leaffunc = 1;
1369   loc = 0;
1370 }
1371
1372 /* generate function epilog */
1373 void gfunc_epilog(void)
1374 {
1375   uint32_t x;
1376   int diff;
1377   /* Copy float return value to core register if base standard is used and
1378      float computation is made with VFP */
1379 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1380   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1381     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1382       o(0xEE100A10); /* fmrs r0, s0 */
1383     else {
1384       o(0xEE100B10); /* fmrdl r0, d0 */
1385       o(0xEE301B10); /* fmrdh r1, d0 */
1386     }
1387   }
1388 #endif
1389   o(0xE89BA800); /* restore fp, sp, pc */
1390   diff = (-loc + 3) & -4;
1391 #ifdef TCC_ARM_EABI
1392   if(!leaffunc)
1393     diff = ((diff + 11) & -8) - 4;
1394 #endif
1395   if(diff > 0) {
1396     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1397     if(x)
1398       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1399     else {
1400       int addr;
1401       addr=ind;
1402       o(0xE59FC004); /* ldr ip,[pc+4] */
1403       o(0xE04BD00C); /* sub sp,fp,ip  */
1404       o(0xE1A0F00E); /* mov pc,lr */
1405       o(diff);
1406       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1407     }
1408   }
1409 }
1410
1411 /* generate a jump to a label */
1412 int gjmp(int t)
1413 {
1414   int r;
1415   if (nocode_wanted)
1416     return t;
1417   r=ind;
1418   o(0xE0000000|encbranch(r,t,1));
1419   return r;
1420 }
1421
/* Emit a jump to the fixed address 'a'; gjmp() does the work and its
   result is not needed here. */
void gjmp_addr(int a)
{
  (void) gjmp(a);
}
1427
1428 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1429 int gtst(int inv, int t)
1430 {
1431   int v, r;
1432   uint32_t op;
1433
1434   v = vtop->r & VT_VALMASK;
1435   r=ind;
1436
1437   if (nocode_wanted) {
1438     ;
1439   } else if (v == VT_CMP) {
1440     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1441     op|=encbranch(r,t,1);
1442     o(op);
1443     t=r;
1444   } else if (v == VT_JMP || v == VT_JMPI) {
1445     if ((v & 1) == inv) {
1446       if(!vtop->c.i)
1447         vtop->c.i=t;
1448       else {
1449         uint32_t *x;
1450         int p,lp;
1451         if(t) {
1452           p = vtop->c.i;
1453           do {
1454             p = decbranch(lp=p);
1455           } while(p);
1456           x = (uint32_t *)(cur_text_section->data + lp);
1457           *x &= 0xff000000;
1458           *x |= encbranch(lp,t,1);
1459         }
1460         t = vtop->c.i;
1461       }
1462     } else {
1463       t = gjmp(t);
1464       gsym(vtop->c.i);
1465     }
1466   }
1467   vtop--;
1468   return t;
1469 }
1470
1471 /* generate an integer binary operation */
1472 void gen_opi(int op)
1473 {
1474   int c, func = 0;
1475   uint32_t opc = 0, r, fr;
1476   unsigned short retreg = REG_IRET;
1477
1478   c=0;
1479   switch(op) {
1480     case '+':
1481       opc = 0x8;
1482       c=1;
1483       break;
1484     case TOK_ADDC1: /* add with carry generation */
1485       opc = 0x9;
1486       c=1;
1487       break;
1488     case '-':
1489       opc = 0x4;
1490       c=1;
1491       break;
1492     case TOK_SUBC1: /* sub with carry generation */
1493       opc = 0x5;
1494       c=1;
1495       break;
1496     case TOK_ADDC2: /* add with carry use */
1497       opc = 0xA;
1498       c=1;
1499       break;
1500     case TOK_SUBC2: /* sub with carry use */
1501       opc = 0xC;
1502       c=1;
1503       break;
1504     case '&':
1505       opc = 0x0;
1506       c=1;
1507       break;
1508     case '^':
1509       opc = 0x2;
1510       c=1;
1511       break;
1512     case '|':
1513       opc = 0x18;
1514       c=1;
1515       break;
1516     case '*':
1517       gv2(RC_INT, RC_INT);
1518       r = vtop[-1].r;
1519       fr = vtop[0].r;
1520       vtop--;
1521       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1522       return;
1523     case TOK_SHL:
1524       opc = 0;
1525       c=2;
1526       break;
1527     case TOK_SHR:
1528       opc = 1;
1529       c=2;
1530       break;
1531     case TOK_SAR:
1532       opc = 2;
1533       c=2;
1534       break;
1535     case '/':
1536     case TOK_PDIV:
1537       func=TOK___divsi3;
1538       c=3;
1539       break;
1540     case TOK_UDIV:
1541       func=TOK___udivsi3;
1542       c=3;
1543       break;
1544     case '%':
1545 #ifdef TCC_ARM_EABI
1546       func=TOK___aeabi_idivmod;
1547       retreg=REG_LRET;
1548 #else
1549       func=TOK___modsi3;
1550 #endif
1551       c=3;
1552       break;
1553     case TOK_UMOD:
1554 #ifdef TCC_ARM_EABI
1555       func=TOK___aeabi_uidivmod;
1556       retreg=REG_LRET;
1557 #else
1558       func=TOK___umodsi3;
1559 #endif
1560       c=3;
1561       break;
1562     case TOK_UMULL:
1563       gv2(RC_INT, RC_INT);
1564       r=intr(vtop[-1].r2=get_reg(RC_INT));
1565       c=vtop[-1].r;
1566       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1567       vtop--;
1568       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1569       return;
1570     default:
1571       opc = 0x15;
1572       c=1;
1573       break;
1574   }
1575   switch(c) {
1576     case 1:
1577       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1578         if(opc == 4 || opc == 5 || opc == 0xc) {
1579           vswap();
1580           opc|=2; // sub -> rsb
1581         }
1582       }
1583       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1584           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1585         gv(RC_INT);
1586       vswap();
1587       c=intr(gv(RC_INT));
1588       vswap();
1589       opc=0xE0000000|(opc<<20)|(c<<16);
1590       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1591         uint32_t x;
1592         x=stuff_const(opc|0x2000000,vtop->c.i);
1593         if(x) {
1594           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1595           o(x|(r<<12));
1596           goto done;
1597         }
1598       }
1599       fr=intr(gv(RC_INT));
1600       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1601       o(opc|(r<<12)|fr);
1602 done:
1603       vtop--;
1604       if (op >= TOK_ULT && op <= TOK_GT) {
1605         vtop->r = VT_CMP;
1606         vtop->c.i = op;
1607       }
1608       break;
1609     case 2:
1610       opc=0xE1A00000|(opc<<5);
1611       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1612           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1613         gv(RC_INT);
1614       vswap();
1615       r=intr(gv(RC_INT));
1616       vswap();
1617       opc|=r;
1618       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1619         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1620         c = vtop->c.i & 0x1f;
1621         o(opc|(c<<7)|(fr<<12));
1622       } else {
1623         fr=intr(gv(RC_INT));
1624         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1625         o(opc|(c<<12)|(fr<<8)|0x10);
1626       }
1627       vtop--;
1628       break;
1629     case 3:
1630       vpush_global_sym(&func_old_type, func);
1631       vrott(3);
1632       gfunc_call(2);
1633       vpushi(0);
1634       vtop->r = retreg;
1635       break;
1636     default:
1637       tcc_error("gen_opi %i unimplemented!",op);
1638   }
1639 }
1640
1641 #ifdef TCC_ARM_VFP
1642 static int is_zero(int i)
1643 {
1644   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1645     return 0;
1646   if (vtop[i].type.t == VT_FLOAT)
1647     return (vtop[i].c.f == 0.f);
1648   else if (vtop[i].type.t == VT_DOUBLE)
1649     return (vtop[i].c.d == 0.0);
1650   return (vtop[i].c.ld == 0.l);
1651 }
1652
1653 /* generate a floating point operation 'v = t1 op t2' instruction. The
1654  *    two operands are guaranted to have the same floating point type */
1655 void gen_opf(int op)
1656 {
1657   uint32_t x;
1658   int fneg=0,r;
1659   x=0xEE000A00|T2CPR(vtop->type.t);
1660   switch(op) {
1661     case '+':
1662       if(is_zero(-1))
1663         vswap();
1664       if(is_zero(0)) {
1665         vtop--;
1666         return;
1667       }
1668       x|=0x300000;
1669       break;
1670     case '-':
1671       x|=0x300040;
1672       if(is_zero(0)) {
1673         vtop--;
1674         return;
1675       }
1676       if(is_zero(-1)) {
1677         x|=0x810000; /* fsubX -> fnegX */
1678         vswap();
1679         vtop--;
1680         fneg=1;
1681       }
1682       break;
1683     case '*':
1684       x|=0x200000;
1685       break;
1686     case '/':
1687       x|=0x800000;
1688       break;
1689     default:
1690       if(op < TOK_ULT || op > TOK_GT) {
1691         tcc_error("unknown fp op %x!",op);
1692         return;
1693       }
1694       if(is_zero(-1)) {
1695         vswap();
1696         switch(op) {
1697           case TOK_LT: op=TOK_GT; break;
1698           case TOK_GE: op=TOK_ULE; break;
1699           case TOK_LE: op=TOK_GE; break;
1700           case TOK_GT: op=TOK_ULT; break;
1701         }
1702       }
1703       x|=0xB40040; /* fcmpX */
1704       if(op!=TOK_EQ && op!=TOK_NE)
1705         x|=0x80; /* fcmpX -> fcmpeX */
1706       if(is_zero(0)) {
1707         vtop--;
1708         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1709       } else {
1710         x|=vfpr(gv(RC_FLOAT));
1711         vswap();
1712         o(x|(vfpr(gv(RC_FLOAT))<<12));
1713         vtop--;
1714       }
1715       o(0xEEF1FA10); /* fmstat */
1716
1717       switch(op) {
1718         case TOK_LE: op=TOK_ULE; break;
1719         case TOK_LT: op=TOK_ULT; break;
1720         case TOK_UGE: op=TOK_GE; break;
1721         case TOK_UGT: op=TOK_GT; break;
1722       }
1723
1724       vtop->r = VT_CMP;
1725       vtop->c.i = op;
1726       return;
1727   }
1728   r=gv(RC_FLOAT);
1729   x|=vfpr(r);
1730   r=regmask(r);
1731   if(!fneg) {
1732     int r2;
1733     vswap();
1734     r2=gv(RC_FLOAT);
1735     x|=vfpr(r2)<<16;
1736     r|=regmask(r2);
1737   }
1738   vtop->r=get_reg_ex(RC_FLOAT,r);
1739   if(!fneg)
1740     vtop--;
1741   o(x|(vfpr(vtop->r)<<12));
1742 }
1743
1744 #else
1745 static uint32_t is_fconst()
1746 {
1747   long double f;
1748   uint32_t r;
1749   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1750     return 0;
1751   if (vtop->type.t == VT_FLOAT)
1752     f = vtop->c.f;
1753   else if (vtop->type.t == VT_DOUBLE)
1754     f = vtop->c.d;
1755   else
1756     f = vtop->c.ld;
1757   if(!ieee_finite(f))
1758     return 0;
1759   r=0x8;
1760   if(f<0.0) {
1761     r=0x18;
1762     f=-f;
1763   }
1764   if(f==0.0)
1765     return r;
1766   if(f==1.0)
1767     return r|1;
1768   if(f==2.0)
1769     return r|2;
1770   if(f==3.0)
1771     return r|3;
1772   if(f==4.0)
1773     return r|4;
1774   if(f==5.0)
1775     return r|5;
1776   if(f==0.5)
1777     return r|6;
1778   if(f==10.0)
1779     return r|7;
1780   return 0;
1781 }
1782
1783 /* generate a floating point operation 'v = t1 op t2' instruction. The
1784    two operands are guaranted to have the same floating point type */
1785 void gen_opf(int op)
1786 {
1787   uint32_t x, r, r2, c1, c2;
1788   //fputs("gen_opf\n",stderr);
1789   vswap();
1790   c1 = is_fconst();
1791   vswap();
1792   c2 = is_fconst();
1793   x=0xEE000100;
1794 #if LDOUBLE_SIZE == 8
1795   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1796     x|=0x80;
1797 #else
1798   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1799     x|=0x80;
1800   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1801     x|=0x80000;
1802 #endif
1803   switch(op)
1804   {
1805     case '+':
1806       if(!c2) {
1807         vswap();
1808         c2=c1;
1809       }
1810       vswap();
1811       r=fpr(gv(RC_FLOAT));
1812       vswap();
1813       if(c2) {
1814         if(c2>0xf)
1815           x|=0x200000; // suf
1816         r2=c2&0xf;
1817       } else {
1818         r2=fpr(gv(RC_FLOAT));
1819       }
1820       break;
1821     case '-':
1822       if(c2) {
1823         if(c2<=0xf)
1824           x|=0x200000; // suf
1825         r2=c2&0xf;
1826         vswap();
1827         r=fpr(gv(RC_FLOAT));
1828         vswap();
1829       } else if(c1 && c1<=0xf) {
1830         x|=0x300000; // rsf
1831         r2=c1;
1832         r=fpr(gv(RC_FLOAT));
1833         vswap();
1834       } else {
1835         x|=0x200000; // suf
1836         vswap();
1837         r=fpr(gv(RC_FLOAT));
1838         vswap();
1839         r2=fpr(gv(RC_FLOAT));
1840       }
1841       break;
1842     case '*':
1843       if(!c2 || c2>0xf) {
1844         vswap();
1845         c2=c1;
1846       }
1847       vswap();
1848       r=fpr(gv(RC_FLOAT));
1849       vswap();
1850       if(c2 && c2<=0xf)
1851         r2=c2;
1852       else
1853         r2=fpr(gv(RC_FLOAT));
1854       x|=0x100000; // muf
1855       break;
1856     case '/':
1857       if(c2 && c2<=0xf) {
1858         x|=0x400000; // dvf
1859         r2=c2;
1860         vswap();
1861         r=fpr(gv(RC_FLOAT));
1862         vswap();
1863       } else if(c1 && c1<=0xf) {
1864         x|=0x500000; // rdf
1865         r2=c1;
1866         r=fpr(gv(RC_FLOAT));
1867         vswap();
1868       } else {
1869         x|=0x400000; // dvf
1870         vswap();
1871         r=fpr(gv(RC_FLOAT));
1872         vswap();
1873         r2=fpr(gv(RC_FLOAT));
1874       }
1875       break;
1876     default:
1877       if(op >= TOK_ULT && op <= TOK_GT) {
1878         x|=0xd0f110; // cmfe
1879 /* bug (intention?) in Linux FPU emulator
1880    doesn't set carry if equal */
1881         switch(op) {
1882           case TOK_ULT:
1883           case TOK_UGE:
1884           case TOK_ULE:
1885           case TOK_UGT:
1886             tcc_error("unsigned comparison on floats?");
1887             break;
1888           case TOK_LT:
1889             op=TOK_Nset;
1890             break;
1891           case TOK_LE:
1892             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1893             break;
1894           case TOK_EQ:
1895           case TOK_NE:
1896             x&=~0x400000; // cmfe -> cmf
1897             break;
1898         }
1899         if(c1 && !c2) {
1900           c2=c1;
1901           vswap();
1902           switch(op) {
1903             case TOK_Nset:
1904               op=TOK_GT;
1905               break;
1906             case TOK_GE:
1907               op=TOK_ULE;
1908               break;
1909             case TOK_ULE:
1910               op=TOK_GE;
1911               break;
1912             case TOK_GT:
1913               op=TOK_Nset;
1914               break;
1915           }
1916         }
1917         vswap();
1918         r=fpr(gv(RC_FLOAT));
1919         vswap();
1920         if(c2) {
1921           if(c2>0xf)
1922             x|=0x200000;
1923           r2=c2&0xf;
1924         } else {
1925           r2=fpr(gv(RC_FLOAT));
1926         }
1927         vtop[-1].r = VT_CMP;
1928         vtop[-1].c.i = op;
1929       } else {
1930         tcc_error("unknown fp op %x!",op);
1931         return;
1932       }
1933   }
1934   if(vtop[-1].r == VT_CMP)
1935     c1=15;
1936   else {
1937     c1=vtop->r;
1938     if(r2&0x8)
1939       c1=vtop[-1].r;
1940     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1941     c1=fpr(vtop[-1].r);
1942   }
1943   vtop--;
1944   o(x|(r<<16)|(c1<<12)|r2);
1945 }
1946 #endif
1947
1948 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1949    and 'long long' cases. */
1950 ST_FUNC void gen_cvt_itof1(int t)
1951 {
1952   uint32_t r, r2;
1953   int bt;
1954   bt=vtop->type.t & VT_BTYPE;
1955   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1956 #ifndef TCC_ARM_VFP
1957     uint32_t dsize = 0;
1958 #endif
1959     r=intr(gv(RC_INT));
1960 #ifdef TCC_ARM_VFP
1961     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1962     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1963     r2|=r2<<12;
1964     if(!(vtop->type.t & VT_UNSIGNED))
1965       r2|=0x80;                /* fuitoX -> fsituX */
1966     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1967 #else
1968     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1969     if((t & VT_BTYPE) != VT_FLOAT)
1970       dsize=0x80;    /* flts -> fltd */
1971     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1972     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1973       uint32_t off = 0;
1974       o(0xE3500000|(r<<12));        /* cmp */
1975       r=fpr(get_reg(RC_FLOAT));
1976       if(last_itod_magic) {
1977         off=ind+8-last_itod_magic;
1978         off/=4;
1979         if(off>255)
1980           off=0;
1981       }
1982       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1983       if(!off) {
1984         o(0xEA000000);              /* b */
1985         last_itod_magic=ind;
1986         o(0x4F800000);              /* 4294967296.0f */
1987       }
1988       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1989     }
1990 #endif
1991     return;
1992   } else if(bt == VT_LLONG) {
1993     int func;
1994     CType *func_type = 0;
1995     if((t & VT_BTYPE) == VT_FLOAT) {
1996       func_type = &func_float_type;
1997       if(vtop->type.t & VT_UNSIGNED)
1998         func=TOK___floatundisf;
1999       else
2000         func=TOK___floatdisf;
2001 #if LDOUBLE_SIZE != 8
2002     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2003       func_type = &func_ldouble_type;
2004       if(vtop->type.t & VT_UNSIGNED)
2005         func=TOK___floatundixf;
2006       else
2007         func=TOK___floatdixf;
2008     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2009 #else
2010     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2011 #endif
2012       func_type = &func_double_type;
2013       if(vtop->type.t & VT_UNSIGNED)
2014         func=TOK___floatundidf;
2015       else
2016         func=TOK___floatdidf;
2017     }
2018     if(func_type) {
2019       vpush_global_sym(func_type, func);
2020       vswap();
2021       gfunc_call(1);
2022       vpushi(0);
2023       vtop->r=TREG_F0;
2024       return;
2025     }
2026   }
2027   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2028 }
2029
2030 /* convert fp to int 't' type */
2031 void gen_cvt_ftoi(int t)
2032 {
2033   uint32_t r, r2;
2034   int u, func = 0;
2035   u=t&VT_UNSIGNED;
2036   t&=VT_BTYPE;
2037   r2=vtop->type.t & VT_BTYPE;
2038   if(t==VT_INT) {
2039 #ifdef TCC_ARM_VFP
2040     r=vfpr(gv(RC_FLOAT));
2041     u=u?0:0x10000;
2042     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2043     r2=intr(vtop->r=get_reg(RC_INT));
2044     o(0xEE100A10|(r<<16)|(r2<<12));
2045     return;
2046 #else
2047     if(u) {
2048       if(r2 == VT_FLOAT)
2049         func=TOK___fixunssfsi;
2050 #if LDOUBLE_SIZE != 8
2051       else if(r2 == VT_LDOUBLE)
2052         func=TOK___fixunsxfsi;
2053       else if(r2 == VT_DOUBLE)
2054 #else
2055       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2056 #endif
2057         func=TOK___fixunsdfsi;
2058     } else {
2059       r=fpr(gv(RC_FLOAT));
2060       r2=intr(vtop->r=get_reg(RC_INT));
2061       o(0xEE100170|(r2<<12)|r);
2062       return;
2063     }
2064 #endif
2065   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2066     if(r2 == VT_FLOAT)
2067       func=TOK___fixsfdi;
2068 #if LDOUBLE_SIZE != 8
2069     else if(r2 == VT_LDOUBLE)
2070       func=TOK___fixxfdi;
2071     else if(r2 == VT_DOUBLE)
2072 #else
2073     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2074 #endif
2075       func=TOK___fixdfdi;
2076   }
2077   if(func) {
2078     vpush_global_sym(&func_old_type, func);
2079     vswap();
2080     gfunc_call(1);
2081     vpushi(0);
2082     if(t == VT_LLONG)
2083       vtop->r2 = REG_LRET;
2084     vtop->r = REG_IRET;
2085     return;
2086   }
2087   tcc_error("unimplemented gen_cvt_ftoi!");
2088 }
2089
2090 /* convert from one floating point type to another */
2091 void gen_cvt_ftof(int t)
2092 {
2093 #ifdef TCC_ARM_VFP
2094   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2095     uint32_t r = vfpr(gv(RC_FLOAT));
2096     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2097   }
2098 #else
2099   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2100   gv(RC_FLOAT);
2101 #endif
2102 }
2103
2104 /* computed goto support */
2105 void ggoto(void)
2106 {
2107   gcall_or_jmp(1);
2108   vtop--;
2109 }
2110
2111 /* Save the stack pointer onto the stack and return the location of its address */
2112 ST_FUNC void gen_vla_sp_save(int addr) {
2113     SValue v;
2114     v.type.t = VT_PTR;
2115     v.r = VT_LOCAL | VT_LVAL;
2116     v.c.i = addr;
2117     store(TREG_SP, &v);
2118 }
2119
2120 /* Restore the SP from a location on the stack */
2121 ST_FUNC void gen_vla_sp_restore(int addr) {
2122     SValue v;
2123     v.type.t = VT_PTR;
2124     v.r = VT_LOCAL | VT_LVAL;
2125     v.c.i = addr;
2126     load(TREG_SP, &v);
2127 }
2128
2129 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2130 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2131     int r = intr(gv(RC_INT));
2132     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2133 #ifdef TCC_ARM_EABI
2134     if (align < 8)
2135         align = 8;
2136 #else
2137     if (align < 4)
2138         align = 4;
2139 #endif
2140     if (align & (align - 1))
2141         tcc_error("alignment is not a power of 2: %i", align);
2142     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2143     vpop();
2144 }
2145
2146 /* end of ARM code generator */
2147 /*************************************************************/
2148 #endif
2149 /*************************************************************/
2150
2151 #ifndef TCC_IS_NATIVE
2152 #include "arm-asm.c"
2153 #endif