arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_ARM_VERSION
  38 # define TCC_ARM_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82 };
  83
  84 #ifdef TCC_ARM_VFP
  85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  86 #endif
  87
/* return registers for function */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_LRET TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */

/* With the EABI, the tokens of the 64-bit division and modulo helpers
   are aliased: both signed ones become TOK___aeabi_ldivmod and both
   unsigned ones become TOK___aeabi_uldivmod. */
#ifdef TCC_ARM_EABI
#define TOK___divdi3 TOK___aeabi_ldivmod
#define TOK___moddi3 TOK___aeabi_ldivmod
#define TOK___udivdi3 TOK___aeabi_uldivmod
#define TOK___umoddi3 TOK___aeabi_uldivmod
#endif
  99
/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. */
/* #define FUNC_STRUCT_PARAM_AS_PTR */

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
/* NOTE: both paths below give LDOUBLE_SIZE the value 8; the second one
   only applies when nothing has defined it yet. */
#ifdef TCC_ARM_VFP
#define LDOUBLE_SIZE  8
#endif

#ifndef LDOUBLE_SIZE
#define LDOUBLE_SIZE  8
#endif

/* the alignment depends on the ABI: 8 with the EABI, 4 otherwise */
#ifdef TCC_ARM_EABI
#define LDOUBLE_ALIGN 8
#else
#define LDOUBLE_ALIGN 4
#endif

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* NOTE(review): presumably makes plain 'char' unsigned for this
   target - confirm where the macro is tested */
#define CHAR_IS_UNSIGNED
 129
/******************************************************/
/* ELF defines */

/* ELF machine type of the target */
#define EM_TCC_TARGET EM_ARM

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_ARM_ABS32
#define R_DATA_PTR  R_ARM_ABS32
#define R_JMP_SLOT  R_ARM_JUMP_SLOT
#define R_COPY      R_ARM_COPY

/* NOTE(review): presumably the default start address and the page size
   used when laying out the output file - confirm in the ELF writer */
#define ELF_START_ADDR 0x00008000
#define ELF_PAGE_SIZE  0x1000
 143
 144 enum float_abi {
 145     ARM_SOFTFP_FLOAT,
 146     ARM_HARD_FLOAT,
 147 };
 148
 149 enum float_abi float_abi;
 150
 151 /******************************************************/
 152 #else /* ! TARGET_DEFS_ONLY */
 153 /******************************************************/
 154 #include "tcc.h"
 155
 156 ST_DATA const int reg_classes[NB_REGS] = {
 157     /* r0 */ RC_INT | RC_R0,
 158     /* r1 */ RC_INT | RC_R1,
 159     /* r2 */ RC_INT | RC_R2,
 160     /* r3 */ RC_INT | RC_R3,
 161     /* r12 */ RC_INT | RC_R12,
 162     /* f0 */ RC_FLOAT | RC_F0,
 163     /* f1 */ RC_FLOAT | RC_F1,
 164     /* f2 */ RC_FLOAT | RC_F2,
 165     /* f3 */ RC_FLOAT | RC_F3,
 166 #ifdef TCC_ARM_VFP
 167  /* d4/s8 */ RC_FLOAT | RC_F4,
 168 /* d5/s10 */ RC_FLOAT | RC_F5,
 169 /* d6/s12 */ RC_FLOAT | RC_F6,
 170 /* d7/s14 */ RC_FLOAT | RC_F7,
 171 #endif
 172 };
 173
 174 static int func_sub_sp_offset, last_itod_magic;
 175 static int leaffunc;
 176
 177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 178 static CType float_type, double_type, func_float_type, func_double_type;
 179 ST_FUNC void arm_init(struct TCCState *s)
 180 {
 181     float_type.t = VT_FLOAT;
 182     double_type.t = VT_DOUBLE;
 183     func_float_type.t = VT_FUNC;
 184     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 185     func_double_type.t = VT_FUNC;
 186     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 187
 188     float_abi = s->float_abi;
 189 }
 190 #else
 191 #define func_float_type func_old_type
 192 #define func_double_type func_old_type
 193 #define func_ldouble_type func_old_type
 194 ST_FUNC void arm_init(void) {}
 195 #endif
 196
 197 static int two2mask(int a,int b) {
 198   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 199 }
 200
 201 static int regmask(int r) {
 202   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 203 }
 204
 205 /******************************************************/
 206
 207 #ifdef TCC_ARM_EABI
 208 char *default_elfinterp(struct TCCState *s)
 209 {
 210     if (s->float_abi == ARM_HARD_FLOAT)
 211         return "/lib/ld-linux-armhf.so.3";
 212     else
 213         return "/lib/ld-linux.so.3";
 214 }
 215 #endif
 216
 217 void o(uint32_t i)
 218 {
 219   /* this is a good place to start adding big-endian support*/
 220   int ind1;
 221
 222   ind1 = ind + 4;
 223   if (!cur_text_section)
 224     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 225          "can't evaluate constant expressions outside of a function.");
 226   if (ind1 > cur_text_section->data_allocated)
 227     section_realloc(cur_text_section, ind1);
 228   cur_text_section->data[ind++] = i&255;
 229   i>>=8;
 230   cur_text_section->data[ind++] = i&255;
 231   i>>=8;
 232   cur_text_section->data[ind++] = i&255;
 233   i>>=8;
 234   cur_text_section->data[ind++] = i;
 235 }
 236
 237 static uint32_t stuff_const(uint32_t op, uint32_t c)
 238 {
 239   int try_neg=0;
 240   uint32_t nc = 0, negop = 0;
 241
 242   switch(op&0x1F00000)
 243   {
 244     case 0x800000: //add
 245     case 0x400000: //sub
 246       try_neg=1;
 247       negop=op^0xC00000;
 248       nc=-c;
 249       break;
 250     case 0x1A00000: //mov
 251     case 0x1E00000: //mvn
 252       try_neg=1;
 253       negop=op^0x400000;
 254       nc=~c;
 255       break;
 256     case 0x200000: //xor
 257       if(c==~0)
 258         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 259       break;
 260     case 0x0: //and
 261       if(c==~0)
 262         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 263     case 0x1C00000: //bic
 264       try_neg=1;
 265       negop=op^0x1C00000;
 266       nc=~c;
 267       break;
 268     case 0x1800000: //orr
 269       if(c==~0)
 270         return (op&0xFFF0FFFF)|0x1E00000;
 271       break;
 272   }
 273   do {
 274     uint32_t m;
 275     int i;
 276     if(c<256) /* catch undefined <<32 */
 277       return op|c;
 278     for(i=2;i<32;i+=2) {
 279       m=(0xff>>i)|(0xff<<(32-i));
 280       if(!(c&~m))
 281         return op|(i<<7)|(c<<i)|(c>>(32-i));
 282     }
 283     op=negop;
 284     c=nc;
 285   } while(try_neg--);
 286   return 0;
 287 }
 288
 289
 290 //only add,sub
 291 void stuff_const_harder(uint32_t op, uint32_t v) {
 292   uint32_t x;
 293   x=stuff_const(op,v);
 294   if(x)
 295     o(x);
 296   else {
 297     uint32_t a[16], nv, no, o2, n2;
 298     int i,j,k;
 299     a[0]=0xff;
 300     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 301     for(i=1;i<16;i++)
 302       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 303     for(i=0;i<12;i++)
 304       for(j=i<4?i+12:15;j>=i+4;j--)
 305         if((v&(a[i]|a[j]))==v) {
 306           o(stuff_const(op,v&a[i]));
 307           o(stuff_const(o2,v&a[j]));
 308           return;
 309         }
 310     no=op^0xC00000;
 311     n2=o2^0xC00000;
 312     nv=-v;
 313     for(i=0;i<12;i++)
 314       for(j=i<4?i+12:15;j>=i+4;j--)
 315         if((nv&(a[i]|a[j]))==nv) {
 316           o(stuff_const(no,nv&a[i]));
 317           o(stuff_const(n2,nv&a[j]));
 318           return;
 319         }
 320     for(i=0;i<8;i++)
 321       for(j=i+4;j<12;j++)
 322         for(k=i<4?i+12:15;k>=j+4;k--)
 323           if((v&(a[i]|a[j]|a[k]))==v) {
 324             o(stuff_const(op,v&a[i]));
 325             o(stuff_const(o2,v&a[j]));
 326             o(stuff_const(o2,v&a[k]));
 327             return;
 328           }
 329     no=op^0xC00000;
 330     nv=-v;
 331     for(i=0;i<8;i++)
 332       for(j=i+4;j<12;j++)
 333         for(k=i<4?i+12:15;k>=j+4;k--)
 334           if((nv&(a[i]|a[j]|a[k]))==nv) {
 335             o(stuff_const(no,nv&a[i]));
 336             o(stuff_const(n2,nv&a[j]));
 337             o(stuff_const(n2,nv&a[k]));
 338             return;
 339           }
 340     o(stuff_const(op,v&a[0]));
 341     o(stuff_const(o2,v&a[4]));
 342     o(stuff_const(o2,v&a[8]));
 343     o(stuff_const(o2,v&a[12]));
 344   }
 345 }
 346
 347 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 348 {
 349   addr-=pos+8;
 350   addr/=4;
 351   if(addr>=0x1000000 || addr<-0x1000000) {
 352     if(fail)
 353       tcc_error("FIXME: function bigger than 32MB");
 354     return 0;
 355   }
 356   return 0x0A000000|(addr&0xffffff);
 357 }
 358
 359 int decbranch(int pos)
 360 {
 361   int x;
 362   x=*(uint32_t *)(cur_text_section->data + pos);
 363   x&=0x00ffffff;
 364   if(x&0x800000)
 365     x-=0x1000000;
 366   return x*4+pos+8;
 367 }
 368
 369 /* output a symbol and patch all calls to it */
 370 void gsym_addr(int t, int a)
 371 {
 372   uint32_t *x;
 373   int lt;
 374   while(t) {
 375     x=(uint32_t *)(cur_text_section->data + t);
 376     t=decbranch(lt=t);
 377     if(a==lt+4)
 378       *x=0xE1A00000; // nop
 379     else {
 380       *x &= 0xff000000;
 381       *x |= encbranch(lt,a,1);
 382     }
 383   }
 384 }
 385
 386 void gsym(int t)
 387 {
 388   gsym_addr(t, ind);
 389 }
 390
 391 #ifdef TCC_ARM_VFP
 392 static uint32_t vfpr(int r)
 393 {
 394   if(r<TREG_F0 || r>TREG_F7)
 395     tcc_error("compiler error! register %i is no vfp register",r);
 396   return r-5;
 397 }
 398 #else
 399 static uint32_t fpr(int r)
 400 {
 401   if(r<TREG_F0 || r>TREG_F3)
 402     tcc_error("compiler error! register %i is no fpa register",r);
 403   return r-5;
 404 }
 405 #endif
 406
 407 static uint32_t intr(int r)
 408 {
 409   if(r==4)
 410     return 12;
 411   if((r<0 || r>4) && r!=14)
 412     tcc_error("compiler error! register %i is no int register",r);
 413   return r;
 414 }
 415
 416 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 417 {
 418   if(*off>maxoff || *off&((1<<shift)-1)) {
 419     uint32_t x, y;
 420     x=0xE280E000;
 421     if(*sgn)
 422       x=0xE240E000;
 423     x|=(*base)<<16;
 424     *base=14; // lr
 425     y=stuff_const(x,*off&~maxoff);
 426     if(y) {
 427       o(y);
 428       *off&=maxoff;
 429       return;
 430     }
 431     y=stuff_const(x,(*off+maxoff)&~maxoff);
 432     if(y) {
 433       o(y);
 434       *sgn=!*sgn;
 435       *off=((*off+maxoff)&~maxoff)-*off;
 436       return;
 437     }
 438     stuff_const_harder(x,*off&~maxoff);
 439     *off&=maxoff;
 440   }
 441 }
 442
 443 static uint32_t mapcc(int cc)
 444 {
 445   switch(cc)
 446   {
 447     case TOK_ULT:
 448       return 0x30000000; /* CC/LO */
 449     case TOK_UGE:
 450       return 0x20000000; /* CS/HS */
 451     case TOK_EQ:
 452       return 0x00000000; /* EQ */
 453     case TOK_NE:
 454       return 0x10000000; /* NE */
 455     case TOK_ULE:
 456       return 0x90000000; /* LS */
 457     case TOK_UGT:
 458       return 0x80000000; /* HI */
 459     case TOK_Nset:
 460       return 0x40000000; /* MI */
 461     case TOK_Nclear:
 462       return 0x50000000; /* PL */
 463     case TOK_LT:
 464       return 0xB0000000; /* LT */
 465     case TOK_GE:
 466       return 0xA0000000; /* GE */
 467     case TOK_LE:
 468       return 0xD0000000; /* LE */
 469     case TOK_GT:
 470       return 0xC0000000; /* GT */
 471   }
 472   tcc_error("unexpected condition code");
 473   return 0xE0000000; /* AL */
 474 }
 475
 476 static int negcc(int cc)
 477 {
 478   switch(cc)
 479   {
 480     case TOK_ULT:
 481       return TOK_UGE;
 482     case TOK_UGE:
 483       return TOK_ULT;
 484     case TOK_EQ:
 485       return TOK_NE;
 486     case TOK_NE:
 487       return TOK_EQ;
 488     case TOK_ULE:
 489       return TOK_UGT;
 490     case TOK_UGT:
 491       return TOK_ULE;
 492     case TOK_Nset:
 493       return TOK_Nclear;
 494     case TOK_Nclear:
 495       return TOK_Nset;
 496     case TOK_LT:
 497       return TOK_GE;
 498     case TOK_GE:
 499       return TOK_LT;
 500     case TOK_LE:
 501       return TOK_GT;
 502     case TOK_GT:
 503       return TOK_LE;
 504   }
 505   tcc_error("unexpected condition code");
 506   return TOK_NE;
 507 }
 508
 509 /* load 'r' from value 'sv' */
 510 void load(int r, SValue *sv)
 511 {
 512   int v, ft, fc, fr, sign;
 513   uint32_t op;
 514   SValue v1;
 515
 516   fr = sv->r;
 517   ft = sv->type.t;
 518   fc = sv->c.ul;
 519
 520   if(fc>=0)
 521     sign=0;
 522   else {
 523     sign=1;
 524     fc=-fc;
 525   }
 526
 527   v = fr & VT_VALMASK;
 528   if (fr & VT_LVAL) {
 529     uint32_t base = 0xB; // fp
 530     if(v == VT_LLOCAL) {
 531       v1.type.t = VT_PTR;
 532       v1.r = VT_LOCAL | VT_LVAL;
 533       v1.c.ul = sv->c.ul;
 534       load(base=14 /* lr */, &v1);
 535       fc=sign=0;
 536       v=VT_LOCAL;
 537     } else if(v == VT_CONST) {
 538       v1.type.t = VT_PTR;
 539       v1.r = fr&~VT_LVAL;
 540       v1.c.ul = sv->c.ul;
 541       v1.sym=sv->sym;
 542       load(base=14, &v1);
 543       fc=sign=0;
 544       v=VT_LOCAL;
 545     } else if(v < VT_CONST) {
 546       base=intr(v);
 547       fc=sign=0;
 548       v=VT_LOCAL;
 549     }
 550     if(v == VT_LOCAL) {
 551       if(is_float(ft)) {
 552         calcaddr(&base,&fc,&sign,1020,2);
 553 #ifdef TCC_ARM_VFP
 554         op=0xED100A00; /* flds */
 555         if(!sign)
 556           op|=0x800000;
 557         if ((ft & VT_BTYPE) != VT_FLOAT)
 558           op|=0x100;   /* flds -> fldd */
 559         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 560 #else
 561         op=0xED100100;
 562         if(!sign)
 563           op|=0x800000;
 564 #if LDOUBLE_SIZE == 8
 565         if ((ft & VT_BTYPE) != VT_FLOAT)
 566           op|=0x8000;
 567 #else
 568         if ((ft & VT_BTYPE) == VT_DOUBLE)
 569           op|=0x8000;
 570         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 571           op|=0x400000;
 572 #endif
 573         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 574 #endif
 575       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 576                 || (ft & VT_BTYPE) == VT_SHORT) {
 577         calcaddr(&base,&fc,&sign,255,0);
 578         op=0xE1500090;
 579         if ((ft & VT_BTYPE) == VT_SHORT)
 580           op|=0x20;
 581         if ((ft & VT_UNSIGNED) == 0)
 582           op|=0x40;
 583         if(!sign)
 584           op|=0x800000;
 585         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 586       } else {
 587         calcaddr(&base,&fc,&sign,4095,0);
 588         op=0xE5100000;
 589         if(!sign)
 590           op|=0x800000;
 591         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 592           op|=0x400000;
 593         o(op|(intr(r)<<12)|fc|(base<<16));
 594       }
 595       return;
 596     }
 597   } else {
 598     if (v == VT_CONST) {
 599       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 600       if (fr & VT_SYM || !op) {
 601         o(0xE59F0000|(intr(r)<<12));
 602         o(0xEA000000);
 603         if(fr & VT_SYM)
 604           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 605         o(sv->c.ul);
 606       } else
 607         o(op);
 608       return;
 609     } else if (v == VT_LOCAL) {
 610       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 611       if (fr & VT_SYM || !op) {
 612         o(0xE59F0000|(intr(r)<<12));
 613         o(0xEA000000);
 614         if(fr & VT_SYM) // needed ?
 615           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 616         o(sv->c.ul);
 617         o(0xE08B0000|(intr(r)<<12)|intr(r));
 618       } else
 619         o(op);
 620       return;
 621     } else if(v == VT_CMP) {
 622       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 623       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 624       return;
 625     } else if (v == VT_JMP || v == VT_JMPI) {
 626       int t;
 627       t = v & 1;
 628       o(0xE3A00000|(intr(r)<<12)|t);
 629       o(0xEA000000);
 630       gsym(sv->c.ul);
 631       o(0xE3A00000|(intr(r)<<12)|(t^1));
 632       return;
 633     } else if (v < VT_CONST) {
 634       if(is_float(ft))
 635 #ifdef TCC_ARM_VFP
 636         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 637 #else
 638         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 639 #endif
 640       else
 641         o(0xE1A00000|(intr(r)<<12)|intr(v));
 642       return;
 643     }
 644   }
 645   tcc_error("load unimplemented!");
 646 }
 647
 648 /* store register 'r' in lvalue 'v' */
 649 void store(int r, SValue *sv)
 650 {
 651   SValue v1;
 652   int v, ft, fc, fr, sign;
 653   uint32_t op;
 654
 655   fr = sv->r;
 656   ft = sv->type.t;
 657   fc = sv->c.ul;
 658
 659   if(fc>=0)
 660     sign=0;
 661   else {
 662     sign=1;
 663     fc=-fc;
 664   }
 665
 666   v = fr & VT_VALMASK;
 667   if (fr & VT_LVAL || fr == VT_LOCAL) {
 668     uint32_t base = 0xb;
 669     if(v < VT_CONST) {
 670       base=intr(v);
 671       v=VT_LOCAL;
 672       fc=sign=0;
 673     } else if(v == VT_CONST) {
 674       v1.type.t = ft;
 675       v1.r = fr&~VT_LVAL;
 676       v1.c.ul = sv->c.ul;
 677       v1.sym=sv->sym;
 678       load(base=14, &v1);
 679       fc=sign=0;
 680       v=VT_LOCAL;
 681     }
 682     if(v == VT_LOCAL) {
 683        if(is_float(ft)) {
 684         calcaddr(&base,&fc,&sign,1020,2);
 685 #ifdef TCC_ARM_VFP
 686         op=0xED000A00; /* fsts */
 687         if(!sign)
 688           op|=0x800000;
 689         if ((ft & VT_BTYPE) != VT_FLOAT)
 690           op|=0x100;   /* fsts -> fstd */
 691         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 692 #else
 693         op=0xED000100;
 694         if(!sign)
 695           op|=0x800000;
 696 #if LDOUBLE_SIZE == 8
 697         if ((ft & VT_BTYPE) != VT_FLOAT)
 698           op|=0x8000;
 699 #else
 700         if ((ft & VT_BTYPE) == VT_DOUBLE)
 701           op|=0x8000;
 702         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 703           op|=0x400000;
 704 #endif
 705         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 706 #endif
 707         return;
 708       } else if((ft & VT_BTYPE) == VT_SHORT) {
 709         calcaddr(&base,&fc,&sign,255,0);
 710         op=0xE14000B0;
 711         if(!sign)
 712           op|=0x800000;
 713         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 714       } else {
 715         calcaddr(&base,&fc,&sign,4095,0);
 716         op=0xE5000000;
 717         if(!sign)
 718           op|=0x800000;
 719         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 720           op|=0x400000;
 721         o(op|(intr(r)<<12)|fc|(base<<16));
 722       }
 723       return;
 724     }
 725   }
 726   tcc_error("store unimplemented");
 727 }
 728
 729 static void gadd_sp(int val)
 730 {
 731   stuff_const_harder(0xE28DD000,val);
 732 }
 733
 734 /* 'is_jmp' is '1' if it is a jump */
 735 static void gcall_or_jmp(int is_jmp)
 736 {
 737   int r;
 738   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 739     uint32_t x;
 740     /* constant case */
 741     x=encbranch(ind,ind+vtop->c.ul,0);
 742     if(x) {
 743       if (vtop->r & VT_SYM) {
 744         /* relocation case */
 745         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 746       } else
 747         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 748       o(x|(is_jmp?0xE0000000:0xE1000000));
 749     } else {
 750       if(!is_jmp)
 751         o(0xE28FE004); // add lr,pc,#4
 752       o(0xE51FF004);   // ldr pc,[pc,#-4]
 753       if (vtop->r & VT_SYM)
 754         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 755       o(vtop->c.ul);
 756     }
 757   } else {
 758     /* otherwise, indirect call */
 759     r = gv(RC_INT);
 760     if(!is_jmp)
 761       o(0xE1A0E00F);       // mov lr,pc
 762     o(0xE1A0F000|intr(r)); // mov pc,r
 763   }
 764 }
 765
 766 /* Return whether a structure is an homogeneous float aggregate or not.
 767    The answer is true if all the elements of the structure are of the same
 768    primitive float type and there is less than 4 elements.
 769
 770    type: the type corresponding to the structure to be tested */
 771 static int is_hgen_float_aggr(CType *type)
 772 {
 773   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 774     struct Sym *ref;
 775     int btype, nb_fields = 0;
 776
 777     ref = type->ref->next;
 778     btype = ref->type.t & VT_BTYPE;
 779     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 780       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 781       return !ref && nb_fields <= 4;
 782     }
 783   }
 784   return 0;
 785 }
 786
 787 struct avail_regs {
 788   signed char avail[3]; /* 3 holes max with only float and double alignments */
 789   int first_hole; /* first available hole */
 790   int last_hole; /* last available hole (none if equal to first_hole) */
 791   int first_free_reg; /* next free register in the sequence, hole excluded */
 792 };
 793
 794 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 795
 796 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 797    param) according to the rules described in the procedure call standard for
 798    the ARM architecture (AAPCS). If found, the registers are assigned to this
 799    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 800    and the parameter is a single float.
 801
 802    avregs: opaque structure to keep track of available VFP co-processor regs
 803    align: alignment contraints for the param, as returned by type_size()
 804    size: size of the parameter, as returned by type_size() */
 805 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 806 {
 807   int first_reg = 0;
 808
 809   if (avregs->first_free_reg == -1)
 810     return -1;
 811   if (align >> 3) { /* double alignment */
 812     first_reg = avregs->first_free_reg;
 813     /* alignment contraint not respected so use next reg and record hole */
 814     if (first_reg & 1)
 815       avregs->avail[avregs->last_hole++] = first_reg++;
 816   } else { /* no special alignment (float or array of float) */
 817     /* if single float and a hole is available, assign the param to it */
 818     if (size == 4 && avregs->first_hole != avregs->last_hole)
 819       return avregs->avail[avregs->first_hole++];
 820     else
 821       first_reg = avregs->first_free_reg;
 822   }
 823   if (first_reg + size / 4 <= 16) {
 824     avregs->first_free_reg = first_reg + size / 4;
 825     return first_reg;
 826   }
 827   avregs->first_free_reg = -1;
 828   return -1;
 829 }
 830
 831 /* Returns whether all params need to be passed in core registers or not.
 832    This is the case for function part of the runtime ABI. */
 833 int floats_in_core_regs(SValue *sval)
 834 {
 835   if (!sval->sym)
 836     return 0;
 837
 838   switch (sval->sym->v) {
 839     case TOK___floatundisf:
 840     case TOK___floatundidf:
 841     case TOK___fixunssfdi:
 842     case TOK___fixunsdfdi:
 843 #ifndef TCC_ARM_VFP
 844     case TOK___fixunsxfdi:
 845 #endif
 846     case TOK___floatdisf:
 847     case TOK___floatdidf:
 848     case TOK___fixsfdi:
 849     case TOK___fixdfdi:
 850       return 1;
 851
 852     default:
 853       return 0;
 854   }
 855 }
 856
 857 /* Return the number of registers needed to return the struct, or 0 if
 858    returning via struct pointer. */
 859 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) {
 860 #ifdef TCC_ARM_EABI
 861     int size, align;
 862     size = type_size(vt, &align);
 863     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 864         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 865         *ret_align = 8;
 866         ret->ref = NULL;
 867         ret->t = VT_DOUBLE;
 868         return (size + 7) >> 3;
 869     } else if (size <= 4) {
 870         *ret_align = 4;
 871         ret->ref = NULL;
 872         ret->t = VT_INT;
 873         return 1;
 874     } else
 875         return 0;
 876 #else
 877     return 0;
 878 #endif
 879 }
 880
 881 /* Parameters are classified according to how they are copied to their final
 882    destination for the function call. Because the copying is performed class
 883    after class according to the order in the union below, it is important that
 884    some constraints about the order of the members of this union are respected:
 885    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 886    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 887      VFP_STRUCT_CLASS;
 888    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 889    See the comment for the main loop in copy_params() for the reason. */
 890 enum reg_class {
 891         STACK_CLASS = 0,
 892         CORE_STRUCT_CLASS,
 893         VFP_CLASS,
 894         VFP_STRUCT_CLASS,
 895         CORE_CLASS,
 896         NB_CLASSES
 897 };
 898
/* Placement of one parameter of a call. The members are initialized
   positionally (start, end, sval), so their order must not change. */
struct param_plan {
    int start; /* first reg or addr used depending on the class */
    int end; /* last reg used or next free addr depending on the class */
    SValue *sval; /* pointer to SValue on the value stack */
    struct param_plan *prev; /*  previous element in this class */
};

/* Placement of all the parameters of a call. */
struct plan {
    struct param_plan *pplans; /* array of all the param plans */
    struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
};
 910
 911 #define add_param_plan(plan,pplan,class)                        \
 912     do {                                                        \
 913         pplan.prev = plan->clsplans[class];                     \
 914         plan->pplans[plan ## _nb] = pplan;                      \
 915         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
 916     } while(0)
 917
 918 /* Assign parameters to registers and stack with alignment according to the
 919    rules in the procedure call standard for the ARM architecture (AAPCS).
 920    The overall assignment is recorded in an array of per parameter structures
 921    called parameter plans. The parameter plans are also further organized in a
 922    number of linked lists, one per class of parameter (see the comment for the
 923    definition of union reg_class).
 924
 925    nb_args: number of parameters of the function for which a call is generated
 926    corefloat: whether to pass float via core registers or not
 927    plan: the structure where the overall assignment is recorded
 928    todo: a bitmap that record which core registers hold a parameter
 929
 930    Returns the amount of stack space needed for parameter passing
 931
 932    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 933    is the responsability of the caller to free this array once used (ie not
 934    before copy_params). */
 935 static int assign_regs(int nb_args, int corefloat, struct plan *plan, int *todo)
 936 {
 937   int i, size, align;
 938   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 939   int plan_nb = 0;
 940   struct param_plan pplan;
 941   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 942
 943   ncrn = nsaa = 0;
 944   *todo = 0;
 945   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 946   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 947   for(i = nb_args; i-- ;) {
 948     int j, start_vfpreg = 0;
 949     size = type_size(&vtop[-i].type, &align);
 950     switch(vtop[-i].type.t & VT_BTYPE) {
 951       case VT_STRUCT:
 952       case VT_FLOAT:
 953       case VT_DOUBLE:
 954       case VT_LDOUBLE:
 955       if (!corefloat) {
 956         int is_hfa = 0; /* Homogeneous float aggregate */
 957
 958         if (is_float(vtop[-i].type.t)
 959             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 960           int end_vfpreg;
 961
 962           start_vfpreg = assign_vfpreg(&avregs, align, size);
 963           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 964           if (start_vfpreg >= 0) {
 965             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 966             if (is_hfa)
 967               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 968             else
 969               add_param_plan(plan, pplan, VFP_CLASS);
 970             continue;
 971           } else
 972             break;
 973         }
 974       }
 975       ncrn = (ncrn + (align-1)/4) & -(align/4);
 976       size = (size + 3) & -4;
 977       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 978         /* The parameter is allocated both in core register and on stack. As
 979          * such, it can be of either class: it would either be the last of
 980          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
 981         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 982           *todo|=(1<<j);
 983         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
 984         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
 985         ncrn += size/4;
 986         if (ncrn > 4)
 987           nsaa = (ncrn - 4) * 4;
 988       } else {
 989         ncrn = 4;
 990         break;
 991       }
 992       continue;
 993       default:
 994       if (ncrn < 4) {
 995         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 996
 997         if (is_long) {
 998           ncrn = (ncrn + 1) & -2;
 999           if (ncrn == 4)
1000             break;
1001         }
1002         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1003         ncrn++;
1004         if (is_long)
1005           pplan.end = ncrn++;
1006         add_param_plan(plan, pplan, CORE_CLASS);
1007         continue;
1008       }
1009     }
1010     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1011     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1012     add_param_plan(plan, pplan, STACK_CLASS);
1013     nsaa += size; /* size already rounded up before */
1014   }
1015   return nsaa;
1016 }
1017
1018 #undef add_param_plan
1019
1020 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1021    a function call.
1022
1023    nb_args: number of parameters the function takes
1024    plan: the overall assignment plan for parameters
1025    todo: a bitmap indicating what core reg will hold a parameter; when it is
        non-zero these registers are loaded with a single pop at the end
1026
1027    Returns the number of SValue added by this function on the value stack */
1028 static int copy_params(int nb_args, struct plan *plan, int todo)
1029 {
1030   int size, align, r, i, nb_extra_sval = 0;
1031   struct param_plan *pplan;
1032
1033    /* Several constraints require parameters to be copied in a specific order:
1034       - structures are copied to the stack before being loaded in a reg;
1035       - floats loaded to an odd numbered VFP reg are first copied to the
1036         preceding even numbered VFP reg and then moved to the next VFP reg.
1037
1038       It is thus important that:
1039       - structures assigned to core regs must be copied after parameters
1040         assigned to the stack but before structures assigned to VFP regs because
1041         a structure can lie partly in core registers and partly on the stack;
1042       - parameters assigned to the stack and all structures be copied before
1043         parameters assigned to a core reg since copying a parameter to the stack
1044         require using a core reg;
1045       - parameters assigned to VFP regs be copied before structures assigned to
1046         VFP regs as the copy might use an even numbered VFP reg that already
1047         holds part of a structure. */
       /* Classes are handled in increasing index order; inside a class the plan
          entries are visited by following their prev links. */
1048   for(i = 0; i < NB_CLASSES; i++) {
1049     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
         /* Work on a copy pushed on top of the value stack and neutralise the
            original entry so that it no longer pins any register. */
1050       vpushv(pplan->sval);
1051       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1052       switch(i) {
1053         case STACK_CLASS:
1054         case CORE_STRUCT_CLASS:
1055         case VFP_STRUCT_CLASS:
1056           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1057             int padding = 0;
1058             size = type_size(&pplan->sval->type, &align);
1059             /* align to stack align size */
1060             size = (size + 3) & ~3;
               /* For stack parameters, padding is the gap between the end of
                  the previously visited plan entry and the start of this one. */
1061             if (i == STACK_CLASS && pplan->prev)
1062               padding = pplan->start - pplan->prev->end;
1063             size += padding; /* Add padding if any */
1064             /* allocate the necessary size on stack */
1065             gadd_sp(-size);
1066             /* generate structure store */
1067             r = get_reg(RC_INT);
               /* NOTE(review): padding is ORed straight into the immediate
                  field, which assumes it is small enough to be encoded as is
                  -- TODO confirm. */
1068             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1069             vset(&vtop->type, r | VT_LVAL, 0);
1070             vswap();
1071             vstore(); /* memcpy to current sp + potential padding */
1072
1073             /* Homogeneous float aggregate are loaded to VFP registers
1074                immediately since there is no way of loading data in multiple
1075                non consecutive VFP registers as what is done for other
1076                structures (see the use of todo). */
1077             if (i == VFP_STRUCT_CLASS) {
1078               int first = pplan->start, nb = pplan->end - first + 1;
1079               /* vpop.32 {pplan->start, ..., pplan->end} */
1080               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1081               /* No need to write the register used to a SValue since VFP regs
1082                  cannot be used for gcall_or_jmp */
1083             }
1084           } else {
1085             if (is_float(pplan->sval->type.t)) {
1086 #ifdef TCC_ARM_VFP
1087               r = vfpr(gv(RC_FLOAT)) << 12;
1088               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1089                 size = 4;
1090               else {
1091                 size = 8;
1092                 r |= 0x101; /* vpush.32 -> vpush.64 */
1093               }
1094               o(0xED2D0A01 + r); /* vpush */
1095 #else
1096               r = fpr(gv(RC_FLOAT)) << 12;
1097               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1098                 size = 4;
1099               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1100                 size = 8;
1101               else
1102                 size = LDOUBLE_SIZE;
1103
                 /* Select the FPA transfer size bits matching the operand size. */
1104               if (size == 12)
1105                 r |= 0x400000;
1106               else if(size == 8)
1107                 r|=0x8000;
1108
1109               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1110 #endif
1111             } else {
1112               /* simple type (currently always same size) */
1113               /* XXX: implicit cast ? */
1114               size=4;
1115               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                   /* A long long is split in two words; one half is pushed
                      here, the other one by the common code below. */
1116                 lexpand_nr();
1117                 size = 8;
1118                 r = gv(RC_INT);
1119                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1120                 vtop--;
1121               }
1122               r = gv(RC_INT);
1123               o(0xE52D0004|(intr(r)<<12)); /* push r */
1124             }
               /* NOTE(review): the operands are in the opposite order to the
                  structure case above (prev->end - start); presumably this
                  gives a non-positive adjustment that reserves the gap --
                  confirm against gadd_sp. */
1125             if (i == STACK_CLASS && pplan->prev)
1126               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1127           }
1128           break;
1129
1130         case VFP_CLASS:
             /* Load the value in the double register that contains s<start>. */
1131           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1132           if (pplan->start & 1) { /* Must be in upper part of double register */
1133             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1134             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1135           }
1136           break;
1137
1138         case CORE_CLASS:
1139           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
               /* Second word of a long long goes to register 'end' and is
                  recorded in r2 of the original entry. */
1140             lexpand_nr();
1141             gv(regmask(pplan->end));
1142             pplan->sval->r2 = vtop->r;
1143             vtop--;
1144           }
1145           gv(regmask(pplan->start));
1146           /* Mark register as used so that gcall_or_jmp use another one
1147              (regs >=4 are free as never used to pass parameters) */
1148           pplan->sval->r = vtop->r;
1149           break;
1150       }
         /* Drop the working copy pushed by vpushv above. */
1151       vtop--;
1152     }
1153   }
1154
1155   /* Manually free remaining registers since next parameters are loaded
1156    * manually, without the help of gv(int). */
1157   save_regs(nb_args);
1158
1159   if(todo) {
1160     o(0xE8BD0000|todo); /* pop {todo} */
1161     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1162       int r;
         /* The original entry pins the first register of the structure. */
1163       pplan->sval->r = pplan->start;
1164       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1165          can occupy more than 2 registers. Thus, we need to push on the value
1166          stack some fake parameter to have on SValue for each registers used
1167          by a structure (r2 is not used). */
1168       for (r = pplan->start + 1; r <= pplan->end; r++) {
1169         if (todo & (1 << r)) {
1170           nb_extra_sval++;
1171           vpushi(0);
1172           vtop->r = r;
1173         }
1174       }
1175     }
1176   }
1177   return nb_extra_sval;
1178 }
1179
1180 /* Generate function call. The function address is pushed first, then
1181    all the parameters in call order. This functions pops all the
1182    parameters and the function address. */
1183 void gfunc_call(int nb_args)
1184 {
1185   int r, args_size;
1186   int variadic, corefloat = 1;
1187   int todo;
1188   struct plan plan;
1189
1190 #ifdef TCC_ARM_EABI
1191   if (float_abi == ARM_HARD_FLOAT) {
1192     variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1193     corefloat = variadic || floats_in_core_regs(&vtop[-nb_args]);
1194   }
1195 #endif
1196   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1197      VT_JMP anywhere except on the top of the stack because it would complicate
1198      the code generator. */
1199   r = vtop->r & VT_VALMASK;
1200   if (r == VT_CMP || (r & ~1) == VT_JMP)
1201     gv(RC_INT);
1202
1203   args_size = assign_regs(nb_args, corefloat, &plan, &todo);
1204
1205 #ifdef TCC_ARM_EABI
1206   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1207     args_size = (args_size + 7) & ~7;
1208     o(0xE24DD004); /* sub sp, sp, #4 */
1209   }
1210 #endif
1211
1212   nb_args += copy_params(nb_args, &plan, todo);
1213   tcc_free(plan.pplans);
1214
1215   /* Move fct SValue on top as required by gcall_or_jmp */
1216   vrotb(nb_args + 1);
1217   gcall_or_jmp(0);
1218   if (args_size)
1219       gadd_sp(args_size); /* pop all parameters passed on the stack */
1220 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1221   if(float_abi == ARM_SOFTFP_FLOAT && corefloat &&
1222      is_float(vtop->type.ref->type.t)) {
1223     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1224       o(0xEE000A10); /*vmov s0, r0 */
1225     } else {
1226       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1227       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1228     }
1229   }
1230 #endif
1231   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1232   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1233 }
1234
1235 /* Generate the prolog of the function whose type is 'func_type': save the
        incoming argument registers together with fp, ip and lr on the stack,
        set up fp, and declare each parameter as a local symbol located at its
        offset in that save area. */
1236 void gfunc_prolog(CType *func_type)
1237 {
1238   Sym *sym,*sym2;
1239   int n, nf, size, align, struct_ret = 0;
1240   int addr, pn, sn; /* pn=core, sn=stack */
1241   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1242   CType ret_type;
1243
1244   sym = func_type->ref;
1245   func_vt = sym->type;
1246   func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1247
       /* n: words of arguments received in core registers,
          nf: number of VFP single registers received. */
1248   n = nf = 0;
       /* A structure result that gfunc_sret() does not handle is returned
          through a hidden pointer, which takes the first core register. */
1249   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1250       !gfunc_sret(&func_vt, func_var, &ret_type, &align))
1251   {
1252     n++;
1253     struct_ret = 1;
1254     func_vc = 12; /* Offset from fp of the place to store the result */
1255   }
       /* First pass over the parameters: only count how many registers have
          to be saved; stop once both register files are exhausted. */
1256   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1257     size = type_size(&sym2->type, &align);
1258 #ifdef TCC_ARM_EABI
1259     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1260         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1261       int tmpnf = assign_vfpreg(&avregs, align, size);
1262       tmpnf += (size + 3) / 4;
           /* keep the highest VFP register index used so far */
1263       nf = (tmpnf > nf) ? tmpnf : nf;
1264     } else
1265 #endif
1266     if (n < 4)
1267       n += (size + 3) / 4;
1268   }
1269   o(0xE1A0C00D); /* mov ip,sp */
       /* Variadic functions save all four argument registers. */
1270   if (func_var)
1271     n=4;
1272   if (n) {
1273     if(n>4)
1274       n=4;
1275 #ifdef TCC_ARM_EABI
         /* round up to an even number of registers */
1276     n=(n+1)&-2;
1277 #endif
1278     o(0xE92D0000|((1<<n)-1)); /* save r0-r3 on stack if needed */
1279   }
1280   if (nf) {
1281     if (nf>16)
1282       nf=16;
1283     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1284     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1285   }
1286   o(0xE92D5800); /* save fp, ip, lr */
1287   o(0xE1A0B00D); /* mov fp, sp */
       /* Remember where the placeholder is: gfunc_epilog() patches it once the
          size of the local variables is known. */
1288   func_sub_sp_offset = ind;
1289   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1290
1291 #ifdef TCC_ARM_EABI
1292   if (float_abi == ARM_HARD_FLOAT) {
         /* the saved VFP registers lie below the saved core registers */
1293     func_vc += nf * 4;
         /* restart VFP register allocation for the second pass */
1294     avregs = AVAIL_REGS_INITIALIZER;
1295   }
1296 #endif
       /* Second pass: compute the location of every parameter. pn counts the
          core register words already used, sn the stack words. */
1297   pn = struct_ret, sn = 0;
1298   while ((sym = sym->next)) {
1299     CType *type;
1300     type = &sym->type;
1301     size = type_size(type, &align);
         /* from here on, size is in words and align a multiple of 4 bytes */
1302     size = (size + 3) >> 2;
1303     align = (align + 3) & ~3;
1304 #ifdef TCC_ARM_EABI
1305     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1306         || is_hgen_float_aggr(&sym->type))) {
1307       int fpn = assign_vfpreg(&avregs, align, size << 2);
1308       if (fpn >= 0)
1309         addr = fpn * 4;
1310       else
             /* no VFP register left: the parameter was passed on the stack */
1311         goto from_stack;
1312     } else
1313 #endif
1314     if (pn < 4) {
1315 #ifdef TCC_ARM_EABI
1316         pn = (pn + (align-1)/4) & -(align/4);
1317 #endif
1318       addr = (nf + pn) * 4;
1319       pn += size;
           /* a parameter straddling r3 and the stack also consumes stack words */
1320       if (!sn && pn > 4)
1321         sn = (pn - 4);
1322     } else {
1323 from_stack:
1324 #ifdef TCC_ARM_EABI
1325         sn = (sn + (align-1)/4) & -(align/4);
1326 #endif
1327       addr = (n + nf + sn) * 4;
1328       sn += size;
1329     }
         /* + 12 skips the three words (fp, ip, lr) saved just above fp */
1330     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1331              addr + 12);
1332   }
       /* reset the per-function code generator state */
1333   last_itod_magic=0;
1334   leaffunc = 1;
1335   loc = 0;
1336 }
1337
1338 /* generate function epilog */
1339 void gfunc_epilog(void)
1340 {
1341   uint32_t x;
1342   int diff;
1343   /* Copy float return value to core register if base standard is used and
1344      float computation is made with VFP */
1345 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1346   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1347     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1348       o(0xEE100A10); /* fmrs r0, s0 */
1349     else {
1350       o(0xEE100B10); /* fmrdl r0, d0 */
1351       o(0xEE301B10); /* fmrdh r1, d0 */
1352     }
1353   }
1354 #endif
1355   o(0xE89BA800); /* restore fp, sp, pc */
1356   diff = (-loc + 3) & -4;
1357 #ifdef TCC_ARM_EABI
1358   if(!leaffunc)
1359     diff = ((diff + 11) & -8) - 4;
1360 #endif
1361   if(diff > 0) {
1362     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1363     if(x)
1364       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1365     else {
1366       int addr;
1367       addr=ind;
1368       o(0xE59FC004); /* ldr ip,[pc+4] */
1369       o(0xE04BD00C); /* sub sp,fp,ip  */
1370       o(0xE1A0F00E); /* mov pc,lr */
1371       o(diff);
1372       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1373     }
1374   }
1375 }
1376
1377 /* generate a jump to a label */
1378 int gjmp(int t)
1379 {
1380   int r;
1381   r=ind;
1382   o(0xE0000000|encbranch(r,t,1));
1383   return r;
1384 }
1385
/* Emit a jump to the fixed address 'a'. Same as gjmp() except that the
   position of the branch instruction is not reported to the caller. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1391
1392 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1393 int gtst(int inv, int t)
1394 {
1395   int v, r;
1396   uint32_t op;
1397   v = vtop->r & VT_VALMASK;
1398   r=ind;
1399   if (v == VT_CMP) {
1400     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1401     op|=encbranch(r,t,1);
1402     o(op);
1403     t=r;
1404   } else { /* VT_JMP || VT_JMPI */
1405     if ((v & 1) == inv) {
1406       if(!vtop->c.i)
1407         vtop->c.i=t;
1408       else {
1409         uint32_t *x;
1410         int p,lp;
1411         if(t) {
1412           p = vtop->c.i;
1413           do {
1414             p = decbranch(lp=p);
1415           } while(p);
1416           x = (uint32_t *)(cur_text_section->data + lp);
1417           *x &= 0xff000000;
1418           *x |= encbranch(lp,t,1);
1419         }
1420         t = vtop->c.i;
1421       }
1422     } else {
1423       t = gjmp(t);
1424       gsym(vtop->c.i);
1425     }
1426   }
1427   vtop--;
1428   return t;
1429 }
1430
1431 /* Generate the integer binary operation 'op' on the two top entries of the
        value stack and leave the result on top of it. Comparison operators
        leave a VT_CMP value holding 'op'. */
1432 void gen_opi(int op)
1433 {
1434   int c, func = 0;
1435   uint32_t opc = 0, r, fr;
1436   unsigned short retreg = REG_IRET;
1437
       /* First switch: classify the operator. c selects the code path used by
          the second switch: 1 = data processing instruction (opc is placed in
          bits 20 and up), 2 = shift (opc is the shift type), 3 = call to a
          runtime helper (func). '*' and TOK_UMULL are emitted right away. */
1438   c=0;
1439   switch(op) {
1440     case '+':
1441       opc = 0x8;
1442       c=1;
1443       break;
1444     case TOK_ADDC1: /* add with carry generation */
1445       opc = 0x9;
1446       c=1;
1447       break;
1448     case '-':
1449       opc = 0x4;
1450       c=1;
1451       break;
1452     case TOK_SUBC1: /* sub with carry generation */
1453       opc = 0x5;
1454       c=1;
1455       break;
1456     case TOK_ADDC2: /* add with carry use */
1457       opc = 0xA;
1458       c=1;
1459       break;
1460     case TOK_SUBC2: /* sub with carry use */
1461       opc = 0xC;
1462       c=1;
1463       break;
1464     case '&':
1465       opc = 0x0;
1466       c=1;
1467       break;
1468     case '^':
1469       opc = 0x2;
1470       c=1;
1471       break;
1472     case '|':
1473       opc = 0x18;
1474       c=1;
1475       break;
1476     case '*':
1477       gv2(RC_INT, RC_INT);
1478       r = vtop[-1].r;
1479       fr = vtop[0].r;
1480       vtop--;
           /* multiply; the result overwrites the register of the first operand */
1481       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1482       return;
1483     case TOK_SHL:
1484       opc = 0;
1485       c=2;
1486       break;
1487     case TOK_SHR:
1488       opc = 1;
1489       c=2;
1490       break;
1491     case TOK_SAR:
1492       opc = 2;
1493       c=2;
1494       break;
1495     case '/':
1496     case TOK_PDIV:
1497       func=TOK___divsi3;
1498       c=3;
1499       break;
1500     case TOK_UDIV:
1501       func=TOK___udivsi3;
1502       c=3;
1503       break;
1504     case '%':
1505 #ifdef TCC_ARM_EABI
           /* the EABI helper returns the remainder in the second return register */
1506       func=TOK___aeabi_idivmod;
1507       retreg=REG_LRET;
1508 #else
1509       func=TOK___modsi3;
1510 #endif
1511       c=3;
1512       break;
1513     case TOK_UMOD:
1514 #ifdef TCC_ARM_EABI
1515       func=TOK___aeabi_uidivmod;
1516       retreg=REG_LRET;
1517 #else
1518       func=TOK___umodsi3;
1519 #endif
1520       c=3;
1521       break;
1522     case TOK_UMULL:
1523       gv2(RC_INT, RC_INT);
           /* the 64-bit product needs a second register (r2) for its high word
              and a low word register distinct from the first operand (c) */
1524       r=intr(vtop[-1].r2=get_reg(RC_INT));
1525       c=vtop[-1].r;
1526       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1527       vtop--;
1528       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1529       return;
1530     default:
           /* everything else is handled as a comparison */
1531       opc = 0x15;
1532       c=1;
1533       break;
1534   }
1535   switch(c) {
1536     case 1:
           /* A constant left operand of a subtraction is moved to the right so
              that it can be used as an immediate, using the reversed form. */
1537       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1538         if(opc == 4 || opc == 5 || opc == 0xc) {
1539           vswap();
1540           opc|=2; // sub -> rsb
1541         }
1542       }
           /* flags / pending jumps cannot be used as operands directly */
1543       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1544           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1545         gv(RC_INT);
           /* first operand in a register (c is reused as its number) */
1546       vswap();
1547       c=intr(gv(RC_INT));
1548       vswap();
1549       opc=0xE0000000|(opc<<20)|(c<<16);
           /* try to encode a constant second operand as an immediate */
1550       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1551         uint32_t x;
1552         x=stuff_const(opc|0x2000000,vtop->c.i);
1553         if(x) {
1554           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1555           o(x|(r<<12));
1556           goto done;
1557         }
1558       }
           /* otherwise use the register form */
1559       fr=intr(gv(RC_INT));
1560       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1561       o(opc|(r<<12)|fr);
1562 done:
1563       vtop--;
           /* the result of a comparison lives in the CPU flags */
1564       if (op >= TOK_ULT && op <= TOK_GT) {
1565         vtop->r = VT_CMP;
1566         vtop->c.i = op;
1567       }
1568       break;
1569     case 2:
           /* shifts are encoded as a mov with a shifted operand */
1570       opc=0xE1A00000|(opc<<5);
1571       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1572           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1573         gv(RC_INT);
1574       vswap();
1575       r=intr(gv(RC_INT));
1576       vswap();
1577       opc|=r;
1578       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
             /* constant shift amount, only its 5 low bits are used */
1579         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1580         c = vtop->c.i & 0x1f;
1581         o(opc|(c<<7)|(fr<<12));
1582       } else {
             /* shift amount taken from a register */
1583         fr=intr(gv(RC_INT));
1584         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1585         o(opc|(c<<12)|(fr<<8)|0x10);
1586       }
1587       vtop--;
1588       break;
1589     case 3:
           /* call the helper with both operands as arguments and push a value
              bound to the register that holds its result */
1590       vpush_global_sym(&func_old_type, func);
1591       vrott(3);
1592       gfunc_call(2);
1593       vpushi(0);
1594       vtop->r = retreg;
1595       break;
1596     default:
1597       tcc_error("gen_opi %i unimplemented!",op);
1598   }
1599 }
1600
1601 #ifdef TCC_ARM_VFP
1602 static int is_zero(int i)
1603 {
1604   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1605     return 0;
1606   if (vtop[i].type.t == VT_FLOAT)
1607     return (vtop[i].c.f == 0.f);
1608   else if (vtop[i].type.t == VT_DOUBLE)
1609     return (vtop[i].c.d == 0.0);
1610   return (vtop[i].c.ld == 0.l);
1611 }
1612
1613 /* generate a floating point operation 'v = t1 op t2' instruction. The
1614  *    two operands are guaranteed to have the same floating point type.
      *    VFP version. Comparisons leave a VT_CMP value on the value stack.
      *    NOTE(review): is_zero() is true for both +0.0 and -0.0, so the
      *    shortcuts below (x + 0 -> x, x - 0 -> x, 0 - x -> -x) can presumably
      *    differ from IEEE 754 results in the sign of a zero result -- confirm
      *    whether this is intended. */
1615 void gen_opf(int op)
1616 {
1617   uint32_t x;
1618   int fneg=0,r;
       /* base opcode; T2CPR() presumably adds the precision bits for the
          operand type -- TODO confirm */
1619   x=0xEE000A00|T2CPR(vtop->type.t);
1620   switch(op) {
1621     case '+':
           /* addition commutes: bring a zero constant on top, then drop it */
1622       if(is_zero(-1))
1623         vswap();
1624       if(is_zero(0)) {
1625         vtop--;
1626         return;
1627       }
1628       x|=0x300000;
1629       break;
1630     case '-':
1631       x|=0x300040;
           /* x - 0: nothing to compute */
1632       if(is_zero(0)) {
1633         vtop--;
1634         return;
1635       }
           /* 0 - x: drop the constant and negate x instead */
1636       if(is_zero(-1)) {
1637         x|=0x810000; /* fsubX -> fnegX */
1638         vswap();
1639         vtop--;
1640         fneg=1;
1641       }
1642       break;
1643     case '*':
1644       x|=0x200000;
1645       break;
1646     case '/':
1647       x|=0x800000;
1648       break;
1649     default:
1650       if(op < TOK_ULT || op > TOK_GT) {
1651         tcc_error("unknown fp op %x!",op);
1652         return;
1653       }
           /* A zero constant on the left is swapped to the right so that the
              compare-with-zero form can be used; the operator is replaced
              accordingly. NOTE(review): the replacement tokens are chosen
              together with the remapping done after fmstat below -- confirm
              against mapcc() before changing either table. */
1654       if(is_zero(-1)) {
1655         vswap();
1656         switch(op) {
1657           case TOK_LT: op=TOK_GT; break;
1658           case TOK_GE: op=TOK_ULE; break;
1659           case TOK_LE: op=TOK_GE; break;
1660           case TOK_GT: op=TOK_ULT; break;
1661         }
1662       }
1663       x|=0xB40040; /* fcmpX */
1664       if(op!=TOK_EQ && op!=TOK_NE)
1665         x|=0x80; /* fcmpX -> fcmpeX */
1666       if(is_zero(0)) {
1667         vtop--;
1668         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1669       } else {
1670         x|=vfpr(gv(RC_FLOAT));
1671         vswap();
1672         o(x|(vfpr(gv(RC_FLOAT))<<12));
1673         vtop--;
1674       }
           /* copy the VFP status flags to the CPU flags */
1675       o(0xEEF1FA10); /* fmstat */
1676
           /* select the token stored in the VT_CMP value */
1677       switch(op) {
1678         case TOK_LE: op=TOK_ULE; break;
1679         case TOK_LT: op=TOK_ULT; break;
1680         case TOK_UGE: op=TOK_GE; break;
1681         case TOK_UGT: op=TOK_GT; break;
1682       }
1683
1684       vtop->r = VT_CMP;
1685       vtop->c.i = op;
1686       return;
1687   }
       /* Arithmetic: load the operand(s) in VFP registers. r becomes the mask
          of the registers that hold operands. */
1688   r=gv(RC_FLOAT);
1689   x|=vfpr(r);
1690   r=regmask(r);
1691   if(!fneg) {
1692     int r2;
1693     vswap();
1694     r2=gv(RC_FLOAT);
1695     x|=vfpr(r2)<<16;
1696     r|=regmask(r2);
1697   }
       /* pick the destination register for the result */
1698   vtop->r=get_reg_ex(RC_FLOAT,r);
1699   if(!fneg)
1700     vtop--;
1701   o(x|(vfpr(vtop->r)<<12));
1702 }
1703
1704 #else
1705 static uint32_t is_fconst()
1706 {
1707   long double f;
1708   uint32_t r;
1709   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1710     return 0;
1711   if (vtop->type.t == VT_FLOAT)
1712     f = vtop->c.f;
1713   else if (vtop->type.t == VT_DOUBLE)
1714     f = vtop->c.d;
1715   else
1716     f = vtop->c.ld;
1717   if(!ieee_finite(f))
1718     return 0;
1719   r=0x8;
1720   if(f<0.0) {
1721     r=0x18;
1722     f=-f;
1723   }
1724   if(f==0.0)
1725     return r;
1726   if(f==1.0)
1727     return r|1;
1728   if(f==2.0)
1729     return r|2;
1730   if(f==3.0)
1731     return r|3;
1732   if(f==4.0)
1733     return r|4;
1734   if(f==5.0)
1735     return r|5;
1736   if(f==0.5)
1737     return r|6;
1738   if(f==10.0)
1739     return r|7;
1740   return 0;
1741 }
1742
1743 /* generate a floating point operation 'v = t1 op t2' instruction. The
1744    two operands are guaranteed to have the same floating point type.
        FPA version. c1 and c2 hold the is_fconst() code of the first and second
        operand (0 when the operand cannot be an immediate); codes above 0xf
        denote a negated immediate. Comparisons leave a VT_CMP value. */
1745 void gen_opf(int op)
1746 {
1747   uint32_t x, r, r2, c1, c2;
1748   //fputs("gen_opf\n",stderr);
1749   vswap();
1750   c1 = is_fconst();
1751   vswap();
1752   c2 = is_fconst();
1753   x=0xEE000100;
       /* select the precision bits according to the operand type */
1754 #if LDOUBLE_SIZE == 8
1755   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1756     x|=0x80;
1757 #else
1758   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1759     x|=0x80;
1760   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1761     x|=0x80000;
1762 #endif
1763   switch(op)
1764   {
1765     case '+':
           /* addition commutes: make the second operand the immediate one */
1766       if(!c2) {
1767         vswap();
1768         c2=c1;
1769       }
1770       vswap();
1771       r=fpr(gv(RC_FLOAT));
1772       vswap();
1773       if(c2) {
             /* adding a negated immediate is a subtraction */
1774         if(c2>0xf)
1775           x|=0x200000; // suf
1776         r2=c2&0xf;
1777       } else {
1778         r2=fpr(gv(RC_FLOAT));
1779       }
1780       break;
1781     case '-':
1782       if(c2) {
             /* subtracting a negated immediate is an addition */
1783         if(c2<=0xf)
1784           x|=0x200000; // suf
1785         r2=c2&0xf;
1786         vswap();
1787         r=fpr(gv(RC_FLOAT));
1788         vswap();
1789       } else if(c1 && c1<=0xf) {
             /* immediate on the left: use the reversed subtraction */
1790         x|=0x300000; // rsf
1791         r2=c1;
1792         r=fpr(gv(RC_FLOAT));
1793         vswap();
1794       } else {
1795         x|=0x200000; // suf
1796         vswap();
1797         r=fpr(gv(RC_FLOAT));
1798         vswap();
1799         r2=fpr(gv(RC_FLOAT));
1800       }
1801       break;
1802     case '*':
           /* multiplication commutes; only non negated immediates are used */
1803       if(!c2 || c2>0xf) {
1804         vswap();
1805         c2=c1;
1806       }
1807       vswap();
1808       r=fpr(gv(RC_FLOAT));
1809       vswap();
1810       if(c2 && c2<=0xf)
1811         r2=c2;
1812       else
1813         r2=fpr(gv(RC_FLOAT));
1814       x|=0x100000; // muf
1815       break;
1816     case '/':
1817       if(c2 && c2<=0xf) {
1818         x|=0x400000; // dvf
1819         r2=c2;
1820         vswap();
1821         r=fpr(gv(RC_FLOAT));
1822         vswap();
1823       } else if(c1 && c1<=0xf) {
             /* immediate on the left: use the reversed division */
1824         x|=0x500000; // rdf
1825         r2=c1;
1826         r=fpr(gv(RC_FLOAT));
1827         vswap();
1828       } else {
1829         x|=0x400000; // dvf
1830         vswap();
1831         r=fpr(gv(RC_FLOAT));
1832         vswap();
1833         r2=fpr(gv(RC_FLOAT));
1834       }
1835       break;
1836     default:
1837       if(op >= TOK_ULT && op <= TOK_GT) {
1838         x|=0xd0f110; // cmfe
1839 /* bug (intention?) in Linux FPU emulator
1840    doesn't set carry if equal */
1841         switch(op) {
1842           case TOK_ULT:
1843           case TOK_UGE:
1844           case TOK_ULE:
1845           case TOK_UGT:
1846             tcc_error("unsigned comparision on floats?");
1847             break;
1848           case TOK_LT:
1849             op=TOK_Nset;
1850             break;
1851           case TOK_LE:
1852             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1853             break;
1854           case TOK_EQ:
1855           case TOK_NE:
1856             x&=~0x400000; // cmfe -> cmf
1857             break;
1858         }
             /* only the first operand is an immediate: swap the operands and
                replace the condition accordingly */
1859         if(c1 && !c2) {
1860           c2=c1;
1861           vswap();
1862           switch(op) {
1863             case TOK_Nset:
1864               op=TOK_GT;
1865               break;
1866             case TOK_GE:
1867               op=TOK_ULE;
1868               break;
1869             case TOK_ULE:
1870               op=TOK_GE;
1871               break;
1872             case TOK_GT:
1873               op=TOK_Nset;
1874               break;
1875           }
1876         }
1877         vswap();
1878         r=fpr(gv(RC_FLOAT));
1879         vswap();
1880         if(c2) {
               /* negated immediate: set bit 0x200000 of the compare opcode */
1881           if(c2>0xf)
1882             x|=0x200000;
1883           r2=c2&0xf;
1884         } else {
1885           r2=fpr(gv(RC_FLOAT));
1886         }
             /* the result of the comparison lives in the CPU flags */
1887         vtop[-1].r = VT_CMP;
1888         vtop[-1].c.i = op;
1889       } else {
1890         tcc_error("unknown fp op %x!",op);
1891         return;
1892       }
1893   }
       /* Destination field: 15 for a comparison, otherwise a float register
          allocated for the result. */
1894   if(vtop[-1].r == VT_CMP)
1895     c1=15;
1896   else {
1897     c1=vtop->r;
         /* second operand is an immediate (bit 3 set): it uses no register */
1898     if(r2&0x8)
1899       c1=vtop[-1].r;
1900     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1901     c1=fpr(vtop[-1].r);
1902   }
1903   vtop--;
1904   o(x|(r<<16)|(c1<<12)|r2);
1905 }
1906 #endif
1907
1908 /* Convert the integer on top of the value stack to the floating point type
1909    't'. Must handle 'int', 'unsigned int' and 'long long' cases: types up to
        int are converted inline, long long goes through a runtime helper whose
        result is expected in TREG_F0. Any other case is a compile error. */
1910 ST_FUNC void gen_cvt_itof1(int t)
1911 {
1912   uint32_t r, r2;
1913   int bt;
1914   bt=vtop->type.t & VT_BTYPE;
1915   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1916 #ifndef TCC_ARM_VFP
1917     uint32_t dsize = 0;
1918 #endif
1919     r=intr(gv(RC_INT));
1920 #ifdef TCC_ARM_VFP
         /* move the integer to a VFP register, then convert it in place */
1921     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1922     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1923     r2|=r2<<12;
1924     if(!(vtop->type.t & VT_UNSIGNED))
1925       r2|=0x80;                /* fuitoX -> fsitoX */
1926     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1927 #else
1928     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1929     if((t & VT_BTYPE) != VT_FLOAT)
1930       dsize=0x80;    /* flts -> fltd */
1931     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
         /* flt converts a signed value: for an unsigned int source, 2^32 is
            conditionally added to the result afterwards (the instructions
            below are predicated on the outcome of the cmp) */
1932     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1933       uint32_t off = 0;
1934       o(0xE3500000|(r<<12));        /* cmp */
1935       r=fpr(get_reg(RC_FLOAT));
           /* reuse the constant emitted by a previous conversion when it is
              still within reach of the load */
1936       if(last_itod_magic) {
1937         off=ind+8-last_itod_magic;
1938         off/=4;
1939         if(off>255)
1940           off=0;
1941       }
1942       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1943       if(!off) {
             /* emit the constant inline and branch over it */
1944         o(0xEA000000);              /* b */
1945         last_itod_magic=ind;
1946         o(0x4F800000);              /* 4294967296.0f */
1947       }
1948       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1949     }
1950 #endif
1951     return;
1952   } else if(bt == VT_LLONG) {
         /* pick the runtime helper matching the destination type and the
            signedness of the source */
1953     int func;
1954     CType *func_type = 0;
1955     if((t & VT_BTYPE) == VT_FLOAT) {
1956       func_type = &func_float_type;
1957       if(vtop->type.t & VT_UNSIGNED)
1958         func=TOK___floatundisf;
1959       else
1960         func=TOK___floatdisf;
1961 #if LDOUBLE_SIZE != 8
1962     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1963       func_type = &func_ldouble_type;
1964       if(vtop->type.t & VT_UNSIGNED)
1965         func=TOK___floatundixf;
1966       else
1967         func=TOK___floatdixf;
1968     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1969 #else
1970     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1971 #endif
1972       func_type = &func_double_type;
1973       if(vtop->type.t & VT_UNSIGNED)
1974         func=TOK___floatundidf;
1975       else
1976         func=TOK___floatdidf;
1977     }
         /* func is only set, and only read, when func_type is non null */
1978     if(func_type) {
1979       vpush_global_sym(func_type, func);
1980       vswap();
1981       gfunc_call(1);
1982       vpushi(0);
1983       vtop->r=TREG_F0;
1984       return;
1985     }
1986   }
1987   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1988 }
1989
1990 /* Convert the floating point value on top of the value stack to the integer
        type 't'. Conversions to int are done inline where an instruction is
        used below, the other ones call a runtime helper whose result is taken
        from REG_IRET (and REG_LRET for long long). Unsupported combinations
        are a compile error. */
1991 void gen_cvt_ftoi(int t)
1992 {
1993   uint32_t r, r2;
1994   int u, func = 0;
       /* u: unsigned destination, t: destination basic type,
          r2: source basic type */
1995   u=t&VT_UNSIGNED;
1996   t&=VT_BTYPE;
1997   r2=vtop->type.t & VT_BTYPE;
1998   if(t==VT_INT) {
1999 #ifdef TCC_ARM_VFP
         /* convert in place in the VFP register, then move the result to a
            core register */
2000     r=vfpr(gv(RC_FLOAT));
2001     u=u?0:0x10000;
2002     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2003     r2=intr(vtop->r=get_reg(RC_INT));
2004     o(0xEE100A10|(r<<16)|(r2<<12));
2005     return;
2006 #else
2007     if(u) {
           /* unsigned destination: use a runtime helper */
2008       if(r2 == VT_FLOAT)
2009         func=TOK___fixunssfsi;
2010 #if LDOUBLE_SIZE != 8
2011       else if(r2 == VT_LDOUBLE)
2012         func=TOK___fixunsxfsi;
2013       else if(r2 == VT_DOUBLE)
2014 #else
2015       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2016 #endif
2017         func=TOK___fixunsdfsi;
2018     } else {
           /* signed destination: a single FPA instruction does it */
2019       r=fpr(gv(RC_FLOAT));
2020       r2=intr(vtop->r=get_reg(RC_INT));
2021       o(0xEE100170|(r2<<12)|r);
2022       return;
2023     }
2024 #endif
2025   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2026     if(r2 == VT_FLOAT)
2027       func=TOK___fixsfdi;
2028 #if LDOUBLE_SIZE != 8
2029     else if(r2 == VT_LDOUBLE)
2030       func=TOK___fixxfdi;
2031     else if(r2 == VT_DOUBLE)
2032 #else
2033     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2034 #endif
2035       func=TOK___fixdfdi;
2036   }
       /* call the selected helper with the float value as its argument */
2037   if(func) {
2038     vpush_global_sym(&func_old_type, func);
2039     vswap();
2040     gfunc_call(1);
2041     vpushi(0);
2042     if(t == VT_LLONG)
2043       vtop->r2 = REG_LRET;
2044     vtop->r = REG_IRET;
2045     return;
2046   }
2047   tcc_error("unimplemented gen_cvt_ftoi!");
2048 }
2049
2050 /* convert from one floating point type to another */
2051 void gen_cvt_ftof(int t)
2052 {
2053 #ifdef TCC_ARM_VFP
2054   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2055     uint32_t r = vfpr(gv(RC_FLOAT));
2056     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2057   }
2058 #else
2059   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2060   gv(RC_FLOAT);
2061 #endif
2062 }
2063
2064 /* computed goto support */
2065 void ggoto(void)
2066 {
2067   gcall_or_jmp(1);
2068   vtop--;
2069 }
2070
2071 /* Save the stack pointer onto the stack and return the location of its address */
2072 ST_FUNC void gen_vla_sp_save(int addr) {
2073     tcc_error("variable length arrays unsupported for this target");
2074 }
2075
2076 /* Restore the SP from a location on the stack */
2077 ST_FUNC void gen_vla_sp_restore(int addr) {
2078     tcc_error("variable length arrays unsupported for this target");
2079 }
2080
2081 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2082 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2083     tcc_error("variable length arrays unsupported for this target");
2084 }
2085
2086 /* end of ARM code generator */
2087 /*************************************************************/
2088 #endif
2089 /*************************************************************/