arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_ARM_VERSION
  38 # define TCC_ARM_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82 };
  83
  84 #ifdef TCC_ARM_VFP
  85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  86 #endif
  87
  88 /* return registers for function */
  89 #define REG_IRET TREG_R0 /* single word int return register */
  90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  91 #define REG_FRET TREG_F0 /* float return register */
  92
  93 #ifdef TCC_ARM_EABI
  94 #define TOK___divdi3 TOK___aeabi_ldivmod
  95 #define TOK___moddi3 TOK___aeabi_ldivmod
  96 #define TOK___udivdi3 TOK___aeabi_uldivmod
  97 #define TOK___umoddi3 TOK___aeabi_uldivmod
  98 #endif
  99
 100 /* defined if function parameters must be evaluated in reverse order */
 101 #define INVERT_FUNC_PARAMS
 102
 103 /* defined if structures are passed as pointers. Otherwise structures
 104    are directly pushed on stack. */
 105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 106
 107 /* pointer size, in bytes */
 108 #define PTR_SIZE 4
 109
 110 /* long double size and alignment, in bytes */
 111 #ifdef TCC_ARM_VFP
 112 #define LDOUBLE_SIZE  8
 113 #endif
 114
 115 #ifndef LDOUBLE_SIZE
 116 #define LDOUBLE_SIZE  8
 117 #endif
 118
 119 #ifdef TCC_ARM_EABI
 120 #define LDOUBLE_ALIGN 8
 121 #else
 122 #define LDOUBLE_ALIGN 4
 123 #endif
 124
 125 /* maximum alignment (for aligned attribute support) */
 126 #define MAX_ALIGN     8
 127
 128 #define CHAR_IS_UNSIGNED
 129
 130 /******************************************************/
 131 /* ELF defines */
 132
 133 #define EM_TCC_TARGET EM_ARM
 134
 135 /* relocation type for 32 bit data relocation */
 136 #define R_DATA_32   R_ARM_ABS32
 137 #define R_DATA_PTR  R_ARM_ABS32
 138 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 139 #define R_COPY      R_ARM_COPY
 140
 141 #define ELF_START_ADDR 0x00008000
 142 #define ELF_PAGE_SIZE  0x1000
 143
 144 enum float_abi {
 145     ARM_SOFTFP_FLOAT,
 146     ARM_HARD_FLOAT,
 147 };
 148
 149 enum float_abi float_abi;
 150
 151 /******************************************************/
 152 #else /* ! TARGET_DEFS_ONLY */
 153 /******************************************************/
 154 #include "tcc.h"
 155
 156 ST_DATA const int reg_classes[NB_REGS] = {
 157     /* r0 */ RC_INT | RC_R0,
 158     /* r1 */ RC_INT | RC_R1,
 159     /* r2 */ RC_INT | RC_R2,
 160     /* r3 */ RC_INT | RC_R3,
 161     /* r12 */ RC_INT | RC_R12,
 162     /* f0 */ RC_FLOAT | RC_F0,
 163     /* f1 */ RC_FLOAT | RC_F1,
 164     /* f2 */ RC_FLOAT | RC_F2,
 165     /* f3 */ RC_FLOAT | RC_F3,
 166 #ifdef TCC_ARM_VFP
 167  /* d4/s8 */ RC_FLOAT | RC_F4,
 168 /* d5/s10 */ RC_FLOAT | RC_F5,
 169 /* d6/s12 */ RC_FLOAT | RC_F6,
 170 /* d7/s14 */ RC_FLOAT | RC_F7,
 171 #endif
 172 };
 173
 174 static int func_sub_sp_offset, last_itod_magic;
 175 static int leaffunc;
 176
 177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 178 static CType float_type, double_type, func_float_type, func_double_type;
 179 ST_FUNC void arm_init(struct TCCState *s)
 180 {
 181     float_type.t = VT_FLOAT;
 182     double_type.t = VT_DOUBLE;
 183     func_float_type.t = VT_FUNC;
 184     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 185     func_double_type.t = VT_FUNC;
 186     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 187
 188     float_abi = s->float_abi;
 189 }
 190 #else
 191 #define func_float_type func_old_type
 192 #define func_double_type func_old_type
 193 #define func_ldouble_type func_old_type
 194 ST_FUNC void arm_init(struct TCCState *s) {}
 195 #endif
 196
 197 static int two2mask(int a,int b) {
 198   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 199 }
 200
 201 static int regmask(int r) {
 202   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 203 }
 204
 205 /******************************************************/
 206
 207 #ifdef TCC_ARM_EABI
 208 char *default_elfinterp(struct TCCState *s)
 209 {
 210     if (s->float_abi == ARM_HARD_FLOAT)
 211         return "/lib/ld-linux-armhf.so.3";
 212     else
 213         return "/lib/ld-linux.so.3";
 214 }
 215 #endif
 216
 217 void o(uint32_t i)
 218 {
 219   /* this is a good place to start adding big-endian support*/
 220   int ind1;
 221
 222   ind1 = ind + 4;
 223   if (!cur_text_section)
 224     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 225          "can't evaluate constant expressions outside of a function.");
 226   if (ind1 > cur_text_section->data_allocated)
 227     section_realloc(cur_text_section, ind1);
 228   cur_text_section->data[ind++] = i&255;
 229   i>>=8;
 230   cur_text_section->data[ind++] = i&255;
 231   i>>=8;
 232   cur_text_section->data[ind++] = i&255;
 233   i>>=8;
 234   cur_text_section->data[ind++] = i;
 235 }
 236
 237 static uint32_t stuff_const(uint32_t op, uint32_t c)
 238 {
 239   int try_neg=0;
 240   uint32_t nc = 0, negop = 0;
 241
 242   switch(op&0x1F00000)
 243   {
 244     case 0x800000: //add
 245     case 0x400000: //sub
 246       try_neg=1;
 247       negop=op^0xC00000;
 248       nc=-c;
 249       break;
 250     case 0x1A00000: //mov
 251     case 0x1E00000: //mvn
 252       try_neg=1;
 253       negop=op^0x400000;
 254       nc=~c;
 255       break;
 256     case 0x200000: //xor
 257       if(c==~0)
 258         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 259       break;
 260     case 0x0: //and
 261       if(c==~0)
 262         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 263     case 0x1C00000: //bic
 264       try_neg=1;
 265       negop=op^0x1C00000;
 266       nc=~c;
 267       break;
 268     case 0x1800000: //orr
 269       if(c==~0)
 270         return (op&0xFFF0FFFF)|0x1E00000;
 271       break;
 272   }
 273   do {
 274     uint32_t m;
 275     int i;
 276     if(c<256) /* catch undefined <<32 */
 277       return op|c;
 278     for(i=2;i<32;i+=2) {
 279       m=(0xff>>i)|(0xff<<(32-i));
 280       if(!(c&~m))
 281         return op|(i<<7)|(c<<i)|(c>>(32-i));
 282     }
 283     op=negop;
 284     c=nc;
 285   } while(try_neg--);
 286   return 0;
 287 }
 288
 289
 290 //only add,sub
 291 void stuff_const_harder(uint32_t op, uint32_t v) {
 292   uint32_t x;
 293   x=stuff_const(op,v);
 294   if(x)
 295     o(x);
 296   else {
 297     uint32_t a[16], nv, no, o2, n2;
 298     int i,j,k;
 299     a[0]=0xff;
 300     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 301     for(i=1;i<16;i++)
 302       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 303     for(i=0;i<12;i++)
 304       for(j=i<4?i+12:15;j>=i+4;j--)
 305         if((v&(a[i]|a[j]))==v) {
 306           o(stuff_const(op,v&a[i]));
 307           o(stuff_const(o2,v&a[j]));
 308           return;
 309         }
 310     no=op^0xC00000;
 311     n2=o2^0xC00000;
 312     nv=-v;
 313     for(i=0;i<12;i++)
 314       for(j=i<4?i+12:15;j>=i+4;j--)
 315         if((nv&(a[i]|a[j]))==nv) {
 316           o(stuff_const(no,nv&a[i]));
 317           o(stuff_const(n2,nv&a[j]));
 318           return;
 319         }
 320     for(i=0;i<8;i++)
 321       for(j=i+4;j<12;j++)
 322         for(k=i<4?i+12:15;k>=j+4;k--)
 323           if((v&(a[i]|a[j]|a[k]))==v) {
 324             o(stuff_const(op,v&a[i]));
 325             o(stuff_const(o2,v&a[j]));
 326             o(stuff_const(o2,v&a[k]));
 327             return;
 328           }
 329     no=op^0xC00000;
 330     nv=-v;
 331     for(i=0;i<8;i++)
 332       for(j=i+4;j<12;j++)
 333         for(k=i<4?i+12:15;k>=j+4;k--)
 334           if((nv&(a[i]|a[j]|a[k]))==nv) {
 335             o(stuff_const(no,nv&a[i]));
 336             o(stuff_const(n2,nv&a[j]));
 337             o(stuff_const(n2,nv&a[k]));
 338             return;
 339           }
 340     o(stuff_const(op,v&a[0]));
 341     o(stuff_const(o2,v&a[4]));
 342     o(stuff_const(o2,v&a[8]));
 343     o(stuff_const(o2,v&a[12]));
 344   }
 345 }
 346
 347 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 348 {
 349   addr-=pos+8;
 350   addr/=4;
 351   if(addr>=0x1000000 || addr<-0x1000000) {
 352     if(fail)
 353       tcc_error("FIXME: function bigger than 32MB");
 354     return 0;
 355   }
 356   return 0x0A000000|(addr&0xffffff);
 357 }
 358
 359 int decbranch(int pos)
 360 {
 361   int x;
 362   x=*(uint32_t *)(cur_text_section->data + pos);
 363   x&=0x00ffffff;
 364   if(x&0x800000)
 365     x-=0x1000000;
 366   return x*4+pos+8;
 367 }
 368
 369 /* output a symbol and patch all calls to it */
 370 void gsym_addr(int t, int a)
 371 {
 372   uint32_t *x;
 373   int lt;
 374   while(t) {
 375     x=(uint32_t *)(cur_text_section->data + t);
 376     t=decbranch(lt=t);
 377     if(a==lt+4)
 378       *x=0xE1A00000; // nop
 379     else {
 380       *x &= 0xff000000;
 381       *x |= encbranch(lt,a,1);
 382     }
 383   }
 384 }
 385
 386 void gsym(int t)
 387 {
 388   gsym_addr(t, ind);
 389 }
 390
 391 #ifdef TCC_ARM_VFP
 392 static uint32_t vfpr(int r)
 393 {
 394   if(r<TREG_F0 || r>TREG_F7)
 395     tcc_error("compiler error! register %i is no vfp register",r);
 396   return r-5;
 397 }
 398 #else
 399 static uint32_t fpr(int r)
 400 {
 401   if(r<TREG_F0 || r>TREG_F3)
 402     tcc_error("compiler error! register %i is no fpa register",r);
 403   return r-5;
 404 }
 405 #endif
 406
 407 static uint32_t intr(int r)
 408 {
 409   if(r==4)
 410     return 12;
 411   if((r<0 || r>4) && r!=14)
 412     tcc_error("compiler error! register %i is no int register",r);
 413   return r;
 414 }
 415
 416 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 417 {
 418   if(*off>maxoff || *off&((1<<shift)-1)) {
 419     uint32_t x, y;
 420     x=0xE280E000;
 421     if(*sgn)
 422       x=0xE240E000;
 423     x|=(*base)<<16;
 424     *base=14; // lr
 425     y=stuff_const(x,*off&~maxoff);
 426     if(y) {
 427       o(y);
 428       *off&=maxoff;
 429       return;
 430     }
 431     y=stuff_const(x,(*off+maxoff)&~maxoff);
 432     if(y) {
 433       o(y);
 434       *sgn=!*sgn;
 435       *off=((*off+maxoff)&~maxoff)-*off;
 436       return;
 437     }
 438     stuff_const_harder(x,*off&~maxoff);
 439     *off&=maxoff;
 440   }
 441 }
 442
 443 static uint32_t mapcc(int cc)
 444 {
 445   switch(cc)
 446   {
 447     case TOK_ULT:
 448       return 0x30000000; /* CC/LO */
 449     case TOK_UGE:
 450       return 0x20000000; /* CS/HS */
 451     case TOK_EQ:
 452       return 0x00000000; /* EQ */
 453     case TOK_NE:
 454       return 0x10000000; /* NE */
 455     case TOK_ULE:
 456       return 0x90000000; /* LS */
 457     case TOK_UGT:
 458       return 0x80000000; /* HI */
 459     case TOK_Nset:
 460       return 0x40000000; /* MI */
 461     case TOK_Nclear:
 462       return 0x50000000; /* PL */
 463     case TOK_LT:
 464       return 0xB0000000; /* LT */
 465     case TOK_GE:
 466       return 0xA0000000; /* GE */
 467     case TOK_LE:
 468       return 0xD0000000; /* LE */
 469     case TOK_GT:
 470       return 0xC0000000; /* GT */
 471   }
 472   tcc_error("unexpected condition code");
 473   return 0xE0000000; /* AL */
 474 }
 475
 476 static int negcc(int cc)
 477 {
 478   switch(cc)
 479   {
 480     case TOK_ULT:
 481       return TOK_UGE;
 482     case TOK_UGE:
 483       return TOK_ULT;
 484     case TOK_EQ:
 485       return TOK_NE;
 486     case TOK_NE:
 487       return TOK_EQ;
 488     case TOK_ULE:
 489       return TOK_UGT;
 490     case TOK_UGT:
 491       return TOK_ULE;
 492     case TOK_Nset:
 493       return TOK_Nclear;
 494     case TOK_Nclear:
 495       return TOK_Nset;
 496     case TOK_LT:
 497       return TOK_GE;
 498     case TOK_GE:
 499       return TOK_LT;
 500     case TOK_LE:
 501       return TOK_GT;
 502     case TOK_GT:
 503       return TOK_LE;
 504   }
 505   tcc_error("unexpected condition code");
 506   return TOK_NE;
 507 }
 508
 509 /* load 'r' from value 'sv' */
 510 void load(int r, SValue *sv)
 511 {
 512   int v, ft, fc, fr, sign;
 513   uint32_t op;
 514   SValue v1;
 515
 516   fr = sv->r;
 517   ft = sv->type.t;
 518   fc = sv->c.ul;
 519
 520   if(fc>=0)
 521     sign=0;
 522   else {
 523     sign=1;
 524     fc=-fc;
 525   }
 526
 527   v = fr & VT_VALMASK;
 528   if (fr & VT_LVAL) {
 529     uint32_t base = 0xB; // fp
 530     if(v == VT_LLOCAL) {
 531       v1.type.t = VT_PTR;
 532       v1.r = VT_LOCAL | VT_LVAL;
 533       v1.c.ul = sv->c.ul;
 534       load(base=14 /* lr */, &v1);
 535       fc=sign=0;
 536       v=VT_LOCAL;
 537     } else if(v == VT_CONST) {
 538       v1.type.t = VT_PTR;
 539       v1.r = fr&~VT_LVAL;
 540       v1.c.ul = sv->c.ul;
 541       v1.sym=sv->sym;
 542       load(base=14, &v1);
 543       fc=sign=0;
 544       v=VT_LOCAL;
 545     } else if(v < VT_CONST) {
 546       base=intr(v);
 547       fc=sign=0;
 548       v=VT_LOCAL;
 549     }
 550     if(v == VT_LOCAL) {
 551       if(is_float(ft)) {
 552         calcaddr(&base,&fc,&sign,1020,2);
 553 #ifdef TCC_ARM_VFP
 554         op=0xED100A00; /* flds */
 555         if(!sign)
 556           op|=0x800000;
 557         if ((ft & VT_BTYPE) != VT_FLOAT)
 558           op|=0x100;   /* flds -> fldd */
 559         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 560 #else
 561         op=0xED100100;
 562         if(!sign)
 563           op|=0x800000;
 564 #if LDOUBLE_SIZE == 8
 565         if ((ft & VT_BTYPE) != VT_FLOAT)
 566           op|=0x8000;
 567 #else
 568         if ((ft & VT_BTYPE) == VT_DOUBLE)
 569           op|=0x8000;
 570         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 571           op|=0x400000;
 572 #endif
 573         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 574 #endif
 575       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 576                 || (ft & VT_BTYPE) == VT_SHORT) {
 577         calcaddr(&base,&fc,&sign,255,0);
 578         op=0xE1500090;
 579         if ((ft & VT_BTYPE) == VT_SHORT)
 580           op|=0x20;
 581         if ((ft & VT_UNSIGNED) == 0)
 582           op|=0x40;
 583         if(!sign)
 584           op|=0x800000;
 585         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 586       } else {
 587         calcaddr(&base,&fc,&sign,4095,0);
 588         op=0xE5100000;
 589         if(!sign)
 590           op|=0x800000;
 591         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 592           op|=0x400000;
 593         o(op|(intr(r)<<12)|fc|(base<<16));
 594       }
 595       return;
 596     }
 597   } else {
 598     if (v == VT_CONST) {
 599       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 600       if (fr & VT_SYM || !op) {
 601         o(0xE59F0000|(intr(r)<<12));
 602         o(0xEA000000);
 603         if(fr & VT_SYM)
 604           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 605         o(sv->c.ul);
 606       } else
 607         o(op);
 608       return;
 609     } else if (v == VT_LOCAL) {
 610       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 611       if (fr & VT_SYM || !op) {
 612         o(0xE59F0000|(intr(r)<<12));
 613         o(0xEA000000);
 614         if(fr & VT_SYM) // needed ?
 615           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 616         o(sv->c.ul);
 617         o(0xE08B0000|(intr(r)<<12)|intr(r));
 618       } else
 619         o(op);
 620       return;
 621     } else if(v == VT_CMP) {
 622       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 623       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 624       return;
 625     } else if (v == VT_JMP || v == VT_JMPI) {
 626       int t;
 627       t = v & 1;
 628       o(0xE3A00000|(intr(r)<<12)|t);
 629       o(0xEA000000);
 630       gsym(sv->c.ul);
 631       o(0xE3A00000|(intr(r)<<12)|(t^1));
 632       return;
 633     } else if (v < VT_CONST) {
 634       if(is_float(ft))
 635 #ifdef TCC_ARM_VFP
 636         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 637 #else
 638         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 639 #endif
 640       else
 641         o(0xE1A00000|(intr(r)<<12)|intr(v));
 642       return;
 643     }
 644   }
 645   tcc_error("load unimplemented!");
 646 }
 647
 648 /* store register 'r' in lvalue 'v' */
 649 void store(int r, SValue *sv)
 650 {
 651   SValue v1;
 652   int v, ft, fc, fr, sign;
 653   uint32_t op;
 654
 655   fr = sv->r;
 656   ft = sv->type.t;
 657   fc = sv->c.ul;
 658
 659   if(fc>=0)
 660     sign=0;
 661   else {
 662     sign=1;
 663     fc=-fc;
 664   }
 665
 666   v = fr & VT_VALMASK;
 667   if (fr & VT_LVAL || fr == VT_LOCAL) {
 668     uint32_t base = 0xb;
 669     if(v < VT_CONST) {
 670       base=intr(v);
 671       v=VT_LOCAL;
 672       fc=sign=0;
 673     } else if(v == VT_CONST) {
 674       v1.type.t = ft;
 675       v1.r = fr&~VT_LVAL;
 676       v1.c.ul = sv->c.ul;
 677       v1.sym=sv->sym;
 678       load(base=14, &v1);
 679       fc=sign=0;
 680       v=VT_LOCAL;
 681     }
 682     if(v == VT_LOCAL) {
 683        if(is_float(ft)) {
 684         calcaddr(&base,&fc,&sign,1020,2);
 685 #ifdef TCC_ARM_VFP
 686         op=0xED000A00; /* fsts */
 687         if(!sign)
 688           op|=0x800000;
 689         if ((ft & VT_BTYPE) != VT_FLOAT)
 690           op|=0x100;   /* fsts -> fstd */
 691         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 692 #else
 693         op=0xED000100;
 694         if(!sign)
 695           op|=0x800000;
 696 #if LDOUBLE_SIZE == 8
 697         if ((ft & VT_BTYPE) != VT_FLOAT)
 698           op|=0x8000;
 699 #else
 700         if ((ft & VT_BTYPE) == VT_DOUBLE)
 701           op|=0x8000;
 702         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 703           op|=0x400000;
 704 #endif
 705         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 706 #endif
 707         return;
 708       } else if((ft & VT_BTYPE) == VT_SHORT) {
 709         calcaddr(&base,&fc,&sign,255,0);
 710         op=0xE14000B0;
 711         if(!sign)
 712           op|=0x800000;
 713         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 714       } else {
 715         calcaddr(&base,&fc,&sign,4095,0);
 716         op=0xE5000000;
 717         if(!sign)
 718           op|=0x800000;
 719         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 720           op|=0x400000;
 721         o(op|(intr(r)<<12)|fc|(base<<16));
 722       }
 723       return;
 724     }
 725   }
 726   tcc_error("store unimplemented");
 727 }
 728
 729 static void gadd_sp(int val)
 730 {
 731   stuff_const_harder(0xE28DD000,val);
 732 }
 733
 734 /* 'is_jmp' is '1' if it is a jump */
 735 static void gcall_or_jmp(int is_jmp)
 736 {
 737   int r;
 738   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 739     uint32_t x;
 740     /* constant case */
 741     x=encbranch(ind,ind+vtop->c.ul,0);
 742     if(x) {
 743       if (vtop->r & VT_SYM) {
 744         /* relocation case */
 745         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 746       } else
 747         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 748       o(x|(is_jmp?0xE0000000:0xE1000000));
 749     } else {
 750       if(!is_jmp)
 751         o(0xE28FE004); // add lr,pc,#4
 752       o(0xE51FF004);   // ldr pc,[pc,#-4]
 753       if (vtop->r & VT_SYM)
 754         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 755       o(vtop->c.ul);
 756     }
 757   } else {
 758     /* otherwise, indirect call */
 759     r = gv(RC_INT);
 760     if(!is_jmp)
 761       o(0xE1A0E00F);       // mov lr,pc
 762     o(0xE1A0F000|intr(r)); // mov pc,r
 763   }
 764 }
 765
 766 /* Return whether a structure is an homogeneous float aggregate or not.
 767    The answer is true if all the elements of the structure are of the same
 768    primitive float type and there is less than 4 elements.
 769
 770    type: the type corresponding to the structure to be tested */
 771 static int is_hgen_float_aggr(CType *type)
 772 {
 773   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 774     struct Sym *ref;
 775     int btype, nb_fields = 0;
 776
 777     ref = type->ref->next;
 778     btype = ref->type.t & VT_BTYPE;
 779     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 780       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 781       return !ref && nb_fields <= 4;
 782     }
 783   }
 784   return 0;
 785 }
 786
 787 struct avail_regs {
 788   signed char avail[3]; /* 3 holes max with only float and double alignments */
 789   int first_hole; /* first available hole */
 790   int last_hole; /* last available hole (none if equal to first_hole) */
 791   int first_free_reg; /* next free register in the sequence, hole excluded */
 792 };
 793
 794 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 795
 796 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 797    param) according to the rules described in the procedure call standard for
 798    the ARM architecture (AAPCS). If found, the registers are assigned to this
 799    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 800    and the parameter is a single float.
 801
 802    avregs: opaque structure to keep track of available VFP co-processor regs
 803    align: alignment contraints for the param, as returned by type_size()
 804    size: size of the parameter, as returned by type_size() */
 805 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 806 {
 807   int first_reg = 0;
 808
 809   if (avregs->first_free_reg == -1)
 810     return -1;
 811   if (align >> 3) { /* double alignment */
 812     first_reg = avregs->first_free_reg;
 813     /* alignment contraint not respected so use next reg and record hole */
 814     if (first_reg & 1)
 815       avregs->avail[avregs->last_hole++] = first_reg++;
 816   } else { /* no special alignment (float or array of float) */
 817     /* if single float and a hole is available, assign the param to it */
 818     if (size == 4 && avregs->first_hole != avregs->last_hole)
 819       return avregs->avail[avregs->first_hole++];
 820     else
 821       first_reg = avregs->first_free_reg;
 822   }
 823   if (first_reg + size / 4 <= 16) {
 824     avregs->first_free_reg = first_reg + size / 4;
 825     return first_reg;
 826   }
 827   avregs->first_free_reg = -1;
 828   return -1;
 829 }
 830
 831 /* Returns whether all params need to be passed in core registers or not.
 832    This is the case for function part of the runtime ABI. */
 833 int floats_in_core_regs(SValue *sval)
 834 {
 835   if (!sval->sym)
 836     return 0;
 837
 838   switch (sval->sym->v) {
 839     case TOK___floatundisf:
 840     case TOK___floatundidf:
 841     case TOK___fixunssfdi:
 842     case TOK___fixunsdfdi:
 843 #ifndef TCC_ARM_VFP
 844     case TOK___fixunsxfdi:
 845 #endif
 846     case TOK___floatdisf:
 847     case TOK___floatdidf:
 848     case TOK___fixsfdi:
 849     case TOK___fixdfdi:
 850       return 1;
 851
 852     default:
 853       return 0;
 854   }
 855 }
 856
 857 /* Return the number of registers needed to return the struct, or 0 if
 858    returning via struct pointer. */
 859 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) {
 860 #ifdef TCC_ARM_EABI
 861     int size, align;
 862     size = type_size(vt, &align);
 863     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 864         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 865         *ret_align = 8;
 866         ret->ref = NULL;
 867         ret->t = VT_DOUBLE;
 868         return (size + 7) >> 3;
 869     } else if (size <= 4) {
 870         *ret_align = 4;
 871         ret->ref = NULL;
 872         ret->t = VT_INT;
 873         return 1;
 874     } else
 875         return 0;
 876 #else
 877     return 0;
 878 #endif
 879 }
 880
 881 /* Parameters are classified according to how they are copied to their final
 882    destination for the function call. Because the copying is performed class
 883    after class according to the order in the union below, it is important that
 884    some constraints about the order of the members of this union are respected:
 885    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 886    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 887      VFP_STRUCT_CLASS;
 888    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 889    See the comment for the main loop in copy_params() for the reason. */
 890 enum reg_class {
 891         STACK_CLASS = 0,
 892         CORE_STRUCT_CLASS,
 893         VFP_CLASS,
 894         VFP_STRUCT_CLASS,
 895         CORE_CLASS,
 896         NB_CLASSES
 897 };
 898
 899 struct param_plan {
 900     int start; /* first reg or addr used depending on the class */
 901     int end; /* last reg used or next free addr depending on the class */
 902     SValue *sval; /* pointer to SValue on the value stack */
 903     struct param_plan *prev; /*  previous element in this class */
 904 };
 905
 906 struct plan {
 907     struct param_plan *pplans; /* array of all the param plans */
 908     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 909 };
 910
 911 #define add_param_plan(plan,pplan,class)                        \
 912     do {                                                        \
 913         pplan.prev = plan->clsplans[class];                     \
 914         plan->pplans[plan ## _nb] = pplan;                      \
 915         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
 916     } while(0)
 917
 918 /* Assign parameters to registers and stack with alignment according to the
 919    rules in the procedure call standard for the ARM architecture (AAPCS).
 920    The overall assignment is recorded in an array of per parameter structures
 921    called parameter plans. The parameter plans are also further organized in a
 922    number of linked lists, one per class of parameter (see the comment for the
 923    definition of union reg_class).
 924
 925    nb_args: number of parameters of the function for which a call is generated
 926    float_abi: float ABI in use for this function call
 927    plan: the structure where the overall assignment is recorded
 928    todo: a bitmap that record which core registers hold a parameter
 929
 930    Returns the amount of stack space needed for parameter passing
 931
 932    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 933    is the responsability of the caller to free this array once used (ie not
 934    before copy_params). */
 935 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
 936 {
 937   int i, size, align;
 938   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 939   int plan_nb = 0;
 940   struct param_plan pplan;
 941   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 942
 943   ncrn = nsaa = 0;
 944   *todo = 0;
 945   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 946   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 947   for(i = nb_args; i-- ;) {
 948     int j, start_vfpreg = 0;
 949     size = type_size(&vtop[-i].type, &align);
 950     switch(vtop[-i].type.t & VT_BTYPE) {
 951       case VT_STRUCT:
 952       case VT_FLOAT:
 953       case VT_DOUBLE:
 954       case VT_LDOUBLE:
 955       if (float_abi == ARM_HARD_FLOAT) {
 956         int is_hfa = 0; /* Homogeneous float aggregate */
 957
 958         if (is_float(vtop[-i].type.t)
 959             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 960           int end_vfpreg;
 961
 962           start_vfpreg = assign_vfpreg(&avregs, align, size);
 963           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 964           if (start_vfpreg >= 0) {
 965             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 966             if (is_hfa)
 967               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 968             else
 969               add_param_plan(plan, pplan, VFP_CLASS);
 970             continue;
 971           } else
 972             break;
 973         }
 974       }
 975       ncrn = (ncrn + (align-1)/4) & -(align/4);
 976       size = (size + 3) & -4;
 977       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 978         /* The parameter is allocated both in core register and on stack. As
 979          * such, it can be of either class: it would either be the last of
 980          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
 981         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 982           *todo|=(1<<j);
 983         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
 984         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
 985         ncrn += size/4;
 986         if (ncrn > 4)
 987           nsaa = (ncrn - 4) * 4;
 988       } else {
 989         ncrn = 4;
 990         break;
 991       }
 992       continue;
 993       default:
 994       if (ncrn < 4) {
 995         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 996
 997         if (is_long) {
 998           ncrn = (ncrn + 1) & -2;
 999           if (ncrn == 4)
1000             break;
1001         }
1002         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1003         ncrn++;
1004         if (is_long)
1005           pplan.end = ncrn++;
1006         add_param_plan(plan, pplan, CORE_CLASS);
1007         continue;
1008       }
1009     }
1010     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1011     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1012     add_param_plan(plan, pplan, STACK_CLASS);
1013     nsaa += size; /* size already rounded up before */
1014   }
1015   return nsaa;
1016 }
1017
1018 #undef add_param_plan
1019
1020 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1021    function call.
1022
1023    nb_args: number of parameters the function take
1024    plan: the overall assignment plan for parameters
1025    todo: a bitmap indicating what core reg will hold a parameter
1026
1027    Returns the number of SValue added by this function on the value stack */
1028 static int copy_params(int nb_args, struct plan *plan, int todo)
1029 {
1030   int size, align, r, i, nb_extra_sval = 0;
1031   struct param_plan *pplan;
1032
1033    /* Several constraints require parameters to be copied in a specific order:
1034       - structures are copied to the stack before being loaded in a reg;
1035       - floats loaded to an odd numbered VFP reg are first copied to the
1036         preceding even numbered VFP reg and then moved to the next VFP reg.
1037
1038       It is thus important that:
1039       - structures assigned to core regs must be copied after parameters
1040         assigned to the stack but before structures assigned to VFP regs because
1041         a structure can lie partly in core registers and partly on the stack;
1042       - parameters assigned to the stack and all structures be copied before
1043         parameters assigned to a core reg since copying a parameter to the stack
1044         require using a core reg;
1045       - parameters assigned to VFP regs be copied before structures assigned to
1046         VFP regs as the copy might use an even numbered VFP reg that already
1047         holds part of a structure. */
1048   for(i = 0; i < NB_CLASSES; i++) {
1049     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1050       vpushv(pplan->sval);
1051       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1052       switch(i) {
1053         case STACK_CLASS:
1054         case CORE_STRUCT_CLASS:
1055         case VFP_STRUCT_CLASS:
1056           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1057             int padding = 0;
1058             size = type_size(&pplan->sval->type, &align);
1059             /* align to stack align size */
1060             size = (size + 3) & ~3;
1061             if (i == STACK_CLASS && pplan->prev)
1062               padding = pplan->start - pplan->prev->end;
1063             size += padding; /* Add padding if any */
1064             /* allocate the necessary size on stack */
1065             gadd_sp(-size);
1066             /* generate structure store */
1067             r = get_reg(RC_INT);
1068             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1069             vset(&vtop->type, r | VT_LVAL, 0);
1070             vswap();
1071             vstore(); /* memcpy to current sp + potential padding */
1072
1073             /* Homogeneous float aggregate are loaded to VFP registers
1074                immediately since there is no way of loading data in multiple
1075                non consecutive VFP registers as what is done for other
1076                structures (see the use of todo). */
1077             if (i == VFP_STRUCT_CLASS) {
1078               int first = pplan->start, nb = pplan->end - first + 1;
1079               /* vpop.32 {pplan->start, ..., pplan->end} */
1080               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1081               /* No need to write the register used to a SValue since VFP regs
1082                  cannot be used for gcall_or_jmp */
1083             }
1084           } else {
1085             if (is_float(pplan->sval->type.t)) {
1086 #ifdef TCC_ARM_VFP
1087               r = vfpr(gv(RC_FLOAT)) << 12;
1088               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1089                 size = 4;
1090               else {
1091                 size = 8;
1092                 r |= 0x101; /* vpush.32 -> vpush.64 */
1093               }
1094               o(0xED2D0A01 + r); /* vpush */
1095 #else
1096               r = fpr(gv(RC_FLOAT)) << 12;
1097               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1098                 size = 4;
1099               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1100                 size = 8;
1101               else
1102                 size = LDOUBLE_SIZE;
1103
1104               if (size == 12)
1105                 r |= 0x400000;
1106               else if(size == 8)
1107                 r|=0x8000;
1108
1109               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1110 #endif
1111             } else {
1112               /* simple type (currently always same size) */
1113               /* XXX: implicit cast ? */
1114               size=4;
1115               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1116                 lexpand_nr();
1117                 size = 8;
1118                 r = gv(RC_INT);
1119                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1120                 vtop--;
1121               }
1122               r = gv(RC_INT);
1123               o(0xE52D0004|(intr(r)<<12)); /* push r */
1124             }
1125             if (i == STACK_CLASS && pplan->prev)
1126               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1127           }
1128           break;
1129
1130         case VFP_CLASS:
1131           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1132           if (pplan->start & 1) { /* Must be in upper part of double register */
1133             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1134             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1135           }
1136           break;
1137
1138         case CORE_CLASS:
1139           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1140             lexpand_nr();
1141             gv(regmask(pplan->end));
1142             pplan->sval->r2 = vtop->r;
1143             vtop--;
1144           }
1145           gv(regmask(pplan->start));
1146           /* Mark register as used so that gcall_or_jmp use another one
1147              (regs >=4 are free as never used to pass parameters) */
1148           pplan->sval->r = vtop->r;
1149           break;
1150       }
1151       vtop--;
1152     }
1153   }
1154
1155   /* Manually free remaining registers since next parameters are loaded
1156    * manually, without the help of gv(int). */
1157   save_regs(nb_args);
1158
1159   if(todo) {
1160     o(0xE8BD0000|todo); /* pop {todo} */
1161     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1162       int r;
1163       pplan->sval->r = pplan->start;
1164       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1165          can occupy more than 2 registers. Thus, we need to push on the value
1166          stack some fake parameter to have on SValue for each registers used
1167          by a structure (r2 is not used). */
1168       for (r = pplan->start + 1; r <= pplan->end; r++) {
1169         if (todo & (1 << r)) {
1170           nb_extra_sval++;
1171           vpushi(0);
1172           vtop->r = r;
1173         }
1174       }
1175     }
1176   }
1177   return nb_extra_sval;
1178 }
1179
1180 /* Generate function call. The function address is pushed first, then
1181    all the parameters in call order. This functions pops all the
1182    parameters and the function address. */
1183 void gfunc_call(int nb_args)
1184 {
1185   int r, args_size;
1186   int variadic, def_float_abi = float_abi;
1187   int todo;
1188   struct plan plan;
1189
1190 #ifdef TCC_ARM_EABI
1191   if (float_abi == ARM_HARD_FLOAT) {
1192     variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1193     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1194       float_abi = ARM_SOFTFP_FLOAT;
1195   }
1196 #endif
1197   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1198      VT_JMP anywhere except on the top of the stack because it would complicate
1199      the code generator. */
1200   r = vtop->r & VT_VALMASK;
1201   if (r == VT_CMP || (r & ~1) == VT_JMP)
1202     gv(RC_INT);
1203
1204   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1205
1206 #ifdef TCC_ARM_EABI
1207   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1208     args_size = (args_size + 7) & ~7;
1209     o(0xE24DD004); /* sub sp, sp, #4 */
1210   }
1211 #endif
1212
1213   nb_args += copy_params(nb_args, &plan, todo);
1214   tcc_free(plan.pplans);
1215
1216   /* Move fct SValue on top as required by gcall_or_jmp */
1217   vrotb(nb_args + 1);
1218   gcall_or_jmp(0);
1219   if (args_size)
1220       gadd_sp(args_size); /* pop all parameters passed on the stack */
1221 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1222   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1223     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1224       o(0xEE000A10); /*vmov s0, r0 */
1225     } else {
1226       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1227       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1228     }
1229   }
1230 #endif
1231   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1232   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1233   float_abi = def_float_abi;
1234 }
1235
1236 /* generate function prolog of type 't' */
1237 void gfunc_prolog(CType *func_type)
1238 {
1239   Sym *sym,*sym2;
1240   int n, nf, size, align, struct_ret = 0;
1241   int addr, pn, sn; /* pn=core, sn=stack */
1242   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1243   CType ret_type;
1244
1245   sym = func_type->ref;
1246   func_vt = sym->type;
1247   func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1248
1249   n = nf = 0;
1250   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1251       !gfunc_sret(&func_vt, func_var, &ret_type, &align))
1252   {
1253     n++;
1254     struct_ret = 1;
1255     func_vc = 12; /* Offset from fp of the place to store the result */
1256   }
1257   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1258     size = type_size(&sym2->type, &align);
1259 #ifdef TCC_ARM_EABI
1260     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1261         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1262       int tmpnf = assign_vfpreg(&avregs, align, size);
1263       tmpnf += (size + 3) / 4;
1264       nf = (tmpnf > nf) ? tmpnf : nf;
1265     } else
1266 #endif
1267     if (n < 4)
1268       n += (size + 3) / 4;
1269   }
1270   o(0xE1A0C00D); /* mov ip,sp */
1271   if (func_var)
1272     n=4;
1273   if (n) {
1274     if(n>4)
1275       n=4;
1276 #ifdef TCC_ARM_EABI
1277     n=(n+1)&-2;
1278 #endif
1279     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1280   }
1281   if (nf) {
1282     if (nf>16)
1283       nf=16;
1284     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1285     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1286   }
1287   o(0xE92D5800); /* save fp, ip, lr */
1288   o(0xE1A0B00D); /* mov fp, sp */
1289   func_sub_sp_offset = ind;
1290   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1291
1292 #ifdef TCC_ARM_EABI
1293   if (float_abi == ARM_HARD_FLOAT) {
1294     func_vc += nf * 4;
1295     avregs = AVAIL_REGS_INITIALIZER;
1296   }
1297 #endif
1298   pn = struct_ret, sn = 0;
1299   while ((sym = sym->next)) {
1300     CType *type;
1301     type = &sym->type;
1302     size = type_size(type, &align);
1303     size = (size + 3) >> 2;
1304     align = (align + 3) & ~3;
1305 #ifdef TCC_ARM_EABI
1306     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1307         || is_hgen_float_aggr(&sym->type))) {
1308       int fpn = assign_vfpreg(&avregs, align, size << 2);
1309       if (fpn >= 0)
1310         addr = fpn * 4;
1311       else
1312         goto from_stack;
1313     } else
1314 #endif
1315     if (pn < 4) {
1316 #ifdef TCC_ARM_EABI
1317         pn = (pn + (align-1)/4) & -(align/4);
1318 #endif
1319       addr = (nf + pn) * 4;
1320       pn += size;
1321       if (!sn && pn > 4)
1322         sn = (pn - 4);
1323     } else {
1324 from_stack:
1325 #ifdef TCC_ARM_EABI
1326         sn = (sn + (align-1)/4) & -(align/4);
1327 #endif
1328       addr = (n + nf + sn) * 4;
1329       sn += size;
1330     }
1331     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1332              addr + 12);
1333   }
1334   last_itod_magic=0;
1335   leaffunc = 1;
1336   loc = 0;
1337 }
1338
1339 /* generate function epilog */
1340 void gfunc_epilog(void)
1341 {
1342   uint32_t x;
1343   int diff;
1344   /* Copy float return value to core register if base standard is used and
1345      float computation is made with VFP */
1346 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1347   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1348     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1349       o(0xEE100A10); /* fmrs r0, s0 */
1350     else {
1351       o(0xEE100B10); /* fmrdl r0, d0 */
1352       o(0xEE301B10); /* fmrdh r1, d0 */
1353     }
1354   }
1355 #endif
1356   o(0xE89BA800); /* restore fp, sp, pc */
1357   diff = (-loc + 3) & -4;
1358 #ifdef TCC_ARM_EABI
1359   if(!leaffunc)
1360     diff = ((diff + 11) & -8) - 4;
1361 #endif
1362   if(diff > 0) {
1363     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1364     if(x)
1365       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1366     else {
1367       int addr;
1368       addr=ind;
1369       o(0xE59FC004); /* ldr ip,[pc+4] */
1370       o(0xE04BD00C); /* sub sp,fp,ip  */
1371       o(0xE1A0F00E); /* mov pc,lr */
1372       o(diff);
1373       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1374     }
1375   }
1376 }
1377
1378 /* generate a jump to a label */
1379 int gjmp(int t)
1380 {
1381   int r;
1382   r=ind;
1383   o(0xE0000000|encbranch(r,t,1));
1384   return r;
1385 }
1386
1387 /* generate a jump to a fixed address */
1388 void gjmp_addr(int a)
1389 {
1390   gjmp(a);
1391 }
1392
1393 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1394 int gtst(int inv, int t)
1395 {
1396   int v, r;
1397   uint32_t op;
1398   v = vtop->r & VT_VALMASK;
1399   r=ind;
1400   if (v == VT_CMP) {
1401     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1402     op|=encbranch(r,t,1);
1403     o(op);
1404     t=r;
1405   } else { /* VT_JMP || VT_JMPI */
1406     if ((v & 1) == inv) {
1407       if(!vtop->c.i)
1408         vtop->c.i=t;
1409       else {
1410         uint32_t *x;
1411         int p,lp;
1412         if(t) {
1413           p = vtop->c.i;
1414           do {
1415             p = decbranch(lp=p);
1416           } while(p);
1417           x = (uint32_t *)(cur_text_section->data + lp);
1418           *x &= 0xff000000;
1419           *x |= encbranch(lp,t,1);
1420         }
1421         t = vtop->c.i;
1422       }
1423     } else {
1424       t = gjmp(t);
1425       gsym(vtop->c.i);
1426     }
1427   }
1428   vtop--;
1429   return t;
1430 }
1431
1432 /* generate an integer binary operation */
1433 void gen_opi(int op)
1434 {
1435   int c, func = 0;
1436   uint32_t opc = 0, r, fr;
1437   unsigned short retreg = REG_IRET;
1438
1439   c=0;
1440   switch(op) {
1441     case '+':
1442       opc = 0x8;
1443       c=1;
1444       break;
1445     case TOK_ADDC1: /* add with carry generation */
1446       opc = 0x9;
1447       c=1;
1448       break;
1449     case '-':
1450       opc = 0x4;
1451       c=1;
1452       break;
1453     case TOK_SUBC1: /* sub with carry generation */
1454       opc = 0x5;
1455       c=1;
1456       break;
1457     case TOK_ADDC2: /* add with carry use */
1458       opc = 0xA;
1459       c=1;
1460       break;
1461     case TOK_SUBC2: /* sub with carry use */
1462       opc = 0xC;
1463       c=1;
1464       break;
1465     case '&':
1466       opc = 0x0;
1467       c=1;
1468       break;
1469     case '^':
1470       opc = 0x2;
1471       c=1;
1472       break;
1473     case '|':
1474       opc = 0x18;
1475       c=1;
1476       break;
1477     case '*':
1478       gv2(RC_INT, RC_INT);
1479       r = vtop[-1].r;
1480       fr = vtop[0].r;
1481       vtop--;
1482       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1483       return;
1484     case TOK_SHL:
1485       opc = 0;
1486       c=2;
1487       break;
1488     case TOK_SHR:
1489       opc = 1;
1490       c=2;
1491       break;
1492     case TOK_SAR:
1493       opc = 2;
1494       c=2;
1495       break;
1496     case '/':
1497     case TOK_PDIV:
1498       func=TOK___divsi3;
1499       c=3;
1500       break;
1501     case TOK_UDIV:
1502       func=TOK___udivsi3;
1503       c=3;
1504       break;
1505     case '%':
1506 #ifdef TCC_ARM_EABI
1507       func=TOK___aeabi_idivmod;
1508       retreg=REG_LRET;
1509 #else
1510       func=TOK___modsi3;
1511 #endif
1512       c=3;
1513       break;
1514     case TOK_UMOD:
1515 #ifdef TCC_ARM_EABI
1516       func=TOK___aeabi_uidivmod;
1517       retreg=REG_LRET;
1518 #else
1519       func=TOK___umodsi3;
1520 #endif
1521       c=3;
1522       break;
1523     case TOK_UMULL:
1524       gv2(RC_INT, RC_INT);
1525       r=intr(vtop[-1].r2=get_reg(RC_INT));
1526       c=vtop[-1].r;
1527       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1528       vtop--;
1529       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1530       return;
1531     default:
1532       opc = 0x15;
1533       c=1;
1534       break;
1535   }
1536   switch(c) {
1537     case 1:
1538       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1539         if(opc == 4 || opc == 5 || opc == 0xc) {
1540           vswap();
1541           opc|=2; // sub -> rsb
1542         }
1543       }
1544       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1545           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1546         gv(RC_INT);
1547       vswap();
1548       c=intr(gv(RC_INT));
1549       vswap();
1550       opc=0xE0000000|(opc<<20)|(c<<16);
1551       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1552         uint32_t x;
1553         x=stuff_const(opc|0x2000000,vtop->c.i);
1554         if(x) {
1555           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1556           o(x|(r<<12));
1557           goto done;
1558         }
1559       }
1560       fr=intr(gv(RC_INT));
1561       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1562       o(opc|(r<<12)|fr);
1563 done:
1564       vtop--;
1565       if (op >= TOK_ULT && op <= TOK_GT) {
1566         vtop->r = VT_CMP;
1567         vtop->c.i = op;
1568       }
1569       break;
1570     case 2:
1571       opc=0xE1A00000|(opc<<5);
1572       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1573           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1574         gv(RC_INT);
1575       vswap();
1576       r=intr(gv(RC_INT));
1577       vswap();
1578       opc|=r;
1579       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1580         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1581         c = vtop->c.i & 0x1f;
1582         o(opc|(c<<7)|(fr<<12));
1583       } else {
1584         fr=intr(gv(RC_INT));
1585         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1586         o(opc|(c<<12)|(fr<<8)|0x10);
1587       }
1588       vtop--;
1589       break;
1590     case 3:
1591       vpush_global_sym(&func_old_type, func);
1592       vrott(3);
1593       gfunc_call(2);
1594       vpushi(0);
1595       vtop->r = retreg;
1596       break;
1597     default:
1598       tcc_error("gen_opi %i unimplemented!",op);
1599   }
1600 }
1601
1602 #ifdef TCC_ARM_VFP
1603 static int is_zero(int i)
1604 {
1605   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1606     return 0;
1607   if (vtop[i].type.t == VT_FLOAT)
1608     return (vtop[i].c.f == 0.f);
1609   else if (vtop[i].type.t == VT_DOUBLE)
1610     return (vtop[i].c.d == 0.0);
1611   return (vtop[i].c.ld == 0.l);
1612 }
1613
1614 /* generate a floating point operation 'v = t1 op t2' instruction. The
1615  *    two operands are guaranted to have the same floating point type */
1616 void gen_opf(int op)
1617 {
1618   uint32_t x;
1619   int fneg=0,r;
1620   x=0xEE000A00|T2CPR(vtop->type.t);
1621   switch(op) {
1622     case '+':
1623       if(is_zero(-1))
1624         vswap();
1625       if(is_zero(0)) {
1626         vtop--;
1627         return;
1628       }
1629       x|=0x300000;
1630       break;
1631     case '-':
1632       x|=0x300040;
1633       if(is_zero(0)) {
1634         vtop--;
1635         return;
1636       }
1637       if(is_zero(-1)) {
1638         x|=0x810000; /* fsubX -> fnegX */
1639         vswap();
1640         vtop--;
1641         fneg=1;
1642       }
1643       break;
1644     case '*':
1645       x|=0x200000;
1646       break;
1647     case '/':
1648       x|=0x800000;
1649       break;
1650     default:
1651       if(op < TOK_ULT || op > TOK_GT) {
1652         tcc_error("unknown fp op %x!",op);
1653         return;
1654       }
1655       if(is_zero(-1)) {
1656         vswap();
1657         switch(op) {
1658           case TOK_LT: op=TOK_GT; break;
1659           case TOK_GE: op=TOK_ULE; break;
1660           case TOK_LE: op=TOK_GE; break;
1661           case TOK_GT: op=TOK_ULT; break;
1662         }
1663       }
1664       x|=0xB40040; /* fcmpX */
1665       if(op!=TOK_EQ && op!=TOK_NE)
1666         x|=0x80; /* fcmpX -> fcmpeX */
1667       if(is_zero(0)) {
1668         vtop--;
1669         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1670       } else {
1671         x|=vfpr(gv(RC_FLOAT));
1672         vswap();
1673         o(x|(vfpr(gv(RC_FLOAT))<<12));
1674         vtop--;
1675       }
1676       o(0xEEF1FA10); /* fmstat */
1677
1678       switch(op) {
1679         case TOK_LE: op=TOK_ULE; break;
1680         case TOK_LT: op=TOK_ULT; break;
1681         case TOK_UGE: op=TOK_GE; break;
1682         case TOK_UGT: op=TOK_GT; break;
1683       }
1684
1685       vtop->r = VT_CMP;
1686       vtop->c.i = op;
1687       return;
1688   }
1689   r=gv(RC_FLOAT);
1690   x|=vfpr(r);
1691   r=regmask(r);
1692   if(!fneg) {
1693     int r2;
1694     vswap();
1695     r2=gv(RC_FLOAT);
1696     x|=vfpr(r2)<<16;
1697     r|=regmask(r2);
1698   }
1699   vtop->r=get_reg_ex(RC_FLOAT,r);
1700   if(!fneg)
1701     vtop--;
1702   o(x|(vfpr(vtop->r)<<12));
1703 }
1704
1705 #else
1706 static uint32_t is_fconst()
1707 {
1708   long double f;
1709   uint32_t r;
1710   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1711     return 0;
1712   if (vtop->type.t == VT_FLOAT)
1713     f = vtop->c.f;
1714   else if (vtop->type.t == VT_DOUBLE)
1715     f = vtop->c.d;
1716   else
1717     f = vtop->c.ld;
1718   if(!ieee_finite(f))
1719     return 0;
1720   r=0x8;
1721   if(f<0.0) {
1722     r=0x18;
1723     f=-f;
1724   }
1725   if(f==0.0)
1726     return r;
1727   if(f==1.0)
1728     return r|1;
1729   if(f==2.0)
1730     return r|2;
1731   if(f==3.0)
1732     return r|3;
1733   if(f==4.0)
1734     return r|4;
1735   if(f==5.0)
1736     return r|5;
1737   if(f==0.5)
1738     return r|6;
1739   if(f==10.0)
1740     return r|7;
1741   return 0;
1742 }
1743
1744 /* generate a floating point operation 'v = t1 op t2' instruction. The
1745    two operands are guaranted to have the same floating point type */
1746 void gen_opf(int op)
1747 {
1748   uint32_t x, r, r2, c1, c2;
1749   //fputs("gen_opf\n",stderr);
1750   vswap();
1751   c1 = is_fconst();
1752   vswap();
1753   c2 = is_fconst();
1754   x=0xEE000100;
1755 #if LDOUBLE_SIZE == 8
1756   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1757     x|=0x80;
1758 #else
1759   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1760     x|=0x80;
1761   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1762     x|=0x80000;
1763 #endif
1764   switch(op)
1765   {
1766     case '+':
1767       if(!c2) {
1768         vswap();
1769         c2=c1;
1770       }
1771       vswap();
1772       r=fpr(gv(RC_FLOAT));
1773       vswap();
1774       if(c2) {
1775         if(c2>0xf)
1776           x|=0x200000; // suf
1777         r2=c2&0xf;
1778       } else {
1779         r2=fpr(gv(RC_FLOAT));
1780       }
1781       break;
1782     case '-':
1783       if(c2) {
1784         if(c2<=0xf)
1785           x|=0x200000; // suf
1786         r2=c2&0xf;
1787         vswap();
1788         r=fpr(gv(RC_FLOAT));
1789         vswap();
1790       } else if(c1 && c1<=0xf) {
1791         x|=0x300000; // rsf
1792         r2=c1;
1793         r=fpr(gv(RC_FLOAT));
1794         vswap();
1795       } else {
1796         x|=0x200000; // suf
1797         vswap();
1798         r=fpr(gv(RC_FLOAT));
1799         vswap();
1800         r2=fpr(gv(RC_FLOAT));
1801       }
1802       break;
1803     case '*':
1804       if(!c2 || c2>0xf) {
1805         vswap();
1806         c2=c1;
1807       }
1808       vswap();
1809       r=fpr(gv(RC_FLOAT));
1810       vswap();
1811       if(c2 && c2<=0xf)
1812         r2=c2;
1813       else
1814         r2=fpr(gv(RC_FLOAT));
1815       x|=0x100000; // muf
1816       break;
1817     case '/':
1818       if(c2 && c2<=0xf) {
1819         x|=0x400000; // dvf
1820         r2=c2;
1821         vswap();
1822         r=fpr(gv(RC_FLOAT));
1823         vswap();
1824       } else if(c1 && c1<=0xf) {
1825         x|=0x500000; // rdf
1826         r2=c1;
1827         r=fpr(gv(RC_FLOAT));
1828         vswap();
1829       } else {
1830         x|=0x400000; // dvf
1831         vswap();
1832         r=fpr(gv(RC_FLOAT));
1833         vswap();
1834         r2=fpr(gv(RC_FLOAT));
1835       }
1836       break;
1837     default:
1838       if(op >= TOK_ULT && op <= TOK_GT) {
1839         x|=0xd0f110; // cmfe
1840 /* bug (intention?) in Linux FPU emulator
1841    doesn't set carry if equal */
1842         switch(op) {
1843           case TOK_ULT:
1844           case TOK_UGE:
1845           case TOK_ULE:
1846           case TOK_UGT:
1847             tcc_error("unsigned comparision on floats?");
1848             break;
1849           case TOK_LT:
1850             op=TOK_Nset;
1851             break;
1852           case TOK_LE:
1853             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1854             break;
1855           case TOK_EQ:
1856           case TOK_NE:
1857             x&=~0x400000; // cmfe -> cmf
1858             break;
1859         }
1860         if(c1 && !c2) {
1861           c2=c1;
1862           vswap();
1863           switch(op) {
1864             case TOK_Nset:
1865               op=TOK_GT;
1866               break;
1867             case TOK_GE:
1868               op=TOK_ULE;
1869               break;
1870             case TOK_ULE:
1871               op=TOK_GE;
1872               break;
1873             case TOK_GT:
1874               op=TOK_Nset;
1875               break;
1876           }
1877         }
1878         vswap();
1879         r=fpr(gv(RC_FLOAT));
1880         vswap();
1881         if(c2) {
1882           if(c2>0xf)
1883             x|=0x200000;
1884           r2=c2&0xf;
1885         } else {
1886           r2=fpr(gv(RC_FLOAT));
1887         }
1888         vtop[-1].r = VT_CMP;
1889         vtop[-1].c.i = op;
1890       } else {
1891         tcc_error("unknown fp op %x!",op);
1892         return;
1893       }
1894   }
1895   if(vtop[-1].r == VT_CMP)
1896     c1=15;
1897   else {
1898     c1=vtop->r;
1899     if(r2&0x8)
1900       c1=vtop[-1].r;
1901     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1902     c1=fpr(vtop[-1].r);
1903   }
1904   vtop--;
1905   o(x|(r<<16)|(c1<<12)|r2);
1906 }
1907 #endif
1908
1909 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1910    and 'long long' cases. */
1911 ST_FUNC void gen_cvt_itof1(int t)
1912 {
1913   uint32_t r, r2;
1914   int bt;
1915   bt=vtop->type.t & VT_BTYPE;
1916   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1917 #ifndef TCC_ARM_VFP
1918     uint32_t dsize = 0;
1919 #endif
1920     r=intr(gv(RC_INT));
1921 #ifdef TCC_ARM_VFP
1922     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1923     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1924     r2|=r2<<12;
1925     if(!(vtop->type.t & VT_UNSIGNED))
1926       r2|=0x80;                /* fuitoX -> fsituX */
1927     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1928 #else
1929     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1930     if((t & VT_BTYPE) != VT_FLOAT)
1931       dsize=0x80;    /* flts -> fltd */
1932     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1933     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1934       uint32_t off = 0;
1935       o(0xE3500000|(r<<12));        /* cmp */
1936       r=fpr(get_reg(RC_FLOAT));
1937       if(last_itod_magic) {
1938         off=ind+8-last_itod_magic;
1939         off/=4;
1940         if(off>255)
1941           off=0;
1942       }
1943       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1944       if(!off) {
1945         o(0xEA000000);              /* b */
1946         last_itod_magic=ind;
1947         o(0x4F800000);              /* 4294967296.0f */
1948       }
1949       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1950     }
1951 #endif
1952     return;
1953   } else if(bt == VT_LLONG) {
1954     int func;
1955     CType *func_type = 0;
1956     if((t & VT_BTYPE) == VT_FLOAT) {
1957       func_type = &func_float_type;
1958       if(vtop->type.t & VT_UNSIGNED)
1959         func=TOK___floatundisf;
1960       else
1961         func=TOK___floatdisf;
1962 #if LDOUBLE_SIZE != 8
1963     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1964       func_type = &func_ldouble_type;
1965       if(vtop->type.t & VT_UNSIGNED)
1966         func=TOK___floatundixf;
1967       else
1968         func=TOK___floatdixf;
1969     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1970 #else
1971     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1972 #endif
1973       func_type = &func_double_type;
1974       if(vtop->type.t & VT_UNSIGNED)
1975         func=TOK___floatundidf;
1976       else
1977         func=TOK___floatdidf;
1978     }
1979     if(func_type) {
1980       vpush_global_sym(func_type, func);
1981       vswap();
1982       gfunc_call(1);
1983       vpushi(0);
1984       vtop->r=TREG_F0;
1985       return;
1986     }
1987   }
1988   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1989 }
1990
1991 /* convert fp to int 't' type */
1992 void gen_cvt_ftoi(int t)
1993 {
1994   uint32_t r, r2;
1995   int u, func = 0;
1996   u=t&VT_UNSIGNED;
1997   t&=VT_BTYPE;
1998   r2=vtop->type.t & VT_BTYPE;
1999   if(t==VT_INT) {
2000 #ifdef TCC_ARM_VFP
2001     r=vfpr(gv(RC_FLOAT));
2002     u=u?0:0x10000;
2003     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2004     r2=intr(vtop->r=get_reg(RC_INT));
2005     o(0xEE100A10|(r<<16)|(r2<<12));
2006     return;
2007 #else
2008     if(u) {
2009       if(r2 == VT_FLOAT)
2010         func=TOK___fixunssfsi;
2011 #if LDOUBLE_SIZE != 8
2012       else if(r2 == VT_LDOUBLE)
2013         func=TOK___fixunsxfsi;
2014       else if(r2 == VT_DOUBLE)
2015 #else
2016       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2017 #endif
2018         func=TOK___fixunsdfsi;
2019     } else {
2020       r=fpr(gv(RC_FLOAT));
2021       r2=intr(vtop->r=get_reg(RC_INT));
2022       o(0xEE100170|(r2<<12)|r);
2023       return;
2024     }
2025 #endif
2026   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2027     if(r2 == VT_FLOAT)
2028       func=TOK___fixsfdi;
2029 #if LDOUBLE_SIZE != 8
2030     else if(r2 == VT_LDOUBLE)
2031       func=TOK___fixxfdi;
2032     else if(r2 == VT_DOUBLE)
2033 #else
2034     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2035 #endif
2036       func=TOK___fixdfdi;
2037   }
2038   if(func) {
2039     vpush_global_sym(&func_old_type, func);
2040     vswap();
2041     gfunc_call(1);
2042     vpushi(0);
2043     if(t == VT_LLONG)
2044       vtop->r2 = REG_LRET;
2045     vtop->r = REG_IRET;
2046     return;
2047   }
2048   tcc_error("unimplemented gen_cvt_ftoi!");
2049 }
2050
2051 /* convert from one floating point type to another */
2052 void gen_cvt_ftof(int t)
2053 {
2054 #ifdef TCC_ARM_VFP
2055   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2056     uint32_t r = vfpr(gv(RC_FLOAT));
2057     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2058   }
2059 #else
2060   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2061   gv(RC_FLOAT);
2062 #endif
2063 }
2064
2065 /* computed goto support */
2066 void ggoto(void)
2067 {
2068   gcall_or_jmp(1);
2069   vtop--;
2070 }
2071
2072 /* Save the stack pointer onto the stack and return the location of its address */
2073 ST_FUNC void gen_vla_sp_save(int addr) {
2074     tcc_error("variable length arrays unsupported for this target");
2075 }
2076
2077 /* Restore the SP from a location on the stack */
2078 ST_FUNC void gen_vla_sp_restore(int addr) {
2079     tcc_error("variable length arrays unsupported for this target");
2080 }
2081
2082 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2083 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2084     tcc_error("variable length arrays unsupported for this target");
2085 }
2086
2087 /* end of ARM code generator */
2088 /*************************************************************/
2089 #endif
2090 /*************************************************************/