arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_ARM_VERSION
  38 # define TCC_ARM_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 /* ELF defines */
 134
 135 #define EM_TCC_TARGET EM_ARM
 136
 137 /* relocation type for 32 bit data relocation */
 138 #define R_DATA_32   R_ARM_ABS32
 139 #define R_DATA_PTR  R_ARM_ABS32
 140 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 141 #define R_GLOB_DAT  R_ARM_GLOB_DAT
 142 #define R_COPY      R_ARM_COPY
 143
 144 #define ELF_START_ADDR 0x00008000
 145 #define ELF_PAGE_SIZE  0x1000
 146
 147 enum float_abi {
 148     ARM_SOFTFP_FLOAT,
 149     ARM_HARD_FLOAT,
 150 };
 151
 152 /******************************************************/
 153 #else /* ! TARGET_DEFS_ONLY */
 154 /******************************************************/
 155 #include "tcc.h"
 156
 157 enum float_abi float_abi;
 158
 159 ST_DATA const int reg_classes[NB_REGS] = {
 160     /* r0 */ RC_INT | RC_R0,
 161     /* r1 */ RC_INT | RC_R1,
 162     /* r2 */ RC_INT | RC_R2,
 163     /* r3 */ RC_INT | RC_R3,
 164     /* r12 */ RC_INT | RC_R12,
 165     /* f0 */ RC_FLOAT | RC_F0,
 166     /* f1 */ RC_FLOAT | RC_F1,
 167     /* f2 */ RC_FLOAT | RC_F2,
 168     /* f3 */ RC_FLOAT | RC_F3,
 169 #ifdef TCC_ARM_VFP
 170  /* d4/s8 */ RC_FLOAT | RC_F4,
 171 /* d5/s10 */ RC_FLOAT | RC_F5,
 172 /* d6/s12 */ RC_FLOAT | RC_F6,
 173 /* d7/s14 */ RC_FLOAT | RC_F7,
 174 #endif
 175 };
 176
 177 static int func_sub_sp_offset, last_itod_magic;
 178 static int leaffunc;
 179
 180 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 181 static CType float_type, double_type, func_float_type, func_double_type;
 182 ST_FUNC void arm_init(struct TCCState *s)
 183 {
 184     float_type.t = VT_FLOAT;
 185     double_type.t = VT_DOUBLE;
 186     func_float_type.t = VT_FUNC;
 187     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 188     func_double_type.t = VT_FUNC;
 189     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 190
 191     float_abi = s->float_abi;
 192 #ifndef TCC_ARM_HARDFLOAT
 193     tcc_warning("soft float ABI currently not supported: default to softfp");
 194 #endif
 195 }
 196 #else
 197 #define func_float_type func_old_type
 198 #define func_double_type func_old_type
 199 #define func_ldouble_type func_old_type
 200 ST_FUNC void arm_init(struct TCCState *s)
 201 {
 202 #if !defined (TCC_ARM_VFP)
 203     tcc_warning("Support for FPA is deprecated and will be removed in next"
 204                 " release");
 205 #endif
 206 #if !defined (TCC_ARM_EABI)
 207     tcc_warning("Support for OABI is deprecated and will be removed in next"
 208                 " release");
 209 #endif
 210 }
 211 #endif
 212
 213 static int two2mask(int a,int b) {
 214   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 215 }
 216
 217 static int regmask(int r) {
 218   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 219 }
 220
 221 /******************************************************/
 222
 223 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 224 char *default_elfinterp(struct TCCState *s)
 225 {
 226     if (s->float_abi == ARM_HARD_FLOAT)
 227         return "/lib/ld-linux-armhf.so.3";
 228     else
 229         return "/lib/ld-linux.so.3";
 230 }
 231 #endif
 232
 233 void o(uint32_t i)
 234 {
 235   /* this is a good place to start adding big-endian support*/
 236   int ind1;
 237
 238   ind1 = ind + 4;
 239   if (!cur_text_section)
 240     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 241          "can't evaluate constant expressions outside of a function.");
 242   if (ind1 > cur_text_section->data_allocated)
 243     section_realloc(cur_text_section, ind1);
 244   cur_text_section->data[ind++] = i&255;
 245   i>>=8;
 246   cur_text_section->data[ind++] = i&255;
 247   i>>=8;
 248   cur_text_section->data[ind++] = i&255;
 249   i>>=8;
 250   cur_text_section->data[ind++] = i;
 251 }
 252
 253 static uint32_t stuff_const(uint32_t op, uint32_t c)
 254 {
 255   int try_neg=0;
 256   uint32_t nc = 0, negop = 0;
 257
 258   switch(op&0x1F00000)
 259   {
 260     case 0x800000: //add
 261     case 0x400000: //sub
 262       try_neg=1;
 263       negop=op^0xC00000;
 264       nc=-c;
 265       break;
 266     case 0x1A00000: //mov
 267     case 0x1E00000: //mvn
 268       try_neg=1;
 269       negop=op^0x400000;
 270       nc=~c;
 271       break;
 272     case 0x200000: //xor
 273       if(c==~0)
 274         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 275       break;
 276     case 0x0: //and
 277       if(c==~0)
 278         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 279     case 0x1C00000: //bic
 280       try_neg=1;
 281       negop=op^0x1C00000;
 282       nc=~c;
 283       break;
 284     case 0x1800000: //orr
 285       if(c==~0)
 286         return (op&0xFFF0FFFF)|0x1E00000;
 287       break;
 288   }
 289   do {
 290     uint32_t m;
 291     int i;
 292     if(c<256) /* catch undefined <<32 */
 293       return op|c;
 294     for(i=2;i<32;i+=2) {
 295       m=(0xff>>i)|(0xff<<(32-i));
 296       if(!(c&~m))
 297         return op|(i<<7)|(c<<i)|(c>>(32-i));
 298     }
 299     op=negop;
 300     c=nc;
 301   } while(try_neg--);
 302   return 0;
 303 }
 304
 305
 306 //only add,sub
 307 void stuff_const_harder(uint32_t op, uint32_t v) {
 308   uint32_t x;
 309   x=stuff_const(op,v);
 310   if(x)
 311     o(x);
 312   else {
 313     uint32_t a[16], nv, no, o2, n2;
 314     int i,j,k;
 315     a[0]=0xff;
 316     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 317     for(i=1;i<16;i++)
 318       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 319     for(i=0;i<12;i++)
 320       for(j=i<4?i+12:15;j>=i+4;j--)
 321         if((v&(a[i]|a[j]))==v) {
 322           o(stuff_const(op,v&a[i]));
 323           o(stuff_const(o2,v&a[j]));
 324           return;
 325         }
 326     no=op^0xC00000;
 327     n2=o2^0xC00000;
 328     nv=-v;
 329     for(i=0;i<12;i++)
 330       for(j=i<4?i+12:15;j>=i+4;j--)
 331         if((nv&(a[i]|a[j]))==nv) {
 332           o(stuff_const(no,nv&a[i]));
 333           o(stuff_const(n2,nv&a[j]));
 334           return;
 335         }
 336     for(i=0;i<8;i++)
 337       for(j=i+4;j<12;j++)
 338         for(k=i<4?i+12:15;k>=j+4;k--)
 339           if((v&(a[i]|a[j]|a[k]))==v) {
 340             o(stuff_const(op,v&a[i]));
 341             o(stuff_const(o2,v&a[j]));
 342             o(stuff_const(o2,v&a[k]));
 343             return;
 344           }
 345     no=op^0xC00000;
 346     nv=-v;
 347     for(i=0;i<8;i++)
 348       for(j=i+4;j<12;j++)
 349         for(k=i<4?i+12:15;k>=j+4;k--)
 350           if((nv&(a[i]|a[j]|a[k]))==nv) {
 351             o(stuff_const(no,nv&a[i]));
 352             o(stuff_const(n2,nv&a[j]));
 353             o(stuff_const(n2,nv&a[k]));
 354             return;
 355           }
 356     o(stuff_const(op,v&a[0]));
 357     o(stuff_const(o2,v&a[4]));
 358     o(stuff_const(o2,v&a[8]));
 359     o(stuff_const(o2,v&a[12]));
 360   }
 361 }
 362
 363 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 364 {
 365   addr-=pos+8;
 366   addr/=4;
 367   if(addr>=0x1000000 || addr<-0x1000000) {
 368     if(fail)
 369       tcc_error("FIXME: function bigger than 32MB");
 370     return 0;
 371   }
 372   return 0x0A000000|(addr&0xffffff);
 373 }
 374
 375 int decbranch(int pos)
 376 {
 377   int x;
 378   x=*(uint32_t *)(cur_text_section->data + pos);
 379   x&=0x00ffffff;
 380   if(x&0x800000)
 381     x-=0x1000000;
 382   return x*4+pos+8;
 383 }
 384
 385 /* output a symbol and patch all calls to it */
 386 void gsym_addr(int t, int a)
 387 {
 388   uint32_t *x;
 389   int lt;
 390   while(t) {
 391     x=(uint32_t *)(cur_text_section->data + t);
 392     t=decbranch(lt=t);
 393     if(a==lt+4)
 394       *x=0xE1A00000; // nop
 395     else {
 396       *x &= 0xff000000;
 397       *x |= encbranch(lt,a,1);
 398     }
 399   }
 400 }
 401
 402 void gsym(int t)
 403 {
 404   gsym_addr(t, ind);
 405 }
 406
 407 #ifdef TCC_ARM_VFP
 408 static uint32_t vfpr(int r)
 409 {
 410   if(r<TREG_F0 || r>TREG_F7)
 411     tcc_error("compiler error! register %i is no vfp register",r);
 412   return r - TREG_F0;
 413 }
 414 #else
 415 static uint32_t fpr(int r)
 416 {
 417   if(r<TREG_F0 || r>TREG_F3)
 418     tcc_error("compiler error! register %i is no fpa register",r);
 419   return r - TREG_F0;
 420 }
 421 #endif
 422
 423 static uint32_t intr(int r)
 424 {
 425   if(r == TREG_R12)
 426     return 12;
 427   if(r >= TREG_R0 && r <= TREG_R3)
 428     return r - TREG_R0;
 429   if (r >= TREG_SP && r <= TREG_LR)
 430     return r + (13 - TREG_SP);
 431   tcc_error("compiler error! register %i is no int register",r);
 432 }
 433
 434 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 435 {
 436   if(*off>maxoff || *off&((1<<shift)-1)) {
 437     uint32_t x, y;
 438     x=0xE280E000;
 439     if(*sgn)
 440       x=0xE240E000;
 441     x|=(*base)<<16;
 442     *base=14; // lr
 443     y=stuff_const(x,*off&~maxoff);
 444     if(y) {
 445       o(y);
 446       *off&=maxoff;
 447       return;
 448     }
 449     y=stuff_const(x,(*off+maxoff)&~maxoff);
 450     if(y) {
 451       o(y);
 452       *sgn=!*sgn;
 453       *off=((*off+maxoff)&~maxoff)-*off;
 454       return;
 455     }
 456     stuff_const_harder(x,*off&~maxoff);
 457     *off&=maxoff;
 458   }
 459 }
 460
 461 static uint32_t mapcc(int cc)
 462 {
 463   switch(cc)
 464   {
 465     case TOK_ULT:
 466       return 0x30000000; /* CC/LO */
 467     case TOK_UGE:
 468       return 0x20000000; /* CS/HS */
 469     case TOK_EQ:
 470       return 0x00000000; /* EQ */
 471     case TOK_NE:
 472       return 0x10000000; /* NE */
 473     case TOK_ULE:
 474       return 0x90000000; /* LS */
 475     case TOK_UGT:
 476       return 0x80000000; /* HI */
 477     case TOK_Nset:
 478       return 0x40000000; /* MI */
 479     case TOK_Nclear:
 480       return 0x50000000; /* PL */
 481     case TOK_LT:
 482       return 0xB0000000; /* LT */
 483     case TOK_GE:
 484       return 0xA0000000; /* GE */
 485     case TOK_LE:
 486       return 0xD0000000; /* LE */
 487     case TOK_GT:
 488       return 0xC0000000; /* GT */
 489   }
 490   tcc_error("unexpected condition code");
 491   return 0xE0000000; /* AL */
 492 }
 493
 494 static int negcc(int cc)
 495 {
 496   switch(cc)
 497   {
 498     case TOK_ULT:
 499       return TOK_UGE;
 500     case TOK_UGE:
 501       return TOK_ULT;
 502     case TOK_EQ:
 503       return TOK_NE;
 504     case TOK_NE:
 505       return TOK_EQ;
 506     case TOK_ULE:
 507       return TOK_UGT;
 508     case TOK_UGT:
 509       return TOK_ULE;
 510     case TOK_Nset:
 511       return TOK_Nclear;
 512     case TOK_Nclear:
 513       return TOK_Nset;
 514     case TOK_LT:
 515       return TOK_GE;
 516     case TOK_GE:
 517       return TOK_LT;
 518     case TOK_LE:
 519       return TOK_GT;
 520     case TOK_GT:
 521       return TOK_LE;
 522   }
 523   tcc_error("unexpected condition code");
 524   return TOK_NE;
 525 }
 526
 527 /* load 'r' from value 'sv' */
 528 void load(int r, SValue *sv)
 529 {
 530   int v, ft, fc, fr, sign;
 531   uint32_t op;
 532   SValue v1;
 533
 534   fr = sv->r;
 535   ft = sv->type.t;
 536   fc = sv->c.i;
 537
 538   if(fc>=0)
 539     sign=0;
 540   else {
 541     sign=1;
 542     fc=-fc;
 543   }
 544
 545   v = fr & VT_VALMASK;
 546   if (fr & VT_LVAL) {
 547     uint32_t base = 0xB; // fp
 548     if(v == VT_LLOCAL) {
 549       v1.type.t = VT_PTR;
 550       v1.r = VT_LOCAL | VT_LVAL;
 551       v1.c.i = sv->c.i;
 552       load(TREG_LR, &v1);
 553       base = 14; /* lr */
 554       fc=sign=0;
 555       v=VT_LOCAL;
 556     } else if(v == VT_CONST) {
 557       v1.type.t = VT_PTR;
 558       v1.r = fr&~VT_LVAL;
 559       v1.c.i = sv->c.i;
 560       v1.sym=sv->sym;
 561       load(TREG_LR, &v1);
 562       base = 14; /* lr */
 563       fc=sign=0;
 564       v=VT_LOCAL;
 565     } else if(v < VT_CONST) {
 566       base=intr(v);
 567       fc=sign=0;
 568       v=VT_LOCAL;
 569     }
 570     if(v == VT_LOCAL) {
 571       if(is_float(ft)) {
 572         calcaddr(&base,&fc,&sign,1020,2);
 573 #ifdef TCC_ARM_VFP
 574         op=0xED100A00; /* flds */
 575         if(!sign)
 576           op|=0x800000;
 577         if ((ft & VT_BTYPE) != VT_FLOAT)
 578           op|=0x100;   /* flds -> fldd */
 579         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 580 #else
 581         op=0xED100100;
 582         if(!sign)
 583           op|=0x800000;
 584 #if LDOUBLE_SIZE == 8
 585         if ((ft & VT_BTYPE) != VT_FLOAT)
 586           op|=0x8000;
 587 #else
 588         if ((ft & VT_BTYPE) == VT_DOUBLE)
 589           op|=0x8000;
 590         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 591           op|=0x400000;
 592 #endif
 593         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 594 #endif
 595       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 596                 || (ft & VT_BTYPE) == VT_SHORT) {
 597         calcaddr(&base,&fc,&sign,255,0);
 598         op=0xE1500090;
 599         if ((ft & VT_BTYPE) == VT_SHORT)
 600           op|=0x20;
 601         if ((ft & VT_UNSIGNED) == 0)
 602           op|=0x40;
 603         if(!sign)
 604           op|=0x800000;
 605         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 606       } else {
 607         calcaddr(&base,&fc,&sign,4095,0);
 608         op=0xE5100000;
 609         if(!sign)
 610           op|=0x800000;
 611         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 612           op|=0x400000;
 613         o(op|(intr(r)<<12)|fc|(base<<16));
 614       }
 615       return;
 616     }
 617   } else {
 618     if (v == VT_CONST) {
 619       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
 620       if (fr & VT_SYM || !op) {
 621         o(0xE59F0000|(intr(r)<<12));
 622         o(0xEA000000);
 623         if(fr & VT_SYM)
 624           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 625         o(sv->c.i);
 626       } else
 627         o(op);
 628       return;
 629     } else if (v == VT_LOCAL) {
 630       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
 631       if (fr & VT_SYM || !op) {
 632         o(0xE59F0000|(intr(r)<<12));
 633         o(0xEA000000);
 634         if(fr & VT_SYM) // needed ?
 635           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 636         o(sv->c.i);
 637         o(0xE08B0000|(intr(r)<<12)|intr(r));
 638       } else
 639         o(op);
 640       return;
 641     } else if(v == VT_CMP) {
 642       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
 643       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
 644       return;
 645     } else if (v == VT_JMP || v == VT_JMPI) {
 646       int t;
 647       t = v & 1;
 648       o(0xE3A00000|(intr(r)<<12)|t);
 649       o(0xEA000000);
 650       gsym(sv->c.i);
 651       o(0xE3A00000|(intr(r)<<12)|(t^1));
 652       return;
 653     } else if (v < VT_CONST) {
 654       if(is_float(ft))
 655 #ifdef TCC_ARM_VFP
 656         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 657 #else
 658         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 659 #endif
 660       else
 661         o(0xE1A00000|(intr(r)<<12)|intr(v));
 662       return;
 663     }
 664   }
 665   tcc_error("load unimplemented!");
 666 }
 667
 668 /* store register 'r' in lvalue 'v' */
 669 void store(int r, SValue *sv)
 670 {
 671   SValue v1;
 672   int v, ft, fc, fr, sign;
 673   uint32_t op;
 674
 675   fr = sv->r;
 676   ft = sv->type.t;
 677   fc = sv->c.i;
 678
 679   if(fc>=0)
 680     sign=0;
 681   else {
 682     sign=1;
 683     fc=-fc;
 684   }
 685
 686   v = fr & VT_VALMASK;
 687   if (fr & VT_LVAL || fr == VT_LOCAL) {
 688     uint32_t base = 0xb; /* fp */
 689     if(v < VT_CONST) {
 690       base=intr(v);
 691       v=VT_LOCAL;
 692       fc=sign=0;
 693     } else if(v == VT_CONST) {
 694       v1.type.t = ft;
 695       v1.r = fr&~VT_LVAL;
 696       v1.c.i = sv->c.i;
 697       v1.sym=sv->sym;
 698       load(TREG_LR, &v1);
 699       base = 14; /* lr */
 700       fc=sign=0;
 701       v=VT_LOCAL;
 702     }
 703     if(v == VT_LOCAL) {
 704        if(is_float(ft)) {
 705         calcaddr(&base,&fc,&sign,1020,2);
 706 #ifdef TCC_ARM_VFP
 707         op=0xED000A00; /* fsts */
 708         if(!sign)
 709           op|=0x800000;
 710         if ((ft & VT_BTYPE) != VT_FLOAT)
 711           op|=0x100;   /* fsts -> fstd */
 712         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 713 #else
 714         op=0xED000100;
 715         if(!sign)
 716           op|=0x800000;
 717 #if LDOUBLE_SIZE == 8
 718         if ((ft & VT_BTYPE) != VT_FLOAT)
 719           op|=0x8000;
 720 #else
 721         if ((ft & VT_BTYPE) == VT_DOUBLE)
 722           op|=0x8000;
 723         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 724           op|=0x400000;
 725 #endif
 726         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 727 #endif
 728         return;
 729       } else if((ft & VT_BTYPE) == VT_SHORT) {
 730         calcaddr(&base,&fc,&sign,255,0);
 731         op=0xE14000B0;
 732         if(!sign)
 733           op|=0x800000;
 734         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 735       } else {
 736         calcaddr(&base,&fc,&sign,4095,0);
 737         op=0xE5000000;
 738         if(!sign)
 739           op|=0x800000;
 740         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 741           op|=0x400000;
 742         o(op|(intr(r)<<12)|fc|(base<<16));
 743       }
 744       return;
 745     }
 746   }
 747   tcc_error("store unimplemented");
 748 }
 749
 750 static void gadd_sp(int val)
 751 {
 752   stuff_const_harder(0xE28DD000,val);
 753 }
 754
 755 /* 'is_jmp' is '1' if it is a jump */
 756 static void gcall_or_jmp(int is_jmp)
 757 {
 758   int r;
 759   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 760     uint32_t x;
 761     /* constant case */
 762     x=encbranch(ind,ind+vtop->c.i,0);
 763     if(x) {
 764       if (vtop->r & VT_SYM) {
 765         /* relocation case */
 766         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 767       } else
 768         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 769       o(x|(is_jmp?0xE0000000:0xE1000000));
 770     } else {
 771       if(!is_jmp)
 772         o(0xE28FE004); // add lr,pc,#4
 773       o(0xE51FF004);   // ldr pc,[pc,#-4]
 774       if (vtop->r & VT_SYM)
 775         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 776       o(vtop->c.i);
 777     }
 778   } else {
 779     /* otherwise, indirect call */
 780     r = gv(RC_INT);
 781     if(!is_jmp)
 782       o(0xE1A0E00F);       // mov lr,pc
 783     o(0xE1A0F000|intr(r)); // mov pc,r
 784   }
 785 }
 786
 787 static int unalias_ldbl(int btype)
 788 {
 789 #if LDOUBLE_SIZE == 8
 790     if (btype == VT_LDOUBLE)
 791       btype = VT_DOUBLE;
 792 #endif
 793     return btype;
 794 }
 795
 796 /* Return whether a structure is an homogeneous float aggregate or not.
 797    The answer is true if all the elements of the structure are of the same
 798    primitive float type and there is less than 4 elements.
 799
 800    type: the type corresponding to the structure to be tested */
 801 static int is_hgen_float_aggr(CType *type)
 802 {
 803   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 804     struct Sym *ref;
 805     int btype, nb_fields = 0;
 806
 807     ref = type->ref->next;
 808     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 809     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 810       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 811       return !ref && nb_fields <= 4;
 812     }
 813   }
 814   return 0;
 815 }
 816
 817 struct avail_regs {
 818   signed char avail[3]; /* 3 holes max with only float and double alignments */
 819   int first_hole; /* first available hole */
 820   int last_hole; /* last available hole (none if equal to first_hole) */
 821   int first_free_reg; /* next free register in the sequence, hole excluded */
 822 };
 823
 824 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 825
 826 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 827    param) according to the rules described in the procedure call standard for
 828    the ARM architecture (AAPCS). If found, the registers are assigned to this
 829    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 830    and the parameter is a single float.
 831
 832    avregs: opaque structure to keep track of available VFP co-processor regs
 833    align: alignment contraints for the param, as returned by type_size()
 834    size: size of the parameter, as returned by type_size() */
 835 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 836 {
 837   int first_reg = 0;
 838
 839   if (avregs->first_free_reg == -1)
 840     return -1;
 841   if (align >> 3) { /* double alignment */
 842     first_reg = avregs->first_free_reg;
 843     /* alignment contraint not respected so use next reg and record hole */
 844     if (first_reg & 1)
 845       avregs->avail[avregs->last_hole++] = first_reg++;
 846   } else { /* no special alignment (float or array of float) */
 847     /* if single float and a hole is available, assign the param to it */
 848     if (size == 4 && avregs->first_hole != avregs->last_hole)
 849       return avregs->avail[avregs->first_hole++];
 850     else
 851       first_reg = avregs->first_free_reg;
 852   }
 853   if (first_reg + size / 4 <= 16) {
 854     avregs->first_free_reg = first_reg + size / 4;
 855     return first_reg;
 856   }
 857   avregs->first_free_reg = -1;
 858   return -1;
 859 }
 860
 861 /* Returns whether all params need to be passed in core registers or not.
 862    This is the case for function part of the runtime ABI. */
 863 int floats_in_core_regs(SValue *sval)
 864 {
 865   if (!sval->sym)
 866     return 0;
 867
 868   switch (sval->sym->v) {
 869     case TOK___floatundisf:
 870     case TOK___floatundidf:
 871     case TOK___fixunssfdi:
 872     case TOK___fixunsdfdi:
 873 #ifndef TCC_ARM_VFP
 874     case TOK___fixunsxfdi:
 875 #endif
 876     case TOK___floatdisf:
 877     case TOK___floatdidf:
 878     case TOK___fixsfdi:
 879     case TOK___fixdfdi:
 880       return 1;
 881
 882     default:
 883       return 0;
 884   }
 885 }
 886
 887 /* Return the number of registers needed to return the struct, or 0 if
 888    returning via struct pointer. */
 889 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 890 #ifdef TCC_ARM_EABI
 891     int size, align;
 892     size = type_size(vt, &align);
 893     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 894         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 895         *ret_align = 8;
 896         *regsize = 8;
 897         ret->ref = NULL;
 898         ret->t = VT_DOUBLE;
 899         return (size + 7) >> 3;
 900     } else if (size <= 4) {
 901         *ret_align = 4;
 902         *regsize = 4;
 903         ret->ref = NULL;
 904         ret->t = VT_INT;
 905         return 1;
 906     } else
 907         return 0;
 908 #else
 909     return 0;
 910 #endif
 911 }
 912
 913 /* Parameters are classified according to how they are copied to their final
 914    destination for the function call. Because the copying is performed class
 915    after class according to the order in the union below, it is important that
 916    some constraints about the order of the members of this union are respected:
 917    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 918    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 919      VFP_STRUCT_CLASS;
 920    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 921    See the comment for the main loop in copy_params() for the reason. */
 922 enum reg_class {
 923         STACK_CLASS = 0,
 924         CORE_STRUCT_CLASS,
 925         VFP_CLASS,
 926         VFP_STRUCT_CLASS,
 927         CORE_CLASS,
 928         NB_CLASSES
 929 };
 930
 931 struct param_plan {
 932     int start; /* first reg or addr used depending on the class */
 933     int end; /* last reg used or next free addr depending on the class */
 934     SValue *sval; /* pointer to SValue on the value stack */
 935     struct param_plan *prev; /*  previous element in this class */
 936 };
 937
 938 struct plan {
 939     struct param_plan *pplans; /* array of all the param plans */
 940     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 941 };
 942
 943 #define add_param_plan(plan,pplan,class)                        \
 944     do {                                                        \
 945         pplan.prev = plan->clsplans[class];                     \
 946         plan->pplans[plan ## _nb] = pplan;                      \
 947         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
 948     } while(0)
 949
 950 /* Assign parameters to registers and stack with alignment according to the
 951    rules in the procedure call standard for the ARM architecture (AAPCS).
 952    The overall assignment is recorded in an array of per parameter structures
 953    called parameter plans. The parameter plans are also further organized in a
 954    number of linked lists, one per class of parameter (see the comment for the
 955    definition of union reg_class).
 956
 957    nb_args: number of parameters of the function for which a call is generated
 958    float_abi: float ABI in use for this function call
 959    plan: the structure where the overall assignment is recorded
 960    todo: a bitmap that record which core registers hold a parameter
 961
 962    Returns the amount of stack space needed for parameter passing
 963
 964    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 965    is the responsibility of the caller to free this array once used (ie not
 966    before copy_params). */
 967 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
 968 {
 969   int i, size, align;
 970   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 971   int plan_nb = 0;
 972   struct param_plan pplan;
 973   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 974
 975   ncrn = nsaa = 0;
 976   *todo = 0;
 977   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 978   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 979   for(i = nb_args; i-- ;) {
 980     int j, start_vfpreg = 0;
 981     CType type = vtop[-i].type;
 982     type.t &= ~VT_ARRAY;
 983     size = type_size(&type, &align);
 984     size = (size + 3) & ~3;
 985     align = (align + 3) & ~3;
 986     switch(vtop[-i].type.t & VT_BTYPE) {
 987       case VT_STRUCT:
 988       case VT_FLOAT:
 989       case VT_DOUBLE:
 990       case VT_LDOUBLE:
 991       if (float_abi == ARM_HARD_FLOAT) {
 992         int is_hfa = 0; /* Homogeneous float aggregate */
 993
 994         if (is_float(vtop[-i].type.t)
 995             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 996           int end_vfpreg;
 997
 998           start_vfpreg = assign_vfpreg(&avregs, align, size);
 999           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
1000           if (start_vfpreg >= 0) {
1001             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
1002             if (is_hfa)
1003               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
1004             else
1005               add_param_plan(plan, pplan, VFP_CLASS);
1006             continue;
1007           } else
1008             break;
1009         }
1010       }
1011       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
1012       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
1013         /* The parameter is allocated both in core register and on stack. As
1014          * such, it can be of either class: it would either be the last of
1015          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1016         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
1017           *todo|=(1<<j);
1018         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1019         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1020         ncrn += size/4;
1021         if (ncrn > 4)
1022           nsaa = (ncrn - 4) * 4;
1023       } else {
1024         ncrn = 4;
1025         break;
1026       }
1027       continue;
1028       default:
1029       if (ncrn < 4) {
1030         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1031
1032         if (is_long) {
1033           ncrn = (ncrn + 1) & -2;
1034           if (ncrn == 4)
1035             break;
1036         }
1037         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1038         ncrn++;
1039         if (is_long)
1040           pplan.end = ncrn++;
1041         add_param_plan(plan, pplan, CORE_CLASS);
1042         continue;
1043       }
1044     }
1045     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1046     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1047     add_param_plan(plan, pplan, STACK_CLASS);
1048     nsaa += size; /* size already rounded up before */
1049   }
1050   return nsaa;
1051 }
1052
1053 #undef add_param_plan
1054
1055 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1056    function call.
1057
1058    nb_args: number of parameters the function take
1059    plan: the overall assignment plan for parameters
1060    todo: a bitmap indicating what core reg will hold a parameter
1061
1062    Returns the number of SValue added by this function on the value stack */
1063 static int copy_params(int nb_args, struct plan *plan, int todo)
1064 {
1065   int size, align, r, i, nb_extra_sval = 0;
1066   struct param_plan *pplan;
1067   int pass = 0;
1068
1069    /* Several constraints require parameters to be copied in a specific order:
1070       - structures are copied to the stack before being loaded in a reg;
1071       - floats loaded to an odd numbered VFP reg are first copied to the
1072         preceding even numbered VFP reg and then moved to the next VFP reg.
1073
1074       It is thus important that:
1075       - structures assigned to core regs must be copied after parameters
1076         assigned to the stack but before structures assigned to VFP regs because
1077         a structure can lie partly in core registers and partly on the stack;
1078       - parameters assigned to the stack and all structures be copied before
1079         parameters assigned to a core reg since copying a parameter to the stack
1080         require using a core reg;
1081       - parameters assigned to VFP regs be copied before structures assigned to
1082         VFP regs as the copy might use an even numbered VFP reg that already
1083         holds part of a structure. */
1084 again:
1085   for(i = 0; i < NB_CLASSES; i++) {
1086     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1087
1088       if (pass
1089           && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
1090         continue;
1091
1092       vpushv(pplan->sval);
1093       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1094       switch(i) {
1095         case STACK_CLASS:
1096         case CORE_STRUCT_CLASS:
1097         case VFP_STRUCT_CLASS:
1098           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1099             int padding = 0;
1100             size = type_size(&pplan->sval->type, &align);
1101             /* align to stack align size */
1102             size = (size + 3) & ~3;
1103             if (i == STACK_CLASS && pplan->prev)
1104               padding = pplan->start - pplan->prev->end;
1105             size += padding; /* Add padding if any */
1106             /* allocate the necessary size on stack */
1107             gadd_sp(-size);
1108             /* generate structure store */
1109             r = get_reg(RC_INT);
1110             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1111             vset(&vtop->type, r | VT_LVAL, 0);
1112             vswap();
1113             vstore(); /* memcpy to current sp + potential padding */
1114
1115             /* Homogeneous float aggregate are loaded to VFP registers
1116                immediately since there is no way of loading data in multiple
1117                non consecutive VFP registers as what is done for other
1118                structures (see the use of todo). */
1119             if (i == VFP_STRUCT_CLASS) {
1120               int first = pplan->start, nb = pplan->end - first + 1;
1121               /* vpop.32 {pplan->start, ..., pplan->end} */
1122               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1123               /* No need to write the register used to a SValue since VFP regs
1124                  cannot be used for gcall_or_jmp */
1125             }
1126           } else {
1127             if (is_float(pplan->sval->type.t)) {
1128 #ifdef TCC_ARM_VFP
1129               r = vfpr(gv(RC_FLOAT)) << 12;
1130               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1131                 size = 4;
1132               else {
1133                 size = 8;
1134                 r |= 0x101; /* vpush.32 -> vpush.64 */
1135               }
1136               o(0xED2D0A01 + r); /* vpush */
1137 #else
1138               r = fpr(gv(RC_FLOAT)) << 12;
1139               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1140                 size = 4;
1141               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1142                 size = 8;
1143               else
1144                 size = LDOUBLE_SIZE;
1145
1146               if (size == 12)
1147                 r |= 0x400000;
1148               else if(size == 8)
1149                 r|=0x8000;
1150
1151               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1152 #endif
1153             } else {
1154               /* simple type (currently always same size) */
1155               /* XXX: implicit cast ? */
1156               size=4;
1157               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1158                 lexpand_nr();
1159                 size = 8;
1160                 r = gv(RC_INT);
1161                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1162                 vtop--;
1163               }
1164               r = gv(RC_INT);
1165               o(0xE52D0004|(intr(r)<<12)); /* push r */
1166             }
1167             if (i == STACK_CLASS && pplan->prev)
1168               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1169           }
1170           break;
1171
1172         case VFP_CLASS:
1173           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1174           if (pplan->start & 1) { /* Must be in upper part of double register */
1175             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1176             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1177           }
1178           break;
1179
1180         case CORE_CLASS:
1181           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1182             lexpand_nr();
1183             gv(regmask(pplan->end));
1184             pplan->sval->r2 = vtop->r;
1185             vtop--;
1186           }
1187           gv(regmask(pplan->start));
1188           /* Mark register as used so that gcall_or_jmp use another one
1189              (regs >=4 are free as never used to pass parameters) */
1190           pplan->sval->r = vtop->r;
1191           break;
1192       }
1193       vtop--;
1194     }
1195   }
1196
1197   /* second pass to restore registers that were saved on stack by accident.
1198      Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1199   if (++pass < 2)
1200     goto again;
1201
1202   /* Manually free remaining registers since next parameters are loaded
1203    * manually, without the help of gv(int). */
1204   save_regs(nb_args);
1205
1206   if(todo) {
1207     o(0xE8BD0000|todo); /* pop {todo} */
1208     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1209       int r;
1210       pplan->sval->r = pplan->start;
1211       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1212          can occupy more than 2 registers. Thus, we need to push on the value
1213          stack some fake parameter to have on SValue for each registers used
1214          by a structure (r2 is not used). */
1215       for (r = pplan->start + 1; r <= pplan->end; r++) {
1216         if (todo & (1 << r)) {
1217           nb_extra_sval++;
1218           vpushi(0);
1219           vtop->r = r;
1220         }
1221       }
1222     }
1223   }
1224   return nb_extra_sval;
1225 }
1226
1227 /* Generate function call. The function address is pushed first, then
1228    all the parameters in call order. This functions pops all the
1229    parameters and the function address. */
1230 void gfunc_call(int nb_args)
1231 {
1232   int r, args_size;
1233   int def_float_abi = float_abi;
1234   int todo;
1235   struct plan plan;
1236
1237 #ifdef TCC_ARM_EABI
1238   int variadic;
1239
1240   if (float_abi == ARM_HARD_FLOAT) {
1241     variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1242     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1243       float_abi = ARM_SOFTFP_FLOAT;
1244   }
1245 #endif
1246   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1247      VT_JMP anywhere except on the top of the stack because it would complicate
1248      the code generator. */
1249   r = vtop->r & VT_VALMASK;
1250   if (r == VT_CMP || (r & ~1) == VT_JMP)
1251     gv(RC_INT);
1252
1253   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1254
1255 #ifdef TCC_ARM_EABI
1256   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1257     args_size = (args_size + 7) & ~7;
1258     o(0xE24DD004); /* sub sp, sp, #4 */
1259   }
1260 #endif
1261
1262   nb_args += copy_params(nb_args, &plan, todo);
1263   tcc_free(plan.pplans);
1264
1265   /* Move fct SValue on top as required by gcall_or_jmp */
1266   vrotb(nb_args + 1);
1267   gcall_or_jmp(0);
1268   if (args_size)
1269       gadd_sp(args_size); /* pop all parameters passed on the stack */
1270 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1271   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1272     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1273       o(0xEE000A10); /*vmov s0, r0 */
1274     } else {
1275       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1276       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1277     }
1278   }
1279 #endif
1280   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1281   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1282   float_abi = def_float_abi;
1283 }
1284
1285 /* generate function prolog of type 't' */
1286 void gfunc_prolog(CType *func_type)
1287 {
1288   Sym *sym,*sym2;
1289   int n, nf, size, align, rs, struct_ret = 0;
1290   int addr, pn, sn; /* pn=core, sn=stack */
1291   CType ret_type;
1292
1293 #ifdef TCC_ARM_EABI
1294   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1295 #endif
1296
1297   sym = func_type->ref;
1298   func_vt = sym->type;
1299   func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1300
1301   n = nf = 0;
1302   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1303       !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1304   {
1305     n++;
1306     struct_ret = 1;
1307     func_vc = 12; /* Offset from fp of the place to store the result */
1308   }
1309   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1310     size = type_size(&sym2->type, &align);
1311 #ifdef TCC_ARM_EABI
1312     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1313         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1314       int tmpnf = assign_vfpreg(&avregs, align, size);
1315       tmpnf += (size + 3) / 4;
1316       nf = (tmpnf > nf) ? tmpnf : nf;
1317     } else
1318 #endif
1319     if (n < 4)
1320       n += (size + 3) / 4;
1321   }
1322   o(0xE1A0C00D); /* mov ip,sp */
1323   if (func_var)
1324     n=4;
1325   if (n) {
1326     if(n>4)
1327       n=4;
1328 #ifdef TCC_ARM_EABI
1329     n=(n+1)&-2;
1330 #endif
1331     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1332   }
1333   if (nf) {
1334     if (nf>16)
1335       nf=16;
1336     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1337     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1338   }
1339   o(0xE92D5800); /* save fp, ip, lr */
1340   o(0xE1A0B00D); /* mov fp, sp */
1341   func_sub_sp_offset = ind;
1342   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1343
1344 #ifdef TCC_ARM_EABI
1345   if (float_abi == ARM_HARD_FLOAT) {
1346     func_vc += nf * 4;
1347     avregs = AVAIL_REGS_INITIALIZER;
1348   }
1349 #endif
1350   pn = struct_ret, sn = 0;
1351   while ((sym = sym->next)) {
1352     CType *type;
1353     type = &sym->type;
1354     size = type_size(type, &align);
1355     size = (size + 3) >> 2;
1356     align = (align + 3) & ~3;
1357 #ifdef TCC_ARM_EABI
1358     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1359         || is_hgen_float_aggr(&sym->type))) {
1360       int fpn = assign_vfpreg(&avregs, align, size << 2);
1361       if (fpn >= 0)
1362         addr = fpn * 4;
1363       else
1364         goto from_stack;
1365     } else
1366 #endif
1367     if (pn < 4) {
1368 #ifdef TCC_ARM_EABI
1369         pn = (pn + (align-1)/4) & -(align/4);
1370 #endif
1371       addr = (nf + pn) * 4;
1372       pn += size;
1373       if (!sn && pn > 4)
1374         sn = (pn - 4);
1375     } else {
1376 #ifdef TCC_ARM_EABI
1377 from_stack:
1378         sn = (sn + (align-1)/4) & -(align/4);
1379 #endif
1380       addr = (n + nf + sn) * 4;
1381       sn += size;
1382     }
1383     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1384              addr + 12);
1385   }
1386   last_itod_magic=0;
1387   leaffunc = 1;
1388   loc = 0;
1389 }
1390
1391 /* generate function epilog */
1392 void gfunc_epilog(void)
1393 {
1394   uint32_t x;
1395   int diff;
1396   /* Copy float return value to core register if base standard is used and
1397      float computation is made with VFP */
1398 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1399   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1400     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1401       o(0xEE100A10); /* fmrs r0, s0 */
1402     else {
1403       o(0xEE100B10); /* fmrdl r0, d0 */
1404       o(0xEE301B10); /* fmrdh r1, d0 */
1405     }
1406   }
1407 #endif
1408   o(0xE89BA800); /* restore fp, sp, pc */
1409   diff = (-loc + 3) & -4;
1410 #ifdef TCC_ARM_EABI
1411   if(!leaffunc)
1412     diff = ((diff + 11) & -8) - 4;
1413 #endif
1414   if(diff > 0) {
1415     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1416     if(x)
1417       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1418     else {
1419       int addr;
1420       addr=ind;
1421       o(0xE59FC004); /* ldr ip,[pc+4] */
1422       o(0xE04BD00C); /* sub sp,fp,ip  */
1423       o(0xE1A0F00E); /* mov pc,lr */
1424       o(diff);
1425       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1426     }
1427   }
1428 }
1429
1430 /* generate a jump to a label */
1431 int gjmp(int t)
1432 {
1433   int r;
1434   r=ind;
1435   o(0xE0000000|encbranch(r,t,1));
1436   return r;
1437 }
1438
1439 /* generate a jump to a fixed address */
1440 void gjmp_addr(int a)
1441 {
1442   gjmp(a);
1443 }
1444
1445 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1446 int gtst(int inv, int t)
1447 {
1448   int v, r;
1449   uint32_t op;
1450   v = vtop->r & VT_VALMASK;
1451   r=ind;
1452   if (v == VT_CMP) {
1453     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1454     op|=encbranch(r,t,1);
1455     o(op);
1456     t=r;
1457   } else if (v == VT_JMP || v == VT_JMPI) {
1458     if ((v & 1) == inv) {
1459       if(!vtop->c.i)
1460         vtop->c.i=t;
1461       else {
1462         uint32_t *x;
1463         int p,lp;
1464         if(t) {
1465           p = vtop->c.i;
1466           do {
1467             p = decbranch(lp=p);
1468           } while(p);
1469           x = (uint32_t *)(cur_text_section->data + lp);
1470           *x &= 0xff000000;
1471           *x |= encbranch(lp,t,1);
1472         }
1473         t = vtop->c.i;
1474       }
1475     } else {
1476       t = gjmp(t);
1477       gsym(vtop->c.i);
1478     }
1479   }
1480   vtop--;
1481   return t;
1482 }
1483
1484 /* generate an integer binary operation */
1485 void gen_opi(int op)
1486 {
1487   int c, func = 0;
1488   uint32_t opc = 0, r, fr;
1489   unsigned short retreg = REG_IRET;
1490
1491   c=0;
1492   switch(op) {
1493     case '+':
1494       opc = 0x8;
1495       c=1;
1496       break;
1497     case TOK_ADDC1: /* add with carry generation */
1498       opc = 0x9;
1499       c=1;
1500       break;
1501     case '-':
1502       opc = 0x4;
1503       c=1;
1504       break;
1505     case TOK_SUBC1: /* sub with carry generation */
1506       opc = 0x5;
1507       c=1;
1508       break;
1509     case TOK_ADDC2: /* add with carry use */
1510       opc = 0xA;
1511       c=1;
1512       break;
1513     case TOK_SUBC2: /* sub with carry use */
1514       opc = 0xC;
1515       c=1;
1516       break;
1517     case '&':
1518       opc = 0x0;
1519       c=1;
1520       break;
1521     case '^':
1522       opc = 0x2;
1523       c=1;
1524       break;
1525     case '|':
1526       opc = 0x18;
1527       c=1;
1528       break;
1529     case '*':
1530       gv2(RC_INT, RC_INT);
1531       r = vtop[-1].r;
1532       fr = vtop[0].r;
1533       vtop--;
1534       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1535       return;
1536     case TOK_SHL:
1537       opc = 0;
1538       c=2;
1539       break;
1540     case TOK_SHR:
1541       opc = 1;
1542       c=2;
1543       break;
1544     case TOK_SAR:
1545       opc = 2;
1546       c=2;
1547       break;
1548     case '/':
1549     case TOK_PDIV:
1550       func=TOK___divsi3;
1551       c=3;
1552       break;
1553     case TOK_UDIV:
1554       func=TOK___udivsi3;
1555       c=3;
1556       break;
1557     case '%':
1558 #ifdef TCC_ARM_EABI
1559       func=TOK___aeabi_idivmod;
1560       retreg=REG_LRET;
1561 #else
1562       func=TOK___modsi3;
1563 #endif
1564       c=3;
1565       break;
1566     case TOK_UMOD:
1567 #ifdef TCC_ARM_EABI
1568       func=TOK___aeabi_uidivmod;
1569       retreg=REG_LRET;
1570 #else
1571       func=TOK___umodsi3;
1572 #endif
1573       c=3;
1574       break;
1575     case TOK_UMULL:
1576       gv2(RC_INT, RC_INT);
1577       r=intr(vtop[-1].r2=get_reg(RC_INT));
1578       c=vtop[-1].r;
1579       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1580       vtop--;
1581       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1582       return;
1583     default:
1584       opc = 0x15;
1585       c=1;
1586       break;
1587   }
1588   switch(c) {
1589     case 1:
1590       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1591         if(opc == 4 || opc == 5 || opc == 0xc) {
1592           vswap();
1593           opc|=2; // sub -> rsb
1594         }
1595       }
1596       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1597           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1598         gv(RC_INT);
1599       vswap();
1600       c=intr(gv(RC_INT));
1601       vswap();
1602       opc=0xE0000000|(opc<<20)|(c<<16);
1603       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1604         uint32_t x;
1605         x=stuff_const(opc|0x2000000,vtop->c.i);
1606         if(x) {
1607           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1608           o(x|(r<<12));
1609           goto done;
1610         }
1611       }
1612       fr=intr(gv(RC_INT));
1613       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1614       o(opc|(r<<12)|fr);
1615 done:
1616       vtop--;
1617       if (op >= TOK_ULT && op <= TOK_GT) {
1618         vtop->r = VT_CMP;
1619         vtop->c.i = op;
1620       }
1621       break;
1622     case 2:
1623       opc=0xE1A00000|(opc<<5);
1624       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1625           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1626         gv(RC_INT);
1627       vswap();
1628       r=intr(gv(RC_INT));
1629       vswap();
1630       opc|=r;
1631       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1632         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1633         c = vtop->c.i & 0x1f;
1634         o(opc|(c<<7)|(fr<<12));
1635       } else {
1636         fr=intr(gv(RC_INT));
1637         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1638         o(opc|(c<<12)|(fr<<8)|0x10);
1639       }
1640       vtop--;
1641       break;
1642     case 3:
1643       vpush_global_sym(&func_old_type, func);
1644       vrott(3);
1645       gfunc_call(2);
1646       vpushi(0);
1647       vtop->r = retreg;
1648       break;
1649     default:
1650       tcc_error("gen_opi %i unimplemented!",op);
1651   }
1652 }
1653
1654 #ifdef TCC_ARM_VFP
1655 static int is_zero(int i)
1656 {
1657   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1658     return 0;
1659   if (vtop[i].type.t == VT_FLOAT)
1660     return (vtop[i].c.f == 0.f);
1661   else if (vtop[i].type.t == VT_DOUBLE)
1662     return (vtop[i].c.d == 0.0);
1663   return (vtop[i].c.ld == 0.l);
1664 }
1665
1666 /* generate a floating point operation 'v = t1 op t2' instruction. The
1667  *    two operands are guaranted to have the same floating point type */
1668 void gen_opf(int op)
1669 {
1670   uint32_t x;
1671   int fneg=0,r;
1672   x=0xEE000A00|T2CPR(vtop->type.t);
1673   switch(op) {
1674     case '+':
1675       if(is_zero(-1))
1676         vswap();
1677       if(is_zero(0)) {
1678         vtop--;
1679         return;
1680       }
1681       x|=0x300000;
1682       break;
1683     case '-':
1684       x|=0x300040;
1685       if(is_zero(0)) {
1686         vtop--;
1687         return;
1688       }
1689       if(is_zero(-1)) {
1690         x|=0x810000; /* fsubX -> fnegX */
1691         vswap();
1692         vtop--;
1693         fneg=1;
1694       }
1695       break;
1696     case '*':
1697       x|=0x200000;
1698       break;
1699     case '/':
1700       x|=0x800000;
1701       break;
1702     default:
1703       if(op < TOK_ULT || op > TOK_GT) {
1704         tcc_error("unknown fp op %x!",op);
1705         return;
1706       }
1707       if(is_zero(-1)) {
1708         vswap();
1709         switch(op) {
1710           case TOK_LT: op=TOK_GT; break;
1711           case TOK_GE: op=TOK_ULE; break;
1712           case TOK_LE: op=TOK_GE; break;
1713           case TOK_GT: op=TOK_ULT; break;
1714         }
1715       }
1716       x|=0xB40040; /* fcmpX */
1717       if(op!=TOK_EQ && op!=TOK_NE)
1718         x|=0x80; /* fcmpX -> fcmpeX */
1719       if(is_zero(0)) {
1720         vtop--;
1721         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1722       } else {
1723         x|=vfpr(gv(RC_FLOAT));
1724         vswap();
1725         o(x|(vfpr(gv(RC_FLOAT))<<12));
1726         vtop--;
1727       }
1728       o(0xEEF1FA10); /* fmstat */
1729
1730       switch(op) {
1731         case TOK_LE: op=TOK_ULE; break;
1732         case TOK_LT: op=TOK_ULT; break;
1733         case TOK_UGE: op=TOK_GE; break;
1734         case TOK_UGT: op=TOK_GT; break;
1735       }
1736
1737       vtop->r = VT_CMP;
1738       vtop->c.i = op;
1739       return;
1740   }
1741   r=gv(RC_FLOAT);
1742   x|=vfpr(r);
1743   r=regmask(r);
1744   if(!fneg) {
1745     int r2;
1746     vswap();
1747     r2=gv(RC_FLOAT);
1748     x|=vfpr(r2)<<16;
1749     r|=regmask(r2);
1750   }
1751   vtop->r=get_reg_ex(RC_FLOAT,r);
1752   if(!fneg)
1753     vtop--;
1754   o(x|(vfpr(vtop->r)<<12));
1755 }
1756
1757 #else
1758 static uint32_t is_fconst()
1759 {
1760   long double f;
1761   uint32_t r;
1762   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1763     return 0;
1764   if (vtop->type.t == VT_FLOAT)
1765     f = vtop->c.f;
1766   else if (vtop->type.t == VT_DOUBLE)
1767     f = vtop->c.d;
1768   else
1769     f = vtop->c.ld;
1770   if(!ieee_finite(f))
1771     return 0;
1772   r=0x8;
1773   if(f<0.0) {
1774     r=0x18;
1775     f=-f;
1776   }
1777   if(f==0.0)
1778     return r;
1779   if(f==1.0)
1780     return r|1;
1781   if(f==2.0)
1782     return r|2;
1783   if(f==3.0)
1784     return r|3;
1785   if(f==4.0)
1786     return r|4;
1787   if(f==5.0)
1788     return r|5;
1789   if(f==0.5)
1790     return r|6;
1791   if(f==10.0)
1792     return r|7;
1793   return 0;
1794 }
1795
1796 /* generate a floating point operation 'v = t1 op t2' instruction. The
1797    two operands are guaranted to have the same floating point type */
1798 void gen_opf(int op)
1799 {
1800   uint32_t x, r, r2, c1, c2;
1801   //fputs("gen_opf\n",stderr);
1802   vswap();
1803   c1 = is_fconst();
1804   vswap();
1805   c2 = is_fconst();
1806   x=0xEE000100;
1807 #if LDOUBLE_SIZE == 8
1808   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1809     x|=0x80;
1810 #else
1811   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1812     x|=0x80;
1813   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1814     x|=0x80000;
1815 #endif
1816   switch(op)
1817   {
1818     case '+':
1819       if(!c2) {
1820         vswap();
1821         c2=c1;
1822       }
1823       vswap();
1824       r=fpr(gv(RC_FLOAT));
1825       vswap();
1826       if(c2) {
1827         if(c2>0xf)
1828           x|=0x200000; // suf
1829         r2=c2&0xf;
1830       } else {
1831         r2=fpr(gv(RC_FLOAT));
1832       }
1833       break;
1834     case '-':
1835       if(c2) {
1836         if(c2<=0xf)
1837           x|=0x200000; // suf
1838         r2=c2&0xf;
1839         vswap();
1840         r=fpr(gv(RC_FLOAT));
1841         vswap();
1842       } else if(c1 && c1<=0xf) {
1843         x|=0x300000; // rsf
1844         r2=c1;
1845         r=fpr(gv(RC_FLOAT));
1846         vswap();
1847       } else {
1848         x|=0x200000; // suf
1849         vswap();
1850         r=fpr(gv(RC_FLOAT));
1851         vswap();
1852         r2=fpr(gv(RC_FLOAT));
1853       }
1854       break;
1855     case '*':
1856       if(!c2 || c2>0xf) {
1857         vswap();
1858         c2=c1;
1859       }
1860       vswap();
1861       r=fpr(gv(RC_FLOAT));
1862       vswap();
1863       if(c2 && c2<=0xf)
1864         r2=c2;
1865       else
1866         r2=fpr(gv(RC_FLOAT));
1867       x|=0x100000; // muf
1868       break;
1869     case '/':
1870       if(c2 && c2<=0xf) {
1871         x|=0x400000; // dvf
1872         r2=c2;
1873         vswap();
1874         r=fpr(gv(RC_FLOAT));
1875         vswap();
1876       } else if(c1 && c1<=0xf) {
1877         x|=0x500000; // rdf
1878         r2=c1;
1879         r=fpr(gv(RC_FLOAT));
1880         vswap();
1881       } else {
1882         x|=0x400000; // dvf
1883         vswap();
1884         r=fpr(gv(RC_FLOAT));
1885         vswap();
1886         r2=fpr(gv(RC_FLOAT));
1887       }
1888       break;
1889     default:
1890       if(op >= TOK_ULT && op <= TOK_GT) {
1891         x|=0xd0f110; // cmfe
1892 /* bug (intention?) in Linux FPU emulator
1893    doesn't set carry if equal */
1894         switch(op) {
1895           case TOK_ULT:
1896           case TOK_UGE:
1897           case TOK_ULE:
1898           case TOK_UGT:
1899             tcc_error("unsigned comparison on floats?");
1900             break;
1901           case TOK_LT:
1902             op=TOK_Nset;
1903             break;
1904           case TOK_LE:
1905             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1906             break;
1907           case TOK_EQ:
1908           case TOK_NE:
1909             x&=~0x400000; // cmfe -> cmf
1910             break;
1911         }
1912         if(c1 && !c2) {
1913           c2=c1;
1914           vswap();
1915           switch(op) {
1916             case TOK_Nset:
1917               op=TOK_GT;
1918               break;
1919             case TOK_GE:
1920               op=TOK_ULE;
1921               break;
1922             case TOK_ULE:
1923               op=TOK_GE;
1924               break;
1925             case TOK_GT:
1926               op=TOK_Nset;
1927               break;
1928           }
1929         }
1930         vswap();
1931         r=fpr(gv(RC_FLOAT));
1932         vswap();
1933         if(c2) {
1934           if(c2>0xf)
1935             x|=0x200000;
1936           r2=c2&0xf;
1937         } else {
1938           r2=fpr(gv(RC_FLOAT));
1939         }
1940         vtop[-1].r = VT_CMP;
1941         vtop[-1].c.i = op;
1942       } else {
1943         tcc_error("unknown fp op %x!",op);
1944         return;
1945       }
1946   }
1947   if(vtop[-1].r == VT_CMP)
1948     c1=15;
1949   else {
1950     c1=vtop->r;
1951     if(r2&0x8)
1952       c1=vtop[-1].r;
1953     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1954     c1=fpr(vtop[-1].r);
1955   }
1956   vtop--;
1957   o(x|(r<<16)|(c1<<12)|r2);
1958 }
1959 #endif
1960
1961 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1962    and 'long long' cases. */
1963 ST_FUNC void gen_cvt_itof1(int t)
1964 {
1965   uint32_t r, r2;
1966   int bt;
1967   bt=vtop->type.t & VT_BTYPE;
1968   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1969 #ifndef TCC_ARM_VFP
1970     uint32_t dsize = 0;
1971 #endif
1972     r=intr(gv(RC_INT));
1973 #ifdef TCC_ARM_VFP
1974     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1975     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1976     r2|=r2<<12;
1977     if(!(vtop->type.t & VT_UNSIGNED))
1978       r2|=0x80;                /* fuitoX -> fsituX */
1979     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1980 #else
1981     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1982     if((t & VT_BTYPE) != VT_FLOAT)
1983       dsize=0x80;    /* flts -> fltd */
1984     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1985     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1986       uint32_t off = 0;
1987       o(0xE3500000|(r<<12));        /* cmp */
1988       r=fpr(get_reg(RC_FLOAT));
1989       if(last_itod_magic) {
1990         off=ind+8-last_itod_magic;
1991         off/=4;
1992         if(off>255)
1993           off=0;
1994       }
1995       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1996       if(!off) {
1997         o(0xEA000000);              /* b */
1998         last_itod_magic=ind;
1999         o(0x4F800000);              /* 4294967296.0f */
2000       }
2001       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
2002     }
2003 #endif
2004     return;
2005   } else if(bt == VT_LLONG) {
2006     int func;
2007     CType *func_type = 0;
2008     if((t & VT_BTYPE) == VT_FLOAT) {
2009       func_type = &func_float_type;
2010       if(vtop->type.t & VT_UNSIGNED)
2011         func=TOK___floatundisf;
2012       else
2013         func=TOK___floatdisf;
2014 #if LDOUBLE_SIZE != 8
2015     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2016       func_type = &func_ldouble_type;
2017       if(vtop->type.t & VT_UNSIGNED)
2018         func=TOK___floatundixf;
2019       else
2020         func=TOK___floatdixf;
2021     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2022 #else
2023     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2024 #endif
2025       func_type = &func_double_type;
2026       if(vtop->type.t & VT_UNSIGNED)
2027         func=TOK___floatundidf;
2028       else
2029         func=TOK___floatdidf;
2030     }
2031     if(func_type) {
2032       vpush_global_sym(func_type, func);
2033       vswap();
2034       gfunc_call(1);
2035       vpushi(0);
2036       vtop->r=TREG_F0;
2037       return;
2038     }
2039   }
2040   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2041 }
2042
2043 /* convert fp to int 't' type */
2044 void gen_cvt_ftoi(int t)
2045 {
2046   uint32_t r, r2;
2047   int u, func = 0;
2048   u=t&VT_UNSIGNED;
2049   t&=VT_BTYPE;
2050   r2=vtop->type.t & VT_BTYPE;
2051   if(t==VT_INT) {
2052 #ifdef TCC_ARM_VFP
2053     r=vfpr(gv(RC_FLOAT));
2054     u=u?0:0x10000;
2055     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2056     r2=intr(vtop->r=get_reg(RC_INT));
2057     o(0xEE100A10|(r<<16)|(r2<<12));
2058     return;
2059 #else
2060     if(u) {
2061       if(r2 == VT_FLOAT)
2062         func=TOK___fixunssfsi;
2063 #if LDOUBLE_SIZE != 8
2064       else if(r2 == VT_LDOUBLE)
2065         func=TOK___fixunsxfsi;
2066       else if(r2 == VT_DOUBLE)
2067 #else
2068       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2069 #endif
2070         func=TOK___fixunsdfsi;
2071     } else {
2072       r=fpr(gv(RC_FLOAT));
2073       r2=intr(vtop->r=get_reg(RC_INT));
2074       o(0xEE100170|(r2<<12)|r);
2075       return;
2076     }
2077 #endif
2078   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2079     if(r2 == VT_FLOAT)
2080       func=TOK___fixsfdi;
2081 #if LDOUBLE_SIZE != 8
2082     else if(r2 == VT_LDOUBLE)
2083       func=TOK___fixxfdi;
2084     else if(r2 == VT_DOUBLE)
2085 #else
2086     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2087 #endif
2088       func=TOK___fixdfdi;
2089   }
2090   if(func) {
2091     vpush_global_sym(&func_old_type, func);
2092     vswap();
2093     gfunc_call(1);
2094     vpushi(0);
2095     if(t == VT_LLONG)
2096       vtop->r2 = REG_LRET;
2097     vtop->r = REG_IRET;
2098     return;
2099   }
2100   tcc_error("unimplemented gen_cvt_ftoi!");
2101 }
2102
2103 /* convert from one floating point type to another */
2104 void gen_cvt_ftof(int t)
2105 {
2106 #ifdef TCC_ARM_VFP
2107   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2108     uint32_t r = vfpr(gv(RC_FLOAT));
2109     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2110   }
2111 #else
2112   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2113   gv(RC_FLOAT);
2114 #endif
2115 }
2116
2117 /* computed goto support */
2118 void ggoto(void)
2119 {
2120   gcall_or_jmp(1);
2121   vtop--;
2122 }
2123
2124 /* Save the stack pointer onto the stack and return the location of its address */
2125 ST_FUNC void gen_vla_sp_save(int addr) {
2126     SValue v;
2127     v.type.t = VT_PTR;
2128     v.r = VT_LOCAL | VT_LVAL;
2129     v.c.i = addr;
2130     store(TREG_SP, &v);
2131 }
2132
2133 /* Restore the SP from a location on the stack */
2134 ST_FUNC void gen_vla_sp_restore(int addr) {
2135     SValue v;
2136     v.type.t = VT_PTR;
2137     v.r = VT_LOCAL | VT_LVAL;
2138     v.c.i = addr;
2139     load(TREG_SP, &v);
2140 }
2141
2142 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2143 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2144     int r = intr(gv(RC_INT));
2145     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2146 #ifdef TCC_ARM_EABI
2147     if (align < 8)
2148         align = 8;
2149 #else
2150     if (align < 4)
2151         align = 4;
2152 #endif
2153     if (align & (align - 1))
2154         tcc_error("alignment is not a power of 2: %i", align);
2155     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2156     vpop();
2157 }
2158
2159 /* end of ARM code generator */
2160 /*************************************************************/
2161 #endif
2162 /*************************************************************/