arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_ARM_VERSION
  38 # define TCC_ARM_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 /* ELF defines */
 134
 135 #define EM_TCC_TARGET EM_ARM
 136
 137 /* relocation type for 32 bit data relocation */
 138 #define R_DATA_32   R_ARM_ABS32
 139 #define R_DATA_PTR  R_ARM_ABS32
 140 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 141 #define R_COPY      R_ARM_COPY
 142
 143 #define ELF_START_ADDR 0x00008000
 144 #define ELF_PAGE_SIZE  0x1000
 145
 146 enum float_abi {
 147     ARM_SOFTFP_FLOAT,
 148     ARM_HARD_FLOAT,
 149 };
 150
 151 /******************************************************/
 152 #else /* ! TARGET_DEFS_ONLY */
 153 /******************************************************/
 154 #include "tcc.h"
 155
/* float ABI used by this code generator; arm_init() copies it from the
   TCCState when both TCC_ARM_EABI and TCC_ARM_VFP are defined */
enum float_abi float_abi;
 157
 158 ST_DATA const int reg_classes[NB_REGS] = {
 159     /* r0 */ RC_INT | RC_R0,
 160     /* r1 */ RC_INT | RC_R1,
 161     /* r2 */ RC_INT | RC_R2,
 162     /* r3 */ RC_INT | RC_R3,
 163     /* r12 */ RC_INT | RC_R12,
 164     /* f0 */ RC_FLOAT | RC_F0,
 165     /* f1 */ RC_FLOAT | RC_F1,
 166     /* f2 */ RC_FLOAT | RC_F2,
 167     /* f3 */ RC_FLOAT | RC_F3,
 168 #ifdef TCC_ARM_VFP
 169  /* d4/s8 */ RC_FLOAT | RC_F4,
 170 /* d5/s10 */ RC_FLOAT | RC_F5,
 171 /* d6/s12 */ RC_FLOAT | RC_F6,
 172 /* d7/s14 */ RC_FLOAT | RC_F7,
 173 #endif
 174 };
 175
 176 static int func_sub_sp_offset, last_itod_magic;
 177 static int leaffunc;
 178
 179 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 180 static CType float_type, double_type, func_float_type, func_double_type;
 181 ST_FUNC void arm_init(struct TCCState *s)
 182 {
 183     float_type.t = VT_FLOAT;
 184     double_type.t = VT_DOUBLE;
 185     func_float_type.t = VT_FUNC;
 186     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 187     func_double_type.t = VT_FUNC;
 188     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 189
 190     float_abi = s->float_abi;
 191 #ifndef TCC_ARM_HARDFLOAT
 192     tcc_warning("soft float ABI currently not supported: default to softfp");
 193 #endif
 194 }
 195 #else
 196 #define func_float_type func_old_type
 197 #define func_double_type func_old_type
 198 #define func_ldouble_type func_old_type
 199 ST_FUNC void arm_init(struct TCCState *s)
 200 {
 201 #if !defined (TCC_ARM_VFP)
 202     tcc_warning("Support for FPA is deprecated and will be removed in next"
 203                 " release");
 204 #endif
 205 #if !defined (TCC_ARM_EABI)
 206     tcc_warning("Support for OABI is deprecated and will be removed in next"
 207                 " release");
 208 #endif
 209 }
 210 #endif
 211
 212 static int two2mask(int a,int b) {
 213   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 214 }
 215
 216 static int regmask(int r) {
 217   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 218 }
 219
 220 /******************************************************/
 221
 222 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 223 char *default_elfinterp(struct TCCState *s)
 224 {
 225     if (s->float_abi == ARM_HARD_FLOAT)
 226         return "/lib/ld-linux-armhf.so.3";
 227     else
 228         return "/lib/ld-linux.so.3";
 229 }
 230 #endif
 231
 232 void o(uint32_t i)
 233 {
 234   /* this is a good place to start adding big-endian support*/
 235   int ind1;
 236
 237   ind1 = ind + 4;
 238   if (!cur_text_section)
 239     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 240          "can't evaluate constant expressions outside of a function.");
 241   if (ind1 > cur_text_section->data_allocated)
 242     section_realloc(cur_text_section, ind1);
 243   cur_text_section->data[ind++] = i&255;
 244   i>>=8;
 245   cur_text_section->data[ind++] = i&255;
 246   i>>=8;
 247   cur_text_section->data[ind++] = i&255;
 248   i>>=8;
 249   cur_text_section->data[ind++] = i;
 250 }
 251
 252 static uint32_t stuff_const(uint32_t op, uint32_t c)
 253 {
 254   int try_neg=0;
 255   uint32_t nc = 0, negop = 0;
 256
 257   switch(op&0x1F00000)
 258   {
 259     case 0x800000: //add
 260     case 0x400000: //sub
 261       try_neg=1;
 262       negop=op^0xC00000;
 263       nc=-c;
 264       break;
 265     case 0x1A00000: //mov
 266     case 0x1E00000: //mvn
 267       try_neg=1;
 268       negop=op^0x400000;
 269       nc=~c;
 270       break;
 271     case 0x200000: //xor
 272       if(c==~0)
 273         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 274       break;
 275     case 0x0: //and
 276       if(c==~0)
 277         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 278     case 0x1C00000: //bic
 279       try_neg=1;
 280       negop=op^0x1C00000;
 281       nc=~c;
 282       break;
 283     case 0x1800000: //orr
 284       if(c==~0)
 285         return (op&0xFFF0FFFF)|0x1E00000;
 286       break;
 287   }
 288   do {
 289     uint32_t m;
 290     int i;
 291     if(c<256) /* catch undefined <<32 */
 292       return op|c;
 293     for(i=2;i<32;i+=2) {
 294       m=(0xff>>i)|(0xff<<(32-i));
 295       if(!(c&~m))
 296         return op|(i<<7)|(c<<i)|(c>>(32-i));
 297     }
 298     op=negop;
 299     c=nc;
 300   } while(try_neg--);
 301   return 0;
 302 }
 303
 304
 305 //only add,sub
 306 void stuff_const_harder(uint32_t op, uint32_t v) {
 307   uint32_t x;
 308   x=stuff_const(op,v);
 309   if(x)
 310     o(x);
 311   else {
 312     uint32_t a[16], nv, no, o2, n2;
 313     int i,j,k;
 314     a[0]=0xff;
 315     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 316     for(i=1;i<16;i++)
 317       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 318     for(i=0;i<12;i++)
 319       for(j=i<4?i+12:15;j>=i+4;j--)
 320         if((v&(a[i]|a[j]))==v) {
 321           o(stuff_const(op,v&a[i]));
 322           o(stuff_const(o2,v&a[j]));
 323           return;
 324         }
 325     no=op^0xC00000;
 326     n2=o2^0xC00000;
 327     nv=-v;
 328     for(i=0;i<12;i++)
 329       for(j=i<4?i+12:15;j>=i+4;j--)
 330         if((nv&(a[i]|a[j]))==nv) {
 331           o(stuff_const(no,nv&a[i]));
 332           o(stuff_const(n2,nv&a[j]));
 333           return;
 334         }
 335     for(i=0;i<8;i++)
 336       for(j=i+4;j<12;j++)
 337         for(k=i<4?i+12:15;k>=j+4;k--)
 338           if((v&(a[i]|a[j]|a[k]))==v) {
 339             o(stuff_const(op,v&a[i]));
 340             o(stuff_const(o2,v&a[j]));
 341             o(stuff_const(o2,v&a[k]));
 342             return;
 343           }
 344     no=op^0xC00000;
 345     nv=-v;
 346     for(i=0;i<8;i++)
 347       for(j=i+4;j<12;j++)
 348         for(k=i<4?i+12:15;k>=j+4;k--)
 349           if((nv&(a[i]|a[j]|a[k]))==nv) {
 350             o(stuff_const(no,nv&a[i]));
 351             o(stuff_const(n2,nv&a[j]));
 352             o(stuff_const(n2,nv&a[k]));
 353             return;
 354           }
 355     o(stuff_const(op,v&a[0]));
 356     o(stuff_const(o2,v&a[4]));
 357     o(stuff_const(o2,v&a[8]));
 358     o(stuff_const(o2,v&a[12]));
 359   }
 360 }
 361
 362 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 363 {
 364   addr-=pos+8;
 365   addr/=4;
 366   if(addr>=0x1000000 || addr<-0x1000000) {
 367     if(fail)
 368       tcc_error("FIXME: function bigger than 32MB");
 369     return 0;
 370   }
 371   return 0x0A000000|(addr&0xffffff);
 372 }
 373
 374 int decbranch(int pos)
 375 {
 376   int x;
 377   x=*(uint32_t *)(cur_text_section->data + pos);
 378   x&=0x00ffffff;
 379   if(x&0x800000)
 380     x-=0x1000000;
 381   return x*4+pos+8;
 382 }
 383
 384 /* output a symbol and patch all calls to it */
 385 void gsym_addr(int t, int a)
 386 {
 387   uint32_t *x;
 388   int lt;
 389   while(t) {
 390     x=(uint32_t *)(cur_text_section->data + t);
 391     t=decbranch(lt=t);
 392     if(a==lt+4)
 393       *x=0xE1A00000; // nop
 394     else {
 395       *x &= 0xff000000;
 396       *x |= encbranch(lt,a,1);
 397     }
 398   }
 399 }
 400
 401 void gsym(int t)
 402 {
 403   gsym_addr(t, ind);
 404 }
 405
 406 #ifdef TCC_ARM_VFP
 407 static uint32_t vfpr(int r)
 408 {
 409   if(r<TREG_F0 || r>TREG_F7)
 410     tcc_error("compiler error! register %i is no vfp register",r);
 411   return r - TREG_F0;
 412 }
 413 #else
 414 static uint32_t fpr(int r)
 415 {
 416   if(r<TREG_F0 || r>TREG_F3)
 417     tcc_error("compiler error! register %i is no fpa register",r);
 418   return r - TREG_F0;
 419 }
 420 #endif
 421
 422 static uint32_t intr(int r)
 423 {
 424   if(r == TREG_R12)
 425     return 12;
 426   if(r >= TREG_R0 && r <= TREG_R3)
 427     return r - TREG_R0;
 428   if (r >= TREG_SP && r <= TREG_LR)
 429     return r + (13 - TREG_SP);
 430   tcc_error("compiler error! register %i is no int register",r);
 431 }
 432
 433 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 434 {
 435   if(*off>maxoff || *off&((1<<shift)-1)) {
 436     uint32_t x, y;
 437     x=0xE280E000;
 438     if(*sgn)
 439       x=0xE240E000;
 440     x|=(*base)<<16;
 441     *base=14; // lr
 442     y=stuff_const(x,*off&~maxoff);
 443     if(y) {
 444       o(y);
 445       *off&=maxoff;
 446       return;
 447     }
 448     y=stuff_const(x,(*off+maxoff)&~maxoff);
 449     if(y) {
 450       o(y);
 451       *sgn=!*sgn;
 452       *off=((*off+maxoff)&~maxoff)-*off;
 453       return;
 454     }
 455     stuff_const_harder(x,*off&~maxoff);
 456     *off&=maxoff;
 457   }
 458 }
 459
 460 static uint32_t mapcc(int cc)
 461 {
 462   switch(cc)
 463   {
 464     case TOK_ULT:
 465       return 0x30000000; /* CC/LO */
 466     case TOK_UGE:
 467       return 0x20000000; /* CS/HS */
 468     case TOK_EQ:
 469       return 0x00000000; /* EQ */
 470     case TOK_NE:
 471       return 0x10000000; /* NE */
 472     case TOK_ULE:
 473       return 0x90000000; /* LS */
 474     case TOK_UGT:
 475       return 0x80000000; /* HI */
 476     case TOK_Nset:
 477       return 0x40000000; /* MI */
 478     case TOK_Nclear:
 479       return 0x50000000; /* PL */
 480     case TOK_LT:
 481       return 0xB0000000; /* LT */
 482     case TOK_GE:
 483       return 0xA0000000; /* GE */
 484     case TOK_LE:
 485       return 0xD0000000; /* LE */
 486     case TOK_GT:
 487       return 0xC0000000; /* GT */
 488   }
 489   tcc_error("unexpected condition code");
 490   return 0xE0000000; /* AL */
 491 }
 492
 493 static int negcc(int cc)
 494 {
 495   switch(cc)
 496   {
 497     case TOK_ULT:
 498       return TOK_UGE;
 499     case TOK_UGE:
 500       return TOK_ULT;
 501     case TOK_EQ:
 502       return TOK_NE;
 503     case TOK_NE:
 504       return TOK_EQ;
 505     case TOK_ULE:
 506       return TOK_UGT;
 507     case TOK_UGT:
 508       return TOK_ULE;
 509     case TOK_Nset:
 510       return TOK_Nclear;
 511     case TOK_Nclear:
 512       return TOK_Nset;
 513     case TOK_LT:
 514       return TOK_GE;
 515     case TOK_GE:
 516       return TOK_LT;
 517     case TOK_LE:
 518       return TOK_GT;
 519     case TOK_GT:
 520       return TOK_LE;
 521   }
 522   tcc_error("unexpected condition code");
 523   return TOK_NE;
 524 }
 525
 526 /* load 'r' from value 'sv' */
 527 void load(int r, SValue *sv)
 528 {
 529   int v, ft, fc, fr, sign;
 530   uint32_t op;
 531   SValue v1;
 532
 533   fr = sv->r;
 534   ft = sv->type.t;
 535   fc = sv->c.i;
 536
 537   if(fc>=0)
 538     sign=0;
 539   else {
 540     sign=1;
 541     fc=-fc;
 542   }
 543
 544   v = fr & VT_VALMASK;
 545   if (fr & VT_LVAL) {
 546     uint32_t base = 0xB; // fp
 547     if(v == VT_LLOCAL) {
 548       v1.type.t = VT_PTR;
 549       v1.r = VT_LOCAL | VT_LVAL;
 550       v1.c.i = sv->c.i;
 551       load(TREG_LR, &v1);
 552       base = 14; /* lr */
 553       fc=sign=0;
 554       v=VT_LOCAL;
 555     } else if(v == VT_CONST) {
 556       v1.type.t = VT_PTR;
 557       v1.r = fr&~VT_LVAL;
 558       v1.c.i = sv->c.i;
 559       v1.sym=sv->sym;
 560       load(TREG_LR, &v1);
 561       base = 14; /* lr */
 562       fc=sign=0;
 563       v=VT_LOCAL;
 564     } else if(v < VT_CONST) {
 565       base=intr(v);
 566       fc=sign=0;
 567       v=VT_LOCAL;
 568     }
 569     if(v == VT_LOCAL) {
 570       if(is_float(ft)) {
 571         calcaddr(&base,&fc,&sign,1020,2);
 572 #ifdef TCC_ARM_VFP
 573         op=0xED100A00; /* flds */
 574         if(!sign)
 575           op|=0x800000;
 576         if ((ft & VT_BTYPE) != VT_FLOAT)
 577           op|=0x100;   /* flds -> fldd */
 578         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 579 #else
 580         op=0xED100100;
 581         if(!sign)
 582           op|=0x800000;
 583 #if LDOUBLE_SIZE == 8
 584         if ((ft & VT_BTYPE) != VT_FLOAT)
 585           op|=0x8000;
 586 #else
 587         if ((ft & VT_BTYPE) == VT_DOUBLE)
 588           op|=0x8000;
 589         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 590           op|=0x400000;
 591 #endif
 592         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 593 #endif
 594       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 595                 || (ft & VT_BTYPE) == VT_SHORT) {
 596         calcaddr(&base,&fc,&sign,255,0);
 597         op=0xE1500090;
 598         if ((ft & VT_BTYPE) == VT_SHORT)
 599           op|=0x20;
 600         if ((ft & VT_UNSIGNED) == 0)
 601           op|=0x40;
 602         if(!sign)
 603           op|=0x800000;
 604         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 605       } else {
 606         calcaddr(&base,&fc,&sign,4095,0);
 607         op=0xE5100000;
 608         if(!sign)
 609           op|=0x800000;
 610         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 611           op|=0x400000;
 612         o(op|(intr(r)<<12)|fc|(base<<16));
 613       }
 614       return;
 615     }
 616   } else {
 617     if (v == VT_CONST) {
 618       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
 619       if (fr & VT_SYM || !op) {
 620         o(0xE59F0000|(intr(r)<<12));
 621         o(0xEA000000);
 622         if(fr & VT_SYM)
 623           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 624         o(sv->c.i);
 625       } else
 626         o(op);
 627       return;
 628     } else if (v == VT_LOCAL) {
 629       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
 630       if (fr & VT_SYM || !op) {
 631         o(0xE59F0000|(intr(r)<<12));
 632         o(0xEA000000);
 633         if(fr & VT_SYM) // needed ?
 634           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 635         o(sv->c.i);
 636         o(0xE08B0000|(intr(r)<<12)|intr(r));
 637       } else
 638         o(op);
 639       return;
 640     } else if(v == VT_CMP) {
 641       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
 642       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
 643       return;
 644     } else if (v == VT_JMP || v == VT_JMPI) {
 645       int t;
 646       t = v & 1;
 647       o(0xE3A00000|(intr(r)<<12)|t);
 648       o(0xEA000000);
 649       gsym(sv->c.i);
 650       o(0xE3A00000|(intr(r)<<12)|(t^1));
 651       return;
 652     } else if (v < VT_CONST) {
 653       if(is_float(ft))
 654 #ifdef TCC_ARM_VFP
 655         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 656 #else
 657         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 658 #endif
 659       else
 660         o(0xE1A00000|(intr(r)<<12)|intr(v));
 661       return;
 662     }
 663   }
 664   tcc_error("load unimplemented!");
 665 }
 666
 667 /* store register 'r' in lvalue 'v' */
 668 void store(int r, SValue *sv)
 669 {
 670   SValue v1;
 671   int v, ft, fc, fr, sign;
 672   uint32_t op;
 673
 674   fr = sv->r;
 675   ft = sv->type.t;
 676   fc = sv->c.i;
 677
 678   if(fc>=0)
 679     sign=0;
 680   else {
 681     sign=1;
 682     fc=-fc;
 683   }
 684
 685   v = fr & VT_VALMASK;
 686   if (fr & VT_LVAL || fr == VT_LOCAL) {
 687     uint32_t base = 0xb; /* fp */
 688     if(v < VT_CONST) {
 689       base=intr(v);
 690       v=VT_LOCAL;
 691       fc=sign=0;
 692     } else if(v == VT_CONST) {
 693       v1.type.t = ft;
 694       v1.r = fr&~VT_LVAL;
 695       v1.c.i = sv->c.i;
 696       v1.sym=sv->sym;
 697       load(TREG_LR, &v1);
 698       base = 14; /* lr */
 699       fc=sign=0;
 700       v=VT_LOCAL;
 701     }
 702     if(v == VT_LOCAL) {
 703        if(is_float(ft)) {
 704         calcaddr(&base,&fc,&sign,1020,2);
 705 #ifdef TCC_ARM_VFP
 706         op=0xED000A00; /* fsts */
 707         if(!sign)
 708           op|=0x800000;
 709         if ((ft & VT_BTYPE) != VT_FLOAT)
 710           op|=0x100;   /* fsts -> fstd */
 711         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 712 #else
 713         op=0xED000100;
 714         if(!sign)
 715           op|=0x800000;
 716 #if LDOUBLE_SIZE == 8
 717         if ((ft & VT_BTYPE) != VT_FLOAT)
 718           op|=0x8000;
 719 #else
 720         if ((ft & VT_BTYPE) == VT_DOUBLE)
 721           op|=0x8000;
 722         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 723           op|=0x400000;
 724 #endif
 725         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 726 #endif
 727         return;
 728       } else if((ft & VT_BTYPE) == VT_SHORT) {
 729         calcaddr(&base,&fc,&sign,255,0);
 730         op=0xE14000B0;
 731         if(!sign)
 732           op|=0x800000;
 733         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 734       } else {
 735         calcaddr(&base,&fc,&sign,4095,0);
 736         op=0xE5000000;
 737         if(!sign)
 738           op|=0x800000;
 739         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 740           op|=0x400000;
 741         o(op|(intr(r)<<12)|fc|(base<<16));
 742       }
 743       return;
 744     }
 745   }
 746   tcc_error("store unimplemented");
 747 }
 748
 749 static void gadd_sp(int val)
 750 {
 751   stuff_const_harder(0xE28DD000,val);
 752 }
 753
 754 /* 'is_jmp' is '1' if it is a jump */
 755 static void gcall_or_jmp(int is_jmp)
 756 {
 757   int r;
 758   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 759     uint32_t x;
 760     /* constant case */
 761     x=encbranch(ind,ind+vtop->c.i,0);
 762     if(x) {
 763       if (vtop->r & VT_SYM) {
 764         /* relocation case */
 765         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 766       } else
 767         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 768       o(x|(is_jmp?0xE0000000:0xE1000000));
 769     } else {
 770       if(!is_jmp)
 771         o(0xE28FE004); // add lr,pc,#4
 772       o(0xE51FF004);   // ldr pc,[pc,#-4]
 773       if (vtop->r & VT_SYM)
 774         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 775       o(vtop->c.i);
 776     }
 777   } else {
 778     /* otherwise, indirect call */
 779     r = gv(RC_INT);
 780     if(!is_jmp)
 781       o(0xE1A0E00F);       // mov lr,pc
 782     o(0xE1A0F000|intr(r)); // mov pc,r
 783   }
 784 }
 785
 786 static int unalias_ldbl(int btype)
 787 {
 788 #if LDOUBLE_SIZE == 8
 789     if (btype == VT_LDOUBLE)
 790       btype = VT_DOUBLE;
 791 #endif
 792     return btype;
 793 }
 794
 795 /* Return whether a structure is an homogeneous float aggregate or not.
 796    The answer is true if all the elements of the structure are of the same
 797    primitive float type and there is less than 4 elements.
 798
 799    type: the type corresponding to the structure to be tested */
 800 static int is_hgen_float_aggr(CType *type)
 801 {
 802   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 803     struct Sym *ref;
 804     int btype, nb_fields = 0;
 805
 806     ref = type->ref->next;
 807     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 808     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 809       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 810       return !ref && nb_fields <= 4;
 811     }
 812   }
 813   return 0;
 814 }
 815
 816 struct avail_regs {
 817   signed char avail[3]; /* 3 holes max with only float and double alignments */
 818   int first_hole; /* first available hole */
 819   int last_hole; /* last available hole (none if equal to first_hole) */
 820   int first_free_reg; /* next free register in the sequence, hole excluded */
 821 };
 822
 823 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 824
 825 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 826    param) according to the rules described in the procedure call standard for
 827    the ARM architecture (AAPCS). If found, the registers are assigned to this
 828    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 829    and the parameter is a single float.
 830
 831    avregs: opaque structure to keep track of available VFP co-processor regs
 832    align: alignment contraints for the param, as returned by type_size()
 833    size: size of the parameter, as returned by type_size() */
 834 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 835 {
 836   int first_reg = 0;
 837
 838   if (avregs->first_free_reg == -1)
 839     return -1;
 840   if (align >> 3) { /* double alignment */
 841     first_reg = avregs->first_free_reg;
 842     /* alignment contraint not respected so use next reg and record hole */
 843     if (first_reg & 1)
 844       avregs->avail[avregs->last_hole++] = first_reg++;
 845   } else { /* no special alignment (float or array of float) */
 846     /* if single float and a hole is available, assign the param to it */
 847     if (size == 4 && avregs->first_hole != avregs->last_hole)
 848       return avregs->avail[avregs->first_hole++];
 849     else
 850       first_reg = avregs->first_free_reg;
 851   }
 852   if (first_reg + size / 4 <= 16) {
 853     avregs->first_free_reg = first_reg + size / 4;
 854     return first_reg;
 855   }
 856   avregs->first_free_reg = -1;
 857   return -1;
 858 }
 859
 860 /* Returns whether all params need to be passed in core registers or not.
 861    This is the case for function part of the runtime ABI. */
 862 int floats_in_core_regs(SValue *sval)
 863 {
 864   if (!sval->sym)
 865     return 0;
 866
 867   switch (sval->sym->v) {
 868     case TOK___floatundisf:
 869     case TOK___floatundidf:
 870     case TOK___fixunssfdi:
 871     case TOK___fixunsdfdi:
 872 #ifndef TCC_ARM_VFP
 873     case TOK___fixunsxfdi:
 874 #endif
 875     case TOK___floatdisf:
 876     case TOK___floatdidf:
 877     case TOK___fixsfdi:
 878     case TOK___fixdfdi:
 879       return 1;
 880
 881     default:
 882       return 0;
 883   }
 884 }
 885
 886 /* Return the number of registers needed to return the struct, or 0 if
 887    returning via struct pointer. */
 888 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 889 #ifdef TCC_ARM_EABI
 890     int size, align;
 891     size = type_size(vt, &align);
 892     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 893         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 894         *ret_align = 8;
 895         *regsize = 8;
 896         ret->ref = NULL;
 897         ret->t = VT_DOUBLE;
 898         return (size + 7) >> 3;
 899     } else if (size <= 4) {
 900         *ret_align = 4;
 901         *regsize = 4;
 902         ret->ref = NULL;
 903         ret->t = VT_INT;
 904         return 1;
 905     } else
 906         return 0;
 907 #else
 908     return 0;
 909 #endif
 910 }
 911
 912 /* Parameters are classified according to how they are copied to their final
 913    destination for the function call. Because the copying is performed class
 914    after class according to the order in the union below, it is important that
 915    some constraints about the order of the members of this union are respected:
 916    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 917    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 918      VFP_STRUCT_CLASS;
 919    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 920    See the comment for the main loop in copy_params() for the reason. */
 921 enum reg_class {
 922         STACK_CLASS = 0,
 923         CORE_STRUCT_CLASS,
 924         VFP_CLASS,
 925         VFP_STRUCT_CLASS,
 926         CORE_CLASS,
 927         NB_CLASSES
 928 };
 929
/* Placement decided for one parameter of a call. The fields are filled
   positionally (start, end, sval) by assign_regs(), so their order must not
   change. */
struct param_plan {
    int start; /* first reg or addr used depending on the class */
    int end; /* last reg used or next free addr depending on the class */
    SValue *sval; /* pointer to SValue on the value stack */
    struct param_plan *prev; /*  previous element in this class */
};

/* Placement of all the parameters of a call: the array of per parameter
   plans plus, for each class, the head of a list chained through 'prev'. */
struct plan {
    struct param_plan *pplans; /* array of all the param plans */
    struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
};
 941
/* Append 'pplan' to the array of 'plan' and make it the new head of the list
   of class 'class'. The macro pastes "_nb" onto its first argument, so a
   counter variable with that name (plan_nb when the argument is 'plan') must
   be in scope; it is incremented by the macro. */
#define add_param_plan(plan,pplan,class)                        \
    do {                                                        \
        pplan.prev = plan->clsplans[class];                     \
        plan->pplans[plan ## _nb] = pplan;                      \
        plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
    } while(0)
 948
/* Assign parameters to registers and stack with alignment according to the
   rules in the procedure call standard for the ARM architecture (AAPCS).
   The overall assignment is recorded in an array of per parameter structures
   called parameter plans. The parameter plans are also further organized in a
   number of linked lists, one per class of parameter (see the comment for the
   definition of enum reg_class).

   nb_args: number of parameters of the function for which a call is generated
   float_abi: float ABI in use for this function call
   plan: the structure where the overall assignment is recorded
   todo: a bitmap that records which core registers hold a parameter

   Returns the amount of stack space needed for parameter passing

   Note: this function allocates an array in plan->pplans with tcc_malloc. It
   is the responsibility of the caller to free this array once used (ie not
   before copy_params). */
static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
{
  int i, size, align;
  int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
  int plan_nb = 0; /* number of plans recorded so far, used by add_param_plan */
  struct param_plan pplan;
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;

  ncrn = nsaa = 0;
  *todo = 0;
  plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
  memset(plan->clsplans, 0, sizeof(plan->clsplans));
  /* visit vtop[-(nb_args-1)] first and vtop[0] last. Inside the switch,
     'continue' means the parameter has been planned, while 'break' sends it
     to the stack allocation at the bottom of the loop. */
  for(i = nb_args; i-- ;) {
    int j, start_vfpreg = 0;
    CType type = vtop[-i].type;
    type.t &= ~VT_ARRAY;
    size = type_size(&type, &align);
    /* size and alignment are rounded up to a multiple of 4 */
    size = (size + 3) & ~3;
    align = (align + 3) & ~3;
    switch(vtop[-i].type.t & VT_BTYPE) {
      case VT_STRUCT:
      case VT_FLOAT:
      case VT_DOUBLE:
      case VT_LDOUBLE:
      if (float_abi == ARM_HARD_FLOAT) {
        int is_hfa = 0; /* Homogeneous float aggregate */

        if (is_float(vtop[-i].type.t)
            || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
          int end_vfpreg;

          start_vfpreg = assign_vfpreg(&avregs, align, size);
          end_vfpreg = start_vfpreg + ((size - 1) >> 2);
          if (start_vfpreg >= 0) {
            pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
            if (is_hfa)
              add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
            else
              add_param_plan(plan, pplan, VFP_CLASS);
            continue;
          } else
            break;
        }
      }
      /* align the next core register number on the parameter alignment */
      ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
      if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
        /* The parameter is allocated both in core register and on stack. As
         * such, it can be of either class: it would either be the last of
         * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
        for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
          *todo|=(1<<j);
        pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
        add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
        ncrn += size/4;
        /* the part that did not fit in r0-r3 lies at the start of the stack
           area */
        if (ncrn > 4)
          nsaa = (ncrn - 4) * 4;
      } else {
        ncrn = 4;
        break;
      }
      continue;
      default:
      if (ncrn < 4) {
        int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;

        if (is_long) {
          /* a long long starts on an even core register */
          ncrn = (ncrn + 1) & -2;
          if (ncrn == 4)
            break;
        }
        pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
        ncrn++;
        if (is_long)
          pplan.end = ncrn++;
        add_param_plan(plan, pplan, CORE_CLASS);
        continue;
      }
    }
    /* the parameter goes on the stack, at the next suitably aligned address */
    nsaa = (nsaa + (align - 1)) & ~(align - 1);
    pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
    add_param_plan(plan, pplan, STACK_CLASS);
    nsaa += size; /* size already rounded up before */
  }
  return nsaa;
}
1051
1052 #undef add_param_plan
1053
/* Copy parameters to their final destination (core reg, VFP reg or stack) for
   function call.

   nb_args: number of parameters the function take
   plan: the overall assignment plan for parameters
   todo: a bitmap indicating what core reg will hold a parameter

   Returns the number of SValue added by this function on the value stack */
static int copy_params(int nb_args, struct plan *plan, int todo)
{
  int size, align, r, i, nb_extra_sval = 0;
  struct param_plan *pplan;
  int pass = 0; /* 0 on the first traversal of the plan, 1 on the second one */

   /* Several constraints require parameters to be copied in a specific order:
      - structures are copied to the stack before being loaded in a reg;
      - floats loaded to an odd numbered VFP reg are first copied to the
        preceding even numbered VFP reg and then moved to the next VFP reg.

      It is thus important that:
      - structures assigned to core regs must be copied after parameters
        assigned to the stack but before structures assigned to VFP regs because
        a structure can lie partly in core registers and partly on the stack;
      - parameters assigned to the stack and all structures be copied before
        parameters assigned to a core reg since copying a parameter to the stack
        require using a core reg;
      - parameters assigned to VFP regs be copied before structures assigned to
        VFP regs as the copy might use an even numbered VFP reg that already
        holds part of a structure. */
again:
  /* Classes are visited in increasing class number; within a class the plans
     are chained through their 'prev' link. */
  for(i = 0; i < NB_CLASSES; i++) {
    for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {

      /* On the second pass only CORE_CLASS entries whose r field is at least
         VT_CONST are redone (presumably those that no longer sit in a
         register -- confirm against the VT_* definitions). */
      if (pass
          && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
        continue;

      /* Work on a copy pushed on top of the value stack. */
      vpushv(pplan->sval);
      pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
      switch(i) {
        case STACK_CLASS:
        case CORE_STRUCT_CLASS:
        case VFP_STRUCT_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
            int padding = 0;
            size = type_size(&pplan->sval->type, &align);
            /* align to stack align size */
            size = (size + 3) & ~3;
            /* Gap between this stacked parameter and the one linked as prev */
            if (i == STACK_CLASS && pplan->prev)
              padding = pplan->start - pplan->prev->end;
            size += padding; /* Add padding if any */
            /* allocate the necessary size on stack */
            gadd_sp(-size);
            /* generate structure store */
            r = get_reg(RC_INT);
            o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore(); /* memcpy to current sp + potential padding */

            /* Homogeneous float aggregate are loaded to VFP registers
               immediately since there is no way of loading data in multiple
               non consecutive VFP registers as what is done for other
               structures (see the use of todo). */
            if (i == VFP_STRUCT_CLASS) {
              int first = pplan->start, nb = pplan->end - first + 1;
              /* vpop.32 {pplan->start, ..., pplan->end} */
              o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
              /* No need to write the register used to a SValue since VFP regs
                 cannot be used for gcall_or_jmp */
            }
          } else {
            /* Scalar parameter: load it in a register and push that register;
               size records the number of bytes pushed. */
            if (is_float(pplan->sval->type.t)) {
#ifdef TCC_ARM_VFP
              r = vfpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else {
                size = 8;
                r |= 0x101; /* vpush.32 -> vpush.64 */
              }
              o(0xED2D0A01 + r); /* vpush */
#else
              r = fpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
                size = 8;
              else
                size = LDOUBLE_SIZE;

              /* Select the precision of the store from the size in bytes. */
              if (size == 12)
                r |= 0x400000;
              else if(size == 8)
                r|=0x8000;

              o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
#endif
            } else {
              /* simple type (currently always same size) */
              /* XXX: implicit cast ? */
              size=4;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                /* Split the long long in two words; the word on top of the
                   value stack is pushed first. */
                lexpand_nr();
                size = 8;
                r = gv(RC_INT);
                o(0xE52D0004|(intr(r)<<12)); /* push r */
                vtop--;
              }
              r = gv(RC_INT);
              o(0xE52D0004|(intr(r)<<12)); /* push r */
            }
            if (i == STACK_CLASS && pplan->prev)
              gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
          }
          break;

        case VFP_CLASS:
          /* pplan->start counts single precision registers, hence the >> 1
             to select the register class to load the value in. */
          gv(regmask(TREG_F0 + (pplan->start >> 1)));
          if (pplan->start & 1) { /* Must be in upper part of double register */
            o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
            vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
          }
          break;

        case CORE_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
            /* The word left on top by lexpand_nr goes to register
               pplan->end, the other word to pplan->start below. */
            lexpand_nr();
            gv(regmask(pplan->end));
            pplan->sval->r2 = vtop->r;
            vtop--;
          }
          gv(regmask(pplan->start));
          /* Mark register as used so that gcall_or_jmp use another one
             (regs >=4 are free as never used to pass parameters) */
          pplan->sval->r = vtop->r;
          break;
      }
      /* Drop the working copy pushed by vpushv above. */
      vtop--;
    }
  }

  /* second pass to restore registers that were saved on stack by accident.
     Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
  if (++pass < 2)
    goto again;

  /* Manually free remaining registers since next parameters are loaded
   * manually, without the help of gv(int). */
  save_regs(nb_args);

  if(todo) {
    /* Load in one instruction every core register flagged in todo from the
       data previously stored on the stack. */
    o(0xE8BD0000|todo); /* pop {todo} */
    for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
      int r;
      pplan->sval->r = pplan->start;
      /* An SValue can only pin 2 registers at best (r and r2) but a structure
         can occupy more than 2 registers. Thus, we need to push on the value
         stack some fake parameter to have on SValue for each registers used
         by a structure (r2 is not used). */
      for (r = pplan->start + 1; r <= pplan->end; r++) {
        if (todo & (1 << r)) {
          nb_extra_sval++;
          vpushi(0);
          vtop->r = r;
        }
      }
    }
  }
  return nb_extra_sval;
}
1225
1226 /* Generate function call. The function address is pushed first, then
1227    all the parameters in call order. This functions pops all the
1228    parameters and the function address. */
1229 void gfunc_call(int nb_args)
1230 {
1231   int r, args_size;
1232   int def_float_abi = float_abi;
1233   int todo;
1234   struct plan plan;
1235
1236 #ifdef TCC_ARM_EABI
1237   int variadic;
1238
1239   if (float_abi == ARM_HARD_FLOAT) {
1240     variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1241     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1242       float_abi = ARM_SOFTFP_FLOAT;
1243   }
1244 #endif
1245   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1246      VT_JMP anywhere except on the top of the stack because it would complicate
1247      the code generator. */
1248   r = vtop->r & VT_VALMASK;
1249   if (r == VT_CMP || (r & ~1) == VT_JMP)
1250     gv(RC_INT);
1251
1252   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1253
1254 #ifdef TCC_ARM_EABI
1255   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1256     args_size = (args_size + 7) & ~7;
1257     o(0xE24DD004); /* sub sp, sp, #4 */
1258   }
1259 #endif
1260
1261   nb_args += copy_params(nb_args, &plan, todo);
1262   tcc_free(plan.pplans);
1263
1264   /* Move fct SValue on top as required by gcall_or_jmp */
1265   vrotb(nb_args + 1);
1266   gcall_or_jmp(0);
1267   if (args_size)
1268       gadd_sp(args_size); /* pop all parameters passed on the stack */
1269 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1270   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1271     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1272       o(0xEE000A10); /*vmov s0, r0 */
1273     } else {
1274       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1275       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1276     }
1277   }
1278 #endif
1279   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1280   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1281   float_abi = def_float_abi;
1282 }
1283
/* generate function prolog of type 't' */
/* Emits the instructions saving the incoming argument registers and the
   frame registers, then declares every parameter as a local symbol at its
   frame pointer relative address. */
void gfunc_prolog(CType *func_type)
{
  Sym *sym,*sym2;
  /* n: number of core registers to save, nf: number of single precision VFP
     registers to save */
  int n, nf, size, align, rs, struct_ret = 0;
  int addr, pn, sn; /* pn=core, sn=stack */
  CType ret_type;

#ifdef TCC_ARM_EABI
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
#endif

  sym = func_type->ref;
  func_vt = sym->type;
  func_var = (func_type->ref->c == FUNC_ELLIPSIS);

  n = nf = 0;
  /* When gfunc_sret() returns 0 for a structure return type, one more core
     register is counted (presumably the one holding the address of the
     result -- confirm against gfunc_sret). */
  if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
      !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
  {
    n++;
    struct_ret = 1;
    func_vc = 12; /* Offset from fp of the place to store the result */
  }
  /* First traversal of the parameters: only count the registers they occupy.
     It stops as soon as both register files are exhausted. */
  for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
    size = type_size(&sym2->type, &align);
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var &&
        (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
      int tmpnf = assign_vfpreg(&avregs, align, size);
      /* tmpnf becomes one past the last VFP register of this parameter; nf
         keeps the highest value seen. */
      tmpnf += (size + 3) / 4;
      nf = (tmpnf > nf) ? tmpnf : nf;
    } else
#endif
    if (n < 4)
      n += (size + 3) / 4;
  }
  o(0xE1A0C00D); /* mov ip,sp */
  /* A variadic function always saves the four argument registers. */
  if (func_var)
    n=4;
  if (n) {
    if(n>4)
      n=4;
#ifdef TCC_ARM_EABI
    /* Save an even number of registers. */
    n=(n+1)&-2;
#endif
    o(0xE92D0000|((1<<n)-1)); /* save the first n core registers (at most r0-r3) on stack */
  }
  if (nf) {
    if (nf>16)
      nf=16;
    nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
    o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
  }
  o(0xE92D5800); /* save fp, ip, lr */
  o(0xE1A0B00D); /* mov fp, sp */
  /* Remember where the placeholder is so that gfunc_epilog can patch it. */
  func_sub_sp_offset = ind;
  o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */

#ifdef TCC_ARM_EABI
  if (float_abi == ARM_HARD_FLOAT) {
    /* The saved VFP registers sit between the frame registers and the saved
       core registers. */
    func_vc += nf * 4;
    /* Restart the VFP register assignment for the second traversal. */
    avregs = AVAIL_REGS_INITIALIZER;
  }
#endif
  /* Second traversal: compute the offset of each parameter inside the saved
     area (pn and sn count words in the core register and stack parts). */
  pn = struct_ret, sn = 0;
  while ((sym = sym->next)) {
    CType *type;
    type = &sym->type;
    size = type_size(type, &align);
    size = (size + 3) >> 2; /* size in words from here on */
    align = (align + 3) & ~3;
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
        || is_hgen_float_aggr(&sym->type))) {
      int fpn = assign_vfpreg(&avregs, align, size << 2);
      if (fpn >= 0)
        addr = fpn * 4;
      else
        goto from_stack; /* no VFP register left: the caller used the stack */
    } else
#endif
    if (pn < 4) {
#ifdef TCC_ARM_EABI
        /* Round pn up to the alignment of the parameter, in words. */
        pn = (pn + (align-1)/4) & -(align/4);
#endif
      addr = (nf + pn) * 4;
      pn += size;
      /* Words that did not fit in the core registers are on the stack. */
      if (!sn && pn > 4)
        sn = (pn - 4);
    } else {
#ifdef TCC_ARM_EABI
from_stack:
        sn = (sn + (align-1)/4) & -(align/4);
#endif
      addr = (n + nf + sn) * 4;
      sn += size;
    }
    /* The 12 added to addr presumably skips the three words (fp, ip, lr)
       saved right above the frame pointer -- confirm. */
    sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
             addr + 12);
  }
  /* Reset the per function state. */
  last_itod_magic=0;
  leaffunc = 1;
  loc = 0;
}
1389
1390 /* generate function epilog */
1391 void gfunc_epilog(void)
1392 {
1393   uint32_t x;
1394   int diff;
1395   /* Copy float return value to core register if base standard is used and
1396      float computation is made with VFP */
1397 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1398   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1399     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1400       o(0xEE100A10); /* fmrs r0, s0 */
1401     else {
1402       o(0xEE100B10); /* fmrdl r0, d0 */
1403       o(0xEE301B10); /* fmrdh r1, d0 */
1404     }
1405   }
1406 #endif
1407   o(0xE89BA800); /* restore fp, sp, pc */
1408   diff = (-loc + 3) & -4;
1409 #ifdef TCC_ARM_EABI
1410   if(!leaffunc)
1411     diff = ((diff + 11) & -8) - 4;
1412 #endif
1413   if(diff > 0) {
1414     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1415     if(x)
1416       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1417     else {
1418       int addr;
1419       addr=ind;
1420       o(0xE59FC004); /* ldr ip,[pc+4] */
1421       o(0xE04BD00C); /* sub sp,fp,ip  */
1422       o(0xE1A0F00E); /* mov pc,lr */
1423       o(diff);
1424       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1425     }
1426   }
1427 }
1428
1429 /* generate a jump to a label */
1430 int gjmp(int t)
1431 {
1432   int r;
1433   r=ind;
1434   o(0xE0000000|encbranch(r,t,1));
1435   return r;
1436 }
1437
/* Emit an unconditional branch to the known address 'a'; the position of the
   branch instruction is not needed by callers. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1443
1444 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1445 int gtst(int inv, int t)
1446 {
1447   int v, r;
1448   uint32_t op;
1449   v = vtop->r & VT_VALMASK;
1450   r=ind;
1451   if (v == VT_CMP) {
1452     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1453     op|=encbranch(r,t,1);
1454     o(op);
1455     t=r;
1456   } else if (v == VT_JMP || v == VT_JMPI) {
1457     if ((v & 1) == inv) {
1458       if(!vtop->c.i)
1459         vtop->c.i=t;
1460       else {
1461         uint32_t *x;
1462         int p,lp;
1463         if(t) {
1464           p = vtop->c.i;
1465           do {
1466             p = decbranch(lp=p);
1467           } while(p);
1468           x = (uint32_t *)(cur_text_section->data + lp);
1469           *x &= 0xff000000;
1470           *x |= encbranch(lp,t,1);
1471         }
1472         t = vtop->c.i;
1473       }
1474     } else {
1475       t = gjmp(t);
1476       gsym(vtop->c.i);
1477     }
1478   }
1479   vtop--;
1480   return t;
1481 }
1482
/* generate an integer binary operation */
/* The operands are the two entries on top of the value stack; they are
   replaced by a single entry describing the result. */
void gen_opi(int op)
{
  /* c first selects how the operation is generated (1: data processing
     instruction, 2: shift, 3: call to a library function); it is reused as a
     scratch register number afterwards. */
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  unsigned short retreg = REG_IRET; /* register holding the result of a call */

  c=0;
  /* For c == 1, opc is the value placed at bit 20 and above of the
     instruction word (see the second switch). */
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      /* Multiplication is emitted right away: both operands go to integer
         registers and the result replaces the first operand. */
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    /* For shifts, opc is the shift type placed at bit 5 (see case 2 below). */
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    /* Divisions and remainders are done by calling a library function. */
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* The result is read from REG_LRET instead of REG_IRET. */
      func=TOK___aeabi_idivmod;
      retreg=REG_LRET;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      func=TOK___aeabi_uidivmod;
      retreg=REG_LRET;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* Widening multiply: the result uses two registers (r and r2 of the
         remaining entry), both different from the operand registers. */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      /* Every other operator is handled as a comparison: only the flags are
         produced (see the VT_CMP result set after 'done'). */
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* A constant first operand cannot be encoded as such: for the
         subtractions, swap the operands and use the reversed instruction. */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* Flags or pending jumps must be turned into a value before any other
         instruction is emitted. */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* Load the first operand in a register (bits 16-19 of the instruction). */
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20)|(c<<16);
      /* A constant second operand is used as an immediate when stuff_const()
         manages to encode it. */
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        uint32_t x;
        x=stuff_const(opc|0x2000000,vtop->c.i);
        if(x) {
          r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
          o(x|(r<<12));
          goto done;
        }
      }
      /* Otherwise the second operand goes to a register as well. */
      fr=intr(gv(RC_INT));
      r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
      o(opc|(r<<12)|fr);
done:
      vtop--;
      /* For comparisons the result is the state of the flags. */
      if (op >= TOK_ULT && op <= TOK_GT) {
        vtop->r = VT_CMP;
        vtop->c.i = op;
      }
      break;
    case 2:
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* Load the value to shift in a register. */
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      opc|=r;
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* Constant shift amount: only its low 5 bits are encoded. */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|(c<<7)|(fr<<12));
      } else {
        /* Shift amount taken from a register. */
        fr=intr(gv(RC_INT));
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* Put the library function below its two arguments, call it and
         describe the result as living in retreg. */
      vpush_global_sym(&func_old_type, func);
      vrott(3);
      gfunc_call(2);
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1652
1653 #ifdef TCC_ARM_VFP
1654 static int is_zero(int i)
1655 {
1656   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1657     return 0;
1658   if (vtop[i].type.t == VT_FLOAT)
1659     return (vtop[i].c.f == 0.f);
1660   else if (vtop[i].type.t == VT_DOUBLE)
1661     return (vtop[i].c.d == 0.0);
1662   return (vtop[i].c.ld == 0.l);
1663 }
1664
/* generate a floating point operation 'v = t1 op t2' instruction. The
 *    two operands are guaranted to have the same floating point type */
/* VFP version. The operands are the two entries on top of the value stack;
   they are replaced by one entry holding the result. */
void gen_opf(int op)
{
  uint32_t x; /* instruction word being assembled */
  int fneg=0,r; /* fneg: a single operand negation is emitted instead */
  x=0xEE000A00|T2CPR(vtop->type.t);
  switch(op) {
    case '+':
      /* Adding a constant zero generates no code: the other operand is the
         result. */
      if(is_zero(-1))
        vswap();
      if(is_zero(0)) {
        vtop--;
        return;
      }
      x|=0x300000;
      break;
    case '-':
      x|=0x300040;
      /* x - 0: nothing to generate */
      if(is_zero(0)) {
        vtop--;
        return;
      }
      /* 0 - x: drop the zero and negate x */
      if(is_zero(-1)) {
        x|=0x810000; /* fsubX -> fnegX */
        vswap();
        vtop--;
        fneg=1;
      }
      break;
    case '*':
      x|=0x200000;
      break;
    case '/':
      x|=0x800000;
      break;
    default:
      /* Everything else must be a comparison. */
      if(op < TOK_ULT || op > TOK_GT) {
        tcc_error("unknown fp op %x!",op);
        return;
      }
      /* A constant zero as first operand is moved to the second place so
         that the compare with zero form can be used; the condition is
         mirrored accordingly. */
      if(is_zero(-1)) {
        vswap();
        switch(op) {
          case TOK_LT: op=TOK_GT; break;
          case TOK_GE: op=TOK_ULE; break;
          case TOK_LE: op=TOK_GE; break;
          case TOK_GT: op=TOK_ULT; break;
        }
      }
      x|=0xB40040; /* fcmpX */
      if(op!=TOK_EQ && op!=TOK_NE)
        x|=0x80; /* fcmpX -> fcmpeX */
      if(is_zero(0)) {
        vtop--;
        o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
      } else {
        /* Second operand in the low register field, first operand in bits
           12-15. */
        x|=vfpr(gv(RC_FLOAT));
        vswap();
        o(x|(vfpr(gv(RC_FLOAT))<<12));
        vtop--;
      }
      o(0xEEF1FA10); /* fmstat */

      /* Translate the operator into the condition tested on the flags
         transferred by fmstat. */
      switch(op) {
        case TOK_LE: op=TOK_ULE; break;
        case TOK_LT: op=TOK_ULT; break;
        case TOK_UGE: op=TOK_GE; break;
        case TOK_UGT: op=TOK_GT; break;
      }

      /* The result is the state of the flags. */
      vtop->r = VT_CMP;
      vtop->c.i = op;
      return;
  }
  /* Arithmetic operation: the operand on top goes in the low register field. */
  r=gv(RC_FLOAT);
  x|=vfpr(r);
  r=regmask(r);
  if(!fneg) {
    int r2;
    /* The other operand goes in bits 16-19. */
    vswap();
    r2=gv(RC_FLOAT);
    x|=vfpr(r2)<<16;
    r|=regmask(r2);
  }
  /* Destination register (bits 12-15), chosen with the operand registers as
     hint. */
  vtop->r=get_reg_ex(RC_FLOAT,r);
  if(!fneg)
    vtop--;
  o(x|(vfpr(vtop->r)<<12));
}
1755
1756 #else
1757 static uint32_t is_fconst()
1758 {
1759   long double f;
1760   uint32_t r;
1761   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1762     return 0;
1763   if (vtop->type.t == VT_FLOAT)
1764     f = vtop->c.f;
1765   else if (vtop->type.t == VT_DOUBLE)
1766     f = vtop->c.d;
1767   else
1768     f = vtop->c.ld;
1769   if(!ieee_finite(f))
1770     return 0;
1771   r=0x8;
1772   if(f<0.0) {
1773     r=0x18;
1774     f=-f;
1775   }
1776   if(f==0.0)
1777     return r;
1778   if(f==1.0)
1779     return r|1;
1780   if(f==2.0)
1781     return r|2;
1782   if(f==3.0)
1783     return r|3;
1784   if(f==4.0)
1785     return r|4;
1786   if(f==5.0)
1787     return r|5;
1788   if(f==0.5)
1789     return r|6;
1790   if(f==10.0)
1791     return r|7;
1792   return 0;
1793 }
1794
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranted to have the same floating point type */
/* FPA version. The operands are the two entries on top of the value stack;
   they are replaced by one entry holding the result. */
void gen_opf(int op)
{
  /* x: instruction word, r: first operand register (bits 16-19), r2: second
     operand register or immediate code (low bits), c1/c2: is_fconst() result
     for the first and second operand (0 when not usable as immediate) */
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* Select the precision from the type of the operands. */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  switch(op)
  {
    case '+':
      /* Addition commutes: when the second operand is no immediate, swap so
         that a possible immediate first operand comes second. */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* A negative immediate (code above 0xf) is subtracted instead. */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* Immediate second operand: subtract it when positive, otherwise
           the instruction is left as is with the magnitude as operand. */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* Positive immediate first operand: reverse subtract. */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* Only a positive immediate is usable; try the first operand when the
         second one does not qualify. */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* Positive immediate first operand: reverse divide. */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* Translate the operator into the condition tested on the flags. */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparison on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* Only the first operand is an immediate: swap the operands and
           mirror the condition. */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* Negative immediate: switch to the other compare instruction and
             use the magnitude. */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        /* The result is the state of the flags. */
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* Destination field (bits 12-15): 15 for a comparison, otherwise a float
     register chosen with the operand registers as hint. */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* With an immediate second operand there is a single operand register. */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1958 #endif
1959
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
/* The integer is the entry on top of the value stack; it is replaced by the
   converted value. An error is raised for any other source type. */
ST_FUNC void gen_cvt_itof1(int t)
{
  uint32_t r, r2;
  int bt; /* basic type of the value to convert */
  bt=vtop->type.t & VT_BTYPE;
  if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
    /* 32 bit or smaller integers are converted inline. */
#ifndef TCC_ARM_VFP
    uint32_t dsize = 0;
#endif
    r=intr(gv(RC_INT));
#ifdef TCC_ARM_VFP
    /* Move the integer to a VFP register, then convert it in place. */
    r2=vfpr(vtop->r=get_reg(RC_FLOAT));
    o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
    r2|=r2<<12;
    if(!(vtop->type.t & VT_UNSIGNED))
      r2|=0x80;                /* fuitoX -> fsitoX */
    o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
#else
    r2=fpr(vtop->r=get_reg(RC_FLOAT));
    if((t & VT_BTYPE) != VT_FLOAT)
      dsize=0x80;    /* flts -> fltd */
    o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
    /* flts converts as signed: for an unsigned int, 4294967296.0 is added
       afterwards when the signed interpretation was negative (the two
       instructions below are conditional). */
    if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
      uint32_t off = 0;
      o(0xE3500000|(r<<12));        /* cmp */
      r=fpr(get_reg(RC_FLOAT));
      /* Reuse the constant emitted by a previous conversion when it is
         close enough (at most 255 words away). */
      if(last_itod_magic) {
        off=ind+8-last_itod_magic;
        off/=4;
        if(off>255)
          off=0;
      }
      o(0xBD1F0100|(r<<12)|off);    /* ldflts */
      if(!off) {
        /* Otherwise emit the constant inline and jump over it. */
        o(0xEA000000);              /* b */
        last_itod_magic=ind;
        o(0x4F800000);              /* 4294967296.0f */
      }
      o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
    }
#endif
    return;
  } else if(bt == VT_LLONG) {
    /* long long values are converted by a library function selected from
       the destination type and the signedness of the source. */
    int func;
    CType *func_type = 0;
    if((t & VT_BTYPE) == VT_FLOAT) {
      func_type = &func_float_type;
      if(vtop->type.t & VT_UNSIGNED)
        func=TOK___floatundisf;
      else
        func=TOK___floatdisf;
#if LDOUBLE_SIZE != 8
    } else if((t & VT_BTYPE) == VT_LDOUBLE) {
      func_type = &func_ldouble_type;
      if(vtop->type.t & VT_UNSIGNED)
        func=TOK___floatundixf;
      else
        func=TOK___floatdixf;
    } else if((t & VT_BTYPE) == VT_DOUBLE) {
#else
    } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
#endif
      func_type = &func_double_type;
      if(vtop->type.t & VT_UNSIGNED)
        func=TOK___floatundidf;
      else
        func=TOK___floatdidf;
    }
    /* func_type stays null when the destination type is not handled. */
    if(func_type) {
      vpush_global_sym(func_type, func);
      vswap();
      gfunc_call(1);
      /* Describe the result as living in TREG_F0. */
      vpushi(0);
      vtop->r=TREG_F0;
      return;
    }
  }
  tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
}
2041
2042 /* convert fp to int 't' type */
2043 void gen_cvt_ftoi(int t)
2044 {
2045   uint32_t r, r2;
2046   int u, func = 0;
2047   u=t&VT_UNSIGNED;
2048   t&=VT_BTYPE;
2049   r2=vtop->type.t & VT_BTYPE;
2050   if(t==VT_INT) {
2051 #ifdef TCC_ARM_VFP
2052     r=vfpr(gv(RC_FLOAT));
2053     u=u?0:0x10000;
2054     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2055     r2=intr(vtop->r=get_reg(RC_INT));
2056     o(0xEE100A10|(r<<16)|(r2<<12));
2057     return;
2058 #else
2059     if(u) {
2060       if(r2 == VT_FLOAT)
2061         func=TOK___fixunssfsi;
2062 #if LDOUBLE_SIZE != 8
2063       else if(r2 == VT_LDOUBLE)
2064         func=TOK___fixunsxfsi;
2065       else if(r2 == VT_DOUBLE)
2066 #else
2067       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2068 #endif
2069         func=TOK___fixunsdfsi;
2070     } else {
2071       r=fpr(gv(RC_FLOAT));
2072       r2=intr(vtop->r=get_reg(RC_INT));
2073       o(0xEE100170|(r2<<12)|r);
2074       return;
2075     }
2076 #endif
2077   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2078     if(r2 == VT_FLOAT)
2079       func=TOK___fixsfdi;
2080 #if LDOUBLE_SIZE != 8
2081     else if(r2 == VT_LDOUBLE)
2082       func=TOK___fixxfdi;
2083     else if(r2 == VT_DOUBLE)
2084 #else
2085     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2086 #endif
2087       func=TOK___fixdfdi;
2088   }
2089   if(func) {
2090     vpush_global_sym(&func_old_type, func);
2091     vswap();
2092     gfunc_call(1);
2093     vpushi(0);
2094     if(t == VT_LLONG)
2095       vtop->r2 = REG_LRET;
2096     vtop->r = REG_IRET;
2097     return;
2098   }
2099   tcc_error("unimplemented gen_cvt_ftoi!");
2100 }
2101
2102 /* convert from one floating point type to another */
2103 void gen_cvt_ftof(int t)
2104 {
2105 #ifdef TCC_ARM_VFP
2106   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2107     uint32_t r = vfpr(gv(RC_FLOAT));
2108     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2109   }
2110 #else
2111   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2112   gv(RC_FLOAT);
2113 #endif
2114 }
2115
2116 /* computed goto support */
2117 void ggoto(void)
2118 {
2119   gcall_or_jmp(1);
2120   vtop--;
2121 }
2122
2123 /* Save the stack pointer onto the stack and return the location of its address */
2124 ST_FUNC void gen_vla_sp_save(int addr) {
2125     SValue v;
2126     v.type.t = VT_PTR;
2127     v.r = VT_LOCAL | VT_LVAL;
2128     v.c.i = addr;
2129     store(TREG_SP, &v);
2130 }
2131
2132 /* Restore the SP from a location on the stack */
2133 ST_FUNC void gen_vla_sp_restore(int addr) {
2134     SValue v;
2135     v.type.t = VT_PTR;
2136     v.r = VT_LOCAL | VT_LVAL;
2137     v.c.i = addr;
2138     load(TREG_SP, &v);
2139 }
2140
2141 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2142 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2143     int r = intr(gv(RC_INT));
2144     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2145 #ifdef TCC_ARM_EABI
2146     if (align < 8)
2147         align = 8;
2148 #else
2149     if (align < 4)
2150         align = 4;
2151 #endif
2152     if (align & (align - 1))
2153         tcc_error("alignment is not a power of 2: %i", align);
2154     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2155     vpop();
2156 }
2157
2158 /* end of ARM code generator */
2159 /*************************************************************/
2160 #endif
2161 /*************************************************************/