arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
/* Build-time sanity check: the EABI configuration is only accepted together
   with VFP float support (the non-VFP float path in this file is the FPA
   one, see fpr()). */
#if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
#error "Currently TinyCC only supports float computation with VFP instructions"
#endif

/* number of available registers: this is the size of reg_classes[], i.e.
   5 integer registers (r0-r3, r12) plus 8 float registers with VFP or
   4 without */
#ifdef TCC_ARM_VFP
#define NB_REGS            13
#else
#define NB_REGS             9
#endif

/* CPU version to generate code for; 5 unless the build overrides it
   (presumably the ARM architecture version -- TODO confirm against users
   of this macro) */
#ifndef TCC_CPU_VERSION
# define TCC_CPU_VERSION 5
#endif
  40
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see the gv2() code, which
   makes assumptions about this ordering). */
/* Register class bit masks. RC_INT and RC_FLOAT are the two generic classes;
   every other mask is a single bit that designates exactly one register
   (see reg_classes[] for the register <-> mask association). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_R0      0x0004
#define RC_R1      0x0008
#define RC_R2      0x0010
#define RC_R3      0x0020
#define RC_R12     0x0040
#define RC_F0      0x0080
#define RC_F1      0x0100
#define RC_F2      0x0200
#define RC_F3      0x0400
/* f4-f7 are only allocatable in the VFP configuration */
#ifdef TCC_ARM_VFP
#define RC_F4      0x0800
#define RC_F5      0x1000
#define RC_F6      0x2000
#define RC_F7      0x4000
#endif
#define RC_IRET    RC_R0  /* function return: integer register */
#define RC_IRE2    RC_R1  /* function return: second integer register */
#define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
#ifdef TCC_ARM_VFP
/* Opcode bit selected by the operand type 't': 0x100 when the basic type is
   anything but VT_FLOAT, 0 for VT_FLOAT. It is ORed into VFP opcodes (see the
   register copy in load()); the same 0x100 bit turns flds into fldd there. */
#define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
#endif

/* return registers for function */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_IRE2 TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */
  94
/* With the EABI, the 64-bit division and modulo helper tokens are redirected
   to the combined __aeabi_(u)ldivmod tokens: quotient and remainder share one
   helper per signedness. */
#ifdef TCC_ARM_EABI
#define TOK___divdi3 TOK___aeabi_ldivmod
#define TOK___moddi3 TOK___aeabi_ldivmod
#define TOK___udivdi3 TOK___aeabi_uldivmod
#define TOK___umoddi3 TOK___aeabi_uldivmod
#endif

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. */
/* #define FUNC_STRUCT_PARAM_AS_PTR */

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
/* NOTE: both the VFP branch and the fallback below define LDOUBLE_SIZE as 8,
   so long double is 8 bytes in every configuration of this file. */
#ifdef TCC_ARM_VFP
#define LDOUBLE_SIZE  8
#endif

#ifndef LDOUBLE_SIZE
#define LDOUBLE_SIZE  8
#endif

#ifdef TCC_ARM_EABI
#define LDOUBLE_ALIGN 8
#else
#define LDOUBLE_ALIGN 4
#endif

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* plain 'char' is unsigned on this target */
#define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #define USING_GLOBALS
 136 #include "tcc.h"
 137
/* float ABI in effect; copied from the TCCState by arm_init() in the
   EABI+VFP configuration */
enum float_abi float_abi;

/* Class mask of every allocatable register, indexed by TREG_xxx number.
   Each entry combines the generic class (RC_INT or RC_FLOAT) with the bit
   that is specific to that register. The table has NB_REGS entries only:
   TREG_SP and TREG_LR have no entry. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* r0 */ RC_INT | RC_R0,
    /* r1 */ RC_INT | RC_R1,
    /* r2 */ RC_INT | RC_R2,
    /* r3 */ RC_INT | RC_R3,
    /* r12 */ RC_INT | RC_R12,
    /* f0 */ RC_FLOAT | RC_F0,
    /* f1 */ RC_FLOAT | RC_F1,
    /* f2 */ RC_FLOAT | RC_F2,
    /* f3 */ RC_FLOAT | RC_F3,
#ifdef TCC_ARM_VFP
 /* d4/s8 */ RC_FLOAT | RC_F4,
/* d5/s10 */ RC_FLOAT | RC_F5,
/* d6/s12 */ RC_FLOAT | RC_F6,
/* d7/s14 */ RC_FLOAT | RC_F7,
#endif
};
 157
/* Per-function code generation state.
   NOTE(review): these three variables are not used in the part of the file
   visible here; judging by their names they track the position of the stack
   adjustment instruction, the last int-to-double magic constant and whether
   the current function is a leaf -- confirm against their users. */
static int func_sub_sp_offset, last_itod_magic;
static int leaffunc;

#if defined(CONFIG_TCC_BCHECK)
/* Bound checking state of the current function, set by gen_bounds_prolog(): */
static addr_t func_bound_offset;      /* lbounds_section->data_offset at function entry */
static unsigned long func_bound_ind;  /* code offset of the placeholder reserved in the prolog */
static int func_bound_add_epilog;     /* forces the epilog code (set when a setjmp-like function is called) */
#endif
 166
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
/* Types used to describe runtime helpers returning float/double:
   func_float_type and func_double_type are old-style (FUNC_OLD) cdecl
   function types whose return types are float_type and double_type. */
static CType float_type, double_type, func_float_type, func_double_type;

/* Target initialisation for the EABI + VFP configuration: builds the helper
   function types above and latches the float ABI selected in 's'. */
ST_FUNC void arm_init(struct TCCState *s)
{
    float_type.t = VT_FLOAT;
    double_type.t = VT_DOUBLE;
    func_float_type.t = VT_FUNC;
    func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
    func_double_type.t = VT_FUNC;
    func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);

    float_abi = s->float_abi;
#ifndef TCC_ARM_HARDFLOAT
# warning "soft float ABI currently not supported: default to softfp"
#endif
}
#else
/* Without EABI + VFP the float helper types are simply func_old_type. */
#define func_float_type func_old_type
#define func_double_type func_old_type
#define func_ldouble_type func_old_type

/* Target initialisation for the other configurations: nothing to do, the
   deprecation warnings below are compiled out. */
ST_FUNC void arm_init(struct TCCState *s)
{
#if 0
#if !defined (TCC_ARM_VFP)
    tcc_warning("Support for FPA is deprecated and will be removed in next"
                " release");
#endif
#if !defined (TCC_ARM_EABI)
    tcc_warning("Support for OABI is deprecated and will be removed in next"
                " release");
#endif
#endif
}
#endif
 201
 202 #define CHECK_R(r) ((r) >= TREG_R0 && (r) <= TREG_LR)
 203
 204 static int two2mask(int a,int b) {
 205   if (!CHECK_R(a) || !CHECK_R(b))
 206     tcc_error("compiler error! registers %i,%i is not valid",a,b);
 207   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 208 }
 209
 210 static int regmask(int r) {
 211   if (!CHECK_R(r))
 212     tcc_error("compiler error! register %i is not valid",r);
 213   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 214 }
 215
 216 /******************************************************/
 217
 218 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 219 const char *default_elfinterp(struct TCCState *s)
 220 {
 221     if (s->float_abi == ARM_HARD_FLOAT)
 222         return "/lib/ld-linux-armhf.so.3";
 223     else
 224         return "/lib/ld-linux.so.3";
 225 }
 226 #endif
 227
 228 void o(uint32_t i)
 229 {
 230   /* this is a good place to start adding big-endian support*/
 231   int ind1;
 232   if (nocode_wanted)
 233     return;
 234   ind1 = ind + 4;
 235   if (!cur_text_section)
 236     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 237          "can't evaluate constant expressions outside of a function.");
 238   if (ind1 > cur_text_section->data_allocated)
 239     section_realloc(cur_text_section, ind1);
 240   cur_text_section->data[ind++] = i&255;
 241   i>>=8;
 242   cur_text_section->data[ind++] = i&255;
 243   i>>=8;
 244   cur_text_section->data[ind++] = i&255;
 245   i>>=8;
 246   cur_text_section->data[ind++] = i;
 247 }
 248
 249 static uint32_t stuff_const(uint32_t op, uint32_t c)
 250 {
 251   int try_neg=0;
 252   uint32_t nc = 0, negop = 0;
 253
 254   switch(op&0x1F00000)
 255   {
 256     case 0x800000: //add
 257     case 0x400000: //sub
 258       try_neg=1;
 259       negop=op^0xC00000;
 260       nc=-c;
 261       break;
 262     case 0x1A00000: //mov
 263     case 0x1E00000: //mvn
 264       try_neg=1;
 265       negop=op^0x400000;
 266       nc=~c;
 267       break;
 268     case 0x200000: //xor
 269       if(c==~0)
 270         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 271       break;
 272     case 0x0: //and
 273       if(c==~0)
 274         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 275     case 0x1C00000: //bic
 276       try_neg=1;
 277       negop=op^0x1C00000;
 278       nc=~c;
 279       break;
 280     case 0x1800000: //orr
 281       if(c==~0)
 282         return (op&0xFFF0FFFF)|0x1E00000;
 283       break;
 284   }
 285   do {
 286     uint32_t m;
 287     int i;
 288     if(c<256) /* catch undefined <<32 */
 289       return op|c;
 290     for(i=2;i<32;i+=2) {
 291       m=(0xff>>i)|(0xff<<(32-i));
 292       if(!(c&~m))
 293         return op|(i<<7)|(c<<i)|(c>>(32-i));
 294     }
 295     op=negop;
 296     c=nc;
 297   } while(try_neg--);
 298   return 0;
 299 }
 300
 301
/* Emit 'op' (only add or sub) with the arbitrary 32-bit constant 'v'.
   When stuff_const() cannot encode the constant in one instruction, the
   operation is split into a chain of two to four instructions, each one
   handling an 8-bit window of the constant. The code is emitted with o(). */
void stuff_const_harder(uint32_t op, uint32_t v) {
  uint32_t x;
  x=stuff_const(op,v);
  if(x)
    o(x);
  else {
    /* a[i]: the 8-bit window 0xff rotated right by 2*i bits
       o2: 'op' with its first operand register field (bits 16-19) replaced
           by its destination register field (bits 12-15), so that the
           follow-up instructions accumulate into the destination
       no/n2/nv: the opposite operation (add <-> sub) on the negated constant */
    uint32_t a[16], nv, no, o2, n2;
    int i,j,k;
    a[0]=0xff;
    o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
    for(i=1;i<16;i++)
      a[i]=(a[i-1]>>2)|(a[i-1]<<30);
    /* two instructions, constant as is */
    for(i=0;i<12;i++)
      for(j=i<4?i+12:15;j>=i+4;j--)
        if((v&(a[i]|a[j]))==v) {
          o(stuff_const(op,v&a[i]));
          o(stuff_const(o2,v&a[j]));
          return;
        }
    /* two instructions, negated constant */
    no=op^0xC00000;
    n2=o2^0xC00000;
    nv=-v;
    for(i=0;i<12;i++)
      for(j=i<4?i+12:15;j>=i+4;j--)
        if((nv&(a[i]|a[j]))==nv) {
          o(stuff_const(no,nv&a[i]));
          o(stuff_const(n2,nv&a[j]));
          return;
        }
    /* three instructions, constant as is */
    for(i=0;i<8;i++)
      for(j=i+4;j<12;j++)
        for(k=i<4?i+12:15;k>=j+4;k--)
          if((v&(a[i]|a[j]|a[k]))==v) {
            o(stuff_const(op,v&a[i]));
            o(stuff_const(o2,v&a[j]));
            o(stuff_const(o2,v&a[k]));
            return;
          }
    /* three instructions, negated constant (no and nv are recomputed here
       with the same values as above) */
    no=op^0xC00000;
    nv=-v;
    for(i=0;i<8;i++)
      for(j=i+4;j<12;j++)
        for(k=i<4?i+12:15;k>=j+4;k--)
          if((nv&(a[i]|a[j]|a[k]))==nv) {
            o(stuff_const(no,nv&a[i]));
            o(stuff_const(n2,nv&a[j]));
            o(stuff_const(n2,nv&a[k]));
            return;
          }
    /* last resort: one instruction per byte of the constant */
    o(stuff_const(op,v&a[0]));
    o(stuff_const(o2,v&a[4]));
    o(stuff_const(o2,v&a[8]));
    o(stuff_const(o2,v&a[12]));
  }
}
 358
 359 uint32_t encbranch(int pos, int addr, int fail)
 360 {
 361   addr-=pos+8;
 362   addr/=4;
 363   if(addr>=0x1000000 || addr<-0x1000000) {
 364     if(fail)
 365       tcc_error("FIXME: function bigger than 32MB");
 366     return 0;
 367   }
 368   return 0x0A000000|(addr&0xffffff);
 369 }
 370
 371 int decbranch(int pos)
 372 {
 373   int x;
 374   x=*(uint32_t *)(cur_text_section->data + pos);
 375   x&=0x00ffffff;
 376   if(x&0x800000)
 377     x-=0x1000000;
 378   return x*4+pos+8;
 379 }
 380
 381 /* output a symbol and patch all calls to it */
 382 void gsym_addr(int t, int a)
 383 {
 384   uint32_t *x;
 385   int lt;
 386   while(t) {
 387     x=(uint32_t *)(cur_text_section->data + t);
 388     t=decbranch(lt=t);
 389     if(a==lt+4)
 390       *x=0xE1A00000; // nop
 391     else {
 392       *x &= 0xff000000;
 393       *x |= encbranch(lt,a,1);
 394     }
 395   }
 396 }
 397
 398 #ifdef TCC_ARM_VFP
 399 static uint32_t vfpr(int r)
 400 {
 401   if(r<TREG_F0 || r>TREG_F7)
 402     tcc_error("compiler error! register %i is no vfp register",r);
 403   return r - TREG_F0;
 404 }
 405 #else
 406 static uint32_t fpr(int r)
 407 {
 408   if(r<TREG_F0 || r>TREG_F3)
 409     tcc_error("compiler error! register %i is no fpa register",r);
 410   return r - TREG_F0;
 411 }
 412 #endif
 413
 414 static uint32_t intr(int r)
 415 {
 416   if(r == TREG_R12)
 417     return 12;
 418   if(r >= TREG_R0 && r <= TREG_R3)
 419     return r - TREG_R0;
 420   if (!(r >= TREG_SP && r <= TREG_LR))
 421     tcc_error("compiler error! register %i is no int register",r);
 422   return r + (13 - TREG_SP);
 423 }
 424
 425 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 426 {
 427   if(*off>maxoff || *off&((1<<shift)-1)) {
 428     uint32_t x, y;
 429     x=0xE280E000;
 430     if(*sgn)
 431       x=0xE240E000;
 432     x|=(*base)<<16;
 433     *base=14; // lr
 434     y=stuff_const(x,*off&~maxoff);
 435     if(y) {
 436       o(y);
 437       *off&=maxoff;
 438       return;
 439     }
 440     y=stuff_const(x,(*off+maxoff)&~maxoff);
 441     if(y) {
 442       o(y);
 443       *sgn=!*sgn;
 444       *off=((*off+maxoff)&~maxoff)-*off;
 445       return;
 446     }
 447     stuff_const_harder(x,*off&~maxoff);
 448     *off&=maxoff;
 449   }
 450 }
 451
 452 static uint32_t mapcc(int cc)
 453 {
 454   switch(cc)
 455   {
 456     case TOK_ULT:
 457       return 0x30000000; /* CC/LO */
 458     case TOK_UGE:
 459       return 0x20000000; /* CS/HS */
 460     case TOK_EQ:
 461       return 0x00000000; /* EQ */
 462     case TOK_NE:
 463       return 0x10000000; /* NE */
 464     case TOK_ULE:
 465       return 0x90000000; /* LS */
 466     case TOK_UGT:
 467       return 0x80000000; /* HI */
 468     case TOK_Nset:
 469       return 0x40000000; /* MI */
 470     case TOK_Nclear:
 471       return 0x50000000; /* PL */
 472     case TOK_LT:
 473       return 0xB0000000; /* LT */
 474     case TOK_GE:
 475       return 0xA0000000; /* GE */
 476     case TOK_LE:
 477       return 0xD0000000; /* LE */
 478     case TOK_GT:
 479       return 0xC0000000; /* GT */
 480   }
 481   tcc_error("unexpected condition code");
 482   return 0xE0000000; /* AL */
 483 }
 484
 485 static int negcc(int cc)
 486 {
 487   switch(cc)
 488   {
 489     case TOK_ULT:
 490       return TOK_UGE;
 491     case TOK_UGE:
 492       return TOK_ULT;
 493     case TOK_EQ:
 494       return TOK_NE;
 495     case TOK_NE:
 496       return TOK_EQ;
 497     case TOK_ULE:
 498       return TOK_UGT;
 499     case TOK_UGT:
 500       return TOK_ULE;
 501     case TOK_Nset:
 502       return TOK_Nclear;
 503     case TOK_Nclear:
 504       return TOK_Nset;
 505     case TOK_LT:
 506       return TOK_GE;
 507     case TOK_GE:
 508       return TOK_LT;
 509     case TOK_LE:
 510       return TOK_GT;
 511     case TOK_GT:
 512       return TOK_LE;
 513   }
 514   tcc_error("unexpected condition code");
 515   return TOK_NE;
 516 }
 517
/* load 'r' from value 'sv'

   r:  destination register (TREG_xxx number)
   sv: value to load: an lvalue (memory operand), a constant, the address of
       a local, a comparison result, a pending jump or another register

   Code is emitted with o(); lr is used as scratch register whenever an
   address must be computed first. Unsupported combinations end in a
   "load unimplemented!" error. */
void load(int r, SValue *sv)
{
  int v, ft, fc, fr, sign;
  uint32_t op;
  SValue v1;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.i;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL) {
    /* memory operand: reduce every case to "base register + offset" */
    uint32_t base = 0xB; // fp
    if(v == VT_LLOCAL) {
      /* the address itself is stored in a local: load it into lr first */
      v1.type.t = VT_PTR;
      v1.r = VT_LOCAL | VT_LVAL;
      v1.c.i = sv->c.i;
      load(TREG_LR, &v1);
      base = 14; /* lr */
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v == VT_CONST) {
      /* constant (possibly symbolic) address: materialize it in lr */
      v1.type.t = VT_PTR;
      v1.r = fr&~VT_LVAL;
      v1.c.i = sv->c.i;
      v1.sym=sv->sym;
      load(TREG_LR, &v1);
      base = 14; /* lr */
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v < VT_CONST) {
      /* the address is already in register v */
      base=intr(v);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
      /* in all the opcodes below, bit 0x800000 is set when the offset is
         added (sign == 0) */
      if(is_float(ft)) {
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED100A00; /* flds */
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* flds -> fldd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED100100;
        if(!sign)
          op|=0x800000;
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        else if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
      } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
                || (ft & VT_BTYPE) == VT_SHORT) {
        /* signed byte and (un)signed halfword loads: 8-bit offset split
           into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE1500090;
        if ((ft & VT_BTYPE) == VT_SHORT)
          op|=0x20;
        if ((ft & VT_UNSIGNED) == 0)
          op|=0x40;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word and unsigned byte loads: 12-bit offset; bit 0x400000
           selects the byte form */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5100000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000;
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  } else {
    if (v == VT_CONST) {
      /* constant: a single mov/mvn when encodable, otherwise the value is
         placed in the code right after a pc-relative load and a branch
         that skips it */
      op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
      if (fr & VT_SYM || !op) {
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM)
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.i);
      } else
        o(op);
      return;
    } else if (v == VT_LOCAL) {
      /* address of a local: fp + offset, using the same inline literal
         scheme when the offset is not encodable */
      op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
      if (fr & VT_SYM || !op) {
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM) // needed ?
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.i);
        o(0xE08B0000|(intr(r)<<12)|intr(r));
      } else
        o(op);
      return;
    } else if(v == VT_CMP) {
      /* comparison result: conditional mov #1 followed by mov #0 under the
         opposite condition */
      o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
      o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
      return;
    } else if (v == VT_JMP || v == VT_JMPI) {
      /* pending jumps: the fall-through path loads t and skips the next
         instruction; the jump chain is resolved to that next instruction,
         which loads t^1 */
      int t;
      t = v & 1;
      o(0xE3A00000|(intr(r)<<12)|t);
      o(0xEA000000);
      gsym(sv->c.i);
      o(0xE3A00000|(intr(r)<<12)|(t^1));
      return;
    } else if (v < VT_CONST) {
      /* register to register copy */
      if(is_float(ft))
#ifdef TCC_ARM_VFP
        o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
#else
        o(0xEE008180|(fpr(r)<<12)|fpr(v));
#endif
      else
        o(0xE1A00000|(intr(r)<<12)|intr(v));
      return;
    }
  }
  tcc_error("load unimplemented!");
}
 658
/* store register 'r' in lvalue 'v'

   r:  source register (TREG_xxx number)
   sv: destination: a local, a location whose address is in a register, or a
       constant (possibly symbolic) address

   Code is emitted with o(); lr is used as scratch register whenever an
   address must be computed first. Unsupported destinations end in a
   "store unimplemented" error. */
void store(int r, SValue *sv)
{
  SValue v1;
  int v, ft, fc, fr, sign;
  uint32_t op;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.i;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL || fr == VT_LOCAL) {
    /* reduce every case to "base register + offset" */
    uint32_t base = 0xb; /* fp */
    if(v < VT_CONST) {
      /* the address is already in register v */
      base=intr(v);
      v=VT_LOCAL;
      fc=sign=0;
    } else if(v == VT_CONST) {
      /* constant (possibly symbolic) address: materialize it in lr */
      v1.type.t = ft;
      v1.r = fr&~VT_LVAL;
      v1.c.i = sv->c.i;
      v1.sym=sv->sym;
      load(TREG_LR, &v1);
      base = 14; /* lr */
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
       /* in all the opcodes below, bit 0x800000 is set when the offset is
          added (sign == 0) */
       if(is_float(ft)) {
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED000A00; /* fsts */
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* fsts -> fstd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED000100;
        if(!sign)
          op|=0x800000;
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
        return;
      } else if((ft & VT_BTYPE) == VT_SHORT) {
        /* halfword store: 8-bit offset split into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE14000B0;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word and byte stores: 12-bit offset; bit 0x400000 selects the
           byte form */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5000000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000;
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  }
  tcc_error("store unimplemented");
}
 740
 741 static void gadd_sp(int val)
 742 {
 743   stuff_const_harder(0xE28DD000,val);
 744 }
 745
 746 /* 'is_jmp' is '1' if it is a jump */
 747 static void gcall_or_jmp(int is_jmp)
 748 {
 749   int r;
 750   uint32_t x;
 751   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 752     /* constant case */
 753         if(vtop->r & VT_SYM){
 754                 x=encbranch(ind,ind+vtop->c.i,0);
 755                 if(x) {
 756                 /* relocation case */
 757                   greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 758                   o(x|(is_jmp?0xE0000000:0xE1000000));
 759                 } else {
 760                         if(!is_jmp)
 761                                 o(0xE28FE004); // add lr,pc,#4
 762                         o(0xE51FF004);   // ldr pc,[pc,#-4]
 763                         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 764                         o(vtop->c.i);
 765                 }
 766 #ifdef CONFIG_TCC_BCHECK
 767                 if (tcc_state->do_bounds_check &&
 768                     (vtop->sym->v == TOK_setjmp ||
 769                      vtop->sym->v == TOK__setjmp ||
 770                      vtop->sym->v == TOK_sigsetjmp ||
 771                      vtop->sym->v == TOK___sigsetjmp))
 772                     func_bound_add_epilog = 1;
 773 #endif
 774         }else{
 775                 if(!is_jmp)
 776                         o(0xE28FE004); // add lr,pc,#4
 777                 o(0xE51FF004);   // ldr pc,[pc,#-4]
 778                 o(vtop->c.i);
 779         }
 780   } else {
 781     /* otherwise, indirect call */
 782 #ifdef CONFIG_TCC_BCHECK
 783     vtop->r &= ~VT_MUSTBOUND;
 784 #endif
 785     r = gv(RC_INT);
 786     if(!is_jmp)
 787       o(0xE1A0E00F);       // mov lr,pc
 788     o(0xE1A0F000|intr(r)); // mov pc,r
 789   }
 790 }
 791
 792 #if defined(CONFIG_TCC_BCHECK)
 793
 794 static void gen_bounds_call(int v)
 795 {
 796     Sym *sym = external_global_sym(v, &func_old_type);
 797
 798     greloc(cur_text_section, sym, ind, R_ARM_PC24);
 799     o(0xebfffffe);
 800 }
 801
/* generate a bounded pointer addition

   Calls __bound_ptr_add with the two values on top of the value stack and
   pushes the result, which lives in REG_IRET and is flagged VT_BOUNDED.
   The constant part of the pushed value records the offset, in the
   relocation section of the code, of the relocation emitted for that call
   so that gen_bounded_ptr_deref() can retarget it later. */
ST_FUNC void gen_bounded_ptr_add(void)
{
    vpush_global_sym(&func_old_type, TOK___bound_ptr_add);
    /* bring the function below its two arguments */
    vrott(3);
    gfunc_call(2);
    vpushi(0);
    /* returned pointer is in REG_IRET */
    vtop->r = REG_IRET | VT_BOUNDED;
    /* no call (hence no relocation) was emitted when code is suppressed */
    if (nocode_wanted)
        return;
    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(Elf32_Rel));
}
 816
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested

   The relocation recorded by gen_bounded_ptr_add() (its offset is in
   vtop->c.i) is redirected to the __bound_ptr_indirN function matching the
   size of the accessed type. Sizes without such a function are left
   untouched. */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    Elf32_Rel *rel;
    Sym *sym;

    if (nocode_wanted)
        return;

    /* select the checking function from the size of the access */
    size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        /* may happen with struct member access */
        return;
        //tcc_error("unhandled size when dereferencing bounded pointer");
        //func = 0;
        //break;
    }
    sym = external_global_sym(func, &func_old_type);
    /* make sure the function has an ELF symbol (sym->c) to relocate against */
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);
    /* patch relocation */
    /* XXX: find a better solution ? */
    rel = (Elf32_Rel *)(cur_text_section->reloc->data + vtop->c.i);
    /* keep the relocation type, only replace the symbol */
    rel->r_info = ELF32_R_INFO(sym->c, ELF32_R_TYPE(rel->r_info));
}
 852
/* Start the bound checking bookkeeping of a function: remember where its
   local bounds table starts and reserve four instructions (nops for now)
   that gen_bounds_epilog() overwrites with the call to __bound_local_new
   when the function turns out to need it. */
static void gen_bounds_prolog(void)
{
    /* leave some room for bound checking code */
    func_bound_offset = lbounds_section->data_offset;
    func_bound_ind = ind;
    func_bound_add_epilog = 0;
    /* four nops (mov r0,r0); the comments name what replaces them later */
    o(0xe1a00000);  /* ld r0,lbounds_section->data_offset */
    o(0xe1a00000);
    o(0xe1a00000);
    o(0xe1a00000);  /* call __bound_local_new */
}
 864
/* Finish the bound checking bookkeeping of a function.

   Nothing is emitted unless entries were added to the local bounds table
   since gen_bounds_prolog() or the epilog was explicitly requested
   (func_bound_add_epilog). Otherwise the table is terminated, the space
   reserved in the prolog is filled with the call to __bound_local_new (only
   when the table is not empty) and the call to __bound_local_delete is
   emitted at the current position. */
static void gen_bounds_epilog(void)
{
    addr_t saved_ind;
    addr_t *bounds_ptr;
    Sym *sym_data;
    int offset_modified = func_bound_offset != lbounds_section->data_offset;

    if (!offset_modified && !func_bound_add_epilog)
        return;

    /* add end of table info */
    bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
    *bounds_ptr = 0;

    /* symbol designating this function's part of the bounds table */
    sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                           func_bound_offset, lbounds_section->data_offset);

    /* generate bound local allocation */
    if (offset_modified) {
        /* temporarily move the output position back to the placeholder
           reserved by gen_bounds_prolog(); exactly four words are written */
        saved_ind = ind;
        ind = func_bound_ind;
        o(0xe59f0000);  /* ldr r0, [pc] */
        o(0xea000000);  /* b $+4 */
        greloc(cur_text_section, sym_data, ind, R_ARM_ABS32);
        o(0x00000000);  /* lbounds_section->data_offset */
        gen_bounds_call(TOK___bound_local_new);
        ind = saved_ind;
    }

    /* generate bound check local freeing */
    /* r0, r1 and d0 are saved around the call and restored afterwards */
    o(0xe92d0003);  /* push {r0,r1} */
    o(0xed2d0b02);  /* vpush {d0} */
    o(0xe59f0000);  /* ldr r0, [pc] */
    o(0xea000000);  /* b $+4 */
    greloc(cur_text_section, sym_data, ind, R_ARM_ABS32);
    o(0x00000000);  /* lbounds_section->data_offset */
    gen_bounds_call(TOK___bound_local_delete);
    o(0xecbd0b02); /* vpop {d0} */
    o(0xe8bd0003); /* pop {r0,r1} */
}
 905 #endif
 906
 907 static int unalias_ldbl(int btype)
 908 {
 909 #if LDOUBLE_SIZE == 8
 910     if (btype == VT_LDOUBLE)
 911       btype = VT_DOUBLE;
 912 #endif
 913     return btype;
 914 }
 915
 916 /* Return whether a structure is an homogeneous float aggregate or not.
 917    The answer is true if all the elements of the structure are of the same
 918    primitive float type and there is less than 4 elements.
 919
 920    type: the type corresponding to the structure to be tested */
 921 static int is_hgen_float_aggr(CType *type)
 922 {
 923   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 924     struct Sym *ref;
 925     int btype, nb_fields = 0;
 926
 927     ref = type->ref->next;
 928     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 929     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 930       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 931       return !ref && nb_fields <= 4;
 932     }
 933   }
 934   return 0;
 935 }
 936
 937 struct avail_regs {
 938   signed char avail[3]; /* 3 holes max with only float and double alignments */
 939   int first_hole; /* first available hole */
 940   int last_hole; /* last available hole (none if equal to first_hole) */
 941   int first_free_reg; /* next free register in the sequence, hole excluded */
 942 };
 943
 944 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 945
 946 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 947    param) according to the rules described in the procedure call standard for
 948    the ARM architecture (AAPCS). If found, the registers are assigned to this
 949    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 950    and the parameter is a single float.
 951
 952    avregs: opaque structure to keep track of available VFP co-processor regs
 953    align: alignment constraints for the param, as returned by type_size()
 954    size: size of the parameter, as returned by type_size() */
 955 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 956 {
 957   int first_reg = 0;
 958
 959   if (avregs->first_free_reg == -1)
 960     return -1;
 961   if (align >> 3) { /* double alignment */
 962     first_reg = avregs->first_free_reg;
 963     /* alignment constraint not respected so use next reg and record hole */
 964     if (first_reg & 1)
 965       avregs->avail[avregs->last_hole++] = first_reg++;
 966   } else { /* no special alignment (float or array of float) */
 967     /* if single float and a hole is available, assign the param to it */
 968     if (size == 4 && avregs->first_hole != avregs->last_hole)
 969       return avregs->avail[avregs->first_hole++];
 970     else
 971       first_reg = avregs->first_free_reg;
 972   }
 973   if (first_reg + size / 4 <= 16) {
 974     avregs->first_free_reg = first_reg + size / 4;
 975     return first_reg;
 976   }
 977   avregs->first_free_reg = -1;
 978   return -1;
 979 }
 980
 981 /* Returns whether all params need to be passed in core registers or not.
 982    This is the case for function part of the runtime ABI. */
 983 int floats_in_core_regs(SValue *sval)
 984 {
 985   if (!sval->sym)
 986     return 0;
 987
 988   switch (sval->sym->v) {
 989     case TOK___floatundisf:
 990     case TOK___floatundidf:
 991     case TOK___fixunssfdi:
 992     case TOK___fixunsdfdi:
 993 #ifndef TCC_ARM_VFP
 994     case TOK___fixunsxfdi:
 995 #endif
 996     case TOK___floatdisf:
 997     case TOK___floatdidf:
 998     case TOK___fixsfdi:
 999     case TOK___fixdfdi:
1000       return 1;
1001
1002     default:
1003       return 0;
1004   }
1005 }
1006
1007 /* Return the number of registers needed to return the struct, or 0 if
1008    returning via struct pointer. */
1009 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
1010 #ifdef TCC_ARM_EABI
1011     int size, align;
1012     size = type_size(vt, &align);
1013     if (float_abi == ARM_HARD_FLOAT && !variadic &&
1014         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
1015         *ret_align = 8;
1016         *regsize = 8;
1017         ret->ref = NULL;
1018         ret->t = VT_DOUBLE;
1019         return (size + 7) >> 3;
1020     } else if (size <= 4) {
1021         *ret_align = 4;
1022         *regsize = 4;
1023         ret->ref = NULL;
1024         ret->t = VT_INT;
1025         return 1;
1026     } else
1027         return 0;
1028 #else
1029     return 0;
1030 #endif
1031 }
1032
/* Parameters are classified according to how they are copied to their final
   destination for the function call. Because the copying is performed class
   after class according to the order in the enum below, it is important that
   some constraints about the order of the members of this enum are respected:
   - CORE_STRUCT_CLASS must come after STACK_CLASS;
   - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
     VFP_STRUCT_CLASS;
   - VFP_STRUCT_CLASS must come after VFP_CLASS.
   See the comment for the main loop in copy_params() for the reason. */
1042 enum reg_class {
1043         STACK_CLASS = 0,
1044         CORE_STRUCT_CLASS,
1045         VFP_CLASS,
1046         VFP_STRUCT_CLASS,
1047         CORE_CLASS,
1048         NB_CLASSES
1049 };
1050
     /* Placement decided by assign_regs() for one parameter of a call. The
        plans of a given class are chained through prev, most recently added
        first. */
1051 struct param_plan {
1052     int start; /* first reg or addr used depending on the class */
1053     int end; /* last reg used or next free addr depending on the class
     (for CORE_STRUCT_CLASS assign_regs() stores one past the last core reg) */
1054     SValue *sval; /* pointer to SValue on the value stack */
1055     struct param_plan *prev; /*  previous element in this class */
1056 };
1057
     /* Overall parameter assignment for one function call: the storage for
        all the parameter plans plus, for each class, the head of the list of
        plans belonging to that class (NULL when the class is empty). */
1058 struct plan {
1059     struct param_plan *pplans; /* array of all the param plans */
1060     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
1061 };
1062
     /* Append a copy of pplan to plan->pplans and make that copy the new head
        of the list for the given class.

        plan: name of a 'struct plan *' variable; the macro also requires an
              int variable named <plan>_nb in scope, used as the index of the
              next free slot in plan->pplans and incremented here
        pplan: a 'struct param_plan' lvalue; its prev field is overwritten
        class: the enum reg_class value selecting the list */
1063 #define add_param_plan(plan,pplan,class)                        \
1064     do {                                                        \
1065         pplan.prev = plan->clsplans[class];                     \
1066         plan->pplans[plan ## _nb] = pplan;                      \
1067         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
1068     } while(0)
1069
1070 /* Assign parameters to registers and stack with alignment according to the
1071    rules in the procedure call standard for the ARM architecture (AAPCS).
1072    The overall assignment is recorded in an array of per parameter structures
1073    called parameter plans. The parameter plans are also further organized in a
1074    number of linked lists, one per class of parameter (see the comment for the
1075    definition of enum reg_class).
1076
1077    nb_args: number of parameters of the function for which a call is generated
1078    float_abi: float ABI in use for this function call
1079    plan: the structure where the overall assignment is recorded
1080    todo: a bitmap (bit n for core register n) that records which core
     registers receive part of a CORE_STRUCT_CLASS parameter
1081
1082    Returns the amount of stack space needed for parameter passing
1083
1084    Note: this function allocates an array in plan->pplans with tcc_malloc
     (plan->pplans is NULL when nb_args is 0). It
1085    is the responsibility of the caller to free this array once used (ie not
1086    before copy_params). */
1087 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
1088 {
1089   int i, size, align;
1090   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
1091   int plan_nb = 0; /* next free slot of plan->pplans, used by add_param_plan */
1092   struct param_plan pplan;
1093   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1094
1095   ncrn = nsaa = 0;
1096   *todo = 0;
1097   plan->pplans = nb_args ? tcc_malloc(nb_args * sizeof(*plan->pplans)) : NULL;
1098   memset(plan->clsplans, 0, sizeof(plan->clsplans));
       /* i goes from nb_args - 1 down to 0, so vtop[-i] walks the parameters
          from the deepest value stack entry up to vtop */
1099   for(i = nb_args; i-- ;) {
1100     int j, start_vfpreg = 0;
1101     CType type = vtop[-i].type;
1102     type.t &= ~VT_ARRAY;
1103     size = type_size(&type, &align);
         /* round both size and alignment up to a multiple of 4 bytes */
1104     size = (size + 3) & ~3;
1105     align = (align + 3) & ~3;
         /* 'continue' below means the parameter has been placed; 'break'
            leaves the switch and places it on the stack */
1106     switch(vtop[-i].type.t & VT_BTYPE) {
1107       case VT_STRUCT:
1108       case VT_FLOAT:
1109       case VT_DOUBLE:
1110       case VT_LDOUBLE:
1111       if (float_abi == ARM_HARD_FLOAT) {
1112         int is_hfa = 0; /* Homogeneous float aggregate */
1113
1114         if (is_float(vtop[-i].type.t)
1115             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
1116           int end_vfpreg;
1117
1118           start_vfpreg = assign_vfpreg(&avregs, align, size);
               /* last single precision register used (only meaningful when
                  the allocation succeeded) */
1119           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
1120           if (start_vfpreg >= 0) {
1121             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
1122             if (is_hfa)
1123               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
1124             else
1125               add_param_plan(plan, pplan, VFP_CLASS);
1126             continue;
1127           } else
1128             break; /* no VFP register left: goes on the stack */
1129         }
1130       }
           /* structs, and floats not handled above, use core registers:
              align the next core register number to the parameter alignment */
1131       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
           /* start_vfpreg is only ever -1 on the path that breaks out above,
              so the second test reduces to ncrn < 4 here */
1132       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
1133         /* The parameter is allocated both in core register and on stack. As
1134          * such, it can be of either class: it would either be the last of
1135          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1136         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
1137           *todo|=(1<<j);
             /* j is now one past the last core register used */
1138         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1139         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1140         ncrn += size/4;
             /* the part that did not fit in r0-r3 occupies the start of the
                stacked argument area */
1141         if (ncrn > 4)
1142           nsaa = (ncrn - 4) * 4;
1143       } else {
1144         ncrn = 4;
1145         break;
1146       }
1147       continue;
1148       default:
1149       if (ncrn < 4) {
1150         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1151
1152         if (is_long) {
               /* a long long starts at an even numbered core register */
1153           ncrn = (ncrn + 1) & -2;
1154           if (ncrn == 4)
1155             break;
1156         }
1157         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1158         ncrn++;
1159         if (is_long)
1160           pplan.end = ncrn++;
1161         add_param_plan(plan, pplan, CORE_CLASS);
1162         continue;
1163       }
1164     }
         /* stack placement: align the next stacked argument address */
1165     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1166     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1167     add_param_plan(plan, pplan, STACK_CLASS);
1168     nsaa += size; /* size already rounded up before */
1169   }
1170   return nsaa;
1171 }
1172
1173 #undef add_param_plan
1174
1175 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1176    function call.
1177
1178    nb_args: number of parameters the function takes
1179    plan: the overall assignment plan for parameters
1180    todo: a bitmap indicating which core regs are loaded from the stack with
     a single pop once all parameters have been copied
1181
1182    Returns the number of SValue added by this function on the value stack */
1183 static int copy_params(int nb_args, struct plan *plan, int todo)
1184 {
1185   int size, align, r, i, nb_extra_sval = 0;
1186   struct param_plan *pplan;
1187   int pass = 0;
1188
1189    /* Several constraints require parameters to be copied in a specific order:
1190       - structures are copied to the stack before being loaded in a reg;
1191       - floats loaded to an odd numbered VFP reg are first copied to the
1192         preceding even numbered VFP reg and then moved to the next VFP reg.
1193
1194       It is thus important that:
1195       - structures assigned to core regs must be copied after parameters
1196         assigned to the stack but before structures assigned to VFP regs because
1197         a structure can lie partly in core registers and partly on the stack;
1198       - parameters assigned to the stack and all structures be copied before
1199         parameters assigned to a core reg since copying a parameter to the stack
1200         require using a core reg;
1201       - parameters assigned to VFP regs be copied before structures assigned to
1202         VFP regs as the copy might use an even numbered VFP reg that already
1203         holds part of a structure. */
1204 again:
1205   for(i = 0; i < NB_CLASSES; i++) {
1206     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1207
           /* the second pass only revisits CORE_CLASS parameters whose
              SValue is no longer held in a register */
1208       if (pass
1209           && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
1210         continue;
1211
           /* work on a copy pushed on top of the value stack */
1212       vpushv(pplan->sval);
1213       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1214       switch(i) {
1215         case STACK_CLASS:
1216         case CORE_STRUCT_CLASS:
1217         case VFP_STRUCT_CLASS:
1218           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1219             int padding = 0;
1220             size = type_size(&pplan->sval->type, &align);
1221             /* align to stack align size */
1222             size = (size + 3) & ~3;
1223             if (i == STACK_CLASS && pplan->prev)
1224               padding = pplan->start - pplan->prev->end;
1225             size += padding; /* Add padding if any */
1226             /* allocate the necessary size on stack */
1227             gadd_sp(-size);
1228             /* generate structure store */
1229             r = get_reg(RC_INT);
1230             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1231             vset(&vtop->type, r | VT_LVAL, 0);
1232             vswap();
1233             vstore(); /* memcpy to current sp + potential padding */
1234
1235             /* Homogeneous float aggregate are loaded to VFP registers
1236                immediately since there is no way of loading data in multiple
1237                non consecutive VFP registers as what is done for other
1238                structures (see the use of todo). */
1239             if (i == VFP_STRUCT_CLASS) {
1240               int first = pplan->start, nb = pplan->end - first + 1;
1241               /* vpop.32 {pplan->start, ..., pplan->end} */
1242               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1243               /* No need to write the register used to a SValue since VFP regs
1244                  cannot be used for gcall_or_jmp */
1245             }
1246           } else {
                 /* scalar: load it in a register and push that register */
1247             if (is_float(pplan->sval->type.t)) {
1248 #ifdef TCC_ARM_VFP
1249               r = vfpr(gv(RC_FLOAT)) << 12;
1250               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1251                 size = 4;
1252               else {
1253                 size = 8;
1254                 r |= 0x101; /* vpush.32 -> vpush.64 */
1255               }
1256               o(0xED2D0A01 + r); /* vpush */
1257 #else
1258               r = fpr(gv(RC_FLOAT)) << 12;
1259               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1260                 size = 4;
1261               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1262                 size = 8;
1263               else
1264                 size = LDOUBLE_SIZE;
1265
1266               if (size == 12)
1267                 r |= 0x400000;
1268               else if(size == 8)
1269                 r|=0x8000;
1270
1271               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1272 #endif
1273             } else {
1274               /* simple type (currently always same size) */
1275               /* XXX: implicit cast ? */
1276               size=4;
1277               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                     /* split in two words and push the one on top first */
1278                 lexpand();
1279                 size = 8;
1280                 r = gv(RC_INT);
1281                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1282                 vtop--;
1283               }
1284               r = gv(RC_INT);
1285               o(0xE52D0004|(intr(r)<<12)); /* push r */
1286             }
1287             if (i == STACK_CLASS && pplan->prev)
1288               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1289           }
1290           break;
1291
1292         case VFP_CLASS:
               /* load in the double register containing the target single
                  register */
1293           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1294           if (pplan->start & 1) { /* Must be in upper part of double register */
1295             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1296             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1297           }
1298           break;
1299
1300         case CORE_CLASS:
1301           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                 /* the word left on top by lexpand() goes to register 'end' */
1302             lexpand();
1303             gv(regmask(pplan->end));
1304             pplan->sval->r2 = vtop->r;
1305             vtop--;
1306           }
1307           gv(regmask(pplan->start));
1308           /* Mark register as used so that gcall_or_jmp use another one
1309              (regs >=4 are free as never used to pass parameters) */
1310           pplan->sval->r = vtop->r;
1311           break;
1312       }
           /* drop the working copy pushed by vpushv() above */
1313       vtop--;
1314     }
1315   }
1316
1317   /* second pass to restore registers that were saved on stack by accident.
1318      Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1319   if (++pass < 2)
1320     goto again;
1321
1322   /* Manually free remaining registers since next parameters are loaded
1323    * manually, without the help of gv(int). */
1324   save_regs(nb_args);
1325
1326   if(todo) {
1327     o(0xE8BD0000|todo); /* pop {todo} */
1328     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1329       int r;
1330       pplan->sval->r = pplan->start;
1331       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1332          can occupy more than 2 registers. Thus, we need to push on the value
1333          stack some fake parameter to have on SValue for each registers used
1334          by a structure (r2 is not used). */
1335       for (r = pplan->start + 1; r <= pplan->end; r++) {
1336         if (todo & (1 << r)) {
1337           nb_extra_sval++;
1338           vpushi(0);
1339           vtop->r = r;
1340         }
1341       }
1342     }
1343   }
1344   return nb_extra_sval;
1345 }
1346
1347 /* Generate function call. The function address is pushed first, then
1348    all the parameters in call order. This functions pops all the
1349    parameters and the function address.

     nb_args: number of parameters found on the value stack above the
     function address

     The global float_abi is switched to ARM_SOFTFP_FLOAT for the duration of
     the call when the hard float ABI is in use and the callee is variadic or
     one of the functions recognized by floats_in_core_regs(); it is restored
     before returning. */
1350 void gfunc_call(int nb_args)
1351 {
1352   int r, args_size;
1353   int def_float_abi = float_abi; /* restored at the end of the function */
1354   int todo;
1355   struct plan plan;
1356 #ifdef TCC_ARM_EABI
1357   int variadic;
1358 #endif
1359
1360 #ifdef CONFIG_TCC_BCHECK
1361   if (tcc_state->do_bounds_check)
1362     gbound_args(nb_args);
1363 #endif
1364
1365 #ifdef TCC_ARM_EABI
1366   if (float_abi == ARM_HARD_FLOAT) {
         /* vtop[-nb_args] is the function being called */
1367     variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
1368     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1369       float_abi = ARM_SOFTFP_FLOAT;
1370   }
1371 #endif
1372   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1373      VT_JMP anywhere except on the top of the stack because it would complicate
1374      the code generator. */
1375   r = vtop->r & VT_VALMASK;
1376   if (r == VT_CMP || (r & ~1) == VT_JMP)
1377     gv(RC_INT);
1378
1379   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1380
1381 #ifdef TCC_ARM_EABI
1382   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1383     args_size = (args_size + 7) & ~7;
1384     o(0xE24DD004); /* sub sp, sp, #4 */
1385   }
1386 #endif
1387
       /* copy_params() may push extra SValues; count them so they get popped
          together with the parameters below */
1388   nb_args += copy_params(nb_args, &plan, todo);
1389   tcc_free(plan.pplans);
1390
1391   /* Move fct SValue on top as required by gcall_or_jmp */
1392   vrotb(nb_args + 1);
1393   gcall_or_jmp(0);
1394   if (args_size)
1395       gadd_sp(args_size); /* pop all parameters passed on the stack */
1396 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
       /* with the softfp ABI a float result comes back in r0 (and r1 for a
          double): move it to s0/d0 */
1397   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1398     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1399       o(0xEE000A10); /*vmov s0, r0 */
1400     } else {
1401       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1402       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1403     }
1404   }
1405 #endif
1406   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1407   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1408   float_abi = def_float_abi;
1409 }
1410
1411 /* Generate the prolog of the function described by func_sym.

     The prolog pushes the core registers (and, with the hard float ABI, the
     VFP registers) that may hold incoming parameters, then fp, ip and lr,
     sets fp to sp and leaves a nop that gfunc_epilog() patches with the stack
     adjustment. Every parameter is then declared with sym_push() as a local
     lvalue at its offset from fp. */
1412 void gfunc_prolog(Sym *func_sym)
1413 {
1414   CType *func_type = &func_sym->type;
1415   Sym *sym,*sym2;
1416   int n, nf, size, align, rs, struct_ret = 0;
1417   int addr, pn, sn; /* pn=core, sn=stack */
1418   CType ret_type;
1419
1420 #ifdef TCC_ARM_EABI
1421   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1422 #endif
1423
1424   sym = func_type->ref;
1425
       /* n: number of core register words to save, nf: number of single
          precision VFP registers to save */
1426   n = nf = 0;
1427   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1428       !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1429   {
         /* struct returned through a pointer that takes the first core reg */
1430     n++;
1431     struct_ret = 1;
1432     func_vc = 12; /* Offset from fp of the place to store the result */
1433   }
       /* first walk over the parameters: only count the registers to save */
1434   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1435     size = type_size(&sym2->type, &align);
1436 #ifdef TCC_ARM_EABI
1437     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1438         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1439       int tmpnf = assign_vfpreg(&avregs, align, size);
1440       tmpnf += (size + 3) / 4;
1441       nf = (tmpnf > nf) ? tmpnf : nf;
1442     } else
1443 #endif
1444     if (n < 4)
1445       n += (size + 3) / 4;
1446   }
1447   o(0xE1A0C00D); /* mov ip,sp */
1448   if (func_var)
1449     n=4;
1450   if (n) {
1451     if(n>4)
1452       n=4;
1453 #ifdef TCC_ARM_EABI
1454     n=(n+1)&-2;
1455 #endif
1456     o(0xE92D0000|((1<<n)-1)); /* save r0..r(n-1) (at most r0-r3) on stack if needed */
1457   }
1458   if (nf) {
1459     if (nf>16)
1460       nf=16;
1461     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1462     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1463   }
1464   o(0xE92D5800); /* save fp, ip, lr */
1465   o(0xE1A0B00D); /* mov fp, sp */
       /* remember where the placeholder is so that gfunc_epilog() can patch it */
1466   func_sub_sp_offset = ind;
1467   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1468
1469 #ifdef TCC_ARM_EABI
1470   if (float_abi == ARM_HARD_FLOAT) {
         /* the saved VFP registers are pushed after the core registers, so
            the struct return slot is nf words further from fp */
1471     func_vc += nf * 4;
1472     avregs = AVAIL_REGS_INITIALIZER;
1473   }
1474 #endif
       /* second walk: give each parameter its location. addr is an offset
          within the saved register / incoming stack area; sym_push() gets
          addr + 12 (presumably the 12 bytes taken by the saved fp, ip and lr
          -- TODO confirm) */
1475   pn = struct_ret, sn = 0;
1476   while ((sym = sym->next)) {
1477     CType *type;
1478     type = &sym->type;
1479     size = type_size(type, &align);
1480     size = (size + 3) >> 2; /* size in words from here on */
1481     align = (align + 3) & ~3;
1482 #ifdef TCC_ARM_EABI
1483     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1484         || is_hgen_float_aggr(&sym->type))) {
1485       int fpn = assign_vfpreg(&avregs, align, size << 2);
1486       if (fpn >= 0)
1487         addr = fpn * 4;
1488       else
1489         goto from_stack;
1490     } else
1491 #endif
1492     if (pn < 4) {
1493 #ifdef TCC_ARM_EABI
1494         pn = (pn + (align-1)/4) & -(align/4);
1495 #endif
1496       addr = (nf + pn) * 4;
1497       pn += size;
           /* a parameter starting in core registers may continue on the stack */
1498       if (!sn && pn > 4)
1499         sn = (pn - 4);
1500     } else {
1501 #ifdef TCC_ARM_EABI
1502 from_stack:
1503         sn = (sn + (align-1)/4) & -(align/4);
1504 #endif
1505       addr = (n + nf + sn) * 4;
1506       sn += size;
1507     }
1508     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL,
1509              addr + 12);
1510   }
1511   last_itod_magic=0;
1512   leaffunc = 1; /* cleared by gfunc_call() when a call is generated */
1513   loc = 0;
1514 #ifdef CONFIG_TCC_BCHECK
1515   if (tcc_state->do_bounds_check)
1516     gen_bounds_prolog();
1517 #endif
1518 }
1519
1520 /* Generate the function epilog and patch the placeholder left by
     gfunc_prolog() at func_sub_sp_offset with the instruction that reserves
     the stack space used by local variables. */
1521 void gfunc_epilog(void)
1522 {
1523   uint32_t x;
1524   int diff;
1525
1526 #ifdef CONFIG_TCC_BCHECK
1527   if (tcc_state->do_bounds_check)
1528     gen_bounds_epilog();
1529 #endif
1530   /* Copy float return value to core register if base standard is used and
1531      float computation is made with VFP */
1532 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1533   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1534     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1535       o(0xEE100A10); /* fmrs r0, s0 */
1536     else {
1537       o(0xEE100B10); /* fmrdl r0, d0 */
1538       o(0xEE301B10); /* fmrdh r1, d0 */
1539     }
1540   }
1541 #endif
1542   o(0xE89BA800); /* restore fp, sp, pc */
       /* size of the local variables, rounded up to a multiple of 4 */
1543   diff = (-loc + 3) & -4;
1544 #ifdef TCC_ARM_EABI
       /* NOTE(review): for non leaf functions this makes diff equal to 4
          modulo 8, presumably to keep sp 8 byte aligned at calls given what
          the prolog pushes -- confirm */
1545   if(!leaffunc)
1546     diff = ((diff + 11) & -8) - 4;
1547 #endif
1548   if(diff > 0) {
1549     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1550     if(x)
1551       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1552     else {
           /* diff cannot be encoded as an immediate: emit a small routine
              after the epilog that loads diff from a literal word, and patch
              the placeholder with a branch to it (presumably a branch with
              link since the routine returns through lr -- confirm against
              encbranch()) */
1553       int addr;
1554       addr=ind;
1555       o(0xE59FC004); /* ldr ip,[pc+4] */
1556       o(0xE04BD00C); /* sub sp,fp,ip  */
1557       o(0xE1A0F00E); /* mov pc,lr */
1558       o(diff);
1559       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1560     }
1561   }
1562 }
1563
1564 ST_FUNC void gen_fill_nops(int bytes)
1565 {
1566     if ((bytes & 3))
1567       tcc_error("alignment of code section not multiple of 4");
1568     while (bytes > 0) {
1569         o(0xE1A00000);
1570         bytes -= 4;
1571     }
1572 }
1573
1574 /* generate a jump to a label */
1575 ST_FUNC int gjmp(int t)
1576 {
1577   int r;
1578   if (nocode_wanted)
1579     return t;
1580   r=ind;
1581   o(0xE0000000|encbranch(r,t,1));
1582   return r;
1583 }
1584
1585 /* generate a jump to a fixed address */
1586 ST_FUNC void gjmp_addr(int a)
1587 {
1588   gjmp(a);
1589 }
1590
1591 ST_FUNC int gjmp_cond(int op, int t)
1592 {
1593   int r;
1594   if (nocode_wanted)
1595     return t;
1596   r=ind;
1597   op=mapcc(op);
1598   op|=encbranch(r,t,1);
1599   o(op);
1600   return r;
1601 }
1602
1603 ST_FUNC int gjmp_append(int n, int t)
1604 {
1605   uint32_t *x;
1606   int p,lp;
1607   if(n) {
1608     p = n;
1609     do {
1610       p = decbranch(lp=p);
1611     } while(p);
1612     x = (uint32_t *)(cur_text_section->data + lp);
1613     *x &= 0xff000000;
1614     *x |= encbranch(lp,t,1);
1615     t = n;
1616   }
1617   return t;
1618 }
1619
1620 /* Generate an integer binary operation between vtop[-1] and vtop[0]; both
     operands are replaced on the value stack by the result.

     op: operator token. Any token not listed in the first switch is handled
     as a comparison.

     The first switch sorts operators in three groups recorded in c:
       1: operations taking an opcode in opc (arithmetic, logic, comparison)
       2: shifts (opc is the shift type)
       3: operations done by calling the runtime function 'func'
     Multiplications are emitted directly from the first switch. */
1621 void gen_opi(int op)
1622 {
1623   int c, func = 0;
1624   uint32_t opc = 0, r, fr;
1625   unsigned short retreg = REG_IRET; /* register holding the result of a call */
1626
1627   c=0;
1628   switch(op) {
1629     case '+':
1630       opc = 0x8;
1631       c=1;
1632       break;
1633     case TOK_ADDC1: /* add with carry generation */
1634       opc = 0x9;
1635       c=1;
1636       break;
1637     case '-':
1638       opc = 0x4;
1639       c=1;
1640       break;
1641     case TOK_SUBC1: /* sub with carry generation */
1642       opc = 0x5;
1643       c=1;
1644       break;
1645     case TOK_ADDC2: /* add with carry use */
1646       opc = 0xA;
1647       c=1;
1648       break;
1649     case TOK_SUBC2: /* sub with carry use */
1650       opc = 0xC;
1651       c=1;
1652       break;
1653     case '&':
1654       opc = 0x0;
1655       c=1;
1656       break;
1657     case '^':
1658       opc = 0x2;
1659       c=1;
1660       break;
1661     case '|':
1662       opc = 0x18;
1663       c=1;
1664       break;
1665     case '*':
           /* both operands in registers; the result reuses the register of
              the first operand */
1666       gv2(RC_INT, RC_INT);
1667       r = vtop[-1].r;
1668       fr = vtop[0].r;
1669       vtop--;
1670       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1671       return;
1672     case TOK_SHL:
1673       opc = 0;
1674       c=2;
1675       break;
1676     case TOK_SHR:
1677       opc = 1;
1678       c=2;
1679       break;
1680     case TOK_SAR:
1681       opc = 2;
1682       c=2;
1683       break;
1684     case '/':
1685     case TOK_PDIV:
1686       func=TOK___divsi3;
1687       c=3;
1688       break;
1689     case TOK_UDIV:
1690       func=TOK___udivsi3;
1691       c=3;
1692       break;
1693     case '%':
1694 #ifdef TCC_ARM_EABI
           /* the result is taken from REG_IRE2 instead of REG_IRET */
1695       func=TOK___aeabi_idivmod;
1696       retreg=REG_IRE2;
1697 #else
1698       func=TOK___modsi3;
1699 #endif
1700       c=3;
1701       break;
1702     case TOK_UMOD:
1703 #ifdef TCC_ARM_EABI
1704       func=TOK___aeabi_uidivmod;
1705       retreg=REG_IRE2;
1706 #else
1707       func=TOK___umodsi3;
1708 #endif
1709       c=3;
1710       break;
1711     case TOK_UMULL:
           /* the result uses two registers: r2 gets a fresh register and r a
              register obtained with get_reg_ex() */
1712       gv2(RC_INT, RC_INT);
1713       r=intr(vtop[-1].r2=get_reg(RC_INT));
1714       c=vtop[-1].r;
1715       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1716       vtop--;
1717       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1718       return;
1719     default:
           /* anything else is treated as a comparison; see the use of
              vset_VT_CMP() at the 'done' label */
1720       opc = 0x15;
1721       c=1;
1722       break;
1723   }
1724   switch(c) {
1725     case 1:
           /* constant first operand of a subtraction: swap the operands and
              use the reverse form of the instruction */
1726       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1727         if(opc == 4 || opc == 5 || opc == 0xc) {
1728           vswap();
1729           opc|=2; // sub -> rsb
1730         }
1731       }
1732       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1733           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1734         gv(RC_INT);
           /* load the first operand; c is its register from now on */
1735       vswap();
1736       c=intr(gv(RC_INT));
1737       vswap();
1738       opc=0xE0000000|(opc<<20);
1739       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
             /* try to encode the constant second operand as an immediate */
1740         uint32_t x;
1741         x=stuff_const(opc|0x2000000|(c<<16),vtop->c.i);
1742         if(x) {
1743           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1744           o(x|(r<<12));
1745           goto done;
1746         }
1747       }
1748       fr=intr(gv(RC_INT));
           /* loading the second operand may have moved the first one out of
              its register: reload it */
1749       if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1750         vswap();
1751         c=intr(gv(RC_INT));
1752         vswap();
1753       }
1754       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1755       o(opc|(c<<16)|(r<<12)|fr);
1756 done:
1757       vtop--;
1758       if (op >= TOK_ULT && op <= TOK_GT)
1759         vset_VT_CMP(op);
1760       break;
1761     case 2:
1762       opc=0xE1A00000|(opc<<5);
1763       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1764           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1765         gv(RC_INT);
           /* load the value to shift; r is its register */
1766       vswap();
1767       r=intr(gv(RC_INT));
1768       vswap();
1769       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
             /* constant shift amount, only its low 5 bits are used */
1770         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1771         c = vtop->c.i & 0x1f;
1772         o(opc|r|(c<<7)|(fr<<12));
1773       } else {
             /* shift amount in a register */
1774         fr=intr(gv(RC_INT));
1775         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1776           vswap();
1777           r=intr(gv(RC_INT));
1778           vswap();
1779         }
1780         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1781         o(opc|r|(c<<12)|(fr<<8)|0x10);
1782       }
1783       vtop--;
1784       break;
1785     case 3:
           /* call func(vtop[-1], vtop[0]): put the function below its two
              arguments, call it and push an entry for the result register */
1786       vpush_global_sym(&func_old_type, func);
1787       vrott(3);
1788       gfunc_call(2);
1789       vpushi(0);
1790       vtop->r = retreg;
1791       break;
1792     default:
1793       tcc_error("gen_opi %i unimplemented!",op);
1794   }
1795 }
1796
1797 #ifdef TCC_ARM_VFP
1798 static int is_zero(int i)
1799 {
1800   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1801     return 0;
1802   if (vtop[i].type.t == VT_FLOAT)
1803     return (vtop[i].c.f == 0.f);
1804   else if (vtop[i].type.t == VT_DOUBLE)
1805     return (vtop[i].c.d == 0.0);
1806   return (vtop[i].c.ld == 0.l);
1807 }
1808
1809 /* generate a floating point operation 'v = t1 op t2' instruction. The
1810  *    two operands are guaranteed to have the same floating point type
      *
      *    op: '+', '-', '*', '/' or a comparison token between TOK_ULT and
      *    TOK_GT; anything else is reported with tcc_error().
      *
      *    t1 is vtop[-1] and t2 is vtop[0]; both are replaced by the result.
      *    For a comparison the result is set with vset_VT_CMP().
      *    Additions and subtractions of a constant zero are simplified
      *    (is_zero() does not distinguish +0.0 from -0.0). */
1811 void gen_opf(int op)
1812 {
1813   uint32_t x; /* instruction being built */
1814   int fneg=0,r;
1815   x=0xEE000A00|T2CPR(vtop->type.t);
1816   switch(op) {
1817     case '+':
           /* move a constant zero operand on top, then drop it: the result
              is the other operand unchanged */
1818       if(is_zero(-1))
1819         vswap();
1820       if(is_zero(0)) {
1821         vtop--;
1822         return;
1823       }
1824       x|=0x300000;
1825       break;
1826     case '-':
1827       x|=0x300040;
           /* t1 - 0: nothing to compute */
1828       if(is_zero(0)) {
1829         vtop--;
1830         return;
1831       }
           /* 0 - t2: drop the zero and negate t2 instead */
1832       if(is_zero(-1)) {
1833         x|=0x810000; /* fsubX -> fnegX */
1834         vswap();
1835         vtop--;
1836         fneg=1;
1837       }
1838       break;
1839     case '*':
1840       x|=0x200000;
1841       break;
1842     case '/':
1843       x|=0x800000;
1844       break;
1845     default:
1846       if(op < TOK_ULT || op > TOK_GT) {
1847         tcc_error("unknown fp op %x!",op);
1848         return;
1849       }
           /* a constant zero as first operand is moved on top so that the
              compare-with-zero form can be used; op is replaced by the
              comparison with swapped operands (the swapped forms of GE and GT
              are given directly as TOK_ULE and TOK_ULT, which is what the
              switch after fmstat maps TOK_LE and TOK_LT to) */
1850       if(is_zero(-1)) {
1851         vswap();
1852         switch(op) {
1853           case TOK_LT: op=TOK_GT; break;
1854           case TOK_GE: op=TOK_ULE; break;
1855           case TOK_LE: op=TOK_GE; break;
1856           case TOK_GT: op=TOK_ULT; break;
1857         }
1858       }
1859       x|=0xB40040; /* fcmpX */
1860       if(op!=TOK_EQ && op!=TOK_NE)
1861         x|=0x80; /* fcmpX -> fcmpeX */
1862       if(is_zero(0)) {
1863         vtop--;
1864         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1865       } else {
1866         gv2(RC_FLOAT,RC_FLOAT);
1867         x|=vfpr(vtop[0].r);
1868         o(x|(vfpr(vtop[-1].r) << 12));
1869         vtop--;
1870       }
1871       o(0xEEF1FA10); /* fmstat */
1872
           /* translate the comparison token into the one passed to
              vset_VT_CMP() */
1873       switch(op) {
1874         case TOK_LE: op=TOK_ULE; break;
1875         case TOK_LT: op=TOK_ULT; break;
1876         case TOK_UGE: op=TOK_GE; break;
1877         case TOK_UGT: op=TOK_GT; break;
1878       }
1879       vset_VT_CMP(op);
1880       return;
1881   }
       /* arithmetic: load the top operand; its register goes in the low bits
          of the instruction and r becomes a register mask */
1882   r=gv(RC_FLOAT);
1883   x|=vfpr(r);
1884   r=regmask(r);
1885   if(!fneg) {
1886     int r2;
         /* load the other operand; its register goes in bits 16 and up */
1887     vswap();
1888     r2=gv(RC_FLOAT);
1889     x|=vfpr(r2)<<16;
1890     r|=regmask(r2);
         /* loading it may have moved the first loaded operand out of its
            register: reload it and patch the low register field.
            NOTE(review): r, a register mask up to here, is overwritten with
            a register number and later passed to get_reg_ex() as a mask --
            confirm this is intended */
1891     if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1892       vswap();
1893       r=gv(RC_FLOAT);
1894       vswap();
1895       x=(x&~0xf)|vfpr(r);
1896     }
1897   }
       /* NOTE(review): when !fneg the entry receiving the get_reg_ex() result
          is popped right below, so the destination register encoded in the
          instruction is the one already held by the remaining entry --
          confirm this is intended */
1898   vtop->r=get_reg_ex(RC_FLOAT,r);
1899   if(!fneg)
1900     vtop--;
1901   o(x|(vfpr(vtop->r)<<12));
1902 }
1903
1904 #else
1905 static uint32_t is_fconst()
1906 {
1907   long double f;
1908   uint32_t r;
1909   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1910     return 0;
1911   if (vtop->type.t == VT_FLOAT)
1912     f = vtop->c.f;
1913   else if (vtop->type.t == VT_DOUBLE)
1914     f = vtop->c.d;
1915   else
1916     f = vtop->c.ld;
1917   if(!ieee_finite(f))
1918     return 0;
1919   r=0x8;
1920   if(f<0.0) {
1921     r=0x18;
1922     f=-f;
1923   }
1924   if(f==0.0)
1925     return r;
1926   if(f==1.0)
1927     return r|1;
1928   if(f==2.0)
1929     return r|2;
1930   if(f==3.0)
1931     return r|3;
1932   if(f==4.0)
1933     return r|4;
1934   if(f==5.0)
1935     return r|5;
1936   if(f==0.5)
1937     return r|6;
1938   if(f==10.0)
1939     return r|7;
1940   return 0;
1941 }
1942
1943 /* generate a floating point operation 'v = t1 op t2' instruction. The
1944    two operands are guaranteed to have the same floating point type */
1945 void gen_opf(int op)
1946 {
1947   uint32_t x, r, r2, c1, c2;
1948   //fputs("gen_opf\n",stderr);
1949   vswap();
1950   c1 = is_fconst();
1951   vswap();
1952   c2 = is_fconst();
1953   x=0xEE000100;
1954 #if LDOUBLE_SIZE == 8
1955   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1956     x|=0x80;
1957 #else
1958   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1959     x|=0x80;
1960   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1961     x|=0x80000;
1962 #endif
1963   switch(op)
1964   {
1965     case '+':
1966       if(!c2) {
1967         vswap();
1968         c2=c1;
1969       }
1970       vswap();
1971       r=fpr(gv(RC_FLOAT));
1972       vswap();
1973       if(c2) {
1974         if(c2>0xf)
1975           x|=0x200000; // suf
1976         r2=c2&0xf;
1977       } else {
1978         r2=fpr(gv(RC_FLOAT));
1979         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1980           vswap();
1981           r=fpr(gv(RC_FLOAT));
1982           vswap();
1983         }
1984       }
1985       break;
1986     case '-':
1987       if(c2) {
1988         if(c2<=0xf)
1989           x|=0x200000; // suf
1990         r2=c2&0xf;
1991         vswap();
1992         r=fpr(gv(RC_FLOAT));
1993         vswap();
1994       } else if(c1 && c1<=0xf) {
1995         x|=0x300000; // rsf
1996         r2=c1;
1997         r=fpr(gv(RC_FLOAT));
1998         vswap();
1999       } else {
2000         x|=0x200000; // suf
2001         vswap();
2002         r=fpr(gv(RC_FLOAT));
2003         vswap();
2004         r2=fpr(gv(RC_FLOAT));
2005         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2006           vswap();
2007           r=fpr(gv(RC_FLOAT));
2008           vswap();
2009         }
2010       }
2011       break;
2012     case '*':
2013       if(!c2 || c2>0xf) {
2014         vswap();
2015         c2=c1;
2016       }
2017       vswap();
2018       r=fpr(gv(RC_FLOAT));
2019       vswap();
2020       if(c2 && c2<=0xf)
2021         r2=c2;
2022       else {
2023         r2=fpr(gv(RC_FLOAT));
2024         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2025           vswap();
2026           r=fpr(gv(RC_FLOAT));
2027           vswap();
2028         }
2029       }
2030       x|=0x100000; // muf
2031       break;
2032     case '/':
2033       if(c2 && c2<=0xf) {
2034         x|=0x400000; // dvf
2035         r2=c2;
2036         vswap();
2037         r=fpr(gv(RC_FLOAT));
2038         vswap();
2039       } else if(c1 && c1<=0xf) {
2040         x|=0x500000; // rdf
2041         r2=c1;
2042         r=fpr(gv(RC_FLOAT));
2043         vswap();
2044       } else {
2045         x|=0x400000; // dvf
2046         vswap();
2047         r=fpr(gv(RC_FLOAT));
2048         vswap();
2049         r2=fpr(gv(RC_FLOAT));
2050         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2051           vswap();
2052           r=fpr(gv(RC_FLOAT));
2053           vswap();
2054         }
2055       }
2056       break;
2057     default:
2058       if(op >= TOK_ULT && op <= TOK_GT) {
2059         x|=0xd0f110; // cmfe
2060 /* bug (intention?) in Linux FPU emulator
2061    doesn't set carry if equal */
2062         switch(op) {
2063           case TOK_ULT:
2064           case TOK_UGE:
2065           case TOK_ULE:
2066           case TOK_UGT:
2067             tcc_error("unsigned comparison on floats?");
2068             break;
2069           case TOK_LT:
2070             op=TOK_Nset;
2071             break;
2072           case TOK_LE:
2073             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
2074             break;
2075           case TOK_EQ:
2076           case TOK_NE:
2077             x&=~0x400000; // cmfe -> cmf
2078             break;
2079         }
2080         if(c1 && !c2) {
2081           c2=c1;
2082           vswap();
2083           switch(op) {
2084             case TOK_Nset:
2085               op=TOK_GT;
2086               break;
2087             case TOK_GE:
2088               op=TOK_ULE;
2089               break;
2090             case TOK_ULE:
2091               op=TOK_GE;
2092               break;
2093             case TOK_GT:
2094               op=TOK_Nset;
2095               break;
2096           }
2097         }
2098         vswap();
2099         r=fpr(gv(RC_FLOAT));
2100         vswap();
2101         if(c2) {
2102           if(c2>0xf)
2103             x|=0x200000;
2104           r2=c2&0xf;
2105         } else {
2106           r2=fpr(gv(RC_FLOAT));
2107           if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2108             vswap();
2109             r=fpr(gv(RC_FLOAT));
2110             vswap();
2111           }
2112         }
2113         --vtop;
2114         vset_VT_CMP(op);
2115         ++vtop;
2116       } else {
2117         tcc_error("unknown fp op %x!",op);
2118         return;
2119       }
2120   }
2121   if(vtop[-1].r == VT_CMP)
2122     c1=15;
2123   else {
2124     c1=vtop->r;
2125     if(r2&0x8)
2126       c1=vtop[-1].r;
2127     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
2128     c1=fpr(vtop[-1].r);
2129   }
2130   vtop--;
2131   o(x|(r<<16)|(c1<<12)|r2);
2132 }
2133 #endif
2134
2135 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2136    and 'long long' cases. */
2137 ST_FUNC void gen_cvt_itof(int t)
2138 {
2139   uint32_t r, r2;
2140   int bt;
2141   bt=vtop->type.t & VT_BTYPE;
2142   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
2143 #ifndef TCC_ARM_VFP
2144     uint32_t dsize = 0;
2145 #endif
2146     r=intr(gv(RC_INT));
2147 #ifdef TCC_ARM_VFP
2148     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
2149     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
2150     r2|=r2<<12;
2151     if(!(vtop->type.t & VT_UNSIGNED))
2152       r2|=0x80;                /* fuitoX -> fsituX */
2153     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
2154 #else
2155     r2=fpr(vtop->r=get_reg(RC_FLOAT));
2156     if((t & VT_BTYPE) != VT_FLOAT)
2157       dsize=0x80;    /* flts -> fltd */
2158     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
2159     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
2160       uint32_t off = 0;
2161       o(0xE3500000|(r<<12));        /* cmp */
2162       r=fpr(get_reg(RC_FLOAT));
2163       if(last_itod_magic) {
2164         off=ind+8-last_itod_magic;
2165         off/=4;
2166         if(off>255)
2167           off=0;
2168       }
2169       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
2170       if(!off) {
2171         o(0xEA000000);              /* b */
2172         last_itod_magic=ind;
2173         o(0x4F800000);              /* 4294967296.0f */
2174       }
2175       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
2176     }
2177 #endif
2178     return;
2179   } else if(bt == VT_LLONG) {
2180     int func;
2181     CType *func_type = 0;
2182     if((t & VT_BTYPE) == VT_FLOAT) {
2183       func_type = &func_float_type;
2184       if(vtop->type.t & VT_UNSIGNED)
2185         func=TOK___floatundisf;
2186       else
2187         func=TOK___floatdisf;
2188 #if LDOUBLE_SIZE != 8
2189     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2190       func_type = &func_ldouble_type;
2191       if(vtop->type.t & VT_UNSIGNED)
2192         func=TOK___floatundixf;
2193       else
2194         func=TOK___floatdixf;
2195     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2196 #else
2197     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2198 #endif
2199       func_type = &func_double_type;
2200       if(vtop->type.t & VT_UNSIGNED)
2201         func=TOK___floatundidf;
2202       else
2203         func=TOK___floatdidf;
2204     }
2205     if(func_type) {
2206       vpush_global_sym(func_type, func);
2207       vswap();
2208       gfunc_call(1);
2209       vpushi(0);
2210       vtop->r=TREG_F0;
2211       return;
2212     }
2213   }
2214   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2215 }
2216
2217 /* convert fp to int 't' type */
2218 void gen_cvt_ftoi(int t)
2219 {
2220   uint32_t r, r2;
2221   int u, func = 0;
2222   u=t&VT_UNSIGNED;
2223   t&=VT_BTYPE;
2224   r2=vtop->type.t & VT_BTYPE;
2225   if(t==VT_INT) {
2226 #ifdef TCC_ARM_VFP
2227     r=vfpr(gv(RC_FLOAT));
2228     u=u?0:0x10000;
2229     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2230     r2=intr(vtop->r=get_reg(RC_INT));
2231     o(0xEE100A10|(r<<16)|(r2<<12));
2232     return;
2233 #else
2234     if(u) {
2235       if(r2 == VT_FLOAT)
2236         func=TOK___fixunssfsi;
2237 #if LDOUBLE_SIZE != 8
2238       else if(r2 == VT_LDOUBLE)
2239         func=TOK___fixunsxfsi;
2240       else if(r2 == VT_DOUBLE)
2241 #else
2242       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2243 #endif
2244         func=TOK___fixunsdfsi;
2245     } else {
2246       r=fpr(gv(RC_FLOAT));
2247       r2=intr(vtop->r=get_reg(RC_INT));
2248       o(0xEE100170|(r2<<12)|r);
2249       return;
2250     }
2251 #endif
2252   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2253     if(r2 == VT_FLOAT)
2254       func=TOK___fixsfdi;
2255 #if LDOUBLE_SIZE != 8
2256     else if(r2 == VT_LDOUBLE)
2257       func=TOK___fixxfdi;
2258     else if(r2 == VT_DOUBLE)
2259 #else
2260     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2261 #endif
2262       func=TOK___fixdfdi;
2263   }
2264   if(func) {
2265     vpush_global_sym(&func_old_type, func);
2266     vswap();
2267     gfunc_call(1);
2268     vpushi(0);
2269     if(t == VT_LLONG)
2270       vtop->r2 = REG_IRE2;
2271     vtop->r = REG_IRET;
2272     return;
2273   }
2274   tcc_error("unimplemented gen_cvt_ftoi!");
2275 }
2276
2277 /* convert from one floating point type to another */
2278 void gen_cvt_ftof(int t)
2279 {
2280 #ifdef TCC_ARM_VFP
2281   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2282     uint32_t r = vfpr(gv(RC_FLOAT));
2283     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2284   }
2285 #else
2286   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2287   gv(RC_FLOAT);
2288 #endif
2289 }
2290
2291 /* computed goto support */
2292 void ggoto(void)
2293 {
2294   gcall_or_jmp(1);
2295   vtop--;
2296 }
2297
2298 /* Save the stack pointer onto the stack and return the location of its address */
2299 ST_FUNC void gen_vla_sp_save(int addr) {
2300     SValue v;
2301     v.type.t = VT_PTR;
2302     v.r = VT_LOCAL | VT_LVAL;
2303     v.c.i = addr;
2304     store(TREG_SP, &v);
2305 }
2306
2307 /* Restore the SP from a location on the stack */
2308 ST_FUNC void gen_vla_sp_restore(int addr) {
2309     SValue v;
2310     v.type.t = VT_PTR;
2311     v.r = VT_LOCAL | VT_LVAL;
2312     v.c.i = addr;
2313     load(TREG_SP, &v);
2314 }
2315
2316 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2317 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2318     int r;
2319 #if defined(CONFIG_TCC_BCHECK)
2320     if (tcc_state->do_bounds_check)
2321         vpushv(vtop);
2322 #endif
2323     r = intr(gv(RC_INT));
2324     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2325 #ifdef TCC_ARM_EABI
2326     if (align < 8)
2327         align = 8;
2328 #else
2329     if (align < 4)
2330         align = 4;
2331 #endif
2332     if (align & (align - 1))
2333         tcc_error("alignment is not a power of 2: %i", align);
2334     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2335     vpop();
2336 #if defined(CONFIG_TCC_BCHECK)
2337     if (tcc_state->do_bounds_check) {
2338         vpushi(0);
2339         vtop->r = TREG_R0;
2340         o(0xe1a0000d | (vtop->r << 12)); // mov r0,sp
2341         vswap();
2342         vpush_global_sym(&func_old_type, TOK___bound_new_region);
2343         vrott(3);
2344         gfunc_call(2);
2345         func_bound_add_epilog = 1;
2346     }
2347 #endif
2348 }
2349
2350 /* end of ARM code generator */
2351 /*************************************************************/
2352 #endif
2353 /*************************************************************/