1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
28 #endif
30 /* number of available registers */
31 #ifdef TCC_ARM_VFP
32 #define NB_REGS 13
33 #else
34 #define NB_REGS 9
35 #endif
37 #ifndef TCC_CPU_VERSION
38 # define TCC_CPU_VERSION 5
39 #endif
41 /* a register can belong to several classes. The classes must be
42 sorted from more general to more precise (see gv2() code which makes
43 assumptions about this ordering). */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
46 #define RC_R0 0x0004
47 #define RC_R1 0x0008
48 #define RC_R2 0x0010
49 #define RC_R3 0x0020
50 #define RC_R12 0x0040
51 #define RC_F0 0x0080
52 #define RC_F1 0x0100
53 #define RC_F2 0x0200
54 #define RC_F3 0x0400
55 #ifdef TCC_ARM_VFP
56 #define RC_F4 0x0800
57 #define RC_F5 0x1000
58 #define RC_F6 0x2000
59 #define RC_F7 0x4000
60 #endif
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_IRE2 RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
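/* For illustration: each concrete register carries both a generic class and
   its own precise class, e.g. r0 is RC_INT | RC_R0 in reg_classes[] below;
   gv2() relies on the generic classes being listed before the precise ones. */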
65 /* pretty names for the registers */
66 enum {
67 TREG_R0 = 0,
68 TREG_R1,
69 TREG_R2,
70 TREG_R3,
71 TREG_R12,
72 TREG_F0,
73 TREG_F1,
74 TREG_F2,
75 TREG_F3,
76 #ifdef TCC_ARM_VFP
77 TREG_F4,
78 TREG_F5,
79 TREG_F6,
80 TREG_F7,
81 #endif
82 TREG_SP = 13,
83 TREG_LR,
86 #ifdef TCC_ARM_VFP
87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
88 #endif
90 /* return registers for function */
91 #define REG_IRET TREG_R0 /* single word int return register */
92 #define REG_IRE2 TREG_R1 /* second word return register (for long long) */
93 #define REG_FRET TREG_F0 /* float return register */
95 #ifdef TCC_ARM_EABI
96 #define TOK___divdi3 TOK___aeabi_ldivmod
97 #define TOK___moddi3 TOK___aeabi_ldivmod
98 #define TOK___udivdi3 TOK___aeabi_uldivmod
99 #define TOK___umoddi3 TOK___aeabi_uldivmod
100 #endif
102 /* defined if function parameters must be evaluated in reverse order */
103 #define INVERT_FUNC_PARAMS
105 /* defined if structures are passed as pointers. Otherwise structures
106 are directly pushed on stack. */
107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
109 /* pointer size, in bytes */
110 #define PTR_SIZE 4
112 /* long double size and alignment, in bytes */
113 #ifdef TCC_ARM_VFP
114 #define LDOUBLE_SIZE 8
115 #endif
117 #ifndef LDOUBLE_SIZE
118 #define LDOUBLE_SIZE 8
119 #endif
121 #ifdef TCC_ARM_EABI
122 #define LDOUBLE_ALIGN 8
123 #else
124 #define LDOUBLE_ALIGN 4
125 #endif
127 /* maximum alignment (for aligned attribute support) */
128 #define MAX_ALIGN 8
130 #define CHAR_IS_UNSIGNED
132 /******************************************************/
133 #else /* ! TARGET_DEFS_ONLY */
134 /******************************************************/
135 #define USING_GLOBALS
136 #include "tcc.h"
138 enum float_abi float_abi;
140 ST_DATA const int reg_classes[NB_REGS] = {
141 /* r0 */ RC_INT | RC_R0,
142 /* r1 */ RC_INT | RC_R1,
143 /* r2 */ RC_INT | RC_R2,
144 /* r3 */ RC_INT | RC_R3,
145 /* r12 */ RC_INT | RC_R12,
146 /* f0 */ RC_FLOAT | RC_F0,
147 /* f1 */ RC_FLOAT | RC_F1,
148 /* f2 */ RC_FLOAT | RC_F2,
149 /* f3 */ RC_FLOAT | RC_F3,
150 #ifdef TCC_ARM_VFP
151 /* d4/s8 */ RC_FLOAT | RC_F4,
152 /* d5/s10 */ RC_FLOAT | RC_F5,
153 /* d6/s12 */ RC_FLOAT | RC_F6,
154 /* d7/s14 */ RC_FLOAT | RC_F7,
155 #endif
158 static int func_sub_sp_offset, last_itod_magic;
159 static int leaffunc;
161 #if defined(CONFIG_TCC_BCHECK)
162 static addr_t func_bound_offset;
163 static unsigned long func_bound_ind;
164 ST_DATA int func_bound_add_epilog;
165 #endif
167 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
168 static CType float_type, double_type, func_float_type, func_double_type;
169 ST_FUNC void arm_init(struct TCCState *s)
171 float_type.t = VT_FLOAT;
172 double_type.t = VT_DOUBLE;
173 func_float_type.t = VT_FUNC;
174 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
175 func_double_type.t = VT_FUNC;
176 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
178 float_abi = s->float_abi;
179 #ifndef TCC_ARM_HARDFLOAT
180 # warning "soft float ABI currently not supported: default to softfp"
181 #endif
183 #else
184 #define func_float_type func_old_type
185 #define func_double_type func_old_type
186 #define func_ldouble_type func_old_type
187 ST_FUNC void arm_init(struct TCCState *s)
189 #if 0
190 #if !defined (TCC_ARM_VFP)
191 tcc_warning("Support for FPA is deprecated and will be removed in next"
192 " release");
193 #endif
194 #if !defined (TCC_ARM_EABI)
195 tcc_warning("Support for OABI is deprecated and will be removed in next"
196 " release");
197 #endif
198 #endif
200 #endif
202 #define CHECK_R(r) ((r) >= TREG_R0 && (r) <= TREG_LR)
204 static int two2mask(int a,int b) {
205 if (!CHECK_R(a) || !CHECK_R(b))
206 tcc_error("compiler error! registers %i,%i are not valid",a,b);
207 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
210 static int regmask(int r) {
211 if (!CHECK_R(r))
212 tcc_error("compiler error! register %i is not valid",r);
213 return reg_classes[r]&~(RC_INT|RC_FLOAT);
216 /******************************************************/
218 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
219 const char *default_elfinterp(struct TCCState *s)
221 if (s->float_abi == ARM_HARD_FLOAT)
222 return "/lib/ld-linux-armhf.so.3";
223 else
224 return "/lib/ld-linux.so.3";
226 #endif
228 void o(uint32_t i)
230 /* this is a good place to start adding big-endian support */
231 int ind1;
232 if (nocode_wanted)
233 return;
234 ind1 = ind + 4;
235 if (!cur_text_section)
236 tcc_error("compiler error! This happens e.g. if the compiler\n"
237 "can't evaluate constant expressions outside of a function.");
238 if (ind1 > cur_text_section->data_allocated)
239 section_realloc(cur_text_section, ind1);
240 cur_text_section->data[ind++] = i&255;
241 i>>=8;
242 cur_text_section->data[ind++] = i&255;
243 i>>=8;
244 cur_text_section->data[ind++] = i&255;
245 i>>=8;
246 cur_text_section->data[ind++] = i;
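/* For example, o(0xE1A00000) appends the bytes 00 00 A0 E1 (little-endian) to
   cur_text_section, i.e. the "mov r0,r0" encoding used as a nop elsewhere in
   this file. */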
249 static uint32_t stuff_const(uint32_t op, uint32_t c)
251 int try_neg=0;
252 uint32_t nc = 0, negop = 0;
254 switch(op&0x1F00000)
256 case 0x800000: //add
257 case 0x400000: //sub
258 try_neg=1;
259 negop=op^0xC00000;
260 nc=-c;
261 break;
262 case 0x1A00000: //mov
263 case 0x1E00000: //mvn
264 try_neg=1;
265 negop=op^0x400000;
266 nc=~c;
267 break;
268 case 0x200000: //xor
269 if(c==~0)
270 return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
271 break;
272 case 0x0: //and
273 if(c==~0)
274 return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
275 case 0x1C00000: //bic
276 try_neg=1;
277 negop=op^0x1C00000;
278 nc=~c;
279 break;
280 case 0x1800000: //orr
281 if(c==~0)
282 return (op&0xFFF0FFFF)|0x1E00000;
283 break;
285 do {
286 uint32_t m;
287 int i;
288 if(c<256) /* catch undefined <<32 */
289 return op|c;
290 for(i=2;i<32;i+=2) {
291 m=(0xff>>i)|(0xff<<(32-i));
292 if(!(c&~m))
293 return op|(i<<7)|(c<<i)|(c>>(32-i));
295 op=negop;
296 c=nc;
297 } while(try_neg--);
298 return 0;
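/* Worked example (illustrative): an ARM data-processing immediate is an 8-bit
   value rotated right by an even amount. 0x3FC is 0xFF rotated right by 30,
   so stuff_const(0xE3A00000 (mov r0,#x), 0x3FC) returns 0xE3A00FFF, whereas a
   value such as 0x101 fits no rotation and yields 0. */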
302 //only add,sub
303 void stuff_const_harder(uint32_t op, uint32_t v) {
304 uint32_t x;
305 x=stuff_const(op,v);
306 if(x)
307 o(x);
308 else {
309 uint32_t a[16], nv, no, o2, n2;
310 int i,j,k;
311 a[0]=0xff;
312 o2=(op&0xfff0ffff)|((op&0xf000)<<4);
313 for(i=1;i<16;i++)
314 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
315 for(i=0;i<12;i++)
316 for(j=i<4?i+12:15;j>=i+4;j--)
317 if((v&(a[i]|a[j]))==v) {
318 o(stuff_const(op,v&a[i]));
319 o(stuff_const(o2,v&a[j]));
320 return;
322 no=op^0xC00000;
323 n2=o2^0xC00000;
324 nv=-v;
325 for(i=0;i<12;i++)
326 for(j=i<4?i+12:15;j>=i+4;j--)
327 if((nv&(a[i]|a[j]))==nv) {
328 o(stuff_const(no,nv&a[i]));
329 o(stuff_const(n2,nv&a[j]));
330 return;
332 for(i=0;i<8;i++)
333 for(j=i+4;j<12;j++)
334 for(k=i<4?i+12:15;k>=j+4;k--)
335 if((v&(a[i]|a[j]|a[k]))==v) {
336 o(stuff_const(op,v&a[i]));
337 o(stuff_const(o2,v&a[j]));
338 o(stuff_const(o2,v&a[k]));
339 return;
341 no=op^0xC00000;
342 nv=-v;
343 for(i=0;i<8;i++)
344 for(j=i+4;j<12;j++)
345 for(k=i<4?i+12:15;k>=j+4;k--)
346 if((nv&(a[i]|a[j]|a[k]))==nv) {
347 o(stuff_const(no,nv&a[i]));
348 o(stuff_const(n2,nv&a[j]));
349 o(stuff_const(n2,nv&a[k]));
350 return;
352 o(stuff_const(op,v&a[0]));
353 o(stuff_const(o2,v&a[4]));
354 o(stuff_const(o2,v&a[8]));
355 o(stuff_const(o2,v&a[12]));
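/* Illustrative example: 0x00FF00FF has no single-rotation encoding, so
   stuff_const_harder(0xE28DD000 (add sp,sp,#x), 0x00FF00FF) emits
   "add sp,sp,#0xFF" followed by "add sp,sp,#0x00FF0000". */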
359 uint32_t encbranch(int pos, int addr, int fail)
361 addr-=pos+8;
362 addr/=4;
363 if(addr>=0x1000000 || addr<-0x1000000) {
364 if(fail)
365 tcc_error("FIXME: function bigger than 32MB");
366 return 0;
368 return 0x0A000000|(addr&0xffffff);
371 int decbranch(int pos)
373 int x;
374 x=*(uint32_t *)(cur_text_section->data + pos);
375 x&=0x00ffffff;
376 if(x&0x800000)
377 x-=0x1000000;
378 return x*4+pos+8;
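/* Example: a branch word at offset 0x100 targeting 0x120 encodes
   (0x120 - 0x100 - 8) / 4 = 6 in its signed 24-bit field, i.e.
   encbranch(0x100, 0x120, 1) == 0x0A000006, and decbranch(0x100) recovers
   0x120 from that word. */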
381 /* output a symbol and patch all calls to it */
382 void gsym_addr(int t, int a)
384 uint32_t *x;
385 int lt;
386 while(t) {
387 x=(uint32_t *)(cur_text_section->data + t);
388 t=decbranch(lt=t);
389 if(a==lt+4)
390 *x=0xE1A00000; // nop
391 else {
392 *x &= 0xff000000;
393 *x |= encbranch(lt,a,1);
398 #ifdef TCC_ARM_VFP
399 static uint32_t vfpr(int r)
401 if(r<TREG_F0 || r>TREG_F7)
402 tcc_error("compiler error! register %i is not a vfp register",r);
403 return r - TREG_F0;
405 #else
406 static uint32_t fpr(int r)
408 if(r<TREG_F0 || r>TREG_F3)
409 tcc_error("compiler error! register %i is not an fpa register",r);
410 return r - TREG_F0;
412 #endif
414 static uint32_t intr(int r)
416 if(r == TREG_R12)
417 return 12;
418 if(r >= TREG_R0 && r <= TREG_R3)
419 return r - TREG_R0;
420 if (!(r >= TREG_SP && r <= TREG_LR))
421 tcc_error("compiler error! register %i is not an int register",r);
422 return r + (13 - TREG_SP);
425 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
427 if(*off>maxoff || *off&((1<<shift)-1)) {
428 uint32_t x, y;
429 x=0xE280E000;
430 if(*sgn)
431 x=0xE240E000;
432 x|=(*base)<<16;
433 *base=14; // lr
434 y=stuff_const(x,*off&~maxoff);
435 if(y) {
436 o(y);
437 *off&=maxoff;
438 return;
440 y=stuff_const(x,(*off+maxoff)&~maxoff);
441 if(y) {
442 o(y);
443 *sgn=!*sgn;
444 *off=((*off+maxoff)&~maxoff)-*off;
445 return;
447 stuff_const_harder(x,*off&~maxoff);
448 *off&=maxoff;
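/* Illustrative case: for a word access (maxoff 4095) at offset 5000 from fp,
   the code above emits "add lr, fp, #4096", switches the base to lr and
   leaves the remaining offset 904 for the load/store itself. */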
452 static uint32_t mapcc(int cc)
454 switch(cc)
456 case TOK_ULT:
457 return 0x30000000; /* CC/LO */
458 case TOK_UGE:
459 return 0x20000000; /* CS/HS */
460 case TOK_EQ:
461 return 0x00000000; /* EQ */
462 case TOK_NE:
463 return 0x10000000; /* NE */
464 case TOK_ULE:
465 return 0x90000000; /* LS */
466 case TOK_UGT:
467 return 0x80000000; /* HI */
468 case TOK_Nset:
469 return 0x40000000; /* MI */
470 case TOK_Nclear:
471 return 0x50000000; /* PL */
472 case TOK_LT:
473 return 0xB0000000; /* LT */
474 case TOK_GE:
475 return 0xA0000000; /* GE */
476 case TOK_LE:
477 return 0xD0000000; /* LE */
478 case TOK_GT:
479 return 0xC0000000; /* GT */
481 tcc_error("unexpected condition code");
482 return 0xE0000000; /* AL */
485 static int negcc(int cc)
487 switch(cc)
489 case TOK_ULT:
490 return TOK_UGE;
491 case TOK_UGE:
492 return TOK_ULT;
493 case TOK_EQ:
494 return TOK_NE;
495 case TOK_NE:
496 return TOK_EQ;
497 case TOK_ULE:
498 return TOK_UGT;
499 case TOK_UGT:
500 return TOK_ULE;
501 case TOK_Nset:
502 return TOK_Nclear;
503 case TOK_Nclear:
504 return TOK_Nset;
505 case TOK_LT:
506 return TOK_GE;
507 case TOK_GE:
508 return TOK_LT;
509 case TOK_LE:
510 return TOK_GT;
511 case TOK_GT:
512 return TOK_LE;
514 tcc_error("unexpected condition code");
515 return TOK_NE;
518 /* load 'r' from value 'sv' */
519 void load(int r, SValue *sv)
521 int v, ft, fc, fr, sign;
522 uint32_t op;
523 SValue v1;
525 fr = sv->r;
526 ft = sv->type.t;
527 fc = sv->c.i;
529 if(fc>=0)
530 sign=0;
531 else {
532 sign=1;
533 fc=-fc;
536 v = fr & VT_VALMASK;
537 if (fr & VT_LVAL) {
538 uint32_t base = 0xB; // fp
539 if(v == VT_LLOCAL) {
540 v1.type.t = VT_PTR;
541 v1.r = VT_LOCAL | VT_LVAL;
542 v1.c.i = sv->c.i;
543 load(TREG_LR, &v1);
544 base = 14; /* lr */
545 fc=sign=0;
546 v=VT_LOCAL;
547 } else if(v == VT_CONST) {
548 v1.type.t = VT_PTR;
549 v1.r = fr&~VT_LVAL;
550 v1.c.i = sv->c.i;
551 v1.sym=sv->sym;
552 load(TREG_LR, &v1);
553 base = 14; /* lr */
554 fc=sign=0;
555 v=VT_LOCAL;
556 } else if(v < VT_CONST) {
557 base=intr(v);
558 fc=sign=0;
559 v=VT_LOCAL;
561 if(v == VT_LOCAL) {
562 if(is_float(ft)) {
563 calcaddr(&base,&fc,&sign,1020,2);
564 #ifdef TCC_ARM_VFP
565 op=0xED100A00; /* flds */
566 if(!sign)
567 op|=0x800000;
568 if ((ft & VT_BTYPE) != VT_FLOAT)
569 op|=0x100; /* flds -> fldd */
570 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
571 #else
572 op=0xED100100;
573 if(!sign)
574 op|=0x800000;
575 #if LDOUBLE_SIZE == 8
576 if ((ft & VT_BTYPE) != VT_FLOAT)
577 op|=0x8000;
578 #else
579 if ((ft & VT_BTYPE) == VT_DOUBLE)
580 op|=0x8000;
581 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
582 op|=0x400000;
583 #endif
584 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
585 #endif
586 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
587 || (ft & VT_BTYPE) == VT_SHORT) {
588 calcaddr(&base,&fc,&sign,255,0);
589 op=0xE1500090;
590 if ((ft & VT_BTYPE) == VT_SHORT)
591 op|=0x20;
592 if ((ft & VT_UNSIGNED) == 0)
593 op|=0x40;
594 if(!sign)
595 op|=0x800000;
596 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
597 } else {
598 calcaddr(&base,&fc,&sign,4095,0);
599 op=0xE5100000;
600 if(!sign)
601 op|=0x800000;
602 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
603 op|=0x400000;
604 o(op|(intr(r)<<12)|fc|(base<<16));
606 return;
608 } else {
609 if (v == VT_CONST) {
610 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
611 if (fr & VT_SYM || !op) {
612 o(0xE59F0000|(intr(r)<<12));
613 o(0xEA000000);
614 if(fr & VT_SYM)
615 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
616 o(sv->c.i);
617 } else
618 o(op);
619 return;
620 } else if (v == VT_LOCAL) {
621 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
622 if (fr & VT_SYM || !op) {
623 o(0xE59F0000|(intr(r)<<12));
624 o(0xEA000000);
625 if(fr & VT_SYM) // needed ?
626 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
627 o(sv->c.i);
628 o(0xE08B0000|(intr(r)<<12)|intr(r));
629 } else
630 o(op);
631 return;
632 } else if(v == VT_CMP) {
633 o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
634 o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
635 return;
636 } else if (v == VT_JMP || v == VT_JMPI) {
637 int t;
638 t = v & 1;
639 o(0xE3A00000|(intr(r)<<12)|t);
640 o(0xEA000000);
641 gsym(sv->c.i);
642 o(0xE3A00000|(intr(r)<<12)|(t^1));
643 return;
644 } else if (v < VT_CONST) {
645 if(is_float(ft))
646 #ifdef TCC_ARM_VFP
647 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
648 #else
649 o(0xEE008180|(fpr(r)<<12)|fpr(v));
650 #endif
651 else
652 o(0xE1A00000|(intr(r)<<12)|intr(v));
653 return;
656 tcc_error("load unimplemented!");
659 /* store register 'r' in lvalue 'v' */
660 void store(int r, SValue *sv)
662 SValue v1;
663 int v, ft, fc, fr, sign;
664 uint32_t op;
666 fr = sv->r;
667 ft = sv->type.t;
668 fc = sv->c.i;
670 if(fc>=0)
671 sign=0;
672 else {
673 sign=1;
674 fc=-fc;
677 v = fr & VT_VALMASK;
678 if (fr & VT_LVAL || fr == VT_LOCAL) {
679 uint32_t base = 0xb; /* fp */
680 if(v < VT_CONST) {
681 base=intr(v);
682 v=VT_LOCAL;
683 fc=sign=0;
684 } else if(v == VT_CONST) {
685 v1.type.t = ft;
686 v1.r = fr&~VT_LVAL;
687 v1.c.i = sv->c.i;
688 v1.sym=sv->sym;
689 load(TREG_LR, &v1);
690 base = 14; /* lr */
691 fc=sign=0;
692 v=VT_LOCAL;
694 if(v == VT_LOCAL) {
695 if(is_float(ft)) {
696 calcaddr(&base,&fc,&sign,1020,2);
697 #ifdef TCC_ARM_VFP
698 op=0xED000A00; /* fsts */
699 if(!sign)
700 op|=0x800000;
701 if ((ft & VT_BTYPE) != VT_FLOAT)
702 op|=0x100; /* fsts -> fstd */
703 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
704 #else
705 op=0xED000100;
706 if(!sign)
707 op|=0x800000;
708 #if LDOUBLE_SIZE == 8
709 if ((ft & VT_BTYPE) != VT_FLOAT)
710 op|=0x8000;
711 #else
712 if ((ft & VT_BTYPE) == VT_DOUBLE)
713 op|=0x8000;
714 if ((ft & VT_BTYPE) == VT_LDOUBLE)
715 op|=0x400000;
716 #endif
717 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
718 #endif
719 return;
720 } else if((ft & VT_BTYPE) == VT_SHORT) {
721 calcaddr(&base,&fc,&sign,255,0);
722 op=0xE14000B0;
723 if(!sign)
724 op|=0x800000;
725 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
726 } else {
727 calcaddr(&base,&fc,&sign,4095,0);
728 op=0xE5000000;
729 if(!sign)
730 op|=0x800000;
731 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
732 op|=0x400000;
733 o(op|(intr(r)<<12)|fc|(base<<16));
735 return;
738 tcc_error("store unimplemented");
741 static void gadd_sp(int val)
743 stuff_const_harder(0xE28DD000,val);
746 /* 'is_jmp' is '1' if it is a jump */
747 static void gcall_or_jmp(int is_jmp)
749 int r;
750 uint32_t x;
751 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
752 /* constant case */
753 if(vtop->r & VT_SYM){
754 x=encbranch(ind,ind+vtop->c.i,0);
755 if(x) {
756 /* relocation case */
757 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
758 o(x|(is_jmp?0xE0000000:0xE1000000));
759 } else {
760 if(!is_jmp)
761 o(0xE28FE004); // add lr,pc,#4
762 o(0xE51FF004); // ldr pc,[pc,#-4]
763 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
764 o(vtop->c.i);
766 }else{
767 if(!is_jmp)
768 o(0xE28FE004); // add lr,pc,#4
769 o(0xE51FF004); // ldr pc,[pc,#-4]
770 o(vtop->c.i);
772 } else {
773 /* otherwise, indirect call */
774 #ifdef CONFIG_TCC_BCHECK
775 vtop->r &= ~VT_MUSTBOUND;
776 #endif
777 r = gv(RC_INT);
778 if(!is_jmp)
779 o(0xE1A0E00F); // mov lr,pc
780 o(0xE1A0F000|intr(r)); // mov pc,r
784 #if defined(CONFIG_TCC_BCHECK)
786 static void gen_bounds_call(int v)
788 Sym *sym = external_global_sym(v, &func_old_type);
790 greloc(cur_text_section, sym, ind, R_ARM_PC24);
791 o(0xebfffffe);
794 /* generate a bounded pointer addition */
795 ST_FUNC void gen_bounded_ptr_add(void)
797 vpush_global_sym(&func_old_type, TOK___bound_ptr_add);
798 vrott(3);
799 gfunc_call(2);
800 vpushi(0);
801 /* returned pointer is in REG_IRET */
802 vtop->r = REG_IRET | VT_BOUNDED;
803 if (nocode_wanted)
804 return;
805 /* relocation offset of the bounding function call point */
806 vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(Elf32_Rel));
809 /* patch pointer addition in vtop so that pointer dereferencing is
810 also tested */
811 ST_FUNC void gen_bounded_ptr_deref(void)
813 addr_t func;
814 int size, align;
815 Elf32_Rel *rel;
816 Sym *sym;
818 if (nocode_wanted)
819 return;
821 size = type_size(&vtop->type, &align);
822 switch(size) {
823 case 1: func = TOK___bound_ptr_indir1; break;
824 case 2: func = TOK___bound_ptr_indir2; break;
825 case 4: func = TOK___bound_ptr_indir4; break;
826 case 8: func = TOK___bound_ptr_indir8; break;
827 case 12: func = TOK___bound_ptr_indir12; break;
828 case 16: func = TOK___bound_ptr_indir16; break;
829 default:
830 /* may happen with struct member access */
831 return;
832 //tcc_error("unhandled size when dereferencing bounded pointer");
833 //func = 0;
834 //break;
836 sym = external_global_sym(func, &func_old_type);
837 if (!sym->c)
838 put_extern_sym(sym, NULL, 0, 0);
839 /* patch relocation */
840 /* XXX: find a better solution ? */
841 rel = (Elf32_Rel *)(cur_text_section->reloc->data + vtop->c.i);
842 rel->r_info = ELF32_R_INFO(sym->c, ELF32_R_TYPE(rel->r_info));
845 static void gen_bounds_prolog(void)
847 /* leave some room for bound checking code */
848 func_bound_offset = lbounds_section->data_offset;
849 func_bound_ind = ind;
850 func_bound_add_epilog = 0;
851 o(0xe1a00000); /* ld r0,lbounds_section->data_offset */
852 o(0xe1a00000);
853 o(0xe1a00000);
854 o(0xe1a00000); /* call __bound_local_new */
857 static void gen_bounds_epilog(void)
859 addr_t saved_ind;
860 addr_t *bounds_ptr;
861 Sym *sym_data;
862 int offset_modified = func_bound_offset != lbounds_section->data_offset;
864 if (!offset_modified && !func_bound_add_epilog)
865 return;
867 /* add end of table info */
868 bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
869 *bounds_ptr = 0;
871 sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
872 func_bound_offset, lbounds_section->data_offset);
874 /* generate bound local allocation */
875 if (offset_modified) {
876 saved_ind = ind;
877 ind = func_bound_ind;
878 o(0xe59f0000); /* ldr r0, [pc] */
879 o(0xea000000); /* b $+4 */
880 greloc(cur_text_section, sym_data, ind, R_ARM_ABS32);
881 o(0x00000000); /* lbounds_section->data_offset */
882 gen_bounds_call(TOK___bound_local_new);
883 ind = saved_ind;
886 /* generate bound check local freeing */
887 o(0xe92d0003); /* push {r0,r1} */
888 o(0xed2d0b02); /* vpush {d0} */
889 o(0xe59f0000); /* ldr r0, [pc] */
890 o(0xea000000); /* b $+4 */
891 greloc(cur_text_section, sym_data, ind, R_ARM_ABS32);
892 o(0x00000000); /* lbounds_section->data_offset */
893 gen_bounds_call(TOK___bound_local_delete);
894 o(0xecbd0b02); /* vpop {d0} */
895 o(0xe8bd0003); /* pop {r0,r1} */
897 #endif
899 static int unalias_ldbl(int btype)
901 #if LDOUBLE_SIZE == 8
902 if (btype == VT_LDOUBLE)
903 btype = VT_DOUBLE;
904 #endif
905 return btype;
908 /* Return whether a structure is a homogeneous float aggregate or not.
909 The answer is true if all the elements of the structure are of the same
910 primitive float type and there are no more than 4 elements.
912 type: the type corresponding to the structure to be tested */
913 static int is_hgen_float_aggr(CType *type)
915 if ((type->t & VT_BTYPE) == VT_STRUCT) {
916 struct Sym *ref;
917 int btype, nb_fields = 0;
919 ref = type->ref->next;
920 if (ref) {
921 btype = unalias_ldbl(ref->type.t & VT_BTYPE);
922 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
923 for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
924 return !ref && nb_fields <= 4;
928 return 0;
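/* Illustrative cases for the check above: "struct {float x, y;}" or a struct
   of four doubles qualify; "struct {float x; int n;}" or a struct of five
   floats do not. */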
931 struct avail_regs {
932 signed char avail[3]; /* 3 holes max with only float and double alignments */
933 int first_hole; /* first available hole */
934 int last_hole; /* last available hole (none if equal to first_hole) */
935 int first_free_reg; /* next free register in the sequence, hole excluded */
938 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
939 param) according to the rules described in the procedure call standard for
940 the ARM architecture (AAPCS). If found, the registers are assigned to this
941 VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
942 and the parameter is a single float.
944 avregs: opaque structure to keep track of available VFP co-processor regs
945 align: alignment constraints for the param, as returned by type_size()
946 size: size of the parameter, as returned by type_size() */
947 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
949 int first_reg = 0;
951 if (avregs->first_free_reg == -1)
952 return -1;
953 if (align >> 3) { /* double alignment */
954 first_reg = avregs->first_free_reg;
955 /* alignment constraint not respected so use next reg and record hole */
956 if (first_reg & 1)
957 avregs->avail[avregs->last_hole++] = first_reg++;
958 } else { /* no special alignment (float or array of float) */
959 /* if single float and a hole is available, assign the param to it */
960 if (size == 4 && avregs->first_hole != avregs->last_hole)
961 return avregs->avail[avregs->first_hole++];
962 else
963 first_reg = avregs->first_free_reg;
965 if (first_reg + size / 4 <= 16) {
966 avregs->first_free_reg = first_reg + size / 4;
967 return first_reg;
969 avregs->first_free_reg = -1;
970 return -1;
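/* Illustrative sequence (assuming an empty avail_regs): a double gets s0/s1,
   a float gets s2, the next double skips s3 (recorded as a hole) and gets
   s4/s5, and a later single float then back-fills s3. */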
973 /* Returns whether all params need to be passed in core registers or not.
974 This is the case for functions that are part of the runtime ABI. */
975 int floats_in_core_regs(SValue *sval)
977 if (!sval->sym)
978 return 0;
980 switch (sval->sym->v) {
981 case TOK___floatundisf:
982 case TOK___floatundidf:
983 case TOK___fixunssfdi:
984 case TOK___fixunsdfdi:
985 #ifndef TCC_ARM_VFP
986 case TOK___fixunsxfdi:
987 #endif
988 case TOK___floatdisf:
989 case TOK___floatdidf:
990 case TOK___fixsfdi:
991 case TOK___fixdfdi:
992 return 1;
994 default:
995 return 0;
999 /* Return the number of registers needed to return the struct, or 0 if
1000 returning via struct pointer. */
1001 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
1002 #ifdef TCC_ARM_EABI
1003 int size, align;
1004 size = type_size(vt, &align);
1005 if (float_abi == ARM_HARD_FLOAT && !variadic &&
1006 (is_float(vt->t) || is_hgen_float_aggr(vt))) {
1007 *ret_align = 8;
1008 *regsize = 8;
1009 ret->ref = NULL;
1010 ret->t = VT_DOUBLE;
1011 return (size + 7) >> 3;
1012 } else if (size > 0 && size <= 4) {
1013 *ret_align = 4;
1014 *regsize = 4;
1015 ret->ref = NULL;
1016 ret->t = VT_INT;
1017 return 1;
1018 } else
1019 return 0;
1020 #else
1021 return 0;
1022 #endif
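/* Illustrative behaviour with TCC_ARM_EABI: a non-variadic hard-float call
   returning "struct {float x, y;}" comes back in VFP registers (ret type
   forced to double, one 8-byte register), a 4-byte struct comes back in r0,
   and any other struct falls back to the hidden return pointer (0). */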
1025 /* Parameters are classified according to how they are copied to their final
1026 destination for the function call. Because the copying is performed class
1027 after class according to the order in the enum below, it is important that
1028 some constraints about the order of the members of this enum are respected:
1029 - CORE_STRUCT_CLASS must come after STACK_CLASS;
1030 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
1031 VFP_STRUCT_CLASS;
1032 - VFP_STRUCT_CLASS must come after VFP_CLASS.
1033 See the comment for the main loop in copy_params() for the reason. */
1034 enum reg_class {
1035 STACK_CLASS = 0,
1036 CORE_STRUCT_CLASS,
1037 VFP_CLASS,
1038 VFP_STRUCT_CLASS,
1039 CORE_CLASS,
1040 NB_CLASSES
1043 struct param_plan {
1044 int start; /* first reg or addr used depending on the class */
1045 int end; /* last reg used or next free addr depending on the class */
1046 SValue *sval; /* pointer to SValue on the value stack */
1047 struct param_plan *prev; /* previous element in this class */
1050 struct plan {
1051 struct param_plan *pplans; /* array of all the param plans */
1052 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
1053 int nb_plans;
1056 static void add_param_plan(struct plan* plan, int cls, int start, int end, SValue *v)
1058 struct param_plan *p = &plan->pplans[plan->nb_plans++];
1059 p->prev = plan->clsplans[cls];
1060 plan->clsplans[cls] = p;
1061 p->start = start, p->end = end, p->sval = v;
1064 /* Assign parameters to registers and stack with alignment according to the
1065 rules in the procedure call standard for the ARM architecture (AAPCS).
1066 The overall assignment is recorded in an array of per parameter structures
1067 called parameter plans. The parameter plans are also further organized in a
1068 number of linked lists, one per class of parameter (see the comment for the
1069 definition of enum reg_class).
1071 nb_args: number of parameters of the function for which a call is generated
1072 float_abi: float ABI in use for this function call
1073 plan: the structure where the overall assignment is recorded
1074 todo: a bitmap that records which core registers hold a parameter
1076 Returns the amount of stack space needed for parameter passing
1078 Note: this function allocates an array in plan->pplans with tcc_malloc. It
1079 is the responsibility of the caller to free this array once used (i.e. not
1080 before copy_params). */
1081 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
1083 int i, size, align;
1084 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
1085 struct avail_regs avregs = {{0}};
1087 ncrn = nsaa = 0;
1088 *todo = 0;
1090 for(i = nb_args; i-- ;) {
1091 int j, start_vfpreg = 0;
1092 CType type = vtop[-i].type;
1093 type.t &= ~VT_ARRAY;
1094 size = type_size(&type, &align);
1095 size = (size + 3) & ~3;
1096 align = (align + 3) & ~3;
1097 switch(vtop[-i].type.t & VT_BTYPE) {
1098 case VT_STRUCT:
1099 case VT_FLOAT:
1100 case VT_DOUBLE:
1101 case VT_LDOUBLE:
1102 if (float_abi == ARM_HARD_FLOAT) {
1103 int is_hfa = 0; /* Homogeneous float aggregate */
1105 if (is_float(vtop[-i].type.t)
1106 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
1107 int end_vfpreg;
1109 start_vfpreg = assign_vfpreg(&avregs, align, size);
1110 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
1111 if (start_vfpreg >= 0) {
1112 add_param_plan(plan, is_hfa ? VFP_STRUCT_CLASS : VFP_CLASS,
1113 start_vfpreg, end_vfpreg, &vtop[-i]);
1114 continue;
1115 } else
1116 break;
1119 ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
1120 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
1121 /* The parameter is allocated both in core register and on stack. As
1122 * such, it can be of either class: it would either be the last of
1123 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1124 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
1125 *todo|=(1<<j);
1126 add_param_plan(plan, CORE_STRUCT_CLASS, ncrn, j, &vtop[-i]);
1127 ncrn += size/4;
1128 if (ncrn > 4)
1129 nsaa = (ncrn - 4) * 4;
1130 } else {
1131 ncrn = 4;
1132 break;
1134 continue;
1135 default:
1136 if (ncrn < 4) {
1137 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1139 if (is_long) {
1140 ncrn = (ncrn + 1) & -2;
1141 if (ncrn == 4)
1142 break;
1144 add_param_plan(plan, CORE_CLASS, ncrn, ncrn + is_long, &vtop[-i]);
1145 ncrn += 1 + is_long;
1146 continue;
1149 nsaa = (nsaa + (align - 1)) & ~(align - 1);
1150 add_param_plan(plan, STACK_CLASS, nsaa, nsaa + size, &vtop[-i]);
1151 nsaa += size; /* size already rounded up before */
1153 return nsaa;
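/* Illustrative assignment with the base/softfp variant: for
   f(int a, double b, int c), a lands in r0, b occupies the even-aligned pair
   r2/r3, and c goes to the stack, so assign_regs() reports 4 bytes of stack
   space. */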
1156 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1157 function call.
1159 nb_args: number of parameters the function takes
1160 plan: the overall assignment plan for parameters
1161 todo: a bitmap indicating what core reg will hold a parameter
1163 Returns the number of SValue added by this function on the value stack */
1164 static int copy_params(int nb_args, struct plan *plan, int todo)
1166 int size, align, r, i, nb_extra_sval = 0;
1167 struct param_plan *pplan;
1168 int pass = 0;
1170 /* Several constraints require parameters to be copied in a specific order:
1171 - structures are copied to the stack before being loaded in a reg;
1172 - floats loaded to an odd numbered VFP reg are first copied to the
1173 preceding even numbered VFP reg and then moved to the next VFP reg.
1175 It is thus important that:
1176 - structures assigned to core regs must be copied after parameters
1177 assigned to the stack but before structures assigned to VFP regs because
1178 a structure can lie partly in core registers and partly on the stack;
1179 - parameters assigned to the stack and all structures be copied before
1180 parameters assigned to a core reg since copying a parameter to the stack
1181 requires using a core reg;
1182 - parameters assigned to VFP regs be copied before structures assigned to
1183 VFP regs as the copy might use an even numbered VFP reg that already
1184 holds part of a structure. */
1185 again:
1186 for(i = 0; i < NB_CLASSES; i++) {
1187 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1189 if (pass
1190 && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
1191 continue;
1193 vpushv(pplan->sval);
1194 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1195 switch(i) {
1196 case STACK_CLASS:
1197 case CORE_STRUCT_CLASS:
1198 case VFP_STRUCT_CLASS:
1199 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1200 int padding = 0;
1201 size = type_size(&pplan->sval->type, &align);
1202 /* align to stack align size */
1203 size = (size + 3) & ~3;
1204 if (i == STACK_CLASS && pplan->prev)
1205 padding = pplan->start - pplan->prev->end;
1206 size += padding; /* Add padding if any */
1207 /* allocate the necessary size on stack */
1208 gadd_sp(-size);
1209 /* generate structure store */
1210 r = get_reg(RC_INT);
1211 o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1212 vset(&vtop->type, r | VT_LVAL, 0);
1213 vswap();
1214 vstore(); /* memcpy to current sp + potential padding */
1216 /* Homogeneous float aggregates are loaded into VFP registers
1217 immediately since there is no way of loading data into multiple
1218 non-consecutive VFP registers, as is done for other
1219 structures (see the use of todo). */
1220 if (i == VFP_STRUCT_CLASS) {
1221 int first = pplan->start, nb = pplan->end - first + 1;
1222 /* vpop.32 {pplan->start, ..., pplan->end} */
1223 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1224 /* No need to write the register used to a SValue since VFP regs
1225 cannot be used for gcall_or_jmp */
1227 } else {
1228 if (is_float(pplan->sval->type.t)) {
1229 #ifdef TCC_ARM_VFP
1230 r = vfpr(gv(RC_FLOAT)) << 12;
1231 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1232 size = 4;
1233 else {
1234 size = 8;
1235 r |= 0x101; /* vpush.32 -> vpush.64 */
1237 o(0xED2D0A01 + r); /* vpush */
1238 #else
1239 r = fpr(gv(RC_FLOAT)) << 12;
1240 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1241 size = 4;
1242 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1243 size = 8;
1244 else
1245 size = LDOUBLE_SIZE;
1247 if (size == 12)
1248 r |= 0x400000;
1249 else if(size == 8)
1250 r|=0x8000;
1252 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1253 #endif
1254 } else {
1255 /* simple type (currently always same size) */
1256 /* XXX: implicit cast ? */
1257 size=4;
1258 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1259 lexpand();
1260 size = 8;
1261 r = gv(RC_INT);
1262 o(0xE52D0004|(intr(r)<<12)); /* push r */
1263 vtop--;
1265 r = gv(RC_INT);
1266 o(0xE52D0004|(intr(r)<<12)); /* push r */
1268 if (i == STACK_CLASS && pplan->prev)
1269 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1271 break;
1273 case VFP_CLASS:
1274 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1275 if (pplan->start & 1) { /* Must be in upper part of double register */
1276 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1277 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1279 break;
1281 case CORE_CLASS:
1282 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1283 lexpand();
1284 gv(regmask(pplan->end));
1285 pplan->sval->r2 = vtop->r;
1286 vtop--;
1288 gv(regmask(pplan->start));
1289 /* Mark register as used so that gcall_or_jmp use another one
1290 (regs >=4 are free as never used to pass parameters) */
1291 pplan->sval->r = vtop->r;
1292 break;
1294 vtop--;
1298 /* second pass to restore registers that were saved on stack by accident.
1299 Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1300 if (++pass < 2)
1301 goto again;
1303 /* Manually free remaining registers since next parameters are loaded
1304 * manually, without the help of gv(int). */
1305 save_regs(nb_args);
1307 if(todo) {
1308 o(0xE8BD0000|todo); /* pop {todo} */
1309 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1310 int r;
1311 pplan->sval->r = pplan->start;
1312 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1313 can occupy more than 2 registers. Thus, we need to push on the value
1314 stack some fake parameters to have one SValue for each register used
1315 by a structure (r2 is not used). */
1316 for (r = pplan->start + 1; r <= pplan->end; r++) {
1317 if (todo & (1 << r)) {
1318 nb_extra_sval++;
1319 vpushi(0);
1320 vtop->r = r;
1325 return nb_extra_sval;
1328 /* Generate function call. The function address is pushed first, then
1329 all the parameters in call order. This functions pops all the
1330 parameters and the function address. */
1331 void gfunc_call(int nb_args)
1333 int r, args_size;
1334 int def_float_abi = float_abi;
1335 int todo;
1336 struct plan plan;
1337 #ifdef TCC_ARM_EABI
1338 int variadic;
1339 #endif
1341 #ifdef CONFIG_TCC_BCHECK
1342 if (tcc_state->do_bounds_check)
1343 gbound_args(nb_args);
1344 #endif
1346 #ifdef TCC_ARM_EABI
1347 if (float_abi == ARM_HARD_FLOAT) {
1348 variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
1349 if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1350 float_abi = ARM_SOFTFP_FLOAT;
1352 #endif
1353 /* cannot leave cpu flags set if other instructions are generated. Also avoid leaving
1354 VT_JMP anywhere except on the top of the stack because it would complicate
1355 the code generator. */
1356 r = vtop->r & VT_VALMASK;
1357 if (r == VT_CMP || (r & ~1) == VT_JMP)
1358 gv(RC_INT);
1360 memset(&plan, 0, sizeof plan);
1361 if (nb_args)
1362 plan.pplans = tcc_malloc(nb_args * sizeof(*plan.pplans));
1364 args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1366 #ifdef TCC_ARM_EABI
1367 if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1368 args_size = (args_size + 7) & ~7;
1369 o(0xE24DD004); /* sub sp, sp, #4 */
1371 #endif
1373 nb_args += copy_params(nb_args, &plan, todo);
1374 tcc_free(plan.pplans);
1376 /* Move fct SValue on top as required by gcall_or_jmp */
1377 vrotb(nb_args + 1);
1378 gcall_or_jmp(0);
1379 if (args_size)
1380 gadd_sp(args_size); /* pop all parameters passed on the stack */
1381 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1382 if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1383 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1384 o(0xEE000A10); /*vmov s0, r0 */
1385 } else {
1386 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1387 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1390 #endif
1391 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1392 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1393 float_abi = def_float_abi;
1396 /* generate function prolog of type 't' */
1397 void gfunc_prolog(Sym *func_sym)
1399 CType *func_type = &func_sym->type;
1400 Sym *sym,*sym2;
1401 int n, nf, size, align, rs, struct_ret = 0;
1402 int addr, pn, sn; /* pn=core, sn=stack */
1403 CType ret_type;
1405 #ifdef TCC_ARM_EABI
1406 struct avail_regs avregs = {{0}};
1407 #endif
1409 sym = func_type->ref;
1411 n = nf = 0;
1412 if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1413 !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1415 n++;
1416 struct_ret = 1;
1417 func_vc = 12; /* Offset from fp of the place to store the result */
1419 for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1420 size = type_size(&sym2->type, &align);
1421 #ifdef TCC_ARM_EABI
1422 if (float_abi == ARM_HARD_FLOAT && !func_var &&
1423 (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1424 int tmpnf = assign_vfpreg(&avregs, align, size);
1425 tmpnf += (size + 3) / 4;
1426 nf = (tmpnf > nf) ? tmpnf : nf;
1427 } else
1428 #endif
1429 if (n < 4)
1430 n += (size + 3) / 4;
1432 o(0xE1A0C00D); /* mov ip,sp */
1433 if (func_var)
1434 n=4;
1435 if (n) {
1436 if(n>4)
1437 n=4;
1438 #ifdef TCC_ARM_EABI
1439 n=(n+1)&-2;
1440 #endif
1441 o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1443 if (nf) {
1444 if (nf>16)
1445 nf=16;
1446 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1447 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1449 o(0xE92D5800); /* save fp, ip, lr */
1450 o(0xE1A0B00D); /* mov fp, sp */
1451 func_sub_sp_offset = ind;
1452 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1454 #ifdef TCC_ARM_EABI
1455 if (float_abi == ARM_HARD_FLOAT) {
1456 func_vc += nf * 4;
1457 memset(&avregs, 0, sizeof avregs);
1459 #endif
1460 pn = struct_ret, sn = 0;
1461 while ((sym = sym->next)) {
1462 CType *type;
1463 type = &sym->type;
1464 size = type_size(type, &align);
1465 size = (size + 3) >> 2;
1466 align = (align + 3) & ~3;
1467 #ifdef TCC_ARM_EABI
1468 if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1469 || is_hgen_float_aggr(&sym->type))) {
1470 int fpn = assign_vfpreg(&avregs, align, size << 2);
1471 if (fpn >= 0)
1472 addr = fpn * 4;
1473 else
1474 goto from_stack;
1475 } else
1476 #endif
1477 if (pn < 4) {
1478 #ifdef TCC_ARM_EABI
1479 pn = (pn + (align-1)/4) & -(align/4);
1480 #endif
1481 addr = (nf + pn) * 4;
1482 pn += size;
1483 if (!sn && pn > 4)
1484 sn = (pn - 4);
1485 } else {
1486 #ifdef TCC_ARM_EABI
1487 from_stack:
1488 sn = (sn + (align-1)/4) & -(align/4);
1489 #endif
1490 addr = (n + nf + sn) * 4;
1491 sn += size;
1493 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL,
1494 addr + 12);
1496 last_itod_magic=0;
1497 leaffunc = 1;
1498 loc = 0;
1499 #ifdef CONFIG_TCC_BCHECK
1500 if (tcc_state->do_bounds_check)
1501 gen_bounds_prolog();
1502 #endif
1505 /* generate function epilog */
1506 void gfunc_epilog(void)
1508 uint32_t x;
1509 int diff;
1511 #ifdef CONFIG_TCC_BCHECK
1512 if (tcc_state->do_bounds_check)
1513 gen_bounds_epilog();
1514 #endif
1515 /* Copy float return value to core register if base standard is used and
1516 float computation is made with VFP */
1517 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1518 if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1519 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1520 o(0xEE100A10); /* fmrs r0, s0 */
1521 else {
1522 o(0xEE100B10); /* fmrdl r0, d0 */
1523 o(0xEE301B10); /* fmrdh r1, d0 */
1526 #endif
1527 o(0xE89BA800); /* restore fp, sp, pc */
1528 diff = (-loc + 3) & -4;
1529 #ifdef TCC_ARM_EABI
1530 if(!leaffunc)
1531 diff = ((diff + 11) & -8) - 4;
1532 #endif
1533 if(diff > 0) {
1534 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1535 if(x)
1536 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1537 else {
1538 int addr;
1539 addr=ind;
1540 o(0xE59FC004); /* ldr ip,[pc+4] */
1541 o(0xE04BD00C); /* sub sp,fp,ip */
1542 o(0xE1A0F00E); /* mov pc,lr */
1543 o(diff);
1544 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1549 ST_FUNC void gen_fill_nops(int bytes)
1551 if ((bytes & 3))
1552 tcc_error("alignment of code section not multiple of 4");
1553 while (bytes > 0) {
1554 o(0xE1A00000);
1555 bytes -= 4;
1559 /* generate a jump to a label */
1560 ST_FUNC int gjmp(int t)
1562 int r;
1563 if (nocode_wanted)
1564 return t;
1565 r=ind;
1566 o(0xE0000000|encbranch(r,t,1));
1567 return r;
1570 /* generate a jump to a fixed address */
1571 ST_FUNC void gjmp_addr(int a)
1573 gjmp(a);
1576 ST_FUNC int gjmp_cond(int op, int t)
1578 int r;
1579 if (nocode_wanted)
1580 return t;
1581 r=ind;
1582 op=mapcc(op);
1583 op|=encbranch(r,t,1);
1584 o(op);
1585 return r;
1588 ST_FUNC int gjmp_append(int n, int t)
1590 uint32_t *x;
1591 int p,lp;
1592 if(n) {
1593 p = n;
1594 do {
1595 p = decbranch(lp=p);
1596 } while(p);
1597 x = (uint32_t *)(cur_text_section->data + lp);
1598 *x &= 0xff000000;
1599 *x |= encbranch(lp,t,1);
1600 t = n;
1602 return t;
1605 /* generate an integer binary operation */
1606 void gen_opi(int op)
1608 int c, func = 0;
1609 uint32_t opc = 0, r, fr;
1610 unsigned short retreg = REG_IRET;
1612 c=0;
1613 switch(op) {
1614 case '+':
1615 opc = 0x8;
1616 c=1;
1617 break;
1618 case TOK_ADDC1: /* add with carry generation */
1619 opc = 0x9;
1620 c=1;
1621 break;
1622 case '-':
1623 opc = 0x4;
1624 c=1;
1625 break;
1626 case TOK_SUBC1: /* sub with carry generation */
1627 opc = 0x5;
1628 c=1;
1629 break;
1630 case TOK_ADDC2: /* add with carry use */
1631 opc = 0xA;
1632 c=1;
1633 break;
1634 case TOK_SUBC2: /* sub with carry use */
1635 opc = 0xC;
1636 c=1;
1637 break;
1638 case '&':
1639 opc = 0x0;
1640 c=1;
1641 break;
1642 case '^':
1643 opc = 0x2;
1644 c=1;
1645 break;
1646 case '|':
1647 opc = 0x18;
1648 c=1;
1649 break;
1650 case '*':
1651 gv2(RC_INT, RC_INT);
1652 r = vtop[-1].r;
1653 fr = vtop[0].r;
1654 vtop--;
1655 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1656 return;
1657 case TOK_SHL:
1658 opc = 0;
1659 c=2;
1660 break;
1661 case TOK_SHR:
1662 opc = 1;
1663 c=2;
1664 break;
1665 case TOK_SAR:
1666 opc = 2;
1667 c=2;
1668 break;
1669 case '/':
1670 case TOK_PDIV:
1671 func=TOK___divsi3;
1672 c=3;
1673 break;
1674 case TOK_UDIV:
1675 func=TOK___udivsi3;
1676 c=3;
1677 break;
1678 case '%':
1679 #ifdef TCC_ARM_EABI
1680 func=TOK___aeabi_idivmod;
1681 retreg=REG_IRE2;
1682 #else
1683 func=TOK___modsi3;
1684 #endif
1685 c=3;
1686 break;
1687 case TOK_UMOD:
1688 #ifdef TCC_ARM_EABI
1689 func=TOK___aeabi_uidivmod;
1690 retreg=REG_IRE2;
1691 #else
1692 func=TOK___umodsi3;
1693 #endif
1694 c=3;
1695 break;
1696 case TOK_UMULL:
1697 gv2(RC_INT, RC_INT);
1698 r=intr(vtop[-1].r2=get_reg(RC_INT));
1699 c=vtop[-1].r;
1700 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1701 vtop--;
1702 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1703 return;
1704 default:
1705 opc = 0x15;
1706 c=1;
1707 break;
1709 switch(c) {
1710 case 1:
1711 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1712 if(opc == 4 || opc == 5 || opc == 0xc) {
1713 vswap();
1714 opc|=2; // sub -> rsb
1717 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1718 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1719 gv(RC_INT);
1720 vswap();
1721 c=intr(gv(RC_INT));
1722 vswap();
1723 opc=0xE0000000|(opc<<20);
1724 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1725 uint32_t x;
1726 x=stuff_const(opc|0x2000000|(c<<16),vtop->c.i);
1727 if(x) {
1728 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1729 o(x|(r<<12));
1730 goto done;
1733 fr=intr(gv(RC_INT));
1734 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1735 vswap();
1736 c=intr(gv(RC_INT));
1737 vswap();
1739 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1740 o(opc|(c<<16)|(r<<12)|fr);
1741 done:
1742 vtop--;
1743 if (op >= TOK_ULT && op <= TOK_GT)
1744 vset_VT_CMP(op);
1745 break;
1746 case 2:
1747 opc=0xE1A00000|(opc<<5);
1748 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1749 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1750 gv(RC_INT);
1751 vswap();
1752 r=intr(gv(RC_INT));
1753 vswap();
1754 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1755 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1756 c = vtop->c.i & 0x1f;
1757 o(opc|r|(c<<7)|(fr<<12));
1758 } else {
1759 fr=intr(gv(RC_INT));
1760 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1761 vswap();
1762 r=intr(gv(RC_INT));
1763 vswap();
1765 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1766 o(opc|r|(c<<12)|(fr<<8)|0x10);
1768 vtop--;
1769 break;
1770 case 3:
1771 vpush_global_sym(&func_old_type, func);
1772 vrott(3);
1773 gfunc_call(2);
1774 vpushi(0);
1775 vtop->r = retreg;
1776 break;
1777 default:
1778 tcc_error("gen_opi %i unimplemented!",op);
1782 #ifdef TCC_ARM_VFP
1783 static int is_zero(int i)
1785 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1786 return 0;
1787 if (vtop[i].type.t == VT_FLOAT)
1788 return (vtop[i].c.f == 0.f);
1789 else if (vtop[i].type.t == VT_DOUBLE)
1790 return (vtop[i].c.d == 0.0);
1791 return (vtop[i].c.ld == 0.l);
1794 /* generate a floating point operation 'v = t1 op t2' instruction. The
1795 * two operands are guaranteed to have the same floating point type */
1796 void gen_opf(int op)
1798 uint32_t x;
1799 int fneg=0,r;
1800 x=0xEE000A00|T2CPR(vtop->type.t);
1801 switch(op) {
1802 case '+':
1803 if(is_zero(-1))
1804 vswap();
1805 if(is_zero(0)) {
1806 vtop--;
1807 return;
1809 x|=0x300000;
1810 break;
1811 case '-':
1812 x|=0x300040;
1813 if(is_zero(0)) {
1814 vtop--;
1815 return;
1817 if(is_zero(-1)) {
1818 x|=0x810000; /* fsubX -> fnegX */
1819 vswap();
1820 vtop--;
1821 fneg=1;
1823 break;
1824 case '*':
1825 x|=0x200000;
1826 break;
1827 case '/':
1828 x|=0x800000;
1829 break;
1830 default:
1831 if(op < TOK_ULT || op > TOK_GT) {
1832 tcc_error("unknown fp op %x!",op);
1833 return;
1835 if(is_zero(-1)) {
1836 vswap();
1837 switch(op) {
1838 case TOK_LT: op=TOK_GT; break;
1839 case TOK_GE: op=TOK_ULE; break;
1840 case TOK_LE: op=TOK_GE; break;
1841 case TOK_GT: op=TOK_ULT; break;
1844 x|=0xB40040; /* fcmpX */
1845 if(op!=TOK_EQ && op!=TOK_NE)
1846 x|=0x80; /* fcmpX -> fcmpeX */
1847 if(is_zero(0)) {
1848 vtop--;
1849 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1850 } else {
1851 gv2(RC_FLOAT,RC_FLOAT);
1852 x|=vfpr(vtop[0].r);
1853 o(x|(vfpr(vtop[-1].r) << 12));
1854 vtop--;
1856 o(0xEEF1FA10); /* fmstat */
1858 switch(op) {
1859 case TOK_LE: op=TOK_ULE; break;
1860 case TOK_LT: op=TOK_ULT; break;
1861 case TOK_UGE: op=TOK_GE; break;
1862 case TOK_UGT: op=TOK_GT; break;
1864 vset_VT_CMP(op);
1865 return;
1867 r=gv(RC_FLOAT);
1868 x|=vfpr(r);
1869 r=regmask(r);
1870 if(!fneg) {
1871 int r2;
1872 vswap();
1873 r2=gv(RC_FLOAT);
1874 x|=vfpr(r2)<<16;
1875 r|=regmask(r2);
1876 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1877 vswap();
1878 r=gv(RC_FLOAT);
1879 vswap();
1880 x=(x&~0xf)|vfpr(r);
1883 vtop->r=get_reg_ex(RC_FLOAT,r);
1884 if(!fneg)
1885 vtop--;
1886 o(x|(vfpr(vtop->r)<<12));
1889 #else
1890 static uint32_t is_fconst()
1892 long double f;
1893 uint32_t r;
1894 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1895 return 0;
1896 if (vtop->type.t == VT_FLOAT)
1897 f = vtop->c.f;
1898 else if (vtop->type.t == VT_DOUBLE)
1899 f = vtop->c.d;
1900 else
1901 f = vtop->c.ld;
1902 if(!ieee_finite(f))
1903 return 0;
1904 r=0x8;
1905 if(f<0.0) {
1906 r=0x18;
1907 f=-f;
1909 if(f==0.0)
1910 return r;
1911 if(f==1.0)
1912 return r|1;
1913 if(f==2.0)
1914 return r|2;
1915 if(f==3.0)
1916 return r|3;
1917 if(f==4.0)
1918 return r|4;
1919 if(f==5.0)
1920 return r|5;
1921 if(f==0.5)
1922 return r|6;
1923 if(f==10.0)
1924 return r|7;
1925 return 0;
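/* Examples: 2.0 yields 0x8|2, -2.0 yields 0x18|2 (bit 0x10 records that the
   sign was stripped so the caller can flip add/sub), and 7.0 is not an FPA
   immediate constant, so 0 is returned. */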
1928 /* generate a floating point operation 'v = t1 op t2' instruction. The
1929 two operands are guaranteed to have the same floating point type */
1930 void gen_opf(int op)
1932 uint32_t x, r, r2, c1, c2;
1933 //fputs("gen_opf\n",stderr);
1934 vswap();
1935 c1 = is_fconst();
1936 vswap();
1937 c2 = is_fconst();
1938 x=0xEE000100;
1939 #if LDOUBLE_SIZE == 8
1940 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1941 x|=0x80;
1942 #else
1943 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1944 x|=0x80;
1945 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1946 x|=0x80000;
1947 #endif
1948 switch(op)
1950 case '+':
1951 if(!c2) {
1952 vswap();
1953 c2=c1;
1955 vswap();
1956 r=fpr(gv(RC_FLOAT));
1957 vswap();
1958 if(c2) {
1959 if(c2>0xf)
1960 x|=0x200000; // suf
1961 r2=c2&0xf;
1962 } else {
1963 r2=fpr(gv(RC_FLOAT));
1964 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1965 vswap();
1966 r=fpr(gv(RC_FLOAT));
1967 vswap();
1970 break;
1971 case '-':
1972 if(c2) {
1973 if(c2<=0xf)
1974 x|=0x200000; // suf
1975 r2=c2&0xf;
1976 vswap();
1977 r=fpr(gv(RC_FLOAT));
1978 vswap();
1979 } else if(c1 && c1<=0xf) {
1980 x|=0x300000; // rsf
1981 r2=c1;
1982 r=fpr(gv(RC_FLOAT));
1983 vswap();
1984 } else {
1985 x|=0x200000; // suf
1986 vswap();
1987 r=fpr(gv(RC_FLOAT));
1988 vswap();
1989 r2=fpr(gv(RC_FLOAT));
1990 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1991 vswap();
1992 r=fpr(gv(RC_FLOAT));
1993 vswap();
1996 break;
1997 case '*':
1998 if(!c2 || c2>0xf) {
1999 vswap();
2000 c2=c1;
2002 vswap();
2003 r=fpr(gv(RC_FLOAT));
2004 vswap();
2005 if(c2 && c2<=0xf)
2006 r2=c2;
2007 else {
2008 r2=fpr(gv(RC_FLOAT));
2009 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2010 vswap();
2011 r=fpr(gv(RC_FLOAT));
2012 vswap();
2015 x|=0x100000; // muf
2016 break;
2017 case '/':
2018 if(c2 && c2<=0xf) {
2019 x|=0x400000; // dvf
2020 r2=c2;
2021 vswap();
2022 r=fpr(gv(RC_FLOAT));
2023 vswap();
2024 } else if(c1 && c1<=0xf) {
2025 x|=0x500000; // rdf
2026 r2=c1;
2027 r=fpr(gv(RC_FLOAT));
2028 vswap();
2029 } else {
2030 x|=0x400000; // dvf
2031 vswap();
2032 r=fpr(gv(RC_FLOAT));
2033 vswap();
2034 r2=fpr(gv(RC_FLOAT));
2035 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2036 vswap();
2037 r=fpr(gv(RC_FLOAT));
2038 vswap();
2041 break;
2042 default:
2043 if(op >= TOK_ULT && op <= TOK_GT) {
2044 x|=0xd0f110; // cmfe
2045 /* bug (intentional?) in the Linux FPU emulator:
2046 it doesn't set carry if equal */
2047 switch(op) {
2048 case TOK_ULT:
2049 case TOK_UGE:
2050 case TOK_ULE:
2051 case TOK_UGT:
2052 tcc_error("unsigned comparison on floats?");
2053 break;
2054 case TOK_LT:
2055 op=TOK_Nset;
2056 break;
2057 case TOK_LE:
2058 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
2059 break;
2060 case TOK_EQ:
2061 case TOK_NE:
2062 x&=~0x400000; // cmfe -> cmf
2063 break;
2065 if(c1 && !c2) {
2066 c2=c1;
2067 vswap();
2068 switch(op) {
2069 case TOK_Nset:
2070 op=TOK_GT;
2071 break;
2072 case TOK_GE:
2073 op=TOK_ULE;
2074 break;
2075 case TOK_ULE:
2076 op=TOK_GE;
2077 break;
2078 case TOK_GT:
2079 op=TOK_Nset;
2080 break;
2083 vswap();
2084 r=fpr(gv(RC_FLOAT));
2085 vswap();
2086 if(c2) {
2087 if(c2>0xf)
2088 x|=0x200000;
2089 r2=c2&0xf;
2090 } else {
2091 r2=fpr(gv(RC_FLOAT));
2092 if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2093 vswap();
2094 r=fpr(gv(RC_FLOAT));
2095 vswap();
2098 --vtop;
2099 vset_VT_CMP(op);
2100 ++vtop;
2101 } else {
2102 tcc_error("unknown fp op %x!",op);
2103 return;
2106 if(vtop[-1].r == VT_CMP)
2107 c1=15;
2108 else {
2109 c1=vtop->r;
2110 if(r2&0x8)
2111 c1=vtop[-1].r;
2112 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
2113 c1=fpr(vtop[-1].r);
2115 vtop--;
2116 o(x|(r<<16)|(c1<<12)|r2);
2118 #endif
2120 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2121 and 'long long' cases. */
2122 ST_FUNC void gen_cvt_itof(int t)
2124 uint32_t r, r2;
2125 int bt;
2126 bt=vtop->type.t & VT_BTYPE;
2127 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
2128 #ifndef TCC_ARM_VFP
2129 uint32_t dsize = 0;
2130 #endif
2131 r=intr(gv(RC_INT));
2132 #ifdef TCC_ARM_VFP
2133 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
2134 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
2135 r2|=r2<<12;
2136 if(!(vtop->type.t & VT_UNSIGNED))
2137 r2|=0x80; /* fuitoX -> fsituX */
2138 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
2139 #else
2140 r2=fpr(vtop->r=get_reg(RC_FLOAT));
2141 if((t & VT_BTYPE) != VT_FLOAT)
2142 dsize=0x80; /* flts -> fltd */
2143 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
2144 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
2145 uint32_t off = 0;
2146 o(0xE3500000|(r<<12)); /* cmp */
2147 r=fpr(get_reg(RC_FLOAT));
2148 if(last_itod_magic) {
2149 off=ind+8-last_itod_magic;
2150 off/=4;
2151 if(off>255)
2152 off=0;
2154 o(0xBD1F0100|(r<<12)|off); /* ldflts */
2155 if(!off) {
2156 o(0xEA000000); /* b */
2157 last_itod_magic=ind;
2158 o(0x4F800000); /* 4294967296.0f */
2160 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
2162 #endif
2163 return;
2164 } else if(bt == VT_LLONG) {
2165 int func;
2166 CType *func_type = 0;
2167 if((t & VT_BTYPE) == VT_FLOAT) {
2168 func_type = &func_float_type;
2169 if(vtop->type.t & VT_UNSIGNED)
2170 func=TOK___floatundisf;
2171 else
2172 func=TOK___floatdisf;
2173 #if LDOUBLE_SIZE != 8
2174 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2175 func_type = &func_ldouble_type;
2176 if(vtop->type.t & VT_UNSIGNED)
2177 func=TOK___floatundixf;
2178 else
2179 func=TOK___floatdixf;
2180 } else if((t & VT_BTYPE) == VT_DOUBLE) {
2181 #else
2182 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2183 #endif
2184 func_type = &func_double_type;
2185 if(vtop->type.t & VT_UNSIGNED)
2186 func=TOK___floatundidf;
2187 else
2188 func=TOK___floatdidf;
2190 if(func_type) {
2191 vpush_global_sym(func_type, func);
2192 vswap();
2193 gfunc_call(1);
2194 vpushi(0);
2195 vtop->r=TREG_F0;
2196 return;
2199 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2202 /* convert fp to int 't' type */
2203 void gen_cvt_ftoi(int t)
2205 uint32_t r, r2;
2206 int u, func = 0;
2207 u=t&VT_UNSIGNED;
2208 t&=VT_BTYPE;
2209 r2=vtop->type.t & VT_BTYPE;
2210 if(t==VT_INT) {
2211 #ifdef TCC_ARM_VFP
2212 r=vfpr(gv(RC_FLOAT));
2213 u=u?0:0x10000;
2214 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2215 r2=intr(vtop->r=get_reg(RC_INT));
2216 o(0xEE100A10|(r<<16)|(r2<<12));
2217 return;
2218 #else
2219 if(u) {
2220 if(r2 == VT_FLOAT)
2221 func=TOK___fixunssfsi;
2222 #if LDOUBLE_SIZE != 8
2223 else if(r2 == VT_LDOUBLE)
2224 func=TOK___fixunsxfsi;
2225 else if(r2 == VT_DOUBLE)
2226 #else
2227 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2228 #endif
2229 func=TOK___fixunsdfsi;
2230 } else {
2231 r=fpr(gv(RC_FLOAT));
2232 r2=intr(vtop->r=get_reg(RC_INT));
2233 o(0xEE100170|(r2<<12)|r);
2234 return;
2236 #endif
2237 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2238 if(r2 == VT_FLOAT)
2239 func=TOK___fixsfdi;
2240 #if LDOUBLE_SIZE != 8
2241 else if(r2 == VT_LDOUBLE)
2242 func=TOK___fixxfdi;
2243 else if(r2 == VT_DOUBLE)
2244 #else
2245 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2246 #endif
2247 func=TOK___fixdfdi;
2249 if(func) {
2250 vpush_global_sym(&func_old_type, func);
2251 vswap();
2252 gfunc_call(1);
2253 vpushi(0);
2254 if(t == VT_LLONG)
2255 vtop->r2 = REG_IRE2;
2256 vtop->r = REG_IRET;
2257 return;
2259 tcc_error("unimplemented gen_cvt_ftoi!");
2262 /* convert from one floating point type to another */
2263 void gen_cvt_ftof(int t)
2265 #ifdef TCC_ARM_VFP
2266 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2267 uint32_t r = vfpr(gv(RC_FLOAT));
2268 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2270 #else
2271 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2272 gv(RC_FLOAT);
2273 #endif
2276 /* computed goto support */
2277 void ggoto(void)
2279 gcall_or_jmp(1);
2280 vtop--;
2283 /* Save the stack pointer onto the stack and return the location of its address */
2284 ST_FUNC void gen_vla_sp_save(int addr) {
2285 SValue v;
2286 v.type.t = VT_PTR;
2287 v.r = VT_LOCAL | VT_LVAL;
2288 v.c.i = addr;
2289 store(TREG_SP, &v);
2292 /* Restore the SP from a location on the stack */
2293 ST_FUNC void gen_vla_sp_restore(int addr) {
2294 SValue v;
2295 v.type.t = VT_PTR;
2296 v.r = VT_LOCAL | VT_LVAL;
2297 v.c.i = addr;
2298 load(TREG_SP, &v);
2301 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2302 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2303 int r;
2304 #if defined(CONFIG_TCC_BCHECK)
2305 if (tcc_state->do_bounds_check)
2306 vpushv(vtop);
2307 #endif
2308 r = intr(gv(RC_INT));
2309 o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2310 #ifdef TCC_ARM_EABI
2311 if (align < 8)
2312 align = 8;
2313 #else
2314 if (align < 4)
2315 align = 4;
2316 #endif
2317 if (align & (align - 1))
2318 tcc_error("alignment is not a power of 2: %i", align);
2319 o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2320 vpop();
2321 #if defined(CONFIG_TCC_BCHECK)
2322 if (tcc_state->do_bounds_check) {
2323 vpushi(0);
2324 vtop->r = TREG_R0;
2325 o(0xe1a0000d | (vtop->r << 12)); // mov r0,sp
2326 vswap();
2327 vpush_global_sym(&func_old_type, TOK___bound_new_region);
2328 vrott(3);
2329 gfunc_call(2);
2330 func_bound_add_epilog = 1;
2332 #endif
2335 /* end of ARM code generator */
2336 /*************************************************************/
2337 #endif
2338 /*************************************************************/