arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_CPU_VERSION
  38 # define TCC_CPU_VERSION 5
  39 #endif
  40
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which does
   assumptions on it).

   Each class is one bit so that classes can be OR-ed together in
   reg_classes[]: the two generic classes come first, followed by one
   dedicated bit per allocatable register. */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
/* one bit per core register */
#define RC_R0      0x0004
#define RC_R1      0x0008
#define RC_R2      0x0010
#define RC_R3      0x0020
#define RC_R12     0x0040
/* one bit per float register */
#define RC_F0      0x0080
#define RC_F1      0x0100
#define RC_F2      0x0200
#define RC_F3      0x0400
#ifdef TCC_ARM_VFP
/* four more float registers are available when targeting VFP */
#define RC_F4      0x0800
#define RC_F5      0x1000
#define RC_F6      0x2000
#define RC_F7      0x4000
#endif
#define RC_IRET    RC_R0  /* function return: integer register */
#define RC_IRE2    RC_R1  /* function return: second integer register */
#define RC_FRET    RC_F0  /* function return: float register */
  64
/* pretty names for the registers.
   The values are indices into reg_classes[] for the allocatable registers
   (TREG_R0 up to the last TREG_Fx).  TREG_SP and TREG_LR are numbered 13
   and 14 explicitly; they have no entry in reg_classes[] and are only
   translated to hardware register numbers by intr(). */
enum {
    TREG_R0 = 0,
    TREG_R1,
    TREG_R2,
    TREG_R3,
    TREG_R12,
    TREG_F0,
    TREG_F1,
    TREG_F2,
    TREG_F3,
#ifdef TCC_ARM_VFP
    TREG_F4,
    TREG_F5,
    TREG_F6,
    TREG_F7,
#endif
    TREG_SP = 13,
    TREG_LR,
};
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_IRE2 TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 #ifdef TCC_ARM_HARDFLOAT
 133 # define ARM_FLOAT_ABI ARM_HARD_FLOAT
 134 #else
 135 # define ARM_FLOAT_ABI ARM_SOFTFP_FLOAT
 136 #endif
 137
 138 /******************************************************/
 139 #else /* ! TARGET_DEFS_ONLY */
 140 /******************************************************/
 141 #define USING_GLOBALS
 142 #include "tcc.h"
 143
/* Target-specific names, stored back to back with each entry terminated by
   an explicit "\0"; the string literal's own terminating NUL then ends the
   list.  Presumably consumed by the preprocessor setup as predefined macro
   names -- confirm against the user of target_machine_defs. */
ST_DATA const char * const target_machine_defs =
    "__arm__\0"
    "__arm\0"
    "arm\0"
    "__arm_elf__\0"
    "__arm_elf\0"
    "arm_elf\0"
    "__ARM_ARCH_4__\0"
    "__ARMEL__\0"
    "__APCS_32__\0"
#if defined TCC_ARM_EABI
    /* only present when building for the EABI */
    "__ARM_EABI__\0"
#endif
    ;
 158
 159 enum float_abi float_abi;
 160
/* Register classes of every allocatable register, indexed by TREG_* number.
   Each entry combines the generic class (RC_INT or RC_FLOAT) with the bit
   dedicated to that register.  The table has NB_REGS entries, so TREG_SP and
   TREG_LR are not valid indices. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* r0 */ RC_INT | RC_R0,
    /* r1 */ RC_INT | RC_R1,
    /* r2 */ RC_INT | RC_R2,
    /* r3 */ RC_INT | RC_R3,
    /* r12 */ RC_INT | RC_R12,
    /* f0 */ RC_FLOAT | RC_F0,
    /* f1 */ RC_FLOAT | RC_F1,
    /* f2 */ RC_FLOAT | RC_F2,
    /* f3 */ RC_FLOAT | RC_F3,
#ifdef TCC_ARM_VFP
 /* d4/s8 */ RC_FLOAT | RC_F4,
/* d5/s10 */ RC_FLOAT | RC_F5,
/* d6/s12 */ RC_FLOAT | RC_F6,
/* d7/s14 */ RC_FLOAT | RC_F7,
#endif
};
 178
 179 static int func_sub_sp_offset, last_itod_magic;
 180 static int leaffunc;
 181
 182 #if defined(CONFIG_TCC_BCHECK)
 183 static addr_t func_bound_offset;
 184 static unsigned long func_bound_ind;
 185 ST_DATA int func_bound_add_epilog;
 186 #endif
 187
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
/* Types used for helper functions returning float / double; filled in by
   arm_init(). */
static CType float_type, double_type, func_float_type, func_double_type;

/* Target initialisation for the EABI + VFP configuration.
   Sets up float_type/double_type, builds the two function types whose ref
   symbols are pushed with FUNC_CDECL/FUNC_OLD, and copies the float ABI
   selected in the compilation state into the file-level 'float_abi'.

   s: compilation state providing s->float_abi */
ST_FUNC void arm_init(struct TCCState *s)
{
    float_type.t = VT_FLOAT;
    double_type.t = VT_DOUBLE;
    func_float_type.t = VT_FUNC;
    func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
    func_double_type.t = VT_FUNC;
    func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);

    float_abi = s->float_abi;
#ifndef TCC_ARM_HARDFLOAT
// XXX: Works on OpenBSD
// # warning "soft float ABI currently not supported: default to softfp"
#endif
}
#else
/* In every other configuration the float helper function types are plain
   aliases of func_old_type. */
#define func_float_type func_old_type
#define func_double_type func_old_type
#define func_ldouble_type func_old_type

/* Target initialisation for the remaining configurations: nothing to do.
   The deprecation warnings below are compiled out by the '#if 0'. */
ST_FUNC void arm_init(struct TCCState *s)
{
#if 0
#if !defined (TCC_ARM_VFP)
    tcc_warning("Support for FPA is deprecated and will be removed in next"
                " release");
#endif
#if !defined (TCC_ARM_EABI)
    tcc_warning("Support for OABI is deprecated and will be removed in next"
                " release");
#endif
#endif
}
#endif
 223
 224 #define CHECK_R(r) ((r) >= TREG_R0 && (r) <= TREG_LR)
 225
 226 static int two2mask(int a,int b) {
 227   if (!CHECK_R(a) || !CHECK_R(b))
 228     tcc_error("compiler error! registers %i,%i is not valid",a,b);
 229   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 230 }
 231
 232 static int regmask(int r) {
 233   if (!CHECK_R(r))
 234     tcc_error("compiler error! register %i is not valid",r);
 235   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 236 }
 237
 238 /******************************************************/
 239
 240 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 241 const char *default_elfinterp(struct TCCState *s)
 242 {
 243     if (s->float_abi == ARM_HARD_FLOAT)
 244         return "/lib/ld-linux-armhf.so.3";
 245     else
 246         return "/lib/ld-linux.so.3";
 247 }
 248 #endif
 249
 250 void o(uint32_t i)
 251 {
 252   /* this is a good place to start adding big-endian support*/
 253   int ind1;
 254   if (nocode_wanted)
 255     return;
 256   ind1 = ind + 4;
 257   if (!cur_text_section)
 258     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 259          "can't evaluate constant expressions outside of a function.");
 260   if (ind1 > cur_text_section->data_allocated)
 261     section_realloc(cur_text_section, ind1);
 262   cur_text_section->data[ind++] = i&255;
 263   i>>=8;
 264   cur_text_section->data[ind++] = i&255;
 265   i>>=8;
 266   cur_text_section->data[ind++] = i&255;
 267   i>>=8;
 268   cur_text_section->data[ind++] = i;
 269 }
 270
 271 static uint32_t stuff_const(uint32_t op, uint32_t c)
 272 {
 273   int try_neg=0;
 274   uint32_t nc = 0, negop = 0;
 275
 276   switch(op&0x1F00000)
 277   {
 278     case 0x800000: //add
 279     case 0x400000: //sub
 280       try_neg=1;
 281       negop=op^0xC00000;
 282       nc=-c;
 283       break;
 284     case 0x1A00000: //mov
 285     case 0x1E00000: //mvn
 286       try_neg=1;
 287       negop=op^0x400000;
 288       nc=~c;
 289       break;
 290     case 0x200000: //xor
 291       if(c==~0)
 292         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 293       break;
 294     case 0x0: //and
 295       if(c==~0)
 296         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 297     case 0x1C00000: //bic
 298       try_neg=1;
 299       negop=op^0x1C00000;
 300       nc=~c;
 301       break;
 302     case 0x1800000: //orr
 303       if(c==~0)
 304         return (op&0xFFF0FFFF)|0x1E00000;
 305       break;
 306   }
 307   do {
 308     uint32_t m;
 309     int i;
 310     if(c<256) /* catch undefined <<32 */
 311       return op|c;
 312     for(i=2;i<32;i+=2) {
 313       m=(0xff>>i)|(0xff<<(32-i));
 314       if(!(c&~m))
 315         return op|(i<<7)|(c<<i)|(c>>(32-i));
 316     }
 317     op=negop;
 318     c=nc;
 319   } while(try_neg--);
 320   return 0;
 321 }
 322
 323
//only add,sub
/* Emit 'op' with the constant 'v' even when 'v' does not fit a single
   immediate operand, by splitting it over up to four instructions.

   op: add or sub instruction word with an empty immediate field
   v:  constant to add / subtract

   The attempts are made in this order, the first success is emitted:
     1. a single instruction (stuff_const);
     2. two instructions using v;
     3. two instructions using the opposite operation and -v;
     4. three instructions using v;
     5. three instructions using the opposite operation and -v;
     6. four instructions, one per byte of v. */
void stuff_const_harder(uint32_t op, uint32_t v) {
  uint32_t x;
  x=stuff_const(op,v);
  if(x)
    o(x);
  else {
    uint32_t a[16], nv, no, o2, n2;
    int i,j,k;
    /* a[i] is the byte mask 0xff rotated right by 2*i bits */
    a[0]=0xff;
    /* o2: same operation with bits 12-15 of op copied into bits 16-19, so
       that follow-up instructions use the first one's destination as source */
    o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
    for(i=1;i<16;i++)
      a[i]=(a[i-1]>>2)|(a[i-1]<<30);
    /* two pieces covering v */
    for(i=0;i<12;i++)
      for(j=i<4?i+12:15;j>=i+4;j--)
        if((v&(a[i]|a[j]))==v) {
          o(stuff_const(op,v&a[i]));
          o(stuff_const(o2,v&a[j]));
          return;
        }
    /* two pieces covering -v, with add and sub swapped */
    no=op^0xC00000;
    n2=o2^0xC00000;
    nv=-v;
    for(i=0;i<12;i++)
      for(j=i<4?i+12:15;j>=i+4;j--)
        if((nv&(a[i]|a[j]))==nv) {
          o(stuff_const(no,nv&a[i]));
          o(stuff_const(n2,nv&a[j]));
          return;
        }
    /* three pieces covering v */
    for(i=0;i<8;i++)
      for(j=i+4;j<12;j++)
        for(k=i<4?i+12:15;k>=j+4;k--)
          if((v&(a[i]|a[j]|a[k]))==v) {
            o(stuff_const(op,v&a[i]));
            o(stuff_const(o2,v&a[j]));
            o(stuff_const(o2,v&a[k]));
            return;
          }
    /* three pieces covering -v (no and nv are simply recomputed here) */
    no=op^0xC00000;
    nv=-v;
    for(i=0;i<8;i++)
      for(j=i+4;j<12;j++)
        for(k=i<4?i+12:15;k>=j+4;k--)
          if((nv&(a[i]|a[j]|a[k]))==nv) {
            o(stuff_const(no,nv&a[i]));
            o(stuff_const(n2,nv&a[j]));
            o(stuff_const(n2,nv&a[k]));
            return;
          }
    /* last resort: one instruction per byte of v */
    o(stuff_const(op,v&a[0]));
    o(stuff_const(o2,v&a[4]));
    o(stuff_const(o2,v&a[8]));
    o(stuff_const(o2,v&a[12]));
  }
}
 380
 381 uint32_t encbranch(int pos, int addr, int fail)
 382 {
 383   addr-=pos+8;
 384   addr/=4;
 385   if(addr>=0x1000000 || addr<-0x1000000) {
 386     if(fail)
 387       tcc_error("FIXME: function bigger than 32MB");
 388     return 0;
 389   }
 390   return 0x0A000000|(addr&0xffffff);
 391 }
 392
 393 int decbranch(int pos)
 394 {
 395   int x;
 396   x=*(uint32_t *)(cur_text_section->data + pos);
 397   x&=0x00ffffff;
 398   if(x&0x800000)
 399     x-=0x1000000;
 400   return x*4+pos+8;
 401 }
 402
 403 /* output a symbol and patch all calls to it */
 404 void gsym_addr(int t, int a)
 405 {
 406   uint32_t *x;
 407   int lt;
 408   while(t) {
 409     x=(uint32_t *)(cur_text_section->data + t);
 410     t=decbranch(lt=t);
 411     if(a==lt+4)
 412       *x=0xE1A00000; // nop
 413     else {
 414       *x &= 0xff000000;
 415       *x |= encbranch(lt,a,1);
 416     }
 417   }
 418 }
 419
 420 #ifdef TCC_ARM_VFP
 421 static uint32_t vfpr(int r)
 422 {
 423   if(r<TREG_F0 || r>TREG_F7)
 424     tcc_error("compiler error! register %i is no vfp register",r);
 425   return r - TREG_F0;
 426 }
 427 #else
 428 static uint32_t fpr(int r)
 429 {
 430   if(r<TREG_F0 || r>TREG_F3)
 431     tcc_error("compiler error! register %i is no fpa register",r);
 432   return r - TREG_F0;
 433 }
 434 #endif
 435
 436 static uint32_t intr(int r)
 437 {
 438   if(r == TREG_R12)
 439     return 12;
 440   if(r >= TREG_R0 && r <= TREG_R3)
 441     return r - TREG_R0;
 442   if (!(r >= TREG_SP && r <= TREG_LR))
 443     tcc_error("compiler error! register %i is no int register",r);
 444   return r + (13 - TREG_SP);
 445 }
 446
/* Make a base+offset address encodable by a load/store instruction.

   base:   in/out, hardware number of the base register
   off:    in/out, magnitude of the offset
   sgn:    in/out, non-zero if the offset is to be subtracted from the base
   maxoff: largest offset magnitude the instruction can encode
   shift:  number of low offset bits the instruction cannot encode

   Nothing is done when the offset already fits.  Otherwise an add/sub into
   lr is emitted that takes care of the part of the offset that does not
   fit, *base is switched to lr and *off (and possibly *sgn) are updated to
   the remainder the caller still has to encode. */
static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
{
  if(*off>maxoff || *off&((1<<shift)-1)) {
    uint32_t x, y;
    x=0xE280E000; /* add lr, <base>, #imm */
    if(*sgn)
      x=0xE240E000; /* sub lr, <base>, #imm */
    x|=(*base)<<16;
    *base=14; // lr
    /* first choice: move the bits outside maxoff into lr */
    y=stuff_const(x,*off&~maxoff);
    if(y) {
      o(y);
      *off&=maxoff;
      return;
    }
    /* second choice: round up past the target and flip the direction of
       the remaining offset */
    y=stuff_const(x,(*off+maxoff)&~maxoff);
    if(y) {
      o(y);
      *sgn=!*sgn;
      *off=((*off+maxoff)&~maxoff)-*off;
      return;
    }
    /* otherwise build the high part with several instructions */
    stuff_const_harder(x,*off&~maxoff);
    *off&=maxoff;
  }
}
 473
 474 static uint32_t mapcc(int cc)
 475 {
 476   switch(cc)
 477   {
 478     case TOK_ULT:
 479       return 0x30000000; /* CC/LO */
 480     case TOK_UGE:
 481       return 0x20000000; /* CS/HS */
 482     case TOK_EQ:
 483       return 0x00000000; /* EQ */
 484     case TOK_NE:
 485       return 0x10000000; /* NE */
 486     case TOK_ULE:
 487       return 0x90000000; /* LS */
 488     case TOK_UGT:
 489       return 0x80000000; /* HI */
 490     case TOK_Nset:
 491       return 0x40000000; /* MI */
 492     case TOK_Nclear:
 493       return 0x50000000; /* PL */
 494     case TOK_LT:
 495       return 0xB0000000; /* LT */
 496     case TOK_GE:
 497       return 0xA0000000; /* GE */
 498     case TOK_LE:
 499       return 0xD0000000; /* LE */
 500     case TOK_GT:
 501       return 0xC0000000; /* GT */
 502   }
 503   tcc_error("unexpected condition code");
 504   return 0xE0000000; /* AL */
 505 }
 506
 507 static int negcc(int cc)
 508 {
 509   switch(cc)
 510   {
 511     case TOK_ULT:
 512       return TOK_UGE;
 513     case TOK_UGE:
 514       return TOK_ULT;
 515     case TOK_EQ:
 516       return TOK_NE;
 517     case TOK_NE:
 518       return TOK_EQ;
 519     case TOK_ULE:
 520       return TOK_UGT;
 521     case TOK_UGT:
 522       return TOK_ULE;
 523     case TOK_Nset:
 524       return TOK_Nclear;
 525     case TOK_Nclear:
 526       return TOK_Nset;
 527     case TOK_LT:
 528       return TOK_GE;
 529     case TOK_GE:
 530       return TOK_LT;
 531     case TOK_LE:
 532       return TOK_GT;
 533     case TOK_GT:
 534       return TOK_LE;
 535   }
 536   tcc_error("unexpected condition code");
 537   return TOK_NE;
 538 }
 539
/* Load value into register r.
   Use relative/got addressing to avoid setting DT_TEXTREL

   sv: constant to load, optionally relative to a symbol (VT_SYM in sv->r)
   r:  destination core register (TREG_* number)

   The value is stored as a literal word in the instruction stream, right
   after the load, and jumped over. */
static void load_value(SValue *sv, int r)
{
    o(0xE59F0000|(intr(r)<<12)); /* ldr r, [pc] */
    o(0xEA000000); /* b $+4 */
#ifndef CONFIG_TCC_PIC
    /* non-PIC: the literal is the absolute value itself */
    if(sv->r & VT_SYM)
        greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
    o(sv->c.i);
#else
    if(sv->r & VT_SYM) {
        if (sv->sym->type.t & VT_STATIC) {
            /* static symbol: pc-relative literal, then add pc */
            greloc(cur_text_section, sv->sym, ind, R_ARM_REL32);
            o(sv->c.i - 12);
            o(0xe080000f | (intr(r)<<12) | (intr(r)<<16));  // add rx,rx,pc
        }
        else {
            /* other symbols: pc-relative GOT entry, load the address from
               it, then add the constant offset if there is one */
            greloc(cur_text_section, sv->sym, ind, R_ARM_GOT_PREL);
            o(-12);
            o(0xe080000f | (intr(r)<<12) | (intr(r)<<16));  // add rx,rx,pc
            o(0xe5900000 | (intr(r)<<12) | (intr(r)<<16));  // ldr rx,[rx]
            if (sv->c.i)
              stuff_const_harder(0xe2800000 | (intr(r)<<12) | (intr(r)<<16),
                                 sv->c.i);
        }
    }
    else
        /* plain constant: no relocation needed */
        o(sv->c.i);
#endif
}
 571
/* load 'r' from value 'sv'

   r:  destination register (TREG_* number, core or float depending on the
       type of the value)
   sv: value to load; sv->r selects where it currently lives (memory
       lvalue, constant, frame address, comparison flags, jump chain or
       another register)

   lr is used as scratch register whenever an address has to be computed
   first.  Any combination not handled below ends in tcc_error(). */
void load(int r, SValue *sv)
{
  int v, ft, fc, fr, sign;
  uint32_t op;
  SValue v1;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.i;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL) {
    /* memory operand: reduce every addressing kind to "base register plus
       offset" and continue as VT_LOCAL */
    uint32_t base = 0xB; // fp
    if(v == VT_LLOCAL) {
      /* the pointer itself is stored in the frame: load it into lr first */
      v1.type.t = VT_PTR;
      v1.r = VT_LOCAL | VT_LVAL;
      v1.c.i = sv->c.i;
      load(TREG_LR, &v1);
      base = 14; /* lr */
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v == VT_CONST) {
      /* constant address (possibly symbol-relative): materialise it in lr */
      v1.type.t = VT_PTR;
      v1.r = fr&~VT_LVAL;
      v1.c.i = sv->c.i;
      v1.sym=sv->sym;
      load(TREG_LR, &v1);
      base = 14; /* lr */
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v < VT_CONST) {
      /* the address is already in register v */
      base=intr(v);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
      if(is_float(ft)) {
        /* float loads take a word-scaled offset (fc>>2) */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED100A00; /* flds */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* flds -> fldd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED100100;
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        else if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
      } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
                || (ft & VT_BTYPE) == VT_SHORT) {
        /* signed bytes and all shorts: the 8-bit offset is split into two
           nibbles in the instruction word */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE1500090;
        if ((ft & VT_BTYPE) == VT_SHORT)
          op|=0x20;
        if ((ft & VT_UNSIGNED) == 0)
          op|=0x40;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* words and unsigned bytes / bools: 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5100000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  } else {
    if (v == VT_CONST) {
      /* constant: a single instruction if it can be encoded as immediate
         and no symbol is involved, a literal load otherwise */
      op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
      if (fr & VT_SYM || !op)
        load_value(sv, r);
      else
        o(op);
      return;
    } else if (v == VT_LOCAL) {
      /* address of a frame slot: fp + offset */
      op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
      if (fr & VT_SYM || !op) {
        load_value(sv, r);
        o(0xE08B0000|(intr(r)<<12)|intr(r));
      } else
        o(op);
      return;
    } else if(v == VT_CMP) {
      /* comparison result: conditionally set r to 1, then to 0 under the
         inverse condition */
      o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
      o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
      return;
    } else if (v == VT_JMP || v == VT_JMPI) {
      /* pending jump chain: one value on the fall-through path (followed
         by a branch over the next instruction), the opposite value at the
         place the chain is resolved to */
      int t;
      t = v & 1;
      o(0xE3A00000|(intr(r)<<12)|t);
      o(0xEA000000);
      gsym(sv->c.i);
      o(0xE3A00000|(intr(r)<<12)|(t^1));
      return;
    } else if (v < VT_CONST) {
      /* register to register copy */
      if(is_float(ft))
#ifdef TCC_ARM_VFP
        o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
#else
        o(0xEE008180|(fpr(r)<<12)|fpr(v));
#endif
      else
        o(0xE1A00000|(intr(r)<<12)|intr(v));
      return;
    }
  }
  tcc_error("load unimplemented!");
}
 704
/* store register 'r' in lvalue 'v'

   r:  source register (TREG_* number, core or float depending on the type)
   sv: destination; it must be an lvalue or a plain VT_LOCAL, anything else
       ends in tcc_error()

   lr is used as scratch register when the destination address has to be
   computed first. */
void store(int r, SValue *sv)
{
  SValue v1;
  int v, ft, fc, fr, sign;
  uint32_t op;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.i;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL || fr == VT_LOCAL) {
    /* reduce every addressing kind to "base register plus offset" */
    uint32_t base = 0xb; /* fp */
    if(v < VT_CONST) {
      /* the address is already in register v */
      base=intr(v);
      v=VT_LOCAL;
      fc=sign=0;
    } else if(v == VT_CONST) {
      /* constant address (possibly symbol-relative): materialise it in lr */
      v1.type.t = ft;
      v1.r = fr&~VT_LVAL;
      v1.c.i = sv->c.i;
      v1.sym=sv->sym;
      load(TREG_LR, &v1);
      base = 14; /* lr */
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
       if(is_float(ft)) {
        /* float stores take a word-scaled offset (fc>>2) */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED000A00; /* fsts */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* fsts -> fstd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED000100;
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
        return;
      } else if((ft & VT_BTYPE) == VT_SHORT) {
        /* halfword store: the 8-bit offset is split into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE14000B0;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word and byte stores: 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5000000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  }
  tcc_error("store unimplemented");
}
 786
 787 static void gadd_sp(int val)
 788 {
 789   stuff_const_harder(0xE28DD000,val);
 790 }
 791
/* 'is_jmp' is '1' if it is a jump */
/* Emit a call (is_jmp == 0) or a jump (is_jmp != 0) to the target described
   by the value on top of the value stack (vtop). */
static void gcall_or_jmp(int is_jmp)
{
  int r;
  uint32_t x;
  if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
    /* constant case */
    if(vtop->r & VT_SYM){
        /* symbol: use a direct branch if the addend fits its range */
        x=encbranch(ind,ind+vtop->c.i,0);
        if(x) {
            /* relocation case */
            greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
            /* add the 'always' condition; the extra bit turns b into bl */
            o(x|(is_jmp?0xE0000000:0xE1000000));
        } else {
            /* out of range: load the address into lr and branch through it */
            r = TREG_LR;
            load_value(vtop, r);
            if(is_jmp)
                o(0xE1A0F000 | intr(r)); // mov pc, r
            else
                o(0xe12fff30 | intr(r)); // blx r
        }
     }else{
        /* absolute address: load pc from a literal placed right after the
           instruction; a call first sets lr to the word after the literal */
        if(!is_jmp)
            o(0xE28FE004); // add lr,pc,#4
        o(0xE51FF004);   // ldr pc,[pc,#-4]
        o(vtop->c.i);
     }
  } else {
    /* otherwise, indirect call */
#ifdef CONFIG_TCC_BCHECK
    vtop->r &= ~VT_MUSTBOUND;
#endif
    r = gv(RC_INT);
    if(!is_jmp)
      o(0xE1A0E00F);       // mov lr,pc
    o(0xE1A0F000|intr(r)); // mov pc,r
  }
}
 830
 831 #if defined(CONFIG_TCC_BCHECK)
 832
/* Emit a call to the bound-checking helper identified by token 'v'.
   The helper symbol is obtained from external_helper_sym() and the branch
   target is left to an R_ARM_PC24 relocation on the emitted instruction. */
static void gen_bounds_call(int v)
{
    Sym *sym = external_helper_sym(v);

    greloc(cur_text_section, sym, ind, R_ARM_PC24);
    o(0xebfffffe); /* bl with placeholder offset, fixed up by the relocation */
}
 840
 841 static void gen_bounds_prolog(void)
 842 {
 843     /* leave some room for bound checking code */
 844     func_bound_offset = lbounds_section->data_offset;
 845     func_bound_ind = ind;
 846     func_bound_add_epilog = 0;
 847     o(0xe1a00000);  /* ld r0,lbounds_section->data_offset */
 848     o(0xe1a00000);
 849     o(0xe1a00000);
 850     o(0xe1a00000);
 851     o(0xe1a00000);  /* call __bound_local_new */
 852 }
 853
/* End of function: finish the bound-checking code of the current function.
   Does nothing if the function added nothing to lbounds_section and
   func_bound_add_epilog is not set.  Otherwise the table is terminated, the
   slots reserved by gen_bounds_prolog() are filled in (only if the table
   grew) and the call to __bound_local_delete is emitted at the current
   position. */
static void gen_bounds_epilog(void)
{
    addr_t saved_ind;
    addr_t *bounds_ptr;
    Sym *sym_data;
    /* true if this function added entries to lbounds_section */
    int offset_modified = func_bound_offset != lbounds_section->data_offset;

    if (!offset_modified && !func_bound_add_epilog)
        return;

    /* add end of table info */
    bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
    *bounds_ptr = 0;

    /* symbol referring to this function's part of lbounds_section */
    sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                           func_bound_offset, PTR_SIZE);

    /* generate bound local allocation */
    if (offset_modified) {
        /* temporarily rewind the output position to the reserved slots */
        saved_ind = ind;
        ind = func_bound_ind;
        o(0xe59f0000);  /* ldr r0, [pc] */
        o(0xea000000);  /* b $+4 */
        greloc(cur_text_section, sym_data, ind, R_ARM_REL32);
        o(-12);  /* lbounds_section->data_offset */
        o(0xe080000f);  /* add r0,r0,pc */
        gen_bounds_call(TOK___bound_local_new);
        ind = saved_ind;
    }

    /* generate bound check local freeing */
    /* r0,r1 and d0,d1 are saved around the helper call */
    o(0xe92d0003);  /* push {r0,r1} */
    o(0xed2d0b04);  /* vpush {d0,d1} */
    o(0xe59f0000);  /* ldr r0, [pc] */
    o(0xea000000);  /* b $+4 */
    greloc(cur_text_section, sym_data, ind, R_ARM_REL32);
    o(-12);  /* lbounds_section->data_offset */
    o(0xe080000f);  /* add r0,r0,pc */
    gen_bounds_call(TOK___bound_local_delete);
    o(0xecbd0b04); /* vpop {d0,d1} */
    o(0xe8bd0003); /* pop {r0,r1} */
}
 896 #endif
 897
 898 static int unalias_ldbl(int btype)
 899 {
 900 #if LDOUBLE_SIZE == 8
 901     if (btype == VT_LDOUBLE)
 902       btype = VT_DOUBLE;
 903 #endif
 904     return btype;
 905 }
 906
 907 /* Return whether a structure is an homogeneous float aggregate or not.
 908    The answer is true if all the elements of the structure are of the same
 909    primitive float type and there is less than 4 elements.
 910
 911    type: the type corresponding to the structure to be tested */
 912 static int is_hgen_float_aggr(CType *type)
 913 {
 914   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 915     struct Sym *ref;
 916     int btype, nb_fields = 0;
 917
 918     ref = type->ref->next;
 919     if (ref) {
 920       btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 921       if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 922         for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 923         return !ref && nb_fields <= 4;
 924       }
 925     }
 926   }
 927   return 0;
 928 }
 929
 930 struct avail_regs {
 931   signed char avail[3]; /* 3 holes max with only float and double alignments */
 932   int first_hole; /* first available hole */
 933   int last_hole; /* last available hole (none if equal to first_hole) */
 934   int first_free_reg; /* next free register in the sequence, hole excluded */
 935 };
 936
 937 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 938    param) according to the rules described in the procedure call standard for
 939    the ARM architecture (AAPCS). If found, the registers are assigned to this
 940    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 941    and the parameter is a single float.
 942
 943    avregs: opaque structure to keep track of available VFP co-processor regs
 944    align: alignment constraints for the param, as returned by type_size()
 945    size: size of the parameter, as returned by type_size() */
 946 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 947 {
 948   int first_reg = 0;
 949
 950   if (avregs->first_free_reg == -1)
 951     return -1;
 952   if (align >> 3) { /* double alignment */
 953     first_reg = avregs->first_free_reg;
 954     /* alignment constraint not respected so use next reg and record hole */
 955     if (first_reg & 1)
 956       avregs->avail[avregs->last_hole++] = first_reg++;
 957   } else { /* no special alignment (float or array of float) */
 958     /* if single float and a hole is available, assign the param to it */
 959     if (size == 4 && avregs->first_hole != avregs->last_hole)
 960       return avregs->avail[avregs->first_hole++];
 961     else
 962       first_reg = avregs->first_free_reg;
 963   }
 964   if (first_reg + size / 4 <= 16) {
 965     avregs->first_free_reg = first_reg + size / 4;
 966     return first_reg;
 967   }
 968   avregs->first_free_reg = -1;
 969   return -1;
 970 }
 971
 972 /* Returns whether all params need to be passed in core registers or not.
 973    This is the case for function part of the runtime ABI. */
 974 int floats_in_core_regs(SValue *sval)
 975 {
 976   if (!sval->sym)
 977     return 0;
 978
 979   switch (sval->sym->v) {
 980     case TOK___floatundisf:
 981     case TOK___floatundidf:
 982     case TOK___fixunssfdi:
 983     case TOK___fixunsdfdi:
 984 #ifndef TCC_ARM_VFP
 985     case TOK___fixunsxfdi:
 986 #endif
 987     case TOK___floatdisf:
 988     case TOK___floatdidf:
 989     case TOK___fixsfdi:
 990     case TOK___fixdfdi:
 991       return 1;
 992
 993     default:
 994       return 0;
 995   }
 996 }
 997
 998 /* Return the number of registers needed to return the struct, or 0 if
 999    returning via struct pointer. */
1000 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
1001 #ifdef TCC_ARM_EABI
1002     int size, align;
1003     size = type_size(vt, &align);
1004     if (float_abi == ARM_HARD_FLOAT && !variadic &&
1005         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
1006         *ret_align = 8;
1007         *regsize = 8;
1008         ret->ref = NULL;
1009         ret->t = VT_DOUBLE;
1010         return (size + 7) >> 3;
1011     } else if (size > 0 && size <= 4) {
1012         *ret_align = 4;
1013         *regsize = 4;
1014         ret->ref = NULL;
1015         ret->t = VT_INT;
1016         return 1;
1017     } else
1018         return 0;
1019 #else
1020     return 0;
1021 #endif
1022 }
1023
/* Parameters are classified according to how they are copied to their final
   destination for the function call. Because the copying is performed class
   after class according to the order in the enum below, it is important that
   some constraints about the order of the members of this enum are respected:
   - CORE_STRUCT_CLASS must come after STACK_CLASS;
   - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
     VFP_STRUCT_CLASS;
   - VFP_STRUCT_CLASS must come after VFP_CLASS.
   See the comment for the main loop in copy_params() for the reason. */
enum reg_class {
        STACK_CLASS = 0,
        CORE_STRUCT_CLASS,
        VFP_CLASS,
        VFP_STRUCT_CLASS,
        CORE_CLASS,
        NB_CLASSES /* number of classes, used to size per-class arrays */
};
1041
/* Where one parameter of a function call goes.  Plans of the same class are
   chained through 'prev', most recently added first. */
struct param_plan {
    int start; /* first reg or addr used depending on the class */
    int end; /* last reg used or next free addr depending on the class */
    SValue *sval; /* pointer to SValue on the value stack */
    struct param_plan *prev; /*  previous element in this class */
};
1048
/* Complete argument passing plan for one call. 'pplans' is the backing array
   (one slot per argument, filled in order by add_param_plan()) and
   'clsplans' holds, for each class, the most recently added plan of that
   class; older plans of the same class are reached through their 'prev'
   link. */
struct plan {
    struct param_plan *pplans; /* array of all the param plans */
    struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
    int nb_plans; /* number of slots of pplans already in use */
};
1054
1055 static void add_param_plan(struct plan* plan, int cls, int start, int end, SValue *v)
1056 {
1057     struct param_plan *p = &plan->pplans[plan->nb_plans++];
1058     p->prev = plan->clsplans[cls];
1059     plan->clsplans[cls] = p;
1060     p->start = start, p->end = end, p->sval = v;
1061 }
1062
/* Assign parameters to registers and stack with alignment according to the
   rules in the procedure call standard for the ARM architecture (AAPCS).
   The overall assignment is recorded in an array of per parameter structures
   called parameter plans. The parameter plans are also further organized in a
   number of linked lists, one per class of parameter (see the comment for the
   definition of enum reg_class).

   nb_args: number of parameters of the function for which a call is generated
   float_abi: float ABI in use for this function call
   plan: the structure where the overall assignment is recorded
   todo: a bitmap that records which core registers hold (part of) a
         CORE_STRUCT_CLASS parameter; it is not updated for CORE_CLASS
         parameters

   Returns the amount of stack space needed for parameter passing

   Note: this function does not allocate anything itself. plan->pplans must
   already point to an array with room for nb_args entries; gfunc_call()
   allocates it with tcc_malloc and frees it once copy_params() is done. */
static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
{
  int i, size, align;
  int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
  struct avail_regs avregs = {{0}};

  ncrn = nsaa = 0;
  *todo = 0;

  /* i runs from nb_args - 1 down to 0, so vtop[-i] walks the arguments from
     the deepest value stack entry up to vtop itself */
  for(i = nb_args; i-- ;) {
    int j, start_vfpreg = 0;
    CType type = vtop[-i].type;
    type.t &= ~VT_ARRAY;
    size = type_size(&type, &align);
    /* both size and alignment are rounded up to a multiple of 4 bytes */
    size = (size + 3) & ~3;
    align = (align + 3) & ~3;
    switch(vtop[-i].type.t & VT_BTYPE) {
      case VT_STRUCT:
      case VT_FLOAT:
      case VT_DOUBLE:
      case VT_LDOUBLE:
      if (float_abi == ARM_HARD_FLOAT) {
        int is_hfa = 0; /* Homogeneous float aggregate */

        if (is_float(vtop[-i].type.t)
            || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
          int end_vfpreg;

          start_vfpreg = assign_vfpreg(&avregs, align, size);
          /* last single precision register covered (only meaningful when
             start_vfpreg is not negative) */
          end_vfpreg = start_vfpreg + ((size - 1) >> 2);
          if (start_vfpreg >= 0) {
            add_param_plan(plan, is_hfa ? VFP_STRUCT_CLASS : VFP_CLASS,
                start_vfpreg, end_vfpreg, &vtop[-i]);
            continue;
          } else
            break; /* no VFP register available: goes on the stack below */
        }
      }
      /* round ncrn up to a multiple of align/4 registers */
      ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
      /* NOTE(review): both outcomes of the VFP attempt above leave the
         switch (continue or break), so start_vfpreg is still 0 whenever this
         test is reached and "start_vfpreg != -1" is always true here. */
      if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
        /* The parameter is allocated both in core register and on stack. As
         * such, it can be of either class: it would either be the last of
         * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
        for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
          *todo|=(1<<j);
        /* j is now one past the last core register used (at most 4) */
        add_param_plan(plan, CORE_STRUCT_CLASS, ncrn, j, &vtop[-i]);
        ncrn += size/4;
        /* the words that did not fit in r0-r3 open the stacked area */
        if (ncrn > 4)
          nsaa = (ncrn - 4) * 4;
      } else {
        /* no core register may be used after an argument went to the stack */
        ncrn = 4;
        break;
      }
      continue;
      default:
      if (ncrn < 4) {
        int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;

        if (is_long) {
          /* a long long starts at an even numbered register */
          ncrn = (ncrn + 1) & -2;
          if (ncrn == 4)
            break; /* no register pair left: goes on the stack below */
        }
        add_param_plan(plan, CORE_CLASS, ncrn, ncrn + is_long, &vtop[-i]);
        ncrn += 1 + is_long;
        continue;
      }
    }
    /* reached by every argument that was not given a register */
    nsaa = (nsaa + (align - 1)) & ~(align - 1);
    add_param_plan(plan, STACK_CLASS, nsaa, nsaa + size, &vtop[-i]);
    nsaa += size; /* size already rounded up before */
  }
  return nsaa;
}
1154
/* Copy parameters to their final destination (core reg, VFP reg or stack) for
   function call.

   nb_args: number of parameters the function takes
   plan: the overall assignment plan for parameters (see assign_regs())
   todo: a bitmap indicating which core regs are loaded by popping the data
         pushed for CORE_STRUCT_CLASS parameters

   Returns the number of SValue added by this function on the value stack */
static int copy_params(int nb_args, struct plan *plan, int todo)
{
  int size, align, r, i, nb_extra_sval = 0;
  struct param_plan *pplan;
  int pass = 0;

   /* Several constraints require parameters to be copied in a specific order:
      - structures are copied to the stack before being loaded in a reg;
      - floats loaded to an odd numbered VFP reg are first copied to the
        preceding even numbered VFP reg and then moved to the next VFP reg.

      It is thus important that:
      - structures assigned to core regs must be copied after parameters
        assigned to the stack but before structures assigned to VFP regs because
        a structure can lie partly in core registers and partly on the stack;
      - parameters assigned to the stack and all structures be copied before
        parameters assigned to a core reg since copying a parameter to the stack
        require using a core reg;
      - parameters assigned to VFP regs be copied before structures assigned to
        VFP regs as the copy might use an even numbered VFP reg that already
        holds part of a structure. */
again:
  for(i = 0; i < NB_CLASSES; i++) {
    /* each class list starts with the plan added last by assign_regs() */
    for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {

      /* the second pass only redoes CORE_CLASS parameters whose SValue is
         no longer held in a register */
      if (pass
          && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
        continue;

      vpushv(pplan->sval);
      pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
      switch(i) {
        case STACK_CLASS:
        case CORE_STRUCT_CLASS:
        case VFP_STRUCT_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
            int padding = 0;
            size = type_size(&pplan->sval->type, &align);
            /* align to stack align size */
            size = (size + 3) & ~3;
            /* gap between this stacked argument and the one placed just
               before it in the argument area */
            if (i == STACK_CLASS && pplan->prev)
              padding = pplan->start - pplan->prev->end;
            size += padding; /* Add padding if any */
            /* allocate the necessary size on stack */
            gadd_sp(-size);
            /* generate structure store */
            r = get_reg(RC_INT);
            o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            /* XXX: optimize. Save all register because memcpy can use them */
            o(0xED2D0A00|(0&1)<<22|(0>>1)<<12|16); /* vpush {s0-s15} */
            vstore(); /* memcpy to current sp + potential padding */
            o(0xECBD0A00|(0&1)<<22|(0>>1)<<12|16); /* vpop {s0-s15} */

            /* Homogeneous float aggregate are loaded to VFP registers
               immediately since there is no way of loading data in multiple
               non consecutive VFP registers as what is done for other
               structures (see the use of todo). */
            if (i == VFP_STRUCT_CLASS) {
              int first = pplan->start, nb = pplan->end - first + 1;
              /* vpop.32 {pplan->start, ..., pplan->end} */
              o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
              /* No need to write the register used to a SValue since VFP regs
                 cannot be used for gcall_or_jmp */
            }
          } else {
            if (is_float(pplan->sval->type.t)) {
#ifdef TCC_ARM_VFP
              r = vfpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else {
                size = 8;
                r |= 0x101; /* vpush.32 -> vpush.64 */
              }
              o(0xED2D0A01 + r); /* vpush */
#else
              r = fpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
                size = 8;
              else
                size = LDOUBLE_SIZE;

              if (size == 12)
                r |= 0x400000;
              else if(size == 8)
                r|=0x8000;

              o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
#endif
            } else {
              /* simple type (currently always same size) */
              /* XXX: implicit cast ? */
              size=4;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                /* split into two words; the one left on top is pushed
                   first, the other one by the common push below */
                lexpand();
                size = 8;
                r = gv(RC_INT);
                o(0xE52D0004|(intr(r)<<12)); /* push r */
                vtop--;
              }
              r = gv(RC_INT);
              o(0xE52D0004|(intr(r)<<12)); /* push r */
            }
            /* prev->end never exceeds start, so this moves sp by minus the
               size of the gap between the two stacked arguments */
            if (i == STACK_CLASS && pplan->prev)
              gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
          }
          break;

        case VFP_CLASS:
          /* load into the float register containing s<start>: s<2k> and
             s<2k+1> both belong to float register TREG_F0 + k */
          gv(regmask(TREG_F0 + (pplan->start >> 1)));
          if (pplan->start & 1) { /* Must be in upper part of double register */
            o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
            vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
          }
          break;

        case CORE_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
            /* the word left on top by lexpand() goes to register 'end' and
               is remembered in r2 of the original SValue */
            lexpand();
            gv(regmask(pplan->end));
            pplan->sval->r2 = vtop->r;
            vtop--;
          }
          gv(regmask(pplan->start));
          /* Mark register as used so that gcall_or_jmp use another one
             (regs >=4 are free as never used to pass parameters) */
          pplan->sval->r = vtop->r;
          break;
      }
      vtop--; /* drop the copy pushed by vpushv() above */
    }
  }

  /* second pass to restore registers that were saved on stack by accident.
     Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
  if (++pass < 2)
    goto again;

  /* Manually free remaining registers since next parameters are loaded
   * manually, without the help of gv(int). */
  save_regs(nb_args);

  if(todo) {
    /* load every core register flagged in todo from the data pushed above */
    o(0xE8BD0000|todo); /* pop {todo} */
    for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
      int r;
      pplan->sval->r = pplan->start;
      /* An SValue can only pin 2 registers at best (r and r2) but a structure
         can occupy more than 2 registers. Thus, we need to push on the value
         stack some fake parameter to have on SValue for each registers used
         by a structure (r2 is not used). */
      for (r = pplan->start + 1; r <= pplan->end; r++) {
        if (todo & (1 << r)) {
          nb_extra_sval++;
          vpushi(0);
          vtop->r = r;
        }
      }
    }
  }
  return nb_extra_sval;
}
1329
1330 /* Generate function call. The function address is pushed first, then
1331    all the parameters in call order. This functions pops all the
1332    parameters and the function address. */
1333 void gfunc_call(int nb_args)
1334 {
1335   int r, args_size;
1336   int def_float_abi = float_abi;
1337   int todo;
1338   struct plan plan;
1339 #ifdef TCC_ARM_EABI
1340   int variadic;
1341 #endif
1342
1343 #ifdef CONFIG_TCC_BCHECK
1344   if (tcc_state->do_bounds_check)
1345     gbound_args(nb_args);
1346 #endif
1347
1348 #ifdef TCC_ARM_EABI
1349   if (float_abi == ARM_HARD_FLOAT) {
1350     variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
1351     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1352       float_abi = ARM_SOFTFP_FLOAT;
1353   }
1354 #endif
1355   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1356      VT_JMP anywhere except on the top of the stack because it would complicate
1357      the code generator. */
1358   r = vtop->r & VT_VALMASK;
1359   if (r == VT_CMP || (r & ~1) == VT_JMP)
1360     gv(RC_INT);
1361
1362   memset(&plan, 0, sizeof plan);
1363   if (nb_args)
1364     plan.pplans = tcc_malloc(nb_args * sizeof(*plan.pplans));
1365
1366   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1367
1368 #ifdef TCC_ARM_EABI
1369   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1370     args_size = (args_size + 7) & ~7;
1371     o(0xE24DD004); /* sub sp, sp, #4 */
1372   }
1373 #endif
1374
1375   nb_args += copy_params(nb_args, &plan, todo);
1376   tcc_free(plan.pplans);
1377
1378   /* Move fct SValue on top as required by gcall_or_jmp */
1379   vrotb(nb_args + 1);
1380   gcall_or_jmp(0);
1381   if (args_size)
1382       gadd_sp(args_size); /* pop all parameters passed on the stack */
1383 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1384   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1385     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1386       o(0xEE000A10); /*vmov s0, r0 */
1387     } else {
1388       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1389       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1390     }
1391   }
1392 #endif
1393   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1394   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1395   float_abi = def_float_abi;
1396 }
1397
/* Generate the prolog of the function described by func_sym.

   The incoming argument registers are stored on the stack so that every
   parameter can be declared as a local lvalue at a fixed offset from fp.
   Frame as built by the instructions emitted here, from fp upwards:
     fp + 0              saved fp, ip, lr (12 bytes)
     fp + 12             nf saved single precision VFP registers
     fp + 12 + 4*nf      n saved core registers
     fp + 12 + 4*(n+nf)  arguments the caller passed on the stack
   A nop is emitted at func_sub_sp_offset; gfunc_epilog() overwrites it with
   the stack adjustment once the size of the locals is known. */
void gfunc_prolog(Sym *func_sym)
{
  CType *func_type = &func_sym->type;
  Sym *sym,*sym2;
  int n, nf, size, align, rs, struct_ret = 0;
  int addr, pn, sn; /* pn=core, sn=stack */
  CType ret_type;

#ifdef TCC_ARM_EABI
  struct avail_regs avregs = {{0}};
#endif

  sym = func_type->ref;

  /* n: number of core registers to save, nf: number of single precision
     VFP registers to save */
  n = nf = 0;
  if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
      !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
  {
    /* struct returned through a pointer, which uses the first core reg */
    n++;
    struct_ret = 1;
    func_vc = 12; /* Offset from fp of the place to store the result */
  }
  /* first walk over the parameters: only count the registers they use */
  for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
    size = type_size(&sym2->type, &align);
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var &&
        (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
      int tmpnf = assign_vfpreg(&avregs, align, size);
      tmpnf += (size + 3) / 4;
      /* keep the highest VFP register count seen so far */
      nf = (tmpnf > nf) ? tmpnf : nf;
    } else
#endif
    if (n < 4)
      n += (size + 3) / 4;
  }
  o(0xE1A0C00D); /* mov ip,sp */
  /* a variadic function always saves the four argument registers */
  if (func_var)
    n=4;
  if (n) {
    if(n>4)
      n=4;
#ifdef TCC_ARM_EABI
    /* round up to an even number of registers */
    n=(n+1)&-2;
#endif
    o(0xE92D0000|((1<<n)-1)); /* save r0..r(n-1) (at most r0-r3) on stack */
  }
  if (nf) {
    if (nf>16)
      nf=16;
    nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
    o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
  }
  o(0xE92D5800); /* save fp, ip, lr */
  o(0xE1A0B00D); /* mov fp, sp */
  func_sub_sp_offset = ind;
  o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */

#ifdef TCC_ARM_EABI
  if (float_abi == ARM_HARD_FLOAT) {
    /* the saved VFP registers sit between fp/ip/lr and the core registers */
    func_vc += nf * 4;
    /* restart the VFP allocation for the second walk below */
    memset(&avregs, 0, sizeof avregs);
  }
#endif
  /* second walk over the parameters: compute the offset of each one and
     declare it as a local; pn/sn count core register/stack words used */
  pn = struct_ret, sn = 0;
  while ((sym = sym->next)) {
    CType *type;
    type = &sym->type;
    size = type_size(type, &align);
    size = (size + 3) >> 2; /* size is in 4 byte words from here on */
    align = (align + 3) & ~3;
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
        || is_hgen_float_aggr(&sym->type))) {
      int fpn = assign_vfpreg(&avregs, align, size << 2);
      if (fpn >= 0)
        addr = fpn * 4; /* offset inside the saved VFP registers */
      else
        goto from_stack;
    } else
#endif
    if (pn < 4) {
#ifdef TCC_ARM_EABI
        pn = (pn + (align-1)/4) & -(align/4);
#endif
      addr = (nf + pn) * 4;
      pn += size;
      /* a parameter overflowing r0-r3 continues in the caller's stack */
      if (!sn && pn > 4)
        sn = (pn - 4);
    } else {
#ifdef TCC_ARM_EABI
from_stack:
        sn = (sn + (align-1)/4) & -(align/4);
#endif
      addr = (n + nf + sn) * 4;
      sn += size;
    }
    /* + 12 skips the saved fp, ip and lr */
    sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL,
             addr + 12);
  }
  last_itod_magic=0;
  leaffunc = 1; /* cleared by gfunc_call() when a call is generated */
  loc = 0;
#ifdef CONFIG_TCC_BCHECK
  if (tcc_state->do_bounds_check)
    gen_bounds_prolog();
#endif
}
1506
1507 /* generate function epilog */
1508 void gfunc_epilog(void)
1509 {
1510   uint32_t x;
1511   int diff;
1512
1513 #ifdef CONFIG_TCC_BCHECK
1514   if (tcc_state->do_bounds_check)
1515     gen_bounds_epilog();
1516 #endif
1517   /* Copy float return value to core register if base standard is used and
1518      float computation is made with VFP */
1519 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1520   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1521     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1522       o(0xEE100A10); /* fmrs r0, s0 */
1523     else {
1524       o(0xEE100B10); /* fmrdl r0, d0 */
1525       o(0xEE301B10); /* fmrdh r1, d0 */
1526     }
1527   }
1528 #endif
1529   o(0xE89BA800); /* restore fp, sp, pc */
1530   diff = (-loc + 3) & -4;
1531 #ifdef TCC_ARM_EABI
1532   if(!leaffunc)
1533     diff = ((diff + 11) & -8) - 4;
1534 #endif
1535   if(diff > 0) {
1536     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1537     if(x)
1538       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1539     else {
1540       int addr;
1541       addr=ind;
1542       o(0xE59FC004); /* ldr ip,[pc+4] */
1543       o(0xE04BD00C); /* sub sp,fp,ip  */
1544       o(0xE1A0F00E); /* mov pc,lr */
1545       o(diff);
1546       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1547     }
1548   }
1549 }
1550
1551 ST_FUNC void gen_fill_nops(int bytes)
1552 {
1553     if ((bytes & 3))
1554       tcc_error("alignment of code section not multiple of 4");
1555     while (bytes > 0) {
1556         o(0xE1A00000);
1557         bytes -= 4;
1558     }
1559 }
1560
1561 /* generate a jump to a label */
1562 ST_FUNC int gjmp(int t)
1563 {
1564   int r;
1565   if (nocode_wanted)
1566     return t;
1567   r=ind;
1568   o(0xE0000000|encbranch(r,t,1));
1569   return r;
1570 }
1571
1572 /* generate a jump to a fixed address */
1573 ST_FUNC void gjmp_addr(int a)
1574 {
1575   gjmp(a);
1576 }
1577
1578 ST_FUNC int gjmp_cond(int op, int t)
1579 {
1580   int r;
1581   if (nocode_wanted)
1582     return t;
1583   r=ind;
1584   op=mapcc(op);
1585   op|=encbranch(r,t,1);
1586   o(op);
1587   return r;
1588 }
1589
1590 ST_FUNC int gjmp_append(int n, int t)
1591 {
1592   uint32_t *x;
1593   int p,lp;
1594   if(n) {
1595     p = n;
1596     do {
1597       p = decbranch(lp=p);
1598     } while(p);
1599     x = (uint32_t *)(cur_text_section->data + lp);
1600     *x &= 0xff000000;
1601     *x |= encbranch(lp,t,1);
1602     t = n;
1603   }
1604   return t;
1605 }
1606
/* Generate an integer binary operation.

   The operands are vtop[-1] (left) and vtop[0] (right); they are replaced
   by the result. The first switch classifies 'op' into one of three code
   generation strategies recorded in 'c':
     1: data processing instruction, 'opc' being shifted left by 20 into the
        instruction word (this includes comparisons, through the default
        case);
     2: shift, 'opc' selecting the kind of shift;
     3: call to the runtime helper 'func', the result being taken from
        'retreg'.
   '*' and TOK_UMULL are emitted directly and return early. */
void gen_opi(int op)
{
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  unsigned short retreg = REG_IRET;

  c=0;
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      /* the result goes back into the register of the left operand */
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* the EABI helper is a combined divide/modulo: the value wanted here
         is read from REG_IRE2 instead of REG_IRET */
      func=TOK___aeabi_idivmod;
      retreg=REG_IRE2;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      func=TOK___aeabi_uidivmod;
      retreg=REG_IRE2;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* 32x32 -> 64 bit multiply: the result occupies the r/r2 pair of the
         value left on the stack */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      /* anything else is handled as a comparison (cmp); the tokens in
         TOK_ULT..TOK_GT get their result flagged with vset_VT_CMP() below */
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* only the second operand can be an immediate: with a constant on the
         left of a subtraction, swap the operands and use the reverse form */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* a value held in the flags or as a jump is put in a register first */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* load the left operand; c now holds its register number */
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20);
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        uint32_t x;
        /* try to encode the right operand as an immediate; 0 means that it
           cannot be encoded */
        x=stuff_const(opc|0x2000000|(c<<16),vtop->c.i);
        if(x) {
          if ((x & 0xfff00000) == 0xe3500000)   // cmp rx,#c
            o(x);
          else {
            r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
            o(x|(r<<12));
          }
          goto done;
        }
      }
      fr=intr(gv(RC_INT));
#ifdef CONFIG_TCC_BCHECK
      /* the left operand is no longer in a register after the gv() above:
         load it again */
      if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
        vswap();
        c=intr(gv(RC_INT));
        vswap();
      }
#endif
      if ((opc & 0xfff00000) == 0xe1500000) // cmp rx,ry
        o(opc|(c<<16)|fr);
      else {
        r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<16)|(r<<12)|fr);
      }
done:
      vtop--;
      if (op >= TOK_ULT && op <= TOK_GT)
        vset_VT_CMP(op);
      break;
    case 2:
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* load the value to shift; r holds its register number */
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* constant shift amount: only its low 5 bits are encoded */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|r|(c<<7)|(fr<<12));
      } else {
        /* shift amount taken from a register */
        fr=intr(gv(RC_INT));
#ifdef CONFIG_TCC_BCHECK
        if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
          vswap();
          r=intr(gv(RC_INT));
          vswap();
        }
#endif
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|r|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* push the helper, move it below its two arguments and call it */
      vpush_helper_func(func);
      vrott(3);
      gfunc_call(2);
      /* push a placeholder value that stands for the result register */
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1795
1796 #ifdef TCC_ARM_VFP
1797 static int is_zero(int i)
1798 {
1799   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1800     return 0;
1801   if (vtop[i].type.t == VT_FLOAT)
1802     return (vtop[i].c.f == 0.f);
1803   else if (vtop[i].type.t == VT_DOUBLE)
1804     return (vtop[i].c.d == 0.0);
1805   return (vtop[i].c.ld == 0.l);
1806 }
1807
/* generate a floating point operation 'v = t1 op t2' instruction. The
 *    two operands are guaranteed to have the same floating point type.
 *
 * VFP version. The operands are vtop[-1] (t1) and vtop[0] (t2); they are
 * replaced by the result. For a comparison the result is left in the cpu
 * flags and flagged with vset_VT_CMP(). Operations with a constant zero
 * operand are simplified: adding or subtracting zero emits nothing, a
 * subtraction from zero becomes a negation and a comparison with zero uses
 * the compare-with-zero form of the instruction. */
void gen_opf(int op)
{
  uint32_t x;
  int fneg=0,r;
  /* instruction word being assembled; T2CPR() contributes bits that depend
     on the operand type (presumably the precision -- TODO confirm) */
  x=0xEE000A00|T2CPR(vtop->type.t);
  switch(op) {
    case '+':
      /* bring a zero operand, if any, on top and simply drop it */
      if(is_zero(-1))
        vswap();
      if(is_zero(0)) {
        vtop--;
        return;
      }
      x|=0x300000;
      break;
    case '-':
      x|=0x300040;
      /* t1 - 0: drop the zero */
      if(is_zero(0)) {
        vtop--;
        return;
      }
      /* 0 - t2: negate t2 instead of subtracting */
      if(is_zero(-1)) {
        x|=0x810000; /* fsubX -> fnegX */
        vswap();
        vtop--;
        fneg=1;
      }
      break;
    case '*':
      x|=0x200000;
      break;
    case '/':
      x|=0x800000;
      break;
    default:
      if(op < TOK_ULT || op > TOK_GT) {
        tcc_error("unknown fp op %x!",op);
        return;
      }
      /* a zero on the left is moved to the right, which requires mirroring
         the ordering comparisons */
      if(is_zero(-1)) {
        vswap();
        switch(op) {
          case TOK_LT: op=TOK_GT; break;
          case TOK_GE: op=TOK_ULE; break;
          case TOK_LE: op=TOK_GE; break;
          case TOK_GT: op=TOK_ULT; break;
        }
      }
      x|=0xB40040; /* fcmpX */
      if(op!=TOK_EQ && op!=TOK_NE)
        x|=0x80; /* fcmpX -> fcmpeX */
      if(is_zero(0)) {
        vtop--;
        o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
      } else {
        gv2(RC_FLOAT,RC_FLOAT);
        x|=vfpr(vtop[0].r);
        o(x|(vfpr(vtop[-1].r) << 12));
        vtop--;
      }
      o(0xEEF1FA10); /* fmstat */

      /* choose the condition token to test after fmstat */
      switch(op) {
        case TOK_LE: op=TOK_ULE; break;
        case TOK_LT: op=TOK_ULT; break;
        case TOK_UGE: op=TOK_GE; break;
        case TOK_UGT: op=TOK_GT; break;
      }
      vset_VT_CMP(op);
      return;
  }
  /* arithmetic: load the operand on top (t2, or the operand to negate) */
  r=gv(RC_FLOAT);
  x|=vfpr(r);
  r=regmask(r);
  if(!fneg) {
    int r2;
    /* then load t1 */
    vswap();
    r2=gv(RC_FLOAT);
    x|=vfpr(r2)<<16;
    r|=regmask(r2);
#ifdef CONFIG_TCC_BCHECK
    /* the first operand is no longer in a register after loading the
       second one: load it again and update its field in the instruction */
    if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
      vswap();
      r=gv(RC_FLOAT);
      vswap();
      x=(x&~0xf)|vfpr(r);
    }
#endif
  }
  /* choose the destination register; r is passed to get_reg_ex() as the
     set of registers holding the operands */
  vtop->r=get_reg_ex(RC_FLOAT,r);
  if(!fneg)
    vtop--;
  o(x|(vfpr(vtop->r)<<12));
}
1904
1905 #else
1906 static uint32_t is_fconst()
1907 {
1908   long double f;
1909   uint32_t r;
1910   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1911     return 0;
1912   if (vtop->type.t == VT_FLOAT)
1913     f = vtop->c.f;
1914   else if (vtop->type.t == VT_DOUBLE)
1915     f = vtop->c.d;
1916   else
1917     f = vtop->c.ld;
1918   if(!ieee_finite(f))
1919     return 0;
1920   r=0x8;
1921   if(f<0.0) {
1922     r=0x18;
1923     f=-f;
1924   }
1925   if(f==0.0)
1926     return r;
1927   if(f==1.0)
1928     return r|1;
1929   if(f==2.0)
1930     return r|2;
1931   if(f==3.0)
1932     return r|3;
1933   if(f==4.0)
1934     return r|4;
1935   if(f==5.0)
1936     return r|5;
1937   if(f==0.5)
1938     return r|6;
1939   if(f==10.0)
1940     return r|7;
1941   return 0;
1942 }
1943
1944 /* generate a floating point operation 'v = t1 op t2' instruction. The
1945    two operands are guaranteed to have the same floating point type */
1946 void gen_opf(int op)
1947 {
1948   uint32_t x, r, r2, c1, c2;
1949   //fputs("gen_opf\n",stderr);
1950   vswap();
1951   c1 = is_fconst();
1952   vswap();
1953   c2 = is_fconst();
1954   x=0xEE000100;
1955 #if LDOUBLE_SIZE == 8
1956   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1957     x|=0x80;
1958 #else
1959   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1960     x|=0x80;
1961   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1962     x|=0x80000;
1963 #endif
1964   switch(op)
1965   {
1966     case '+':
1967       if(!c2) {
1968         vswap();
1969         c2=c1;
1970       }
1971       vswap();
1972       r=fpr(gv(RC_FLOAT));
1973       vswap();
1974       if(c2) {
1975         if(c2>0xf)
1976           x|=0x200000; // suf
1977         r2=c2&0xf;
1978       } else {
1979         r2=fpr(gv(RC_FLOAT));
1980 #ifdef CONFIG_TCC_BCHECK
1981         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
1982           vswap();
1983           r=fpr(gv(RC_FLOAT));
1984           vswap();
1985         }
1986 #endif
1987       }
1988       break;
1989     case '-':
1990       if(c2) {
1991         if(c2<=0xf)
1992           x|=0x200000; // suf
1993         r2=c2&0xf;
1994         vswap();
1995         r=fpr(gv(RC_FLOAT));
1996         vswap();
1997       } else if(c1 && c1<=0xf) {
1998         x|=0x300000; // rsf
1999         r2=c1;
2000         r=fpr(gv(RC_FLOAT));
2001         vswap();
2002       } else {
2003         x|=0x200000; // suf
2004         vswap();
2005         r=fpr(gv(RC_FLOAT));
2006         vswap();
2007         r2=fpr(gv(RC_FLOAT));
2008 #ifdef CONFIG_TCC_BCHECK
2009         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2010           vswap();
2011           r=fpr(gv(RC_FLOAT));
2012           vswap();
2013         }
2014 #endif
2015       }
2016       break;
2017     case '*':
2018       if(!c2 || c2>0xf) {
2019         vswap();
2020         c2=c1;
2021       }
2022       vswap();
2023       r=fpr(gv(RC_FLOAT));
2024       vswap();
2025       if(c2 && c2<=0xf)
2026         r2=c2;
2027       else {
2028         r2=fpr(gv(RC_FLOAT));
2029 #ifdef CONFIG_TCC_BCHECK
2030         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2031           vswap();
2032           r=fpr(gv(RC_FLOAT));
2033           vswap();
2034         }
2035 #endif
2036       }
2037       x|=0x100000; // muf
2038       break;
2039     case '/':
2040       if(c2 && c2<=0xf) {
2041         x|=0x400000; // dvf
2042         r2=c2;
2043         vswap();
2044         r=fpr(gv(RC_FLOAT));
2045         vswap();
2046       } else if(c1 && c1<=0xf) {
2047         x|=0x500000; // rdf
2048         r2=c1;
2049         r=fpr(gv(RC_FLOAT));
2050         vswap();
2051       } else {
2052         x|=0x400000; // dvf
2053         vswap();
2054         r=fpr(gv(RC_FLOAT));
2055         vswap();
2056         r2=fpr(gv(RC_FLOAT));
2057 #ifdef CONFIG_TCC_BCHECK
2058         if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2059           vswap();
2060           r=fpr(gv(RC_FLOAT));
2061           vswap();
2062         }
2063 #endif
2064       }
2065       break;
2066     default:
2067       if(op >= TOK_ULT && op <= TOK_GT) {
2068         x|=0xd0f110; // cmfe
2069 /* bug (intention?) in Linux FPU emulator
2070    doesn't set carry if equal */
2071         switch(op) {
2072           case TOK_ULT:
2073           case TOK_UGE:
2074           case TOK_ULE:
2075           case TOK_UGT:
2076             tcc_error("unsigned comparison on floats?");
2077             break;
2078           case TOK_LT:
2079             op=TOK_Nset;
2080             break;
2081           case TOK_LE:
2082             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
2083             break;
2084           case TOK_EQ:
2085           case TOK_NE:
2086             x&=~0x400000; // cmfe -> cmf
2087             break;
2088         }
2089         if(c1 && !c2) {
2090           c2=c1;
2091           vswap();
2092           switch(op) {
2093             case TOK_Nset:
2094               op=TOK_GT;
2095               break;
2096             case TOK_GE:
2097               op=TOK_ULE;
2098               break;
2099             case TOK_ULE:
2100               op=TOK_GE;
2101               break;
2102             case TOK_GT:
2103               op=TOK_Nset;
2104               break;
2105           }
2106         }
2107         vswap();
2108         r=fpr(gv(RC_FLOAT));
2109         vswap();
2110         if(c2) {
2111           if(c2>0xf)
2112             x|=0x200000;
2113           r2=c2&0xf;
2114         } else {
2115           r2=fpr(gv(RC_FLOAT));
2116 #ifdef CONFIG_TCC_BCHECK
2117           if ((vtop[-1].r & VT_VALMASK) >= VT_CONST) {
2118             vswap();
2119             r=fpr(gv(RC_FLOAT));
2120             vswap();
2121           }
2122 #endif
2123         }
2124         --vtop;
2125         vset_VT_CMP(op);
2126         ++vtop;
2127       } else {
2128         tcc_error("unknown fp op %x!",op);
2129         return;
2130       }
2131   }
2132   if(vtop[-1].r == VT_CMP)
2133     c1=15;
2134   else {
2135     c1=vtop->r;
2136     if(r2&0x8)
2137       c1=vtop[-1].r;
2138     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
2139     c1=fpr(vtop[-1].r);
2140   }
2141   vtop--;
2142   o(x|(r<<16)|(c1<<12)|r2);
2143 }
2144 #endif
2145
2146 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2147    and 'long long' cases. */
2148 ST_FUNC void gen_cvt_itof(int t)
2149 {
2150   uint32_t r, r2;
2151   int bt;
2152   bt=vtop->type.t & VT_BTYPE;
2153   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
2154 #ifndef TCC_ARM_VFP
2155     uint32_t dsize = 0;
2156 #endif
2157     r=intr(gv(RC_INT));
2158 #ifdef TCC_ARM_VFP
2159     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
2160     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
2161     r2|=r2<<12;
2162     if(!(vtop->type.t & VT_UNSIGNED))
2163       r2|=0x80;                /* fuitoX -> fsituX */
2164     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
2165 #else
2166     r2=fpr(vtop->r=get_reg(RC_FLOAT));
2167     if((t & VT_BTYPE) != VT_FLOAT)
2168       dsize=0x80;    /* flts -> fltd */
2169     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
2170     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
2171       uint32_t off = 0;
2172       o(0xE3500000|(r<<12));        /* cmp */
2173       r=fpr(get_reg(RC_FLOAT));
2174       if(last_itod_magic) {
2175         off=ind+8-last_itod_magic;
2176         off/=4;
2177         if(off>255)
2178           off=0;
2179       }
2180       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
2181       if(!off) {
2182         o(0xEA000000);              /* b */
2183         last_itod_magic=ind;
2184         o(0x4F800000);              /* 4294967296.0f */
2185       }
2186       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
2187     }
2188 #endif
2189     return;
2190   } else if(bt == VT_LLONG) {
2191     int func;
2192     CType *func_type = 0;
2193     if((t & VT_BTYPE) == VT_FLOAT) {
2194       func_type = &func_float_type;
2195       if(vtop->type.t & VT_UNSIGNED)
2196         func=TOK___floatundisf;
2197       else
2198         func=TOK___floatdisf;
2199 #if LDOUBLE_SIZE != 8
2200     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2201       func_type = &func_ldouble_type;
2202       if(vtop->type.t & VT_UNSIGNED)
2203         func=TOK___floatundixf;
2204       else
2205         func=TOK___floatdixf;
2206     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2207 #else
2208     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2209 #endif
2210       func_type = &func_double_type;
2211       if(vtop->type.t & VT_UNSIGNED)
2212         func=TOK___floatundidf;
2213       else
2214         func=TOK___floatdidf;
2215     }
2216     if(func_type) {
2217       vpushsym(func_type, external_helper_sym(func));
2218       vswap();
2219       gfunc_call(1);
2220       vpushi(0);
2221       vtop->r=TREG_F0;
2222       return;
2223     }
2224   }
2225   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2226 }
2227
2228 /* convert fp to int 't' type */
2229 void gen_cvt_ftoi(int t)
2230 {
2231   uint32_t r, r2;
2232   int u, func = 0;
2233   u=t&VT_UNSIGNED;
2234   t&=VT_BTYPE;
2235   r2=vtop->type.t & VT_BTYPE;
2236   if(t==VT_INT) {
2237 #ifdef TCC_ARM_VFP
2238     r=vfpr(gv(RC_FLOAT));
2239     u=u?0:0x10000;
2240     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2241     r2=intr(vtop->r=get_reg(RC_INT));
2242     o(0xEE100A10|(r<<16)|(r2<<12));
2243     return;
2244 #else
2245     if(u) {
2246       if(r2 == VT_FLOAT)
2247         func=TOK___fixunssfsi;
2248 #if LDOUBLE_SIZE != 8
2249       else if(r2 == VT_LDOUBLE)
2250         func=TOK___fixunsxfsi;
2251       else if(r2 == VT_DOUBLE)
2252 #else
2253       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2254 #endif
2255         func=TOK___fixunsdfsi;
2256     } else {
2257       r=fpr(gv(RC_FLOAT));
2258       r2=intr(vtop->r=get_reg(RC_INT));
2259       o(0xEE100170|(r2<<12)|r);
2260       return;
2261     }
2262 #endif
2263   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2264     if(r2 == VT_FLOAT)
2265       func=TOK___fixsfdi;
2266 #if LDOUBLE_SIZE != 8
2267     else if(r2 == VT_LDOUBLE)
2268       func=TOK___fixxfdi;
2269     else if(r2 == VT_DOUBLE)
2270 #else
2271     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2272 #endif
2273       func=TOK___fixdfdi;
2274   }
2275   if(func) {
2276     vpush_helper_func(func);
2277     vswap();
2278     gfunc_call(1);
2279     vpushi(0);
2280     if(t == VT_LLONG)
2281       vtop->r2 = REG_IRE2;
2282     vtop->r = REG_IRET;
2283     return;
2284   }
2285   tcc_error("unimplemented gen_cvt_ftoi!");
2286 }
2287
2288 /* convert from one floating point type to another */
2289 void gen_cvt_ftof(int t)
2290 {
2291 #ifdef TCC_ARM_VFP
2292   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2293     uint32_t r = vfpr(gv(RC_FLOAT));
2294     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2295   }
2296 #else
2297   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2298   gv(RC_FLOAT);
2299 #endif
2300 }
2301
2302 /* increment tcov counter */
2303 ST_FUNC void gen_increment_tcov (SValue *sv)
2304 {
2305   int r1, r2;
2306
2307   vpushv(sv);
2308   vtop->r = r1 = get_reg(RC_INT);
2309   r2 = get_reg(RC_INT);
2310   o(0xE59F0000 | (intr(r1)<<12)); // ldr r1,[pc]
2311   o(0xEA000000); // b $+4
2312   greloc(cur_text_section, sv->sym, ind, R_ARM_REL32);
2313   o(-12);
2314   o(0xe080000f | (intr(r1)<<16) | (intr(r1)<<12)); // add r1,r1,pc
2315   o(0xe5900000 | (intr(r1)<<16) | (intr(r2)<<12)); // ldr r2, [r1]
2316   o(0xe2900001 | (intr(r2)<<16) | (intr(r2)<<12)); // adds r2, r2, #1
2317   o(0xe5800000 | (intr(r1)<<16) | (intr(r2)<<12)); // str r2, [r1]
2318   o(0xe2800004 | (intr(r1)<<16) | (intr(r1)<<12)); // add r1, r1, #4
2319   o(0xe5900000 | (intr(r1)<<16) | (intr(r2)<<12)); // ldr r2, [r1]
2320   o(0xe2a00000 | (intr(r2)<<16) | (intr(r2)<<12)); // adc r2, r2, #0
2321   o(0xe5800000 | (intr(r1)<<16) | (intr(r2)<<12)); // str r2, [r1]
2322   vpop();
2323 }
2324
2325 /* computed goto support */
2326 void ggoto(void)
2327 {
2328   gcall_or_jmp(1);
2329   vtop--;
2330 }
2331
2332 /* Save the stack pointer onto the stack and return the location of its address */
2333 ST_FUNC void gen_vla_sp_save(int addr) {
2334     SValue v;
2335     v.type.t = VT_PTR;
2336     v.r = VT_LOCAL | VT_LVAL;
2337     v.c.i = addr;
2338     store(TREG_SP, &v);
2339 }
2340
2341 /* Restore the SP from a location on the stack */
2342 ST_FUNC void gen_vla_sp_restore(int addr) {
2343     SValue v;
2344     v.type.t = VT_PTR;
2345     v.r = VT_LOCAL | VT_LVAL;
2346     v.c.i = addr;
2347     load(TREG_SP, &v);
2348 }
2349
2350 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2351 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2352     int r;
2353 #if defined(CONFIG_TCC_BCHECK)
2354     if (tcc_state->do_bounds_check)
2355         vpushv(vtop);
2356 #endif
2357     r = intr(gv(RC_INT));
2358 #if defined(CONFIG_TCC_BCHECK)
2359     if (tcc_state->do_bounds_check)
2360         o(0xe2800001 | (r<<16)|(r<<12)); /* add r,r,#1 */
2361 #endif
2362     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2363 #ifdef TCC_ARM_EABI
2364     if (align < 8)
2365         align = 8;
2366 #else
2367     if (align < 4)
2368         align = 4;
2369 #endif
2370     if (align & (align - 1))
2371         tcc_error("alignment is not a power of 2: %i", align);
2372     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2373     vpop();
2374 #if defined(CONFIG_TCC_BCHECK)
2375     if (tcc_state->do_bounds_check) {
2376         vpushi(0);
2377         vtop->r = TREG_R0;
2378         o(0xe1a0000d | (vtop->r << 12)); // mov r0,sp
2379         vswap();
2380         vpush_helper_func(TOK___bound_new_region);
2381         vrott(3);
2382         gfunc_call(2);
2383         func_bound_add_epilog = 1;
2384     }
2385 #endif
2386 }
2387
2388 /* end of ARM code generator */
2389 /*************************************************************/
2390 #endif
2391 /*************************************************************/