/*
 *  ARMv4 code generator for TCC
 *
 *  Copyright (c) 2003 Daniel Glöckner
 *  Copyright (c) 2012 Thomas Preud'homme
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
24 #ifdef TARGET_DEFS_ONLY
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
30 /* number of available registers */
37 #ifndef TCC_ARM_VERSION
38 # define TCC_ARM_VERSION 5
41 /* a register can belong to several classes. The classes must be
42 sorted from more general to more precise (see gv2() code which does
43 assumptions on it). */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_LRET RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
64 #define RC_MASK (RC_INT|RC_FLOAT)
65 /* pretty names for the registers */
85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
88 /* return registers for function */
89 #define REG_IRET TREG_R0 /* single word int return register */
90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
91 #define REG_FRET TREG_F0 /* float return register */
94 #define TOK___divdi3 TOK___aeabi_ldivmod
95 #define TOK___moddi3 TOK___aeabi_ldivmod
96 #define TOK___udivdi3 TOK___aeabi_uldivmod
97 #define TOK___umoddi3 TOK___aeabi_uldivmod
100 /* defined if function parameters must be evaluated in reverse order */
101 #define INVERT_FUNC_PARAMS
103 /* defined if structures are passed as pointers. Otherwise structures
104 are directly pushed on stack. */
105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
107 /* pointer size, in bytes */
110 /* long double size and alignment, in bytes */
112 #define LDOUBLE_SIZE 8
116 #define LDOUBLE_SIZE 8
120 #define LDOUBLE_ALIGN 8
122 #define LDOUBLE_ALIGN 4
125 /* maximum alignment (for aligned attribute support) */
128 #define CHAR_IS_UNSIGNED
130 /******************************************************/
133 #define EM_TCC_TARGET EM_ARM
135 /* relocation type for 32 bit data relocation */
136 #define R_DATA_32 R_ARM_ABS32
137 #define R_DATA_PTR R_ARM_ABS32
138 #define R_JMP_SLOT R_ARM_JUMP_SLOT
139 #define R_COPY R_ARM_COPY
141 #define ELF_START_ADDR 0x00008000
142 #define ELF_PAGE_SIZE 0x1000
149 /******************************************************/
150 #else /* ! TARGET_DEFS_ONLY */
151 /******************************************************/
154 enum float_abi float_abi
;
156 ST_DATA
const int reg_classes
[NB_REGS
] = {
157 /* r0 */ RC_INT
| RC_R0
,
158 /* r1 */ RC_INT
| RC_R1
,
159 /* r2 */ RC_INT
| RC_R2
,
160 /* r3 */ RC_INT
| RC_R3
,
161 /* r12 */ RC_INT
| RC_R12
,
162 /* f0 */ RC_FLOAT
| RC_F0
,
163 /* f1 */ RC_FLOAT
| RC_F1
,
164 /* f2 */ RC_FLOAT
| RC_F2
,
165 /* f3 */ RC_FLOAT
| RC_F3
,
167 /* d4/s8 */ RC_FLOAT
| RC_F4
,
168 /* d5/s10 */ RC_FLOAT
| RC_F5
,
169 /* d6/s12 */ RC_FLOAT
| RC_F6
,
170 /* d7/s14 */ RC_FLOAT
| RC_F7
,
174 static int func_sub_sp_offset
, last_itod_magic
;
177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
178 static CType float_type
, double_type
, func_float_type
, func_double_type
;
179 ST_FUNC
void arm_init(struct TCCState
*s
)
181 float_type
.t
= VT_FLOAT
;
182 double_type
.t
= VT_DOUBLE
;
183 func_float_type
.t
= VT_FUNC
;
184 func_float_type
.ref
= sym_push(SYM_FIELD
, &float_type
, FUNC_CDECL
, FUNC_OLD
);
185 func_double_type
.t
= VT_FUNC
;
186 func_double_type
.ref
= sym_push(SYM_FIELD
, &double_type
, FUNC_CDECL
, FUNC_OLD
);
188 float_abi
= s
->float_abi
;
189 #ifndef TCC_ARM_HARDFLOAT
190 tcc_warning("soft float ABI currently not supported: default to softfp");
194 #define func_float_type func_old_type
195 #define func_double_type func_old_type
196 #define func_ldouble_type func_old_type
197 ST_FUNC
void arm_init(struct TCCState
*s
)
199 #if !defined (TCC_ARM_VFP)
200 tcc_warning("Support for FPA is deprecated and will be removed in next"
203 #if !defined (TCC_ARM_EABI)
204 tcc_warning("Support for OABI is deprecated and will be removed in next"
210 static int two2mask(int a
,int b
) {
211 return (reg_classes
[a
]|reg_classes
[b
])&~(RC_INT
|RC_FLOAT
);
214 static int regmask(int r
) {
215 return reg_classes
[r
]&~(RC_INT
|RC_FLOAT
);
218 /******************************************************/
221 char *default_elfinterp(struct TCCState
*s
)
223 if (s
->float_abi
== ARM_HARD_FLOAT
)
224 return "/lib/ld-linux-armhf.so.3";
226 return "/lib/ld-linux.so.3";
232 /* this is a good place to start adding big-endian support*/
236 if (!cur_text_section
)
237 tcc_error("compiler error! This happens f.ex. if the compiler\n"
238 "can't evaluate constant expressions outside of a function.");
239 if (ind1
> cur_text_section
->data_allocated
)
240 section_realloc(cur_text_section
, ind1
);
241 cur_text_section
->data
[ind
++] = i
&255;
243 cur_text_section
->data
[ind
++] = i
&255;
245 cur_text_section
->data
[ind
++] = i
&255;
247 cur_text_section
->data
[ind
++] = i
;
250 static uint32_t stuff_const(uint32_t op
, uint32_t c
)
253 uint32_t nc
= 0, negop
= 0;
263 case 0x1A00000: //mov
264 case 0x1E00000: //mvn
271 return (op
&0xF010F000)|((op
>>16)&0xF)|0x1E00000;
275 return (op
&0xF010F000)|((op
>>16)&0xF)|0x1A00000;
276 case 0x1C00000: //bic
281 case 0x1800000: //orr
283 return (op
&0xFFF0FFFF)|0x1E00000;
289 if(c
<256) /* catch undefined <<32 */
292 m
=(0xff>>i
)|(0xff<<(32-i
));
294 return op
|(i
<<7)|(c
<<i
)|(c
>>(32-i
));
304 void stuff_const_harder(uint32_t op
, uint32_t v
) {
310 uint32_t a
[16], nv
, no
, o2
, n2
;
313 o2
=(op
&0xfff0ffff)|((op
&0xf000)<<4);;
315 a
[i
]=(a
[i
-1]>>2)|(a
[i
-1]<<30);
317 for(j
=i
<4?i
+12:15;j
>=i
+4;j
--)
318 if((v
&(a
[i
]|a
[j
]))==v
) {
319 o(stuff_const(op
,v
&a
[i
]));
320 o(stuff_const(o2
,v
&a
[j
]));
327 for(j
=i
<4?i
+12:15;j
>=i
+4;j
--)
328 if((nv
&(a
[i
]|a
[j
]))==nv
) {
329 o(stuff_const(no
,nv
&a
[i
]));
330 o(stuff_const(n2
,nv
&a
[j
]));
335 for(k
=i
<4?i
+12:15;k
>=j
+4;k
--)
336 if((v
&(a
[i
]|a
[j
]|a
[k
]))==v
) {
337 o(stuff_const(op
,v
&a
[i
]));
338 o(stuff_const(o2
,v
&a
[j
]));
339 o(stuff_const(o2
,v
&a
[k
]));
346 for(k
=i
<4?i
+12:15;k
>=j
+4;k
--)
347 if((nv
&(a
[i
]|a
[j
]|a
[k
]))==nv
) {
348 o(stuff_const(no
,nv
&a
[i
]));
349 o(stuff_const(n2
,nv
&a
[j
]));
350 o(stuff_const(n2
,nv
&a
[k
]));
353 o(stuff_const(op
,v
&a
[0]));
354 o(stuff_const(o2
,v
&a
[4]));
355 o(stuff_const(o2
,v
&a
[8]));
356 o(stuff_const(o2
,v
&a
[12]));
360 ST_FUNC
uint32_t encbranch(int pos
, int addr
, int fail
)
364 if(addr
>=0x1000000 || addr
<-0x1000000) {
366 tcc_error("FIXME: function bigger than 32MB");
369 return 0x0A000000|(addr
&0xffffff);
372 int decbranch(int pos
)
375 x
=*(uint32_t *)(cur_text_section
->data
+ pos
);
382 /* output a symbol and patch all calls to it */
383 void gsym_addr(int t
, int a
)
388 x
=(uint32_t *)(cur_text_section
->data
+ t
);
391 *x
=0xE1A00000; // nop
394 *x
|= encbranch(lt
,a
,1);
405 static uint32_t vfpr(int r
)
407 if(r
<TREG_F0
|| r
>TREG_F7
)
408 tcc_error("compiler error! register %i is no vfp register",r
);
412 static uint32_t fpr(int r
)
414 if(r
<TREG_F0
|| r
>TREG_F3
)
415 tcc_error("compiler error! register %i is no fpa register",r
);
420 static uint32_t intr(int r
)
424 if((r
<0 || r
>4) && r
!=14)
425 tcc_error("compiler error! register %i is no int register",r
);
429 static void calcaddr(uint32_t *base
, int *off
, int *sgn
, int maxoff
, unsigned shift
)
431 if(*off
>maxoff
|| *off
&((1<<shift
)-1)) {
438 y
=stuff_const(x
,*off
&~maxoff
);
444 y
=stuff_const(x
,(*off
+maxoff
)&~maxoff
);
448 *off
=((*off
+maxoff
)&~maxoff
)-*off
;
451 stuff_const_harder(x
,*off
&~maxoff
);
456 static uint32_t mapcc(int cc
)
461 return 0x30000000; /* CC/LO */
463 return 0x20000000; /* CS/HS */
465 return 0x00000000; /* EQ */
467 return 0x10000000; /* NE */
469 return 0x90000000; /* LS */
471 return 0x80000000; /* HI */
473 return 0x40000000; /* MI */
475 return 0x50000000; /* PL */
477 return 0xB0000000; /* LT */
479 return 0xA0000000; /* GE */
481 return 0xD0000000; /* LE */
483 return 0xC0000000; /* GT */
485 tcc_error("unexpected condition code");
486 return 0xE0000000; /* AL */
489 static int negcc(int cc
)
518 tcc_error("unexpected condition code");
522 /* load 'r' from value 'sv' */
523 void load(int r
, SValue
*sv
)
525 int v
, ft
, fc
, fr
, sign
;
542 uint32_t base
= 0xB; // fp
545 if((ft
& VT_BTYPE
) == VT_FUNC
)
548 size
= type_size(&sv
->type
, &align
);
553 v1
.r
= VT_LOCAL
| VT_LVAL
;
555 load(base
=14 /* lr */, &v1
);
558 } else if(v
== VT_CONST
) {
566 } else if(v
< VT_CONST
) {
573 calcaddr(&base
,&fc
,&sign
,1020,2);
575 op
=0xED100A00; /* flds */
578 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
579 op
|=0x100; /* flds -> fldd */
580 o(op
|(vfpr(r
)<<12)|(fc
>>2)|(base
<<16));
585 #if LDOUBLE_SIZE == 8
586 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
589 if ((ft
& VT_BTYPE
) == VT_DOUBLE
)
591 else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
)
594 o(op
|(fpr(r
)<<12)|(fc
>>2)|(base
<<16));
596 } else if((ft
& (VT_BTYPE
|VT_UNSIGNED
)) == VT_BYTE
597 || (ft
& VT_BTYPE
) == VT_SHORT
) {
598 calcaddr(&base
,&fc
,&sign
,255,0);
600 if ((ft
& VT_BTYPE
) == VT_SHORT
)
602 if ((ft
& VT_UNSIGNED
) == 0)
606 o(op
|(intr(r
)<<12)|(base
<<16)|((fc
&0xf0)<<4)|(fc
&0xf));
608 calcaddr(&base
,&fc
,&sign
,4095,0);
612 if ((ft
& VT_BTYPE
) == VT_BYTE
|| (ft
& VT_BTYPE
) == VT_BOOL
)
614 o(op
|(intr(r
)<<12)|fc
|(base
<<16));
620 op
=stuff_const(0xE3A00000|(intr(r
)<<12),sv
->c
.ul
);
621 if (fr
& VT_SYM
|| !op
) {
622 o(0xE59F0000|(intr(r
)<<12));
625 greloc(cur_text_section
, sv
->sym
, ind
, R_ARM_ABS32
);
630 } else if (v
== VT_LOCAL
) {
631 op
=stuff_const(0xE28B0000|(intr(r
)<<12),sv
->c
.ul
);
632 if (fr
& VT_SYM
|| !op
) {
633 o(0xE59F0000|(intr(r
)<<12));
635 if(fr
& VT_SYM
) // needed ?
636 greloc(cur_text_section
, sv
->sym
, ind
, R_ARM_ABS32
);
638 o(0xE08B0000|(intr(r
)<<12)|intr(r
));
642 } else if(v
== VT_CMP
) {
643 o(mapcc(sv
->c
.ul
)|0x3A00001|(intr(r
)<<12));
644 o(mapcc(negcc(sv
->c
.ul
))|0x3A00000|(intr(r
)<<12));
646 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
649 o(0xE3A00000|(intr(r
)<<12)|t
);
652 o(0xE3A00000|(intr(r
)<<12)|(t
^1));
654 } else if (v
< VT_CONST
) {
657 o(0xEEB00A40|(vfpr(r
)<<12)|vfpr(v
)|T2CPR(ft
)); /* fcpyX */
659 o(0xEE008180|(fpr(r
)<<12)|fpr(v
));
662 o(0xE1A00000|(intr(r
)<<12)|intr(v
));
666 tcc_error("load unimplemented!");
669 /* store register 'r' in lvalue 'v' */
670 void store(int r
, SValue
*sv
)
673 int v
, ft
, fc
, fr
, sign
;
688 if (fr
& VT_LVAL
|| fr
== VT_LOCAL
) {
694 } else if(v
== VT_CONST
) {
705 calcaddr(&base
,&fc
,&sign
,1020,2);
707 op
=0xED000A00; /* fsts */
710 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
711 op
|=0x100; /* fsts -> fstd */
712 o(op
|(vfpr(r
)<<12)|(fc
>>2)|(base
<<16));
717 #if LDOUBLE_SIZE == 8
718 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
721 if ((ft
& VT_BTYPE
) == VT_DOUBLE
)
723 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
)
726 o(op
|(fpr(r
)<<12)|(fc
>>2)|(base
<<16));
729 } else if((ft
& VT_BTYPE
) == VT_SHORT
) {
730 calcaddr(&base
,&fc
,&sign
,255,0);
734 o(op
|(intr(r
)<<12)|(base
<<16)|((fc
&0xf0)<<4)|(fc
&0xf));
736 calcaddr(&base
,&fc
,&sign
,4095,0);
740 if ((ft
& VT_BTYPE
) == VT_BYTE
|| (ft
& VT_BTYPE
) == VT_BOOL
)
742 o(op
|(intr(r
)<<12)|fc
|(base
<<16));
747 tcc_error("store unimplemented");
/* Emit code adding the immediate 'val' to sp (0xE28DD000 is
   "add sp, sp, #imm"); stuff_const_harder splits the constant
   into encodable immediates as needed. */
static void gadd_sp(int val)
{
  stuff_const_harder(0xE28DD000, val);
}
755 /* 'is_jmp' is '1' if it is a jump */
756 static void gcall_or_jmp(int is_jmp
)
759 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
762 x
=encbranch(ind
,ind
+vtop
->c
.ul
,0);
764 if (vtop
->r
& VT_SYM
) {
765 /* relocation case */
766 greloc(cur_text_section
, vtop
->sym
, ind
, R_ARM_PC24
);
768 put_elf_reloc(symtab_section
, cur_text_section
, ind
, R_ARM_PC24
, 0);
769 o(x
|(is_jmp
?0xE0000000:0xE1000000));
772 o(0xE28FE004); // add lr,pc,#4
773 o(0xE51FF004); // ldr pc,[pc,#-4]
774 if (vtop
->r
& VT_SYM
)
775 greloc(cur_text_section
, vtop
->sym
, ind
, R_ARM_ABS32
);
779 /* otherwise, indirect call */
782 o(0xE1A0E00F); // mov lr,pc
783 o(0xE1A0F000|intr(r
)); // mov pc,r
787 /* Return whether a structure is an homogeneous float aggregate or not.
788 The answer is true if all the elements of the structure are of the same
789 primitive float type and there is less than 4 elements.
791 type: the type corresponding to the structure to be tested */
792 static int is_hgen_float_aggr(CType
*type
)
794 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
796 int btype
, nb_fields
= 0;
798 ref
= type
->ref
->next
;
799 btype
= ref
->type
.t
& VT_BTYPE
;
800 if (btype
== VT_FLOAT
|| btype
== VT_DOUBLE
) {
801 for(; ref
&& btype
== (ref
->type
.t
& VT_BTYPE
); ref
= ref
->next
, nb_fields
++);
802 return !ref
&& nb_fields
<= 4;
809 signed char avail
[3]; /* 3 holes max with only float and double alignments */
810 int first_hole
; /* first available hole */
811 int last_hole
; /* last available hole (none if equal to first_hole) */
812 int first_free_reg
; /* next free register in the sequence, hole excluded */
815 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
817 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
818 param) according to the rules described in the procedure call standard for
819 the ARM architecture (AAPCS). If found, the registers are assigned to this
820 VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
821 and the parameter is a single float.
823 avregs: opaque structure to keep track of available VFP co-processor regs
824 align: alignment contraints for the param, as returned by type_size()
825 size: size of the parameter, as returned by type_size() */
826 int assign_vfpreg(struct avail_regs
*avregs
, int align
, int size
)
830 if (avregs
->first_free_reg
== -1)
832 if (align
>> 3) { /* double alignment */
833 first_reg
= avregs
->first_free_reg
;
834 /* alignment contraint not respected so use next reg and record hole */
836 avregs
->avail
[avregs
->last_hole
++] = first_reg
++;
837 } else { /* no special alignment (float or array of float) */
838 /* if single float and a hole is available, assign the param to it */
839 if (size
== 4 && avregs
->first_hole
!= avregs
->last_hole
)
840 return avregs
->avail
[avregs
->first_hole
++];
842 first_reg
= avregs
->first_free_reg
;
844 if (first_reg
+ size
/ 4 <= 16) {
845 avregs
->first_free_reg
= first_reg
+ size
/ 4;
848 avregs
->first_free_reg
= -1;
852 /* Returns whether all params need to be passed in core registers or not.
853 This is the case for function part of the runtime ABI. */
854 int floats_in_core_regs(SValue
*sval
)
859 switch (sval
->sym
->v
) {
860 case TOK___floatundisf
:
861 case TOK___floatundidf
:
862 case TOK___fixunssfdi
:
863 case TOK___fixunsdfdi
:
865 case TOK___fixunsxfdi
:
867 case TOK___floatdisf
:
868 case TOK___floatdidf
:
878 /* Return the number of registers needed to return the struct, or 0 if
879 returning via struct pointer. */
880 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
) {
883 size
= type_size(vt
, &align
);
884 if (float_abi
== ARM_HARD_FLOAT
&& !variadic
&&
885 (is_float(vt
->t
) || is_hgen_float_aggr(vt
))) {
889 return (size
+ 7) >> 3;
890 } else if (size
<= 4) {
902 /* Parameters are classified according to how they are copied to their final
903 destination for the function call. Because the copying is performed class
904 after class according to the order in the union below, it is important that
905 some constraints about the order of the members of this union are respected:
906 - CORE_STRUCT_CLASS must come after STACK_CLASS;
907 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
909 - VFP_STRUCT_CLASS must come after VFP_CLASS.
910 See the comment for the main loop in copy_params() for the reason. */
921 int start
; /* first reg or addr used depending on the class */
922 int end
; /* last reg used or next free addr depending on the class */
923 SValue
*sval
; /* pointer to SValue on the value stack */
924 struct param_plan
*prev
; /* previous element in this class */
928 struct param_plan
*pplans
; /* array of all the param plans */
929 struct param_plan
*clsplans
[NB_CLASSES
]; /* per class lists of param plans */
932 #define add_param_plan(plan,pplan,class) \
934 pplan.prev = plan->clsplans[class]; \
935 plan->pplans[plan ## _nb] = pplan; \
936 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
939 /* Assign parameters to registers and stack with alignment according to the
940 rules in the procedure call standard for the ARM architecture (AAPCS).
941 The overall assignment is recorded in an array of per parameter structures
942 called parameter plans. The parameter plans are also further organized in a
943 number of linked lists, one per class of parameter (see the comment for the
944 definition of union reg_class).
946 nb_args: number of parameters of the function for which a call is generated
947 float_abi: float ABI in use for this function call
948 plan: the structure where the overall assignment is recorded
949 todo: a bitmap that record which core registers hold a parameter
951 Returns the amount of stack space needed for parameter passing
953 Note: this function allocated an array in plan->pplans with tcc_malloc. It
954 is the responsibility of the caller to free this array once used (ie not
955 before copy_params). */
956 static int assign_regs(int nb_args
, int float_abi
, struct plan
*plan
, int *todo
)
959 int ncrn
/* next core register number */, nsaa
/* next stacked argument address*/;
961 struct param_plan pplan
;
962 struct avail_regs avregs
= AVAIL_REGS_INITIALIZER
;
966 plan
->pplans
= tcc_malloc(nb_args
* sizeof(*plan
->pplans
));
967 memset(plan
->clsplans
, 0, sizeof(plan
->clsplans
));
968 for(i
= nb_args
; i
-- ;) {
969 int j
, start_vfpreg
= 0;
970 CType type
= vtop
[-i
].type
;
972 size
= type_size(&type
, &align
);
973 size
= (size
+ 3) & ~3;
974 align
= (align
+ 3) & ~3;
975 switch(vtop
[-i
].type
.t
& VT_BTYPE
) {
980 if (float_abi
== ARM_HARD_FLOAT
) {
981 int is_hfa
= 0; /* Homogeneous float aggregate */
983 if (is_float(vtop
[-i
].type
.t
)
984 || (is_hfa
= is_hgen_float_aggr(&vtop
[-i
].type
))) {
987 start_vfpreg
= assign_vfpreg(&avregs
, align
, size
);
988 end_vfpreg
= start_vfpreg
+ ((size
- 1) >> 2);
989 if (start_vfpreg
>= 0) {
990 pplan
= (struct param_plan
) {start_vfpreg
, end_vfpreg
, &vtop
[-i
]};
992 add_param_plan(plan
, pplan
, VFP_STRUCT_CLASS
);
994 add_param_plan(plan
, pplan
, VFP_CLASS
);
1000 ncrn
= (ncrn
+ (align
-1)/4) & ~((align
/4) - 1);
1001 if (ncrn
+ size
/4 <= 4 || (ncrn
< 4 && start_vfpreg
!= -1)) {
1002 /* The parameter is allocated both in core register and on stack. As
1003 * such, it can be of either class: it would either be the last of
1004 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1005 for (j
= ncrn
; j
< 4 && j
< ncrn
+ size
/ 4; j
++)
1007 pplan
= (struct param_plan
) {ncrn
, j
, &vtop
[-i
]};
1008 add_param_plan(plan
, pplan
, CORE_STRUCT_CLASS
);
1011 nsaa
= (ncrn
- 4) * 4;
1019 int is_long
= (vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LLONG
;
1022 ncrn
= (ncrn
+ 1) & -2;
1026 pplan
= (struct param_plan
) {ncrn
, ncrn
, &vtop
[-i
]};
1030 add_param_plan(plan
, pplan
, CORE_CLASS
);
1034 nsaa
= (nsaa
+ (align
- 1)) & ~(align
- 1);
1035 pplan
= (struct param_plan
) {nsaa
, nsaa
+ size
, &vtop
[-i
]};
1036 add_param_plan(plan
, pplan
, STACK_CLASS
);
1037 nsaa
+= size
; /* size already rounded up before */
1042 #undef add_param_plan
1044 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1047 nb_args: number of parameters the function take
1048 plan: the overall assignment plan for parameters
1049 todo: a bitmap indicating what core reg will hold a parameter
1051 Returns the number of SValue added by this function on the value stack */
1052 static int copy_params(int nb_args
, struct plan
*plan
, int todo
)
1054 int size
, align
, r
, i
, nb_extra_sval
= 0;
1055 struct param_plan
*pplan
;
1057 /* Several constraints require parameters to be copied in a specific order:
1058 - structures are copied to the stack before being loaded in a reg;
1059 - floats loaded to an odd numbered VFP reg are first copied to the
1060 preceding even numbered VFP reg and then moved to the next VFP reg.
1062 It is thus important that:
1063 - structures assigned to core regs must be copied after parameters
1064 assigned to the stack but before structures assigned to VFP regs because
1065 a structure can lie partly in core registers and partly on the stack;
1066 - parameters assigned to the stack and all structures be copied before
1067 parameters assigned to a core reg since copying a parameter to the stack
1068 require using a core reg;
1069 - parameters assigned to VFP regs be copied before structures assigned to
1070 VFP regs as the copy might use an even numbered VFP reg that already
1071 holds part of a structure. */
1072 for(i
= 0; i
< NB_CLASSES
; i
++) {
1073 for(pplan
= plan
->clsplans
[i
]; pplan
; pplan
= pplan
->prev
) {
1074 vpushv(pplan
->sval
);
1075 pplan
->sval
->r
= pplan
->sval
->r2
= VT_CONST
; /* disable entry */
1078 case CORE_STRUCT_CLASS
:
1079 case VFP_STRUCT_CLASS
:
1080 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
1082 size
= type_size(&pplan
->sval
->type
, &align
);
1083 /* align to stack align size */
1084 size
= (size
+ 3) & ~3;
1085 if (i
== STACK_CLASS
&& pplan
->prev
)
1086 padding
= pplan
->start
- pplan
->prev
->end
;
1087 size
+= padding
; /* Add padding if any */
1088 /* allocate the necessary size on stack */
1090 /* generate structure store */
1091 r
= get_reg(RC_INT
);
1092 o(0xE28D0000|(intr(r
)<<12)|padding
); /* add r, sp, padding */
1093 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1095 vstore(); /* memcpy to current sp + potential padding */
1097 /* Homogeneous float aggregate are loaded to VFP registers
1098 immediately since there is no way of loading data in multiple
1099 non consecutive VFP registers as what is done for other
1100 structures (see the use of todo). */
1101 if (i
== VFP_STRUCT_CLASS
) {
1102 int first
= pplan
->start
, nb
= pplan
->end
- first
+ 1;
1103 /* vpop.32 {pplan->start, ..., pplan->end} */
1104 o(0xECBD0A00|(first
&1)<<22|(first
>>1)<<12|nb
);
1105 /* No need to write the register used to a SValue since VFP regs
1106 cannot be used for gcall_or_jmp */
1109 if (is_float(pplan
->sval
->type
.t
)) {
1111 r
= vfpr(gv(RC_FLOAT
)) << 12;
1112 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_FLOAT
)
1116 r
|= 0x101; /* vpush.32 -> vpush.64 */
1118 o(0xED2D0A01 + r
); /* vpush */
1120 r
= fpr(gv(RC_FLOAT
)) << 12;
1121 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_FLOAT
)
1123 else if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1126 size
= LDOUBLE_SIZE
;
1133 o(0xED2D0100|r
|(size
>>2)); /* some kind of vpush for FPA */
1136 /* simple type (currently always same size) */
1137 /* XXX: implicit cast ? */
1139 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1143 o(0xE52D0004|(intr(r
)<<12)); /* push r */
1147 o(0xE52D0004|(intr(r
)<<12)); /* push r */
1149 if (i
== STACK_CLASS
&& pplan
->prev
)
1150 gadd_sp(pplan
->prev
->end
- pplan
->start
); /* Add padding if any */
1155 gv(regmask(TREG_F0
+ (pplan
->start
>> 1)));
1156 if (pplan
->start
& 1) { /* Must be in upper part of double register */
1157 o(0xEEF00A40|((pplan
->start
>>1)<<12)|(pplan
->start
>>1)); /* vmov.f32 s(n+1), sn */
1158 vtop
->r
= VT_CONST
; /* avoid being saved on stack by gv for next float */
1163 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1165 gv(regmask(pplan
->end
));
1166 pplan
->sval
->r2
= vtop
->r
;
1169 gv(regmask(pplan
->start
));
1170 /* Mark register as used so that gcall_or_jmp use another one
1171 (regs >=4 are free as never used to pass parameters) */
1172 pplan
->sval
->r
= vtop
->r
;
1179 /* Manually free remaining registers since next parameters are loaded
1180 * manually, without the help of gv(int). */
1184 o(0xE8BD0000|todo
); /* pop {todo} */
1185 for(pplan
= plan
->clsplans
[CORE_STRUCT_CLASS
]; pplan
; pplan
= pplan
->prev
) {
1187 pplan
->sval
->r
= pplan
->start
;
1188 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1189 can occupy more than 2 registers. Thus, we need to push on the value
1190 stack some fake parameter to have on SValue for each registers used
1191 by a structure (r2 is not used). */
1192 for (r
= pplan
->start
+ 1; r
<= pplan
->end
; r
++) {
1193 if (todo
& (1 << r
)) {
1201 return nb_extra_sval
;
1204 /* Generate function call. The function address is pushed first, then
1205 all the parameters in call order. This functions pops all the
1206 parameters and the function address. */
1207 void gfunc_call(int nb_args
)
1210 int variadic
, def_float_abi
= float_abi
;
1215 if (float_abi
== ARM_HARD_FLOAT
) {
1216 variadic
= (vtop
[-nb_args
].type
.ref
->c
== FUNC_ELLIPSIS
);
1217 if (variadic
|| floats_in_core_regs(&vtop
[-nb_args
]))
1218 float_abi
= ARM_SOFTFP_FLOAT
;
1221 /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1222 VT_JMP anywhere except on the top of the stack because it would complicate
1223 the code generator. */
1224 r
= vtop
->r
& VT_VALMASK
;
1225 if (r
== VT_CMP
|| (r
& ~1) == VT_JMP
)
1228 args_size
= assign_regs(nb_args
, float_abi
, &plan
, &todo
);
1231 if (args_size
& 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1232 args_size
= (args_size
+ 7) & ~7;
1233 o(0xE24DD004); /* sub sp, sp, #4 */
1237 nb_args
+= copy_params(nb_args
, &plan
, todo
);
1238 tcc_free(plan
.pplans
);
1240 /* Move fct SValue on top as required by gcall_or_jmp */
1244 gadd_sp(args_size
); /* pop all parameters passed on the stack */
1245 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1246 if(float_abi
== ARM_SOFTFP_FLOAT
&& is_float(vtop
->type
.ref
->type
.t
)) {
1247 if((vtop
->type
.ref
->type
.t
& VT_BTYPE
) == VT_FLOAT
) {
1248 o(0xEE000A10); /*vmov s0, r0 */
1250 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1251 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1255 vtop
-= nb_args
+ 1; /* Pop all params and fct address from value stack */
1256 leaffunc
= 0; /* we are calling a function, so we aren't in a leaf function */
1257 float_abi
= def_float_abi
;
1260 /* generate function prolog of type 't' */
1261 void gfunc_prolog(CType
*func_type
)
1264 int n
, nf
, size
, align
, struct_ret
= 0;
1265 int addr
, pn
, sn
; /* pn=core, sn=stack */
1266 struct avail_regs avregs
= AVAIL_REGS_INITIALIZER
;
1269 sym
= func_type
->ref
;
1270 func_vt
= sym
->type
;
1271 func_var
= (func_type
->ref
->c
== FUNC_ELLIPSIS
);
1274 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
&&
1275 !gfunc_sret(&func_vt
, func_var
, &ret_type
, &align
))
1279 func_vc
= 12; /* Offset from fp of the place to store the result */
1281 for(sym2
= sym
->next
; sym2
&& (n
< 4 || nf
< 16); sym2
= sym2
->next
) {
1282 size
= type_size(&sym2
->type
, &align
);
1284 if (float_abi
== ARM_HARD_FLOAT
&& !func_var
&&
1285 (is_float(sym2
->type
.t
) || is_hgen_float_aggr(&sym2
->type
))) {
1286 int tmpnf
= assign_vfpreg(&avregs
, align
, size
);
1287 tmpnf
+= (size
+ 3) / 4;
1288 nf
= (tmpnf
> nf
) ? tmpnf
: nf
;
1292 n
+= (size
+ 3) / 4;
1294 o(0xE1A0C00D); /* mov ip,sp */
1303 o(0xE92D0000|((1<<n
)-1)); /* save r0-r4 on stack if needed */
1308 nf
=(nf
+1)&-2; /* nf => HARDFLOAT => EABI */
1309 o(0xED2D0A00|nf
); /* save s0-s15 on stack if needed */
1311 o(0xE92D5800); /* save fp, ip, lr */
1312 o(0xE1A0B00D); /* mov fp, sp */
1313 func_sub_sp_offset
= ind
;
1314 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1317 if (float_abi
== ARM_HARD_FLOAT
) {
1319 avregs
= AVAIL_REGS_INITIALIZER
;
1322 pn
= struct_ret
, sn
= 0;
1323 while ((sym
= sym
->next
)) {
1326 size
= type_size(type
, &align
);
1327 size
= (size
+ 3) >> 2;
1328 align
= (align
+ 3) & ~3;
1330 if (float_abi
== ARM_HARD_FLOAT
&& !func_var
&& (is_float(sym
->type
.t
)
1331 || is_hgen_float_aggr(&sym
->type
))) {
1332 int fpn
= assign_vfpreg(&avregs
, align
, size
<< 2);
1341 pn
= (pn
+ (align
-1)/4) & -(align
/4);
1343 addr
= (nf
+ pn
) * 4;
1350 sn
= (sn
+ (align
-1)/4) & -(align
/4);
1352 addr
= (n
+ nf
+ sn
) * 4;
1355 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| lvalue_type(type
->t
),
1363 /* generate function epilog */
1364 void gfunc_epilog(void)
1368 /* Copy float return value to core register if base standard is used and
1369 float computation is made with VFP */
1370 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1371 if ((float_abi
== ARM_SOFTFP_FLOAT
|| func_var
) && is_float(func_vt
.t
)) {
1372 if((func_vt
.t
& VT_BTYPE
) == VT_FLOAT
)
1373 o(0xEE100A10); /* fmrs r0, s0 */
1375 o(0xEE100B10); /* fmrdl r0, d0 */
1376 o(0xEE301B10); /* fmrdh r1, d0 */
1380 o(0xE89BA800); /* restore fp, sp, pc */
1381 diff
= (-loc
+ 3) & -4;
1384 diff
= ((diff
+ 11) & -8) - 4;
1387 x
=stuff_const(0xE24BD000, diff
); /* sub sp,fp,# */
1389 *(uint32_t *)(cur_text_section
->data
+ func_sub_sp_offset
) = x
;
1393 o(0xE59FC004); /* ldr ip,[pc+4] */
1394 o(0xE04BD00C); /* sub sp,fp,ip */
1395 o(0xE1A0F00E); /* mov pc,lr */
1397 *(uint32_t *)(cur_text_section
->data
+ func_sub_sp_offset
) = 0xE1000000|encbranch(func_sub_sp_offset
,addr
,1);
1402 /* generate a jump to a label */
1407 o(0xE0000000|encbranch(r
,t
,1));
1411 /* generate a jump to a fixed address */
1412 void gjmp_addr(int a
)
1417 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1418 int gtst(int inv
, int t
)
1422 v
= vtop
->r
& VT_VALMASK
;
1425 op
=mapcc(inv
?negcc(vtop
->c
.i
):vtop
->c
.i
);
1426 op
|=encbranch(r
,t
,1);
1429 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
1430 if ((v
& 1) == inv
) {
1439 p
= decbranch(lp
=p
);
1441 x
= (uint32_t *)(cur_text_section
->data
+ lp
);
1443 *x
|= encbranch(lp
,t
,1);
1452 if (is_float(vtop
->type
.t
)) {
1455 o(0xEEB50A40|(vfpr(r
)<<12)|T2CPR(vtop
->type
.t
)); /* fcmpzX */
1456 o(0xEEF1FA10); /* fmstat */
1458 o(0xEE90F118|(fpr(r
)<<16));
1462 return gtst(inv
, t
);
1463 } else if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1464 /* constant jmp optimization */
1465 if ((vtop
->c
.i
!= 0) != inv
)
1469 o(0xE3300000|(intr(v
)<<16));
1472 return gtst(inv
, t
);
1479 /* generate an integer binary operation */
1480 void gen_opi(int op
)
1483 uint32_t opc
= 0, r
, fr
;
1484 unsigned short retreg
= REG_IRET
;
1492 case TOK_ADDC1
: /* add with carry generation */
1500 case TOK_SUBC1
: /* sub with carry generation */
1504 case TOK_ADDC2
: /* add with carry use */
1508 case TOK_SUBC2
: /* sub with carry use */
1525 gv2(RC_INT
, RC_INT
);
1529 o(0xE0000090|(intr(r
)<<16)|(intr(r
)<<8)|intr(fr
));
1554 func
=TOK___aeabi_idivmod
;
1563 func
=TOK___aeabi_uidivmod
;
1571 gv2(RC_INT
, RC_INT
);
1572 r
=intr(vtop
[-1].r2
=get_reg(RC_INT
));
1574 vtop
[-1].r
=get_reg_ex(RC_INT
,regmask(c
));
1576 o(0xE0800090|(r
<<16)|(intr(vtop
->r
)<<12)|(intr(c
)<<8)|intr(vtop
[1].r
));
1585 if((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1586 if(opc
== 4 || opc
== 5 || opc
== 0xc) {
1588 opc
|=2; // sub -> rsb
1591 if ((vtop
->r
& VT_VALMASK
) == VT_CMP
||
1592 (vtop
->r
& (VT_VALMASK
& ~1)) == VT_JMP
)
1597 opc
=0xE0000000|(opc
<<20)|(c
<<16);
1598 if((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1600 x
=stuff_const(opc
|0x2000000,vtop
->c
.i
);
1602 r
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,regmask(vtop
[-1].r
)));
1607 fr
=intr(gv(RC_INT
));
1608 r
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,two2mask(vtop
->r
,vtop
[-1].r
)));
1612 if (op
>= TOK_ULT
&& op
<= TOK_GT
) {
1618 opc
=0xE1A00000|(opc
<<5);
1619 if ((vtop
->r
& VT_VALMASK
) == VT_CMP
||
1620 (vtop
->r
& (VT_VALMASK
& ~1)) == VT_JMP
)
1626 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1627 fr
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,regmask(vtop
[-1].r
)));
1628 c
= vtop
->c
.i
& 0x1f;
1629 o(opc
|(c
<<7)|(fr
<<12));
1631 fr
=intr(gv(RC_INT
));
1632 c
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,two2mask(vtop
->r
,vtop
[-1].r
)));
1633 o(opc
|(c
<<12)|(fr
<<8)|0x10);
1638 vpush_global_sym(&func_old_type
, func
);
1645 tcc_error("gen_opi %i unimplemented!",op
);
1650 static int is_zero(int i
)
1652 if((vtop
[i
].r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) != VT_CONST
)
1654 if (vtop
[i
].type
.t
== VT_FLOAT
)
1655 return (vtop
[i
].c
.f
== 0.f
);
1656 else if (vtop
[i
].type
.t
== VT_DOUBLE
)
1657 return (vtop
[i
].c
.d
== 0.0);
1658 return (vtop
[i
].c
.ld
== 0.l
);
1661 /* generate a floating point operation 'v = t1 op t2' instruction. The
1662 * two operands are guaranted to have the same floating point type */
1663 void gen_opf(int op
)
1667 x
=0xEE000A00|T2CPR(vtop
->type
.t
);
1685 x
|=0x810000; /* fsubX -> fnegX */
1698 if(op
< TOK_ULT
|| op
> TOK_GT
) {
1699 tcc_error("unknown fp op %x!",op
);
1705 case TOK_LT
: op
=TOK_GT
; break;
1706 case TOK_GE
: op
=TOK_ULE
; break;
1707 case TOK_LE
: op
=TOK_GE
; break;
1708 case TOK_GT
: op
=TOK_ULT
; break;
1711 x
|=0xB40040; /* fcmpX */
1712 if(op
!=TOK_EQ
&& op
!=TOK_NE
)
1713 x
|=0x80; /* fcmpX -> fcmpeX */
1716 o(x
|0x10000|(vfpr(gv(RC_FLOAT
))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1718 x
|=vfpr(gv(RC_FLOAT
));
1720 o(x
|(vfpr(gv(RC_FLOAT
))<<12));
1723 o(0xEEF1FA10); /* fmstat */
1726 case TOK_LE
: op
=TOK_ULE
; break;
1727 case TOK_LT
: op
=TOK_ULT
; break;
1728 case TOK_UGE
: op
=TOK_GE
; break;
1729 case TOK_UGT
: op
=TOK_GT
; break;
1746 vtop
->r
=get_reg_ex(RC_FLOAT
,r
);
1749 o(x
|(vfpr(vtop
->r
)<<12));
1753 static uint32_t is_fconst()
1757 if((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) != VT_CONST
)
1759 if (vtop
->type
.t
== VT_FLOAT
)
1761 else if (vtop
->type
.t
== VT_DOUBLE
)
1791 /* generate a floating point operation 'v = t1 op t2' instruction. The
1792 two operands are guaranted to have the same floating point type */
1793 void gen_opf(int op
)
1795 uint32_t x
, r
, r2
, c1
, c2
;
1796 //fputs("gen_opf\n",stderr);
1802 #if LDOUBLE_SIZE == 8
1803 if ((vtop
->type
.t
& VT_BTYPE
) != VT_FLOAT
)
1806 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1808 else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
)
1819 r
=fpr(gv(RC_FLOAT
));
1826 r2
=fpr(gv(RC_FLOAT
));
1835 r
=fpr(gv(RC_FLOAT
));
1837 } else if(c1
&& c1
<=0xf) {
1840 r
=fpr(gv(RC_FLOAT
));
1845 r
=fpr(gv(RC_FLOAT
));
1847 r2
=fpr(gv(RC_FLOAT
));
1856 r
=fpr(gv(RC_FLOAT
));
1861 r2
=fpr(gv(RC_FLOAT
));
1869 r
=fpr(gv(RC_FLOAT
));
1871 } else if(c1
&& c1
<=0xf) {
1874 r
=fpr(gv(RC_FLOAT
));
1879 r
=fpr(gv(RC_FLOAT
));
1881 r2
=fpr(gv(RC_FLOAT
));
1885 if(op
>= TOK_ULT
&& op
<= TOK_GT
) {
1886 x
|=0xd0f110; // cmfe
1887 /* bug (intention?) in Linux FPU emulator
1888 doesn't set carry if equal */
1894 tcc_error("unsigned comparison on floats?");
1900 op
=TOK_ULE
; /* correct in unordered case only if AC bit in FPSR set */
1904 x
&=~0x400000; // cmfe -> cmf
1926 r
=fpr(gv(RC_FLOAT
));
1933 r2
=fpr(gv(RC_FLOAT
));
1935 vtop
[-1].r
= VT_CMP
;
1938 tcc_error("unknown fp op %x!",op
);
1942 if(vtop
[-1].r
== VT_CMP
)
1948 vtop
[-1].r
=get_reg_ex(RC_FLOAT
,two2mask(vtop
[-1].r
,c1
));
1952 o(x
|(r
<<16)|(c1
<<12)|r2
);
1956 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1957 and 'long long' cases. */
/* NOTE(review): damaged extraction -- original source line numbers are
   fused into the text and interior lines are missing (local declarations,
   braces, #else/#endif lines), so this block cannot be compiled as-is. */
1958 ST_FUNC
void gen_cvt_itof1(int t
)
1962 bt
=vtop
->type
.t
& VT_BTYPE
;
/* Word-sized (and smaller) integer source. */
1963 if(bt
== VT_INT
|| bt
== VT_SHORT
|| bt
== VT_BYTE
) {
/* VFP path: move the int register into a VFP register with fmsr, then
   convert in place (fsito*/
/* vs fuito* chosen by the signedness bit below). */
1969 r2
=vfpr(vtop
->r
=get_reg(RC_FLOAT
));
1970 o(0xEE000A10|(r
<<12)|(r2
<<16)); /* fmsr */
1972 if(!(vtop
->type
.t
& VT_UNSIGNED
))
1973 r2
|=0x80; /* fuitoX -> fsituX */
1974 o(0xEEB80A40|r2
|T2CPR(t
)); /* fYitoX*/
/* FPA path: flts converts to single, fltd (dsize bit) to double. */
1976 r2
=fpr(vtop
->r
=get_reg(RC_FLOAT
));
1977 if((t
& VT_BTYPE
) != VT_FLOAT
)
1978 dsize
=0x80; /* flts -> fltd */
1979 o(0xEE000110|dsize
|(r2
<<16)|(r
<<12)); /* flts */
/* FPA has only a signed convert: for 'unsigned int' sources whose top bit
   is set, the result came out 2^32 too small, so conditionally add
   4294967296.0 loaded from an inline literal pool (last_itod_magic caches
   the address of that constant between conversions). */
1980 if((vtop
->type
.t
& (VT_UNSIGNED
|VT_BTYPE
)) == (VT_UNSIGNED
|VT_INT
)) {
1982 o(0xE3500000|(r
<<12)); /* cmp */
1983 r
=fpr(get_reg(RC_FLOAT
));
1984 if(last_itod_magic
) {
1985 off
=ind
+8-last_itod_magic
;
1990 o(0xBD1F0100|(r
<<12)|off
); /* ldflts */
1992 o(0xEA000000); /* b */
1993 last_itod_magic
=ind
;
1994 o(0x4F800000); /* 4294967296.0f */
1996 o(0xBE000100|dsize
|(r2
<<16)|(r2
<<12)|r
); /* adflt */
/* 64-bit source: no inline conversion, call the libgcc soft-float helper
   matching the destination type and signedness. */
2000 } else if(bt
== VT_LLONG
) {
2002 CType
*func_type
= 0;
2003 if((t
& VT_BTYPE
) == VT_FLOAT
) {
2004 func_type
= &func_float_type
;
2005 if(vtop
->type
.t
& VT_UNSIGNED
)
2006 func
=TOK___floatundisf
;
2008 func
=TOK___floatdisf
;
2009 #if LDOUBLE_SIZE != 8
2010 } else if((t
& VT_BTYPE
) == VT_LDOUBLE
) {
2011 func_type
= &func_ldouble_type
;
2012 if(vtop
->type
.t
& VT_UNSIGNED
)
2013 func
=TOK___floatundixf
;
2015 func
=TOK___floatdixf
;
/* NOTE(review): the two consecutive 'else if' heads below are presumably
   the #if / #else arms of the LDOUBLE_SIZE != 8 conditional above, with
   the #else/#endif lines lost in extraction -- verify against full
   source. */
2016 } else if((t
& VT_BTYPE
) == VT_DOUBLE
) {
2018 } else if((t
& VT_BTYPE
) == VT_DOUBLE
|| (t
& VT_BTYPE
) == VT_LDOUBLE
) {
2020 func_type
= &func_double_type
;
2021 if(vtop
->type
.t
& VT_UNSIGNED
)
2022 func
=TOK___floatundidf
;
2024 func
=TOK___floatdidf
;
/* Push the helper symbol and (in missing lines) emit the call. */
2027 vpush_global_sym(func_type
, func
);
/* Any other source type is unsupported. */
2035 tcc_error("unimplemented gen_cvt_itof %x!",vtop
->type
.t
);
2038 /* convert fp to int 't' type */
/* NOTE(review): damaged extraction -- original source line numbers are
   fused into the text and interior lines are missing (locals, braces,
   #else/#endif lines), so this block cannot be compiled as-is.
   'r2' here temporarily holds the source basic type, not a register. */
2039 void gen_cvt_ftoi(int t
)
2045 r2
=vtop
->type
.t
& VT_BTYPE
;
/* VFP path: convert in the FP register (ftoXizY, round-to-zero), then
   move the result to an integer register with fmrs (0xEE100A10). */
2048 r
=vfpr(gv(RC_FLOAT
));
2050 o(0xEEBC0AC0|(r
<<12)|r
|T2CPR(r2
)|u
); /* ftoXizY */
2051 r2
=intr(vtop
->r
=get_reg(RC_INT
));
2052 o(0xEE100A10|(r
<<16)|(r2
<<12));
/* Unsigned 32-bit target on FPA: no direct instruction, call the libgcc
   __fixuns*si helper matching the source type. */
2057 func
=TOK___fixunssfsi
;
2058 #if LDOUBLE_SIZE != 8
2059 else if(r2
== VT_LDOUBLE
)
2060 func
=TOK___fixunsxfsi
;
/* NOTE(review): the consecutive 'else if' heads below are presumably the
   #if / #else arms of the conditional above, with #else/#endif lost in
   extraction -- verify against full source. */
2061 else if(r2
== VT_DOUBLE
)
2063 else if(r2
== VT_LDOUBLE
|| r2
== VT_DOUBLE
)
2065 func
=TOK___fixunsdfsi
;
/* Signed 32-bit target on FPA: fix instruction (0xEE100170) converts
   directly into an integer register. */
2067 r
=fpr(gv(RC_FLOAT
));
2068 r2
=intr(vtop
->r
=get_reg(RC_INT
));
2069 o(0xEE100170|(r2
<<12)|r
);
/* 64-bit target: always via runtime helper. */
2073 } else if(t
== VT_LLONG
) { // unsigned handled in gen_cvt_ftoi1
2076 #if LDOUBLE_SIZE != 8
2077 else if(r2
== VT_LDOUBLE
)
2079 else if(r2
== VT_DOUBLE
)
2081 else if(r2
== VT_LDOUBLE
|| r2
== VT_DOUBLE
)
2086 vpush_global_sym(&func_old_type
, func
);
/* 64-bit results come back in the REG_IRET/REG_LRET register pair. */
2091 vtop
->r2
= REG_LRET
;
2095 tcc_error("unimplemented gen_cvt_ftoi!");
2098 /* convert from one floating point type to another */
/* NOTE(review): damaged extraction -- the enclosing braces and the
   #ifdef TCC_ARM_VFP / #else / #endif lines appear to be among the lines
   lost in extraction; verify against full source. */
2099 void gen_cvt_ftof(int t
)
/* VFP path: only emit a conversion when exactly one of source and
   destination is single precision (fcvt, 0xEEB70AC0); float<->float and
   double<->double need no code. */
2102 if(((vtop
->type
.t
& VT_BTYPE
) == VT_FLOAT
) != ((t
& VT_BTYPE
) == VT_FLOAT
)) {
2103 uint32_t r
= vfpr(gv(RC_FLOAT
));
2104 o(0xEEB70AC0|(r
<<12)|r
|T2CPR(vtop
->type
.t
));
2107 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2112 /* computed goto support */
2119 /* Save the stack pointer onto the stack and return the location of its address */
/* Stub: VLA support is not implemented for this target; always aborts
   compilation via tcc_error. NOTE(review): the closing brace falls in
   lines missing from this extraction. */
2120 ST_FUNC
void gen_vla_sp_save(int addr
) {
2121 tcc_error("variable length arrays unsupported for this target");
2124 /* Restore the SP from a location on the stack */
/* Stub: VLA support is not implemented for this target; always aborts
   compilation via tcc_error. NOTE(review): the closing brace falls in
   lines missing from this extraction. */
2125 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2126 tcc_error("variable length arrays unsupported for this target");
2129 /* Subtract from the stack pointer, and push the resulting value onto the stack */
/* Stub: VLA support is not implemented for this target; always aborts
   compilation via tcc_error. NOTE(review): the closing brace falls in
   lines missing from this extraction. */
2130 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2131 tcc_error("variable length arrays unsupported for this target");
2134 /* end of ARM code generator */
2135 /*************************************************************/
2137 /*************************************************************/