arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
/* The EABI configuration always uses VFP: force TCC_ARM_VFP on when
   TCC_ARM_EABI is selected. */
#ifdef TCC_ARM_EABI
#ifndef TCC_ARM_VFP /* Avoid useless warning */
#define TCC_ARM_VFP
#endif
#endif

/* number of available registers: the 5 integer registers (r0-r3, r12)
   plus 8 float registers with VFP, or 4 float registers without
   (see the TREG_xxx enum) */
#ifdef TCC_ARM_VFP
#define NB_REGS            13
#else
#define NB_REGS             9
#endif

/* ARM architecture version; defaults to 5 unless set by the build */
#ifndef TCC_ARM_VERSION
# define TCC_ARM_VERSION 5
#endif
  42
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which makes
   assumptions about it). Each class is a single bit so that classes can
   be or'ed together into a mask (see reg_classes[]). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_R0      0x0004
#define RC_R1      0x0008
#define RC_R2      0x0010
#define RC_R3      0x0020
#define RC_R12     0x0040
#define RC_F0      0x0080
#define RC_F1      0x0100
#define RC_F2      0x0200
#define RC_F3      0x0400
/* f4-f7 only exist in the VFP configuration */
#ifdef TCC_ARM_VFP
#define RC_F4      0x0800
#define RC_F5      0x1000
#define RC_F6      0x2000
#define RC_F7      0x4000
#endif
#define RC_IRET    RC_R0  /* function return: integer register */
#define RC_LRET    RC_R1  /* function return: second integer register */
#define RC_FRET    RC_F0  /* function return: float register */
  66
/* pretty names for the registers.
   The values are used as indices into reg_classes[], so the order here
   must match the order of that table. intr() maps index 4 (TREG_R12) to
   hardware register 12, and vfpr()/fpr() subtract 5 (the value of
   TREG_F0) to obtain the float register number. */
enum {
    TREG_R0 = 0,
    TREG_R1,
    TREG_R2,
    TREG_R3,
    TREG_R12,
    TREG_F0,
    TREG_F1,
    TREG_F2,
    TREG_F3,
#ifdef TCC_ARM_VFP
    /* additional float registers available with VFP only */
    TREG_F4,
    TREG_F5,
    TREG_F6,
    TREG_F7,
#endif
};
  85
#ifdef TCC_ARM_VFP
/* Precision bit for a VFP instruction operating on type 't': 0 for
   VT_FLOAT, 0x100 for any other base type (the same bit load()/store()
   set to turn flds/fsts into fldd/fstd). */
#define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
#endif

/* return registers for function */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_LRET TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */

/* With EABI the 64-bit division helper tokens are redirected to the
   __aeabi_* tokens; division and modulo share the same helper token. */
#ifdef TCC_ARM_EABI
#define TOK___divdi3 TOK___aeabi_ldivmod
#define TOK___moddi3 TOK___aeabi_ldivmod
#define TOK___udivdi3 TOK___aeabi_uldivmod
#define TOK___umoddi3 TOK___aeabi_uldivmod
#endif

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. */
/* #define FUNC_STRUCT_PARAM_AS_PTR */

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
#ifdef TCC_ARM_VFP
#define LDOUBLE_SIZE  8
#endif

/* without VFP, LDOUBLE_SIZE may be predefined; otherwise default to 8 */
#ifndef LDOUBLE_SIZE
#define LDOUBLE_SIZE  8
#endif

#ifdef TCC_ARM_EABI
#define LDOUBLE_ALIGN 8
#else
#define LDOUBLE_ALIGN 4
#endif

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* plain 'char' is unsigned on this target */
#define CHAR_IS_UNSIGNED
 131
/******************************************************/
/* ELF defines */

/* ELF machine type of the generated objects */
#define EM_TCC_TARGET EM_ARM

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_ARM_ABS32
#define R_DATA_PTR  R_ARM_ABS32
#define R_JMP_SLOT  R_ARM_JUMP_SLOT
#define R_COPY      R_ARM_COPY

/* start address and page size used for ELF output */
#define ELF_START_ADDR 0x00008000
#define ELF_PAGE_SIZE  0x1000
 145
 146 /******************************************************/
 147 #else /* ! TARGET_DEFS_ONLY */
 148 /******************************************************/
 149 #include "tcc.h"
 150
 151 ST_DATA const int reg_classes[NB_REGS] = {
 152     /* r0 */ RC_INT | RC_R0,
 153     /* r1 */ RC_INT | RC_R1,
 154     /* r2 */ RC_INT | RC_R2,
 155     /* r3 */ RC_INT | RC_R3,
 156     /* r12 */ RC_INT | RC_R12,
 157     /* f0 */ RC_FLOAT | RC_F0,
 158     /* f1 */ RC_FLOAT | RC_F1,
 159     /* f2 */ RC_FLOAT | RC_F2,
 160     /* f3 */ RC_FLOAT | RC_F3,
 161 #ifdef TCC_ARM_VFP
 162  /* d4/s8 */ RC_FLOAT | RC_F4,
 163 /* d5/s10 */ RC_FLOAT | RC_F5,
 164 /* d6/s12 */ RC_FLOAT | RC_F6,
 165 /* d7/s14 */ RC_FLOAT | RC_F7,
 166 #endif
 167 };
 168
/* Code generator state private to this file. These variables are not
   referenced in the part of the file shown before copy_params(); their
   users are presumably the function prolog/epilog and int<->float
   conversion code further down -- TODO confirm. */
static int func_sub_sp_offset, last_itod_magic;
static int leaffunc;
 171
 172 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 173 static CType float_type, double_type, func_float_type, func_double_type;
 174 ST_FUNC void arm_init_types(void)
 175 {
 176     float_type.t = VT_FLOAT;
 177     double_type.t = VT_DOUBLE;
 178     func_float_type.t = VT_FUNC;
 179     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 180     func_double_type.t = VT_FUNC;
 181     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 182 }
 183 #else
 184 #define func_float_type func_old_type
 185 #define func_double_type func_old_type
 186 #define func_ldouble_type func_old_type
 187 ST_FUNC void arm_init_types(void) {}
 188 #endif
 189
 190 static int two2mask(int a,int b) {
 191   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 192 }
 193
 194 static int regmask(int r) {
 195   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 196 }
 197
 198 /******************************************************/
 199
 200 void o(uint32_t i)
 201 {
 202   /* this is a good place to start adding big-endian support*/
 203   int ind1;
 204
 205   ind1 = ind + 4;
 206   if (!cur_text_section)
 207     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 208          "can't evaluate constant expressions outside of a function.");
 209   if (ind1 > cur_text_section->data_allocated)
 210     section_realloc(cur_text_section, ind1);
 211   cur_text_section->data[ind++] = i&255;
 212   i>>=8;
 213   cur_text_section->data[ind++] = i&255;
 214   i>>=8;
 215   cur_text_section->data[ind++] = i&255;
 216   i>>=8;
 217   cur_text_section->data[ind++] = i;
 218 }
 219
 220 static uint32_t stuff_const(uint32_t op, uint32_t c)
 221 {
 222   int try_neg=0;
 223   uint32_t nc = 0, negop = 0;
 224
 225   switch(op&0x1F00000)
 226   {
 227     case 0x800000: //add
 228     case 0x400000: //sub
 229       try_neg=1;
 230       negop=op^0xC00000;
 231       nc=-c;
 232       break;
 233     case 0x1A00000: //mov
 234     case 0x1E00000: //mvn
 235       try_neg=1;
 236       negop=op^0x400000;
 237       nc=~c;
 238       break;
 239     case 0x200000: //xor
 240       if(c==~0)
 241         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 242       break;
 243     case 0x0: //and
 244       if(c==~0)
 245         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 246     case 0x1C00000: //bic
 247       try_neg=1;
 248       negop=op^0x1C00000;
 249       nc=~c;
 250       break;
 251     case 0x1800000: //orr
 252       if(c==~0)
 253         return (op&0xFFF0FFFF)|0x1E00000;
 254       break;
 255   }
 256   do {
 257     uint32_t m;
 258     int i;
 259     if(c<256) /* catch undefined <<32 */
 260       return op|c;
 261     for(i=2;i<32;i+=2) {
 262       m=(0xff>>i)|(0xff<<(32-i));
 263       if(!(c&~m))
 264         return op|(i<<7)|(c<<i)|(c>>(32-i));
 265     }
 266     op=negop;
 267     c=nc;
 268   } while(try_neg--);
 269   return 0;
 270 }
 271
 272
 273 //only add,sub
 274 void stuff_const_harder(uint32_t op, uint32_t v) {
 275   uint32_t x;
 276   x=stuff_const(op,v);
 277   if(x)
 278     o(x);
 279   else {
 280     uint32_t a[16], nv, no, o2, n2;
 281     int i,j,k;
 282     a[0]=0xff;
 283     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 284     for(i=1;i<16;i++)
 285       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 286     for(i=0;i<12;i++)
 287       for(j=i<4?i+12:15;j>=i+4;j--)
 288         if((v&(a[i]|a[j]))==v) {
 289           o(stuff_const(op,v&a[i]));
 290           o(stuff_const(o2,v&a[j]));
 291           return;
 292         }
 293     no=op^0xC00000;
 294     n2=o2^0xC00000;
 295     nv=-v;
 296     for(i=0;i<12;i++)
 297       for(j=i<4?i+12:15;j>=i+4;j--)
 298         if((nv&(a[i]|a[j]))==nv) {
 299           o(stuff_const(no,nv&a[i]));
 300           o(stuff_const(n2,nv&a[j]));
 301           return;
 302         }
 303     for(i=0;i<8;i++)
 304       for(j=i+4;j<12;j++)
 305         for(k=i<4?i+12:15;k>=j+4;k--)
 306           if((v&(a[i]|a[j]|a[k]))==v) {
 307             o(stuff_const(op,v&a[i]));
 308             o(stuff_const(o2,v&a[j]));
 309             o(stuff_const(o2,v&a[k]));
 310             return;
 311           }
 312     no=op^0xC00000;
 313     nv=-v;
 314     for(i=0;i<8;i++)
 315       for(j=i+4;j<12;j++)
 316         for(k=i<4?i+12:15;k>=j+4;k--)
 317           if((nv&(a[i]|a[j]|a[k]))==nv) {
 318             o(stuff_const(no,nv&a[i]));
 319             o(stuff_const(n2,nv&a[j]));
 320             o(stuff_const(n2,nv&a[k]));
 321             return;
 322           }
 323     o(stuff_const(op,v&a[0]));
 324     o(stuff_const(o2,v&a[4]));
 325     o(stuff_const(o2,v&a[8]));
 326     o(stuff_const(o2,v&a[12]));
 327   }
 328 }
 329
 330 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 331 {
 332   addr-=pos+8;
 333   addr/=4;
 334   if(addr>=0x1000000 || addr<-0x1000000) {
 335     if(fail)
 336       tcc_error("FIXME: function bigger than 32MB");
 337     return 0;
 338   }
 339   return 0x0A000000|(addr&0xffffff);
 340 }
 341
 342 int decbranch(int pos)
 343 {
 344   int x;
 345   x=*(uint32_t *)(cur_text_section->data + pos);
 346   x&=0x00ffffff;
 347   if(x&0x800000)
 348     x-=0x1000000;
 349   return x*4+pos+8;
 350 }
 351
 352 /* output a symbol and patch all calls to it */
 353 void gsym_addr(int t, int a)
 354 {
 355   uint32_t *x;
 356   int lt;
 357   while(t) {
 358     x=(uint32_t *)(cur_text_section->data + t);
 359     t=decbranch(lt=t);
 360     if(a==lt+4)
 361       *x=0xE1A00000; // nop
 362     else {
 363       *x &= 0xff000000;
 364       *x |= encbranch(lt,a,1);
 365     }
 366   }
 367 }
 368
/* Resolve the chain of pending branches starting at 't' to the current
   output position 'ind' (see gsym_addr()). */
void gsym(int t)
{
  gsym_addr(t, ind);
}
 373
 374 #ifdef TCC_ARM_VFP
 375 static uint32_t vfpr(int r)
 376 {
 377   if(r<TREG_F0 || r>TREG_F7)
 378     tcc_error("compiler error! register %i is no vfp register",r);
 379   return r-5;
 380 }
 381 #else
 382 static uint32_t fpr(int r)
 383 {
 384   if(r<TREG_F0 || r>TREG_F3)
 385     tcc_error("compiler error! register %i is no fpa register",r);
 386   return r-5;
 387 }
 388 #endif
 389
 390 static uint32_t intr(int r)
 391 {
 392   if(r==4)
 393     return 12;
 394   if((r<0 || r>4) && r!=14)
 395     tcc_error("compiler error! register %i is no int register",r);
 396   return r;
 397 }
 398
 399 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 400 {
 401   if(*off>maxoff || *off&((1<<shift)-1)) {
 402     uint32_t x, y;
 403     x=0xE280E000;
 404     if(*sgn)
 405       x=0xE240E000;
 406     x|=(*base)<<16;
 407     *base=14; // lr
 408     y=stuff_const(x,*off&~maxoff);
 409     if(y) {
 410       o(y);
 411       *off&=maxoff;
 412       return;
 413     }
 414     y=stuff_const(x,(*off+maxoff)&~maxoff);
 415     if(y) {
 416       o(y);
 417       *sgn=!*sgn;
 418       *off=((*off+maxoff)&~maxoff)-*off;
 419       return;
 420     }
 421     stuff_const_harder(x,*off&~maxoff);
 422     *off&=maxoff;
 423   }
 424 }
 425
 426 static uint32_t mapcc(int cc)
 427 {
 428   switch(cc)
 429   {
 430     case TOK_ULT:
 431       return 0x30000000; /* CC/LO */
 432     case TOK_UGE:
 433       return 0x20000000; /* CS/HS */
 434     case TOK_EQ:
 435       return 0x00000000; /* EQ */
 436     case TOK_NE:
 437       return 0x10000000; /* NE */
 438     case TOK_ULE:
 439       return 0x90000000; /* LS */
 440     case TOK_UGT:
 441       return 0x80000000; /* HI */
 442     case TOK_Nset:
 443       return 0x40000000; /* MI */
 444     case TOK_Nclear:
 445       return 0x50000000; /* PL */
 446     case TOK_LT:
 447       return 0xB0000000; /* LT */
 448     case TOK_GE:
 449       return 0xA0000000; /* GE */
 450     case TOK_LE:
 451       return 0xD0000000; /* LE */
 452     case TOK_GT:
 453       return 0xC0000000; /* GT */
 454   }
 455   tcc_error("unexpected condition code");
 456   return 0xE0000000; /* AL */
 457 }
 458
 459 static int negcc(int cc)
 460 {
 461   switch(cc)
 462   {
 463     case TOK_ULT:
 464       return TOK_UGE;
 465     case TOK_UGE:
 466       return TOK_ULT;
 467     case TOK_EQ:
 468       return TOK_NE;
 469     case TOK_NE:
 470       return TOK_EQ;
 471     case TOK_ULE:
 472       return TOK_UGT;
 473     case TOK_UGT:
 474       return TOK_ULE;
 475     case TOK_Nset:
 476       return TOK_Nclear;
 477     case TOK_Nclear:
 478       return TOK_Nset;
 479     case TOK_LT:
 480       return TOK_GE;
 481     case TOK_GE:
 482       return TOK_LT;
 483     case TOK_LE:
 484       return TOK_GT;
 485     case TOK_GT:
 486       return TOK_LE;
 487   }
 488   tcc_error("unexpected condition code");
 489   return TOK_NE;
 490 }
 491
/* load 'r' from value 'sv'

   r:  destination register (a TREG_xxx index; 14 (lr) is also used when
       this function calls itself to materialize an address)
   sv: value to load; its 'r' field selects between a memory operand
       (VT_LVAL set) and an immediate / address / flag / register value

   Raises a compile error for combinations that are not handled. */
void load(int r, SValue *sv)
{
  int v, ft, fc, fr, sign;
  uint32_t op;
  SValue v1;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the constant into magnitude (fc) and sign flag */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL) {
    /* memory operand: first work out the base register */
    uint32_t base = 0xB; // fp
    if(v == VT_LLOCAL) {
      /* the address is itself stored in a local: fetch it into lr */
      v1.type.t = VT_PTR;
      v1.r = VT_LOCAL | VT_LVAL;
      v1.c.ul = sv->c.ul;
      load(base=14 /* lr */, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v == VT_CONST) {
      /* constant (possibly symbolic) address: materialize it in lr */
      v1.type.t = VT_PTR;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v < VT_CONST) {
      /* the address is already in a register */
      base=intr(v);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
      /* at this point the operand is [base +/- fc] */
      if(is_float(ft)) {
        /* offset is encoded in words (fc>>2), at most 1020 bytes */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED100A00; /* flds */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* flds -> fldd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED100100;
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        else if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
      } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
                || (ft & VT_BTYPE) == VT_SHORT) {
        /* signed byte and halfword loads: 8-bit offset split into two
           nibbles of the instruction */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE1500090;
        if ((ft & VT_BTYPE) == VT_SHORT)
          op|=0x20;
        if ((ft & VT_UNSIGNED) == 0)
          op|=0x40;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word or unsigned byte load with a 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5100000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  } else {
    if (v == VT_CONST) {
      /* immediate: a single mov/mvn if possible */
      op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        /* otherwise load the word from an inline literal (with a
           relocation when a symbol is involved) and branch over it */
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM)
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
      } else
        o(op);
      return;
    } else if (v == VT_LOCAL) {
      /* address of a local: fp + constant */
      op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        /* constant not encodable: load it from an inline literal, then
           add fp */
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM) // needed ?
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
        o(0xE08B0000|(intr(r)<<12)|intr(r));
      } else
        o(op);
      return;
    } else if(v == VT_CMP) {
      /* comparison result: r = 1 if the condition holds, else r = 0 */
      o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
      o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
      return;
    } else if (v == VT_JMP || v == VT_JMPI) {
      /* pending jump chain: the fall-through path sets r to t and skips
         the next instruction; the jumps of the chain land on that
         instruction, which sets r to t^1 */
      int t;
      t = v & 1;
      o(0xE3A00000|(intr(r)<<12)|t);
      o(0xEA000000);
      gsym(sv->c.ul);
      o(0xE3A00000|(intr(r)<<12)|(t^1));
      return;
    } else if (v < VT_CONST) {
      /* register to register move */
      if(is_float(ft))
#ifdef TCC_ARM_VFP
        o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
#else
        o(0xEE008180|(fpr(r)<<12)|fpr(v));
#endif
      else
        o(0xE1A00000|(intr(r)<<12)|intr(v));
      return;
    }
  }
  tcc_error("load unimplemented!");
}
 630
/* store register 'r' in lvalue 'v'

   r:  source register (a TREG_xxx index)
   sv: destination; must be an lvalue or a plain VT_LOCAL value

   Raises a compile error for destinations that are not handled. */
void store(int r, SValue *sv)
{
  SValue v1;
  int v, ft, fc, fr, sign;
  uint32_t op;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the constant into magnitude (fc) and sign flag */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL || fr == VT_LOCAL) {
    /* work out the base register; the default is fp */
    uint32_t base = 0xb;
    if(v < VT_CONST) {
      /* the address is in a register */
      base=intr(v);
      v=VT_LOCAL;
      fc=sign=0;
    } else if(v == VT_CONST) {
      /* constant (possibly symbolic) address: materialize it in lr */
      v1.type.t = ft;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
       /* at this point the destination is [base +/- fc] */
       if(is_float(ft)) {
        /* offset is encoded in words (fc>>2), at most 1020 bytes */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED000A00; /* fsts */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* fsts -> fstd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED000100;
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
        return;
      } else if((ft & VT_BTYPE) == VT_SHORT) {
        /* halfword store: 8-bit offset split into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE14000B0;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word or byte store with a 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5000000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  }
  tcc_error("store unimplemented");
}
 711
/* Emit code adding the constant 'val' to the stack pointer
   (0xE28DD000 is "add sp,sp,#0"; stuff_const_harder() fills in the
   constant and uses several instructions if one is not enough). */
static void gadd_sp(int val)
{
  stuff_const_harder(0xE28DD000,val);
}
 716
 717 /* 'is_jmp' is '1' if it is a jump */
 718 static void gcall_or_jmp(int is_jmp)
 719 {
 720   int r;
 721   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 722     uint32_t x;
 723     /* constant case */
 724     x=encbranch(ind,ind+vtop->c.ul,0);
 725     if(x) {
 726       if (vtop->r & VT_SYM) {
 727         /* relocation case */
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 729       } else
 730         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 731       o(x|(is_jmp?0xE0000000:0xE1000000));
 732     } else {
 733       if(!is_jmp)
 734         o(0xE28FE004); // add lr,pc,#4
 735       o(0xE51FF004);   // ldr pc,[pc,#-4]
 736       if (vtop->r & VT_SYM)
 737         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 738       o(vtop->c.ul);
 739     }
 740   } else {
 741     /* otherwise, indirect call */
 742     r = gv(RC_INT);
 743     if(!is_jmp)
 744       o(0xE1A0E00F);       // mov lr,pc
 745     o(0xE1A0F000|intr(r)); // mov pc,r
 746   }
 747 }
 748
 749 /* Return whether a structure is an homogeneous float aggregate or not.
 750    The answer is true if all the elements of the structure are of the same
 751    primitive float type and there is less than 4 elements.
 752
 753    type: the type corresponding to the structure to be tested */
 754 static int is_hgen_float_aggr(CType *type)
 755 {
 756   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 757     struct Sym *ref;
 758     int btype, nb_fields = 0;
 759
 760     ref = type->ref->next;
 761     btype = ref->type.t & VT_BTYPE;
 762     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 763       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 764       return !ref && nb_fields <= 4;
 765     }
 766   }
 767   return 0;
 768 }
 769
 770 struct avail_regs {
 771   signed char avail[3]; /* 3 holes max with only float and double alignments */
 772   int first_hole; /* first available hole */
 773   int last_hole; /* last available hole (none if equal to first_hole) */
 774   int first_free_reg; /* next free register in the sequence, hole excluded */
 775 };
 776
 777 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 778
 779 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 780    param) according to the rules described in the procedure call standard for
 781    the ARM architecture (AAPCS). If found, the registers are assigned to this
 782    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 783    and the parameter is a single float.
 784
 785    avregs: opaque structure to keep track of available VFP co-processor regs
 786    align: alignment contraints for the param, as returned by type_size()
 787    size: size of the parameter, as returned by type_size() */
 788 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 789 {
 790   int first_reg = 0;
 791
 792   if (avregs->first_free_reg == -1)
 793     return -1;
 794   if (align >> 3) { /* double alignment */
 795     first_reg = avregs->first_free_reg;
 796     /* alignment contraint not respected so use next reg and record hole */
 797     if (first_reg & 1)
 798       avregs->avail[avregs->last_hole++] = first_reg++;
 799   } else { /* no special alignment (float or array of float) */
 800     /* if single float and a hole is available, assign the param to it */
 801     if (size == 4 && avregs->first_hole != avregs->last_hole)
 802       return avregs->avail[avregs->first_hole++];
 803     else
 804       first_reg = avregs->first_free_reg;
 805   }
 806   if (first_reg + size / 4 <= 16) {
 807     avregs->first_free_reg = first_reg + size / 4;
 808     return first_reg;
 809   }
 810   avregs->first_free_reg = -1;
 811   return -1;
 812 }
 813
 814 /* Returns whether all params need to be passed in core registers or not.
 815    This is the case for function part of the runtime ABI. */
 816 int floats_in_core_regs(SValue *sval)
 817 {
 818   if (!sval->sym)
 819     return 0;
 820
 821   switch (sval->sym->v) {
 822     case TOK___floatundisf:
 823     case TOK___floatundidf:
 824     case TOK___fixunssfdi:
 825     case TOK___fixunsdfdi:
 826 #ifndef TCC_ARM_VFP
 827     case TOK___fixunsxfdi:
 828 #endif
 829     case TOK___floatdisf:
 830     case TOK___floatdidf:
 831     case TOK___fixsfdi:
 832     case TOK___fixdfdi:
 833       return 1;
 834
 835     default:
 836       return 0;
 837   }
 838 }
 839
 840 /* Return the number of registers needed to return the struct, or 0 if
 841    returning via struct pointer. */
 842 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
 843 #ifdef TCC_ARM_EABI
 844     int size, align;
 845     size = type_size(vt, &align);
 846 #ifdef TCC_ARM_HARDFLOAT
 847     if (is_float(vt->t) || is_hgen_float_aggr(vt)) {
 848         *ret_align = 8;
 849         ret->ref = NULL;
 850         ret->t = VT_DOUBLE;
 851         return (size + 7) >> 3;
 852     } else
 853 #endif
 854     if (size > 4) {
 855         return 0;
 856     } else {
 857         *ret_align = 4;
 858         ret->ref = NULL;
 859         ret->t = VT_INT;
 860         return 1;
 861     }
 862 #else
 863     return 0;
 864 #endif
 865 }
 866
/* Parameters are classified according to how they are copied to their final
   destination for the function call. Because the copying is performed class
   after class according to the order in the enum below, it is important that
   some constraints about the order of the members of this enum are respected:
   - CORE_STRUCT_CLASS must come after STACK_CLASS;
   - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
     VFP_STRUCT_CLASS;
   - VFP_STRUCT_CLASS must come after VFP_CLASS.
   See the comment for the main loop in copy_params() for the reason. */
enum reg_class {
        STACK_CLASS = 0,
        CORE_STRUCT_CLASS,
        VFP_CLASS,
        VFP_STRUCT_CLASS,
        CORE_CLASS,
        NB_CLASSES /* number of classes, not a class itself */
};
 884
/* Where one parameter of a call goes. Plans of the same class are
   chained through 'prev', most recently added first. */
struct param_plan {
    int start; /* first reg or addr used depending on the class */
    int end; /* last reg used or next free addr depending on the class */
    SValue *sval; /* pointer to SValue on the value stack */
    struct param_plan *prev; /*  previous element in this class */
};
 891
/* Complete parameter assignment for one function call: the storage for
   all the parameter plans plus the head of each per class list. */
struct plan {
    struct param_plan *pplans; /* array of all the param plans */
    struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
};
 896
/* Append a copy of 'pplan' to the array of 'plan' and make it the head
   of the list of class 'class'.
   The macro expects a variable named <plan>_nb (i.e. plan_nb when the
   first argument is 'plan') in the caller's scope: it holds the number
   of plans stored so far and is incremented. 'pplan' itself is modified
   (its 'prev' field is set). */
#define add_param_plan(plan,pplan,class)                        \
    do {                                                        \
        pplan.prev = plan->clsplans[class];                     \
        plan->pplans[plan ## _nb] = pplan;                      \
        plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
    } while(0)
 903
/* Assign parameters to registers and stack with alignment according to the
   rules in the procedure call standard for the ARM architecture (AAPCS).
   The overall assignment is recorded in an array of per parameter structures
   called parameter plans. The parameter plans are also further organized in a
   number of linked lists, one per class of parameter (see the comment for the
   definition of enum reg_class).

   nb_args: number of parameters of the function for which a call is generated
   corefloat: whether to pass float via core registers or not
   plan: the structure where the overall assignment is recorded
   todo: a bitmap that records which core registers hold a parameter

   Returns the amount of stack space needed for parameter passing

   Note: this function allocates an array in plan->pplans with tcc_malloc. It
   is the responsibility of the caller to free this array once used (ie not
   before copy_params). */
static int assign_regs(int nb_args, int corefloat, struct plan *plan, int *todo)
{
  int i, size, align;
  int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
  int plan_nb = 0; /* number of plans stored, used by add_param_plan() */
  struct param_plan pplan;
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;

  ncrn = nsaa = 0;
  *todo = 0;
  plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
  memset(plan->clsplans, 0, sizeof(plan->clsplans));
  /* walk the arguments from vtop[-(nb_args-1)] to vtop[0]; inside the
     switch, 'continue' means the parameter has been placed in registers
     while 'break' sends it to the stack code at the end of the loop */
  for(i = nb_args; i-- ;) {
    int j, start_vfpreg = 0;
    size = type_size(&vtop[-i].type, &align);
    switch(vtop[-i].type.t & VT_BTYPE) {
      case VT_STRUCT:
      case VT_FLOAT:
      case VT_DOUBLE:
      case VT_LDOUBLE:
      if (!corefloat) {
        int is_hfa = 0; /* Homogeneous float aggregate */

        /* floats and homogeneous float aggregates are candidates for
           VFP registers */
        if (is_float(vtop[-i].type.t)
            || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
          int end_vfpreg;

          start_vfpreg = assign_vfpreg(&avregs, align, size);
          end_vfpreg = start_vfpreg + ((size - 1) >> 2);
          if (start_vfpreg >= 0) {
            pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
            if (is_hfa)
              add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
            else
              add_param_plan(plan, pplan, VFP_CLASS);
            continue;
          } else
            break; /* no VFP register left: pass on the stack */
        }
      }
      /* structures, and floats passed in core registers: round ncrn up
         to the alignment of the parameter and the size up to a word */
      ncrn = (ncrn + (align-1)/4) & -(align/4);
      size = (size + 3) & -4;
      if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
        /* The parameter is allocated both in core register and on stack. As
         * such, it can be of either class: it would either be the last of
         * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
        for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
          *todo|=(1<<j);
        pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
        add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
        ncrn += size/4;
        /* the part that did not fit in r0-r3 uses the start of the stack
           area */
        if (ncrn > 4)
          nsaa = (ncrn - 4) * 4;
      } else {
        /* no core register may be used from now on */
        ncrn = 4;
        break;
      }
      continue;
      default:
      if (ncrn < 4) {
        int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;

        if (is_long) {
          /* long long needs an even/odd register pair */
          ncrn = (ncrn + 1) & -2;
          if (ncrn == 4)
            break;
        }
        pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
        ncrn++;
        if (is_long)
          pplan.end = ncrn++;
        add_param_plan(plan, pplan, CORE_CLASS);
        continue;
      }
    }
    /* the parameter goes on the stack, at the next suitably aligned
       address */
    nsaa = (nsaa + (align - 1)) & ~(align - 1);
    pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
    add_param_plan(plan, pplan, STACK_CLASS);
    nsaa += size; /* size already rounded up before */
  }
  return nsaa;
}
1003
1004 #undef add_param_plan
1005
1006 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1007    function call.
1008
1009    nb_args: number of parameters the function takes
1010    plan: the overall assignment plan for parameters
1011    todo: a bitmap indicating what core reg will hold a parameter
1012
1013    Returns the number of SValue added by this function on the value stack */
1014 static int copy_params(int nb_args, struct plan *plan, int todo)
1015 {
1016   int size, align, r, i, nb_extra_sval = 0;
1017   struct param_plan *pplan;
1018
1019    /* Several constraints require parameters to be copied in a specific order:
1020       - structures are copied to the stack before being loaded in a reg;
1021       - floats loaded to an odd numbered VFP reg are first copied to the
1022         preceding even numbered VFP reg and then moved to the next VFP reg.
1023
1024       It is thus important that:
1025       - structures assigned to core regs must be copied after parameters
1026         assigned to the stack but before structures assigned to VFP regs because
1027         a structure can lie partly in core registers and partly on the stack;
1028       - parameters assigned to the stack and all structures be copied before
1029         parameters assigned to a core reg since copying a parameter to the stack
1030         require using a core reg;
1031       - parameters assigned to VFP regs be copied before structures assigned to
1032         VFP regs as the copy might use an even numbered VFP reg that already
1033         holds part of a structure. */
       /* Classes are processed in increasing index order; inside a class the
          per-parameter plans are walked through their 'prev' links. */
1034   for(i = 0; i < NB_CLASSES; i++) {
1035     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
           /* work on a copy pushed on top of the value stack */
1036       vpushv(pplan->sval);
1037       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1038       switch(i) {
1039         case STACK_CLASS:
1040         case CORE_STRUCT_CLASS:
1041         case VFP_STRUCT_CLASS:
1042           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1043             int padding = 0;
1044             size = type_size(&pplan->sval->type, &align);
1045             /* align to stack align size */
1046             size = (size + 3) & ~3;
                 /* gap between this stack slot and the previously planned one */
1047             if (i == STACK_CLASS && pplan->prev)
1048               padding = pplan->start - pplan->prev->end;
1049             size += padding; /* Add padding if any */
1050             /* allocate the necessary size on stack */
1051             gadd_sp(-size);
1052             /* generate structure store */
1053             r = get_reg(RC_INT);
1054             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1055             vset(&vtop->type, r | VT_LVAL, 0);
1056             vswap();
1057             vstore(); /* memcpy to current sp + potential padding */
1058
1059             /* Homogeneous float aggregate are loaded to VFP registers
1060                immediately since there is no way of loading data in multiple
1061                non consecutive VFP registers as what is done for other
1062                structures (see the use of todo). */
1063             if (i == VFP_STRUCT_CLASS) {
1064               int first = pplan->start, nb = pplan->end - first + 1;
1065               /* vpop.32 {pplan->start, ..., pplan->end} */
1066               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1067               /* No need to write the register used to a SValue since VFP regs
1068                  cannot be used for gcall_or_jmp */
1069             }
1070           } else {
1071             if (is_float(pplan->sval->type.t)) {
1072 #ifdef TCC_ARM_VFP
1073               r = vfpr(gv(RC_FLOAT)) << 12;
1074               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1075                 size = 4;
1076               else {
1077                 size = 8;
1078                 r |= 0x101; /* vpush.32 -> vpush.64 */
1079               }
1080               o(0xED2D0A01 + r); /* vpush */
1081 #else
1082               r = fpr(gv(RC_FLOAT)) << 12;
1083               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1084                 size = 4;
1085               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1086                 size = 8;
1087               else
1088                 size = LDOUBLE_SIZE;
1089
                   /* extra encoding bits selected from the operand size */
1090               if (size == 12)
1091                 r |= 0x400000;
1092               else if(size == 8)
1093                 r|=0x8000;
1094
1095               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1096 #endif
1097             } else {
1098               /* simple type (currently always same size) */
1099               /* XXX: implicit cast ? */
1100               size=4;
1101               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                     /* long long: split in two words and push one of them first */
1102                 lexpand_nr();
1103                 size = 8;
1104                 r = gv(RC_INT);
1105                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1106                 vtop--;
1107               }
1108               r = gv(RC_INT);
1109               o(0xE52D0004|(intr(r)<<12)); /* push r */
1110             }
1111             if (i == STACK_CLASS && pplan->prev)
1112               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1113           }
1114           break;
1115
1116         case VFP_CLASS:
               /* load into the float register holding s(start): start >> 1 selects
                  the register, start & 1 the half (see below) */
1117           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1118           if (pplan->start & 1) { /* Must be in upper part of double register */
1119             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1120             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1121           }
1122           break;
1123
1124         case CORE_CLASS:
1125           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                 /* long long: one word goes to register 'end', recorded in r2 */
1126             lexpand_nr();
1127             gv(regmask(pplan->end));
1128             pplan->sval->r2 = vtop->r;
1129             vtop--;
1130           }
1131           gv(regmask(pplan->start));
1132           /* Mark register as used so that gcall_or_jmp use another one
1133              (regs >=4 are free as never used to pass parameters) */
1134           pplan->sval->r = vtop->r;
1135           break;
1136       }
           /* pop the value-stack entry left on top by the case above */
1137       vtop--;
1138     }
1139   }
1140
1141   /* Manually free remaining registers since next parameters are loaded
1142    * manually, without the help of gv(int). */
1143   save_regs(nb_args);
1144
1145   if(todo) {
1146     o(0xE8BD0000|todo); /* pop {todo} */
1147     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1148       int r;
1149       pplan->sval->r = pplan->start;
1150       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1151          can occupy more than 2 registers. Thus, we need to push on the value
1152          stack some fake parameter to have on SValue for each registers used
1153          by a structure (r2 is not used). */
1154       for (r = pplan->start + 1; r <= pplan->end; r++) {
1155         if (todo & (1 << r)) {
1156           nb_extra_sval++;
1157           vpushi(0);
1158           vtop->r = r;
1159         }
1160       }
1161     }
1162   }
       /* the caller must account for these extra value-stack entries */
1163   return nb_extra_sval;
1164 }
1165
1166 /* Generate function call. The function address is pushed first, then
1167    all the parameters in call order. This functions pops all the
1168    parameters and the function address. */
1169 void gfunc_call(int nb_args)
1170 {
1171   int r, args_size;
       /* corefloat stays 1 unless TCC_ARM_HARDFLOAT recomputes it below;
          variadic is only assigned in that configuration */
1172   int variadic, corefloat = 1;
1173   int todo;
1174   struct plan plan;
1175
1176 #ifdef TCC_ARM_HARDFLOAT
1177   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1178   corefloat = variadic || floats_in_core_regs(&vtop[-nb_args]);
1179 #endif
1180   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1181      VT_JMP anywhere except on the top of the stack because it would complicate
1182      the code generator. */
1183   r = vtop->r & VT_VALMASK;
1184   if (r == VT_CMP || (r & ~1) == VT_JMP)
1185     gv(RC_INT);
1186
       /* decide where each parameter goes; the result is the stack size needed */
1187   args_size = assign_regs(nb_args, corefloat, &plan, &todo);
1188
1189 #ifdef TCC_ARM_EABI
1190   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1191     args_size = (args_size + 7) & ~7;
1192     o(0xE24DD004); /* sub sp, sp, #4 */
1193   }
1194 #endif
1195
       /* copy_params may push extra SValues; count them so they are popped too */
1196   nb_args += copy_params(nb_args, &plan, todo);
1197   tcc_free(plan.pplans);
1198
1199   /* Move fct SValue on top as required by gcall_or_jmp */
1200   vrotb(nb_args + 1);
1201   gcall_or_jmp(0);
1202   if (args_size)
1203       gadd_sp(args_size); /* pop all parameters passed on the stack */
1204 #ifdef TCC_ARM_EABI
1205 #ifdef TCC_ARM_VFP
       /* float result returned in core register(s): move it to s0/d0 */
1206   if(corefloat && is_float(vtop->type.ref->type.t)) {
1207     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1208       o(0xEE000A10); /*vmov s0, r0 */
1209     } else {
1210       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1211       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1212     }
1213   }
1214 #endif
1215 #endif
1216   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1217   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1218 }
1219
1220 /* generate function prolog of type 't' */
1221 void gfunc_prolog(CType *func_type)
1222 {
1223   Sym *sym,*sym2;
       /* n: number of core register words to save, nf: number of VFP words to
          save, struct_ret: 1 when the first core register slot is reserved for
          the address of a returned structure */
1224   int n,nf,size,align, variadic, struct_ret = 0;
1225 #ifdef TCC_ARM_HARDFLOAT
1226   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1227 #endif
1228
1229   sym = func_type->ref;
1230   func_vt = sym->type;
1231
1232   n = nf = 0;
1233   variadic = (func_type->ref->c == FUNC_ELLIPSIS);
1234   if((func_vt.t & VT_BTYPE) == VT_STRUCT
1235 #ifdef TCC_ARM_HARDFLOAT
1236      && (variadic || !is_hgen_float_aggr(&func_vt))
1237 #endif
1238      && type_size(&func_vt,&align) > 4)
1239   {
1240     n++;
1241     struct_ret = 1;
1242     func_vc = 12; /* Offset from fp of the place to store the result */
1243   }
       /* first pass over the parameters: only count how many register words
          have to be saved */
1244   for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1245     size = type_size(&sym2->type, &align);
1246 #ifdef TCC_ARM_HARDFLOAT
1247     if (!variadic && (is_float(sym2->type.t)
1248         || is_hgen_float_aggr(&sym2->type))) {
1249       int tmpnf = assign_vfpreg(&avregs, align, size);
1250       tmpnf += (size + 3) / 4;
1251       nf = (tmpnf > nf) ? tmpnf : nf;
1252     } else
1253 #endif
1254     if (n < 4)
1255       n += (size + 3) / 4;
1256   }
1257   o(0xE1A0C00D); /* mov ip,sp */
1258   if(variadic)
1259     n=4;
1260   if(n) {
1261     if(n>4)
1262       n=4;
1263 #ifdef TCC_ARM_EABI
1264     n=(n+1)&-2;
1265 #endif
1266     o(0xE92D0000|((1<<n)-1)); /* save r0..r(n-1) on stack, i.e. at most r0-r3 */
1267   }
1268   if (nf) {
1269     if (nf>16)
1270       nf=16;
1271     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1272     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1273   }
1274   o(0xE92D5800); /* save fp, ip, lr */
1275   o(0xE1A0B00D); /* mov fp, sp */
       /* remember where the placeholder is so that gfunc_epilog can patch it */
1276   func_sub_sp_offset = ind;
1277   o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1278   {
1279     int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1280
1281 #ifdef TCC_ARM_HARDFLOAT
1282     func_vc += nf * 4;
1283     avregs = AVAIL_REGS_INITIALIZER;
1284 #endif
         /* second pass: compute the frame offset of every parameter and
            declare it as a local symbol */
1285     while ((sym = sym->next)) {
1286       CType *type;
1287       type = &sym->type;
1288       size = type_size(type, &align);
1289       size = (size + 3) >> 2;
1290       align = (align + 3) & ~3;
1291 #ifdef TCC_ARM_HARDFLOAT
1292       if (!variadic && (is_float(sym->type.t)
1293           || is_hgen_float_aggr(&sym->type))) {
1294         int fpn = assign_vfpreg(&avregs, align, size << 2);
1295         if (fpn >= 0) {
1296           addr = fpn * 4;
1297         } else
1298           goto from_stack;
1299       } else
1300 #endif
1301       if (pn < 4) {
1302 #ifdef TCC_ARM_EABI
1303         pn = (pn + (align-1)/4) & -(align/4);
1304 #endif
1305         addr = (nf + pn) * 4;
1306         pn += size;
             /* parameter straddling registers and stack: its tail uses the
                first stack words */
1307         if (!sn && pn > 4)
1308           sn = (pn - 4);
1309       } else {
1310 #ifdef TCC_ARM_HARDFLOAT
1311 from_stack:
1312 #endif
1313 #ifdef TCC_ARM_EABI
1314         sn = (sn + (align-1)/4) & -(align/4);
1315 #endif
1316         addr = (n + nf + sn) * 4;
1317         sn += size;
1318       }
           /* +12 skips the three words (fp, ip, lr) pushed above */
1319       sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
1320     }
1321   }
       /* reset the per-function state */
1322   last_itod_magic=0;
1323   leaffunc = 1;
1324   loc = 0;
1325 }
1326
1327 /* Emit the function epilogue and back-patch the instruction slot recorded
1328    at func_sub_sp_offset with the stack reservation. */
1329 void gfunc_epilog(void)
1330 {
1331   uint32_t patch;
1332   int frame;
1333 #ifdef TCC_ARM_EABI
1334   /* Useless but harmless copy of the float result into main register(s) in case
1335      of variadic function in the hardfloat variant */
1336   if (is_float(func_vt.t)) {
1337     if ((func_vt.t & VT_BTYPE) != VT_FLOAT) {
1338       o(0xEE100B10); /* fmrdl r0, d0 */
1339       o(0xEE301B10); /* fmrdh r1, d0 */
1340     } else {
1341       o(0xEE100A10); /* fmrs r0, s0 */
1342     }
1343   }
1344 #endif
1345   o(0xE89BA800); /* restore fp, sp, pc */
1346   frame = (-loc + 3) & -4; /* -loc rounded up to a multiple of 4 */
1347 #ifdef TCC_ARM_EABI
1348   if (!leaffunc)
1349     frame = ((frame + 11) & -8) - 4;
1350 #endif
1351   if (frame <= 0)
1352     return; /* nothing to reserve: the slot is left untouched */
1353   patch = stuff_const(0xE24BD000, frame); /* sub sp,fp,# */
1354   if (!patch) {
1355     /* stuff_const gave 0: emit a stub ending with the 'frame' word, branch to it */
1356     const int stub = ind;
1357     o(0xE59FC004); /* ldr ip,[pc+4] */
1358     o(0xE04BD00C); /* sub sp,fp,ip  */
1359     o(0xE1A0F00E); /* mov pc,lr */
1360     o(frame);
1361     patch = 0xE1000000 | encbranch(func_sub_sp_offset, stub, 1);
1362   }
1363   *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = patch;
1364 }
1365
1366 /* Emit a jump to 't'. Returns the value 'ind' had just before the
1367    instruction was emitted. */
1368 int gjmp(int t)
1369 {
1370   const int at = ind;
1371   o(encbranch(at, t, 1) | 0xE0000000);
1372   return at;
1373 }
1374
1375 /* generate a jump to a fixed address */
     /* Thin wrapper around gjmp(); the position returned by gjmp() is dropped. */
1376 void gjmp_addr(int a)
1377 {
1378   gjmp(a);
1379 }
1380
1381 /* generate a test. set 'inv' to invert test. Stack entry is popped */
     /* 't' is the jump target / chain to branch to; the returned value is the
        updated 't' (position of the emitted branch, or the merged jump chain). */
1382 int gtst(int inv, int t)
1383 {
1384   int v, r;
1385   uint32_t op;
1386   v = vtop->r & VT_VALMASK;
1387   r=ind;
1388   if (v == VT_CMP) {
         /* flags already set: emit one conditional branch at position r */
1389     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1390     op|=encbranch(r,t,1);
1391     o(op);
1392     t=r;
1393   } else if (v == VT_JMP || v == VT_JMPI) {
         /* bit 0 of v tells VT_JMP from VT_JMPI */
1394     if ((v & 1) == inv) {
1395       if(!vtop->c.i)
1396         vtop->c.i=t;
1397       else {
1398         uint32_t *x;
1399         int p,lp;
1400         if(t) {
               /* walk to the last branch of the chain held in c.i and make it
                  point to t */
1401           p = vtop->c.i;
1402           do {
1403             p = decbranch(lp=p);
1404           } while(p);
1405           x = (uint32_t *)(cur_text_section->data + lp);
1406           *x &= 0xff000000;
1407           *x |= encbranch(lp,t,1);
1408         }
1409         t = vtop->c.i;
1410       }
1411     } else {
1412       t = gjmp(t);
1413       gsym(vtop->c.i);
1414     }
1415   } else {
1416     if (is_float(vtop->type.t)) {
           /* compare the float with zero, then redo the test as a VT_CMP */
1417       r=gv(RC_FLOAT);
1418 #ifdef TCC_ARM_VFP
1419       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1420       o(0xEEF1FA10); /* fmstat */
1421 #else
1422       o(0xEE90F118|(fpr(r)<<16));
1423 #endif
1424       vtop->r = VT_CMP;
1425       vtop->c.i = TOK_NE;
1426       return gtst(inv, t);
1427     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1428       /* constant jmp optimization */
1429       if ((vtop->c.i != 0) != inv)
1430         t = gjmp(t);
1431     } else {
           /* integer in a register: emit a flag-setting instruction on it, then
              redo the test as a VT_CMP */
1432       v = gv(RC_INT);
1433       o(0xE3300000|(intr(v)<<16));
1434       vtop->r = VT_CMP;
1435       vtop->c.i = TOK_NE;
1436       return gtst(inv, t);
1437     }
1438   }
1439   vtop--;
1440   return t;
1441 }
1442
1443 /* generate an integer binary operation */
     /* The first switch classifies 'op' through 'c':
          c == 1: instruction built as 0xE0000000|(opc<<20)|..., second operand
                  may be an immediate;
          c == 2: shift, built as 0xE1A00000|(opc<<5)|...;
          c == 3: call to the runtime function 'func';
        '*' and TOK_UMULL are emitted directly and return early. */
1444 void gen_opi(int op)
1445 {
1446   int c, func = 0;
1447   uint32_t opc = 0, r, fr;
1448   unsigned short retreg = REG_IRET;
1449
1450   c=0;
1451   switch(op) {
1452     case '+':
1453       opc = 0x8;
1454       c=1;
1455       break;
1456     case TOK_ADDC1: /* add with carry generation */
1457       opc = 0x9;
1458       c=1;
1459       break;
1460     case '-':
1461       opc = 0x4;
1462       c=1;
1463       break;
1464     case TOK_SUBC1: /* sub with carry generation */
1465       opc = 0x5;
1466       c=1;
1467       break;
1468     case TOK_ADDC2: /* add with carry use */
1469       opc = 0xA;
1470       c=1;
1471       break;
1472     case TOK_SUBC2: /* sub with carry use */
1473       opc = 0xC;
1474       c=1;
1475       break;
1476     case '&':
1477       opc = 0x0;
1478       c=1;
1479       break;
1480     case '^':
1481       opc = 0x2;
1482       c=1;
1483       break;
1484     case '|':
1485       opc = 0x18;
1486       c=1;
1487       break;
1488     case '*':
1489       gv2(RC_INT, RC_INT);
1490       r = vtop[-1].r;
1491       fr = vtop[0].r;
1492       vtop--;
1493       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1494       return;
1495     case TOK_SHL:
1496       opc = 0;
1497       c=2;
1498       break;
1499     case TOK_SHR:
1500       opc = 1;
1501       c=2;
1502       break;
1503     case TOK_SAR:
1504       opc = 2;
1505       c=2;
1506       break;
1507     case '/':
1508     case TOK_PDIV:
1509       func=TOK___divsi3;
1510       c=3;
1511       break;
1512     case TOK_UDIV:
1513       func=TOK___udivsi3;
1514       c=3;
1515       break;
1516     case '%':
1517 #ifdef TCC_ARM_EABI
           /* the result is read from REG_LRET instead of REG_IRET */
1518       func=TOK___aeabi_idivmod;
1519       retreg=REG_LRET;
1520 #else
1521       func=TOK___modsi3;
1522 #endif
1523       c=3;
1524       break;
1525     case TOK_UMOD:
1526 #ifdef TCC_ARM_EABI
1527       func=TOK___aeabi_uidivmod;
1528       retreg=REG_LRET;
1529 #else
1530       func=TOK___umodsi3;
1531 #endif
1532       c=3;
1533       break;
1534     case TOK_UMULL:
           /* the result uses two registers: r and r2 of the remaining entry */
1535       gv2(RC_INT, RC_INT);
1536       r=intr(vtop[-1].r2=get_reg(RC_INT));
1537       c=vtop[-1].r;
1538       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1539       vtop--;
1540       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1541       return;
1542     default:
           /* any other op (the comparisons, see the 'done' label) uses opc 0x15 */
1543       opc = 0x15;
1544       c=1;
1545       break;
1546   }
1547   switch(c) {
1548     case 1:
           /* constant first operand of a subtraction: swap the operands and
              use the reversed form of the instruction */
1549       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1550         if(opc == 4 || opc == 5 || opc == 0xc) {
1551           vswap();
1552           opc|=2; // sub -> rsb
1553         }
1554       }
1555       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1556           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1557         gv(RC_INT);
1558       vswap();
1559       c=intr(gv(RC_INT));
1560       vswap();
1561       opc=0xE0000000|(opc<<20)|(c<<16);
1562       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1563         uint32_t x;
             /* try to encode the constant as an immediate; x == 0 means it
                could not be done and the register form below is used */
1564         x=stuff_const(opc|0x2000000,vtop->c.i);
1565         if(x) {
1566           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1567           o(x|(r<<12));
1568           goto done;
1569         }
1570       }
1571       fr=intr(gv(RC_INT));
1572       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1573       o(opc|(r<<12)|fr);
1574 done:
1575       vtop--;
           /* comparisons leave their result in the flags */
1576       if (op >= TOK_ULT && op <= TOK_GT) {
1577         vtop->r = VT_CMP;
1578         vtop->c.i = op;
1579       }
1580       break;
1581     case 2:
1582       opc=0xE1A00000|(opc<<5);
1583       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1584           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1585         gv(RC_INT);
1586       vswap();
1587       r=intr(gv(RC_INT));
1588       vswap();
1589       opc|=r;
1590       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
             /* constant shift amount: only its low 5 bits are encoded */
1591         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1592         c = vtop->c.i & 0x1f;
1593         o(opc|(c<<7)|(fr<<12));
1594       } else {
             /* shift amount taken from register fr */
1595         fr=intr(gv(RC_INT));
1596         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1597         o(opc|(c<<12)|(fr<<8)|0x10);
1598       }
1599       vtop--;
1600       break;
1601     case 3:
           /* call func(a, b): push the function, rotate it below both operands,
              call it, then push a value bound to the return register */
1602       vpush_global_sym(&func_old_type, func);
1603       vrott(3);
1604       gfunc_call(2);
1605       vpushi(0);
1606       vtop->r = retreg;
1607       break;
1608     default:
1609       tcc_error("gen_opi %i unimplemented!",op);
1610   }
1611 }
1612
1613 #ifdef TCC_ARM_VFP
1614 static int is_zero(int i)
1615 {
       /* Returns 1 when value stack entry vtop[i] is a plain constant (neither
          lvalue nor symbol) comparing equal to zero, 0 otherwise.
          The type is masked with VT_BTYPE, as done elsewhere in this file, so
          that extra bits in type.t do not make a float or double constant be
          read through the long double member. */
1616   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1617     return 0;
1618   if ((vtop[i].type.t & VT_BTYPE) == VT_FLOAT)
1619     return (vtop[i].c.f == 0.f);
1620   else if ((vtop[i].type.t & VT_BTYPE) == VT_DOUBLE)
1621     return (vtop[i].c.d == 0.0);
1622   return (vtop[i].c.ld == 0.l);
1623 }
1624
1625 /* generate a floating point operation 'v = t1 op t2' instruction. The
1626  *    two operands are guaranted to have the same floating point type */
     /* VFP variant (compiled when TCC_ARM_VFP is defined). Comparisons leave
        their result in the flags (VT_CMP) and return early. */
1627 void gen_opf(int op)
1628 {
1629   uint32_t x;
1630   int fneg=0,r;
1631   x=0xEE000A00|T2CPR(vtop->type.t);
1632   switch(op) {
1633     case '+':
           /* 0 + x or x + 0: drop the zero constant, nothing is emitted */
1634       if(is_zero(-1))
1635         vswap();
1636       if(is_zero(0)) {
1637         vtop--;
1638         return;
1639       }
1640       x|=0x300000;
1641       break;
1642     case '-':
1643       x|=0x300040;
           /* x - 0: drop the zero constant, nothing is emitted */
1644       if(is_zero(0)) {
1645         vtop--;
1646         return;
1647       }
           /* 0 - x: emit a negation of the single remaining operand */
1648       if(is_zero(-1)) {
1649         x|=0x810000; /* fsubX -> fnegX */
1650         vswap();
1651         vtop--;
1652         fneg=1;
1653       }
1654       break;
1655     case '*':
1656       x|=0x200000;
1657       break;
1658     case '/':
1659       x|=0x800000;
1660       break;
1661     default:
1662       if(op < TOK_ULT || op > TOK_GT) {
1663         tcc_error("unknown fp op %x!",op);
1664         return;
1665       }
           /* zero as first operand: swap the operands so the compare-with-zero
              form can be used, and remap the condition accordingly */
1666       if(is_zero(-1)) {
1667         vswap();
1668         switch(op) {
1669           case TOK_LT: op=TOK_GT; break;
1670           case TOK_GE: op=TOK_ULE; break;
1671           case TOK_LE: op=TOK_GE; break;
1672           case TOK_GT: op=TOK_ULT; break;
1673         }
1674       }
1675       x|=0xB40040; /* fcmpX */
1676       if(op!=TOK_EQ && op!=TOK_NE)
1677         x|=0x80; /* fcmpX -> fcmpeX */
1678       if(is_zero(0)) {
1679         vtop--;
1680         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1681       } else {
1682         x|=vfpr(gv(RC_FLOAT));
1683         vswap();
1684         o(x|(vfpr(gv(RC_FLOAT))<<12));
1685         vtop--;
1686       }
1687       o(0xEEF1FA10); /* fmstat */
1688
           /* remap the condition stored with VT_CMP */
1689       switch(op) {
1690         case TOK_LE: op=TOK_ULE; break;
1691         case TOK_LT: op=TOK_ULT; break;
1692         case TOK_UGE: op=TOK_GE; break;
1693         case TOK_UGT: op=TOK_GT; break;
1694       }
1695
1696       vtop->r = VT_CMP;
1697       vtop->c.i = op;
1698       return;
1699   }
       /* arithmetic: the register of the top operand goes in the low bits of
          the instruction, the other operand (unless negating) is shifted by 16 */
1700   r=gv(RC_FLOAT);
1701   x|=vfpr(r);
1702   r=regmask(r);
1703   if(!fneg) {
1704     int r2;
1705     vswap();
1706     r2=gv(RC_FLOAT);
1707     x|=vfpr(r2)<<16;
1708     r|=regmask(r2);
1709   }
       /* 'r' is now the mask of the operand registers, passed to get_reg_ex
          when choosing the destination register */
1710   vtop->r=get_reg_ex(RC_FLOAT,r);
1711   if(!fneg)
1712     vtop--;
1713   o(x|(vfpr(vtop->r)<<12));
1714 }
1715
1716 #else
1717 static uint32_t is_fconst(void)
1718 {
       /* Returns a non-zero code when the top of the value stack is a plain
          finite constant whose magnitude is one of 0, 1, 2, 3, 4, 5, 0.5 or 10,
          and 0 otherwise. The code is 0x8|index for non-negative values and
          0x18|index for negative ones.
          The type is masked with VT_BTYPE, as done elsewhere in this file, so
          that extra bits in type.t do not make a float or double constant be
          read through the long double member. */
1719   long double f;
1720   uint32_t r;
1721   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1722     return 0;
1723   if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
1724     f = vtop->c.f;
1725   else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1726     f = vtop->c.d;
1727   else
1728     f = vtop->c.ld;
1729   if(!ieee_finite(f))
1730     return 0;
1731   r=0x8;
       /* negative value: flag it with 0x10 and continue with its magnitude */
1732   if(f<0.0) {
1733     r=0x18;
1734     f=-f;
1735   }
1736   if(f==0.0)
1737     return r;
1738   if(f==1.0)
1739     return r|1;
1740   if(f==2.0)
1741     return r|2;
1742   if(f==3.0)
1743     return r|3;
1744   if(f==4.0)
1745     return r|4;
1746   if(f==5.0)
1747     return r|5;
1748   if(f==0.5)
1749     return r|6;
1750   if(f==10.0)
1751     return r|7;
1752   return 0;
1753 }
1754
1755 /* generate a floating point operation 'v = t1 op t2' instruction. The
1756    two operands are guaranted to have the same floating point type */
     /* FPA variant (compiled when TCC_ARM_VFP is not defined).
        c1 / c2 hold the is_fconst() code of the first / second operand, 0 when
        the operand is not one of the recognized constants; a code above 0xf
        denotes a negative constant. */
1757 void gen_opf(int op)
1758 {
1759   uint32_t x, r, r2, c1, c2;
1760   //fputs("gen_opf\n",stderr);
1761   vswap();
1762   c1 = is_fconst();
1763   vswap();
1764   c2 = is_fconst();
1765   x=0xEE000100;
       /* precision bits selected from the operand type */
1766 #if LDOUBLE_SIZE == 8
1767   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1768     x|=0x80;
1769 #else
1770   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1771     x|=0x80;
1772   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1773     x|=0x80000;
1774 #endif
1775   switch(op)
1776   {
1777     case '+':
           /* make sure a recognized constant, if any, ends up as second operand */
1778       if(!c2) {
1779         vswap();
1780         c2=c1;
1781       }
1782       vswap();
1783       r=fpr(gv(RC_FLOAT));
1784       vswap();
1785       if(c2) {
             /* adding a negative constant: subtract its magnitude instead */
1786         if(c2>0xf)
1787           x|=0x200000; // suf
1788         r2=c2&0xf;
1789       } else {
1790         r2=fpr(gv(RC_FLOAT));
1791       }
1792       break;
1793     case '-':
1794       if(c2) {
             /* x - const: suf for a non-negative constant, otherwise the
                opcode is left unchanged and the magnitude is used */
1795         if(c2<=0xf)
1796           x|=0x200000; // suf
1797         r2=c2&0xf;
1798         vswap();
1799         r=fpr(gv(RC_FLOAT));
1800         vswap();
1801       } else if(c1 && c1<=0xf) {
             /* const - x with a non-negative constant: reverse subtract */
1802         x|=0x300000; // rsf
1803         r2=c1;
1804         r=fpr(gv(RC_FLOAT));
1805         vswap();
1806       } else {
1807         x|=0x200000; // suf
1808         vswap();
1809         r=fpr(gv(RC_FLOAT));
1810         vswap();
1811         r2=fpr(gv(RC_FLOAT));
1812       }
1813       break;
1814     case '*':
           /* prefer a usable (non-negative) constant as second operand */
1815       if(!c2 || c2>0xf) {
1816         vswap();
1817         c2=c1;
1818       }
1819       vswap();
1820       r=fpr(gv(RC_FLOAT));
1821       vswap();
1822       if(c2 && c2<=0xf)
1823         r2=c2;
1824       else
1825         r2=fpr(gv(RC_FLOAT));
1826       x|=0x100000; // muf
1827       break;
1828     case '/':
1829       if(c2 && c2<=0xf) {
1830         x|=0x400000; // dvf
1831         r2=c2;
1832         vswap();
1833         r=fpr(gv(RC_FLOAT));
1834         vswap();
1835       } else if(c1 && c1<=0xf) {
             /* const / x with a non-negative constant: reverse divide */
1836         x|=0x500000; // rdf
1837         r2=c1;
1838         r=fpr(gv(RC_FLOAT));
1839         vswap();
1840       } else {
1841         x|=0x400000; // dvf
1842         vswap();
1843         r=fpr(gv(RC_FLOAT));
1844         vswap();
1845         r2=fpr(gv(RC_FLOAT));
1846       }
1847       break;
1848     default:
1849       if(op >= TOK_ULT && op <= TOK_GT) {
1850         x|=0xd0f110; // cmfe
1851 /* bug (intention?) in Linux FPU emulator
1852    doesn't set carry if equal */
1853         switch(op) {
1854           case TOK_ULT:
1855           case TOK_UGE:
1856           case TOK_ULE:
1857           case TOK_UGT:
1858             tcc_error("unsigned comparision on floats?");
1859             break;
1860           case TOK_LT:
1861             op=TOK_Nset;
1862             break;
1863           case TOK_LE:
1864             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1865             break;
1866           case TOK_EQ:
1867           case TOK_NE:
1868             x&=~0x400000; // cmfe -> cmf
1869             break;
1870         }
             /* only the first operand is a recognized constant: swap the
                operands and mirror the condition */
1871         if(c1 && !c2) {
1872           c2=c1;
1873           vswap();
1874           switch(op) {
1875             case TOK_Nset:
1876               op=TOK_GT;
1877               break;
1878             case TOK_GE:
1879               op=TOK_ULE;
1880               break;
1881             case TOK_ULE:
1882               op=TOK_GE;
1883               break;
1884             case TOK_GT:
1885               op=TOK_Nset;
1886               break;
1887           }
1888         }
1889         vswap();
1890         r=fpr(gv(RC_FLOAT));
1891         vswap();
1892         if(c2) {
1893           if(c2>0xf)
1894             x|=0x200000;
1895           r2=c2&0xf;
1896         } else {
1897           r2=fpr(gv(RC_FLOAT));
1898         }
             /* the result of the comparison lives in the flags */
1899         vtop[-1].r = VT_CMP;
1900         vtop[-1].c.i = op;
1901       } else {
1902         tcc_error("unknown fp op %x!",op);
1903         return;
1904       }
1905   }
       /* destination field: 15 for a comparison, otherwise a float register
          obtained from get_reg_ex */
1906   if(vtop[-1].r == VT_CMP)
1907     c1=15;
1908   else {
1909     c1=vtop->r;
         /* r2 holds a constant code (bit 3 set), not a register */
1910     if(r2&0x8)
1911       c1=vtop[-1].r;
1912     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1913     c1=fpr(vtop[-1].r);
1914   }
1915   vtop--;
1916   o(x|(r<<16)|(c1<<12)|r2);
1917 }
1918 #endif
1919
1920 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1921    and 'long long' cases. */
     /* int, short and byte sources are converted inline; long long sources go
        through a call to a runtime function. Any other source type ends in
        tcc_error(). */
1922 ST_FUNC void gen_cvt_itof1(int t)
1923 {
1924   uint32_t r, r2;
1925   int bt;
1926   bt=vtop->type.t & VT_BTYPE;
1927   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1928 #ifndef TCC_ARM_VFP
1929     uint32_t dsize = 0;
1930 #endif
1931     r=intr(gv(RC_INT));
1932 #ifdef TCC_ARM_VFP
         /* move the integer to a float register, then convert it in place */
1933     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1934     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1935     r2|=r2<<12;
1936     if(!(vtop->type.t & VT_UNSIGNED))
1937       r2|=0x80;                /* fuitoX -> fsitoX */
1938     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1939 #else
1940     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1941     if((t & VT_BTYPE) != VT_FLOAT)
1942       dsize=0x80;    /* flts -> fltd */
1943     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1944     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
           /* unsigned int: the instructions below carry a condition code and
              add the 4294967296.0f literal to the converted value */
1945       uint32_t off = 0;
1946       o(0xE3500000|(r<<12));        /* cmp */
1947       r=fpr(get_reg(RC_FLOAT));
           /* reuse the previously emitted literal when its word offset fits
              (at most 255), otherwise emit a new one below */
1948       if(last_itod_magic) {
1949         off=ind+8-last_itod_magic;
1950         off/=4;
1951         if(off>255)
1952           off=0;
1953       }
1954       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1955       if(!off) {
1956         o(0xEA000000);              /* b */
1957         last_itod_magic=ind;
1958         o(0x4F800000);              /* 4294967296.0f */
1959       }
1960       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1961     }
1962 #endif
1963     return;
1964   } else if(bt == VT_LLONG) {
1965     int func;
         /* func_type stays 0 when no runtime function matches 't' */
1966     CType *func_type = 0;
1967     if((t & VT_BTYPE) == VT_FLOAT) {
1968       func_type = &func_float_type;
1969       if(vtop->type.t & VT_UNSIGNED)
1970         func=TOK___floatundisf;
1971       else
1972         func=TOK___floatdisf;
1973 #if LDOUBLE_SIZE != 8
1974     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1975       func_type = &func_ldouble_type;
1976       if(vtop->type.t & VT_UNSIGNED)
1977         func=TOK___floatundixf;
1978       else
1979         func=TOK___floatdixf;
1980     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1981 #else
1982     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1983 #endif
1984       func_type = &func_double_type;
1985       if(vtop->type.t & VT_UNSIGNED)
1986         func=TOK___floatundidf;
1987       else
1988         func=TOK___floatdidf;
1989     }
1990     if(func_type) {
           /* call func(value) and bind the pushed result to TREG_F0 */
1991       vpush_global_sym(func_type, func);
1992       vswap();
1993       gfunc_call(1);
1994       vpushi(0);
1995       vtop->r=TREG_F0;
1996       return;
1997     }
1998   }
1999   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2000 }
2001
2002 /* convert fp to int 't' type */
     /* Conversions to VT_INT are emitted inline, except unsigned ones without
        VFP which call a runtime function; conversions to VT_LLONG always call
        a runtime function. Unsupported combinations end in tcc_error(). */
2003 void gen_cvt_ftoi(int t)
2004 {
2005   uint32_t r, r2;
       /* u: unsigned flag of the target type, func: runtime function to call
          (0 when none was selected) */
2006   int u, func = 0;
2007   u=t&VT_UNSIGNED;
2008   t&=VT_BTYPE;
       /* r2 first holds the basic type of the source value */
2009   r2=vtop->type.t & VT_BTYPE;
2010   if(t==VT_INT) {
2011 #ifdef TCC_ARM_VFP
2012     r=vfpr(gv(RC_FLOAT));
2013     u=u?0:0x10000;
2014     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
         /* then move the converted value to an integer register */
2015     r2=intr(vtop->r=get_reg(RC_INT));
2016     o(0xEE100A10|(r<<16)|(r2<<12));
2017     return;
2018 #else
2019     if(u) {
2020       if(r2 == VT_FLOAT)
2021         func=TOK___fixunssfsi;
2022 #if LDOUBLE_SIZE != 8
2023       else if(r2 == VT_LDOUBLE)
2024         func=TOK___fixunsxfsi;
2025       else if(r2 == VT_DOUBLE)
2026 #else
2027       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2028 #endif
2029         func=TOK___fixunsdfsi;
2030     } else {
2031       r=fpr(gv(RC_FLOAT));
2032       r2=intr(vtop->r=get_reg(RC_INT));
2033       o(0xEE100170|(r2<<12)|r);
2034       return;
2035     }
2036 #endif
2037   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2038     if(r2 == VT_FLOAT)
2039       func=TOK___fixsfdi;
2040 #if LDOUBLE_SIZE != 8
2041     else if(r2 == VT_LDOUBLE)
2042       func=TOK___fixxfdi;
2043     else if(r2 == VT_DOUBLE)
2044 #else
2045     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2046 #endif
2047       func=TOK___fixdfdi;
2048   }
2049   if(func) {
         /* call func(value); the pushed result is bound to REG_IRET, plus
            REG_LRET as second register for long long */
2050     vpush_global_sym(&func_old_type, func);
2051     vswap();
2052     gfunc_call(1);
2053     vpushi(0);
2054     if(t == VT_LLONG)
2055       vtop->r2 = REG_LRET;
2056     vtop->r = REG_IRET;
2057     return;
2058   }
2059   tcc_error("unimplemented gen_cvt_ftoi!");
2060 }
2061
2062 /* Convert the value on top of the value stack to floating point type 't'. */
2063 void gen_cvt_ftof(int t)
2064 {
2065 #ifdef TCC_ARM_VFP
2066   uint32_t reg;
2067   if (((vtop->type.t & VT_BTYPE) == VT_FLOAT) == ((t & VT_BTYPE) == VT_FLOAT))
2068     return; /* nothing is emitted unless exactly one side is VT_FLOAT */
2069   reg = vfpr(gv(RC_FLOAT));
2070   o(0xEEB70AC0 | T2CPR(vtop->type.t) | reg | (reg << 12));
2071 #else
2072   gv(RC_FLOAT); /* i386 and FPA ARM only need the float in a register */
2073 #endif
2074 }
2075
2076 /* computed goto support */
     /* Calls gcall_or_jmp(1) (gfunc_call uses 0 for a call), then pops the
        top value stack entry. */
2077 void ggoto(void)
2078 {
2079   gcall_or_jmp(1);
2080   vtop--;
2081 }
2082
2083 /* Save the stack pointer onto the stack and return the location of its address */
     /* Not implemented in this code generator: 'addr' is ignored and the only
        effect is the tcc_error() call below. */
2084 ST_FUNC void gen_vla_sp_save(int addr) {
2085     tcc_error("variable length arrays unsupported for this target");
2086 }
2087
2088 /* Restore the SP from a location on the stack */
     /* Not implemented in this code generator: 'addr' is ignored and the only
        effect is the tcc_error() call below. */
2089 ST_FUNC void gen_vla_sp_restore(int addr) {
2090     tcc_error("variable length arrays unsupported for this target");
2091 }
2092
2093 /* Subtract from the stack pointer, and push the resulting value onto the stack */
     /* Not implemented in this code generator: 'type' and 'align' are ignored
        and the only effect is the tcc_error() call below. */
2094 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2095     tcc_error("variable length arrays unsupported for this target");
2096 }
2097
2098 /* end of ARM code generator */
2099 /*************************************************************/
2100 #endif
2101 /*************************************************************/