arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #ifdef TCC_ARM_EABI
  27 #ifndef TCC_ARM_VFP /* Avoid useless warning */
  28 #define TCC_ARM_VFP
  29 #endif
  30 #endif
  31
  32 /* number of available registers */
  33 #ifdef TCC_ARM_VFP
  34 #define NB_REGS            13
  35 #else
  36 #define NB_REGS             9
  37 #endif
  38
  39 #ifndef TCC_ARM_VERSION
  40 # define TCC_ARM_VERSION 5
  41 #endif
  42
  43 /* a register can belong to several classes. The classes must be
  44    sorted from more general to more precise (see gv2() code which does
  45    assumptions on it). */
  46 #define RC_INT     0x0001 /* generic integer register */
  47 #define RC_FLOAT   0x0002 /* generic float register */
  48 #define RC_R0      0x0004
  49 #define RC_R1      0x0008
  50 #define RC_R2      0x0010
  51 #define RC_R3      0x0020
  52 #define RC_R12     0x0040
  53 #define RC_F0      0x0080
  54 #define RC_F1      0x0100
  55 #define RC_F2      0x0200
  56 #define RC_F3      0x0400
  57 #ifdef TCC_ARM_VFP
  58 #define RC_F4      0x0800
  59 #define RC_F5      0x1000
  60 #define RC_F6      0x2000
  61 #define RC_F7      0x4000
  62 #endif
  63 #define RC_IRET    RC_R0  /* function return: integer register */
  64 #define RC_LRET    RC_R1  /* function return: second integer register */
  65 #define RC_FRET    RC_F0  /* function return: float register */
  66
  67 /* pretty names for the registers */
  68 enum {
  69     TREG_R0 = 0,
  70     TREG_R1,
  71     TREG_R2,
  72     TREG_R3,
  73     TREG_R12,
  74     TREG_F0,
  75     TREG_F1,
  76     TREG_F2,
  77     TREG_F3,
  78 #ifdef TCC_ARM_VFP
  79     TREG_F4,
  80     TREG_F5,
  81     TREG_F6,
  82     TREG_F7,
  83 #endif
  84 };
  85
#ifdef TCC_ARM_VFP
/* Precision selector OR-ed into VFP instruction encodings: evaluates to
   0x100 for every base type other than VT_FLOAT and to 0 for VT_FLOAT.
   This is the same bit load() sets to turn "flds" into "fldd". */
#define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
#endif

/* return registers for function */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_LRET TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */
  94
#ifdef TCC_ARM_EABI
/* With the EABI the 64-bit division and modulo tokens are aliased to the
   __aeabi_[u]ldivmod tokens: the signed quotient and remainder both map to
   TOK___aeabi_ldivmod, the unsigned ones to TOK___aeabi_uldivmod. */
#define TOK___divdi3 TOK___aeabi_ldivmod
#define TOK___moddi3 TOK___aeabi_ldivmod
#define TOK___udivdi3 TOK___aeabi_uldivmod
#define TOK___umoddi3 TOK___aeabi_uldivmod
#endif
 101
/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. */
/* #define FUNC_STRUCT_PARAM_AS_PTR */

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
/* With VFP the size is forced to 8 ... */
#ifdef TCC_ARM_VFP
#define LDOUBLE_SIZE  8
#endif

/* ... and without VFP it defaults to 8 as well unless it was already
   defined. load() and store() test LDOUBLE_SIZE == 8 to choose between
   two sets of non-VFP float load/store encodings. */
#ifndef LDOUBLE_SIZE
#define LDOUBLE_SIZE  8
#endif

/* long double alignment: 8 bytes with the EABI, 4 bytes otherwise */
#ifdef TCC_ARM_EABI
#define LDOUBLE_ALIGN 8
#else
#define LDOUBLE_ALIGN 4
#endif

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* NOTE(review): presumably tells the front end that plain 'char' is
   unsigned on this target -- confirm against the code that tests it. */
#define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 /* ELF defines */
 134
/* ELF machine identifier used for this target */
#define EM_TCC_TARGET EM_ARM

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_ARM_ABS32
/* pointer-sized data uses the same absolute 32-bit relocation */
#define R_DATA_PTR  R_ARM_ABS32
#define R_JMP_SLOT  R_ARM_JUMP_SLOT
#define R_COPY      R_ARM_COPY

/* NOTE(review): presumably the load address of the first segment and the
   page size used when laying out the output file -- confirm in the ELF
   writer, which is not part of this file. */
#define ELF_START_ADDR 0x00008000
#define ELF_PAGE_SIZE  0x1000
 145
 146 /******************************************************/
 147 #else /* ! TARGET_DEFS_ONLY */
 148 /******************************************************/
 149 #include "tcc.h"
 150
 151 ST_DATA const int reg_classes[NB_REGS] = {
 152     /* r0 */ RC_INT | RC_R0,
 153     /* r1 */ RC_INT | RC_R1,
 154     /* r2 */ RC_INT | RC_R2,
 155     /* r3 */ RC_INT | RC_R3,
 156     /* r12 */ RC_INT | RC_R12,
 157     /* f0 */ RC_FLOAT | RC_F0,
 158     /* f1 */ RC_FLOAT | RC_F1,
 159     /* f2 */ RC_FLOAT | RC_F2,
 160     /* f3 */ RC_FLOAT | RC_F3,
 161 #ifdef TCC_ARM_VFP
 162  /* d4/s8 */ RC_FLOAT | RC_F4,
 163 /* d5/s10 */ RC_FLOAT | RC_F5,
 164 /* d6/s12 */ RC_FLOAT | RC_F6,
 165 /* d7/s14 */ RC_FLOAT | RC_F7,
 166 #endif
 167 };
 168
/* File-scope code generator state. None of these is referenced in the part
   of the file shown here.
   NOTE(review): judging by the names only, func_sub_sp_offset records a
   code offset related to the stack adjustment of the current function,
   last_itod_magic one related to int-to-double conversion, and leaffunc
   whether the current function makes no calls -- confirm against their
   uses further down. */
static int func_sub_sp_offset, last_itod_magic;
static int leaffunc;
 171
 172 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 173 static CType float_type, double_type, func_float_type, func_double_type;
 174 ST_FUNC void arm_init_types(void)
 175 {
 176     float_type.t = VT_FLOAT;
 177     double_type.t = VT_DOUBLE;
 178     func_float_type.t = VT_FUNC;
 179     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 180     func_double_type.t = VT_FUNC;
 181     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 182 }
 183 #else
 184 #define func_float_type func_old_type
 185 #define func_double_type func_old_type
 186 #define func_ldouble_type func_old_type
 187 ST_FUNC void arm_init_types(void) {}
 188 #endif
 189
 190 static int two2mask(int a,int b) {
 191   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 192 }
 193
 194 static int regmask(int r) {
 195   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 196 }
 197
 198 /******************************************************/
 199
 200 void o(uint32_t i)
 201 {
 202   /* this is a good place to start adding big-endian support*/
 203   int ind1;
 204
 205   ind1 = ind + 4;
 206   if (!cur_text_section)
 207     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 208          "can't evaluate constant expressions outside of a function.");
 209   if (ind1 > cur_text_section->data_allocated)
 210     section_realloc(cur_text_section, ind1);
 211   cur_text_section->data[ind++] = i&255;
 212   i>>=8;
 213   cur_text_section->data[ind++] = i&255;
 214   i>>=8;
 215   cur_text_section->data[ind++] = i&255;
 216   i>>=8;
 217   cur_text_section->data[ind++] = i;
 218 }
 219
 220 static uint32_t stuff_const(uint32_t op, uint32_t c)
 221 {
 222   int try_neg=0;
 223   uint32_t nc = 0, negop = 0;
 224
 225   switch(op&0x1F00000)
 226   {
 227     case 0x800000: //add
 228     case 0x400000: //sub
 229       try_neg=1;
 230       negop=op^0xC00000;
 231       nc=-c;
 232       break;
 233     case 0x1A00000: //mov
 234     case 0x1E00000: //mvn
 235       try_neg=1;
 236       negop=op^0x400000;
 237       nc=~c;
 238       break;
 239     case 0x200000: //xor
 240       if(c==~0)
 241         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 242       break;
 243     case 0x0: //and
 244       if(c==~0)
 245         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 246     case 0x1C00000: //bic
 247       try_neg=1;
 248       negop=op^0x1C00000;
 249       nc=~c;
 250       break;
 251     case 0x1800000: //orr
 252       if(c==~0)
 253         return (op&0xFFF0FFFF)|0x1E00000;
 254       break;
 255   }
 256   do {
 257     uint32_t m;
 258     int i;
 259     if(c<256) /* catch undefined <<32 */
 260       return op|c;
 261     for(i=2;i<32;i+=2) {
 262       m=(0xff>>i)|(0xff<<(32-i));
 263       if(!(c&~m))
 264         return op|(i<<7)|(c<<i)|(c>>(32-i));
 265     }
 266     op=negop;
 267     c=nc;
 268   } while(try_neg--);
 269   return 0;
 270 }
 271
 272
 273 //only add,sub
 274 void stuff_const_harder(uint32_t op, uint32_t v) {
 275   uint32_t x;
 276   x=stuff_const(op,v);
 277   if(x)
 278     o(x);
 279   else {
 280     uint32_t a[16], nv, no, o2, n2;
 281     int i,j,k;
 282     a[0]=0xff;
 283     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 284     for(i=1;i<16;i++)
 285       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 286     for(i=0;i<12;i++)
 287       for(j=i<4?i+12:15;j>=i+4;j--)
 288         if((v&(a[i]|a[j]))==v) {
 289           o(stuff_const(op,v&a[i]));
 290           o(stuff_const(o2,v&a[j]));
 291           return;
 292         }
 293     no=op^0xC00000;
 294     n2=o2^0xC00000;
 295     nv=-v;
 296     for(i=0;i<12;i++)
 297       for(j=i<4?i+12:15;j>=i+4;j--)
 298         if((nv&(a[i]|a[j]))==nv) {
 299           o(stuff_const(no,nv&a[i]));
 300           o(stuff_const(n2,nv&a[j]));
 301           return;
 302         }
 303     for(i=0;i<8;i++)
 304       for(j=i+4;j<12;j++)
 305         for(k=i<4?i+12:15;k>=j+4;k--)
 306           if((v&(a[i]|a[j]|a[k]))==v) {
 307             o(stuff_const(op,v&a[i]));
 308             o(stuff_const(o2,v&a[j]));
 309             o(stuff_const(o2,v&a[k]));
 310             return;
 311           }
 312     no=op^0xC00000;
 313     nv=-v;
 314     for(i=0;i<8;i++)
 315       for(j=i+4;j<12;j++)
 316         for(k=i<4?i+12:15;k>=j+4;k--)
 317           if((nv&(a[i]|a[j]|a[k]))==nv) {
 318             o(stuff_const(no,nv&a[i]));
 319             o(stuff_const(n2,nv&a[j]));
 320             o(stuff_const(n2,nv&a[k]));
 321             return;
 322           }
 323     o(stuff_const(op,v&a[0]));
 324     o(stuff_const(o2,v&a[4]));
 325     o(stuff_const(o2,v&a[8]));
 326     o(stuff_const(o2,v&a[12]));
 327   }
 328 }
 329
 330 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 331 {
 332   addr-=pos+8;
 333   addr/=4;
 334   if(addr>=0x1000000 || addr<-0x1000000) {
 335     if(fail)
 336       tcc_error("FIXME: function bigger than 32MB");
 337     return 0;
 338   }
 339   return 0x0A000000|(addr&0xffffff);
 340 }
 341
 342 int decbranch(int pos)
 343 {
 344   int x;
 345   x=*(uint32_t *)(cur_text_section->data + pos);
 346   x&=0x00ffffff;
 347   if(x&0x800000)
 348     x-=0x1000000;
 349   return x*4+pos+8;
 350 }
 351
 352 /* output a symbol and patch all calls to it */
 353 void gsym_addr(int t, int a)
 354 {
 355   uint32_t *x;
 356   int lt;
 357   while(t) {
 358     x=(uint32_t *)(cur_text_section->data + t);
 359     t=decbranch(lt=t);
 360     if(a==lt+4)
 361       *x=0xE1A00000; // nop
 362     else {
 363       *x &= 0xff000000;
 364       *x |= encbranch(lt,a,1);
 365     }
 366   }
 367 }
 368
/* Patch the chain of pending branches starting at 't' so that they all
   target the current output position 'ind' (see gsym_addr()). */
void gsym(int t)
{
  gsym_addr(t, ind);
}
 373
 374 #ifdef TCC_ARM_VFP
 375 static uint32_t vfpr(int r)
 376 {
 377   if(r<TREG_F0 || r>TREG_F7)
 378     tcc_error("compiler error! register %i is no vfp register",r);
 379   return r-5;
 380 }
 381 #else
 382 static uint32_t fpr(int r)
 383 {
 384   if(r<TREG_F0 || r>TREG_F3)
 385     tcc_error("compiler error! register %i is no fpa register",r);
 386   return r-5;
 387 }
 388 #endif
 389
 390 static uint32_t intr(int r)
 391 {
 392   if(r==4)
 393     return 12;
 394   if((r<0 || r>4) && r!=14)
 395     tcc_error("compiler error! register %i is no int register",r);
 396   return r;
 397 }
 398
 399 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 400 {
 401   if(*off>maxoff || *off&((1<<shift)-1)) {
 402     uint32_t x, y;
 403     x=0xE280E000;
 404     if(*sgn)
 405       x=0xE240E000;
 406     x|=(*base)<<16;
 407     *base=14; // lr
 408     y=stuff_const(x,*off&~maxoff);
 409     if(y) {
 410       o(y);
 411       *off&=maxoff;
 412       return;
 413     }
 414     y=stuff_const(x,(*off+maxoff)&~maxoff);
 415     if(y) {
 416       o(y);
 417       *sgn=!*sgn;
 418       *off=((*off+maxoff)&~maxoff)-*off;
 419       return;
 420     }
 421     stuff_const_harder(x,*off&~maxoff);
 422     *off&=maxoff;
 423   }
 424 }
 425
 426 static uint32_t mapcc(int cc)
 427 {
 428   switch(cc)
 429   {
 430     case TOK_ULT:
 431       return 0x30000000; /* CC/LO */
 432     case TOK_UGE:
 433       return 0x20000000; /* CS/HS */
 434     case TOK_EQ:
 435       return 0x00000000; /* EQ */
 436     case TOK_NE:
 437       return 0x10000000; /* NE */
 438     case TOK_ULE:
 439       return 0x90000000; /* LS */
 440     case TOK_UGT:
 441       return 0x80000000; /* HI */
 442     case TOK_Nset:
 443       return 0x40000000; /* MI */
 444     case TOK_Nclear:
 445       return 0x50000000; /* PL */
 446     case TOK_LT:
 447       return 0xB0000000; /* LT */
 448     case TOK_GE:
 449       return 0xA0000000; /* GE */
 450     case TOK_LE:
 451       return 0xD0000000; /* LE */
 452     case TOK_GT:
 453       return 0xC0000000; /* GT */
 454   }
 455   tcc_error("unexpected condition code");
 456   return 0xE0000000; /* AL */
 457 }
 458
 459 static int negcc(int cc)
 460 {
 461   switch(cc)
 462   {
 463     case TOK_ULT:
 464       return TOK_UGE;
 465     case TOK_UGE:
 466       return TOK_ULT;
 467     case TOK_EQ:
 468       return TOK_NE;
 469     case TOK_NE:
 470       return TOK_EQ;
 471     case TOK_ULE:
 472       return TOK_UGT;
 473     case TOK_UGT:
 474       return TOK_ULE;
 475     case TOK_Nset:
 476       return TOK_Nclear;
 477     case TOK_Nclear:
 478       return TOK_Nset;
 479     case TOK_LT:
 480       return TOK_GE;
 481     case TOK_GE:
 482       return TOK_LT;
 483     case TOK_LE:
 484       return TOK_GT;
 485     case TOK_GT:
 486       return TOK_LE;
 487   }
 488   tcc_error("unexpected condition code");
 489   return TOK_NE;
 490 }
 491
/* load 'r' from value 'sv' */
/* Emits the code that brings the value described by 'sv' into register
   'r'. Lvalues are read from memory through a base register plus offset;
   the remaining cases are constants, frame addresses, comparison results,
   pending jump chains and plain register copies. Any combination that is
   not handled ends in tcc_error(). */
void load(int r, SValue *sv)
{
  int v, ft, fc, fr, sign;
  uint32_t op;
  SValue v1;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL) {
    uint32_t base = 0xB; // fp
    if(v == VT_LLOCAL) {
      /* the local slot holds a pointer: fetch it into lr first, then
         read through it with a zero offset */
      v1.type.t = VT_PTR;
      v1.r = VT_LOCAL | VT_LVAL;
      v1.c.ul = sv->c.ul;
      load(base=14 /* lr */, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v == VT_CONST) {
      /* constant or symbol address: materialise it in lr, then read
         through it with a zero offset */
      v1.type.t = VT_PTR;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v < VT_CONST) {
      /* the address is already in register 'v' */
      base=intr(v);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
      if(is_float(ft)) {
        /* offset is encoded in words (fc>>2), at most 1020 bytes */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED100A00; /* flds */
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* flds -> fldd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED100100;
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        /* select the operand size from the float type */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        else if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
      } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
                || (ft & VT_BTYPE) == VT_SHORT) {
        /* signed bytes and all shorts: the 8-bit offset is split into
           two nibbles of the instruction */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE1500090;
        if ((ft & VT_BTYPE) == VT_SHORT)
          op|=0x20; /* halfword access */
        if ((ft & VT_UNSIGNED) == 0)
          op|=0x40; /* sign-extending access */
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* words, unsigned bytes and bools: 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5100000;
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  } else {
    if (v == VT_CONST) {
      /* try a single mov/mvn; symbols and constants that cannot be
         encoded are read from a literal word placed in the code and
         jumped over */
      op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        o(0xE59F0000|(intr(r)<<12)); /* ldr r, [pc] */
        o(0xEA000000);               /* b over the literal */
        if(fr & VT_SYM)
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
      } else
        o(op);
      return;
    } else if (v == VT_LOCAL) {
      /* address of a stack slot: fp + constant, using the same literal
         fallback followed by an add of fp */
      op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM) // needed ?
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
        o(0xE08B0000|(intr(r)<<12)|intr(r));
      } else
        o(op);
      return;
    } else if(v == VT_CMP) {
      /* turn the flags into 0/1: conditional mov #1 followed by a
         mov #0 under the opposite condition */
      o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
      o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
      return;
    } else if (v == VT_JMP || v == VT_JMPI) {
      /* fall-through path yields t and skips the next mov; the pending
         jumps in sv->c.ul are resolved to land on that mov, which
         yields t^1 */
      int t;
      t = v & 1;
      o(0xE3A00000|(intr(r)<<12)|t);
      o(0xEA000000);
      gsym(sv->c.ul);
      o(0xE3A00000|(intr(r)<<12)|(t^1));
      return;
    } else if (v < VT_CONST) {
      /* register to register copy */
      if(is_float(ft))
#ifdef TCC_ARM_VFP
        o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
#else
        o(0xEE008180|(fpr(r)<<12)|fpr(v));
#endif
      else
        o(0xE1A00000|(intr(r)<<12)|intr(v));
      return;
    }
  }
  tcc_error("load unimplemented!");
}
 630
/* store register 'r' in lvalue 'v' */
/* Emits the code that writes register 'r' to the memory location described
   by 'sv': a stack slot (fp +/- offset), an address held in a register, or
   a constant/symbol address that is first materialised in lr. Any other
   kind of destination ends in tcc_error(). */
void store(int r, SValue *sv)
{
  SValue v1;
  int v, ft, fc, fr, sign;
  uint32_t op;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL || fr == VT_LOCAL) {
    uint32_t base = 0xb; /* fp */
    if(v < VT_CONST) {
      /* the address is already in register 'v' */
      base=intr(v);
      v=VT_LOCAL;
      fc=sign=0;
    } else if(v == VT_CONST) {
      /* constant or symbol address: load it into lr and store through
         it with a zero offset */
      v1.type.t = ft;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
       if(is_float(ft)) {
        /* offset is encoded in words (fc>>2), at most 1020 bytes */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED000A00; /* fsts */
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* fsts -> fstd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED000100;
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        /* select the operand size from the float type */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
        return;
      } else if((ft & VT_BTYPE) == VT_SHORT) {
        /* halfword store: the 8-bit offset is split into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE14000B0;
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word store, or byte store for bytes and bools: 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5000000;
        if(!sign)
          op|=0x800000; /* offset is added to the base */
        if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  }
  tcc_error("store unimplemented");
}
 711
/* Adjust the stack pointer by 'val' bytes (negative values allocate):
   emits "add sp, sp, #val", or whatever equivalent sequence
   stuff_const_harder() needs when 'val' does not fit one immediate. */
static void gadd_sp(int val)
{
  stuff_const_harder(0xE28DD000,val);
}
 716
 717 /* 'is_jmp' is '1' if it is a jump */
 718 static void gcall_or_jmp(int is_jmp)
 719 {
 720   int r;
 721   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 722     uint32_t x;
 723     /* constant case */
 724     x=encbranch(ind,ind+vtop->c.ul,0);
 725     if(x) {
 726       if (vtop->r & VT_SYM) {
 727         /* relocation case */
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 729       } else
 730         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 731       o(x|(is_jmp?0xE0000000:0xE1000000));
 732     } else {
 733       if(!is_jmp)
 734         o(0xE28FE004); // add lr,pc,#4
 735       o(0xE51FF004);   // ldr pc,[pc,#-4]
 736       if (vtop->r & VT_SYM)
 737         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 738       o(vtop->c.ul);
 739     }
 740   } else {
 741     /* otherwise, indirect call */
 742     r = gv(RC_INT);
 743     if(!is_jmp)
 744       o(0xE1A0E00F);       // mov lr,pc
 745     o(0xE1A0F000|intr(r)); // mov pc,r
 746   }
 747 }
 748
 749 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
 750 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
 751 #ifdef TCC_ARM_EABI
 752     int size, align;
 753     size = type_size(vt, &align);
 754     if (size > 4) {
 755         return 1;
 756     } else {
 757         *ret_align = 4;
 758         ret->ref = NULL;
 759         ret->t = VT_INT;
 760         return 0;
 761     }
 762 #else
 763     return 1;
 764 #endif
 765 }
 766
 767 #ifdef TCC_ARM_HARDFLOAT
 768 /* Return whether a structure is an homogeneous float aggregate or not.
 769    The answer is true if all the elements of the structure are of the same
 770    primitive float type and there is less than 4 elements.
 771
 772    type: the type corresponding to the structure to be tested */
 773 static int is_hgen_float_aggr(CType *type)
 774 {
 775   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 776     struct Sym *ref;
 777     int btype, nb_fields = 0;
 778
 779     ref = type->ref;
 780     btype = ref->type.t & VT_BTYPE;
 781     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 782       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 783       return !ref && nb_fields <= 4;
 784     }
 785   }
 786   return 0;
 787 }
 788
 789 struct avail_regs {
 790   signed char avail[3]; /* 3 holes max with only float and double alignments */
 791   int first_hole; /* first available hole */
 792   int last_hole; /* last available hole (none if equal to first_hole) */
 793   int first_free_reg; /* next free register in the sequence, hole excluded */
 794 };
 795
 796 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 797
 798 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 799    param) according to the rules described in the procedure call standard for
 800    the ARM architecture (AAPCS). If found, the registers are assigned to this
 801    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 802    and the parameter is a single float.
 803
 804    avregs: opaque structure to keep track of available VFP co-processor regs
 805    align: alignment contraints for the param, as returned by type_size()
 806    size: size of the parameter, as returned by type_size() */
 807 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 808 {
 809   int first_reg = 0;
 810
 811   if (avregs->first_free_reg == -1)
 812     return -1;
 813   if (align >> 3) { /* double alignment */
 814     first_reg = avregs->first_free_reg;
 815     /* alignment contraint not respected so use next reg and record hole */
 816     if (first_reg & 1)
 817       avregs->avail[avregs->last_hole++] = first_reg++;
 818   } else { /* no special alignment (float or array of float) */
 819     /* if single float and a hole is available, assign the param to it */
 820     if (size == 4 && avregs->first_hole != avregs->last_hole)
 821       return avregs->avail[avregs->first_hole++];
 822     else
 823       first_reg = avregs->first_free_reg;
 824   }
 825   if (first_reg + size / 4 <= 16) {
 826     avregs->first_free_reg = first_reg + size / 4;
 827     return first_reg;
 828   }
 829   avregs->first_free_reg = -1;
 830   return -1;
 831 }
 832 #endif
 833
 834 /* Parameters are classified according to how they are copied to their final
 835    destination for the function call. Because the copying is performed class
 836    after class according to the order in the union below, it is important that
 837    some constraints about the order of the members of this union are respected:
 838    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 839    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 840      VFP_STRUCT_CLASS;
 841    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 842    See the comment for the main loop in copy_params() for the reason. */
 843 enum reg_class {
 844         STACK_CLASS = 0,
 845         CORE_STRUCT_CLASS,
 846         VFP_CLASS,
 847         VFP_STRUCT_CLASS,
 848         CORE_CLASS,
 849         NB_CLASSES
 850 };
 851
/* Where one call parameter goes. The member order matters: assign_regs()
   fills this structure with positional compound literals of the form
   {start, end, sval}. */
struct param_plan {
    int start; /* first reg or addr used depending on the class */
    int end; /* last reg used or next free addr depending on the class */
    SValue *sval; /* pointer to SValue on the value stack */
    struct param_plan *prev; /*  previous element in this class */
};
 858
/* The complete parameter assignment for one call: the storage for all the
   parameter plans plus, for each class, the head of a list linking the
   plans of that class through their 'prev' member (most recently added
   plan first). */
struct plan {
    struct param_plan *pplans; /* array of all the param plans */
    struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
};
 863
/* Append 'pplan' (copied by value) to the array plan->pplans and make the
   copy the new head of the list for 'class', linked to the previous head
   through 'prev'. The array index is a variable whose name is the first
   macro argument with "_nb" pasted on (plan_nb when the argument is
   'plan'); it must be in scope at the point of use and is incremented.
   The arguments are not parenthesised and 'plan' is evaluated several
   times, so only plain identifiers should be passed. */
#define add_param_plan(plan,pplan,class)                        \
    do {                                                        \
        pplan.prev = plan->clsplans[class];                     \
        plan->pplans[plan ## _nb] = pplan;                      \
        plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
    } while(0)
 870
 871 /* Assign parameters to registers and stack with alignment according to the
 872    rules in the procedure call standard for the ARM architecture (AAPCS).
 873    The overall assignment is recorded in an array of per parameter structures
 874    called parameter plans. The parameter plans are also further organized in a
 875    number of linked lists, one per class of parameter (see the comment for the
 876    definition of union reg_class).
 877
 878    nb_args: number of parameters of the function for which a call is generated
 879    variadic: whether the function is a variadic function or not
 880    plan: the structure where the overall assignment is recorded
 881    todo: a bitmap that record which core registers hold a parameter
 882
 883    Returns the amount of stack space needed for parameter passing
 884
 885    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 886    is the responsability of the caller to free this array once used (ie not
 887    before copy_params). */
 888 static int assign_regs(int nb_args, int variadic, struct plan *plan, int *todo)
 889 {
 890   int i, size, align;
 891   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 892   int plan_nb = 0;
 893   struct param_plan pplan;
 894 #ifdef TCC_ARM_HARDFLOAT
 895   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 896 #endif
 897
 898   ncrn = nsaa = 0;
 899   *todo = 0;
 900   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 901   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 902   for(i = nb_args; i-- ;) {
 903     int j, start_vfpreg = 0;
 904     size = type_size(&vtop[-i].type, &align);
 905     switch(vtop[-i].type.t & VT_BTYPE) {
 906       case VT_STRUCT:
 907       case VT_FLOAT:
 908       case VT_DOUBLE:
 909       case VT_LDOUBLE:
 910 #ifdef TCC_ARM_HARDFLOAT
 911       if (!variadic) {
 912         int is_hfa = 0; /* Homogeneous float aggregate */
 913
 914         if (is_float(vtop[-i].type.t)
 915             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 916           int end_vfpreg;
 917
 918           start_vfpreg = assign_vfpreg(&avregs, align, size);
 919           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 920           if (start_vfpreg >= 0) {
 921             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 922             if (is_hfa)
 923               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 924             else
 925               add_param_plan(plan, pplan, VFP_CLASS);
 926             continue;
 927           } else
 928             break;
 929         }
 930       }
 931 #endif
 932       ncrn = (ncrn + (align-1)/4) & -(align/4);
 933       size = (size + 3) & -4;
 934       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 935         /* The parameter is allocated both in core register and on stack. As
 936          * such, it can be of either class: it would either be the last of
 937          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
 938         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 939           *todo|=(1<<j);
 940         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
 941         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
 942         ncrn += size/4;
 943         if (ncrn > 4)
 944           nsaa = (ncrn - 4) * 4;
 945       } else {
 946         ncrn = 4;
 947         break;
 948       }
 949       continue;
 950       default:
 951       if (ncrn < 4) {
 952         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 953
 954         if (is_long) {
 955           ncrn = (ncrn + 1) & -2;
 956           if (ncrn == 4)
 957             break;
 958         }
 959         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
 960         ncrn++;
 961         if (is_long)
 962           pplan.end = ncrn++;
 963         add_param_plan(plan, pplan, CORE_CLASS);
 964         continue;
 965       }
 966     }
 967     nsaa = (nsaa + (align - 1)) & ~(align - 1);
 968     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
 969     add_param_plan(plan, pplan, STACK_CLASS);
 970     nsaa += size; /* size already rounded up before */
 971   }
 972   return nsaa;
 973 }
 974
 975 #undef add_param_plan
 976
 977 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
 978    function call.
 979
 980    nb_args: number of parameters the function take
 981    plan: the overall assignment plan for parameters
 982    todo: a bitmap indicating what core reg will hold a parameter
 983
 984    Returns the number of SValue added by this function on the value stack */
 985 static int copy_params(int nb_args, struct plan *plan, int todo)
 986 {
 987   int size, align, r, i, nb_extra_sval = 0;
 988   struct param_plan *pplan;
 989
 990    /* Several constraints require parameters to be copied in a specific order:
 991       - structures are copied to the stack before being loaded in a reg;
 992       - floats loaded to an odd numbered VFP reg are first copied to the
 993         preceding even numbered VFP reg and then moved to the next VFP reg.
 994
 995       It is thus important that:
 996       - structures assigned to core regs must be copied after parameters
 997         assigned to the stack but before structures assigned to VFP regs because
 998         a structure can lie partly in core registers and partly on the stack;
 999       - parameters assigned to the stack and all structures be copied before
1000         parameters assigned to a core reg since copying a parameter to the stack
1001         require using a core reg;
1002       - parameters assigned to VFP regs be copied before structures assigned to
1003         VFP regs as the copy might use an even numbered VFP reg that already
1004         holds part of a structure. */
1005   for(i = 0; i < NB_CLASSES; i++) {
1006     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1007       vpushv(pplan->sval);
1008       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1009       switch(i) {
1010         case STACK_CLASS:
1011         case CORE_STRUCT_CLASS:
1012         case VFP_STRUCT_CLASS:
1013           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1014             size = type_size(&pplan->sval->type, &align);
1015             /* align to stack align size */
1016             size = (size + 3) & ~3;
1017             if (i == STACK_CLASS && pplan->prev)
1018               size += pplan->start - pplan->prev->end; /* Add padding if any */
1019             /* allocate the necessary size on stack */
1020             gadd_sp(-size);
1021             /* generate structure store */
1022             r = get_reg(RC_INT);
1023             o(0xE1A0000D|(intr(r)<<12)); /* mov r, sp */
1024             vset(&vtop->type, r | VT_LVAL, 0);
1025             vswap();
1026             vstore(); /* memcpy to current sp */
1027             /* Homogeneous float aggregate are loaded to VFP registers
1028                immediately since there is no way of loading data in multiple
1029                non consecutive VFP registers as what is done for other
1030                structures (see the use of todo). */
1031             if (i == VFP_STRUCT_CLASS) {
1032               int first = pplan->start, nb = pplan->end - first + 1;
1033               /* vpop.32 {pplan->start, ..., pplan->end} */
1034               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1035               /* No need to write the register used to a SValue since VFP regs
1036                  cannot be used for gcall_or_jmp */
1037             }
1038           } else {
1039             if (is_float(pplan->sval->type.t)) {
1040 #ifdef TCC_ARM_VFP
1041               r = vfpr(gv(RC_FLOAT)) << 12;
1042               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1043                 size = 4;
1044               else {
1045                 size = 8;
1046                 r |= 0x101; /* vpush.32 -> vpush.64 */
1047               }
1048               o(0xED2D0A01 + r); /* vpush */
1049 #else
1050               r = fpr(gv(RC_FLOAT)) << 12;
1051               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1052                 size = 4;
1053               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1054                 size = 8;
1055               else
1056                 size = LDOUBLE_SIZE;
1057
1058               if (size == 12)
1059                 r |= 0x400000;
1060               else if(size == 8)
1061                 r|=0x8000;
1062
1063               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1064 #endif
1065             } else {
1066               /* simple type (currently always same size) */
1067               /* XXX: implicit cast ? */
1068               size=4;
1069               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1070                 lexpand_nr();
1071                 size = 8;
1072                 r = gv(RC_INT);
1073                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1074                 vtop--;
1075               }
1076               r = gv(RC_INT);
1077               o(0xE52D0004|(intr(r)<<12)); /* push r */
1078             }
1079             if (i == STACK_CLASS && pplan->prev)
1080               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1081           }
1082           break;
1083
1084         case VFP_CLASS:
1085           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1086           if (pplan->start & 1) { /* Must be in upper part of double register */
1087             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1088             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1089           }
1090           break;
1091
1092         case CORE_CLASS:
1093           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1094             lexpand_nr();
1095             gv(regmask(pplan->end));
1096             pplan->sval->r2 = vtop->r;
1097             vtop--;
1098           }
1099           gv(regmask(pplan->start));
1100           /* Mark register as used so that gcall_or_jmp use another one
1101              (regs >=4 are free as never used to pass parameters) */
1102           pplan->sval->r = vtop->r;
1103           break;
1104       }
1105       vtop--;
1106     }
1107   }
1108
1109   /* Manually free remaining registers since next parameters are loaded
1110    * manually, without the help of gv(int). */
1111   save_regs(nb_args);
1112
1113   if(todo) {
1114     o(0xE8BD0000|todo); /* pop {todo} */
1115     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1116       int r;
1117       pplan->sval->r = pplan->start;
1118       /* TODO: why adding fake param */
1119       for (r = pplan->start + 1; r <= pplan->end; r++) {
1120         if (todo & (1 << r)) {
1121           nb_extra_sval++;
1122           vpushi(0);
1123           vtop->r = r;
1124         }
1125       }
1126     }
1127   }
1128   return nb_extra_sval;
1129 }
1130
1131 /* Generate function call. The function address is pushed first, then
1132    all the parameters in call order. This functions pops all the
1133    parameters and the function address. */
1134 void gfunc_call(int nb_args)
1135 {
1136   int align, r, args_size;
1137   int variadic;
1138   int todo;
1139   struct plan plan;
1140
1141   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1142   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1143      VT_JMP anywhere except on the top of the stack because it would complicate
1144      the code generator. */
1145   r = vtop->r & VT_VALMASK;
1146   if (r == VT_CMP || (r & ~1) == VT_JMP)
1147     gv(RC_INT);
1148 #ifdef TCC_ARM_EABI
1149   /* return type is a struct so caller of gfunc_call (unary(void) in tccgen.c)
1150      assumed it had to be passed by a pointer. Since it's less than 4 bytes, we
1151      can actually pass it directly in a register. */
1152   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
1153      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
1154     SValue tmp;
1155     tmp=vtop[-nb_args];
1156     vtop[-nb_args]=vtop[-nb_args+1];
1157     vtop[-nb_args+1]=tmp;
1158     --nb_args;
1159   }
1160 #endif
1161
1162   args_size = assign_regs(nb_args, variadic, &plan, &todo);
1163
1164 #ifdef TCC_ARM_EABI
1165   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1166     args_size = (args_size + 7) & ~7;
1167     o(0xE24DD004); /* sub sp, sp, #4 */
1168   }
1169 #endif
1170
1171   nb_args += copy_params(nb_args, &plan, todo);
1172   tcc_free(plan.pplans);
1173
1174   /* Move fct SValue on top as required by gcall_or_jmp */
1175   vrotb(nb_args + 1);
1176   gcall_or_jmp(0);
1177   if (args_size)
1178       gadd_sp(args_size); /* pop all parameters passed on the stack */
1179 #ifdef TCC_ARM_EABI
1180   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1181      && type_size(&vtop->type.ref->type, &align) <= 4) {
1182     store(REG_IRET,vtop-nb_args-1);
1183     nb_args++;
1184   }
1185 #ifdef TCC_ARM_VFP
1186 #ifdef TCC_ARM_HARDFLOAT
1187   else if(variadic && is_float(vtop->type.ref->type.t)) {
1188 #else
1189   else if(is_float(vtop->type.ref->type.t)) {
1190 #endif
1191     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1192       o(0xEE000A10); /*vmov s0, r0 */
1193     } else {
1194       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1195       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1196     }
1197   }
1198 #endif
1199 #endif
1200   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1201   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1202 }
1203
/* Generate the prolog of a function of type 'func_type': save on the stack
   the registers that carry parameters together with fp, ip and lr, set up
   fp, leave room for the stack adjustment patched in by gfunc_epilog, and
   declare every parameter as a local variable located relative to fp. */
void gfunc_prolog(CType *func_type)
{
  Sym *sym,*sym2;
  int n,nf,size,align, variadic, struct_ret = 0;
#ifdef TCC_ARM_HARDFLOAT
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
#endif

  sym = func_type->ref;
  func_vt = sym->type;

  /* n counts the core register words to save, nf the VFP registers */
  n = nf = 0;
  variadic = (func_type->ref->c == FUNC_ELLIPSIS);
  /* A struct bigger than 4 bytes is returned through memory: one more core
     register word is accounted for (n++) and func_vc records where the
     location of the result can be found. */
  if((func_vt.t & VT_BTYPE) == VT_STRUCT
     && type_size(&func_vt,&align) > 4)
  {
    n++;
    struct_ret = 1;
    func_vc = 12; /* Offset from fp of the place to store the result */
  }
  /* First pass over the parameters: only count how many registers have to
     be saved; stop as soon as both counts reached their maximum. */
  for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
    size = type_size(&sym2->type, &align);
#ifdef TCC_ARM_HARDFLOAT
    if (!variadic && (is_float(sym2->type.t)
        || is_hgen_float_aggr(&sym2->type))) {
      /* nf becomes one more than the highest value returned by
         assign_vfpreg so far */
      int tmpnf = assign_vfpreg(&avregs, align, size) + 1;
      nf = (tmpnf > nf) ? tmpnf : nf;
    } else
#endif
    if (n < 4)
      n += (size + 3) / 4;
  }
  o(0xE1A0C00D); /* mov ip,sp */
  /* a variadic function always saves the four registers */
  if(variadic)
    n=4;
  if(n) {
    if(n>4)
      n=4;
#ifdef TCC_ARM_EABI
    n=(n+1)&-2; /* round up to an even number of registers */
#endif
    o(0xE92D0000|((1<<n)-1)); /* save r0-r3 (the n first ones) on stack if needed */
  }
  if (nf) {
    if (nf>16)
      nf=16;
    nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
    o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
  }
  o(0xE92D5800); /* save fp, ip, lr */
  o(0xE1A0B00D); /* mov fp, sp */
  /* remember where the following nop is: gfunc_epilog overwrites it */
  func_sub_sp_offset = ind;
  o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
  {
    /* Second pass: compute the offset of every parameter.  pn and sn are
       counted in words. */
    int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */

#ifdef TCC_ARM_HARDFLOAT
    func_vc += nf * 4;
    /* restart the VFP register assignment from scratch for this pass */
    avregs = AVAIL_REGS_INITIALIZER;
#endif
    while ((sym = sym->next)) {
      CType *type;
      type = &sym->type;
      size = type_size(type, &align);
      size = (size + 3) >> 2; /* size in words, rounded up */
      align = (align + 3) & ~3; /* alignment of at least one word */
#ifdef TCC_ARM_HARDFLOAT
      if (!variadic && (is_float(sym->type.t)
          || is_hgen_float_aggr(&sym->type))) {
        int fpn = assign_vfpreg(&avregs, align, size << 2);
        if (fpn >= 0) {
          addr = fpn * 4;
        } else
          goto from_stack; /* negative result: parameter is read from the stack */
      } else
#endif
      if (pn < 4) {
#ifdef TCC_ARM_EABI
        pn = (pn + (align-1)/4) & -(align/4); /* align the register number */
#endif
        addr = (nf + pn) * 4;
        pn += size;
        /* words of this parameter that do not fit in the first four */
        if (!sn && pn > 4)
          sn = (pn - 4);
      } else {
#ifdef TCC_ARM_HARDFLOAT
from_stack:
#endif
#ifdef TCC_ARM_EABI
        sn = (sn + (align-1)/4) & -(align/4); /* align the stack word index */
#endif
        addr = (n + nf + sn) * 4;
        sn += size;
      }
      /* The parameter lives at fp + addr + 12; the 12 presumably accounts
         for the three words (fp, ip, lr) saved above -- TODO confirm. */
      sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
    }
  }
  /* reset the per function state */
  last_itod_magic=0;
  leaffunc = 1;
  loc = 0;
}
1306
1307 /* generate function epilog */
1308 void gfunc_epilog(void)
1309 {
1310   uint32_t x;
1311   int diff;
1312 #ifdef TCC_ARM_EABI
1313   /* Useless but harmless copy of the float result into main register(s) in case
1314      of variadic function in the hardfloat variant */
1315   if(is_float(func_vt.t)) {
1316     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1317       o(0xEE100A10); /* fmrs r0, s0 */
1318     else {
1319       o(0xEE100B10); /* fmrdl r0, d0 */
1320       o(0xEE301B10); /* fmrdh r1, d0 */
1321     }
1322   }
1323 #endif
1324   o(0xE89BA800); /* restore fp, sp, pc */
1325   diff = (-loc + 3) & -4;
1326 #ifdef TCC_ARM_EABI
1327   if(!leaffunc)
1328     diff = ((diff + 11) & -8) - 4;
1329 #endif
1330   if(diff > 0) {
1331     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1332     if(x)
1333       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1334     else {
1335       int addr;
1336       addr=ind;
1337       o(0xE59FC004); /* ldr ip,[pc+4] */
1338       o(0xE04BD00C); /* sub sp,fp,ip  */
1339       o(0xE1A0F00E); /* mov pc,lr */
1340       o(diff);
1341       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1342     }
1343   }
1344 }
1345
1346 /* generate a jump to a label */
1347 int gjmp(int t)
1348 {
1349   int r;
1350   r=ind;
1351   o(0xE0000000|encbranch(r,t,1));
1352   return r;
1353 }
1354
/* Emit a jump to the fixed address 'a'; the position of the jump
   instruction returned by gjmp is not needed here */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1360
1361 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1362 int gtst(int inv, int t)
1363 {
1364   int v, r;
1365   uint32_t op;
1366   v = vtop->r & VT_VALMASK;
1367   r=ind;
1368   if (v == VT_CMP) {
1369     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1370     op|=encbranch(r,t,1);
1371     o(op);
1372     t=r;
1373   } else if (v == VT_JMP || v == VT_JMPI) {
1374     if ((v & 1) == inv) {
1375       if(!vtop->c.i)
1376         vtop->c.i=t;
1377       else {
1378         uint32_t *x;
1379         int p,lp;
1380         if(t) {
1381           p = vtop->c.i;
1382           do {
1383             p = decbranch(lp=p);
1384           } while(p);
1385           x = (uint32_t *)(cur_text_section->data + lp);
1386           *x &= 0xff000000;
1387           *x |= encbranch(lp,t,1);
1388         }
1389         t = vtop->c.i;
1390       }
1391     } else {
1392       t = gjmp(t);
1393       gsym(vtop->c.i);
1394     }
1395   } else {
1396     if (is_float(vtop->type.t)) {
1397       r=gv(RC_FLOAT);
1398 #ifdef TCC_ARM_VFP
1399       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1400       o(0xEEF1FA10); /* fmstat */
1401 #else
1402       o(0xEE90F118|(fpr(r)<<16));
1403 #endif
1404       vtop->r = VT_CMP;
1405       vtop->c.i = TOK_NE;
1406       return gtst(inv, t);
1407     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1408       /* constant jmp optimization */
1409       if ((vtop->c.i != 0) != inv)
1410         t = gjmp(t);
1411     } else {
1412       v = gv(RC_INT);
1413       o(0xE3300000|(intr(v)<<16));
1414       vtop->r = VT_CMP;
1415       vtop->c.i = TOK_NE;
1416       return gtst(inv, t);
1417     }
1418   }
1419   vtop--;
1420   return t;
1421 }
1422
1423 /* generate an integer binary operation */
1424 void gen_opi(int op)
1425 {
1426   int c, func = 0;
1427   uint32_t opc = 0, r, fr;
1428   unsigned short retreg = REG_IRET;
1429
1430   c=0;
1431   switch(op) {
1432     case '+':
1433       opc = 0x8;
1434       c=1;
1435       break;
1436     case TOK_ADDC1: /* add with carry generation */
1437       opc = 0x9;
1438       c=1;
1439       break;
1440     case '-':
1441       opc = 0x4;
1442       c=1;
1443       break;
1444     case TOK_SUBC1: /* sub with carry generation */
1445       opc = 0x5;
1446       c=1;
1447       break;
1448     case TOK_ADDC2: /* add with carry use */
1449       opc = 0xA;
1450       c=1;
1451       break;
1452     case TOK_SUBC2: /* sub with carry use */
1453       opc = 0xC;
1454       c=1;
1455       break;
1456     case '&':
1457       opc = 0x0;
1458       c=1;
1459       break;
1460     case '^':
1461       opc = 0x2;
1462       c=1;
1463       break;
1464     case '|':
1465       opc = 0x18;
1466       c=1;
1467       break;
1468     case '*':
1469       gv2(RC_INT, RC_INT);
1470       r = vtop[-1].r;
1471       fr = vtop[0].r;
1472       vtop--;
1473       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1474       return;
1475     case TOK_SHL:
1476       opc = 0;
1477       c=2;
1478       break;
1479     case TOK_SHR:
1480       opc = 1;
1481       c=2;
1482       break;
1483     case TOK_SAR:
1484       opc = 2;
1485       c=2;
1486       break;
1487     case '/':
1488     case TOK_PDIV:
1489       func=TOK___divsi3;
1490       c=3;
1491       break;
1492     case TOK_UDIV:
1493       func=TOK___udivsi3;
1494       c=3;
1495       break;
1496     case '%':
1497 #ifdef TCC_ARM_EABI
1498       func=TOK___aeabi_idivmod;
1499       retreg=REG_LRET;
1500 #else
1501       func=TOK___modsi3;
1502 #endif
1503       c=3;
1504       break;
1505     case TOK_UMOD:
1506 #ifdef TCC_ARM_EABI
1507       func=TOK___aeabi_uidivmod;
1508       retreg=REG_LRET;
1509 #else
1510       func=TOK___umodsi3;
1511 #endif
1512       c=3;
1513       break;
1514     case TOK_UMULL:
1515       gv2(RC_INT, RC_INT);
1516       r=intr(vtop[-1].r2=get_reg(RC_INT));
1517       c=vtop[-1].r;
1518       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1519       vtop--;
1520       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1521       return;
1522     default:
1523       opc = 0x15;
1524       c=1;
1525       break;
1526   }
1527   switch(c) {
1528     case 1:
1529       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1530         if(opc == 4 || opc == 5 || opc == 0xc) {
1531           vswap();
1532           opc|=2; // sub -> rsb
1533         }
1534       }
1535       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1536           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1537         gv(RC_INT);
1538       vswap();
1539       c=intr(gv(RC_INT));
1540       vswap();
1541       opc=0xE0000000|(opc<<20)|(c<<16);
1542       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1543         uint32_t x;
1544         x=stuff_const(opc|0x2000000,vtop->c.i);
1545         if(x) {
1546           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1547           o(x|(r<<12));
1548           goto done;
1549         }
1550       }
1551       fr=intr(gv(RC_INT));
1552       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1553       o(opc|(r<<12)|fr);
1554 done:
1555       vtop--;
1556       if (op >= TOK_ULT && op <= TOK_GT) {
1557         vtop->r = VT_CMP;
1558         vtop->c.i = op;
1559       }
1560       break;
1561     case 2:
1562       opc=0xE1A00000|(opc<<5);
1563       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1564           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1565         gv(RC_INT);
1566       vswap();
1567       r=intr(gv(RC_INT));
1568       vswap();
1569       opc|=r;
1570       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1571         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1572         c = vtop->c.i & 0x1f;
1573         o(opc|(c<<7)|(fr<<12));
1574       } else {
1575         fr=intr(gv(RC_INT));
1576         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1577         o(opc|(c<<12)|(fr<<8)|0x10);
1578       }
1579       vtop--;
1580       break;
1581     case 3:
1582       vpush_global_sym(&func_old_type, func);
1583       vrott(3);
1584       gfunc_call(2);
1585       vpushi(0);
1586       vtop->r = retreg;
1587       break;
1588     default:
1589       tcc_error("gen_opi %i unimplemented!",op);
1590   }
1591 }
1592
1593 #ifdef TCC_ARM_VFP
1594 static int is_zero(int i)
1595 {
1596   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1597     return 0;
1598   if (vtop[i].type.t == VT_FLOAT)
1599     return (vtop[i].c.f == 0.f);
1600   else if (vtop[i].type.t == VT_DOUBLE)
1601     return (vtop[i].c.d == 0.0);
1602   return (vtop[i].c.ld == 0.l);
1603 }
1604
1605 /* generate a floating point operation 'v = t1 op t2' instruction. The
1606  *    two operands are guaranted to have the same floating point type */
1607 void gen_opf(int op)
1608 {
1609   uint32_t x;
1610   int fneg=0,r;
1611   x=0xEE000A00|T2CPR(vtop->type.t);
1612   switch(op) {
1613     case '+':
1614       if(is_zero(-1))
1615         vswap();
1616       if(is_zero(0)) {
1617         vtop--;
1618         return;
1619       }
1620       x|=0x300000;
1621       break;
1622     case '-':
1623       x|=0x300040;
1624       if(is_zero(0)) {
1625         vtop--;
1626         return;
1627       }
1628       if(is_zero(-1)) {
1629         x|=0x810000; /* fsubX -> fnegX */
1630         vswap();
1631         vtop--;
1632         fneg=1;
1633       }
1634       break;
1635     case '*':
1636       x|=0x200000;
1637       break;
1638     case '/':
1639       x|=0x800000;
1640       break;
1641     default:
1642       if(op < TOK_ULT || op > TOK_GT) {
1643         tcc_error("unknown fp op %x!",op);
1644         return;
1645       }
1646       if(is_zero(-1)) {
1647         vswap();
1648         switch(op) {
1649           case TOK_LT: op=TOK_GT; break;
1650           case TOK_GE: op=TOK_ULE; break;
1651           case TOK_LE: op=TOK_GE; break;
1652           case TOK_GT: op=TOK_ULT; break;
1653         }
1654       }
1655       x|=0xB40040; /* fcmpX */
1656       if(op!=TOK_EQ && op!=TOK_NE)
1657         x|=0x80; /* fcmpX -> fcmpeX */
1658       if(is_zero(0)) {
1659         vtop--;
1660         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1661       } else {
1662         x|=vfpr(gv(RC_FLOAT));
1663         vswap();
1664         o(x|(vfpr(gv(RC_FLOAT))<<12));
1665         vtop--;
1666       }
1667       o(0xEEF1FA10); /* fmstat */
1668
1669       switch(op) {
1670         case TOK_LE: op=TOK_ULE; break;
1671         case TOK_LT: op=TOK_ULT; break;
1672         case TOK_UGE: op=TOK_GE; break;
1673         case TOK_UGT: op=TOK_GT; break;
1674       }
1675
1676       vtop->r = VT_CMP;
1677       vtop->c.i = op;
1678       return;
1679   }
1680   r=gv(RC_FLOAT);
1681   x|=vfpr(r);
1682   r=regmask(r);
1683   if(!fneg) {
1684     int r2;
1685     vswap();
1686     r2=gv(RC_FLOAT);
1687     x|=vfpr(r2)<<16;
1688     r|=regmask(r2);
1689   }
1690   vtop->r=get_reg_ex(RC_FLOAT,r);
1691   if(!fneg)
1692     vtop--;
1693   o(x|(vfpr(vtop->r)<<12));
1694 }
1695
1696 #else
1697 static uint32_t is_fconst()
1698 {
1699   long double f;
1700   uint32_t r;
1701   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1702     return 0;
1703   if (vtop->type.t == VT_FLOAT)
1704     f = vtop->c.f;
1705   else if (vtop->type.t == VT_DOUBLE)
1706     f = vtop->c.d;
1707   else
1708     f = vtop->c.ld;
1709   if(!ieee_finite(f))
1710     return 0;
1711   r=0x8;
1712   if(f<0.0) {
1713     r=0x18;
1714     f=-f;
1715   }
1716   if(f==0.0)
1717     return r;
1718   if(f==1.0)
1719     return r|1;
1720   if(f==2.0)
1721     return r|2;
1722   if(f==3.0)
1723     return r|3;
1724   if(f==4.0)
1725     return r|4;
1726   if(f==5.0)
1727     return r|5;
1728   if(f==0.5)
1729     return r|6;
1730   if(f==10.0)
1731     return r|7;
1732   return 0;
1733 }
1734
/* Generate a floating point operation 'v = t1 op t2' instruction (FPA
   variant). The two operands are guaranteed to have the same floating point
   type.

   c1 and c2 are the results of is_fconst() for the first and the second
   operand: 0 when the operand cannot be an immediate, otherwise a value with
   bit 0x8 set, which is above 0xf when the constant is negative. */
void gen_opf(int op)
{
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  /* is_fconst() looks at the top of the stack: swap to examine the first
     operand, then swap back */
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* the bits added here depend on the type of the operands */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  /* Each case leaves in r the register of one operand and in r2 either the
     register of the other one or the immediate value. */
  switch(op)
  {
    case '+':
      /* commutative: make the second operand the constant one if any */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* negative constant: subtract its absolute value instead */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* subtracting a negative constant keeps the base opcode, used as an
           addition in the '+' case above */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* non negative constant as first operand: reverse subtract */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* commutative: try the first operand when the second one is not a
         non negative constant */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* non negative constant as first operand: reverse divide */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* choose the condition tested after the comparison */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparision on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only the first operand is a constant: swap the operands and
           mirror the condition */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative constant: same opcode bit as in the '+' case */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        /* the result of the comparison is in the CPU flags */
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* c1 is reused from here on for the destination field of the instruction */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* r2 is an immediate (bit 0x8 set) */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1898 #endif
1899
1900 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1901    and 'long long' cases. */
1902 ST_FUNC void gen_cvt_itof1(int t)
1903 {
1904   uint32_t r, r2;
1905   int bt;
1906   bt=vtop->type.t & VT_BTYPE;
1907   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1908 #ifndef TCC_ARM_VFP
1909     uint32_t dsize = 0;
1910 #endif
1911     r=intr(gv(RC_INT));
1912 #ifdef TCC_ARM_VFP
1913     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1914     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1915     r2|=r2<<12;
1916     if(!(vtop->type.t & VT_UNSIGNED))
1917       r2|=0x80;                /* fuitoX -> fsituX */
1918     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1919 #else
1920     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1921     if((t & VT_BTYPE) != VT_FLOAT)
1922       dsize=0x80;    /* flts -> fltd */
1923     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1924     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1925       uint32_t off = 0;
1926       o(0xE3500000|(r<<12));        /* cmp */
1927       r=fpr(get_reg(RC_FLOAT));
1928       if(last_itod_magic) {
1929         off=ind+8-last_itod_magic;
1930         off/=4;
1931         if(off>255)
1932           off=0;
1933       }
1934       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1935       if(!off) {
1936         o(0xEA000000);              /* b */
1937         last_itod_magic=ind;
1938         o(0x4F800000);              /* 4294967296.0f */
1939       }
1940       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1941     }
1942 #endif
1943     return;
1944   } else if(bt == VT_LLONG) {
1945     int func;
1946     CType *func_type = 0;
1947     if((t & VT_BTYPE) == VT_FLOAT) {
1948       func_type = &func_float_type;
1949       if(vtop->type.t & VT_UNSIGNED)
1950         func=TOK___floatundisf;
1951       else
1952         func=TOK___floatdisf;
1953 #if LDOUBLE_SIZE != 8
1954     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1955       func_type = &func_ldouble_type;
1956       if(vtop->type.t & VT_UNSIGNED)
1957         func=TOK___floatundixf;
1958       else
1959         func=TOK___floatdixf;
1960     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1961 #else
1962     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1963 #endif
1964       func_type = &func_double_type;
1965       if(vtop->type.t & VT_UNSIGNED)
1966         func=TOK___floatundidf;
1967       else
1968         func=TOK___floatdidf;
1969     }
1970     if(func_type) {
1971       vpush_global_sym(func_type, func);
1972       vswap();
1973       gfunc_call(1);
1974       vpushi(0);
1975       vtop->r=TREG_F0;
1976       return;
1977     }
1978   }
1979   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1980 }
1981
1982 /* convert fp to int 't' type */
1983 void gen_cvt_ftoi(int t)
1984 {
1985   uint32_t r, r2;
1986   int u, func = 0;
1987   u=t&VT_UNSIGNED;
1988   t&=VT_BTYPE;
1989   r2=vtop->type.t & VT_BTYPE;
1990   if(t==VT_INT) {
1991 #ifdef TCC_ARM_VFP
1992     r=vfpr(gv(RC_FLOAT));
1993     u=u?0:0x10000;
1994     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
1995     r2=intr(vtop->r=get_reg(RC_INT));
1996     o(0xEE100A10|(r<<16)|(r2<<12));
1997     return;
1998 #else
1999     if(u) {
2000       if(r2 == VT_FLOAT)
2001         func=TOK___fixunssfsi;
2002 #if LDOUBLE_SIZE != 8
2003       else if(r2 == VT_LDOUBLE)
2004         func=TOK___fixunsxfsi;
2005       else if(r2 == VT_DOUBLE)
2006 #else
2007       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2008 #endif
2009         func=TOK___fixunsdfsi;
2010     } else {
2011       r=fpr(gv(RC_FLOAT));
2012       r2=intr(vtop->r=get_reg(RC_INT));
2013       o(0xEE100170|(r2<<12)|r);
2014       return;
2015     }
2016 #endif
2017   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2018     if(r2 == VT_FLOAT)
2019       func=TOK___fixsfdi;
2020 #if LDOUBLE_SIZE != 8
2021     else if(r2 == VT_LDOUBLE)
2022       func=TOK___fixxfdi;
2023     else if(r2 == VT_DOUBLE)
2024 #else
2025     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2026 #endif
2027       func=TOK___fixdfdi;
2028   }
2029   if(func) {
2030     vpush_global_sym(&func_old_type, func);
2031     vswap();
2032     gfunc_call(1);
2033     vpushi(0);
2034     if(t == VT_LLONG)
2035       vtop->r2 = REG_LRET;
2036     vtop->r = REG_IRET;
2037     return;
2038   }
2039   tcc_error("unimplemented gen_cvt_ftoi!");
2040 }
2041
2042 /* convert from one floating point type to another */
2043 void gen_cvt_ftof(int t)
2044 {
2045 #ifdef TCC_ARM_VFP
2046   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2047     uint32_t r = vfpr(gv(RC_FLOAT));
2048     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2049   }
2050 #else
2051   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2052   gv(RC_FLOAT);
2053 #endif
2054 }
2055
2056 /* computed goto support */
2057 void ggoto(void)
2058 {
2059   gcall_or_jmp(1);
2060   vtop--;
2061 }
2062
2063 /* Save the stack pointer onto the stack and return the location of its address */
2064 ST_FUNC void gen_vla_sp_save(int addr) {
2065     tcc_error("variable length arrays unsupported for this target");
2066 }
2067
2068 /* Restore the SP from a location on the stack */
2069 ST_FUNC void gen_vla_sp_restore(int addr) {
2070     tcc_error("variable length arrays unsupported for this target");
2071 }
2072
2073 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2074 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2075     tcc_error("variable length arrays unsupported for this target");
2076 }
2077
2078 /* end of ARM code generator */
2079 /*************************************************************/
2080 #endif
2081 /*************************************************************/