arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_CPU_VERSION
  38 # define TCC_CPU_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #include "tcc.h"
 136
 137 enum float_abi float_abi;
 138
 139 ST_DATA const int reg_classes[NB_REGS] = {
 140     /* r0 */ RC_INT | RC_R0,
 141     /* r1 */ RC_INT | RC_R1,
 142     /* r2 */ RC_INT | RC_R2,
 143     /* r3 */ RC_INT | RC_R3,
 144     /* r12 */ RC_INT | RC_R12,
 145     /* f0 */ RC_FLOAT | RC_F0,
 146     /* f1 */ RC_FLOAT | RC_F1,
 147     /* f2 */ RC_FLOAT | RC_F2,
 148     /* f3 */ RC_FLOAT | RC_F3,
 149 #ifdef TCC_ARM_VFP
 150  /* d4/s8 */ RC_FLOAT | RC_F4,
 151 /* d5/s10 */ RC_FLOAT | RC_F5,
 152 /* d6/s12 */ RC_FLOAT | RC_F6,
 153 /* d7/s14 */ RC_FLOAT | RC_F7,
 154 #endif
 155 };
 156
 157 static int func_sub_sp_offset, last_itod_magic;
 158 static int leaffunc;
 159
 160 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 161 static CType float_type, double_type, func_float_type, func_double_type;
 162 ST_FUNC void arm_init(struct TCCState *s)
 163 {
 164     float_type.t = VT_FLOAT;
 165     double_type.t = VT_DOUBLE;
 166     func_float_type.t = VT_FUNC;
 167     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 168     func_double_type.t = VT_FUNC;
 169     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 170
 171     float_abi = s->float_abi;
 172 #ifndef TCC_ARM_HARDFLOAT
 173     tcc_warning("soft float ABI currently not supported: default to softfp");
 174 #endif
 175 }
 176 #else
 177 #define func_float_type func_old_type
 178 #define func_double_type func_old_type
 179 #define func_ldouble_type func_old_type
 180 ST_FUNC void arm_init(struct TCCState *s)
 181 {
 182 #if 0
 183 #if !defined (TCC_ARM_VFP)
 184     tcc_warning("Support for FPA is deprecated and will be removed in next"
 185                 " release");
 186 #endif
 187 #if !defined (TCC_ARM_EABI)
 188     tcc_warning("Support for OABI is deprecated and will be removed in next"
 189                 " release");
 190 #endif
 191 #endif
 192 }
 193 #endif
 194
 195 static int two2mask(int a,int b) {
 196   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 197 }
 198
 199 static int regmask(int r) {
 200   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 201 }
 202
 203 /******************************************************/
 204
 205 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 206 const char *default_elfinterp(struct TCCState *s)
 207 {
 208     if (s->float_abi == ARM_HARD_FLOAT)
 209         return "/lib/ld-linux-armhf.so.3";
 210     else
 211         return "/lib/ld-linux.so.3";
 212 }
 213 #endif
 214
 215 void o(uint32_t i)
 216 {
 217   /* this is a good place to start adding big-endian support*/
 218   int ind1;
 219   if (nocode_wanted)
 220     return;
 221   ind1 = ind + 4;
 222   if (!cur_text_section)
 223     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 224          "can't evaluate constant expressions outside of a function.");
 225   if (ind1 > cur_text_section->data_allocated)
 226     section_realloc(cur_text_section, ind1);
 227   cur_text_section->data[ind++] = i&255;
 228   i>>=8;
 229   cur_text_section->data[ind++] = i&255;
 230   i>>=8;
 231   cur_text_section->data[ind++] = i&255;
 232   i>>=8;
 233   cur_text_section->data[ind++] = i;
 234 }
 235
 236 static uint32_t stuff_const(uint32_t op, uint32_t c)
 237 {
 238   int try_neg=0;
 239   uint32_t nc = 0, negop = 0;
 240
 241   switch(op&0x1F00000)
 242   {
 243     case 0x800000: //add
 244     case 0x400000: //sub
 245       try_neg=1;
 246       negop=op^0xC00000;
 247       nc=-c;
 248       break;
 249     case 0x1A00000: //mov
 250     case 0x1E00000: //mvn
 251       try_neg=1;
 252       negop=op^0x400000;
 253       nc=~c;
 254       break;
 255     case 0x200000: //xor
 256       if(c==~0)
 257         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 258       break;
 259     case 0x0: //and
 260       if(c==~0)
 261         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 262     case 0x1C00000: //bic
 263       try_neg=1;
 264       negop=op^0x1C00000;
 265       nc=~c;
 266       break;
 267     case 0x1800000: //orr
 268       if(c==~0)
 269         return (op&0xFFF0FFFF)|0x1E00000;
 270       break;
 271   }
 272   do {
 273     uint32_t m;
 274     int i;
 275     if(c<256) /* catch undefined <<32 */
 276       return op|c;
 277     for(i=2;i<32;i+=2) {
 278       m=(0xff>>i)|(0xff<<(32-i));
 279       if(!(c&~m))
 280         return op|(i<<7)|(c<<i)|(c>>(32-i));
 281     }
 282     op=negop;
 283     c=nc;
 284   } while(try_neg--);
 285   return 0;
 286 }
 287
 288
 289 //only add,sub
 290 void stuff_const_harder(uint32_t op, uint32_t v) {
 291   uint32_t x;
 292   x=stuff_const(op,v);
 293   if(x)
 294     o(x);
 295   else {
 296     uint32_t a[16], nv, no, o2, n2;
 297     int i,j,k;
 298     a[0]=0xff;
 299     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 300     for(i=1;i<16;i++)
 301       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 302     for(i=0;i<12;i++)
 303       for(j=i<4?i+12:15;j>=i+4;j--)
 304         if((v&(a[i]|a[j]))==v) {
 305           o(stuff_const(op,v&a[i]));
 306           o(stuff_const(o2,v&a[j]));
 307           return;
 308         }
 309     no=op^0xC00000;
 310     n2=o2^0xC00000;
 311     nv=-v;
 312     for(i=0;i<12;i++)
 313       for(j=i<4?i+12:15;j>=i+4;j--)
 314         if((nv&(a[i]|a[j]))==nv) {
 315           o(stuff_const(no,nv&a[i]));
 316           o(stuff_const(n2,nv&a[j]));
 317           return;
 318         }
 319     for(i=0;i<8;i++)
 320       for(j=i+4;j<12;j++)
 321         for(k=i<4?i+12:15;k>=j+4;k--)
 322           if((v&(a[i]|a[j]|a[k]))==v) {
 323             o(stuff_const(op,v&a[i]));
 324             o(stuff_const(o2,v&a[j]));
 325             o(stuff_const(o2,v&a[k]));
 326             return;
 327           }
 328     no=op^0xC00000;
 329     nv=-v;
 330     for(i=0;i<8;i++)
 331       for(j=i+4;j<12;j++)
 332         for(k=i<4?i+12:15;k>=j+4;k--)
 333           if((nv&(a[i]|a[j]|a[k]))==nv) {
 334             o(stuff_const(no,nv&a[i]));
 335             o(stuff_const(n2,nv&a[j]));
 336             o(stuff_const(n2,nv&a[k]));
 337             return;
 338           }
 339     o(stuff_const(op,v&a[0]));
 340     o(stuff_const(o2,v&a[4]));
 341     o(stuff_const(o2,v&a[8]));
 342     o(stuff_const(o2,v&a[12]));
 343   }
 344 }
 345
 346 uint32_t encbranch(int pos, int addr, int fail)
 347 {
 348   addr-=pos+8;
 349   addr/=4;
 350   if(addr>=0x1000000 || addr<-0x1000000) {
 351     if(fail)
 352       tcc_error("FIXME: function bigger than 32MB");
 353     return 0;
 354   }
 355   return 0x0A000000|(addr&0xffffff);
 356 }
 357
 358 int decbranch(int pos)
 359 {
 360   int x;
 361   x=*(uint32_t *)(cur_text_section->data + pos);
 362   x&=0x00ffffff;
 363   if(x&0x800000)
 364     x-=0x1000000;
 365   return x*4+pos+8;
 366 }
 367
 368 /* output a symbol and patch all calls to it */
 369 void gsym_addr(int t, int a)
 370 {
 371   uint32_t *x;
 372   int lt;
 373   while(t) {
 374     x=(uint32_t *)(cur_text_section->data + t);
 375     t=decbranch(lt=t);
 376     if(a==lt+4)
 377       *x=0xE1A00000; // nop
 378     else {
 379       *x &= 0xff000000;
 380       *x |= encbranch(lt,a,1);
 381     }
 382   }
 383 }
 384
 385 void gsym(int t)
 386 {
 387   gsym_addr(t, ind);
 388 }
 389
 390 #ifdef TCC_ARM_VFP
 391 static uint32_t vfpr(int r)
 392 {
 393   if(r<TREG_F0 || r>TREG_F7)
 394     tcc_error("compiler error! register %i is no vfp register",r);
 395   return r - TREG_F0;
 396 }
 397 #else
 398 static uint32_t fpr(int r)
 399 {
 400   if(r<TREG_F0 || r>TREG_F3)
 401     tcc_error("compiler error! register %i is no fpa register",r);
 402   return r - TREG_F0;
 403 }
 404 #endif
 405
 406 static uint32_t intr(int r)
 407 {
 408   if(r == TREG_R12)
 409     return 12;
 410   if(r >= TREG_R0 && r <= TREG_R3)
 411     return r - TREG_R0;
 412   if (r >= TREG_SP && r <= TREG_LR)
 413     return r + (13 - TREG_SP);
 414   tcc_error("compiler error! register %i is no int register",r);
 415 }
 416
 417 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 418 {
 419   if(*off>maxoff || *off&((1<<shift)-1)) {
 420     uint32_t x, y;
 421     x=0xE280E000;
 422     if(*sgn)
 423       x=0xE240E000;
 424     x|=(*base)<<16;
 425     *base=14; // lr
 426     y=stuff_const(x,*off&~maxoff);
 427     if(y) {
 428       o(y);
 429       *off&=maxoff;
 430       return;
 431     }
 432     y=stuff_const(x,(*off+maxoff)&~maxoff);
 433     if(y) {
 434       o(y);
 435       *sgn=!*sgn;
 436       *off=((*off+maxoff)&~maxoff)-*off;
 437       return;
 438     }
 439     stuff_const_harder(x,*off&~maxoff);
 440     *off&=maxoff;
 441   }
 442 }
 443
 444 static uint32_t mapcc(int cc)
 445 {
 446   switch(cc)
 447   {
 448     case TOK_ULT:
 449       return 0x30000000; /* CC/LO */
 450     case TOK_UGE:
 451       return 0x20000000; /* CS/HS */
 452     case TOK_EQ:
 453       return 0x00000000; /* EQ */
 454     case TOK_NE:
 455       return 0x10000000; /* NE */
 456     case TOK_ULE:
 457       return 0x90000000; /* LS */
 458     case TOK_UGT:
 459       return 0x80000000; /* HI */
 460     case TOK_Nset:
 461       return 0x40000000; /* MI */
 462     case TOK_Nclear:
 463       return 0x50000000; /* PL */
 464     case TOK_LT:
 465       return 0xB0000000; /* LT */
 466     case TOK_GE:
 467       return 0xA0000000; /* GE */
 468     case TOK_LE:
 469       return 0xD0000000; /* LE */
 470     case TOK_GT:
 471       return 0xC0000000; /* GT */
 472   }
 473   tcc_error("unexpected condition code");
 474   return 0xE0000000; /* AL */
 475 }
 476
 477 static int negcc(int cc)
 478 {
 479   switch(cc)
 480   {
 481     case TOK_ULT:
 482       return TOK_UGE;
 483     case TOK_UGE:
 484       return TOK_ULT;
 485     case TOK_EQ:
 486       return TOK_NE;
 487     case TOK_NE:
 488       return TOK_EQ;
 489     case TOK_ULE:
 490       return TOK_UGT;
 491     case TOK_UGT:
 492       return TOK_ULE;
 493     case TOK_Nset:
 494       return TOK_Nclear;
 495     case TOK_Nclear:
 496       return TOK_Nset;
 497     case TOK_LT:
 498       return TOK_GE;
 499     case TOK_GE:
 500       return TOK_LT;
 501     case TOK_LE:
 502       return TOK_GT;
 503     case TOK_GT:
 504       return TOK_LE;
 505   }
 506   tcc_error("unexpected condition code");
 507   return TOK_NE;
 508 }
 509
 510 /* load 'r' from value 'sv' */
 511 void load(int r, SValue *sv)
 512 {
 513   int v, ft, fc, fr, sign;
 514   uint32_t op;
 515   SValue v1;
 516
 517   fr = sv->r;
 518   ft = sv->type.t;
 519   fc = sv->c.i;
 520
 521   if(fc>=0)
 522     sign=0;
 523   else {
 524     sign=1;
 525     fc=-fc;
 526   }
 527
 528   v = fr & VT_VALMASK;
 529   if (fr & VT_LVAL) {
 530     uint32_t base = 0xB; // fp
 531     if(v == VT_LLOCAL) {
 532       v1.type.t = VT_PTR;
 533       v1.r = VT_LOCAL | VT_LVAL;
 534       v1.c.i = sv->c.i;
 535       load(TREG_LR, &v1);
 536       base = 14; /* lr */
 537       fc=sign=0;
 538       v=VT_LOCAL;
 539     } else if(v == VT_CONST) {
 540       v1.type.t = VT_PTR;
 541       v1.r = fr&~VT_LVAL;
 542       v1.c.i = sv->c.i;
 543       v1.sym=sv->sym;
 544       load(TREG_LR, &v1);
 545       base = 14; /* lr */
 546       fc=sign=0;
 547       v=VT_LOCAL;
 548     } else if(v < VT_CONST) {
 549       base=intr(v);
 550       fc=sign=0;
 551       v=VT_LOCAL;
 552     }
 553     if(v == VT_LOCAL) {
 554       if(is_float(ft)) {
 555         calcaddr(&base,&fc,&sign,1020,2);
 556 #ifdef TCC_ARM_VFP
 557         op=0xED100A00; /* flds */
 558         if(!sign)
 559           op|=0x800000;
 560         if ((ft & VT_BTYPE) != VT_FLOAT)
 561           op|=0x100;   /* flds -> fldd */
 562         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 563 #else
 564         op=0xED100100;
 565         if(!sign)
 566           op|=0x800000;
 567 #if LDOUBLE_SIZE == 8
 568         if ((ft & VT_BTYPE) != VT_FLOAT)
 569           op|=0x8000;
 570 #else
 571         if ((ft & VT_BTYPE) == VT_DOUBLE)
 572           op|=0x8000;
 573         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 574           op|=0x400000;
 575 #endif
 576         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 577 #endif
 578       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 579                 || (ft & VT_BTYPE) == VT_SHORT) {
 580         calcaddr(&base,&fc,&sign,255,0);
 581         op=0xE1500090;
 582         if ((ft & VT_BTYPE) == VT_SHORT)
 583           op|=0x20;
 584         if ((ft & VT_UNSIGNED) == 0)
 585           op|=0x40;
 586         if(!sign)
 587           op|=0x800000;
 588         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 589       } else {
 590         calcaddr(&base,&fc,&sign,4095,0);
 591         op=0xE5100000;
 592         if(!sign)
 593           op|=0x800000;
 594         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 595           op|=0x400000;
 596         o(op|(intr(r)<<12)|fc|(base<<16));
 597       }
 598       return;
 599     }
 600   } else {
 601     if (v == VT_CONST) {
 602       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
 603       if (fr & VT_SYM || !op) {
 604         o(0xE59F0000|(intr(r)<<12));
 605         o(0xEA000000);
 606         if(fr & VT_SYM)
 607           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 608         o(sv->c.i);
 609       } else
 610         o(op);
 611       return;
 612     } else if (v == VT_LOCAL) {
 613       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
 614       if (fr & VT_SYM || !op) {
 615         o(0xE59F0000|(intr(r)<<12));
 616         o(0xEA000000);
 617         if(fr & VT_SYM) // needed ?
 618           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 619         o(sv->c.i);
 620         o(0xE08B0000|(intr(r)<<12)|intr(r));
 621       } else
 622         o(op);
 623       return;
 624     } else if(v == VT_CMP) {
 625       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
 626       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
 627       return;
 628     } else if (v == VT_JMP || v == VT_JMPI) {
 629       int t;
 630       t = v & 1;
 631       o(0xE3A00000|(intr(r)<<12)|t);
 632       o(0xEA000000);
 633       gsym(sv->c.i);
 634       o(0xE3A00000|(intr(r)<<12)|(t^1));
 635       return;
 636     } else if (v < VT_CONST) {
 637       if(is_float(ft))
 638 #ifdef TCC_ARM_VFP
 639         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 640 #else
 641         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 642 #endif
 643       else
 644         o(0xE1A00000|(intr(r)<<12)|intr(v));
 645       return;
 646     }
 647   }
 648   tcc_error("load unimplemented!");
 649 }
 650
 651 /* store register 'r' in lvalue 'v' */
 652 void store(int r, SValue *sv)
 653 {
 654   SValue v1;
 655   int v, ft, fc, fr, sign;
 656   uint32_t op;
 657
 658   fr = sv->r;
 659   ft = sv->type.t;
 660   fc = sv->c.i;
 661
 662   if(fc>=0)
 663     sign=0;
 664   else {
 665     sign=1;
 666     fc=-fc;
 667   }
 668
 669   v = fr & VT_VALMASK;
 670   if (fr & VT_LVAL || fr == VT_LOCAL) {
 671     uint32_t base = 0xb; /* fp */
 672     if(v < VT_CONST) {
 673       base=intr(v);
 674       v=VT_LOCAL;
 675       fc=sign=0;
 676     } else if(v == VT_CONST) {
 677       v1.type.t = ft;
 678       v1.r = fr&~VT_LVAL;
 679       v1.c.i = sv->c.i;
 680       v1.sym=sv->sym;
 681       load(TREG_LR, &v1);
 682       base = 14; /* lr */
 683       fc=sign=0;
 684       v=VT_LOCAL;
 685     }
 686     if(v == VT_LOCAL) {
 687        if(is_float(ft)) {
 688         calcaddr(&base,&fc,&sign,1020,2);
 689 #ifdef TCC_ARM_VFP
 690         op=0xED000A00; /* fsts */
 691         if(!sign)
 692           op|=0x800000;
 693         if ((ft & VT_BTYPE) != VT_FLOAT)
 694           op|=0x100;   /* fsts -> fstd */
 695         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 696 #else
 697         op=0xED000100;
 698         if(!sign)
 699           op|=0x800000;
 700 #if LDOUBLE_SIZE == 8
 701         if ((ft & VT_BTYPE) != VT_FLOAT)
 702           op|=0x8000;
 703 #else
 704         if ((ft & VT_BTYPE) == VT_DOUBLE)
 705           op|=0x8000;
 706         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 707           op|=0x400000;
 708 #endif
 709         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 710 #endif
 711         return;
 712       } else if((ft & VT_BTYPE) == VT_SHORT) {
 713         calcaddr(&base,&fc,&sign,255,0);
 714         op=0xE14000B0;
 715         if(!sign)
 716           op|=0x800000;
 717         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 718       } else {
 719         calcaddr(&base,&fc,&sign,4095,0);
 720         op=0xE5000000;
 721         if(!sign)
 722           op|=0x800000;
 723         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 724           op|=0x400000;
 725         o(op|(intr(r)<<12)|fc|(base<<16));
 726       }
 727       return;
 728     }
 729   }
 730   tcc_error("store unimplemented");
 731 }
 732
 733 static void gadd_sp(int val)
 734 {
 735   stuff_const_harder(0xE28DD000,val);
 736 }
 737
 738 /* 'is_jmp' is '1' if it is a jump */
 739 static void gcall_or_jmp(int is_jmp)
 740 {
 741   int r;
 742   uint32_t x;
 743   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 744     /* constant case */
 745         if(vtop->r & VT_SYM){
 746                 x=encbranch(ind,ind+vtop->c.i,0);
 747                 if(x) {
 748                 /* relocation case */
 749                   greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 750                   o(x|(is_jmp?0xE0000000:0xE1000000));
 751                 } else {
 752                         if(!is_jmp)
 753                                 o(0xE28FE004); // add lr,pc,#4
 754                         o(0xE51FF004);   // ldr pc,[pc,#-4]
 755                         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 756                         o(vtop->c.i);
 757                 }
 758         }else{
 759                 if(!is_jmp)
 760                         o(0xE28FE004); // add lr,pc,#4
 761                 o(0xE51FF004);   // ldr pc,[pc,#-4]
 762                 o(vtop->c.i);
 763         }
 764   } else {
 765     /* otherwise, indirect call */
 766     r = gv(RC_INT);
 767     if(!is_jmp)
 768       o(0xE1A0E00F);       // mov lr,pc
 769     o(0xE1A0F000|intr(r)); // mov pc,r
 770   }
 771 }
 772
 773 static int unalias_ldbl(int btype)
 774 {
 775 #if LDOUBLE_SIZE == 8
 776     if (btype == VT_LDOUBLE)
 777       btype = VT_DOUBLE;
 778 #endif
 779     return btype;
 780 }
 781
 782 /* Return whether a structure is an homogeneous float aggregate or not.
 783    The answer is true if all the elements of the structure are of the same
 784    primitive float type and there is less than 4 elements.
 785
 786    type: the type corresponding to the structure to be tested */
 787 static int is_hgen_float_aggr(CType *type)
 788 {
 789   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 790     struct Sym *ref;
 791     int btype, nb_fields = 0;
 792
 793     ref = type->ref->next;
 794     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 795     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 796       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 797       return !ref && nb_fields <= 4;
 798     }
 799   }
 800   return 0;
 801 }
 802
 803 struct avail_regs {
 804   signed char avail[3]; /* 3 holes max with only float and double alignments */
 805   int first_hole; /* first available hole */
 806   int last_hole; /* last available hole (none if equal to first_hole) */
 807   int first_free_reg; /* next free register in the sequence, hole excluded */
 808 };
 809
 810 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 811
 812 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 813    param) according to the rules described in the procedure call standard for
 814    the ARM architecture (AAPCS). If found, the registers are assigned to this
 815    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 816    and the parameter is a single float.
 817
 818    avregs: opaque structure to keep track of available VFP co-processor regs
 819    align: alignment constraints for the param, as returned by type_size()
 820    size: size of the parameter, as returned by type_size() */
 821 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 822 {
 823   int first_reg = 0;
 824
 825   if (avregs->first_free_reg == -1)
 826     return -1;
 827   if (align >> 3) { /* double alignment */
 828     first_reg = avregs->first_free_reg;
 829     /* alignment constraint not respected so use next reg and record hole */
 830     if (first_reg & 1)
 831       avregs->avail[avregs->last_hole++] = first_reg++;
 832   } else { /* no special alignment (float or array of float) */
 833     /* if single float and a hole is available, assign the param to it */
 834     if (size == 4 && avregs->first_hole != avregs->last_hole)
 835       return avregs->avail[avregs->first_hole++];
 836     else
 837       first_reg = avregs->first_free_reg;
 838   }
 839   if (first_reg + size / 4 <= 16) {
 840     avregs->first_free_reg = first_reg + size / 4;
 841     return first_reg;
 842   }
 843   avregs->first_free_reg = -1;
 844   return -1;
 845 }
 846
 847 /* Returns whether all params need to be passed in core registers or not.
 848    This is the case for function part of the runtime ABI. */
 849 int floats_in_core_regs(SValue *sval)
 850 {
 851   if (!sval->sym)
 852     return 0;
 853
 854   switch (sval->sym->v) {
 855     case TOK___floatundisf:
 856     case TOK___floatundidf:
 857     case TOK___fixunssfdi:
 858     case TOK___fixunsdfdi:
 859 #ifndef TCC_ARM_VFP
 860     case TOK___fixunsxfdi:
 861 #endif
 862     case TOK___floatdisf:
 863     case TOK___floatdidf:
 864     case TOK___fixsfdi:
 865     case TOK___fixdfdi:
 866       return 1;
 867
 868     default:
 869       return 0;
 870   }
 871 }
 872
 873 /* Return the number of registers needed to return the struct, or 0 if
 874    returning via struct pointer. */
 875 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 876 #ifdef TCC_ARM_EABI
 877     int size, align;
 878     size = type_size(vt, &align);
 879     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 880         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 881         *ret_align = 8;
 882         *regsize = 8;
 883         ret->ref = NULL;
 884         ret->t = VT_DOUBLE;
 885         return (size + 7) >> 3;
 886     } else if (size <= 4) {
 887         *ret_align = 4;
 888         *regsize = 4;
 889         ret->ref = NULL;
 890         ret->t = VT_INT;
 891         return 1;
 892     } else
 893         return 0;
 894 #else
 895     return 0;
 896 #endif
 897 }
 898
 899 /* Parameters are classified according to how they are copied to their final
 900    destination for the function call. Because the copying is performed class
 901    after class according to the order in the union below, it is important that
 902    some constraints about the order of the members of this union are respected:
 903    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 904    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 905      VFP_STRUCT_CLASS;
 906    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 907    See the comment for the main loop in copy_params() for the reason. */
 908 enum reg_class {
 909         STACK_CLASS = 0,
 910         CORE_STRUCT_CLASS,
 911         VFP_CLASS,
 912         VFP_STRUCT_CLASS,
 913         CORE_CLASS,
 914         NB_CLASSES
 915 };
 916
 917 struct param_plan {
 918     int start; /* first reg or addr used depending on the class */
 919     int end; /* last reg used or next free addr depending on the class */
 920     SValue *sval; /* pointer to SValue on the value stack */
 921     struct param_plan *prev; /*  previous element in this class */
 922 };
 923
 924 struct plan {
 925     struct param_plan *pplans; /* array of all the param plans */
 926     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 927 };
 928
 929 #define add_param_plan(plan,pplan,class)                        \
 930     do {                                                        \
 931         pplan.prev = plan->clsplans[class];                     \
 932         plan->pplans[plan ## _nb] = pplan;                      \
 933         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
 934     } while(0)
 935
 936 /* Assign parameters to registers and stack with alignment according to the
 937    rules in the procedure call standard for the ARM architecture (AAPCS).
 938    The overall assignment is recorded in an array of per parameter structures
 939    called parameter plans. The parameter plans are also further organized in a
 940    number of linked lists, one per class of parameter (see the comment for the
 941    definition of union reg_class).
 942
 943    nb_args: number of parameters of the function for which a call is generated
 944    float_abi: float ABI in use for this function call
 945    plan: the structure where the overall assignment is recorded
 946    todo: a bitmap that record which core registers hold a parameter
 947
 948    Returns the amount of stack space needed for parameter passing
 949
 950    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 951    is the responsibility of the caller to free this array once used (ie not
 952    before copy_params). */
 953 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
 954 {
 955   int i, size, align;
 956   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
 957   int plan_nb = 0;
 958   struct param_plan pplan;
 959   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 960
 961   ncrn = nsaa = 0;
 962   *todo = 0;
 963   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 964   memset(plan->clsplans, 0, sizeof(plan->clsplans));
 965   for(i = nb_args; i-- ;) {
 966     int j, start_vfpreg = 0;
 967     CType type = vtop[-i].type;
 968     type.t &= ~VT_ARRAY;
 969     size = type_size(&type, &align);
 970     size = (size + 3) & ~3;
 971     align = (align + 3) & ~3;
 972     switch(vtop[-i].type.t & VT_BTYPE) {
 973       case VT_STRUCT:
 974       case VT_FLOAT:
 975       case VT_DOUBLE:
 976       case VT_LDOUBLE:
 977       if (float_abi == ARM_HARD_FLOAT) {
 978         int is_hfa = 0; /* Homogeneous float aggregate */
 979
 980         if (is_float(vtop[-i].type.t)
 981             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
 982           int end_vfpreg;
 983
 984           start_vfpreg = assign_vfpreg(&avregs, align, size);
 985           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
 986           if (start_vfpreg >= 0) {
 987             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
 988             if (is_hfa)
 989               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
 990             else
 991               add_param_plan(plan, pplan, VFP_CLASS);
 992             continue;
 993           } else
 994             break;
 995         }
 996       }
 997       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
 998       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
 999         /* The parameter is allocated both in core register and on stack. As
1000          * such, it can be of either class: it would either be the last of
1001          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1002         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
1003           *todo|=(1<<j);
1004         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1005         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1006         ncrn += size/4;
1007         if (ncrn > 4)
1008           nsaa = (ncrn - 4) * 4;
1009       } else {
1010         ncrn = 4;
1011         break;
1012       }
1013       continue;
1014       default:
1015       if (ncrn < 4) {
1016         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1017
1018         if (is_long) {
1019           ncrn = (ncrn + 1) & -2;
1020           if (ncrn == 4)
1021             break;
1022         }
1023         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1024         ncrn++;
1025         if (is_long)
1026           pplan.end = ncrn++;
1027         add_param_plan(plan, pplan, CORE_CLASS);
1028         continue;
1029       }
1030     }
1031     nsaa = (nsaa + (align - 1)) & ~(align - 1);
1032     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1033     add_param_plan(plan, pplan, STACK_CLASS);
1034     nsaa += size; /* size already rounded up before */
1035   }
1036   return nsaa;
1037 }
1038
1039 #undef add_param_plan
1040
1041 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1042    function call.
1043
1044    nb_args: number of parameters the function take
1045    plan: the overall assignment plan for parameters
1046    todo: a bitmap indicating what core reg will hold a parameter
1047
1048    Returns the number of SValue added by this function on the value stack */
1049 static int copy_params(int nb_args, struct plan *plan, int todo)
1050 {
1051   int size, align, r, i, nb_extra_sval = 0;
1052   struct param_plan *pplan;
1053   int pass = 0;
1054
1055    /* Several constraints require parameters to be copied in a specific order:
1056       - structures are copied to the stack before being loaded in a reg;
1057       - floats loaded to an odd numbered VFP reg are first copied to the
1058         preceding even numbered VFP reg and then moved to the next VFP reg.
1059
1060       It is thus important that:
1061       - structures assigned to core regs must be copied after parameters
1062         assigned to the stack but before structures assigned to VFP regs because
1063         a structure can lie partly in core registers and partly on the stack;
1064       - parameters assigned to the stack and all structures be copied before
1065         parameters assigned to a core reg since copying a parameter to the stack
1066         require using a core reg;
1067       - parameters assigned to VFP regs be copied before structures assigned to
1068         VFP regs as the copy might use an even numbered VFP reg that already
1069         holds part of a structure. */
1070 again:
1071   for(i = 0; i < NB_CLASSES; i++) {
1072     for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1073
1074       if (pass
1075           && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
1076         continue;
1077
1078       vpushv(pplan->sval);
1079       pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1080       switch(i) {
1081         case STACK_CLASS:
1082         case CORE_STRUCT_CLASS:
1083         case VFP_STRUCT_CLASS:
1084           if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1085             int padding = 0;
1086             size = type_size(&pplan->sval->type, &align);
1087             /* align to stack align size */
1088             size = (size + 3) & ~3;
1089             if (i == STACK_CLASS && pplan->prev)
1090               padding = pplan->start - pplan->prev->end;
1091             size += padding; /* Add padding if any */
1092             /* allocate the necessary size on stack */
1093             gadd_sp(-size);
1094             /* generate structure store */
1095             r = get_reg(RC_INT);
1096             o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1097             vset(&vtop->type, r | VT_LVAL, 0);
1098             vswap();
1099             vstore(); /* memcpy to current sp + potential padding */
1100
1101             /* Homogeneous float aggregate are loaded to VFP registers
1102                immediately since there is no way of loading data in multiple
1103                non consecutive VFP registers as what is done for other
1104                structures (see the use of todo). */
1105             if (i == VFP_STRUCT_CLASS) {
1106               int first = pplan->start, nb = pplan->end - first + 1;
1107               /* vpop.32 {pplan->start, ..., pplan->end} */
1108               o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1109               /* No need to write the register used to a SValue since VFP regs
1110                  cannot be used for gcall_or_jmp */
1111             }
1112           } else {
1113             if (is_float(pplan->sval->type.t)) {
1114 #ifdef TCC_ARM_VFP
1115               r = vfpr(gv(RC_FLOAT)) << 12;
1116               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1117                 size = 4;
1118               else {
1119                 size = 8;
1120                 r |= 0x101; /* vpush.32 -> vpush.64 */
1121               }
1122               o(0xED2D0A01 + r); /* vpush */
1123 #else
1124               r = fpr(gv(RC_FLOAT)) << 12;
1125               if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1126                 size = 4;
1127               else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1128                 size = 8;
1129               else
1130                 size = LDOUBLE_SIZE;
1131
1132               if (size == 12)
1133                 r |= 0x400000;
1134               else if(size == 8)
1135                 r|=0x8000;
1136
1137               o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1138 #endif
1139             } else {
1140               /* simple type (currently always same size) */
1141               /* XXX: implicit cast ? */
1142               size=4;
1143               if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1144                 lexpand();
1145                 size = 8;
1146                 r = gv(RC_INT);
1147                 o(0xE52D0004|(intr(r)<<12)); /* push r */
1148                 vtop--;
1149               }
1150               r = gv(RC_INT);
1151               o(0xE52D0004|(intr(r)<<12)); /* push r */
1152             }
1153             if (i == STACK_CLASS && pplan->prev)
1154               gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1155           }
1156           break;
1157
1158         case VFP_CLASS:
1159           gv(regmask(TREG_F0 + (pplan->start >> 1)));
1160           if (pplan->start & 1) { /* Must be in upper part of double register */
1161             o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1162             vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1163           }
1164           break;
1165
1166         case CORE_CLASS:
1167           if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1168             lexpand();
1169             gv(regmask(pplan->end));
1170             pplan->sval->r2 = vtop->r;
1171             vtop--;
1172           }
1173           gv(regmask(pplan->start));
1174           /* Mark register as used so that gcall_or_jmp use another one
1175              (regs >=4 are free as never used to pass parameters) */
1176           pplan->sval->r = vtop->r;
1177           break;
1178       }
1179       vtop--;
1180     }
1181   }
1182
1183   /* second pass to restore registers that were saved on stack by accident.
1184      Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1185   if (++pass < 2)
1186     goto again;
1187
1188   /* Manually free remaining registers since next parameters are loaded
1189    * manually, without the help of gv(int). */
1190   save_regs(nb_args);
1191
1192   if(todo) {
1193     o(0xE8BD0000|todo); /* pop {todo} */
1194     for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1195       int r;
1196       pplan->sval->r = pplan->start;
1197       /* An SValue can only pin 2 registers at best (r and r2) but a structure
1198          can occupy more than 2 registers. Thus, we need to push on the value
1199          stack some fake parameter to have on SValue for each registers used
1200          by a structure (r2 is not used). */
1201       for (r = pplan->start + 1; r <= pplan->end; r++) {
1202         if (todo & (1 << r)) {
1203           nb_extra_sval++;
1204           vpushi(0);
1205           vtop->r = r;
1206         }
1207       }
1208     }
1209   }
1210   return nb_extra_sval;
1211 }
1212
1213 /* Generate function call. The function address is pushed first, then
1214    all the parameters in call order. This functions pops all the
1215    parameters and the function address. */
1216 void gfunc_call(int nb_args)
1217 {
1218   int r, args_size;
1219   int def_float_abi = float_abi;
1220   int todo;
1221   struct plan plan;
1222
1223 #ifdef TCC_ARM_EABI
1224   int variadic;
1225
1226   if (float_abi == ARM_HARD_FLOAT) {
1227     variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
1228     if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1229       float_abi = ARM_SOFTFP_FLOAT;
1230   }
1231 #endif
1232   /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1233      VT_JMP anywhere except on the top of the stack because it would complicate
1234      the code generator. */
1235   r = vtop->r & VT_VALMASK;
1236   if (r == VT_CMP || (r & ~1) == VT_JMP)
1237     gv(RC_INT);
1238
1239   args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1240
1241 #ifdef TCC_ARM_EABI
1242   if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1243     args_size = (args_size + 7) & ~7;
1244     o(0xE24DD004); /* sub sp, sp, #4 */
1245   }
1246 #endif
1247
1248   nb_args += copy_params(nb_args, &plan, todo);
1249   tcc_free(plan.pplans);
1250
1251   /* Move fct SValue on top as required by gcall_or_jmp */
1252   vrotb(nb_args + 1);
1253   gcall_or_jmp(0);
1254   if (args_size)
1255       gadd_sp(args_size); /* pop all parameters passed on the stack */
1256 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1257   if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1258     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1259       o(0xEE000A10); /*vmov s0, r0 */
1260     } else {
1261       o(0xEE000B10); /* vmov.32 d0[0], r0 */
1262       o(0xEE201B10); /* vmov.32 d0[1], r1 */
1263     }
1264   }
1265 #endif
1266   vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1267   leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1268   float_abi = def_float_abi;
1269 }
1270
1271 /* generate function prolog of type 't' */
1272 void gfunc_prolog(CType *func_type)
1273 {
1274   Sym *sym,*sym2;
1275   int n, nf, size, align, rs, struct_ret = 0;
1276   int addr, pn, sn; /* pn=core, sn=stack */
1277   CType ret_type;
1278
1279 #ifdef TCC_ARM_EABI
1280   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1281 #endif
1282
1283   sym = func_type->ref;
1284   func_vt = sym->type;
1285   func_var = (func_type->ref->f.func_type == FUNC_ELLIPSIS);
1286
1287   n = nf = 0;
1288   if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1289       !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1290   {
1291     n++;
1292     struct_ret = 1;
1293     func_vc = 12; /* Offset from fp of the place to store the result */
1294   }
1295   for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1296     size = type_size(&sym2->type, &align);
1297 #ifdef TCC_ARM_EABI
1298     if (float_abi == ARM_HARD_FLOAT && !func_var &&
1299         (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1300       int tmpnf = assign_vfpreg(&avregs, align, size);
1301       tmpnf += (size + 3) / 4;
1302       nf = (tmpnf > nf) ? tmpnf : nf;
1303     } else
1304 #endif
1305     if (n < 4)
1306       n += (size + 3) / 4;
1307   }
1308   o(0xE1A0C00D); /* mov ip,sp */
1309   if (func_var)
1310     n=4;
1311   if (n) {
1312     if(n>4)
1313       n=4;
1314 #ifdef TCC_ARM_EABI
1315     n=(n+1)&-2;
1316 #endif
1317     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1318   }
1319   if (nf) {
1320     if (nf>16)
1321       nf=16;
1322     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1323     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1324   }
1325   o(0xE92D5800); /* save fp, ip, lr */
1326   o(0xE1A0B00D); /* mov fp, sp */
1327   func_sub_sp_offset = ind;
1328   o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1329
1330 #ifdef TCC_ARM_EABI
1331   if (float_abi == ARM_HARD_FLOAT) {
1332     func_vc += nf * 4;
1333     avregs = AVAIL_REGS_INITIALIZER;
1334   }
1335 #endif
1336   pn = struct_ret, sn = 0;
1337   while ((sym = sym->next)) {
1338     CType *type;
1339     type = &sym->type;
1340     size = type_size(type, &align);
1341     size = (size + 3) >> 2;
1342     align = (align + 3) & ~3;
1343 #ifdef TCC_ARM_EABI
1344     if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1345         || is_hgen_float_aggr(&sym->type))) {
1346       int fpn = assign_vfpreg(&avregs, align, size << 2);
1347       if (fpn >= 0)
1348         addr = fpn * 4;
1349       else
1350         goto from_stack;
1351     } else
1352 #endif
1353     if (pn < 4) {
1354 #ifdef TCC_ARM_EABI
1355         pn = (pn + (align-1)/4) & -(align/4);
1356 #endif
1357       addr = (nf + pn) * 4;
1358       pn += size;
1359       if (!sn && pn > 4)
1360         sn = (pn - 4);
1361     } else {
1362 #ifdef TCC_ARM_EABI
1363 from_stack:
1364         sn = (sn + (align-1)/4) & -(align/4);
1365 #endif
1366       addr = (n + nf + sn) * 4;
1367       sn += size;
1368     }
1369     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1370              addr + 12);
1371   }
1372   last_itod_magic=0;
1373   leaffunc = 1;
1374   loc = 0;
1375 }
1376
1377 /* generate function epilog */
1378 void gfunc_epilog(void)
1379 {
1380   uint32_t x;
1381   int diff;
1382   /* Copy float return value to core register if base standard is used and
1383      float computation is made with VFP */
1384 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1385   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1386     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1387       o(0xEE100A10); /* fmrs r0, s0 */
1388     else {
1389       o(0xEE100B10); /* fmrdl r0, d0 */
1390       o(0xEE301B10); /* fmrdh r1, d0 */
1391     }
1392   }
1393 #endif
1394   o(0xE89BA800); /* restore fp, sp, pc */
1395   diff = (-loc + 3) & -4;
1396 #ifdef TCC_ARM_EABI
1397   if(!leaffunc)
1398     diff = ((diff + 11) & -8) - 4;
1399 #endif
1400   if(diff > 0) {
1401     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1402     if(x)
1403       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1404     else {
1405       int addr;
1406       addr=ind;
1407       o(0xE59FC004); /* ldr ip,[pc+4] */
1408       o(0xE04BD00C); /* sub sp,fp,ip  */
1409       o(0xE1A0F00E); /* mov pc,lr */
1410       o(diff);
1411       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1412     }
1413   }
1414 }
1415
1416 ST_FUNC void gen_fill_nops(int bytes)
1417 {
1418     if ((bytes & 3))
1419       tcc_error("alignment of code section not multiple of 4");
1420     while (bytes > 0) {
1421         o(0xE1A00000);
1422         bytes -= 4;
1423     }
1424 }
1425
1426 /* generate a jump to a label */
1427 int gjmp(int t)
1428 {
1429   int r;
1430   if (nocode_wanted)
1431     return t;
1432   r=ind;
1433   o(0xE0000000|encbranch(r,t,1));
1434   return r;
1435 }
1436
/* generate a jump to a fixed address (the position returned by gjmp() is
   not needed here) */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1442
1443 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1444 int gtst(int inv, int t)
1445 {
1446   int v, r;
1447   uint32_t op;
1448
1449   v = vtop->r & VT_VALMASK;
1450   r=ind;
1451
1452   if (nocode_wanted) {
1453     ;
1454   } else if (v == VT_CMP) {
1455     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1456     op|=encbranch(r,t,1);
1457     o(op);
1458     t=r;
1459   } else if (v == VT_JMP || v == VT_JMPI) {
1460     if ((v & 1) == inv) {
1461       if(!vtop->c.i)
1462         vtop->c.i=t;
1463       else {
1464         uint32_t *x;
1465         int p,lp;
1466         if(t) {
1467           p = vtop->c.i;
1468           do {
1469             p = decbranch(lp=p);
1470           } while(p);
1471           x = (uint32_t *)(cur_text_section->data + lp);
1472           *x &= 0xff000000;
1473           *x |= encbranch(lp,t,1);
1474         }
1475         t = vtop->c.i;
1476       }
1477     } else {
1478       t = gjmp(t);
1479       gsym(vtop->c.i);
1480     }
1481   }
1482   vtop--;
1483   return t;
1484 }
1485
1486 /* generate an integer binary operation */
1487 void gen_opi(int op)
1488 {
1489   int c, func = 0;
1490   uint32_t opc = 0, r, fr;
1491   unsigned short retreg = REG_IRET;
1492
1493   c=0;
1494   switch(op) {
1495     case '+':
1496       opc = 0x8;
1497       c=1;
1498       break;
1499     case TOK_ADDC1: /* add with carry generation */
1500       opc = 0x9;
1501       c=1;
1502       break;
1503     case '-':
1504       opc = 0x4;
1505       c=1;
1506       break;
1507     case TOK_SUBC1: /* sub with carry generation */
1508       opc = 0x5;
1509       c=1;
1510       break;
1511     case TOK_ADDC2: /* add with carry use */
1512       opc = 0xA;
1513       c=1;
1514       break;
1515     case TOK_SUBC2: /* sub with carry use */
1516       opc = 0xC;
1517       c=1;
1518       break;
1519     case '&':
1520       opc = 0x0;
1521       c=1;
1522       break;
1523     case '^':
1524       opc = 0x2;
1525       c=1;
1526       break;
1527     case '|':
1528       opc = 0x18;
1529       c=1;
1530       break;
1531     case '*':
1532       gv2(RC_INT, RC_INT);
1533       r = vtop[-1].r;
1534       fr = vtop[0].r;
1535       vtop--;
1536       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1537       return;
1538     case TOK_SHL:
1539       opc = 0;
1540       c=2;
1541       break;
1542     case TOK_SHR:
1543       opc = 1;
1544       c=2;
1545       break;
1546     case TOK_SAR:
1547       opc = 2;
1548       c=2;
1549       break;
1550     case '/':
1551     case TOK_PDIV:
1552       func=TOK___divsi3;
1553       c=3;
1554       break;
1555     case TOK_UDIV:
1556       func=TOK___udivsi3;
1557       c=3;
1558       break;
1559     case '%':
1560 #ifdef TCC_ARM_EABI
1561       func=TOK___aeabi_idivmod;
1562       retreg=REG_LRET;
1563 #else
1564       func=TOK___modsi3;
1565 #endif
1566       c=3;
1567       break;
1568     case TOK_UMOD:
1569 #ifdef TCC_ARM_EABI
1570       func=TOK___aeabi_uidivmod;
1571       retreg=REG_LRET;
1572 #else
1573       func=TOK___umodsi3;
1574 #endif
1575       c=3;
1576       break;
1577     case TOK_UMULL:
1578       gv2(RC_INT, RC_INT);
1579       r=intr(vtop[-1].r2=get_reg(RC_INT));
1580       c=vtop[-1].r;
1581       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1582       vtop--;
1583       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1584       return;
1585     default:
1586       opc = 0x15;
1587       c=1;
1588       break;
1589   }
1590   switch(c) {
1591     case 1:
1592       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1593         if(opc == 4 || opc == 5 || opc == 0xc) {
1594           vswap();
1595           opc|=2; // sub -> rsb
1596         }
1597       }
1598       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1599           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1600         gv(RC_INT);
1601       vswap();
1602       c=intr(gv(RC_INT));
1603       vswap();
1604       opc=0xE0000000|(opc<<20)|(c<<16);
1605       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1606         uint32_t x;
1607         x=stuff_const(opc|0x2000000,vtop->c.i);
1608         if(x) {
1609           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1610           o(x|(r<<12));
1611           goto done;
1612         }
1613       }
1614       fr=intr(gv(RC_INT));
1615       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1616       o(opc|(r<<12)|fr);
1617 done:
1618       vtop--;
1619       if (op >= TOK_ULT && op <= TOK_GT) {
1620         vtop->r = VT_CMP;
1621         vtop->c.i = op;
1622       }
1623       break;
1624     case 2:
1625       opc=0xE1A00000|(opc<<5);
1626       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1627           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1628         gv(RC_INT);
1629       vswap();
1630       r=intr(gv(RC_INT));
1631       vswap();
1632       opc|=r;
1633       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1634         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1635         c = vtop->c.i & 0x1f;
1636         o(opc|(c<<7)|(fr<<12));
1637       } else {
1638         fr=intr(gv(RC_INT));
1639         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1640         o(opc|(c<<12)|(fr<<8)|0x10);
1641       }
1642       vtop--;
1643       break;
1644     case 3:
1645       vpush_global_sym(&func_old_type, func);
1646       vrott(3);
1647       gfunc_call(2);
1648       vpushi(0);
1649       vtop->r = retreg;
1650       break;
1651     default:
1652       tcc_error("gen_opi %i unimplemented!",op);
1653   }
1654 }
1655
1656 #ifdef TCC_ARM_VFP
1657 static int is_zero(int i)
1658 {
1659   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1660     return 0;
1661   if (vtop[i].type.t == VT_FLOAT)
1662     return (vtop[i].c.f == 0.f);
1663   else if (vtop[i].type.t == VT_DOUBLE)
1664     return (vtop[i].c.d == 0.0);
1665   return (vtop[i].c.ld == 0.l);
1666 }
1667
1668 /* generate a floating point operation 'v = t1 op t2' instruction. The
1669  *    two operands are guaranteed to have the same floating point type */
1670 void gen_opf(int op)
1671 {
1672   uint32_t x;
1673   int fneg=0,r;
1674   x=0xEE000A00|T2CPR(vtop->type.t);
1675   switch(op) {
1676     case '+':
1677       if(is_zero(-1))
1678         vswap();
1679       if(is_zero(0)) {
1680         vtop--;
1681         return;
1682       }
1683       x|=0x300000;
1684       break;
1685     case '-':
1686       x|=0x300040;
1687       if(is_zero(0)) {
1688         vtop--;
1689         return;
1690       }
1691       if(is_zero(-1)) {
1692         x|=0x810000; /* fsubX -> fnegX */
1693         vswap();
1694         vtop--;
1695         fneg=1;
1696       }
1697       break;
1698     case '*':
1699       x|=0x200000;
1700       break;
1701     case '/':
1702       x|=0x800000;
1703       break;
1704     default:
1705       if(op < TOK_ULT || op > TOK_GT) {
1706         tcc_error("unknown fp op %x!",op);
1707         return;
1708       }
1709       if(is_zero(-1)) {
1710         vswap();
1711         switch(op) {
1712           case TOK_LT: op=TOK_GT; break;
1713           case TOK_GE: op=TOK_ULE; break;
1714           case TOK_LE: op=TOK_GE; break;
1715           case TOK_GT: op=TOK_ULT; break;
1716         }
1717       }
1718       x|=0xB40040; /* fcmpX */
1719       if(op!=TOK_EQ && op!=TOK_NE)
1720         x|=0x80; /* fcmpX -> fcmpeX */
1721       if(is_zero(0)) {
1722         vtop--;
1723         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1724       } else {
1725         x|=vfpr(gv(RC_FLOAT));
1726         vswap();
1727         o(x|(vfpr(gv(RC_FLOAT))<<12));
1728         vtop--;
1729       }
1730       o(0xEEF1FA10); /* fmstat */
1731
1732       switch(op) {
1733         case TOK_LE: op=TOK_ULE; break;
1734         case TOK_LT: op=TOK_ULT; break;
1735         case TOK_UGE: op=TOK_GE; break;
1736         case TOK_UGT: op=TOK_GT; break;
1737       }
1738
1739       vtop->r = VT_CMP;
1740       vtop->c.i = op;
1741       return;
1742   }
1743   r=gv(RC_FLOAT);
1744   x|=vfpr(r);
1745   r=regmask(r);
1746   if(!fneg) {
1747     int r2;
1748     vswap();
1749     r2=gv(RC_FLOAT);
1750     x|=vfpr(r2)<<16;
1751     r|=regmask(r2);
1752   }
1753   vtop->r=get_reg_ex(RC_FLOAT,r);
1754   if(!fneg)
1755     vtop--;
1756   o(x|(vfpr(vtop->r)<<12));
1757 }
1758
1759 #else
1760 static uint32_t is_fconst()
1761 {
1762   long double f;
1763   uint32_t r;
1764   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1765     return 0;
1766   if (vtop->type.t == VT_FLOAT)
1767     f = vtop->c.f;
1768   else if (vtop->type.t == VT_DOUBLE)
1769     f = vtop->c.d;
1770   else
1771     f = vtop->c.ld;
1772   if(!ieee_finite(f))
1773     return 0;
1774   r=0x8;
1775   if(f<0.0) {
1776     r=0x18;
1777     f=-f;
1778   }
1779   if(f==0.0)
1780     return r;
1781   if(f==1.0)
1782     return r|1;
1783   if(f==2.0)
1784     return r|2;
1785   if(f==3.0)
1786     return r|3;
1787   if(f==4.0)
1788     return r|4;
1789   if(f==5.0)
1790     return r|5;
1791   if(f==0.5)
1792     return r|6;
1793   if(f==10.0)
1794     return r|7;
1795   return 0;
1796 }
1797
1798 /* generate a floating point operation 'v = t1 op t2' instruction. The
1799    two operands are guaranteed to have the same floating point type */
1800 void gen_opf(int op)
1801 {
1802   uint32_t x, r, r2, c1, c2;
1803   //fputs("gen_opf\n",stderr);
1804   vswap();
1805   c1 = is_fconst();
1806   vswap();
1807   c2 = is_fconst();
1808   x=0xEE000100;
1809 #if LDOUBLE_SIZE == 8
1810   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1811     x|=0x80;
1812 #else
1813   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1814     x|=0x80;
1815   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1816     x|=0x80000;
1817 #endif
1818   switch(op)
1819   {
1820     case '+':
1821       if(!c2) {
1822         vswap();
1823         c2=c1;
1824       }
1825       vswap();
1826       r=fpr(gv(RC_FLOAT));
1827       vswap();
1828       if(c2) {
1829         if(c2>0xf)
1830           x|=0x200000; // suf
1831         r2=c2&0xf;
1832       } else {
1833         r2=fpr(gv(RC_FLOAT));
1834       }
1835       break;
1836     case '-':
1837       if(c2) {
1838         if(c2<=0xf)
1839           x|=0x200000; // suf
1840         r2=c2&0xf;
1841         vswap();
1842         r=fpr(gv(RC_FLOAT));
1843         vswap();
1844       } else if(c1 && c1<=0xf) {
1845         x|=0x300000; // rsf
1846         r2=c1;
1847         r=fpr(gv(RC_FLOAT));
1848         vswap();
1849       } else {
1850         x|=0x200000; // suf
1851         vswap();
1852         r=fpr(gv(RC_FLOAT));
1853         vswap();
1854         r2=fpr(gv(RC_FLOAT));
1855       }
1856       break;
1857     case '*':
1858       if(!c2 || c2>0xf) {
1859         vswap();
1860         c2=c1;
1861       }
1862       vswap();
1863       r=fpr(gv(RC_FLOAT));
1864       vswap();
1865       if(c2 && c2<=0xf)
1866         r2=c2;
1867       else
1868         r2=fpr(gv(RC_FLOAT));
1869       x|=0x100000; // muf
1870       break;
1871     case '/':
1872       if(c2 && c2<=0xf) {
1873         x|=0x400000; // dvf
1874         r2=c2;
1875         vswap();
1876         r=fpr(gv(RC_FLOAT));
1877         vswap();
1878       } else if(c1 && c1<=0xf) {
1879         x|=0x500000; // rdf
1880         r2=c1;
1881         r=fpr(gv(RC_FLOAT));
1882         vswap();
1883       } else {
1884         x|=0x400000; // dvf
1885         vswap();
1886         r=fpr(gv(RC_FLOAT));
1887         vswap();
1888         r2=fpr(gv(RC_FLOAT));
1889       }
1890       break;
1891     default:
1892       if(op >= TOK_ULT && op <= TOK_GT) {
1893         x|=0xd0f110; // cmfe
1894 /* bug (intention?) in Linux FPU emulator
1895    doesn't set carry if equal */
1896         switch(op) {
1897           case TOK_ULT:
1898           case TOK_UGE:
1899           case TOK_ULE:
1900           case TOK_UGT:
1901             tcc_error("unsigned comparison on floats?");
1902             break;
1903           case TOK_LT:
1904             op=TOK_Nset;
1905             break;
1906           case TOK_LE:
1907             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1908             break;
1909           case TOK_EQ:
1910           case TOK_NE:
1911             x&=~0x400000; // cmfe -> cmf
1912             break;
1913         }
1914         if(c1 && !c2) {
1915           c2=c1;
1916           vswap();
1917           switch(op) {
1918             case TOK_Nset:
1919               op=TOK_GT;
1920               break;
1921             case TOK_GE:
1922               op=TOK_ULE;
1923               break;
1924             case TOK_ULE:
1925               op=TOK_GE;
1926               break;
1927             case TOK_GT:
1928               op=TOK_Nset;
1929               break;
1930           }
1931         }
1932         vswap();
1933         r=fpr(gv(RC_FLOAT));
1934         vswap();
1935         if(c2) {
1936           if(c2>0xf)
1937             x|=0x200000;
1938           r2=c2&0xf;
1939         } else {
1940           r2=fpr(gv(RC_FLOAT));
1941         }
1942         vtop[-1].r = VT_CMP;
1943         vtop[-1].c.i = op;
1944       } else {
1945         tcc_error("unknown fp op %x!",op);
1946         return;
1947       }
1948   }
1949   if(vtop[-1].r == VT_CMP)
1950     c1=15;
1951   else {
1952     c1=vtop->r;
1953     if(r2&0x8)
1954       c1=vtop[-1].r;
1955     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1956     c1=fpr(vtop[-1].r);
1957   }
1958   vtop--;
1959   o(x|(r<<16)|(c1<<12)|r2);
1960 }
1961 #endif
1962
1963 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1964    and 'long long' cases. */
1965 ST_FUNC void gen_cvt_itof1(int t)
1966 {
1967   uint32_t r, r2;
1968   int bt;
1969   bt=vtop->type.t & VT_BTYPE;
1970   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1971 #ifndef TCC_ARM_VFP
1972     uint32_t dsize = 0;
1973 #endif
1974     r=intr(gv(RC_INT));
1975 #ifdef TCC_ARM_VFP
1976     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1977     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1978     r2|=r2<<12;
1979     if(!(vtop->type.t & VT_UNSIGNED))
1980       r2|=0x80;                /* fuitoX -> fsituX */
1981     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1982 #else
1983     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1984     if((t & VT_BTYPE) != VT_FLOAT)
1985       dsize=0x80;    /* flts -> fltd */
1986     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1987     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1988       uint32_t off = 0;
1989       o(0xE3500000|(r<<12));        /* cmp */
1990       r=fpr(get_reg(RC_FLOAT));
1991       if(last_itod_magic) {
1992         off=ind+8-last_itod_magic;
1993         off/=4;
1994         if(off>255)
1995           off=0;
1996       }
1997       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1998       if(!off) {
1999         o(0xEA000000);              /* b */
2000         last_itod_magic=ind;
2001         o(0x4F800000);              /* 4294967296.0f */
2002       }
2003       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
2004     }
2005 #endif
2006     return;
2007   } else if(bt == VT_LLONG) {
2008     int func;
2009     CType *func_type = 0;
2010     if((t & VT_BTYPE) == VT_FLOAT) {
2011       func_type = &func_float_type;
2012       if(vtop->type.t & VT_UNSIGNED)
2013         func=TOK___floatundisf;
2014       else
2015         func=TOK___floatdisf;
2016 #if LDOUBLE_SIZE != 8
2017     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2018       func_type = &func_ldouble_type;
2019       if(vtop->type.t & VT_UNSIGNED)
2020         func=TOK___floatundixf;
2021       else
2022         func=TOK___floatdixf;
2023     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2024 #else
2025     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2026 #endif
2027       func_type = &func_double_type;
2028       if(vtop->type.t & VT_UNSIGNED)
2029         func=TOK___floatundidf;
2030       else
2031         func=TOK___floatdidf;
2032     }
2033     if(func_type) {
2034       vpush_global_sym(func_type, func);
2035       vswap();
2036       gfunc_call(1);
2037       vpushi(0);
2038       vtop->r=TREG_F0;
2039       return;
2040     }
2041   }
2042   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2043 }
2044
2045 /* convert fp to int 't' type */
2046 void gen_cvt_ftoi(int t)
2047 {
2048   uint32_t r, r2;
2049   int u, func = 0;
2050   u=t&VT_UNSIGNED;
2051   t&=VT_BTYPE;
2052   r2=vtop->type.t & VT_BTYPE;
2053   if(t==VT_INT) {
2054 #ifdef TCC_ARM_VFP
2055     r=vfpr(gv(RC_FLOAT));
2056     u=u?0:0x10000;
2057     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2058     r2=intr(vtop->r=get_reg(RC_INT));
2059     o(0xEE100A10|(r<<16)|(r2<<12));
2060     return;
2061 #else
2062     if(u) {
2063       if(r2 == VT_FLOAT)
2064         func=TOK___fixunssfsi;
2065 #if LDOUBLE_SIZE != 8
2066       else if(r2 == VT_LDOUBLE)
2067         func=TOK___fixunsxfsi;
2068       else if(r2 == VT_DOUBLE)
2069 #else
2070       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2071 #endif
2072         func=TOK___fixunsdfsi;
2073     } else {
2074       r=fpr(gv(RC_FLOAT));
2075       r2=intr(vtop->r=get_reg(RC_INT));
2076       o(0xEE100170|(r2<<12)|r);
2077       return;
2078     }
2079 #endif
2080   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2081     if(r2 == VT_FLOAT)
2082       func=TOK___fixsfdi;
2083 #if LDOUBLE_SIZE != 8
2084     else if(r2 == VT_LDOUBLE)
2085       func=TOK___fixxfdi;
2086     else if(r2 == VT_DOUBLE)
2087 #else
2088     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2089 #endif
2090       func=TOK___fixdfdi;
2091   }
2092   if(func) {
2093     vpush_global_sym(&func_old_type, func);
2094     vswap();
2095     gfunc_call(1);
2096     vpushi(0);
2097     if(t == VT_LLONG)
2098       vtop->r2 = REG_LRET;
2099     vtop->r = REG_IRET;
2100     return;
2101   }
2102   tcc_error("unimplemented gen_cvt_ftoi!");
2103 }
2104
2105 /* convert from one floating point type to another */
2106 void gen_cvt_ftof(int t)
2107 {
2108 #ifdef TCC_ARM_VFP
2109   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2110     uint32_t r = vfpr(gv(RC_FLOAT));
2111     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2112   }
2113 #else
2114   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2115   gv(RC_FLOAT);
2116 #endif
2117 }
2118
2119 /* computed goto support */
2120 void ggoto(void)
2121 {
2122   gcall_or_jmp(1);
2123   vtop--;
2124 }
2125
2126 /* Save the stack pointer onto the stack and return the location of its address */
2127 ST_FUNC void gen_vla_sp_save(int addr) {
2128     SValue v;
2129     v.type.t = VT_PTR;
2130     v.r = VT_LOCAL | VT_LVAL;
2131     v.c.i = addr;
2132     store(TREG_SP, &v);
2133 }
2134
2135 /* Restore the SP from a location on the stack */
2136 ST_FUNC void gen_vla_sp_restore(int addr) {
2137     SValue v;
2138     v.type.t = VT_PTR;
2139     v.r = VT_LOCAL | VT_LVAL;
2140     v.c.i = addr;
2141     load(TREG_SP, &v);
2142 }
2143
2144 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2145 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2146     int r = intr(gv(RC_INT));
2147     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2148 #ifdef TCC_ARM_EABI
2149     if (align < 8)
2150         align = 8;
2151 #else
2152     if (align < 4)
2153         align = 4;
2154 #endif
2155     if (align & (align - 1))
2156         tcc_error("alignment is not a power of 2: %i", align);
2157     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2158     vpop();
2159 }
2160
2161 /* end of ARM code generator */
2162 /*************************************************************/
2163 #endif
2164 /*************************************************************/