arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_CPU_VERSION
  38 # define TCC_CPU_VERSION 5
  39 #endif
  40
   41 /* A register can belong to several classes. The classes must be
   42    sorted from most general to most specific (see the gv2() code, which
   43    makes assumptions about this ordering). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
   65 /* Pretty names for the registers. TREG_R0..TREG_F3 (plus TREG_F4..TREG_F7
         when TCC_ARM_VFP is defined) are numbered consecutively from 0, in the
         same order as the entries of reg_classes[]. TREG_SP is pinned to 13 and
         TREG_LR follows as 14; both lie beyond NB_REGS. */
   66 enum {
   67     TREG_R0 = 0,
   68     TREG_R1,
   69     TREG_R2,
   70     TREG_R3,
   71     TREG_R12,
   72     TREG_F0,
   73     TREG_F1,
   74     TREG_F2,
   75     TREG_F3,
   76 #ifdef TCC_ARM_VFP
   77     TREG_F4,
   78     TREG_F5,
   79     TREG_F6,
   80     TREG_F7,
   81 #endif
   82     TREG_SP = 13, /* explicit value: not part of the consecutive run above */
   83     TREG_LR,
   84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #include "tcc.h"
 136
 137 enum float_abi float_abi;
 138
  139 ST_DATA const int reg_classes[NB_REGS] = {
          /* One entry per allocatable register, in TREG_* enum order. Each
             entry combines a generic class bit (RC_INT or RC_FLOAT) with the
             bit that names that single register. */
  140     /* r0 */ RC_INT | RC_R0,
  141     /* r1 */ RC_INT | RC_R1,
  142     /* r2 */ RC_INT | RC_R2,
  143     /* r3 */ RC_INT | RC_R3,
  144     /* r12 */ RC_INT | RC_R12,
  145     /* f0 */ RC_FLOAT | RC_F0,
  146     /* f1 */ RC_FLOAT | RC_F1,
  147     /* f2 */ RC_FLOAT | RC_F2,
  148     /* f3 */ RC_FLOAT | RC_F3,
  149 #ifdef TCC_ARM_VFP
  150  /* d4/s8 */ RC_FLOAT | RC_F4,
  151 /* d5/s10 */ RC_FLOAT | RC_F5,
  152 /* d6/s12 */ RC_FLOAT | RC_F6,
  153 /* d7/s14 */ RC_FLOAT | RC_F7,
  154 #endif
  155 };
 156
 157 static int func_sub_sp_offset, last_itod_magic;
 158 static int leaffunc;
 159
  160 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
      /* EABI + VFP build: arm_init() sets up the float/double types and the
         old-style function types returning them, and copies the float ABI
         selected in the TCCState into the file-level 'float_abi'. */
  161 static CType float_type, double_type, func_float_type, func_double_type;
  162 ST_FUNC void arm_init(struct TCCState *s)
  163 {
  164     float_type.t = VT_FLOAT;
  165     double_type.t = VT_DOUBLE;
  166     func_float_type.t = VT_FUNC;
  167     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
  168     func_double_type.t = VT_FUNC;
  169     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
  170
  171     float_abi = s->float_abi;
  172 #ifndef TCC_ARM_HARDFLOAT
          /* NOTE(review): this warning is emitted on every call when
             TCC_ARM_HARDFLOAT is not defined, whatever s->float_abi holds. */
  173     tcc_warning("soft float ABI currently not supported: default to softfp");
  174 #endif
  175 }
  176 #else
      /* Other builds: the float-returning function types are aliases of
         func_old_type and arm_init() does nothing (its warnings are
         compiled out with '#if 0'). */
  177 #define func_float_type func_old_type
  178 #define func_double_type func_old_type
  179 #define func_ldouble_type func_old_type
  180 ST_FUNC void arm_init(struct TCCState *s)
  181 {
  182 #if 0
  183 #if !defined (TCC_ARM_VFP)
  184     tcc_warning("Support for FPA is deprecated and will be removed in next"
  185                 " release");
  186 #endif
  187 #if !defined (TCC_ARM_EABI)
  188     tcc_warning("Support for OABI is deprecated and will be removed in next"
  189                 " release");
  190 #endif
  191 #endif
  192 }
  193 #endif
 194
 195 static int two2mask(int a,int b) {
 196   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 197 }
 198
 199 static int regmask(int r) {
 200   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 201 }
 202
 203 /******************************************************/
 204
 205 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 206 const char *default_elfinterp(struct TCCState *s)
 207 {
 208     if (s->float_abi == ARM_HARD_FLOAT)
 209         return "/lib/ld-linux-armhf.so.3";
 210     else
 211         return "/lib/ld-linux.so.3";
 212 }
 213 #endif
 214
 215 void o(uint32_t i)
 216 {
 217   /* this is a good place to start adding big-endian support*/
 218   int ind1;
 219   if (nocode_wanted)
 220     return;
 221   ind1 = ind + 4;
 222   if (!cur_text_section)
 223     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 224          "can't evaluate constant expressions outside of a function.");
 225   if (ind1 > cur_text_section->data_allocated)
 226     section_realloc(cur_text_section, ind1);
 227   cur_text_section->data[ind++] = i&255;
 228   i>>=8;
 229   cur_text_section->data[ind++] = i&255;
 230   i>>=8;
 231   cur_text_section->data[ind++] = i&255;
 232   i>>=8;
 233   cur_text_section->data[ind++] = i;
 234 }
 235
 236 static uint32_t stuff_const(uint32_t op, uint32_t c)
 237 {
 238   int try_neg=0;
 239   uint32_t nc = 0, negop = 0;
 240
 241   switch(op&0x1F00000)
 242   {
 243     case 0x800000: //add
 244     case 0x400000: //sub
 245       try_neg=1;
 246       negop=op^0xC00000;
 247       nc=-c;
 248       break;
 249     case 0x1A00000: //mov
 250     case 0x1E00000: //mvn
 251       try_neg=1;
 252       negop=op^0x400000;
 253       nc=~c;
 254       break;
 255     case 0x200000: //xor
 256       if(c==~0)
 257         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 258       break;
 259     case 0x0: //and
 260       if(c==~0)
 261         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 262     case 0x1C00000: //bic
 263       try_neg=1;
 264       negop=op^0x1C00000;
 265       nc=~c;
 266       break;
 267     case 0x1800000: //orr
 268       if(c==~0)
 269         return (op&0xFFF0FFFF)|0x1E00000;
 270       break;
 271   }
 272   do {
 273     uint32_t m;
 274     int i;
 275     if(c<256) /* catch undefined <<32 */
 276       return op|c;
 277     for(i=2;i<32;i+=2) {
 278       m=(0xff>>i)|(0xff<<(32-i));
 279       if(!(c&~m))
 280         return op|(i<<7)|(c<<i)|(c>>(32-i));
 281     }
 282     op=negop;
 283     c=nc;
 284   } while(try_neg--);
 285   return 0;
 286 }
 287
 288
 289 //only add,sub
 290 void stuff_const_harder(uint32_t op, uint32_t v) {
 291   uint32_t x;
 292   x=stuff_const(op,v);
 293   if(x)
 294     o(x);
 295   else {
 296     uint32_t a[16], nv, no, o2, n2;
 297     int i,j,k;
 298     a[0]=0xff;
 299     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 300     for(i=1;i<16;i++)
 301       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 302     for(i=0;i<12;i++)
 303       for(j=i<4?i+12:15;j>=i+4;j--)
 304         if((v&(a[i]|a[j]))==v) {
 305           o(stuff_const(op,v&a[i]));
 306           o(stuff_const(o2,v&a[j]));
 307           return;
 308         }
 309     no=op^0xC00000;
 310     n2=o2^0xC00000;
 311     nv=-v;
 312     for(i=0;i<12;i++)
 313       for(j=i<4?i+12:15;j>=i+4;j--)
 314         if((nv&(a[i]|a[j]))==nv) {
 315           o(stuff_const(no,nv&a[i]));
 316           o(stuff_const(n2,nv&a[j]));
 317           return;
 318         }
 319     for(i=0;i<8;i++)
 320       for(j=i+4;j<12;j++)
 321         for(k=i<4?i+12:15;k>=j+4;k--)
 322           if((v&(a[i]|a[j]|a[k]))==v) {
 323             o(stuff_const(op,v&a[i]));
 324             o(stuff_const(o2,v&a[j]));
 325             o(stuff_const(o2,v&a[k]));
 326             return;
 327           }
 328     no=op^0xC00000;
 329     nv=-v;
 330     for(i=0;i<8;i++)
 331       for(j=i+4;j<12;j++)
 332         for(k=i<4?i+12:15;k>=j+4;k--)
 333           if((nv&(a[i]|a[j]|a[k]))==nv) {
 334             o(stuff_const(no,nv&a[i]));
 335             o(stuff_const(n2,nv&a[j]));
 336             o(stuff_const(n2,nv&a[k]));
 337             return;
 338           }
 339     o(stuff_const(op,v&a[0]));
 340     o(stuff_const(o2,v&a[4]));
 341     o(stuff_const(o2,v&a[8]));
 342     o(stuff_const(o2,v&a[12]));
 343   }
 344 }
 345
 346 uint32_t encbranch(int pos, int addr, int fail)
 347 {
 348   addr-=pos+8;
 349   addr/=4;
 350   if(addr>=0x1000000 || addr<-0x1000000) {
 351     if(fail)
 352       tcc_error("FIXME: function bigger than 32MB");
 353     return 0;
 354   }
 355   return 0x0A000000|(addr&0xffffff);
 356 }
 357
 358 int decbranch(int pos)
 359 {
 360   int x;
 361   x=*(uint32_t *)(cur_text_section->data + pos);
 362   x&=0x00ffffff;
 363   if(x&0x800000)
 364     x-=0x1000000;
 365   return x*4+pos+8;
 366 }
 367
 368 /* output a symbol and patch all calls to it */
 369 void gsym_addr(int t, int a)
 370 {
 371   uint32_t *x;
 372   int lt;
 373   while(t) {
 374     x=(uint32_t *)(cur_text_section->data + t);
 375     t=decbranch(lt=t);
 376     if(a==lt+4)
 377       *x=0xE1A00000; // nop
 378     else {
 379       *x &= 0xff000000;
 380       *x |= encbranch(lt,a,1);
 381     }
 382   }
 383 }
 384
 385 void gsym(int t)
 386 {
 387   gsym_addr(t, ind);
 388 }
 389
 390 #ifdef TCC_ARM_VFP
 391 static uint32_t vfpr(int r)
 392 {
 393   if(r<TREG_F0 || r>TREG_F7)
 394     tcc_error("compiler error! register %i is no vfp register",r);
 395   return r - TREG_F0;
 396 }
 397 #else
 398 static uint32_t fpr(int r)
 399 {
 400   if(r<TREG_F0 || r>TREG_F3)
 401     tcc_error("compiler error! register %i is no fpa register",r);
 402   return r - TREG_F0;
 403 }
 404 #endif
 405
 406 static uint32_t intr(int r)
 407 {
 408   if(r == TREG_R12)
 409     return 12;
 410   if(r >= TREG_R0 && r <= TREG_R3)
 411     return r - TREG_R0;
 412   if (r >= TREG_SP && r <= TREG_LR)
 413     return r + (13 - TREG_SP);
 414   tcc_error("compiler error! register %i is no int register",r);
 415 }
 416
 417 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 418 {
 419   if(*off>maxoff || *off&((1<<shift)-1)) {
 420     uint32_t x, y;
 421     x=0xE280E000;
 422     if(*sgn)
 423       x=0xE240E000;
 424     x|=(*base)<<16;
 425     *base=14; // lr
 426     y=stuff_const(x,*off&~maxoff);
 427     if(y) {
 428       o(y);
 429       *off&=maxoff;
 430       return;
 431     }
 432     y=stuff_const(x,(*off+maxoff)&~maxoff);
 433     if(y) {
 434       o(y);
 435       *sgn=!*sgn;
 436       *off=((*off+maxoff)&~maxoff)-*off;
 437       return;
 438     }
 439     stuff_const_harder(x,*off&~maxoff);
 440     *off&=maxoff;
 441   }
 442 }
 443
 444 static uint32_t mapcc(int cc)
 445 {
 446   switch(cc)
 447   {
 448     case TOK_ULT:
 449       return 0x30000000; /* CC/LO */
 450     case TOK_UGE:
 451       return 0x20000000; /* CS/HS */
 452     case TOK_EQ:
 453       return 0x00000000; /* EQ */
 454     case TOK_NE:
 455       return 0x10000000; /* NE */
 456     case TOK_ULE:
 457       return 0x90000000; /* LS */
 458     case TOK_UGT:
 459       return 0x80000000; /* HI */
 460     case TOK_Nset:
 461       return 0x40000000; /* MI */
 462     case TOK_Nclear:
 463       return 0x50000000; /* PL */
 464     case TOK_LT:
 465       return 0xB0000000; /* LT */
 466     case TOK_GE:
 467       return 0xA0000000; /* GE */
 468     case TOK_LE:
 469       return 0xD0000000; /* LE */
 470     case TOK_GT:
 471       return 0xC0000000; /* GT */
 472   }
 473   tcc_error("unexpected condition code");
 474   return 0xE0000000; /* AL */
 475 }
 476
 477 static int negcc(int cc)
 478 {
 479   switch(cc)
 480   {
 481     case TOK_ULT:
 482       return TOK_UGE;
 483     case TOK_UGE:
 484       return TOK_ULT;
 485     case TOK_EQ:
 486       return TOK_NE;
 487     case TOK_NE:
 488       return TOK_EQ;
 489     case TOK_ULE:
 490       return TOK_UGT;
 491     case TOK_UGT:
 492       return TOK_ULE;
 493     case TOK_Nset:
 494       return TOK_Nclear;
 495     case TOK_Nclear:
 496       return TOK_Nset;
 497     case TOK_LT:
 498       return TOK_GE;
 499     case TOK_GE:
 500       return TOK_LT;
 501     case TOK_LE:
 502       return TOK_GT;
 503     case TOK_GT:
 504       return TOK_LE;
 505   }
 506   tcc_error("unexpected condition code");
 507   return TOK_NE;
 508 }
 509
  510 /* load 'r' from value 'sv'
         Emits the code that brings the value described by 'sv' into register
         'r'. Lvalues are read from memory (fp-relative, through a pointer
         first loaded into lr, or through a register); other values are
         materialized directly: constants, local addresses, comparison
         results, jump chains and register copies. Any combination not
         handled ends in tcc_error(). */
  511 void load(int r, SValue *sv)
  512 {
  513   int v, ft, fc, fr, sign;
  514   uint32_t op;
  515   SValue v1;
  516
  517   fr = sv->r;
  518   ft = sv->type.t;
  519   fc = sv->c.i;
  520
        /* split the offset into magnitude (fc) and direction (sign) */
  521   if(fc>=0)
  522     sign=0;
  523   else {
  524     sign=1;
  525     fc=-fc;
  526   }
  527
  528   v = fr & VT_VALMASK;
  529   if (fr & VT_LVAL) {
  530     uint32_t base = 0xB; // fp
  531     if(v == VT_LLOCAL) {
            /* the local slot holds a pointer: fetch it into lr first */
  532       v1.type.t = VT_PTR;
  533       v1.r = VT_LOCAL | VT_LVAL;
  534       v1.c.i = sv->c.i;
  535       load(TREG_LR, &v1);
  536       base = 14; /* lr */
  537       fc=sign=0;
  538       v=VT_LOCAL;
  539     } else if(v == VT_CONST) {
            /* absolute address: materialize it in lr, then load through lr */
  540       v1.type.t = VT_PTR;
  541       v1.r = fr&~VT_LVAL;
  542       v1.c.i = sv->c.i;
  543       v1.sym=sv->sym;
  544       load(TREG_LR, &v1);
  545       base = 14; /* lr */
  546       fc=sign=0;
  547       v=VT_LOCAL;
  548     } else if(v < VT_CONST) {
            /* the address is already in register v */
  549       base=intr(v);
  550       fc=sign=0;
  551       v=VT_LOCAL;
  552     }
          /* every case above was reduced to "base register +/- fc" */
  553     if(v == VT_LOCAL) {
  554       if(is_float(ft)) {
  555         calcaddr(&base,&fc,&sign,1020,2);
  556 #ifdef TCC_ARM_VFP
  557         op=0xED100A00; /* flds */
  558         if(!sign)
  559           op|=0x800000;
  560         if ((ft & VT_BTYPE) != VT_FLOAT)
  561           op|=0x100;   /* flds -> fldd */
  562         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
  563 #else
  564         op=0xED100100;
  565         if(!sign)
  566           op|=0x800000;
  567 #if LDOUBLE_SIZE == 8
  568         if ((ft & VT_BTYPE) != VT_FLOAT)
  569           op|=0x8000;
  570 #else
  571         if ((ft & VT_BTYPE) == VT_DOUBLE)
  572           op|=0x8000;
  573         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
  574           op|=0x400000;
  575 #endif
  576         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
  577 #endif
  578       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
  579                 || (ft & VT_BTYPE) == VT_SHORT) {
              /* signed bytes and all shorts: the offset is limited to 255
                 and stored as two nibbles in the instruction */
  580         calcaddr(&base,&fc,&sign,255,0);
  581         op=0xE1500090;
  582         if ((ft & VT_BTYPE) == VT_SHORT)
  583           op|=0x20;
  584         if ((ft & VT_UNSIGNED) == 0)
  585           op|=0x40;
  586         if(!sign)
  587           op|=0x800000;
  588         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
  589       } else {
              /* words and unsigned bytes/bools: offset up to 4095 */
  590         calcaddr(&base,&fc,&sign,4095,0);
  591         op=0xE5100000;
  592         if(!sign)
  593           op|=0x800000;
  594         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
  595           op|=0x400000;
  596         o(op|(intr(r)<<12)|fc|(base<<16));
  597       }
  598       return;
  599     }
  600   } else {
  601     if (v == VT_CONST) {
  602       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
  603       if (fr & VT_SYM || !op) {
              /* no immediate form (or a symbol is involved): the value is
                 emitted as a literal word right after the loading
                 instruction, with a branch over it */
  604         o(0xE59F0000|(intr(r)<<12));
  605         o(0xEA000000);
  606         if(fr & VT_SYM)
  607           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
  608         o(sv->c.i);
  609       } else
  610         o(op);
  611       return;
  612     } else if (v == VT_LOCAL) {
            /* address of a local: fp plus constant */
  613       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
  614       if (fr & VT_SYM || !op) {
  615         o(0xE59F0000|(intr(r)<<12));
  616         o(0xEA000000);
  617         if(fr & VT_SYM) // needed ?
  618           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
  619         o(sv->c.i);
  620         o(0xE08B0000|(intr(r)<<12)|intr(r));
  621       } else
  622         o(op);
  623       return;
  624     } else if(v == VT_CMP) {
            /* two conditional instructions: r = 1 if the condition holds,
               r = 0 under the negated condition */
  625       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
  626       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
  627       return;
  628     } else if (v == VT_JMP || v == VT_JMPI) {
  629       int t;
  630       t = v & 1;
            /* fall-through path sets r = t and skips the next instruction;
               the pending jumps in sv->c.i land on "r = t ^ 1" */
  631       o(0xE3A00000|(intr(r)<<12)|t);
  632       o(0xEA000000);
  633       gsym(sv->c.i);
  634       o(0xE3A00000|(intr(r)<<12)|(t^1));
  635       return;
  636     } else if (v < VT_CONST) {
            /* register to register copy */
  637       if(is_float(ft))
  638 #ifdef TCC_ARM_VFP
  639         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
  640 #else
  641         o(0xEE008180|(fpr(r)<<12)|fpr(v));
  642 #endif
  643       else
  644         o(0xE1A00000|(intr(r)<<12)|intr(v));
  645       return;
  646     }
  647   }
  648   tcc_error("load unimplemented!");
  649 }
 650
  651 /* store register 'r' in lvalue 'v'
         Emits the code that writes register 'r' to the location described
         by 'sv': fp-relative, through a register holding the address, or
         at an absolute address that is first loaded into lr. Any other
         kind of destination ends in tcc_error(). */
  652 void store(int r, SValue *sv)
  653 {
  654   SValue v1;
  655   int v, ft, fc, fr, sign;
  656   uint32_t op;
  657
  658   fr = sv->r;
  659   ft = sv->type.t;
  660   fc = sv->c.i;
  661
        /* split the offset into magnitude (fc) and direction (sign) */
  662   if(fc>=0)
  663     sign=0;
  664   else {
  665     sign=1;
  666     fc=-fc;
  667   }
  668
  669   v = fr & VT_VALMASK;
  670   if (fr & VT_LVAL || fr == VT_LOCAL) {
  671     uint32_t base = 0xb; /* fp */
  672     if(v < VT_CONST) {
            /* the address is in register v */
  673       base=intr(v);
  674       v=VT_LOCAL;
  675       fc=sign=0;
  676     } else if(v == VT_CONST) {
            /* absolute address: materialize it in lr, then store through lr */
  677       v1.type.t = ft;
  678       v1.r = fr&~VT_LVAL;
  679       v1.c.i = sv->c.i;
  680       v1.sym=sv->sym;
  681       load(TREG_LR, &v1);
  682       base = 14; /* lr */
  683       fc=sign=0;
  684       v=VT_LOCAL;
  685     }
  686     if(v == VT_LOCAL) {
  687        if(is_float(ft)) {
  688         calcaddr(&base,&fc,&sign,1020,2);
  689 #ifdef TCC_ARM_VFP
  690         op=0xED000A00; /* fsts */
  691         if(!sign)
  692           op|=0x800000;
  693         if ((ft & VT_BTYPE) != VT_FLOAT)
  694           op|=0x100;   /* fsts -> fstd */
  695         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
  696 #else
  697         op=0xED000100;
  698         if(!sign)
  699           op|=0x800000;
  700 #if LDOUBLE_SIZE == 8
  701         if ((ft & VT_BTYPE) != VT_FLOAT)
  702           op|=0x8000;
  703 #else
  704         if ((ft & VT_BTYPE) == VT_DOUBLE)
  705           op|=0x8000;
  706         if ((ft & VT_BTYPE) == VT_LDOUBLE)
  707           op|=0x400000;
  708 #endif
  709         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
  710 #endif
  711         return;
  712       } else if((ft & VT_BTYPE) == VT_SHORT) {
              /* halfword store: offset limited to 255, stored as two nibbles */
  713         calcaddr(&base,&fc,&sign,255,0);
  714         op=0xE14000B0;
  715         if(!sign)
  716           op|=0x800000;
  717         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
  718       } else {
              /* word or byte store: offset up to 4095 */
  719         calcaddr(&base,&fc,&sign,4095,0);
  720         op=0xE5000000;
  721         if(!sign)
  722           op|=0x800000;
  723         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
  724           op|=0x400000;
  725         o(op|(intr(r)<<12)|fc|(base<<16));
  726       }
  727       return;
  728     }
  729   }
  730   tcc_error("store unimplemented");
  731 }
 732
 733 static void gadd_sp(int val)
 734 {
 735   stuff_const_harder(0xE28DD000,val);
 736 }
 737
 738 /* 'is_jmp' is '1' if it is a jump */
 739 static void gcall_or_jmp(int is_jmp)
 740 {
 741   int r;
 742   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 743     uint32_t x;
 744     /* constant case */
 745     x=encbranch(ind,ind+vtop->c.i,0);
 746     if(x) {
 747       if (vtop->r & VT_SYM) {
 748         /* relocation case */
 749         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 750       } else
 751         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 752       o(x|(is_jmp?0xE0000000:0xE1000000));
 753     } else {
 754       if(!is_jmp)
 755         o(0xE28FE004); // add lr,pc,#4
 756       o(0xE51FF004);   // ldr pc,[pc,#-4]
 757       if (vtop->r & VT_SYM)
 758         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 759       o(vtop->c.i);
 760     }
 761   } else {
 762     /* otherwise, indirect call */
 763     r = gv(RC_INT);
 764     if(!is_jmp)
 765       o(0xE1A0E00F);       // mov lr,pc
 766     o(0xE1A0F000|intr(r)); // mov pc,r
 767   }
 768 }
 769
 770 static int unalias_ldbl(int btype)
 771 {
 772 #if LDOUBLE_SIZE == 8
 773     if (btype == VT_LDOUBLE)
 774       btype = VT_DOUBLE;
 775 #endif
 776     return btype;
 777 }
 778
 779 /* Return whether a structure is an homogeneous float aggregate or not.
 780    The answer is true if all the elements of the structure are of the same
 781    primitive float type and there is less than 4 elements.
 782
 783    type: the type corresponding to the structure to be tested */
 784 static int is_hgen_float_aggr(CType *type)
 785 {
 786   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 787     struct Sym *ref;
 788     int btype, nb_fields = 0;
 789
 790     ref = type->ref->next;
 791     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 792     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 793       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 794       return !ref && nb_fields <= 4;
 795     }
 796   }
 797   return 0;
 798 }
 799
  800 struct avail_regs {
        /* Bookkeeping used by assign_vfpreg(): 'avail' is a small queue of
           register numbers skipped for alignment ("holes"); entries between
           indices first_hole and last_hole are still unused.
           first_free_reg is set to -1 once allocation has failed. */
  801   signed char avail[3]; /* 3 holes max with only float and double alignments */
  802   int first_hole; /* first available hole */
  803   int last_hole; /* last available hole (none if equal to first_hole) */
  804   int first_free_reg; /* next free register in the sequence, hole excluded */
  805 };
  806
      /* Initial state: no hole recorded, allocation starts at register 0. */
  807 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 808
 809 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 810    param) according to the rules described in the procedure call standard for
 811    the ARM architecture (AAPCS). If found, the registers are assigned to this
 812    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 813    and the parameter is a single float.
 814
 815    avregs: opaque structure to keep track of available VFP co-processor regs
 816    align: alignment constraints for the param, as returned by type_size()
 817    size: size of the parameter, as returned by type_size() */
 818 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 819 {
 820   int first_reg = 0;
 821
 822   if (avregs->first_free_reg == -1)
 823     return -1;
 824   if (align >> 3) { /* double alignment */
 825     first_reg = avregs->first_free_reg;
 826     /* alignment constraint not respected so use next reg and record hole */
 827     if (first_reg & 1)
 828       avregs->avail[avregs->last_hole++] = first_reg++;
 829   } else { /* no special alignment (float or array of float) */
 830     /* if single float and a hole is available, assign the param to it */
 831     if (size == 4 && avregs->first_hole != avregs->last_hole)
 832       return avregs->avail[avregs->first_hole++];
 833     else
 834       first_reg = avregs->first_free_reg;
 835   }
 836   if (first_reg + size / 4 <= 16) {
 837     avregs->first_free_reg = first_reg + size / 4;
 838     return first_reg;
 839   }
 840   avregs->first_free_reg = -1;
 841   return -1;
 842 }
 843
 844 /* Returns whether all params need to be passed in core registers or not.
 845    This is the case for function part of the runtime ABI. */
 846 int floats_in_core_regs(SValue *sval)
 847 {
 848   if (!sval->sym)
 849     return 0;
 850
 851   switch (sval->sym->v) {
 852     case TOK___floatundisf:
 853     case TOK___floatundidf:
 854     case TOK___fixunssfdi:
 855     case TOK___fixunsdfdi:
 856 #ifndef TCC_ARM_VFP
 857     case TOK___fixunsxfdi:
 858 #endif
 859     case TOK___floatdisf:
 860     case TOK___floatdidf:
 861     case TOK___fixsfdi:
 862     case TOK___fixdfdi:
 863       return 1;
 864
 865     default:
 866       return 0;
 867   }
 868 }
 869
 870 /* Return the number of registers needed to return the struct, or 0 if
 871    returning via struct pointer. */
 872 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 873 #ifdef TCC_ARM_EABI
 874     int size, align;
 875     size = type_size(vt, &align);
 876     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 877         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 878         *ret_align = 8;
 879         *regsize = 8;
 880         ret->ref = NULL;
 881         ret->t = VT_DOUBLE;
 882         return (size + 7) >> 3;
 883     } else if (size <= 4) {
 884         *ret_align = 4;
 885         *regsize = 4;
 886         ret->ref = NULL;
 887         ret->t = VT_INT;
 888         return 1;
 889     } else
 890         return 0;
 891 #else
 892     return 0;
 893 #endif
 894 }
 895
  896 /* Parameters are classified according to how they are copied to their final
  897    destination for the function call. Because the copying is performed class
  898    after class according to the order in the enum below, it is important that
  899    some constraints about the order of the members of this enum are respected:
  900    - CORE_STRUCT_CLASS must come after STACK_CLASS;
  901    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
  902      VFP_STRUCT_CLASS;
  903    - VFP_STRUCT_CLASS must come after VFP_CLASS.
  904    See the comment for the main loop in copy_params() for the reason. */
  905 enum reg_class {
  906         STACK_CLASS = 0,
  907         CORE_STRUCT_CLASS,
  908         VFP_CLASS,
  909         VFP_STRUCT_CLASS,
  910         CORE_CLASS,
  911         NB_CLASSES /* number of classes; sizes plan.clsplans[] */
  912 };
 913
  914 struct param_plan {
          /* Where one parameter goes; entries of the same class are chained
             through 'prev', most recently added first. */
  915     int start; /* first reg or addr used depending on the class */
  916     int end; /* last reg used or next free addr depending on the class */
  917     SValue *sval; /* pointer to SValue on the value stack */
  918     struct param_plan *prev; /*  previous element in this class */
  919 };
  920
  921 struct plan {
          /* Overall assignment for one call: storage for every param_plan
             plus, per class, the head of that class's list (NULL if empty). */
  922     struct param_plan *pplans; /* array of all the param plans */
  923     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
  924 };
  925
      /* Append a copy of 'pplan' to plan->pplans and make it the new head of
         the list for 'class'. The macro relies on a variable named
         <plan>_nb being in scope at the point of use (the name is built by
         token pasting); it counts the entries stored so far and is
         incremented here. */
  926 #define add_param_plan(plan,pplan,class)                        \
  927     do {                                                        \
  928         pplan.prev = plan->clsplans[class];                     \
  929         plan->pplans[plan ## _nb] = pplan;                      \
  930         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
  931     } while(0)
 932
  933 /* Assign parameters to registers and stack with alignment according to the
  934    rules in the procedure call standard for the ARM architecture (AAPCS).
  935    The overall assignment is recorded in an array of per parameter structures
  936    called parameter plans. The parameter plans are also further organized in a
  937    number of linked lists, one per class of parameter (see the comment for the
  938    definition of enum reg_class).
  939
  940    nb_args: number of parameters of the function for which a call is generated
  941    float_abi: float ABI in use for this function call
  942    plan: the structure where the overall assignment is recorded
  943    todo: a bitmap that records which core registers hold a parameter
  944
  945    Returns the amount of stack space needed for parameter passing
  946
  947    Note: this function allocates an array in plan->pplans with tcc_malloc. It
  948    is the responsibility of the caller to free this array once used (ie not
  949    before copy_params). */
  950 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
  951 {
  952   int i, size, align;
  953   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
  954   int plan_nb = 0; /* entry counter used by the add_param_plan() macro */
  955   struct param_plan pplan;
  956   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
  957
  958   ncrn = nsaa = 0;
  959   *todo = 0;
  960   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
  961   memset(plan->clsplans, 0, sizeof(plan->clsplans));
        /* walk the arguments from vtop[-(nb_args-1)] up to vtop[0] */
  962   for(i = nb_args; i-- ;) {
  963     int j, start_vfpreg = 0;
  964     CType type = vtop[-i].type;
  965     type.t &= ~VT_ARRAY;
  966     size = type_size(&type, &align);
          /* both size and alignment are rounded up to a multiple of 4 */
  967     size = (size + 3) & ~3;
  968     align = (align + 3) & ~3;
  969     switch(vtop[-i].type.t & VT_BTYPE) {
  970       case VT_STRUCT:
  971       case VT_FLOAT:
  972       case VT_DOUBLE:
  973       case VT_LDOUBLE:
  974       if (float_abi == ARM_HARD_FLOAT) {
  975         int is_hfa = 0; /* Homogeneous float aggregate */
  976
  977         if (is_float(vtop[-i].type.t)
  978             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
  979           int end_vfpreg;
  980
  981           start_vfpreg = assign_vfpreg(&avregs, align, size);
  982           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
  983           if (start_vfpreg >= 0) {
  984             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
  985             if (is_hfa)
  986               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
  987             else
  988               add_param_plan(plan, pplan, VFP_CLASS);
  989             continue;
  990           } else
                  /* no VFP register left: the parameter goes on the stack */
  991             break;
  992         }
  993       }
            /* align the next core register number for this parameter */
  994       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
  995       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
  996         /* The parameter is allocated both in core register and on stack. As
  997          * such, it can be of either class: it would either be the last of
  998          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
  999         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 1000           *todo|=(1<<j);
 1001         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
 1002         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
 1003         ncrn += size/4;
              /* the part beyond the four core registers occupies stack space */
 1004         if (ncrn > 4)
 1005           nsaa = (ncrn - 4) * 4;
 1006       } else {
              /* core registers are no longer used once a parameter spills */
 1007         ncrn = 4;
 1008         break;
 1009       }
 1010       continue;
 1011       default:
 1012       if (ncrn < 4) {
 1013         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 1014
 1015         if (is_long) {
                /* a long long starts on an even core register */
 1016           ncrn = (ncrn + 1) & -2;
 1017           if (ncrn == 4)
 1018             break;
 1019         }
 1020         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
 1021         ncrn++;
 1022         if (is_long)
 1023           pplan.end = ncrn++;
 1024         add_param_plan(plan, pplan, CORE_CLASS);
 1025         continue;
 1026       }
 1027     }
          /* reached through 'break' or a fall out of the switch: stack */
 1028     nsaa = (nsaa + (align - 1)) & ~(align - 1);
 1029     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
 1030     add_param_plan(plan, pplan, STACK_CLASS);
 1031     nsaa += size; /* size already rounded up before */
 1032   }
 1033   return nsaa;
 1034 }
1035
1036 #undef add_param_plan
1037
/* Copy parameters to their final destination (core reg, VFP reg or stack) for
   function call.

   nb_args: number of parameters the function takes (only used here to tell
            save_regs() how many value stack entries to leave alone)
   plan: the overall assignment plan for parameters; plan->clsplans[] holds
         one linked list (chained through ->prev) per parameter class
   todo: a bitmap indicating what core reg will hold a parameter; those
         registers are popped from the stack at the very end

   Returns the number of SValue added by this function on the value stack */
static int copy_params(int nb_args, struct plan *plan, int todo)
{
  int size, align, r, i, nb_extra_sval = 0;
  struct param_plan *pplan;
  int pass = 0; /* 0 on the first sweep over the plan, 1 on the second one */

   /* Several constraints require parameters to be copied in a specific order:
      - structures are copied to the stack before being loaded in a reg;
      - floats loaded to an odd numbered VFP reg are first copied to the
        preceding even numbered VFP reg and then moved to the next VFP reg.

      It is thus important that:
      - structures assigned to core regs must be copied after parameters
        assigned to the stack but before structures assigned to VFP regs because
        a structure can lie partly in core registers and partly on the stack;
      - parameters assigned to the stack and all structures be copied before
        parameters assigned to a core reg since copying a parameter to the stack
        require using a core reg;
      - parameters assigned to VFP regs be copied before structures assigned to
        VFP regs as the copy might use an even numbered VFP reg that already
        holds part of a structure.

      The loop below simply walks the classes in increasing index order, so
      the ordering above relies on how the *_CLASS constants are numbered
      (defined outside this function). */
again:
  for(i = 0; i < NB_CLASSES; i++) {
    for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {

      /* On the second sweep only CORE_CLASS entries whose SValue is no longer
         below VT_CONST are handled again (see the comment before the
         "goto again" below); every other entry is skipped. */
      if (pass
          && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
        continue;

      /* Work on a copy pushed on top of the value stack; the original slot
         is neutralised so that it no longer pins anything. */
      vpushv(pplan->sval);
      pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
      switch(i) {
        case STACK_CLASS:
        case CORE_STRUCT_CLASS:
        case VFP_STRUCT_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
            int padding = 0;
            size = type_size(&pplan->sval->type, &align);
            /* align to stack align size */
            size = (size + 3) & ~3;
            /* gap between the end of the previously planned stack parameter
               and the start of this one */
            if (i == STACK_CLASS && pplan->prev)
              padding = pplan->start - pplan->prev->end;
            size += padding; /* Add padding if any */
            /* allocate the necessary size on stack */
            gadd_sp(-size);
            /* generate structure store */
            r = get_reg(RC_INT);
            o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore(); /* memcpy to current sp + potential padding */

            /* Homogeneous float aggregate are loaded to VFP registers
               immediately since there is no way of loading data in multiple
               non consecutive VFP registers as what is done for other
               structures (see the use of todo). */
            if (i == VFP_STRUCT_CLASS) {
              int first = pplan->start, nb = pplan->end - first + 1;
              /* vpop.32 {pplan->start, ..., pplan->end} */
              o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
              /* No need to write the register used to a SValue since VFP regs
                 cannot be used for gcall_or_jmp */
            }
          } else {
            /* Scalar passed on the stack: load it in a register and push it */
            if (is_float(pplan->sval->type.t)) {
#ifdef TCC_ARM_VFP
              r = vfpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else {
                size = 8;
                r |= 0x101; /* vpush.32 -> vpush.64 */
              }
              o(0xED2D0A01 + r); /* vpush */
#else
              r = fpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
                size = 8;
              else
                size = LDOUBLE_SIZE;

              /* select the FPA transfer size matching 'size' */
              if (size == 12)
                r |= 0x400000;
              else if(size == 8)
                r|=0x8000;

              o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
#endif
            } else {
              /* simple type (currently always same size) */
              /* XXX: implicit cast ? */
              size=4;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                /* long long: split in two words and push the word that
                   lexpand_nr() leaves on top first */
                lexpand_nr();
                size = 8;
                r = gv(RC_INT);
                o(0xE52D0004|(intr(r)<<12)); /* push r */
                vtop--;
              }
              r = gv(RC_INT);
              o(0xE52D0004|(intr(r)<<12)); /* push r */
            }
            if (i == STACK_CLASS && pplan->prev)
              gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
          }
          break;

        case VFP_CLASS:
          /* Load in the register of class TREG_F0 + start/2; the value is
             then moved when the planned slot is an odd numbered one. */
          gv(regmask(TREG_F0 + (pplan->start >> 1)));
          if (pplan->start & 1) { /* Must be in upper part of double register */
            o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
            vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
          }
          break;

        case CORE_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
            /* long long: one half goes in register 'end' and is recorded in
               r2, the other half in register 'start' below */
            lexpand_nr();
            gv(regmask(pplan->end));
            pplan->sval->r2 = vtop->r;
            vtop--;
          }
          gv(regmask(pplan->start));
          /* Mark register as used so that gcall_or_jmp use another one
             (regs >=4 are free as never used to pass parameters) */
          pplan->sval->r = vtop->r;
          break;
      }
      vtop--; /* drop the working copy pushed by vpushv() above */
    }
  }

  /* second pass to restore registers that were saved on stack by accident.
     Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
  if (++pass < 2)
    goto again;

  /* Manually free remaining registers since next parameters are loaded
   * manually, without the help of gv(int). */
  save_regs(nb_args);

  if(todo) {
    /* Load in one go every core register that holds (part of) a structure */
    o(0xE8BD0000|todo); /* pop {todo} */
    for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
      int r; /* NOTE: shadows the function-level 'r' */
      pplan->sval->r = pplan->start;
      /* An SValue can only pin 2 registers at best (r and r2) but a structure
         can occupy more than 2 registers. Thus, we need to push on the value
         stack some fake parameter to have on SValue for each registers used
         by a structure (r2 is not used). */
      for (r = pplan->start + 1; r <= pplan->end; r++) {
        if (todo & (1 << r)) {
          nb_extra_sval++;
          vpushi(0);
          vtop->r = r;
        }
      }
    }
  }
  return nb_extra_sval;
}
1209
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address.

   nb_args: number of parameters currently on the value stack above the
            function address (the function SValue is vtop[-nb_args]) */
void gfunc_call(int nb_args)
{
  int r, args_size;
  int def_float_abi = float_abi; /* restored at the end of this function */
  int todo;
  struct plan plan;

#ifdef TCC_ARM_EABI
  int variadic;

  /* For this call only, fall back to the soft-float convention when the
     callee is variadic or when floats_in_core_regs() says so. */
  if (float_abi == ARM_HARD_FLOAT) {
    variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
    if (variadic || floats_in_core_regs(&vtop[-nb_args]))
      float_abi = ARM_SOFTFP_FLOAT;
  }
#endif
  /* cannot let cpu flags if other instruction are generated. Also avoid leaving
     VT_JMP anywhere except on the top of the stack because it would complicate
     the code generator. */
  r = vtop->r & VT_VALMASK;
  if (r == VT_CMP || (r & ~1) == VT_JMP)
    gv(RC_INT);

  /* Decide where each parameter goes; the return value is the number of bytes
     of parameters passed on the stack and 'todo' receives the bitmap of core
     registers to pop (see copy_params()). */
  args_size = assign_regs(nb_args, float_abi, &plan, &todo);

#ifdef TCC_ARM_EABI
  if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
    args_size = (args_size + 7) & ~7;
    o(0xE24DD004); /* sub sp, sp, #4 */
  }
#endif

  /* copy_params() may push extra SValues; account for them so that they are
     popped together with the real parameters below. */
  nb_args += copy_params(nb_args, &plan, todo);
  tcc_free(plan.pplans);

  /* Move fct SValue on top as required by gcall_or_jmp */
  vrotb(nb_args + 1);
  gcall_or_jmp(0);
  if (args_size)
      gadd_sp(args_size); /* pop all parameters passed on the stack */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
  /* With the soft-float convention a float result comes back in core
     registers: move it to s0/d0. vtop is the function SValue here. */
  if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
    if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
      o(0xEE000A10); /*vmov s0, r0 */
    } else {
      o(0xEE000B10); /* vmov.32 d0[0], r0 */
      o(0xEE201B10); /* vmov.32 d0[1], r1 */
    }
  }
#endif
  vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
  leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
  float_abi = def_float_abi;
}
1267
/* generate function prolog of type 't'

   func_type: type of the function being compiled; func_type->ref is the
              symbol whose ->next chain lists the parameters.

   Emits the frame set-up code, reserves one instruction slot (recorded in
   func_sub_sp_offset) that gfunc_epilog() patches with the stack adjustment,
   and declares every parameter as a local symbol via sym_push(). */
void gfunc_prolog(CType *func_type)
{
  Sym *sym,*sym2;
  int n, nf, size, align, rs, struct_ret = 0;
  int addr, pn, sn; /* pn=core, sn=stack */
  CType ret_type;

#ifdef TCC_ARM_EABI
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
#endif

  sym = func_type->ref;
  func_vt = sym->type;
  func_var = (func_type->ref->f.func_type == FUNC_ELLIPSIS);

  /* n: number of core register words to save, nf: number of VFP register
     words to save (both are clamped further down) */
  n = nf = 0;
  /* A structure result for which gfunc_sret() returns 0 takes one extra
     leading core register word. */
  if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
      !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
  {
    n++;
    struct_ret = 1;
    func_vc = 12; /* Offset from fp of the place to store the result */
  }
  /* First walk over the parameters: only count how many register words they
     occupy. Stops once both register files are exhausted. */
  for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
    size = type_size(&sym2->type, &align);
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var &&
        (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
      /* nf tracks the highest VFP word reached so far */
      int tmpnf = assign_vfpreg(&avregs, align, size);
      tmpnf += (size + 3) / 4;
      nf = (tmpnf > nf) ? tmpnf : nf;
    } else
#endif
    if (n < 4)
      n += (size + 3) / 4;
  }
  o(0xE1A0C00D); /* mov ip,sp */
  if (func_var)
    n=4; /* variadic: always save all four argument registers */
  if (n) {
    if(n>4)
      n=4;
#ifdef TCC_ARM_EABI
    n=(n+1)&-2; /* round up to an even number of words */
#endif
    o(0xE92D0000|((1<<n)-1)); /* save r0..r(n-1) on stack if needed (n <= 4) */
  }
  if (nf) {
    if (nf>16)
      nf=16;
    nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
    o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
  }
  o(0xE92D5800); /* save fp, ip, lr */
  o(0xE1A0B00D); /* mov fp, sp */
  func_sub_sp_offset = ind;
  o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */

#ifdef TCC_ARM_EABI
  if (float_abi == ARM_HARD_FLOAT) {
    func_vc += nf * 4;
    /* start register assignment over for the second walk below */
    avregs = AVAIL_REGS_INITIALIZER;
  }
#endif
  /* Second walk: compute the frame offset of every parameter. pn counts the
     core register words used so far (the structure result word included),
     sn the words taken from the caller's stack area. */
  pn = struct_ret, sn = 0;
  while ((sym = sym->next)) {
    CType *type;
    type = &sym->type;
    size = type_size(type, &align);
    size = (size + 3) >> 2; /* size is in words from here on */
    align = (align + 3) & ~3;
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
        || is_hgen_float_aggr(&sym->type))) {
      int fpn = assign_vfpreg(&avregs, align, size << 2);
      if (fpn >= 0)
        addr = fpn * 4; /* lives in the saved VFP register area */
      else
        goto from_stack; /* no VFP register left */
    } else
#endif
    if (pn < 4) {
#ifdef TCC_ARM_EABI
        pn = (pn + (align-1)/4) & -(align/4);
#endif
      addr = (nf + pn) * 4; /* saved core registers follow the nf VFP words */
      pn += size;
      /* parameter straddling registers and stack: its tail uses stack words */
      if (!sn && pn > 4)
        sn = (pn - 4);
    } else {
#ifdef TCC_ARM_EABI
from_stack:
        sn = (sn + (align-1)/4) & -(align/4);
#endif
      addr = (n + nf + sn) * 4; /* past both register save areas */
      sn += size;
    }
    /* + 12 skips the three words (fp, ip, lr) pushed just before fp was set */
    sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
             addr + 12);
  }
  last_itod_magic=0;
  leaffunc = 1; /* cleared by gfunc_call() as soon as a call is generated */
  loc = 0;
}
1373
/* generate function epilog

   Emits the return sequence and back-patches the instruction slot reserved at
   func_sub_sp_offset with the stack adjustment for the locals ('loc'). */
void gfunc_epilog(void)
{
  uint32_t x;
  int diff;
  /* Copy float return value to core register if base standard is used and
     float computation is made with VFP */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
  if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
    if((func_vt.t & VT_BTYPE) == VT_FLOAT)
      o(0xEE100A10); /* fmrs r0, s0 */
    else {
      o(0xEE100B10); /* fmrdl r0, d0 */
      o(0xEE301B10); /* fmrdh r1, d0 */
    }
  }
#endif
  o(0xE89BA800); /* restore fp, sp, pc */
  /* size of the local variable area, rounded up to a multiple of 4 */
  diff = (-loc + 3) & -4;
#ifdef TCC_ARM_EABI
  /* non-leaf function: adjust the size for the 8 byte stack alignment */
  if(!leaffunc)
    diff = ((diff + 11) & -8) - 4;
#endif
  if(diff > 0) {
    x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
    if(x)
      /* diff can be encoded as an immediate: overwrite the reserved slot */
      *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
    else {
      /* Otherwise emit a small out of line sequence here, after the return,
         which loads diff from the literal word that follows it, and make the
         reserved slot branch to it (presumably a branch with link, since the
         sequence returns through lr -- encbranch() is defined elsewhere). */
      int addr;
      addr=ind;
      o(0xE59FC004); /* ldr ip,[pc+4] */
      o(0xE04BD00C); /* sub sp,fp,ip  */
      o(0xE1A0F00E); /* mov pc,lr */
      o(diff);
      *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
    }
  }
}
1412
1413 /* generate a jump to a label */
1414 int gjmp(int t)
1415 {
1416   int r;
1417   if (nocode_wanted)
1418     return t;
1419   r=ind;
1420   o(0xE0000000|encbranch(r,t,1));
1421   return r;
1422 }
1423
/* Emit a jump to the fixed address 'a'.  Thin wrapper around gjmp(); the
   value gjmp() returns is not needed here. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1429
1430 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1431 int gtst(int inv, int t)
1432 {
1433   int v, r;
1434   uint32_t op;
1435
1436   v = vtop->r & VT_VALMASK;
1437   r=ind;
1438
1439   if (nocode_wanted) {
1440     ;
1441   } else if (v == VT_CMP) {
1442     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1443     op|=encbranch(r,t,1);
1444     o(op);
1445     t=r;
1446   } else if (v == VT_JMP || v == VT_JMPI) {
1447     if ((v & 1) == inv) {
1448       if(!vtop->c.i)
1449         vtop->c.i=t;
1450       else {
1451         uint32_t *x;
1452         int p,lp;
1453         if(t) {
1454           p = vtop->c.i;
1455           do {
1456             p = decbranch(lp=p);
1457           } while(p);
1458           x = (uint32_t *)(cur_text_section->data + lp);
1459           *x &= 0xff000000;
1460           *x |= encbranch(lp,t,1);
1461         }
1462         t = vtop->c.i;
1463       }
1464     } else {
1465       t = gjmp(t);
1466       gsym(vtop->c.i);
1467     }
1468   }
1469   vtop--;
1470   return t;
1471 }
1472
/* generate an integer binary operation

   op: operator token. The two operands are the two top entries of the value
       stack; they are replaced by a single entry holding the result.

   The first switch classifies the operator through 'c':
     c == 1: data processing instruction (opc goes at bit 20 of the
             instruction word built in the second switch)
     c == 2: shift (opc goes at bit 5 of the instruction word)
     c == 3: library call, 'func' being the helper to call
   Multiplications are emitted directly and return early. */
void gen_opi(int op)
{
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  unsigned short retreg = REG_IRET; /* register holding a helper's result */

  c=0;
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      /* both operands in registers; the result reuses the register of the
         first operand */
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* the EABI helper's result is fetched from REG_LRET instead of
         REG_IRET */
      func=TOK___aeabi_idivmod;
      retreg=REG_LRET;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      func=TOK___aeabi_uidivmod;
      retreg=REG_LRET;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* 32x32 -> 64 bit multiply: the result occupies two registers (r and
         r2 of the remaining stack entry). 'c' temporarily holds the register
         of the first operand. */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      /* everything else is emitted as a compare; the result is turned into
         VT_CMP at 'done' when op is a comparison token */
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* constant first operand of a subtraction: swap the operands and use
         the reversed form so that the constant can become the immediate */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* a pending comparison/jump on top must be materialised first */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* first operand in a register; 'c' now holds its register number */
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20)|(c<<16);
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* try the immediate form; stuff_const() returns 0 when the constant
           cannot be encoded */
        uint32_t x;
        x=stuff_const(opc|0x2000000,vtop->c.i);
        if(x) {
          r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
          o(x|(r<<12));
          goto done;
        }
      }
      /* register form */
      fr=intr(gv(RC_INT));
      r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
      o(opc|(r<<12)|fr);
done:
      vtop--;
      if (op >= TOK_ULT && op <= TOK_GT) {
        /* comparison: the result lives in the condition flags */
        vtop->r = VT_CMP;
        vtop->c.i = op;
      }
      break;
    case 2:
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* value to shift in a register */
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      opc|=r;
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* constant shift amount: only its low 5 bits are encoded */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|(c<<7)|(fr<<12));
      } else {
        /* shift amount in a register */
        fr=intr(gv(RC_INT));
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* call helper 'func' with the two operands, then push a dummy value
         bound to the register that holds the result */
      vpush_global_sym(&func_old_type, func);
      vrott(3);
      gfunc_call(2);
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1642
1643 #ifdef TCC_ARM_VFP
1644 static int is_zero(int i)
1645 {
1646   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1647     return 0;
1648   if (vtop[i].type.t == VT_FLOAT)
1649     return (vtop[i].c.f == 0.f);
1650   else if (vtop[i].type.t == VT_DOUBLE)
1651     return (vtop[i].c.d == 0.0);
1652   return (vtop[i].c.ld == 0.l);
1653 }
1654
/* generate a floating point operation 'v = t1 op t2' instruction. The
 *    two operands are guaranteed to have the same floating point type
 *
 * VFP variant. op is the operator token; the two operands are the two top
 * entries of the value stack and are replaced by the result (a register for
 * arithmetic, VT_CMP for comparisons). */
void gen_opf(int op)
{
  uint32_t x;
  int fneg=0,r; /* fneg: set when '0 - x' is emitted as a one operand negate */
  x=0xEE000A00|T2CPR(vtop->type.t);
  switch(op) {
    case '+':
      /* adding a constant zero: just keep the other operand */
      if(is_zero(-1))
        vswap();
      if(is_zero(0)) {
        vtop--;
        return;
      }
      x|=0x300000;
      break;
    case '-':
      x|=0x300040;
      /* x - 0: keep x */
      if(is_zero(0)) {
        vtop--;
        return;
      }
      /* 0 - x: drop the zero and negate x instead */
      if(is_zero(-1)) {
        x|=0x810000; /* fsubX -> fnegX */
        vswap();
        vtop--;
        fneg=1;
      }
      break;
    case '*':
      x|=0x200000;
      break;
    case '/':
      x|=0x800000;
      break;
    default:
      if(op < TOK_ULT || op > TOK_GT) {
        tcc_error("unknown fp op %x!",op);
        return;
      }
      /* constant zero as first operand: swap the operands so that the compare
         with zero form can be used, and adapt the condition accordingly */
      if(is_zero(-1)) {
        vswap();
        switch(op) {
          case TOK_LT: op=TOK_GT; break;
          case TOK_GE: op=TOK_ULE; break;
          case TOK_LE: op=TOK_GE; break;
          case TOK_GT: op=TOK_ULT; break;
        }
      }
      x|=0xB40040; /* fcmpX */
      if(op!=TOK_EQ && op!=TOK_NE)
        x|=0x80; /* fcmpX -> fcmpeX */
      if(is_zero(0)) {
        vtop--;
        o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
      } else {
        x|=vfpr(gv(RC_FLOAT));
        vswap();
        o(x|(vfpr(gv(RC_FLOAT))<<12));
        vtop--;
      }
      o(0xEEF1FA10); /* fmstat */

      /* remap the condition that is recorded in the VT_CMP value for the
         flags produced by the compare above */
      switch(op) {
        case TOK_LE: op=TOK_ULE; break;
        case TOK_LT: op=TOK_ULT; break;
        case TOK_UGE: op=TOK_GE; break;
        case TOK_UGT: op=TOK_GT; break;
      }

      vtop->r = VT_CMP;
      vtop->c.i = op;
      return;
  }
  /* Arithmetic: second operand in the low bits of the instruction, first
     operand (unless negating) at bit 16, destination at bit 12. 'r' becomes
     the mask of the source registers passed to get_reg_ex(). */
  r=gv(RC_FLOAT);
  x|=vfpr(r);
  r=regmask(r);
  if(!fneg) {
    int r2;
    vswap();
    r2=gv(RC_FLOAT);
    x|=vfpr(r2)<<16;
    r|=regmask(r2);
  }
  vtop->r=get_reg_ex(RC_FLOAT,r);
  if(!fneg)
    vtop--;
  o(x|(vfpr(vtop->r)<<12));
}
1745
1746 #else
1747 static uint32_t is_fconst()
1748 {
1749   long double f;
1750   uint32_t r;
1751   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1752     return 0;
1753   if (vtop->type.t == VT_FLOAT)
1754     f = vtop->c.f;
1755   else if (vtop->type.t == VT_DOUBLE)
1756     f = vtop->c.d;
1757   else
1758     f = vtop->c.ld;
1759   if(!ieee_finite(f))
1760     return 0;
1761   r=0x8;
1762   if(f<0.0) {
1763     r=0x18;
1764     f=-f;
1765   }
1766   if(f==0.0)
1767     return r;
1768   if(f==1.0)
1769     return r|1;
1770   if(f==2.0)
1771     return r|2;
1772   if(f==3.0)
1773     return r|3;
1774   if(f==4.0)
1775     return r|4;
1776   if(f==5.0)
1777     return r|5;
1778   if(f==0.5)
1779     return r|6;
1780   if(f==10.0)
1781     return r|7;
1782   return 0;
1783 }
1784
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type

   FPA variant (TCC_ARM_VFP not defined). op is the operator token; the two
   operands are the two top entries of the value stack and are replaced by
   the result.

   c1/c2 are the is_fconst() codes of the first/second operand (0 when the
   operand has no immediate encoding). A code above 0xf denotes a negative
   constant: its magnitude is used as immediate and the operation is adapted
   where possible. r ends up at bit 16 of the instruction and r2 in its low
   bits. */
void gen_opf(int op)
{
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* select the precision of the operation from the operand type */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  switch(op)
  {
    case '+':
      /* addition commutes: make sure a usable constant, if any, ends up as
         second operand */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* adding a negative constant: subtract its magnitude instead */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* x - c: subtract a non-negative constant; for a negative one the
           plain (add) form is used with its magnitude */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* c - x with a non-negative constant: reverse subtract */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* multiplication commutes: prefer a non-negative constant as second
         operand */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* c / x with a non-negative constant: reverse divide */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* choose the condition recorded in the VT_CMP result */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparison on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only the first operand is a usable constant: swap the operands and
           mirror the condition */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative constant: switch to the variant selected by bit 21 and
             use the magnitude */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* c1 is reused as the value of the destination field (bit 12): 15 for a
     comparison, otherwise a register obtained with get_reg_ex() */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* r2 is an immediate (bit 3 set): the top entry holds no source register
       to pass to two2mask() */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1948 #endif
1949
1950 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1951    and 'long long' cases. */
1952 ST_FUNC void gen_cvt_itof1(int t)
1953 {
1954   uint32_t r, r2;
1955   int bt;
1956   bt=vtop->type.t & VT_BTYPE;
1957   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1958 #ifndef TCC_ARM_VFP
1959     uint32_t dsize = 0;
1960 #endif
1961     r=intr(gv(RC_INT));
1962 #ifdef TCC_ARM_VFP
1963     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1964     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1965     r2|=r2<<12;
1966     if(!(vtop->type.t & VT_UNSIGNED))
1967       r2|=0x80;                /* fuitoX -> fsituX */
1968     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1969 #else
1970     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1971     if((t & VT_BTYPE) != VT_FLOAT)
1972       dsize=0x80;    /* flts -> fltd */
1973     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1974     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1975       uint32_t off = 0;
1976       o(0xE3500000|(r<<12));        /* cmp */
1977       r=fpr(get_reg(RC_FLOAT));
1978       if(last_itod_magic) {
1979         off=ind+8-last_itod_magic;
1980         off/=4;
1981         if(off>255)
1982           off=0;
1983       }
1984       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1985       if(!off) {
1986         o(0xEA000000);              /* b */
1987         last_itod_magic=ind;
1988         o(0x4F800000);              /* 4294967296.0f */
1989       }
1990       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1991     }
1992 #endif
1993     return;
1994   } else if(bt == VT_LLONG) {
1995     int func;
1996     CType *func_type = 0;
1997     if((t & VT_BTYPE) == VT_FLOAT) {
1998       func_type = &func_float_type;
1999       if(vtop->type.t & VT_UNSIGNED)
2000         func=TOK___floatundisf;
2001       else
2002         func=TOK___floatdisf;
2003 #if LDOUBLE_SIZE != 8
2004     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
2005       func_type = &func_ldouble_type;
2006       if(vtop->type.t & VT_UNSIGNED)
2007         func=TOK___floatundixf;
2008       else
2009         func=TOK___floatdixf;
2010     } else if((t & VT_BTYPE) == VT_DOUBLE) {
2011 #else
2012     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
2013 #endif
2014       func_type = &func_double_type;
2015       if(vtop->type.t & VT_UNSIGNED)
2016         func=TOK___floatundidf;
2017       else
2018         func=TOK___floatdidf;
2019     }
2020     if(func_type) {
2021       vpush_global_sym(func_type, func);
2022       vswap();
2023       gfunc_call(1);
2024       vpushi(0);
2025       vtop->r=TREG_F0;
2026       return;
2027     }
2028   }
2029   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2030 }
2031
/* convert fp to int 't' type

   t: destination integer type. The value to convert is the top of the value
      stack and is replaced by the converted value.

   Conversions to int are emitted inline where the target supports it; the
   remaining supported cases call a library helper. Unsupported combinations
   are reported with tcc_error(). */
void gen_cvt_ftoi(int t)
{
  uint32_t r, r2;
  int u, func = 0;
  u=t&VT_UNSIGNED;
  t&=VT_BTYPE;
  r2=vtop->type.t & VT_BTYPE; /* basic type of the source for now */
  if(t==VT_INT) {
#ifdef TCC_ARM_VFP
    /* convert in place in the VFP register, then move the result to a core
       register; bit 16 of the instruction selects the signed variant */
    r=vfpr(gv(RC_FLOAT));
    u=u?0:0x10000;
    o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
    r2=intr(vtop->r=get_reg(RC_INT));
    o(0xEE100A10|(r<<16)|(r2<<12));
    return;
#else
    if(u) {
      /* unsigned result: use a helper chosen from the source type */
      if(r2 == VT_FLOAT)
        func=TOK___fixunssfsi;
#if LDOUBLE_SIZE != 8
      else if(r2 == VT_LDOUBLE)
        func=TOK___fixunsxfsi;
      else if(r2 == VT_DOUBLE)
#else
      else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
#endif
        func=TOK___fixunsdfsi;
    } else {
      /* signed result: a single instruction from the float register to a
         core register */
      r=fpr(gv(RC_FLOAT));
      r2=intr(vtop->r=get_reg(RC_INT));
      o(0xEE100170|(r2<<12)|r);
      return;
    }
#endif
  } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
    if(r2 == VT_FLOAT)
      func=TOK___fixsfdi;
#if LDOUBLE_SIZE != 8
    else if(r2 == VT_LDOUBLE)
      func=TOK___fixxfdi;
    else if(r2 == VT_DOUBLE)
#else
    else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
#endif
      func=TOK___fixdfdi;
  }
  if(func) {
    /* call the helper, then push a dummy value bound to the register(s)
       holding its result */
    vpush_global_sym(&func_old_type, func);
    vswap();
    gfunc_call(1);
    vpushi(0);
    if(t == VT_LLONG)
      vtop->r2 = REG_LRET;
    vtop->r = REG_IRET;
    return;
  }
  tcc_error("unimplemented gen_cvt_ftoi!");
}
2091
2092 /* convert from one floating point type to another */
2093 void gen_cvt_ftof(int t)
2094 {
2095 #ifdef TCC_ARM_VFP
2096   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2097     uint32_t r = vfpr(gv(RC_FLOAT));
2098     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2099   }
2100 #else
2101   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2102   gv(RC_FLOAT);
2103 #endif
2104 }
2105
2106 /* computed goto support */
2107 void ggoto(void)
2108 {
2109   gcall_or_jmp(1);
2110   vtop--;
2111 }
2112
2113 /* Save the stack pointer onto the stack and return the location of its address */
2114 ST_FUNC void gen_vla_sp_save(int addr) {
2115     SValue v;
2116     v.type.t = VT_PTR;
2117     v.r = VT_LOCAL | VT_LVAL;
2118     v.c.i = addr;
2119     store(TREG_SP, &v);
2120 }
2121
2122 /* Restore the SP from a location on the stack */
2123 ST_FUNC void gen_vla_sp_restore(int addr) {
2124     SValue v;
2125     v.type.t = VT_PTR;
2126     v.r = VT_LOCAL | VT_LVAL;
2127     v.c.i = addr;
2128     load(TREG_SP, &v);
2129 }
2130
2131 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2132 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2133     int r = intr(gv(RC_INT));
2134     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2135 #ifdef TCC_ARM_EABI
2136     if (align < 8)
2137         align = 8;
2138 #else
2139     if (align < 4)
2140         align = 4;
2141 #endif
2142     if (align & (align - 1))
2143         tcc_error("alignment is not a power of 2: %i", align);
2144     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2145     vpop();
2146 }
2147
2148 /* end of ARM code generator */
2149 /*************************************************************/
2150 #endif
2151 /*************************************************************/