arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS            13
  33 #else
  34 #define NB_REGS             9
  35 #endif
  36
  37 #ifndef TCC_CPU_VERSION
  38 # define TCC_CPU_VERSION 5
  39 #endif
  40
  41 /* a register can belong to several classes. The classes must be
  42    sorted from more general to more precise (see gv2() code which does
  43    assumptions on it). */
  44 #define RC_INT     0x0001 /* generic integer register */
  45 #define RC_FLOAT   0x0002 /* generic float register */
  46 #define RC_R0      0x0004
  47 #define RC_R1      0x0008
  48 #define RC_R2      0x0010
  49 #define RC_R3      0x0020
  50 #define RC_R12     0x0040
  51 #define RC_F0      0x0080
  52 #define RC_F1      0x0100
  53 #define RC_F2      0x0200
  54 #define RC_F3      0x0400
  55 #ifdef TCC_ARM_VFP
  56 #define RC_F4      0x0800
  57 #define RC_F5      0x1000
  58 #define RC_F6      0x2000
  59 #define RC_F7      0x4000
  60 #endif
  61 #define RC_IRET    RC_R0  /* function return: integer register */
  62 #define RC_LRET    RC_R1  /* function return: second integer register */
  63 #define RC_FRET    RC_F0  /* function return: float register */
  64
  65 /* pretty names for the registers */
  66 enum {
  67     TREG_R0 = 0,
  68     TREG_R1,
  69     TREG_R2,
  70     TREG_R3,
  71     TREG_R12,
  72     TREG_F0,
  73     TREG_F1,
  74     TREG_F2,
  75     TREG_F3,
  76 #ifdef TCC_ARM_VFP
  77     TREG_F4,
  78     TREG_F5,
  79     TREG_F6,
  80     TREG_F7,
  81 #endif
  82     TREG_SP = 13,
  83     TREG_LR,
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 #else /* ! TARGET_DEFS_ONLY */
 134 /******************************************************/
 135 #include "tcc.h"
 136
 137 enum float_abi float_abi;
 138
 139 ST_DATA const int reg_classes[NB_REGS] = {
 140     /* r0 */ RC_INT | RC_R0,
 141     /* r1 */ RC_INT | RC_R1,
 142     /* r2 */ RC_INT | RC_R2,
 143     /* r3 */ RC_INT | RC_R3,
 144     /* r12 */ RC_INT | RC_R12,
 145     /* f0 */ RC_FLOAT | RC_F0,
 146     /* f1 */ RC_FLOAT | RC_F1,
 147     /* f2 */ RC_FLOAT | RC_F2,
 148     /* f3 */ RC_FLOAT | RC_F3,
 149 #ifdef TCC_ARM_VFP
 150  /* d4/s8 */ RC_FLOAT | RC_F4,
 151 /* d5/s10 */ RC_FLOAT | RC_F5,
 152 /* d6/s12 */ RC_FLOAT | RC_F6,
 153 /* d7/s14 */ RC_FLOAT | RC_F7,
 154 #endif
 155 };
 156
 157 static int func_sub_sp_offset, last_itod_magic;
 158 static int leaffunc;
 159
 160 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 161 static CType float_type, double_type, func_float_type, func_double_type;
 162 ST_FUNC void arm_init(struct TCCState *s)
 163 {
 164     float_type.t = VT_FLOAT;
 165     double_type.t = VT_DOUBLE;
 166     func_float_type.t = VT_FUNC;
 167     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 168     func_double_type.t = VT_FUNC;
 169     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 170
 171     float_abi = s->float_abi;
 172 #ifndef TCC_ARM_HARDFLOAT
 173     tcc_warning("soft float ABI currently not supported: default to softfp");
 174 #endif
 175 }
 176 #else
 177 #define func_float_type func_old_type
 178 #define func_double_type func_old_type
 179 #define func_ldouble_type func_old_type
 180 ST_FUNC void arm_init(struct TCCState *s)
 181 {
 182 #if 0
 183 #if !defined (TCC_ARM_VFP)
 184     tcc_warning("Support for FPA is deprecated and will be removed in next"
 185                 " release");
 186 #endif
 187 #if !defined (TCC_ARM_EABI)
 188     tcc_warning("Support for OABI is deprecated and will be removed in next"
 189                 " release");
 190 #endif
 191 #endif
 192 }
 193 #endif
 194
 195 static int two2mask(int a,int b) {
 196   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 197 }
 198
 199 static int regmask(int r) {
 200   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 201 }
 202
 203 /******************************************************/
 204
 205 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 206 const char *default_elfinterp(struct TCCState *s)
 207 {
 208     if (s->float_abi == ARM_HARD_FLOAT)
 209         return "/lib/ld-linux-armhf.so.3";
 210     else
 211         return "/lib/ld-linux.so.3";
 212 }
 213 #endif
 214
 215 void o(uint32_t i)
 216 {
 217   /* this is a good place to start adding big-endian support*/
 218   int ind1;
 219   if (nocode_wanted)
 220     return;
 221   ind1 = ind + 4;
 222   if (!cur_text_section)
 223     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 224          "can't evaluate constant expressions outside of a function.");
 225   if (ind1 > cur_text_section->data_allocated)
 226     section_realloc(cur_text_section, ind1);
 227   cur_text_section->data[ind++] = i&255;
 228   i>>=8;
 229   cur_text_section->data[ind++] = i&255;
 230   i>>=8;
 231   cur_text_section->data[ind++] = i&255;
 232   i>>=8;
 233   cur_text_section->data[ind++] = i;
 234 }
 235
 236 static uint32_t stuff_const(uint32_t op, uint32_t c)
 237 {
 238   int try_neg=0;
 239   uint32_t nc = 0, negop = 0;
 240
 241   switch(op&0x1F00000)
 242   {
 243     case 0x800000: //add
 244     case 0x400000: //sub
 245       try_neg=1;
 246       negop=op^0xC00000;
 247       nc=-c;
 248       break;
 249     case 0x1A00000: //mov
 250     case 0x1E00000: //mvn
 251       try_neg=1;
 252       negop=op^0x400000;
 253       nc=~c;
 254       break;
 255     case 0x200000: //xor
 256       if(c==~0)
 257         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 258       break;
 259     case 0x0: //and
 260       if(c==~0)
 261         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 262     case 0x1C00000: //bic
 263       try_neg=1;
 264       negop=op^0x1C00000;
 265       nc=~c;
 266       break;
 267     case 0x1800000: //orr
 268       if(c==~0)
 269         return (op&0xFFF0FFFF)|0x1E00000;
 270       break;
 271   }
 272   do {
 273     uint32_t m;
 274     int i;
 275     if(c<256) /* catch undefined <<32 */
 276       return op|c;
 277     for(i=2;i<32;i+=2) {
 278       m=(0xff>>i)|(0xff<<(32-i));
 279       if(!(c&~m))
 280         return op|(i<<7)|(c<<i)|(c>>(32-i));
 281     }
 282     op=negop;
 283     c=nc;
 284   } while(try_neg--);
 285   return 0;
 286 }
 287
 288
 289 //only add,sub
 290 void stuff_const_harder(uint32_t op, uint32_t v) {
 291   uint32_t x;
 292   x=stuff_const(op,v);
 293   if(x)
 294     o(x);
 295   else {
 296     uint32_t a[16], nv, no, o2, n2;
 297     int i,j,k;
 298     a[0]=0xff;
 299     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 300     for(i=1;i<16;i++)
 301       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 302     for(i=0;i<12;i++)
 303       for(j=i<4?i+12:15;j>=i+4;j--)
 304         if((v&(a[i]|a[j]))==v) {
 305           o(stuff_const(op,v&a[i]));
 306           o(stuff_const(o2,v&a[j]));
 307           return;
 308         }
 309     no=op^0xC00000;
 310     n2=o2^0xC00000;
 311     nv=-v;
 312     for(i=0;i<12;i++)
 313       for(j=i<4?i+12:15;j>=i+4;j--)
 314         if((nv&(a[i]|a[j]))==nv) {
 315           o(stuff_const(no,nv&a[i]));
 316           o(stuff_const(n2,nv&a[j]));
 317           return;
 318         }
 319     for(i=0;i<8;i++)
 320       for(j=i+4;j<12;j++)
 321         for(k=i<4?i+12:15;k>=j+4;k--)
 322           if((v&(a[i]|a[j]|a[k]))==v) {
 323             o(stuff_const(op,v&a[i]));
 324             o(stuff_const(o2,v&a[j]));
 325             o(stuff_const(o2,v&a[k]));
 326             return;
 327           }
 328     no=op^0xC00000;
 329     nv=-v;
 330     for(i=0;i<8;i++)
 331       for(j=i+4;j<12;j++)
 332         for(k=i<4?i+12:15;k>=j+4;k--)
 333           if((nv&(a[i]|a[j]|a[k]))==nv) {
 334             o(stuff_const(no,nv&a[i]));
 335             o(stuff_const(n2,nv&a[j]));
 336             o(stuff_const(n2,nv&a[k]));
 337             return;
 338           }
 339     o(stuff_const(op,v&a[0]));
 340     o(stuff_const(o2,v&a[4]));
 341     o(stuff_const(o2,v&a[8]));
 342     o(stuff_const(o2,v&a[12]));
 343   }
 344 }
 345
 346 uint32_t encbranch(int pos, int addr, int fail)
 347 {
 348   addr-=pos+8;
 349   addr/=4;
 350   if(addr>=0x1000000 || addr<-0x1000000) {
 351     if(fail)
 352       tcc_error("FIXME: function bigger than 32MB");
 353     return 0;
 354   }
 355   return 0x0A000000|(addr&0xffffff);
 356 }
 357
 358 int decbranch(int pos)
 359 {
 360   int x;
 361   x=*(uint32_t *)(cur_text_section->data + pos);
 362   x&=0x00ffffff;
 363   if(x&0x800000)
 364     x-=0x1000000;
 365   return x*4+pos+8;
 366 }
 367
 368 /* output a symbol and patch all calls to it */
 369 void gsym_addr(int t, int a)
 370 {
 371   uint32_t *x;
 372   int lt;
 373   while(t) {
 374     x=(uint32_t *)(cur_text_section->data + t);
 375     t=decbranch(lt=t);
 376     if(a==lt+4)
 377       *x=0xE1A00000; // nop
 378     else {
 379       *x &= 0xff000000;
 380       *x |= encbranch(lt,a,1);
 381     }
 382   }
 383 }
 384
 385 void gsym(int t)
 386 {
 387   gsym_addr(t, ind);
 388 }
 389
 390 #ifdef TCC_ARM_VFP
 391 static uint32_t vfpr(int r)
 392 {
 393   if(r<TREG_F0 || r>TREG_F7)
 394     tcc_error("compiler error! register %i is no vfp register",r);
 395   return r - TREG_F0;
 396 }
 397 #else
 398 static uint32_t fpr(int r)
 399 {
 400   if(r<TREG_F0 || r>TREG_F3)
 401     tcc_error("compiler error! register %i is no fpa register",r);
 402   return r - TREG_F0;
 403 }
 404 #endif
 405
 406 static uint32_t intr(int r)
 407 {
 408   if(r == TREG_R12)
 409     return 12;
 410   if(r >= TREG_R0 && r <= TREG_R3)
 411     return r - TREG_R0;
 412   if (r >= TREG_SP && r <= TREG_LR)
 413     return r + (13 - TREG_SP);
 414   tcc_error("compiler error! register %i is no int register",r);
 415 }
 416
 417 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 418 {
 419   if(*off>maxoff || *off&((1<<shift)-1)) {
 420     uint32_t x, y;
 421     x=0xE280E000;
 422     if(*sgn)
 423       x=0xE240E000;
 424     x|=(*base)<<16;
 425     *base=14; // lr
 426     y=stuff_const(x,*off&~maxoff);
 427     if(y) {
 428       o(y);
 429       *off&=maxoff;
 430       return;
 431     }
 432     y=stuff_const(x,(*off+maxoff)&~maxoff);
 433     if(y) {
 434       o(y);
 435       *sgn=!*sgn;
 436       *off=((*off+maxoff)&~maxoff)-*off;
 437       return;
 438     }
 439     stuff_const_harder(x,*off&~maxoff);
 440     *off&=maxoff;
 441   }
 442 }
 443
 444 static uint32_t mapcc(int cc)
 445 {
 446   switch(cc)
 447   {
 448     case TOK_ULT:
 449       return 0x30000000; /* CC/LO */
 450     case TOK_UGE:
 451       return 0x20000000; /* CS/HS */
 452     case TOK_EQ:
 453       return 0x00000000; /* EQ */
 454     case TOK_NE:
 455       return 0x10000000; /* NE */
 456     case TOK_ULE:
 457       return 0x90000000; /* LS */
 458     case TOK_UGT:
 459       return 0x80000000; /* HI */
 460     case TOK_Nset:
 461       return 0x40000000; /* MI */
 462     case TOK_Nclear:
 463       return 0x50000000; /* PL */
 464     case TOK_LT:
 465       return 0xB0000000; /* LT */
 466     case TOK_GE:
 467       return 0xA0000000; /* GE */
 468     case TOK_LE:
 469       return 0xD0000000; /* LE */
 470     case TOK_GT:
 471       return 0xC0000000; /* GT */
 472   }
 473   tcc_error("unexpected condition code");
 474   return 0xE0000000; /* AL */
 475 }
 476
 477 static int negcc(int cc)
 478 {
 479   switch(cc)
 480   {
 481     case TOK_ULT:
 482       return TOK_UGE;
 483     case TOK_UGE:
 484       return TOK_ULT;
 485     case TOK_EQ:
 486       return TOK_NE;
 487     case TOK_NE:
 488       return TOK_EQ;
 489     case TOK_ULE:
 490       return TOK_UGT;
 491     case TOK_UGT:
 492       return TOK_ULE;
 493     case TOK_Nset:
 494       return TOK_Nclear;
 495     case TOK_Nclear:
 496       return TOK_Nset;
 497     case TOK_LT:
 498       return TOK_GE;
 499     case TOK_GE:
 500       return TOK_LT;
 501     case TOK_LE:
 502       return TOK_GT;
 503     case TOK_GT:
 504       return TOK_LE;
 505   }
 506   tcc_error("unexpected condition code");
 507   return TOK_NE;
 508 }
 509
  510 /* Load register 'r' from value 'sv'.

         r:  destination register (TREG_xxx). Float types are encoded through
             vfpr()/fpr(), all other types through intr().
         sv: value to load. sv->r gives its kind (VT_VALMASK part) and the
             VT_LVAL / VT_SYM flags, sv->type.t its type and sv->c.i its
             constant or offset.

         Lvalues are read from memory, using lr as scratch address register
         when the address is not directly encodable. Other values are
         materialised in place: constants, fp-relative addresses, comparison
         results, pending jump chains and register-to-register copies.
         Anything else ends in tcc_error("load unimplemented!"). */
  511 void load(int r, SValue *sv)
  512 {
  513   int v, ft, fc, fr, sign;
  514   uint32_t op;
  515   SValue v1;
  516
  517   fr = sv->r;
  518   ft = sv->type.t;
  519   fc = sv->c.i;
  520
        /* split the offset into magnitude (fc) and direction (sign): the
           memory instructions below take an unsigned offset, and bit
           0x800000 is set in the opcode when the offset is added */
  521   if(fc>=0)
  522     sign=0;
  523   else {
  524     sign=1;
  525     fc=-fc;
  526   }
  527
  528   v = fr & VT_VALMASK;
  529   if (fr & VT_LVAL) {
  530     uint32_t base = 0xB; // fp
          /* reduce every lvalue kind to "base register + offset" */
  531     if(v == VT_LLOCAL) {
            /* the stack slot holds a pointer: fetch it into lr first */
  532       v1.type.t = VT_PTR;
  533       v1.r = VT_LOCAL | VT_LVAL;
  534       v1.c.i = sv->c.i;
  535       load(TREG_LR, &v1);
  536       base = 14; /* lr */
  537       fc=sign=0;
  538       v=VT_LOCAL;
  539     } else if(v == VT_CONST) {
            /* constant (possibly symbolic) address: materialise it in lr */
  540       v1.type.t = VT_PTR;
  541       v1.r = fr&~VT_LVAL;
  542       v1.c.i = sv->c.i;
  543       v1.sym=sv->sym;
  544       load(TREG_LR, &v1);
  545       base = 14; /* lr */
  546       fc=sign=0;
  547       v=VT_LOCAL;
  548     } else if(v < VT_CONST) {
            /* the address is held in register v */
  549       base=intr(v);
  550       fc=sign=0;
  551       v=VT_LOCAL;
  552     }
  553     if(v == VT_LOCAL) {
  554       if(is_float(ft)) {
              /* float load: offset limited to 1020 and a multiple of 4,
                 stored in the instruction divided by 4 */
  555         calcaddr(&base,&fc,&sign,1020,2);
  556 #ifdef TCC_ARM_VFP
  557         op=0xED100A00; /* flds */
  558         if(!sign)
  559           op|=0x800000;
  560         if ((ft & VT_BTYPE) != VT_FLOAT)
  561           op|=0x100;   /* flds -> fldd */
  562         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
  563 #else
              /* FPA load; the bits OR-ed in below select the precision */
  564         op=0xED100100;
  565         if(!sign)
  566           op|=0x800000;
  567 #if LDOUBLE_SIZE == 8
  568         if ((ft & VT_BTYPE) != VT_FLOAT)
  569           op|=0x8000;
  570 #else
  571         if ((ft & VT_BTYPE) == VT_DOUBLE)
  572           op|=0x8000;
  573         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
  574           op|=0x400000;
  575 #endif
  576         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
  577 #endif
  578       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
  579                 || (ft & VT_BTYPE) == VT_SHORT) {
              /* signed byte and (un)signed halfword loads: 8-bit offset
                 split into two nibbles in the instruction */
  580         calcaddr(&base,&fc,&sign,255,0);
  581         op=0xE1500090;
  582         if ((ft & VT_BTYPE) == VT_SHORT)
  583           op|=0x20;
  584         if ((ft & VT_UNSIGNED) == 0)
  585           op|=0x40;
  586         if(!sign)
  587           op|=0x800000;
  588         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
  589       } else {
              /* word or unsigned byte load with a 12-bit offset; bit
                 0x400000 selects the byte form */
  590         calcaddr(&base,&fc,&sign,4095,0);
  591         op=0xE5100000;
  592         if(!sign)
  593           op|=0x800000;
  594         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
  595           op|=0x400000;
  596         o(op|(intr(r)<<12)|fc|(base<<16));
  597       }
  598       return;
  599     }
  600   } else {
  601     if (v == VT_CONST) {
            /* constant: a single mov/mvn immediate if possible ... */
  602       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.i);
  603       if (fr & VT_SYM || !op) {
              /* ... otherwise load it from a literal word placed inline
                 (ldr r,[pc]; branch over the word; the word itself, with
                 an ABS32 relocation when a symbol is involved) */
  604         o(0xE59F0000|(intr(r)<<12));
  605         o(0xEA000000);
  606         if(fr & VT_SYM)
  607           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
  608         o(sv->c.i);
  609       } else
  610         o(op);
  611       return;
  612     } else if (v == VT_LOCAL) {
            /* address of a local: add r, fp, #offset if encodable ... */
  613       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.i);
  614       if (fr & VT_SYM || !op) {
              /* ... otherwise load the offset from an inline literal word
                 and add fp to it */
  615         o(0xE59F0000|(intr(r)<<12));
  616         o(0xEA000000);
  617         if(fr & VT_SYM) // needed ?
  618           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
  619         o(sv->c.i);
  620         o(0xE08B0000|(intr(r)<<12)|intr(r));
  621       } else
  622         o(op);
  623       return;
  624     } else if(v == VT_CMP) {
            /* comparison result: conditional mov r,#1 followed by mov r,#0
               under the opposite condition */
  625       o(mapcc(sv->c.i)|0x3A00001|(intr(r)<<12));
  626       o(mapcc(negcc(sv->c.i))|0x3A00000|(intr(r)<<12));
  627       return;
  628     } else if (v == VT_JMP || v == VT_JMPI) {
            /* pending jump chain: falling through gives t (the low bit of
               v); the jumps of the chain are patched to land on the second
               mov and give t^1 */
  629       int t;
  630       t = v & 1;
  631       o(0xE3A00000|(intr(r)<<12)|t);
  632       o(0xEA000000);
  633       gsym(sv->c.i);
  634       o(0xE3A00000|(intr(r)<<12)|(t^1));
  635       return;
  636     } else if (v < VT_CONST) {
            /* value already in register v: plain register copy */
  637       if(is_float(ft))
  638 #ifdef TCC_ARM_VFP
  639         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
  640 #else
  641         o(0xEE008180|(fpr(r)<<12)|fpr(v));
  642 #endif
  643       else
  644         o(0xE1A00000|(intr(r)<<12)|intr(v));
  645       return;
  646     }
  647   }
  648   tcc_error("load unimplemented!");
  649 }
 650
  651 /* Store register 'r' in lvalue 'sv'.

         r:  source register (TREG_xxx). Float types are encoded through
             vfpr()/fpr(), all other types through intr().
         sv: destination. sv->r gives its kind (VT_VALMASK part) and flags,
             sv->type.t the type to store and sv->c.i the offset.

         The destination is reduced to "base register + offset": a register
         holding the address, fp for locals, or lr after loading a constant
         address into it. Unsupported destinations end in
         tcc_error("store unimplemented"). */
  652 void store(int r, SValue *sv)
  653 {
  654   SValue v1;
  655   int v, ft, fc, fr, sign;
  656   uint32_t op;
  657
  658   fr = sv->r;
  659   ft = sv->type.t;
  660   fc = sv->c.i;
  661
        /* split the offset into magnitude (fc) and direction (sign); bit
           0x800000 is set in the opcode when the offset is added */
  662   if(fc>=0)
  663     sign=0;
  664   else {
  665     sign=1;
  666     fc=-fc;
  667   }
  668
  669   v = fr & VT_VALMASK;
  670   if (fr & VT_LVAL || fr == VT_LOCAL) {
  671     uint32_t base = 0xb; /* fp */
  672     if(v < VT_CONST) {
            /* the address is held in register v */
  673       base=intr(v);
  674       v=VT_LOCAL;
  675       fc=sign=0;
  676     } else if(v == VT_CONST) {
            /* constant (possibly symbolic) address: materialise it in lr */
  677       v1.type.t = ft;
  678       v1.r = fr&~VT_LVAL;
  679       v1.c.i = sv->c.i;
  680       v1.sym=sv->sym;
  681       load(TREG_LR, &v1);
  682       base = 14; /* lr */
  683       fc=sign=0;
  684       v=VT_LOCAL;
  685     }
  686     if(v == VT_LOCAL) {
  687        if(is_float(ft)) {
              /* float store: offset limited to 1020 and a multiple of 4,
                 stored in the instruction divided by 4 */
  688         calcaddr(&base,&fc,&sign,1020,2);
  689 #ifdef TCC_ARM_VFP
  690         op=0xED000A00; /* fsts */
  691         if(!sign)
  692           op|=0x800000;
  693         if ((ft & VT_BTYPE) != VT_FLOAT)
  694           op|=0x100;   /* fsts -> fstd */
  695         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
  696 #else
              /* FPA store; the bits OR-ed in below select the precision */
  697         op=0xED000100;
  698         if(!sign)
  699           op|=0x800000;
  700 #if LDOUBLE_SIZE == 8
  701         if ((ft & VT_BTYPE) != VT_FLOAT)
  702           op|=0x8000;
  703 #else
  704         if ((ft & VT_BTYPE) == VT_DOUBLE)
  705           op|=0x8000;
  706         if ((ft & VT_BTYPE) == VT_LDOUBLE)
  707           op|=0x400000;
  708 #endif
  709         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
  710 #endif
  711         return;
  712       } else if((ft & VT_BTYPE) == VT_SHORT) {
              /* halfword store: 8-bit offset split into two nibbles */
  713         calcaddr(&base,&fc,&sign,255,0);
  714         op=0xE14000B0;
  715         if(!sign)
  716           op|=0x800000;
  717         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
  718       } else {
              /* word or byte store with a 12-bit offset; bit 0x400000
                 selects the byte form */
  719         calcaddr(&base,&fc,&sign,4095,0);
  720         op=0xE5000000;
  721         if(!sign)
  722           op|=0x800000;
  723         if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
  724           op|=0x400000;
  725         o(op|(intr(r)<<12)|fc|(base<<16));
  726       }
  727       return;
  728     }
  729   }
  730   tcc_error("store unimplemented");
  731 }
 732
 733 static void gadd_sp(int val)
 734 {
 735   stuff_const_harder(0xE28DD000,val);
 736 }
 737
 738 /* 'is_jmp' is '1' if it is a jump */
 739 static void gcall_or_jmp(int is_jmp)
 740 {
 741   int r;
 742   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 743     uint32_t x;
 744     /* constant case */
 745     x=encbranch(ind,ind+vtop->c.i,0);
 746     if(x) {
 747       if (vtop->r & VT_SYM) {
 748         /* relocation case */
 749         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 750       } else
 751         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 752       o(x|(is_jmp?0xE0000000:0xE1000000));
 753     } else {
 754       if(!is_jmp)
 755         o(0xE28FE004); // add lr,pc,#4
 756       o(0xE51FF004);   // ldr pc,[pc,#-4]
 757       if (vtop->r & VT_SYM)
 758         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 759       o(vtop->c.i);
 760     }
 761   } else {
 762     /* otherwise, indirect call */
 763     r = gv(RC_INT);
 764     if(!is_jmp)
 765       o(0xE1A0E00F);       // mov lr,pc
 766     o(0xE1A0F000|intr(r)); // mov pc,r
 767   }
 768 }
 769
 770 static int unalias_ldbl(int btype)
 771 {
 772 #if LDOUBLE_SIZE == 8
 773     if (btype == VT_LDOUBLE)
 774       btype = VT_DOUBLE;
 775 #endif
 776     return btype;
 777 }
 778
 779 /* Return whether a structure is an homogeneous float aggregate or not.
 780    The answer is true if all the elements of the structure are of the same
 781    primitive float type and there is less than 4 elements.
 782
 783    type: the type corresponding to the structure to be tested */
 784 static int is_hgen_float_aggr(CType *type)
 785 {
 786   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 787     struct Sym *ref;
 788     int btype, nb_fields = 0;
 789
 790     ref = type->ref->next;
 791     btype = unalias_ldbl(ref->type.t & VT_BTYPE);
 792     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 793       for(; ref && btype == unalias_ldbl(ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 794       return !ref && nb_fields <= 4;
 795     }
 796   }
 797   return 0;
 798 }
 799
 800 struct avail_regs {
 801   signed char avail[3]; /* 3 holes max with only float and double alignments */
 802   int first_hole; /* first available hole */
 803   int last_hole; /* last available hole (none if equal to first_hole) */
 804   int first_free_reg; /* next free register in the sequence, hole excluded */
 805 };
 806
 807 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 808
 809 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 810    param) according to the rules described in the procedure call standard for
 811    the ARM architecture (AAPCS). If found, the registers are assigned to this
 812    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 813    and the parameter is a single float.
 814
 815    avregs: opaque structure to keep track of available VFP co-processor regs
 816    align: alignment constraints for the param, as returned by type_size()
 817    size: size of the parameter, as returned by type_size() */
 818 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
 819 {
 820   int first_reg = 0;
 821
 822   if (avregs->first_free_reg == -1)
 823     return -1;
 824   if (align >> 3) { /* double alignment */
 825     first_reg = avregs->first_free_reg;
 826     /* alignment constraint not respected so use next reg and record hole */
 827     if (first_reg & 1)
 828       avregs->avail[avregs->last_hole++] = first_reg++;
 829   } else { /* no special alignment (float or array of float) */
 830     /* if single float and a hole is available, assign the param to it */
 831     if (size == 4 && avregs->first_hole != avregs->last_hole)
 832       return avregs->avail[avregs->first_hole++];
 833     else
 834       first_reg = avregs->first_free_reg;
 835   }
 836   if (first_reg + size / 4 <= 16) {
 837     avregs->first_free_reg = first_reg + size / 4;
 838     return first_reg;
 839   }
 840   avregs->first_free_reg = -1;
 841   return -1;
 842 }
 843
 844 /* Returns whether all params need to be passed in core registers or not.
 845    This is the case for function part of the runtime ABI. */
 846 int floats_in_core_regs(SValue *sval)
 847 {
 848   if (!sval->sym)
 849     return 0;
 850
 851   switch (sval->sym->v) {
 852     case TOK___floatundisf:
 853     case TOK___floatundidf:
 854     case TOK___fixunssfdi:
 855     case TOK___fixunsdfdi:
 856 #ifndef TCC_ARM_VFP
 857     case TOK___fixunsxfdi:
 858 #endif
 859     case TOK___floatdisf:
 860     case TOK___floatdidf:
 861     case TOK___fixsfdi:
 862     case TOK___fixdfdi:
 863       return 1;
 864
 865     default:
 866       return 0;
 867   }
 868 }
 869
 870 /* Return the number of registers needed to return the struct, or 0 if
 871    returning via struct pointer. */
 872 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
 873 #ifdef TCC_ARM_EABI
 874     int size, align;
 875     size = type_size(vt, &align);
 876     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 877         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 878         *ret_align = 8;
 879         *regsize = 8;
 880         ret->ref = NULL;
 881         ret->t = VT_DOUBLE;
 882         return (size + 7) >> 3;
 883     } else if (size <= 4) {
 884         *ret_align = 4;
 885         *regsize = 4;
 886         ret->ref = NULL;
 887         ret->t = VT_INT;
 888         return 1;
 889     } else
 890         return 0;
 891 #else
 892     return 0;
 893 #endif
 894 }
 895
 896 /* Parameters are classified according to how they are copied to their final
 897    destination for the function call. Because the copying is performed class
 898    after class according to the order in the union below, it is important that
 899    some constraints about the order of the members of this union are respected:
 900    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 901    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 902      VFP_STRUCT_CLASS;
 903    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 904    See the comment for the main loop in copy_params() for the reason. */
 905 enum reg_class {
 906         STACK_CLASS = 0,
 907         CORE_STRUCT_CLASS,
 908         VFP_CLASS,
 909         VFP_STRUCT_CLASS,
 910         CORE_CLASS,
 911         NB_CLASSES
 912 };
 913
 914 struct param_plan {
 915     int start; /* first reg or addr used depending on the class */
 916     int end; /* last reg used or next free addr depending on the class */
 917     SValue *sval; /* pointer to SValue on the value stack */
 918     struct param_plan *prev; /*  previous element in this class */
 919 };
 920
 921 struct plan {
 922     struct param_plan *pplans; /* array of all the param plans */
 923     struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
 924 };
 925
      /* Record parameter plan 'pplan' in 'plan' as the newest element of
         class 'class': the plan is copied into the next free slot of
         plan->pplans and becomes the head of the class list, linked to the
         former head through 'prev'.
         The slot index is the variable named <plan>_nb (plan_nb when the
         first argument is 'plan'), which must exist at the point of use and
         is incremented by the macro. 'pplan' is modified (its prev field). */
  926 #define add_param_plan(plan,pplan,class)                        \
  927     do {                                                        \
  928         pplan.prev = plan->clsplans[class];                     \
  929         plan->pplans[plan ## _nb] = pplan;                      \
  930         plan->clsplans[class] = &plan->pplans[plan ## _nb++];   \
  931     } while(0)
 932
  933 /* Assign parameters to registers and stack with alignment according to the
  934    rules in the procedure call standard for the ARM architecture (AAPCS).
  935    The overall assignment is recorded in an array of per parameter structures
  936    called parameter plans. The parameter plans are also further organized in a
  937    number of linked lists, one per class of parameter (see the comment for the
  938    definition of enum reg_class).
  939
  940    nb_args: number of parameters of the function for which a call is generated
  941    float_abi: float ABI in use for this function call
  942    plan: the structure where the overall assignment is recorded
  943    todo: a bitmap that records which core registers hold a parameter
  944
  945    Returns the amount of stack space needed for parameter passing
  946
  947    Note: this function allocates an array in plan->pplans with tcc_malloc. It
  948    is the responsibility of the caller to free this array once used (ie not
  949    before copy_params). */
  950 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
  951 {
  952   int i, size, align;
  953   int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
  954   int plan_nb = 0; /* next free slot of plan->pplans, used by add_param_plan() */
  955   struct param_plan pplan;
  956   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
  957
  958   ncrn = nsaa = 0;
  959   *todo = 0;
  960   plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
  961   memset(plan->clsplans, 0, sizeof(plan->clsplans));
        /* visit the parameters from vtop[-(nb_args-1)] up to vtop[0]; inside
           the switch, 'continue' means the parameter has been placed while
           'break' falls through to the stack allocation after the switch */
  962   for(i = nb_args; i-- ;) {
  963     int j, start_vfpreg = 0;
  964     CType type = vtop[-i].type;
  965     type.t &= ~VT_ARRAY;
  966     size = type_size(&type, &align);
          /* round size and alignment up to a multiple of 4 bytes */
  967     size = (size + 3) & ~3;
  968     align = (align + 3) & ~3;
  969     switch(vtop[-i].type.t & VT_BTYPE) {
  970       case VT_STRUCT:
  971       case VT_FLOAT:
  972       case VT_DOUBLE:
  973       case VT_LDOUBLE:
  974       if (float_abi == ARM_HARD_FLOAT) {
  975         int is_hfa = 0; /* Homogeneous float aggregate */
  976
  977         if (is_float(vtop[-i].type.t)
  978             || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
  979           int end_vfpreg;
  980
                /* floats and homogeneous float aggregates go to VFP
                   registers, or to the stack once these are exhausted */
  981           start_vfpreg = assign_vfpreg(&avregs, align, size);
  982           end_vfpreg = start_vfpreg + ((size - 1) >> 2);
  983           if (start_vfpreg >= 0) {
  984             pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
  985             if (is_hfa)
  986               add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
  987             else
  988               add_param_plan(plan, pplan, VFP_CLASS);
  989             continue;
  990           } else
  991             break;
  992         }
  993       }
            /* align the next core register number on the parameter alignment */
  994       ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
  995       if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
  996         /* The parameter is allocated both in core register and on stack. As
  997          * such, it can be of either class: it would either be the last of
  998          * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
  999         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 1000           *todo|=(1<<j);
 1001         pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
 1002         add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
 1003         ncrn += size/4;
              /* the part that did not fit in r0-r3 lives on the stack */
 1004         if (ncrn > 4)
 1005           nsaa = (ncrn - 4) * 4;
 1006       } else {
              /* no core register left for this parameter: none can be used
                 by the following ones either */
 1007         ncrn = 4;
 1008         break;
 1009       }
 1010       continue;
 1011       default:
 1012       if (ncrn < 4) {
 1013         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 1014
 1015         if (is_long) {
                /* a long long starts on an even register number */
 1016           ncrn = (ncrn + 1) & -2;
 1017           if (ncrn == 4)
 1018             break;
 1019         }
 1020         pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
 1021         ncrn++;
 1022         if (is_long)
 1023           pplan.end = ncrn++;
 1024         add_param_plan(plan, pplan, CORE_CLASS);
 1025         continue;
 1026       }
 1027     }
          /* the parameter goes on the stack, at the next suitably aligned
             address */
 1028     nsaa = (nsaa + (align - 1)) & ~(align - 1);
 1029     pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
 1030     add_param_plan(plan, pplan, STACK_CLASS);
 1031     nsaa += size; /* size already rounded up before */
 1032   }
 1033   return nsaa;
 1034 }
 1035
 1036 #undef add_param_plan
1037
/* Copy parameters to their final destination (core reg, VFP reg or stack) for
   a function call.

   nb_args: number of parameters the function takes
   plan: the overall assignment plan for parameters
   todo: a bitmap indicating which core regs will hold a parameter

   The per-class lists in plan->clsplans[] are walked twice (pass 0, then
   pass 1); the second walk only revisits CORE_CLASS entries, see below.

   Returns the number of SValues added by this function on the value stack */
static int copy_params(int nb_args, struct plan *plan, int todo)
{
  int size, align, r, i, nb_extra_sval = 0;
  struct param_plan *pplan;
  int pass = 0; /* 0 during the first walk, 1 during the fix-up walk */

   /* Several constraints require parameters to be copied in a specific order:
      - structures are copied to the stack before being loaded in a reg;
      - floats loaded to an odd numbered VFP reg are first copied to the
        preceding even numbered VFP reg and then moved to the next VFP reg.

      It is thus important that:
      - structures assigned to core regs must be copied after parameters
        assigned to the stack but before structures assigned to VFP regs because
        a structure can lie partly in core registers and partly on the stack;
      - parameters assigned to the stack and all structures be copied before
        parameters assigned to a core reg since copying a parameter to the stack
        requires using a core reg;
      - parameters assigned to VFP regs be copied before structures assigned to
        VFP regs as the copy might use an even numbered VFP reg that already
        holds part of a structure. */
again:
  /* Classes are processed in increasing index order, each list from its
     head through the 'prev' links. */
  for(i = 0; i < NB_CLASSES; i++) {
    for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {

      /* Second pass: only redo CORE_CLASS entries whose recorded location is
         no longer below VT_CONST (presumably: no longer a plain register). */
      if (pass
          && (i != CORE_CLASS || pplan->sval->r < VT_CONST))
        continue;

      /* Work on a copy pushed on top of the value stack; the original slot is
         neutralised and, for CORE_CLASS, later re-marked with the register. */
      vpushv(pplan->sval);
      pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
      switch(i) {
        case STACK_CLASS:
        case CORE_STRUCT_CLASS:
        case VFP_STRUCT_CLASS:
          /* All three classes go through the stack first. */
          if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
            int padding = 0;
            size = type_size(&pplan->sval->type, &align);
            /* align to stack align size */
            size = (size + 3) & ~3;
            /* Gap between this stack slot and the previously planned one */
            if (i == STACK_CLASS && pplan->prev)
              padding = pplan->start - pplan->prev->end;
            size += padding; /* Add padding if any */
            /* allocate the necessary size on stack */
            gadd_sp(-size);
            /* generate structure store */
            r = get_reg(RC_INT);
            o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore(); /* memcpy to current sp + potential padding */

            /* Homogeneous float aggregate are loaded to VFP registers
               immediately since there is no way of loading data in multiple
               non consecutive VFP registers as what is done for other
               structures (see the use of todo). */
            if (i == VFP_STRUCT_CLASS) {
              int first = pplan->start, nb = pplan->end - first + 1;
              /* vpop.32 {pplan->start, ..., pplan->end} */
              o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
              /* No need to write the register used to a SValue since VFP regs
                 cannot be used for gcall_or_jmp */
            }
          } else {
            /* Scalar going to the stack: load it in a register and push it. */
            if (is_float(pplan->sval->type.t)) {
#ifdef TCC_ARM_VFP
              r = vfpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else {
                size = 8;
                r |= 0x101; /* vpush.32 -> vpush.64 */
              }
              o(0xED2D0A01 + r); /* vpush */
#else
              r = fpr(gv(RC_FLOAT)) << 12;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
                size = 4;
              else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
                size = 8;
              else
                size = LDOUBLE_SIZE;

              /* Size-dependent encoding bits of the FPA store below */
              if (size == 12)
                r |= 0x400000;
              else if(size == 8)
                r|=0x8000;

              o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
#endif
            } else {
              /* simple type (currently always same size) */
              /* XXX: implicit cast ? */
              size=4;
              if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
                /* 64-bit value: split it in two words, push the word that ends
                   up on top of the value stack first, then fall through to
                   push the other one. */
                lexpand_nr();
                size = 8;
                r = gv(RC_INT);
                o(0xE52D0004|(intr(r)<<12)); /* push r */
                vtop--;
              }
              r = gv(RC_INT);
              o(0xE52D0004|(intr(r)<<12)); /* push r */
            }
            if (i == STACK_CLASS && pplan->prev)
              gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
          }
          break;

        case VFP_CLASS:
          /* Load in the double register that contains the target single */
          gv(regmask(TREG_F0 + (pplan->start >> 1)));
          if (pplan->start & 1) { /* Must be in upper part of double register */
            o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
            vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
          }
          break;

        case CORE_CLASS:
          if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
            /* 64-bit value: one word goes to register 'end' (recorded in r2),
               the other to register 'start' below. */
            lexpand_nr();
            gv(regmask(pplan->end));
            pplan->sval->r2 = vtop->r;
            vtop--;
          }
          gv(regmask(pplan->start));
          /* Mark register as used so that gcall_or_jmp use another one
             (regs >=4 are free as never used to pass parameters) */
          pplan->sval->r = vtop->r;
          break;
      }
      /* Drop the working copy pushed by vpushv() above */
      vtop--;
    }
  }

  /* second pass to restore registers that were saved on stack by accident.
     Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
  if (++pass < 2)
    goto again;

  /* Manually free remaining registers since next parameters are loaded
   * manually, without the help of gv(int). */
  save_regs(nb_args);

  if(todo) {
    /* Load in one go every core register that holds (part of) a structure
       that was stored on the stack above. */
    o(0xE8BD0000|todo); /* pop {todo} */
    for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
      int r; /* note: shadows the function-level 'r' */
      pplan->sval->r = pplan->start;
      /* An SValue can only pin 2 registers at best (r and r2) but a structure
         can occupy more than 2 registers. Thus, we need to push on the value
         stack some fake parameter to have on SValue for each registers used
         by a structure (r2 is not used). */
      for (r = pplan->start + 1; r <= pplan->end; r++) {
        if (todo & (1 << r)) {
          nb_extra_sval++;
          vpushi(0);
          vtop->r = r;
        }
      }
    }
  }
  return nb_extra_sval;
}
1209
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address.

   nb_args: number of parameter SValues sitting above the function SValue
            on the value stack (the function is read from vtop[-nb_args]). */
void gfunc_call(int nb_args)
{
  int r, args_size;
  int def_float_abi = float_abi; /* float_abi may be overridden for this call; restored at the end */
  int todo;
  struct plan plan;

#ifdef TCC_ARM_EABI
  int variadic;

  /* Under the hard-float ABI, variadic callees (and callees for which
     floats_in_core_regs() says so) get this call generated with the softfp
     convention instead. */
  if (float_abi == ARM_HARD_FLOAT) {
    variadic = (vtop[-nb_args].type.ref->f.func_type == FUNC_ELLIPSIS);
    if (variadic || floats_in_core_regs(&vtop[-nb_args]))
      float_abi = ARM_SOFTFP_FLOAT;
  }
#endif
  /* cannot let cpu flags if other instruction are generated. Also avoid leaving
     VT_JMP anywhere except on the top of the stack because it would complicate
     the code generator. */
  r = vtop->r & VT_VALMASK;
  if (r == VT_CMP || (r & ~1) == VT_JMP)
    gv(RC_INT);

  /* Decide where every parameter goes; the returned value is later used as
     the number of bytes to pop from the stack after the call. */
  args_size = assign_regs(nb_args, float_abi, &plan, &todo);

#ifdef TCC_ARM_EABI
  if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
    args_size = (args_size + 7) & ~7;
    o(0xE24DD004); /* sub sp, sp, #4 */
  }
#endif

  /* copy_params() may push extra SValues; account for them so that they are
     popped together with the real parameters below. */
  nb_args += copy_params(nb_args, &plan, todo);
  tcc_free(plan.pplans);

  /* Move fct SValue on top as required by gcall_or_jmp */
  vrotb(nb_args + 1);
  gcall_or_jmp(0);
  if (args_size)
      gadd_sp(args_size); /* pop all parameters passed on the stack */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
  /* With the softfp convention a floating point result comes back in core
     registers; move it to s0/d0. */
  if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
    if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
      o(0xEE000A10); /*vmov s0, r0 */
    } else {
      o(0xEE000B10); /* vmov.32 d0[0], r0 */
      o(0xEE201B10); /* vmov.32 d0[1], r1 */
    }
  }
#endif
  vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
  leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
  float_abi = def_float_abi;
}
1267
/* Generate the prolog of a function.

   func_type: type of the function being defined; its 'ref' symbol carries the
              return type and the chain of parameter symbols.

   Emits the register-saving instructions, reserves one instruction slot that
   gfunc_epilog() patches with the stack adjustment, and declares every
   parameter as a local variable with its frame offset. */
void gfunc_prolog(CType *func_type)
{
  Sym *sym,*sym2;
  /* n: number of core argument registers to save, nf: number of single
     precision VFP registers to save */
  int n, nf, size, align, rs, struct_ret = 0;
  int addr, pn, sn; /* pn=core, sn=stack */
  CType ret_type;

#ifdef TCC_ARM_EABI
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
#endif

  sym = func_type->ref;
  func_vt = sym->type;
  func_var = (func_type->ref->f.func_type == FUNC_ELLIPSIS);

  n = nf = 0;
  /* A structure result for which gfunc_sret() returns 0 takes one core
     argument slot (presumably the hidden result pointer - TODO confirm). */
  if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
      !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
  {
    n++;
    struct_ret = 1;
    func_vc = 12; /* Offset from fp of the place to store the result */
  }
  /* First walk over the parameters: only count how many core (n) and VFP (nf)
     argument registers are in use; stop once both kinds are exhausted. */
  for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
    size = type_size(&sym2->type, &align);
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var &&
        (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
      int tmpnf = assign_vfpreg(&avregs, align, size);
      tmpnf += (size + 3) / 4;
      /* keep the highest VFP register index (+1) used so far */
      nf = (tmpnf > nf) ? tmpnf : nf;
    } else
#endif
    if (n < 4)
      n += (size + 3) / 4;
  }
  o(0xE1A0C00D); /* mov ip,sp */
  /* Variadic functions always save all four core argument registers */
  if (func_var)
    n=4;
  if (n) {
    if(n>4)
      n=4;
#ifdef TCC_ARM_EABI
    n=(n+1)&-2; /* round up to an even number of registers */
#endif
    o(0xE92D0000|((1<<n)-1)); /* save the first n argument registers (at most r0-r3) on stack if needed */
  }
  if (nf) {
    if (nf>16)
      nf=16;
    nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
    o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
  }
  o(0xE92D5800); /* save fp, ip, lr */
  o(0xE1A0B00D); /* mov fp, sp */
  /* Remember where the placeholder is so that the epilog can patch it */
  func_sub_sp_offset = ind;
  o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */

#ifdef TCC_ARM_EABI
  if (float_abi == ARM_HARD_FLOAT) {
    /* The saved VFP registers lie between fp and the saved core registers */
    func_vc += nf * 4;
    /* restart VFP register assignment for the second walk */
    avregs = AVAIL_REGS_INITIALIZER;
  }
#endif
  /* Second walk: compute the frame offset of every parameter and declare it.
     pn counts core register words (starting after the struct-return slot),
     sn counts words of parameters that live in the caller's stack area. */
  pn = struct_ret, sn = 0;
  while ((sym = sym->next)) {
    CType *type;
    type = &sym->type;
    size = type_size(type, &align);
    size = (size + 3) >> 2;      /* size in words from here on */
    align = (align + 3) & ~3;    /* alignment in bytes, at least 4 */
#ifdef TCC_ARM_EABI
    if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
        || is_hgen_float_aggr(&sym->type))) {
      int fpn = assign_vfpreg(&avregs, align, size << 2);
      if (fpn >= 0)
        addr = fpn * 4;          /* inside the saved VFP register area */
      else
        goto from_stack;         /* no VFP register left: passed on the stack */
    } else
#endif
    if (pn < 4) {
#ifdef TCC_ARM_EABI
        pn = (pn + (align-1)/4) & -(align/4);
#endif
      addr = (nf + pn) * 4;      /* inside the saved core register area */
      pn += size;
      /* A parameter split between registers and stack: the stack part
         already consumes some stack words. */
      if (!sn && pn > 4)
        sn = (pn - 4);
    } else {
#ifdef TCC_ARM_EABI
from_stack:
        sn = (sn + (align-1)/4) & -(align/4);
#endif
      addr = (n + nf + sn) * 4;  /* above all saved argument registers */
      sn += size;
    }
    /* + 12 skips the three words pushed by "save fp, ip, lr" above */
    sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
             addr + 12);
  }
  last_itod_magic=0;
  leaffunc = 1; /* cleared by gfunc_call() as soon as a call is generated */
  loc = 0;
}
1373
1374 /* generate function epilog */
1375 void gfunc_epilog(void)
1376 {
1377   uint32_t x;
1378   int diff;
1379   /* Copy float return value to core register if base standard is used and
1380      float computation is made with VFP */
1381 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1382   if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1383     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1384       o(0xEE100A10); /* fmrs r0, s0 */
1385     else {
1386       o(0xEE100B10); /* fmrdl r0, d0 */
1387       o(0xEE301B10); /* fmrdh r1, d0 */
1388     }
1389   }
1390 #endif
1391   o(0xE89BA800); /* restore fp, sp, pc */
1392   diff = (-loc + 3) & -4;
1393 #ifdef TCC_ARM_EABI
1394   if(!leaffunc)
1395     diff = ((diff + 11) & -8) - 4;
1396 #endif
1397   if(diff > 0) {
1398     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1399     if(x)
1400       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1401     else {
1402       int addr;
1403       addr=ind;
1404       o(0xE59FC004); /* ldr ip,[pc+4] */
1405       o(0xE04BD00C); /* sub sp,fp,ip  */
1406       o(0xE1A0F00E); /* mov pc,lr */
1407       o(diff);
1408       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1409     }
1410   }
1411 }
1412
1413 ST_FUNC void gen_fill_nops(int bytes)
1414 {
1415     if ((bytes & 3))
1416       tcc_error("alignment of code section not multiple of 4");
1417     while (bytes > 0) {
1418         o(0xE1A00000);
1419         bytes -= 4;
1420     }
1421 }
1422
1423 /* generate a jump to a label */
1424 int gjmp(int t)
1425 {
1426   int r;
1427   if (nocode_wanted)
1428     return t;
1429   r=ind;
1430   o(0xE0000000|encbranch(r,t,1));
1431   return r;
1432 }
1433
/* Generate a jump to the fixed address 'a'; the position of the emitted
   branch is not needed by the caller and is discarded. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1439
1440 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1441 int gtst(int inv, int t)
1442 {
1443   int v, r;
1444   uint32_t op;
1445
1446   v = vtop->r & VT_VALMASK;
1447   r=ind;
1448
1449   if (nocode_wanted) {
1450     ;
1451   } else if (v == VT_CMP) {
1452     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1453     op|=encbranch(r,t,1);
1454     o(op);
1455     t=r;
1456   } else if (v == VT_JMP || v == VT_JMPI) {
1457     if ((v & 1) == inv) {
1458       if(!vtop->c.i)
1459         vtop->c.i=t;
1460       else {
1461         uint32_t *x;
1462         int p,lp;
1463         if(t) {
1464           p = vtop->c.i;
1465           do {
1466             p = decbranch(lp=p);
1467           } while(p);
1468           x = (uint32_t *)(cur_text_section->data + lp);
1469           *x &= 0xff000000;
1470           *x |= encbranch(lp,t,1);
1471         }
1472         t = vtop->c.i;
1473       }
1474     } else {
1475       t = gjmp(t);
1476       gsym(vtop->c.i);
1477     }
1478   }
1479   vtop--;
1480   return t;
1481 }
1482
/* Generate an integer binary operation 'op' on the two top entries of the
   value stack; both operands are consumed and the result is left on top.

   The first switch classifies the operation through 'c':
     1 = data processing instruction (also used for comparisons),
     2 = shift,
     3 = call to a runtime helper function.
   Multiplications ('*' and TOK_UMULL) are emitted directly and return. */
void gen_opi(int op)
{
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  unsigned short retreg = REG_IRET; /* register holding the helper's result (case 3) */

  c=0;
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      /* 32-bit multiply: both operands in registers, result reuses the
         register of the first operand */
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* the result is taken from REG_LRET instead of REG_IRET */
      func=TOK___aeabi_idivmod;
      retreg=REG_LRET;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      /* the result is taken from REG_LRET instead of REG_IRET */
      func=TOK___aeabi_uidivmod;
      retreg=REG_LRET;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* 32x32 -> 64 multiply: a second register is allocated for r2 of the
         result and a fresh one (different from the old first operand, kept
         in 'c') for r. */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      /* everything else is handled as a comparison: see the VT_CMP result
         set after 'done' below */
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* Constant first operand of a subtraction: swap the operands and use
         the reversed form of the instruction. */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* Flags or pending jumps on top must be turned into a register value
         before more code is emitted. */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* Load the first operand in a register; 'c' now holds its number */
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20)|(c<<16);
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        uint32_t x;
        /* Try the immediate form; stuff_const() yields 0 when the constant
           cannot be encoded. */
        x=stuff_const(opc|0x2000000,vtop->c.i);
        if(x) {
          r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
          o(x|(r<<12));
          goto done;
        }
      }
      /* Register form: second operand in a register too */
      fr=intr(gv(RC_INT));
      r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
      o(opc|(r<<12)|fr);
done:
      vtop--;
      /* For comparisons the result lives in the CPU flags */
      if (op >= TOK_ULT && op <= TOK_GT) {
        vtop->r = VT_CMP;
        vtop->c.i = op;
      }
      break;
    case 2:
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* Value to shift goes in a register */
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      opc|=r;
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* Constant shift amount, only the low 5 bits are used */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|(c<<7)|(fr<<12));
      } else {
        /* Shift amount held in a register */
        fr=intr(gv(RC_INT));
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* Call helper 'func' with the two operands: push the function symbol,
         rotate it below the operands, call, then push a value located in
         'retreg' as the result. */
      vpush_global_sym(&func_old_type, func);
      vrott(3);
      gfunc_call(2);
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1652
1653 #ifdef TCC_ARM_VFP
1654 static int is_zero(int i)
1655 {
1656   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1657     return 0;
1658   if (vtop[i].type.t == VT_FLOAT)
1659     return (vtop[i].c.f == 0.f);
1660   else if (vtop[i].type.t == VT_DOUBLE)
1661     return (vtop[i].c.d == 0.0);
1662   return (vtop[i].c.ld == 0.l);
1663 }
1664
/* generate a floating point operation 'v = t1 op t2' instruction. The
 *    two operands are guaranteed to have the same floating point type
 *
 * VFP version. Arithmetic operations leave their result in a float register
 * on top of the value stack; comparisons leave a VT_CMP entry. Additions and
 * subtractions involving a constant zero are simplified (see is_zero()). */
void gen_opf(int op)
{
  uint32_t x;
  int fneg=0,r; /* fneg: the subtraction was turned into a negation */
  x=0xEE000A00|T2CPR(vtop->type.t);
  switch(op) {
    case '+':
      /* 0 + b: bring the zero on top; a + 0: just drop the zero */
      if(is_zero(-1))
        vswap();
      if(is_zero(0)) {
        vtop--;
        return;
      }
      x|=0x300000;
      break;
    case '-':
      x|=0x300040;
      /* a - 0: just drop the zero */
      if(is_zero(0)) {
        vtop--;
        return;
      }
      /* 0 - b: negate b; the zero is removed from the value stack */
      if(is_zero(-1)) {
        x|=0x810000; /* fsubX -> fnegX */
        vswap();
        vtop--;
        fneg=1;
      }
      break;
    case '*':
      x|=0x200000;
      break;
    case '/':
      x|=0x800000;
      break;
    default:
      if(op < TOK_ULT || op > TOK_GT) {
        tcc_error("unknown fp op %x!",op);
        return;
      }
      /* Zero as first operand: swap so that the compare-with-zero form can
         be used and mirror the condition accordingly. */
      if(is_zero(-1)) {
        vswap();
        switch(op) {
          case TOK_LT: op=TOK_GT; break;
          case TOK_GE: op=TOK_ULE; break;
          case TOK_LE: op=TOK_GE; break;
          case TOK_GT: op=TOK_ULT; break;
        }
      }
      x|=0xB40040; /* fcmpX */
      if(op!=TOK_EQ && op!=TOK_NE)
        x|=0x80; /* fcmpX -> fcmpeX */
      if(is_zero(0)) {
        vtop--;
        o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
      } else {
        x|=vfpr(gv(RC_FLOAT));
        vswap();
        o(x|(vfpr(gv(RC_FLOAT))<<12));
        vtop--;
      }
      o(0xEEF1FA10); /* fmstat */

      /* Translate the condition into the token that is stored in the VT_CMP
         entry for use on the flags copied by fmstat. */
      switch(op) {
        case TOK_LE: op=TOK_ULE; break;
        case TOK_LT: op=TOK_ULT; break;
        case TOK_UGE: op=TOK_GE; break;
        case TOK_UGT: op=TOK_GT; break;
      }

      vtop->r = VT_CMP;
      vtop->c.i = op;
      return;
  }
  /* Arithmetic: operand on top goes in a register (low bits of x) */
  r=gv(RC_FLOAT);
  x|=vfpr(r);
  r=regmask(r);
  if(!fneg) {
    /* binary operation: the other operand goes in a register too
       (bits 16+ of x); r accumulates the mask of both registers */
    int r2;
    vswap();
    r2=gv(RC_FLOAT);
    x|=vfpr(r2)<<16;
    r|=regmask(r2);
  }
  vtop->r=get_reg_ex(RC_FLOAT,r);
  if(!fneg)
    vtop--;
  /* destination register: the one recorded in the entry now on top */
  o(x|(vfpr(vtop->r)<<12));
}
1755
1756 #else
1757 static uint32_t is_fconst()
1758 {
1759   long double f;
1760   uint32_t r;
1761   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1762     return 0;
1763   if (vtop->type.t == VT_FLOAT)
1764     f = vtop->c.f;
1765   else if (vtop->type.t == VT_DOUBLE)
1766     f = vtop->c.d;
1767   else
1768     f = vtop->c.ld;
1769   if(!ieee_finite(f))
1770     return 0;
1771   r=0x8;
1772   if(f<0.0) {
1773     r=0x18;
1774     f=-f;
1775   }
1776   if(f==0.0)
1777     return r;
1778   if(f==1.0)
1779     return r|1;
1780   if(f==2.0)
1781     return r|2;
1782   if(f==3.0)
1783     return r|3;
1784   if(f==4.0)
1785     return r|4;
1786   if(f==5.0)
1787     return r|5;
1788   if(f==0.5)
1789     return r|6;
1790   if(f==10.0)
1791     return r|7;
1792   return 0;
1793 }
1794
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type

   FPA version (used when TCC_ARM_VFP is not defined). Operands that
   is_fconst() recognises are encoded directly in the instruction instead of
   being loaded in a register. Arithmetic results end up in a float register,
   comparisons produce a VT_CMP entry. */
void gen_opf(int op)
{
  /* x: instruction word being built, r/r2: first/second operand fields,
     c1/c2: is_fconst() code of the first/second operand (0 = not usable) */
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* operand size selection */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  switch(op)
  {
    case '+':
      /* commutative: make sure a usable constant, if any, is the second
         operand */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* codes above 0xf stand for a negative constant: subtract its
           magnitude instead */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* a - const; for a negative constant the opcode is left as is and
           the magnitude is used */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* non-negative const - b: reverse subtract */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* commutative: prefer a non-negative constant as second operand */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* non-negative const / b: reverse divide */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* pick the condition token that is stored in the VT_CMP entry */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparison on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only the first operand is a usable constant: swap the operands and
           mirror the condition */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative constant: magnitude is encoded and bit 0x200000 is set
             (presumably selecting the compare-negated form - TODO confirm) */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* Destination field: 15 for comparisons, otherwise a float register that
     becomes the location of the result. */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* second operand is an encoded constant (bit 0x8 set), not a register */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1958 #endif
1959
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases.

   t: destination floating point type

   int/short/byte sources are converted inline; 'long long' sources are
   converted through a call to a runtime helper function. Any other
   combination is reported with tcc_error(). */
ST_FUNC void gen_cvt_itof1(int t)
{
  uint32_t r, r2;
  int bt;
  bt=vtop->type.t & VT_BTYPE;
  if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
#ifndef TCC_ARM_VFP
    uint32_t dsize = 0;
#endif
    /* source value in a core register, result in a fresh float register */
    r=intr(gv(RC_INT));
#ifdef TCC_ARM_VFP
    r2=vfpr(vtop->r=get_reg(RC_FLOAT));
    o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
    r2|=r2<<12; /* same register as source and destination of the conversion */
    if(!(vtop->type.t & VT_UNSIGNED))
      r2|=0x80;                /* fuitoX -> fsitoX */
    o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
#else
    r2=fpr(vtop->r=get_reg(RC_FLOAT));
    if((t & VT_BTYPE) != VT_FLOAT)
      dsize=0x80;    /* flts -> fltd */
    o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
    /* The conversion above treats the source as signed. For 'unsigned int'
       4294967296.0 is conditionally added afterwards; the instructions doing
       so carry a condition depending on the compare below. */
    if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
      uint32_t off = 0;
      o(0xE3500000|(r<<12));        /* cmp */
      r=fpr(get_reg(RC_FLOAT));
      /* Reuse the constant emitted by a previous conversion if it is close
         enough (offset in words must fit in 8 bits) */
      if(last_itod_magic) {
        off=ind+8-last_itod_magic;
        off/=4;
        if(off>255)
          off=0;
      }
      o(0xBD1F0100|(r<<12)|off);    /* ldflts */
      if(!off) {
        /* no reusable constant: emit it inline and branch over it */
        o(0xEA000000);              /* b */
        last_itod_magic=ind;
        o(0x4F800000);              /* 4294967296.0f */
      }
      o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
    }
#endif
    return;
  } else if(bt == VT_LLONG) {
    /* 64-bit source: pick the helper matching signedness and destination
       type; func_type stays null when 't' is not a supported float type */
    int func;
    CType *func_type = 0;
    if((t & VT_BTYPE) == VT_FLOAT) {
      func_type = &func_float_type;
      if(vtop->type.t & VT_UNSIGNED)
        func=TOK___floatundisf;
      else
        func=TOK___floatdisf;
#if LDOUBLE_SIZE != 8
    } else if((t & VT_BTYPE) == VT_LDOUBLE) {
      func_type = &func_ldouble_type;
      if(vtop->type.t & VT_UNSIGNED)
        func=TOK___floatundixf;
      else
        func=TOK___floatdixf;
    } else if((t & VT_BTYPE) == VT_DOUBLE) {
#else
    } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
#endif
      func_type = &func_double_type;
      if(vtop->type.t & VT_UNSIGNED)
        func=TOK___floatundidf;
      else
        func=TOK___floatdidf;
    }
    if(func_type) {
      /* call helper(value); the result is then declared to be in TREG_F0 */
      vpush_global_sym(func_type, func);
      vswap();
      gfunc_call(1);
      vpushi(0);
      vtop->r=TREG_F0;
      return;
    }
  }
  tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
}
2041
/* convert fp to int 't' type

   t: destination integer type (signedness taken from its VT_UNSIGNED flag)

   Conversions to VT_INT are done inline where possible; the remaining
   supported cases (unsigned int without VFP, signed long long) call a
   runtime helper function. Anything else is reported with tcc_error(). */
void gen_cvt_ftoi(int t)
{
  uint32_t r, r2;
  int u, func = 0; /* u: non-zero for an unsigned destination; func: helper token, 0 = none */
  u=t&VT_UNSIGNED;
  t&=VT_BTYPE;
  r2=vtop->type.t & VT_BTYPE; /* basic type of the source value */
  if(t==VT_INT) {
#ifdef TCC_ARM_VFP
    /* convert in place in the VFP register, then move the result to a core
       register */
    r=vfpr(gv(RC_FLOAT));
    u=u?0:0x10000; /* bit selecting the signed variant of the conversion */
    o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
    r2=intr(vtop->r=get_reg(RC_INT));
    o(0xEE100A10|(r<<16)|(r2<<12));
    return;
#else
    if(u) {
      /* unsigned destination: use a helper selected by the source type */
      if(r2 == VT_FLOAT)
        func=TOK___fixunssfsi;
#if LDOUBLE_SIZE != 8
      else if(r2 == VT_LDOUBLE)
        func=TOK___fixunsxfsi;
      else if(r2 == VT_DOUBLE)
#else
      else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
#endif
        func=TOK___fixunsdfsi;
    } else {
      /* signed destination: single inline instruction */
      r=fpr(gv(RC_FLOAT));
      r2=intr(vtop->r=get_reg(RC_INT));
      o(0xEE100170|(r2<<12)|r);
      return;
    }
#endif
  } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
    if(r2 == VT_FLOAT)
      func=TOK___fixsfdi;
#if LDOUBLE_SIZE != 8
    else if(r2 == VT_LDOUBLE)
      func=TOK___fixxfdi;
    else if(r2 == VT_DOUBLE)
#else
    else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
#endif
      func=TOK___fixdfdi;
  }
  if(func) {
    /* call helper(value); the result is then declared to be in REG_IRET
       (and REG_LRET for the second word of a long long) */
    vpush_global_sym(&func_old_type, func);
    vswap();
    gfunc_call(1);
    vpushi(0);
    if(t == VT_LLONG)
      vtop->r2 = REG_LRET;
    vtop->r = REG_IRET;
    return;
  }
  tcc_error("unimplemented gen_cvt_ftoi!");
}
2101
2102 /* convert from one floating point type to another */
2103 void gen_cvt_ftof(int t)
2104 {
2105 #ifdef TCC_ARM_VFP
2106   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2107     uint32_t r = vfpr(gv(RC_FLOAT));
2108     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2109   }
2110 #else
2111   /* all we have to do on i386 and FPA ARM is to put the float in a register */
2112   gv(RC_FLOAT);
2113 #endif
2114 }
2115
2116 /* computed goto support */
2117 void ggoto(void)
2118 {
2119   gcall_or_jmp(1);
2120   vtop--;
2121 }
2122
2123 /* Save the stack pointer onto the stack and return the location of its address */
2124 ST_FUNC void gen_vla_sp_save(int addr) {
2125     SValue v;
2126     v.type.t = VT_PTR;
2127     v.r = VT_LOCAL | VT_LVAL;
2128     v.c.i = addr;
2129     store(TREG_SP, &v);
2130 }
2131
2132 /* Restore the SP from a location on the stack */
2133 ST_FUNC void gen_vla_sp_restore(int addr) {
2134     SValue v;
2135     v.type.t = VT_PTR;
2136     v.r = VT_LOCAL | VT_LVAL;
2137     v.c.i = addr;
2138     load(TREG_SP, &v);
2139 }
2140
2141 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2142 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2143     int r = intr(gv(RC_INT));
2144     o(0xE04D0000|(r<<12)|r); /* sub r, sp, r */
2145 #ifdef TCC_ARM_EABI
2146     if (align < 8)
2147         align = 8;
2148 #else
2149     if (align < 4)
2150         align = 4;
2151 #endif
2152     if (align & (align - 1))
2153         tcc_error("alignment is not a power of 2: %i", align);
2154     o(stuff_const(0xE3C0D000|(r<<16), align - 1)); /* bic sp, r, #align-1 */
2155     vpop();
2156 }
2157
2158 /* end of ARM code generator */
2159 /*************************************************************/
2160 #endif
2161 /*************************************************************/