arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #ifdef TCC_ARM_EABI
  27 #ifndef TCC_ARM_VFP /* Avoid useless warning */
  28 #define TCC_ARM_VFP
  29 #endif
  30 #endif
  31
  32 /* number of available registers */
  33 #ifdef TCC_ARM_VFP
  34 #define NB_REGS            13
  35 #else
  36 #define NB_REGS             9
  37 #endif
  38
  39 #ifndef TCC_ARM_VERSION
  40 # define TCC_ARM_VERSION 5
  41 #endif
  42
  43 /* a register can belong to several classes. The classes must be
  44    sorted from more general to more precise (see gv2() code which does
  45    assumptions on it). */
  46 #define RC_INT     0x0001 /* generic integer register */
  47 #define RC_FLOAT   0x0002 /* generic float register */
  48 #define RC_R0      0x0004
  49 #define RC_R1      0x0008
  50 #define RC_R2      0x0010
  51 #define RC_R3      0x0020
  52 #define RC_R12     0x0040
  53 #define RC_F0      0x0080
  54 #define RC_F1      0x0100
  55 #define RC_F2      0x0200
  56 #define RC_F3      0x0400
  57 #ifdef TCC_ARM_VFP
  58 #define RC_F4      0x0800
  59 #define RC_F5      0x1000
  60 #define RC_F6      0x2000
  61 #define RC_F7      0x4000
  62 #endif
  63 #define RC_IRET    RC_R0  /* function return: integer register */
  64 #define RC_LRET    RC_R1  /* function return: second integer register */
  65 #define RC_FRET    RC_F0  /* function return: float register */
  66
  67 /* pretty names for the registers */
  68 enum {
  69     TREG_R0 = 0,
  70     TREG_R1,
  71     TREG_R2,
  72     TREG_R3,
  73     TREG_R12,
  74     TREG_F0,
  75     TREG_F1,
  76     TREG_F2,
  77     TREG_F3,
  78 #ifdef TCC_ARM_VFP
  79     TREG_F4,
  80     TREG_F5,
  81     TREG_F6,
  82     TREG_F7,
  83 #endif
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 /* ELF defines */
 134
 135 #define EM_TCC_TARGET EM_ARM
 136
 137 /* relocation type for 32 bit data relocation */
 138 #define R_DATA_32   R_ARM_ABS32
 139 #define R_DATA_PTR  R_ARM_ABS32
 140 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 141 #define R_COPY      R_ARM_COPY
 142
 143 #define ELF_START_ADDR 0x00008000
 144 #define ELF_PAGE_SIZE  0x1000
 145
 146 /******************************************************/
 147 #else /* ! TARGET_DEFS_ONLY */
 148 /******************************************************/
 149 #include "tcc.h"
 150
 151 ST_DATA const int reg_classes[NB_REGS] = {
 152     /* r0 */ RC_INT | RC_R0,
 153     /* r1 */ RC_INT | RC_R1,
 154     /* r2 */ RC_INT | RC_R2,
 155     /* r3 */ RC_INT | RC_R3,
 156     /* r12 */ RC_INT | RC_R12,
 157     /* f0 */ RC_FLOAT | RC_F0,
 158     /* f1 */ RC_FLOAT | RC_F1,
 159     /* f2 */ RC_FLOAT | RC_F2,
 160     /* f3 */ RC_FLOAT | RC_F3,
 161 #ifdef TCC_ARM_VFP
 162  /* d4/s8 */ RC_FLOAT | RC_F4,
 163 /* d5/s10 */ RC_FLOAT | RC_F5,
 164 /* d6/s12 */ RC_FLOAT | RC_F6,
 165 /* d7/s14 */ RC_FLOAT | RC_F7,
 166 #endif
 167 };
 168
 169 static int func_sub_sp_offset, last_itod_magic;
 170 static int leaffunc;
 171
 172 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 173 static CType float_type, double_type, func_float_type, func_double_type;
 174 ST_FUNC void arm_init_types(void)
 175 {
 176     float_type.t = VT_FLOAT;
 177     double_type.t = VT_DOUBLE;
 178     func_float_type.t = VT_FUNC;
 179     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 180     func_double_type.t = VT_FUNC;
 181     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 182 }
 183 #else
 184 #define func_float_type func_old_type
 185 #define func_double_type func_old_type
 186 #define func_ldouble_type func_old_type
 187 ST_FUNC void arm_init_types(void) {}
 188 #endif
 189
 190 static int two2mask(int a,int b) {
 191   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 192 }
 193
 194 static int regmask(int r) {
 195   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 196 }
 197
 198 /******************************************************/
 199
 200 void o(uint32_t i)
 201 {
 202   /* this is a good place to start adding big-endian support*/
 203   int ind1;
 204
 205   ind1 = ind + 4;
 206   if (!cur_text_section)
 207     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 208          "can't evaluate constant expressions outside of a function.");
 209   if (ind1 > cur_text_section->data_allocated)
 210     section_realloc(cur_text_section, ind1);
 211   cur_text_section->data[ind++] = i&255;
 212   i>>=8;
 213   cur_text_section->data[ind++] = i&255;
 214   i>>=8;
 215   cur_text_section->data[ind++] = i&255;
 216   i>>=8;
 217   cur_text_section->data[ind++] = i;
 218 }
 219
 220 static uint32_t stuff_const(uint32_t op, uint32_t c)
 221 {
 222   int try_neg=0;
 223   uint32_t nc = 0, negop = 0;
 224
 225   switch(op&0x1F00000)
 226   {
 227     case 0x800000: //add
 228     case 0x400000: //sub
 229       try_neg=1;
 230       negop=op^0xC00000;
 231       nc=-c;
 232       break;
 233     case 0x1A00000: //mov
 234     case 0x1E00000: //mvn
 235       try_neg=1;
 236       negop=op^0x400000;
 237       nc=~c;
 238       break;
 239     case 0x200000: //xor
 240       if(c==~0)
 241         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 242       break;
 243     case 0x0: //and
 244       if(c==~0)
 245         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 246     case 0x1C00000: //bic
 247       try_neg=1;
 248       negop=op^0x1C00000;
 249       nc=~c;
 250       break;
 251     case 0x1800000: //orr
 252       if(c==~0)
 253         return (op&0xFFF0FFFF)|0x1E00000;
 254       break;
 255   }
 256   do {
 257     uint32_t m;
 258     int i;
 259     if(c<256) /* catch undefined <<32 */
 260       return op|c;
 261     for(i=2;i<32;i+=2) {
 262       m=(0xff>>i)|(0xff<<(32-i));
 263       if(!(c&~m))
 264         return op|(i<<7)|(c<<i)|(c>>(32-i));
 265     }
 266     op=negop;
 267     c=nc;
 268   } while(try_neg--);
 269   return 0;
 270 }
 271
 272
 273 //only add,sub
 274 void stuff_const_harder(uint32_t op, uint32_t v) {
 275   uint32_t x;
 276   x=stuff_const(op,v);
 277   if(x)
 278     o(x);
 279   else {
 280     uint32_t a[16], nv, no, o2, n2;
 281     int i,j,k;
 282     a[0]=0xff;
 283     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 284     for(i=1;i<16;i++)
 285       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 286     for(i=0;i<12;i++)
 287       for(j=i<4?i+12:15;j>=i+4;j--)
 288         if((v&(a[i]|a[j]))==v) {
 289           o(stuff_const(op,v&a[i]));
 290           o(stuff_const(o2,v&a[j]));
 291           return;
 292         }
 293     no=op^0xC00000;
 294     n2=o2^0xC00000;
 295     nv=-v;
 296     for(i=0;i<12;i++)
 297       for(j=i<4?i+12:15;j>=i+4;j--)
 298         if((nv&(a[i]|a[j]))==nv) {
 299           o(stuff_const(no,nv&a[i]));
 300           o(stuff_const(n2,nv&a[j]));
 301           return;
 302         }
 303     for(i=0;i<8;i++)
 304       for(j=i+4;j<12;j++)
 305         for(k=i<4?i+12:15;k>=j+4;k--)
 306           if((v&(a[i]|a[j]|a[k]))==v) {
 307             o(stuff_const(op,v&a[i]));
 308             o(stuff_const(o2,v&a[j]));
 309             o(stuff_const(o2,v&a[k]));
 310             return;
 311           }
 312     no=op^0xC00000;
 313     nv=-v;
 314     for(i=0;i<8;i++)
 315       for(j=i+4;j<12;j++)
 316         for(k=i<4?i+12:15;k>=j+4;k--)
 317           if((nv&(a[i]|a[j]|a[k]))==nv) {
 318             o(stuff_const(no,nv&a[i]));
 319             o(stuff_const(n2,nv&a[j]));
 320             o(stuff_const(n2,nv&a[k]));
 321             return;
 322           }
 323     o(stuff_const(op,v&a[0]));
 324     o(stuff_const(o2,v&a[4]));
 325     o(stuff_const(o2,v&a[8]));
 326     o(stuff_const(o2,v&a[12]));
 327   }
 328 }
 329
 330 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 331 {
 332   addr-=pos+8;
 333   addr/=4;
 334   if(addr>=0x1000000 || addr<-0x1000000) {
 335     if(fail)
 336       tcc_error("FIXME: function bigger than 32MB");
 337     return 0;
 338   }
 339   return 0x0A000000|(addr&0xffffff);
 340 }
 341
 342 int decbranch(int pos)
 343 {
 344   int x;
 345   x=*(uint32_t *)(cur_text_section->data + pos);
 346   x&=0x00ffffff;
 347   if(x&0x800000)
 348     x-=0x1000000;
 349   return x*4+pos+8;
 350 }
 351
 352 /* output a symbol and patch all calls to it */
 353 void gsym_addr(int t, int a)
 354 {
 355   uint32_t *x;
 356   int lt;
 357   while(t) {
 358     x=(uint32_t *)(cur_text_section->data + t);
 359     t=decbranch(lt=t);
 360     if(a==lt+4)
 361       *x=0xE1A00000; // nop
 362     else {
 363       *x &= 0xff000000;
 364       *x |= encbranch(lt,a,1);
 365     }
 366   }
 367 }
 368
 369 void gsym(int t)
 370 {
 371   gsym_addr(t, ind);
 372 }
 373
 374 #ifdef TCC_ARM_VFP
 375 static uint32_t vfpr(int r)
 376 {
 377   if(r<TREG_F0 || r>TREG_F7)
 378     tcc_error("compiler error! register %i is no vfp register",r);
 379   return r-5;
 380 }
 381 #else
 382 static uint32_t fpr(int r)
 383 {
 384   if(r<TREG_F0 || r>TREG_F3)
 385     tcc_error("compiler error! register %i is no fpa register",r);
 386   return r-5;
 387 }
 388 #endif
 389
 390 static uint32_t intr(int r)
 391 {
 392   if(r==4)
 393     return 12;
 394   if((r<0 || r>4) && r!=14)
 395     tcc_error("compiler error! register %i is no int register",r);
 396   return r;
 397 }
 398
 399 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 400 {
 401   if(*off>maxoff || *off&((1<<shift)-1)) {
 402     uint32_t x, y;
 403     x=0xE280E000;
 404     if(*sgn)
 405       x=0xE240E000;
 406     x|=(*base)<<16;
 407     *base=14; // lr
 408     y=stuff_const(x,*off&~maxoff);
 409     if(y) {
 410       o(y);
 411       *off&=maxoff;
 412       return;
 413     }
 414     y=stuff_const(x,(*off+maxoff)&~maxoff);
 415     if(y) {
 416       o(y);
 417       *sgn=!*sgn;
 418       *off=((*off+maxoff)&~maxoff)-*off;
 419       return;
 420     }
 421     stuff_const_harder(x,*off&~maxoff);
 422     *off&=maxoff;
 423   }
 424 }
 425
 426 static uint32_t mapcc(int cc)
 427 {
 428   switch(cc)
 429   {
 430     case TOK_ULT:
 431       return 0x30000000; /* CC/LO */
 432     case TOK_UGE:
 433       return 0x20000000; /* CS/HS */
 434     case TOK_EQ:
 435       return 0x00000000; /* EQ */
 436     case TOK_NE:
 437       return 0x10000000; /* NE */
 438     case TOK_ULE:
 439       return 0x90000000; /* LS */
 440     case TOK_UGT:
 441       return 0x80000000; /* HI */
 442     case TOK_Nset:
 443       return 0x40000000; /* MI */
 444     case TOK_Nclear:
 445       return 0x50000000; /* PL */
 446     case TOK_LT:
 447       return 0xB0000000; /* LT */
 448     case TOK_GE:
 449       return 0xA0000000; /* GE */
 450     case TOK_LE:
 451       return 0xD0000000; /* LE */
 452     case TOK_GT:
 453       return 0xC0000000; /* GT */
 454   }
 455   tcc_error("unexpected condition code");
 456   return 0xE0000000; /* AL */
 457 }
 458
 459 static int negcc(int cc)
 460 {
 461   switch(cc)
 462   {
 463     case TOK_ULT:
 464       return TOK_UGE;
 465     case TOK_UGE:
 466       return TOK_ULT;
 467     case TOK_EQ:
 468       return TOK_NE;
 469     case TOK_NE:
 470       return TOK_EQ;
 471     case TOK_ULE:
 472       return TOK_UGT;
 473     case TOK_UGT:
 474       return TOK_ULE;
 475     case TOK_Nset:
 476       return TOK_Nclear;
 477     case TOK_Nclear:
 478       return TOK_Nset;
 479     case TOK_LT:
 480       return TOK_GE;
 481     case TOK_GE:
 482       return TOK_LT;
 483     case TOK_LE:
 484       return TOK_GT;
 485     case TOK_GT:
 486       return TOK_LE;
 487   }
 488   tcc_error("unexpected condition code");
 489   return TOK_NE;
 490 }
 491
 492 /* load 'r' from value 'sv' */
 493 void load(int r, SValue *sv)
 494 {
 495   int v, ft, fc, fr, sign;
 496   uint32_t op;
 497   SValue v1;
 498
 499   fr = sv->r;
 500   ft = sv->type.t;
 501   fc = sv->c.ul;
 502
 503   if(fc>=0)
 504     sign=0;
 505   else {
 506     sign=1;
 507     fc=-fc;
 508   }
 509
 510   v = fr & VT_VALMASK;
 511   if (fr & VT_LVAL) {
 512     uint32_t base = 0xB; // fp
 513     if(v == VT_LLOCAL) {
 514       v1.type.t = VT_PTR;
 515       v1.r = VT_LOCAL | VT_LVAL;
 516       v1.c.ul = sv->c.ul;
 517       load(base=14 /* lr */, &v1);
 518       fc=sign=0;
 519       v=VT_LOCAL;
 520     } else if(v == VT_CONST) {
 521       v1.type.t = VT_PTR;
 522       v1.r = fr&~VT_LVAL;
 523       v1.c.ul = sv->c.ul;
 524       v1.sym=sv->sym;
 525       load(base=14, &v1);
 526       fc=sign=0;
 527       v=VT_LOCAL;
 528     } else if(v < VT_CONST) {
 529       base=intr(v);
 530       fc=sign=0;
 531       v=VT_LOCAL;
 532     }
 533     if(v == VT_LOCAL) {
 534       if(is_float(ft)) {
 535         calcaddr(&base,&fc,&sign,1020,2);
 536 #ifdef TCC_ARM_VFP
 537         op=0xED100A00; /* flds */
 538         if(!sign)
 539           op|=0x800000;
 540         if ((ft & VT_BTYPE) != VT_FLOAT)
 541           op|=0x100;   /* flds -> fldd */
 542         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 543 #else
 544         op=0xED100100;
 545         if(!sign)
 546           op|=0x800000;
 547 #if LDOUBLE_SIZE == 8
 548         if ((ft & VT_BTYPE) != VT_FLOAT)
 549           op|=0x8000;
 550 #else
 551         if ((ft & VT_BTYPE) == VT_DOUBLE)
 552           op|=0x8000;
 553         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 554           op|=0x400000;
 555 #endif
 556         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 557 #endif
 558       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 559                 || (ft & VT_BTYPE) == VT_SHORT) {
 560         calcaddr(&base,&fc,&sign,255,0);
 561         op=0xE1500090;
 562         if ((ft & VT_BTYPE) == VT_SHORT)
 563           op|=0x20;
 564         if ((ft & VT_UNSIGNED) == 0)
 565           op|=0x40;
 566         if(!sign)
 567           op|=0x800000;
 568         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 569       } else {
 570         calcaddr(&base,&fc,&sign,4095,0);
 571         op=0xE5100000;
 572         if(!sign)
 573           op|=0x800000;
 574         if ((ft & VT_BTYPE) == VT_BYTE)
 575           op|=0x400000;
 576         o(op|(intr(r)<<12)|fc|(base<<16));
 577       }
 578       return;
 579     }
 580   } else {
 581     if (v == VT_CONST) {
 582       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 583       if (fr & VT_SYM || !op) {
 584         o(0xE59F0000|(intr(r)<<12));
 585         o(0xEA000000);
 586         if(fr & VT_SYM)
 587           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 588         o(sv->c.ul);
 589       } else
 590         o(op);
 591       return;
 592     } else if (v == VT_LOCAL) {
 593       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 594       if (fr & VT_SYM || !op) {
 595         o(0xE59F0000|(intr(r)<<12));
 596         o(0xEA000000);
 597         if(fr & VT_SYM) // needed ?
 598           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 599         o(sv->c.ul);
 600         o(0xE08B0000|(intr(r)<<12)|intr(r));
 601       } else
 602         o(op);
 603       return;
 604     } else if(v == VT_CMP) {
 605       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 606       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 607       return;
 608     } else if (v == VT_JMP || v == VT_JMPI) {
 609       int t;
 610       t = v & 1;
 611       o(0xE3A00000|(intr(r)<<12)|t);
 612       o(0xEA000000);
 613       gsym(sv->c.ul);
 614       o(0xE3A00000|(intr(r)<<12)|(t^1));
 615       return;
 616     } else if (v < VT_CONST) {
 617       if(is_float(ft))
 618 #ifdef TCC_ARM_VFP
 619         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 620 #else
 621         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 622 #endif
 623       else
 624         o(0xE1A00000|(intr(r)<<12)|intr(v));
 625       return;
 626     }
 627   }
 628   tcc_error("load unimplemented!");
 629 }
 630
 631 /* store register 'r' in lvalue 'v' */
 632 void store(int r, SValue *sv)
 633 {
 634   SValue v1;
 635   int v, ft, fc, fr, sign;
 636   uint32_t op;
 637
 638   fr = sv->r;
 639   ft = sv->type.t;
 640   fc = sv->c.ul;
 641
 642   if(fc>=0)
 643     sign=0;
 644   else {
 645     sign=1;
 646     fc=-fc;
 647   }
 648
 649   v = fr & VT_VALMASK;
 650   if (fr & VT_LVAL || fr == VT_LOCAL) {
 651     uint32_t base = 0xb;
 652     if(v < VT_CONST) {
 653       base=intr(v);
 654       v=VT_LOCAL;
 655       fc=sign=0;
 656     } else if(v == VT_CONST) {
 657       v1.type.t = ft;
 658       v1.r = fr&~VT_LVAL;
 659       v1.c.ul = sv->c.ul;
 660       v1.sym=sv->sym;
 661       load(base=14, &v1);
 662       fc=sign=0;
 663       v=VT_LOCAL;
 664     }
 665     if(v == VT_LOCAL) {
 666        if(is_float(ft)) {
 667         calcaddr(&base,&fc,&sign,1020,2);
 668 #ifdef TCC_ARM_VFP
 669         op=0xED000A00; /* fsts */
 670         if(!sign)
 671           op|=0x800000;
 672         if ((ft & VT_BTYPE) != VT_FLOAT)
 673           op|=0x100;   /* fsts -> fstd */
 674         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 675 #else
 676         op=0xED000100;
 677         if(!sign)
 678           op|=0x800000;
 679 #if LDOUBLE_SIZE == 8
 680         if ((ft & VT_BTYPE) != VT_FLOAT)
 681           op|=0x8000;
 682 #else
 683         if ((ft & VT_BTYPE) == VT_DOUBLE)
 684           op|=0x8000;
 685         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 686           op|=0x400000;
 687 #endif
 688         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 689 #endif
 690         return;
 691       } else if((ft & VT_BTYPE) == VT_SHORT) {
 692         calcaddr(&base,&fc,&sign,255,0);
 693         op=0xE14000B0;
 694         if(!sign)
 695           op|=0x800000;
 696         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 697       } else {
 698         calcaddr(&base,&fc,&sign,4095,0);
 699         op=0xE5000000;
 700         if(!sign)
 701           op|=0x800000;
 702         if ((ft & VT_BTYPE) == VT_BYTE)
 703           op|=0x400000;
 704         o(op|(intr(r)<<12)|fc|(base<<16));
 705       }
 706       return;
 707     }
 708   }
 709   tcc_error("store unimplemented");
 710 }
 711
 712 static void gadd_sp(int val)
 713 {
 714   stuff_const_harder(0xE28DD000,val);
 715 }
 716
 717 /* 'is_jmp' is '1' if it is a jump */
 718 static void gcall_or_jmp(int is_jmp)
 719 {
 720   int r;
 721   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 722     uint32_t x;
 723     /* constant case */
 724     x=encbranch(ind,ind+vtop->c.ul,0);
 725     if(x) {
 726       if (vtop->r & VT_SYM) {
 727         /* relocation case */
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 729       } else
 730         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 731       o(x|(is_jmp?0xE0000000:0xE1000000));
 732     } else {
 733       if(!is_jmp)
 734         o(0xE28FE004); // add lr,pc,#4
 735       o(0xE51FF004);   // ldr pc,[pc,#-4]
 736       if (vtop->r & VT_SYM)
 737         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 738       o(vtop->c.ul);
 739     }
 740   } else {
 741     /* otherwise, indirect call */
 742     r = gv(RC_INT);
 743     if(!is_jmp)
 744       o(0xE1A0E00F);       // mov lr,pc
 745     o(0xE1A0F000|intr(r)); // mov pc,r
 746   }
 747 }
 748
 749 #ifdef TCC_ARM_HARDFLOAT
 750 static int is_float_hgen_aggr(CType *type)
 751 {
 752   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 753     struct Sym *ref;
 754     int btype, nb_fields = 0;
 755
 756     ref = type->ref;
 757     btype = ref->type.t & VT_BTYPE;
 758     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 759       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 760       return !ref && nb_fields <= 4;
 761     }
 762   }
 763   return 0;
 764 }
 765
 766 struct avail_regs {
 767   /* worst case: f(float, double, 3 float struct, double, 3 float struct, double) */
 768   signed char avail[3];
 769   int first_hole;
 770   int last_hole;
 771   int first_free_reg;
 772 };
 773
 774 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 775
 776 /* Assign a register for a CPRC param with correct size and alignment
 777  * size and align are in bytes, as returned by type_size */
 778 int assign_fpreg(struct avail_regs *avregs, int align, int size)
 779 {
 780   int first_reg = 0;
 781
 782   if (avregs->first_free_reg == -1)
 783     return -1;
 784   if (align >> 3) { // alignment needed (base type: double)
 785     first_reg = avregs->first_free_reg;
 786     if (first_reg & 1)
 787       avregs->avail[avregs->last_hole++] = first_reg++;
 788   } else {
 789     if (size == 4 && avregs->first_hole != avregs->last_hole)
 790       return avregs->avail[avregs->first_hole++];
 791     else
 792       first_reg = avregs->first_free_reg;
 793   }
 794   if (first_reg + size / 4 <= 16) {
 795     avregs->first_free_reg = first_reg + size / 4;
 796     return first_reg;
 797   }
 798   avregs->first_free_reg = -1;
 799   return -1;
 800 }
 801 #endif
 802
 803 /* Return 1 if this function returns via an sret pointer, 0 otherwise */
 804 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *align) {
 805     size = type_size(vt, &align);
 806     if (size > 4) {
 807         return 1;
 808     } else {
 809         *align = 4;
 810         ret->ref = NULL;
 811         ret->t = VT_INT;
 812     }
 813     return 0;
 814 }
 815
 816 /* Generate function call. The function address is pushed first, then
 817    all the parameters in call order. This functions pops all the
 818    parameters and the function address. */
 819 void gfunc_call(int nb_args)
 820 {
 821   int size, align, r, args_size, i, ncrn, ncprn, argno, vfp_argno;
 822   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 823   SValue *before_stack = NULL; /* SValue before first on stack argument */
 824   SValue *before_vfpreg_hfa = NULL; /* SValue before first in VFP reg hfa argument */
 825 #ifdef TCC_ARM_HARDFLOAT
 826   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 827   signed char vfp_plan[16];
 828   int plan2[4+16];
 829   int variadic;
 830 #else
 831   int plan2[4]={0,0,0,0};
 832 #endif
 833   int vfp_todo=0;
 834   int todo=0, keep;
 835
 836 #ifdef TCC_ARM_HARDFLOAT
 837   memset(vfp_plan, -1, sizeof(vfp_plan));
 838   memset(plan2, 0, sizeof(plan2));
 839   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
 840 #endif
 841   r = vtop->r & VT_VALMASK;
 842   if (r == VT_CMP || (r & ~1) == VT_JMP)
 843     gv(RC_INT);
 844 #ifdef TCC_ARM_EABI
 845   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 846      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
 847     SValue tmp;
 848     tmp=vtop[-nb_args];
 849     vtop[-nb_args]=vtop[-nb_args+1];
 850     vtop[-nb_args+1]=tmp;
 851     --nb_args;
 852   }
 853
 854   vpushi(0), nb_args++;
 855   vtop->type.t = VT_LLONG;
 856 #endif
 857   ncrn = ncprn = argno = vfp_argno = args_size = 0;
 858   /* Assign argument to registers and stack with alignment.
 859      If, considering alignment constraints, enough registers of the correct type
 860      (core or VFP) are free for the current argument, assign them to it, else
 861      allocate on stack with correct alignment. Whenever a structure is allocated
 862      in registers or on stack, it is always put on the stack at this stage. The
 863      stack is divided in 3 zones. The zone are, from low addresses to high
 864      addresses: structures to be loaded in core registers, structures to be
 865      loaded in VFP registers, argument allocated to stack. SValue's representing
 866      structures in the first zone are moved just after the SValue pointed by
 867      before_vfpreg_hfa. SValue's representing structures in the second zone are
 868      moved just after the SValue pointer by before_stack. */
 869   for(i = nb_args; i-- ;) {
 870     int j, assigned_vfpreg = 0;
 871     size = type_size(&vtop[-i].type, &align);
 872     switch(vtop[-i].type.t & VT_BTYPE) {
 873       case VT_STRUCT:
 874       case VT_FLOAT:
 875       case VT_DOUBLE:
 876       case VT_LDOUBLE:
 877 #ifdef TCC_ARM_HARDFLOAT
 878       if (!variadic) {
 879         int hfa = 0; /* Homogeneous float aggregate */
 880
 881         if (is_float(vtop[-i].type.t)
 882             || (hfa = is_float_hgen_aggr(&vtop[-i].type))) {
 883           int end_reg;
 884
 885           assigned_vfpreg = assign_fpreg(&avregs, align, size);
 886           end_reg = assigned_vfpreg + (size - 1) / 4;
 887           if (assigned_vfpreg >= 0) {
 888             vfp_plan[vfp_argno++]=TREG_F0 + assigned_vfpreg/2;
 889             if (hfa) {
 890               /* before_stack can only have been set because all core registers
 891                  are assigned, so no need to care about before_vfpreg_hfa if
 892                  before_stack is set */
 893               if (before_stack) {
 894                 vrote(&vtop[-i], &vtop[-i] - before_stack);
 895                 before_stack++;
 896               } else if (!before_vfpreg_hfa)
 897                 before_vfpreg_hfa = &vtop[-i-1];
 898               for (j = assigned_vfpreg; j <= end_reg; j++)
 899                 vfp_todo|=(1<<j);
 900             }
 901             continue;
 902           } else {
 903             if (!hfa)
 904               vfp_argno++;
 905             /* No need to update before_stack as no more hfa can be allocated in
 906                VFP regs */
 907             if (!before_vfpreg_hfa)
 908               before_vfpreg_hfa = &vtop[-i-1];
 909             break;
 910           }
 911         }
 912       }
 913 #endif
 914       ncrn = (ncrn + (align-1)/4) & -(align/4);
 915       size = (size + 3) & -4;
 916       if (ncrn + size/4 <= 4 || (ncrn < 4 && assigned_vfpreg != -1)) {
 917         /* Either there is HFA in VFP registers, or there is arguments on stack,
 918            it cannot be both. Hence either before_stack already points after
 919            the slot where the vtop[-i] SValue is moved, or before_stack will not
 920            be used */
 921         if (before_vfpreg_hfa) {
 922           vrote(&vtop[-i], &vtop[-i] - before_vfpreg_hfa);
 923           before_vfpreg_hfa++;
 924         }
 925         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 926           todo|=(1<<j);
 927         ncrn+=size/4;
 928         if (ncrn > 4) {
 929           args_size = (ncrn - 4) * 4;
 930           if (!before_stack)
 931             before_stack = &vtop[-i-1];
 932         }
 933       }
 934       else {
 935         ncrn = 4;
 936         /* No need to set before_vfpreg_hfa if not set since there will no
 937            longer be any structure assigned to core registers */
 938         if (!before_stack)
 939           before_stack = &vtop[-i-1];
 940         break;
 941       }
 942       continue;
 943       default:
 944 #ifdef TCC_ARM_EABI
 945       if (!i) {
 946         break;
 947       }
 948 #endif
 949       if (ncrn < 4) {
 950         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 951
 952         if (is_long) {
 953           ncrn = (ncrn + 1) & -2;
 954           if (ncrn == 4) {
 955             argno++;
 956             break;
 957           }
 958         }
 959         plan[argno++][0]=ncrn++;
 960         if (is_long) {
 961           plan[argno-1][1]=ncrn++;
 962         }
 963         continue;
 964       }
 965       argno++;
 966     }
 967 #ifdef TCC_ARM_EABI
 968     if(args_size & (align-1)) {
 969       vpushi(0);
 970       vtop->type.t = VT_VOID; /* padding */
 971       vrott(i+2);
 972       args_size += 4;
 973       nb_args++;
 974       argno++;
 975     }
 976 #endif
 977     args_size += (size + 3) & -4;
 978   }
 979 #ifdef TCC_ARM_EABI
 980   vtop--, nb_args--;
 981 #endif
 982   args_size = keep = 0;
 983   for(i = 0;i < nb_args; i++) {
 984     vrotb(keep+1);
 985     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 986       size = type_size(&vtop->type, &align);
 987       /* align to stack align size */
 988       size = (size + 3) & -4;
 989       /* allocate the necessary size on stack */
 990       gadd_sp(-size);
 991       /* generate structure store */
 992       r = get_reg(RC_INT);
 993       o(0xE1A0000D|(intr(r)<<12));
 994       vset(&vtop->type, r | VT_LVAL, 0);
 995       vswap();
 996       vstore();
 997       vtop--;
 998       args_size += size;
 999     } else if (is_float(vtop->type.t)) {
1000 #ifdef TCC_ARM_HARDFLOAT
1001       if (!variadic && --vfp_argno<16 && vfp_plan[vfp_argno]!=-1) {
1002         plan2[keep++]=vfp_plan[vfp_argno];
1003         continue;
1004       }
1005 #endif
1006 #ifdef TCC_ARM_VFP
1007       r=vfpr(gv(RC_FLOAT))<<12;
1008       size=4;
1009       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1010       {
1011         size=8;
1012         r|=0x101; /* fstms -> fstmd */
1013       }
1014       o(0xED2D0A01+r);
1015 #else
1016       r=fpr(gv(RC_FLOAT))<<12;
1017       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
1018         size = 4;
1019       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1020         size = 8;
1021       else
1022         size = LDOUBLE_SIZE;
1023
1024       if (size == 12)
1025         r|=0x400000;
1026       else if(size == 8)
1027         r|=0x8000;
1028
1029       o(0xED2D0100|r|(size>>2));
1030 #endif
1031       vtop--;
1032       args_size += size;
1033     } else {
1034       int s;
1035       /* simple type (currently always same size) */
1036       /* XXX: implicit cast ? */
1037       size=4;
1038       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1039         lexpand_nr();
1040         s=-1;
1041         if(--argno<4 && plan[argno][1]!=-1)
1042           s=plan[argno][1];
1043         argno++;
1044         size = 8;
1045         if(s==-1) {
1046           r = gv(RC_INT);
1047           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1048           vtop--;
1049         } else {
1050           size=0;
1051           plan2[keep]=s;
1052           keep++;
1053           vswap();
1054         }
1055       }
1056       s=-1;
1057       if(--argno<4 && plan[argno][0]!=-1)
1058         s=plan[argno][0];
1059 #ifdef TCC_ARM_EABI
1060       if(vtop->type.t == VT_VOID) {
1061         if(s == -1)
1062           o(0xE24DD004); /* sub sp,sp,#4 */
1063         vtop--;
1064       } else
1065 #endif
1066       if(s == -1) {
1067         r = gv(RC_INT);
1068         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1069         vtop--;
1070       } else {
1071         size=0;
1072         plan2[keep]=s;
1073         keep++;
1074       }
1075       args_size += size;
1076     }
1077   }
1078   for(i = 0; i < keep; i++) {
1079     vrotb(keep);
1080     gv(regmask(plan2[i]));
1081 #ifdef TCC_ARM_HARDFLOAT
1082     /* arg is in s(2d+1): plan2[i]<plan2[i+1] => alignment occured (ex f,d,f) */
1083     if (i < keep - 1 && is_float(vtop->type.t) && (plan2[i] <= plan2[i + 1])) {
1084       o(0xEEF00A40|(vfpr(plan2[i])<<12)|vfpr(plan2[i]));
1085     }
1086 #endif
1087   }
1088 save_regs(keep); /* save used temporary registers */
1089   keep++;
1090   if(ncrn) {
1091     int nb_regs=0;
1092     if (ncrn>4)
1093       ncrn=4;
1094     todo&=((1<<ncrn)-1);
1095     if(todo) {
1096       int i;
1097       o(0xE8BD0000|todo);
1098       for(i=0;i<4;i++)
1099         if(todo&(1<<i)) {
1100           vpushi(0);
1101           vtop->r=i;
1102           keep++;
1103           nb_regs++;
1104         }
1105     }
1106     args_size-=nb_regs*4;
1107   }
1108   if(vfp_todo) {
1109     int nb_fregs=0;
1110
1111     for(i=0;i<16;i++)
1112       if(vfp_todo&(1<<i)) {
1113         o(0xED9D0A00|(i&1)<<22|(i>>1)<<12|nb_fregs);
1114         vpushi(0);
1115         /* There might be 2 floats in a double VFP reg but that doesn't seem
1116            to matter */
1117         if (!(i%2))
1118           vtop->r=TREG_F0+i/2;
1119         keep++;
1120         nb_fregs++;
1121       }
1122     if (nb_fregs) {
1123       gadd_sp(nb_fregs*4);
1124       args_size-=nb_fregs*4;
1125     }
1126   }
1127   vrotb(keep);
1128   gcall_or_jmp(0);
1129   if (args_size)
1130       gadd_sp(args_size);
1131 #ifdef TCC_ARM_EABI
1132   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1133      && type_size(&vtop->type.ref->type, &align) <= 4)
1134   {
1135     store(REG_IRET,vtop-keep);
1136     ++keep;
1137   }
1138 #ifdef TCC_ARM_VFP
1139 #ifdef TCC_ARM_HARDFLOAT
1140   else if(variadic && is_float(vtop->type.ref->type.t)) {
1141 #else
1142   else if(is_float(vtop->type.ref->type.t)) {
1143 #endif
1144     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1145       o(0xEE000A10); /* fmsr s0,r0 */
1146     } else {
1147       o(0xEE000B10); /* fmdlr d0,r0 */
1148       o(0xEE201B10); /* fmdhr d0,r1 */
1149     }
1150   }
1151 #endif
1152 #endif
1153   vtop-=keep;
1154   leaffunc = 0;
1155 }
1156
1157 /* Generate the prolog of the function whose type is 'func_type'.
      *
      * A first walk over the parameter list counts the core register words
      * (n, capped at 4) and, with TCC_ARM_HARDFLOAT, the VFP single registers
      * (nf, capped at 16) that carry arguments; those registers are pushed on
      * entry.  A second walk computes an fp-relative address for every
      * parameter and declares it with sym_push().  The nop emitted at
      * func_sub_sp_offset is a placeholder that gfunc_epilog() overwrites
      * with the stack adjustment once the frame size is known. */
1158 void gfunc_prolog(CType *func_type)
1159 {
1160   Sym *sym,*sym2;
1161   int n,nf,size,align, variadic, struct_ret = 0;
1162 #ifdef TCC_ARM_HARDFLOAT
1163   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1164 #endif
1165
1166   sym = func_type->ref;
1167   func_vt = sym->type;
1168
1169   n = nf = 0;
1170   variadic = (func_type->ref->c == FUNC_ELLIPSIS);
1171   if((func_vt.t & VT_BTYPE) == VT_STRUCT
1172      && type_size(&func_vt,&align) > 4)
1173   {
1174     n++; /* reserve one core register word (presumably the hidden result pointer) */
1175     struct_ret = 1;
1176     func_vc = 12; /* Offset from fp of the place to store the result */
1177   }
     /* first pass: count the register words (n) and VFP registers (nf) in use */
1178   for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1179     size = type_size(&sym2->type, &align);
1180 #ifdef TCC_ARM_HARDFLOAT
1181     if (!variadic && (is_float(sym2->type.t)
1182         || is_float_hgen_aggr(&sym2->type))) {
1183       int tmpnf = assign_fpreg(&avregs, align, size) + 1;
1184       nf = (tmpnf > nf) ? tmpnf : nf; /* keep the highest count seen so far */
1185     } else
1186 #endif
1187     if (n < 4)
1188       n += (size + 3) / 4; /* size in words, rounded up */
1189   }
1190   o(0xE1A0C00D); /* mov ip,sp */
1191   if(variadic)
1192     n=4; /* variadic: always spill all four argument register words */
1193   if(n) {
1194     if(n>4)
1195       n=4;
1196 #ifdef TCC_ARM_EABI
1197     n=(n+1)&-2; /* round up to an even number of registers */
1198 #endif
1199     o(0xE92D0000|((1<<n)-1)); /* save the first n of r0-r3 on stack if needed */
1200   }
1201   if (nf) {
1202     if (nf>16)
1203       nf=16;
1204     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1205     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1206   }
1207   o(0xE92D5800); /* save fp, ip, lr */
1208   o(0xE1A0B00D); /* mov fp, sp */
1209   func_sub_sp_offset = ind; /* remember where the placeholder lives */
1210   o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1211   {
1212     int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1213
1214 #ifdef TCC_ARM_HARDFLOAT
1215     avregs = AVAIL_REGS_INITIALIZER; /* restart the VFP allocation for pass two */
1216 #endif
     /* second pass: give every parameter its fp-relative address */
1217     while ((sym = sym->next)) {
1218       CType *type;
1219       type = &sym->type;
1220       size = type_size(type, &align);
1221       size = (size + 3) >> 2; /* from here on 'size' is in words */
1222       align = (align + 3) & ~3; /* alignment rounded up to a multiple of 4 */
1223 #ifdef TCC_ARM_HARDFLOAT
1224       if (!variadic && (is_float(sym->type.t)
1225           || is_float_hgen_aggr(&sym->type))) {
1226         int fpn = assign_fpreg(&avregs, align, size << 2);
1227         if (fpn >= 0) {
1228           addr = fpn * 4; /* slot inside the saved VFP register area */
1229         } else
1230           goto from_stack; /* no VFP register assigned: parameter is on the stack */
1231       } else
1232 #endif
1233       if (pn < 4) {
1234 #ifdef TCC_ARM_EABI
1235         pn = (pn + (align-1)/4) & -(align/4);
1236 #endif
1237         addr = (nf + pn) * 4; /* saved core registers follow the saved VFP ones */
1238         pn += size;
1239         if (!sn && pn > 4)
1240           sn = (pn - 4); /* words of this parameter that spilled past r3 */
1241       } else {
1242 #ifdef TCC_ARM_HARDFLOAT
1243 from_stack:
1244 #endif
1245 #ifdef TCC_ARM_EABI
1246         sn = (sn + (align-1)/4) & -(align/4);
1247 #endif
1248         addr = (n + nf + sn) * 4; /* beyond both saved register areas */
1249         sn += size;
1250       }
     /* +12 skips the three words pushed by "save fp, ip, lr" above */
1251       sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
1252     }
1253   }
1254   last_itod_magic=0;
1255   leaffunc = 1; /* cleared again by gfunc_call() */
1256   loc = 0;
1257 }
1258
1259 /* Emit the function epilog and back-patch the prolog's placeholder nop. */
1260 void gfunc_epilog(void)
1261 {
1262   uint32_t patch;
1263   int frame_size;
1264 #ifdef TCC_ARM_EABI
1265   /* Mirror a float result into the core register(s).  Only of use for
1266      variadic functions in the hardfloat variant, harmless otherwise. */
1267   if (is_float(func_vt.t)) {
1268     if ((func_vt.t & VT_BTYPE) != VT_FLOAT) {
1269       o(0xEE100B10); /* fmrdl r0, d0 */
1270       o(0xEE301B10); /* fmrdh r1, d0 */
1271     } else {
1272       o(0xEE100A10); /* fmrs r0, s0 */
1273     }
1274   }
1275 #endif
1276   o(0xE89BA800); /* restore fp, sp, pc */
1277   frame_size = (-loc + 3) & -4; /* -loc rounded up to a multiple of 4 */
1278 #ifdef TCC_ARM_EABI
1279   if (!leaffunc) /* non-leaf: pad so that frame_size + 4 is a multiple of 8 */
1280     frame_size = ((frame_size + 11) & -8) - 4;
1281 #endif
1282   if (frame_size <= 0)
1283     return; /* nothing to reserve: the prolog's nop stays as it is */
1284   patch = stuff_const(0xE24BD000, frame_size); /* sub sp,fp,# */
1285   if (!patch) {
1286     /* stuff_const() gave no encoding for this size: emit an out-of-line
1287        helper here and make the placeholder branch to it instead. */
1288     const int helper = ind;
1289     o(0xE59FC004); /* ldr ip,[pc+4] */
1290     o(0xE04BD00C); /* sub sp,fp,ip  */
1291     o(0xE1A0F00E); /* mov pc,lr */
1292     o(frame_size);
1293     patch = 0xE1000000 | encbranch(func_sub_sp_offset, helper, 1);
1294   }
1295   *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = patch;
1296 }
1297
1298 /* Emit a jump to label/chain 't'; return the text offset of the jump itself. */
1299 int gjmp(int t)
1300 {
1301   const int here = ind;          /* position of the branch about to be emitted */
1302   /* 0xE in the top nibble is the ARM "always" condition */
1303   o(0xE0000000 | encbranch(here, t, 1));
1304   return here;
1305 }
1306
1307 /* Emit a jump to the fixed address 'a'; the position gjmp() returns is unused. */
1308 void gjmp_addr(int a)
1309 {
1310   (void)gjmp(a);
1311 }
1312
1313 /* Generate a test on the value on top of the value stack and pop it.
      * 'inv' inverts the sense of the test; 't' is the jump target/chain the
      * emitted branch is linked to.  Returns the updated jump chain.
      *  - VT_CMP: emit one conditional branch (condition from mapcc()).
      *  - VT_JMP/VT_JMPI: reuse the pending jump list, appending 't' to it
      *    when the polarity matches, otherwise emit a jump and resolve the
      *    list here with gsym().
      *  - constant: emit an unconditional jump or nothing at all.
      *  - anything else: compare the value against zero, mark it as
      *    VT_CMP/TOK_NE and recurse. */
1314 int gtst(int inv, int t)
1315 {
1316   int v, r;
1317   uint32_t op;
1318   v = vtop->r & VT_VALMASK;
1319   r=ind; /* position of the branch that may be emitted below */
1320   if (v == VT_CMP) {
1321     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1322     op|=encbranch(r,t,1);
1323     o(op);
1324     t=r; /* the new branch becomes the head of the chain */
1325   } else if (v == VT_JMP || v == VT_JMPI) {
1326     if ((v & 1) == inv) {
1327       if(!vtop->c.i)
1328         vtop->c.i=t;
1329       else {
1330         uint32_t *x;
1331         int p,lp;
1332         if(t) {
1333           p = vtop->c.i;
     /* follow the pending list until decbranch() returns 0; lp is its last entry */
1334           do {
1335             p = decbranch(lp=p);
1336           } while(p);
1337           x = (uint32_t *)(cur_text_section->data + lp);
1338           *x &= 0xff000000; /* keep the top byte, drop the old 24-bit offset */
1339           *x |= encbranch(lp,t,1); /* link the last entry to 't' */
1340         }
1341         t = vtop->c.i;
1342       }
1343     } else {
1344       t = gjmp(t);
1345       gsym(vtop->c.i);
1346     }
1347   } else {
1348     if (is_float(vtop->type.t)) {
1349       r=gv(RC_FLOAT);
1350 #ifdef TCC_ARM_VFP
1351       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1352       o(0xEEF1FA10); /* fmstat */
1353 #else
1354       o(0xEE90F118|(fpr(r)<<16));
1355 #endif
1356       vtop->r = VT_CMP;
1357       vtop->c.i = TOK_NE;
1358       return gtst(inv, t); /* now handled by the VT_CMP branch above */
1359     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1360       /* constant jmp optimization */
1361       if ((vtop->c.i != 0) != inv)
1362         t = gjmp(t);
1363     } else {
1364       v = gv(RC_INT);
1365       o(0xE3300000|(intr(v)<<16)); /* teq v,#0 per the ARM data-processing encoding */
1366       vtop->r = VT_CMP;
1367       vtop->c.i = TOK_NE;
1368       return gtst(inv, t); /* now handled by the VT_CMP branch above */
1369     }
1370   }
1371   vtop--;
1372   return t;
1373 }
1374
1375 /* Generate an integer binary operation 'op' on the two topmost value
      * stack entries; one entry is popped and the result replaces the other.
      *
      * The first switch classifies the operation through 'c':
      *   1 - ALU data-processing instruction; 'opc' is shifted to bit 20 of
      *       the instruction word.  Comparisons use this path too and end
      *       up as VT_CMP on the value stack;
      *   2 - shift, emitted as a mov with a shifted operand ('opc' is the
      *       shift type);
      *   3 - call to a runtime helper ('func'), result read from 'retreg'.
      * '*' and TOK_UMULL are emitted directly and return early. */
1376 void gen_opi(int op)
1377 {
1378   int c, func = 0;
1379   uint32_t opc = 0, r, fr;
1380   unsigned short retreg = REG_IRET;
1381
1382   c=0;
1383   switch(op) {
1384     case '+':
1385       opc = 0x8;
1386       c=1;
1387       break;
1388     case TOK_ADDC1: /* add with carry generation */
1389       opc = 0x9;
1390       c=1;
1391       break;
1392     case '-':
1393       opc = 0x4;
1394       c=1;
1395       break;
1396     case TOK_SUBC1: /* sub with carry generation */
1397       opc = 0x5;
1398       c=1;
1399       break;
1400     case TOK_ADDC2: /* add with carry use */
1401       opc = 0xA;
1402       c=1;
1403       break;
1404     case TOK_SUBC2: /* sub with carry use */
1405       opc = 0xC;
1406       c=1;
1407       break;
1408     case '&':
1409       opc = 0x0;
1410       c=1;
1411       break;
1412     case '^':
1413       opc = 0x2;
1414       c=1;
1415       break;
1416     case '|':
1417       opc = 0x18;
1418       c=1;
1419       break;
1420     case '*':
1421       gv2(RC_INT, RC_INT);
1422       r = vtop[-1].r;
1423       fr = vtop[0].r;
1424       vtop--;
1425       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1426       return;
1427     case TOK_SHL:
1428       opc = 0;
1429       c=2;
1430       break;
1431     case TOK_SHR:
1432       opc = 1;
1433       c=2;
1434       break;
1435     case TOK_SAR:
1436       opc = 2;
1437       c=2;
1438       break;
1439     case '/':
1440     case TOK_PDIV:
1441       func=TOK___divsi3;
1442       c=3;
1443       break;
1444     case TOK_UDIV:
1445       func=TOK___udivsi3;
1446       c=3;
1447       break;
1448     case '%':
1449 #ifdef TCC_ARM_EABI
1450       func=TOK___aeabi_idivmod;
1451       retreg=REG_LRET; /* presumably the remainder comes back in the second result register */
1452 #else
1453       func=TOK___modsi3;
1454 #endif
1455       c=3;
1456       break;
1457     case TOK_UMOD:
1458 #ifdef TCC_ARM_EABI
1459       func=TOK___aeabi_uidivmod;
1460       retreg=REG_LRET; /* presumably the remainder comes back in the second result register */
1461 #else
1462       func=TOK___umodsi3;
1463 #endif
1464       c=3;
1465       break;
1466     case TOK_UMULL:
1467       gv2(RC_INT, RC_INT);
1468       r=intr(vtop[-1].r2=get_reg(RC_INT)); /* extra register for the second result word */
1469       c=vtop[-1].r;
1470       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1471       vtop--;
1472       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1473       return;
1474     default:
     /* every other token is emitted as a compare; see the VT_CMP fixup at 'done' */
1475       opc = 0x15;
1476       c=1;
1477       break;
1478   }
1479   switch(c) {
1480     case 1:
     /* a constant left operand of a subtraction is swapped to the right side
        and the instruction reversed, so the constant may become an immediate */
1481       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1482         if(opc == 4 || opc == 5 || opc == 0xc) {
1483           vswap();
1484           opc|=2; // sub -> rsb
1485         }
1486       }
     /* a pending comparison/jump result must become a real value first */
1487       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1488           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1489         gv(RC_INT);
1490       vswap();
1491       c=intr(gv(RC_INT)); /* first operand register */
1492       vswap();
1493       opc=0xE0000000|(opc<<20)|(c<<16);
1494       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1495         uint32_t x;
1496         x=stuff_const(opc|0x2000000,vtop->c.i);
1497         if(x) { /* stuff_const() produced an immediate form */
1498           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1499           o(x|(r<<12));
1500           goto done;
1501         }
1502       }
     /* register form: second operand loaded into a register */
1503       fr=intr(gv(RC_INT));
1504       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1505       o(opc|(r<<12)|fr);
1506 done:
1507       vtop--;
1508       if (op >= TOK_ULT && op <= TOK_GT) {
1509         vtop->r = VT_CMP; /* result lives in the flags, condition is 'op' */
1510         vtop->c.i = op;
1511       }
1512       break;
1513     case 2:
1514       opc=0xE1A00000|(opc<<5);
1515       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1516           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1517         gv(RC_INT);
1518       vswap();
1519       r=intr(gv(RC_INT)); /* value being shifted */
1520       vswap();
1521       opc|=r;
1522       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1523         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1524         c = vtop->c.i & 0x1f; /* constant shift count, low five bits only */
1525         o(opc|(c<<7)|(fr<<12));
1526       } else {
1527         fr=intr(gv(RC_INT)); /* shift count in a register */
1528         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1529         o(opc|(c<<12)|(fr<<8)|0x10);
1530       }
1531       vtop--;
1532       break;
1533     case 3:
     /* call func(a, b): push the helper symbol below the two operands */
1534       vpush_global_sym(&func_old_type, func);
1535       vrott(3);
1536       gfunc_call(2);
1537       vpushi(0);
1538       vtop->r = retreg;
1539       break;
1540     default:
1541       tcc_error("gen_opi %i unimplemented!",op);
1542   }
1543 }
1544
1545 #ifdef TCC_ARM_VFP
1546 static int is_zero(int i)
1547 {
1548   /* a value that is not a plain constant cannot be tested at compile time */
1549   if ((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1550     return 0;
1551   /* read the constant field matching the type word; long double otherwise */
1552   return vtop[i].type.t == VT_FLOAT  ? vtop[i].c.f == 0.f
1553        : vtop[i].type.t == VT_DOUBLE ? vtop[i].c.d == 0.0
1554        :                               vtop[i].c.ld == 0.l;
1555 }
1556
1557 /* Generate a floating point operation 'v = t1 op t2' (VFP variant). The
1558  *    two operands are guaranteed to have the same floating point type.
      *    Adding or subtracting a constant zero is folded away, "0 - x" is
      *    emitted as a negation, and comparisons leave VT_CMP with a
      *    condition token on the value stack instead of a register result. */
1559 void gen_opf(int op)
1560 {
1561   uint32_t x;
1562   int fneg=0,r;
1563   x=0xEE000A00|T2CPR(vtop->type.t);
1564   switch(op) {
1565     case '+':
1566       if(is_zero(-1))
1567         vswap(); /* bring the zero constant to the top */
1568       if(is_zero(0)) {
1569         vtop--; /* x + 0: the other operand is the result */
1570         return;
1571       }
1572       x|=0x300000;
1573       break;
1574     case '-':
1575       x|=0x300040;
1576       if(is_zero(0)) {
1577         vtop--; /* x - 0: the left operand is the result */
1578         return;
1579       }
1580       if(is_zero(-1)) {
1581         x|=0x810000; /* fsubX -> fnegX */
1582         vswap();
1583         vtop--; /* drop the zero, only one operand remains */
1584         fneg=1;
1585       }
1586       break;
1587     case '*':
1588       x|=0x200000;
1589       break;
1590     case '/':
1591       x|=0x800000;
1592       break;
1593     default:
1594       if(op < TOK_ULT || op > TOK_GT) {
1595         tcc_error("unknown fp op %x!",op);
1596         return;
1597       }
1598       if(is_zero(-1)) {
     /* zero on the left: swap the operands and mirror the condition so the
        compare-with-zero form below can be used */
1599         vswap();
1600         switch(op) {
1601           case TOK_LT: op=TOK_GT; break;
1602           case TOK_GE: op=TOK_ULE; break;
1603           case TOK_LE: op=TOK_GE; break;
1604           case TOK_GT: op=TOK_ULT; break;
1605         }
1606       }
1607       x|=0xB40040; /* fcmpX */
1608       if(op!=TOK_EQ && op!=TOK_NE)
1609         x|=0x80; /* fcmpX -> fcmpeX */
1610       if(is_zero(0)) {
1611         vtop--;
1612         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1613       } else {
1614         x|=vfpr(gv(RC_FLOAT));
1615         vswap();
1616         o(x|(vfpr(gv(RC_FLOAT))<<12));
1617         vtop--;
1618       }
1619       o(0xEEF1FA10); /* fmstat */
1620
     /* translate the condition token for use after fmstat */
1621       switch(op) {
1622         case TOK_LE: op=TOK_ULE; break;
1623         case TOK_LT: op=TOK_ULT; break;
1624         case TOK_UGE: op=TOK_GE; break;
1625         case TOK_UGT: op=TOK_GT; break;
1626       }
1627
1628       vtop->r = VT_CMP;
1629       vtop->c.i = op;
1630       return;
1631   }
     /* arithmetic: load the operand(s), then pick a destination register */
1632   r=gv(RC_FLOAT);
1633   x|=vfpr(r);
1634   r=regmask(r);
1635   if(!fneg) {
1636     int r2;
1637     vswap();
1638     r2=gv(RC_FLOAT);
1639     x|=vfpr(r2)<<16;
1640     r|=regmask(r2);
1641   }
1642   vtop->r=get_reg_ex(RC_FLOAT,r);
1643   if(!fneg)
1644     vtop--;
1645   o(x|(vfpr(vtop->r)<<12));
1646 }
1647
1648 #else
1649 static uint32_t is_fconst()
1650 {
1651   /* Magnitudes recognised for the value on top of the value stack; the array
1652      index is OR-ed into the result (presumably the FPA immediate number). */
1653   static const double known[8] = {
1654     0.0,
1655     1.0,
1656     2.0,
1657     3.0,
1658     4.0,
1659     5.0,
1660     0.5,
1661     10.0
1662   };
1663   long double val;
1664   uint32_t code, idx;
1665
1666   /* only a plain constant (no lvalue, no symbol) qualifies */
1667   if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1668     return 0;
1669   val = vtop->type.t == VT_FLOAT  ? vtop->c.f
1670       : vtop->type.t == VT_DOUBLE ? vtop->c.d
1671       :                             vtop->c.ld;
1672   if (!ieee_finite(val))
1673     return 0;
1674   /* 0x8 flags a recognised constant; 0x10 is added when it was negative */
1675   code = 0x8;
1676   if (val < 0.0) {
1677     code = 0x18;
1678     val = -val;
1679   }
1680   for (idx = 0; idx < 8; idx++)
1681     if (val == known[idx])
1682       return code | idx;
1683   /* finite, but not one of the eight recognised magnitudes */
1684   return 0;
1685 }
1686
1687 /* Generate a floating point operation 'v = t1 op t2' (FPA variant). The
1688    two operands are guaranteed to have the same floating point type.
        c1 and c2 are the is_fconst() codes of the left and right operand
        (0 when the operand is not a recognised constant).  'r' receives the
        register of the first instruction operand and 'r2' either a register
        or a constant code with bit 3 set.  Comparisons leave VT_CMP with a
        condition token on the value stack. */
1689 void gen_opf(int op)
1690 {
1691   uint32_t x, r, r2, c1, c2;
1692   //fputs("gen_opf\n",stderr);
1693   vswap();
1694   c1 = is_fconst();
1695   vswap();
1696   c2 = is_fconst();
1697   x=0xEE000100;
1698 #if LDOUBLE_SIZE == 8
1699   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1700     x|=0x80;
1701 #else
1702   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1703     x|=0x80;
1704   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1705     x|=0x80000;
1706 #endif
1707   switch(op)
1708   {
1709     case '+':
1710       if(!c2) {
1711         vswap(); /* addition commutes: try the other operand as the constant */
1712         c2=c1;
1713       }
1714       vswap();
1715       r=fpr(gv(RC_FLOAT));
1716       vswap();
1717       if(c2) {
1718         if(c2>0xf) /* negative constant: subtract its magnitude instead */
1719           x|=0x200000; // suf
1720         r2=c2&0xf;
1721       } else {
1722         r2=fpr(gv(RC_FLOAT));
1723       }
1724       break;
1725     case '-':
1726       if(c2) {
1727         if(c2<=0xf) /* positive constant: plain subtract; negative: falls back to add */
1728           x|=0x200000; // suf
1729         r2=c2&0xf;
1730         vswap();
1731         r=fpr(gv(RC_FLOAT));
1732         vswap();
1733       } else if(c1 && c1<=0xf) {
1734         x|=0x300000; // rsf
1735         r2=c1;
1736         r=fpr(gv(RC_FLOAT));
1737         vswap();
1738       } else {
1739         x|=0x200000; // suf
1740         vswap();
1741         r=fpr(gv(RC_FLOAT));
1742         vswap();
1743         r2=fpr(gv(RC_FLOAT));
1744       }
1745       break;
1746     case '*':
1747       if(!c2 || c2>0xf) {
1748         vswap(); /* multiplication commutes: try the other operand as the constant */
1749         c2=c1;
1750       }
1751       vswap();
1752       r=fpr(gv(RC_FLOAT));
1753       vswap();
1754       if(c2 && c2<=0xf) /* only non-negative constants are used as immediates here */
1755         r2=c2;
1756       else
1757         r2=fpr(gv(RC_FLOAT));
1758       x|=0x100000; // muf
1759       break;
1760     case '/':
1761       if(c2 && c2<=0xf) {
1762         x|=0x400000; // dvf
1763         r2=c2;
1764         vswap();
1765         r=fpr(gv(RC_FLOAT));
1766         vswap();
1767       } else if(c1 && c1<=0xf) {
1768         x|=0x500000; // rdf
1769         r2=c1;
1770         r=fpr(gv(RC_FLOAT));
1771         vswap();
1772       } else {
1773         x|=0x400000; // dvf
1774         vswap();
1775         r=fpr(gv(RC_FLOAT));
1776         vswap();
1777         r2=fpr(gv(RC_FLOAT));
1778       }
1779       break;
1780     default:
1781       if(op >= TOK_ULT && op <= TOK_GT) {
1782         x|=0xd0f110; // cmfe
1783 /* bug (intention?) in Linux FPU emulator
1784    doesn't set carry if equal */
1785         switch(op) {
1786           case TOK_ULT:
1787           case TOK_UGE:
1788           case TOK_ULE:
1789           case TOK_UGT:
1790             tcc_error("unsigned comparision on floats?");
1791             break;
1792           case TOK_LT:
1793             op=TOK_Nset;
1794             break;
1795           case TOK_LE:
1796             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1797             break;
1798           case TOK_EQ:
1799           case TOK_NE:
1800             x&=~0x400000; // cmfe -> cmf
1801             break;
1802         }
1803         if(c1 && !c2) {
     /* only the left operand is a constant: swap the operands and mirror
        the condition */
1804           c2=c1;
1805           vswap();
1806           switch(op) {
1807             case TOK_Nset:
1808               op=TOK_GT;
1809               break;
1810             case TOK_GE:
1811               op=TOK_ULE;
1812               break;
1813             case TOK_ULE:
1814               op=TOK_GE;
1815               break;
1816             case TOK_GT:
1817               op=TOK_Nset;
1818               break;
1819           }
1820         }
1821         vswap();
1822         r=fpr(gv(RC_FLOAT));
1823         vswap();
1824         if(c2) {
1825           if(c2>0xf)
1826             x|=0x200000;
1827           r2=c2&0xf;
1828         } else {
1829           r2=fpr(gv(RC_FLOAT));
1830         }
1831         vtop[-1].r = VT_CMP;
1832         vtop[-1].c.i = op;
1833       } else {
1834         tcc_error("unknown fp op %x!",op);
1835         return;
1836       }
1837   }
1838   if(vtop[-1].r == VT_CMP)
1839     c1=15; /* comparisons: the destination field is set to 15 */
1840   else {
1841     c1=vtop->r;
1842     if(r2&0x8) /* second operand is a constant code, not a register */
1843       c1=vtop[-1].r;
1844     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1845     c1=fpr(vtop[-1].r);
1846   }
1847   vtop--;
1848   o(x|(r<<16)|(c1<<12)|r2);
1849 }
1850 #endif
1851
1852 /* Convert the integer on top of the value stack to fp type 't'. Must
1853    handle 'int', 'unsigned int' and 'long long' cases.
        - int, short and byte sources are converted inline (VFP or FPA);
        - long long sources are converted by calling a __float*di*f runtime
          helper, whose result is taken from TREG_F0;
        - any other combination is reported with tcc_error(). */
1854 ST_FUNC void gen_cvt_itof1(int t)
1855 {
1856   uint32_t r, r2;
1857   int bt;
1858   bt=vtop->type.t & VT_BTYPE;
1859   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1860 #ifndef TCC_ARM_VFP
1861     uint32_t dsize = 0;
1862 #endif
1863     r=intr(gv(RC_INT));
1864 #ifdef TCC_ARM_VFP
1865     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1866     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1867     r2|=r2<<12; /* same register as source and destination of the conversion */
1868     if(!(vtop->type.t & VT_UNSIGNED))
1869       r2|=0x80;                /* fuitoX -> fsitoX */
1870     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1871 #else
1872     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1873     if((t & VT_BTYPE) != VT_FLOAT)
1874       dsize=0x80;    /* flts -> fltd */
1875     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1876     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
     /* flt treated the value as signed: when the source is negative as a
        signed int, add 4294967296.0 to obtain the unsigned value */
1877       uint32_t off = 0;
     /* cmp reads its register from the Rn field (bits 19:16); shifting by 12
        would leave Rn as r0 and test the wrong register */
1878       o(0xE3500000|(r<<16));        /* cmp r,#0 */
1879       r=fpr(get_reg(RC_FLOAT));
1880       if(last_itod_magic) {
     /* reuse the constant emitted by an earlier conversion if it is in range */
1881         off=ind+8-last_itod_magic;
1882         off/=4;
1883         if(off>255)
1884           off=0;
1885       }
1886       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1887       if(!off) {
     /* no reusable constant: place one inline and branch over it */
1888         o(0xEA000000);              /* b */
1889         last_itod_magic=ind;
1890         o(0x4F800000);              /* 4294967296.0f */
1891       }
1892       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1893     }
1894 #endif
1895     return;
1896   } else if(bt == VT_LLONG) {
1897     int func;
1898     CType *func_type = 0; /* stays 0 when 't' is not a supported fp type */
1899     if((t & VT_BTYPE) == VT_FLOAT) {
1900       func_type = &func_float_type;
1901       if(vtop->type.t & VT_UNSIGNED)
1902         func=TOK___floatundisf;
1903       else
1904         func=TOK___floatdisf;
1905 #if LDOUBLE_SIZE != 8
1906     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1907       func_type = &func_ldouble_type;
1908       if(vtop->type.t & VT_UNSIGNED)
1909         func=TOK___floatundixf;
1910       else
1911         func=TOK___floatdixf;
1912     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1913 #else
1914     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1915 #endif
1916       func_type = &func_double_type;
1917       if(vtop->type.t & VT_UNSIGNED)
1918         func=TOK___floatundidf;
1919       else
1920         func=TOK___floatdidf;
1921     }
1922     if(func_type) {
     /* call helper(value) and take the result from TREG_F0 */
1923       vpush_global_sym(func_type, func);
1924       vswap();
1925       gfunc_call(1);
1926       vpushi(0);
1927       vtop->r=TREG_F0;
1928       return;
1929     }
1930   }
1931   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1932 }
1933
1934 /* Convert the fp value on top of the value stack to integer type 't'.
      * VT_INT targets are converted inline where this file has an
      * instruction sequence for it (always with VFP, signed only with FPA);
      * unsigned int on FPA and long long targets call a __fix* runtime
      * helper and take the result from REG_IRET (plus REG_LRET for long
      * long).  Anything else is reported with tcc_error(). */
1935 void gen_cvt_ftoi(int t)
1936 {
1937   uint32_t r, r2;
1938   int u, func = 0;
1939   u=t&VT_UNSIGNED;
1940   t&=VT_BTYPE;
1941   r2=vtop->type.t & VT_BTYPE; /* basic type of the fp source */
1942   if(t==VT_INT) {
1943 #ifdef TCC_ARM_VFP
1944     r=vfpr(gv(RC_FLOAT));
1945     u=u?0:0x10000; /* bit 16 is set for the signed conversion */
1946     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
1947     r2=intr(vtop->r=get_reg(RC_INT));
1948     o(0xEE100A10|(r<<16)|(r2<<12)); /* move the converted value into the integer register */
1949     return;
1950 #else
1951     if(u) {
1952       if(r2 == VT_FLOAT)
1953         func=TOK___fixunssfsi;
1954 #if LDOUBLE_SIZE != 8
1955       else if(r2 == VT_LDOUBLE)
1956         func=TOK___fixunsxfsi;
1957       else if(r2 == VT_DOUBLE)
1958 #else
1959       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1960 #endif
1961         func=TOK___fixunsdfsi;
1962     } else {
1963       r=fpr(gv(RC_FLOAT));
1964       r2=intr(vtop->r=get_reg(RC_INT));
1965       o(0xEE100170|(r2<<12)|r);
1966       return;
1967     }
1968 #endif
1969   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1970     if(r2 == VT_FLOAT)
1971       func=TOK___fixsfdi;
1972 #if LDOUBLE_SIZE != 8
1973     else if(r2 == VT_LDOUBLE)
1974       func=TOK___fixxfdi;
1975     else if(r2 == VT_DOUBLE)
1976 #else
1977     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1978 #endif
1979       func=TOK___fixdfdi;
1980   }
1981   if(func) {
     /* call helper(value); the result comes back in the integer return register(s) */
1982     vpush_global_sym(&func_old_type, func);
1983     vswap();
1984     gfunc_call(1);
1985     vpushi(0);
1986     if(t == VT_LLONG)
1987       vtop->r2 = REG_LRET;
1988     vtop->r = REG_IRET;
1989     return;
1990   }
1991   tcc_error("unimplemented gen_cvt_ftoi!");
1992 }
1993
1994 /* Convert the fp value on top of the value stack to floating point type 't'. */
1995 void gen_cvt_ftof(int t)
1996 {
1997 #ifdef TCC_ARM_VFP
1998   const int src_is_float = (vtop->type.t & VT_BTYPE) == VT_FLOAT;
1999   if (src_is_float != ((t & VT_BTYPE) == VT_FLOAT)) { /* float <-> non-float only */
2000     uint32_t reg = vfpr(gv(RC_FLOAT));
2001     o(0xEEB70AC0 | (reg << 12) | reg | T2CPR(vtop->type.t));
2002   }
2003 #else
2004   gv(RC_FLOAT); /* on FPA ARM, putting the float in a register is all it takes */
2005 #endif
2006 }
2007
2008 /* Computed goto support: emit the jump, then drop the top value stack entry. */
2009 void ggoto(void)
2010 {
2011   gcall_or_jmp(1); /* gfunc_call() passes 0 here; 1 presumably selects a jump */
2012   --vtop;
2013 }
2014
2015 /* end of ARM code generator */
2016 /*************************************************************/
2017 #endif
2018 /*************************************************************/