arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #ifdef TCC_ARM_EABI
     /* An EABI build always generates VFP code: force TCC_ARM_VFP on. */
  27 #ifndef TCC_ARM_VFP // guard so an already defined TCC_ARM_VFP is not redefined
  28 #define TCC_ARM_VFP
  29 #endif
  30 #endif
  31
  32 /* number of available registers: r0-r3, r12 and f0-f3 (9), plus f4-f7
        when VFP is used (13); must match the TREG_* enum and reg_classes[] */
  33 #ifdef TCC_ARM_VFP
  34 #define NB_REGS            13
  35 #else
  36 #define NB_REGS             9
  37 #endif
  38
     /* ARM architecture version; 5 unless it was already defined */
  39 #ifndef TCC_ARM_VERSION
  40 # define TCC_ARM_VERSION 5
  41 #endif
  42
  43 /* A register can belong to several classes. The classes must be
  44    sorted from more general to more precise (see the gv2() code, which
  45    makes assumptions about this ordering).
        RC_INT and RC_FLOAT are the generic classes; every other bit below
        names exactly one register. */
  46 #define RC_INT     0x0001 /* generic integer register */
  47 #define RC_FLOAT   0x0002 /* generic float register */
  48 #define RC_R0      0x0004
  49 #define RC_R1      0x0008
  50 #define RC_R2      0x0010
  51 #define RC_R3      0x0020
  52 #define RC_R12     0x0040
  53 #define RC_F0      0x0080
  54 #define RC_F1      0x0100
  55 #define RC_F2      0x0200
  56 #define RC_F3      0x0400
     /* f4-f7 only exist in the VFP configuration */
  57 #ifdef TCC_ARM_VFP
  58 #define RC_F4      0x0800
  59 #define RC_F5      0x1000
  60 #define RC_F6      0x2000
  61 #define RC_F7      0x4000
  62 #endif
  63 #define RC_IRET    RC_R0  /* function return: integer register */
  64 #define RC_LRET    RC_R1  /* function return: second integer register */
  65 #define RC_FRET    RC_F0  /* function return: float register */
  66
  67 /* Pretty names for the registers.
        The values are indices into reg_classes[].  They are not always the
        hardware register numbers: TREG_R12 is 4 (intr() maps it to r12) and
        the float registers start at 5 (vfpr()/fpr() subtract 5). */
  68 enum {
  69     TREG_R0 = 0,
  70     TREG_R1,
  71     TREG_R2,
  72     TREG_R3,
  73     TREG_R12,
  74     TREG_F0,
  75     TREG_F1,
  76     TREG_F2,
  77     TREG_F3,
  78 #ifdef TCC_ARM_VFP
  79     TREG_F4,
  80     TREG_F5,
  81     TREG_F6,
  82     TREG_F7,
  83 #endif
  84 };
  85
  86 #ifdef TCC_ARM_VFP
     /* Opcode bit to OR into a VFP instruction for type 't': 0x100 for every
        base type other than VT_FLOAT, 0 for VT_FLOAT.  load()/store() use the
        same 0x100 bit to turn flds/fsts into fldd/fstd. */
  87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
     /* With EABI the 64-bit division and modulo helper tokens are aliased to
        the __aeabi_ldivmod (signed) and __aeabi_uldivmod (unsigned) tokens;
        division and modulo share the same helper. */
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack.  (Left undefined for this target.) */
 107 //#define FUNC_STRUCT_PARAM_AS_PTR
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes.
        Both the VFP case and the fallback below define a size of 8, so
        LDOUBLE_SIZE is 8 in every configuration handled here. */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE  8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
     /* 8-byte alignment with EABI, 4-byte alignment otherwise */
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN     8
 129
     /* plain 'char' is treated as unsigned on this target */
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 /* ELF defines */
 134
     /* ELF machine type written for this target */
 135 #define EM_TCC_TARGET EM_ARM
 136
 137 /* relocation type for 32 bit data relocation */
 138 #define R_DATA_32   R_ARM_ABS32
 139 #define R_DATA_PTR  R_ARM_ABS32
 140 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 141 #define R_COPY      R_ARM_COPY
 142
     /* start address and page size used for the ELF output */
 143 #define ELF_START_ADDR 0x00008000
 144 #define ELF_PAGE_SIZE  0x1000
 145
 146 /******************************************************/
 147 #else /* ! TARGET_DEFS_ONLY */
 148 /******************************************************/
 149 #include "tcc.h"
 150
 151 ST_DATA const int reg_classes[NB_REGS] = {
     /* Class mask of every register, indexed by the TREG_* value: one generic
        class (RC_INT or RC_FLOAT) plus the bit naming that single register.
        two2mask() and regmask() strip the generic bits again. */
 152     /* r0 */ RC_INT | RC_R0,
 153     /* r1 */ RC_INT | RC_R1,
 154     /* r2 */ RC_INT | RC_R2,
 155     /* r3 */ RC_INT | RC_R3,
 156     /* r12 */ RC_INT | RC_R12,
 157     /* f0 */ RC_FLOAT | RC_F0,
 158     /* f1 */ RC_FLOAT | RC_F1,
 159     /* f2 */ RC_FLOAT | RC_F2,
 160     /* f3 */ RC_FLOAT | RC_F3,
 161 #ifdef TCC_ARM_VFP
 162  /* d4/s8 */ RC_FLOAT | RC_F4,
 163 /* d5/s10 */ RC_FLOAT | RC_F5,
 164 /* d6/s12 */ RC_FLOAT | RC_F6,
 165 /* d7/s14 */ RC_FLOAT | RC_F7,
 166 #endif
 167 };
 168
 169 static int func_sub_sp_offset, last_itod_magic; /* code generator state private to this file */
 170 static int leaffunc; /* NOTE(review): presumably set for functions that make no calls -- confirm at its users */
 171
 172 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 173 static CType float_type, double_type, func_float_type, func_double_type;
 174 ST_FUNC void arm_init_types(void)
 175 {
 176     float_type.t = VT_FLOAT;
 177     double_type.t = VT_DOUBLE;
 178     func_float_type.t = VT_FUNC;
 179     func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 180     func_double_type.t = VT_FUNC;
 181     func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 182 }
 183 #else
 184 #define func_float_type func_old_type
 185 #define func_double_type func_old_type
 186 #define func_ldouble_type func_old_type
 187 ST_FUNC void arm_init_types(void) {}
 188 #endif
 189
 190 static int two2mask(int a,int b) {
 191   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 192 }
 193
 194 static int regmask(int r) {
 195   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 196 }
 197
 198 /******************************************************/
 199
 200 void o(uint32_t i)
 201 {
 202   /* this is a good place to start adding big-endian support*/
 203   int ind1;
 204
 205   ind1 = ind + 4;
 206   if (!cur_text_section)
 207     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 208          "can't evaluate constant expressions outside of a function.");
 209   if (ind1 > cur_text_section->data_allocated)
 210     section_realloc(cur_text_section, ind1);
 211   cur_text_section->data[ind++] = i&255;
 212   i>>=8;
 213   cur_text_section->data[ind++] = i&255;
 214   i>>=8;
 215   cur_text_section->data[ind++] = i&255;
 216   i>>=8;
 217   cur_text_section->data[ind++] = i;
 218 }
 219
 220 static uint32_t stuff_const(uint32_t op, uint32_t c)
 221 {
 222   int try_neg=0;
 223   uint32_t nc = 0, negop = 0;
 224
 225   switch(op&0x1F00000)
 226   {
 227     case 0x800000: //add
 228     case 0x400000: //sub
 229       try_neg=1;
 230       negop=op^0xC00000;
 231       nc=-c;
 232       break;
 233     case 0x1A00000: //mov
 234     case 0x1E00000: //mvn
 235       try_neg=1;
 236       negop=op^0x400000;
 237       nc=~c;
 238       break;
 239     case 0x200000: //xor
 240       if(c==~0)
 241         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 242       break;
 243     case 0x0: //and
 244       if(c==~0)
 245         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 246     case 0x1C00000: //bic
 247       try_neg=1;
 248       negop=op^0x1C00000;
 249       nc=~c;
 250       break;
 251     case 0x1800000: //orr
 252       if(c==~0)
 253         return (op&0xFFF0FFFF)|0x1E00000;
 254       break;
 255   }
 256   do {
 257     uint32_t m;
 258     int i;
 259     if(c<256) /* catch undefined <<32 */
 260       return op|c;
 261     for(i=2;i<32;i+=2) {
 262       m=(0xff>>i)|(0xff<<(32-i));
 263       if(!(c&~m))
 264         return op|(i<<7)|(c<<i)|(c>>(32-i));
 265     }
 266     op=negop;
 267     c=nc;
 268   } while(try_neg--);
 269   return 0;
 270 }
 271
 272
 273 //only add,sub
 274 void stuff_const_harder(uint32_t op, uint32_t v) {
 275   uint32_t x;
 276   x=stuff_const(op,v);
 277   if(x)
 278     o(x);
 279   else {
 280     uint32_t a[16], nv, no, o2, n2;
 281     int i,j,k;
 282     a[0]=0xff;
 283     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 284     for(i=1;i<16;i++)
 285       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 286     for(i=0;i<12;i++)
 287       for(j=i<4?i+12:15;j>=i+4;j--)
 288         if((v&(a[i]|a[j]))==v) {
 289           o(stuff_const(op,v&a[i]));
 290           o(stuff_const(o2,v&a[j]));
 291           return;
 292         }
 293     no=op^0xC00000;
 294     n2=o2^0xC00000;
 295     nv=-v;
 296     for(i=0;i<12;i++)
 297       for(j=i<4?i+12:15;j>=i+4;j--)
 298         if((nv&(a[i]|a[j]))==nv) {
 299           o(stuff_const(no,nv&a[i]));
 300           o(stuff_const(n2,nv&a[j]));
 301           return;
 302         }
 303     for(i=0;i<8;i++)
 304       for(j=i+4;j<12;j++)
 305         for(k=i<4?i+12:15;k>=j+4;k--)
 306           if((v&(a[i]|a[j]|a[k]))==v) {
 307             o(stuff_const(op,v&a[i]));
 308             o(stuff_const(o2,v&a[j]));
 309             o(stuff_const(o2,v&a[k]));
 310             return;
 311           }
 312     no=op^0xC00000;
 313     nv=-v;
 314     for(i=0;i<8;i++)
 315       for(j=i+4;j<12;j++)
 316         for(k=i<4?i+12:15;k>=j+4;k--)
 317           if((nv&(a[i]|a[j]|a[k]))==nv) {
 318             o(stuff_const(no,nv&a[i]));
 319             o(stuff_const(n2,nv&a[j]));
 320             o(stuff_const(n2,nv&a[k]));
 321             return;
 322           }
 323     o(stuff_const(op,v&a[0]));
 324     o(stuff_const(o2,v&a[4]));
 325     o(stuff_const(o2,v&a[8]));
 326     o(stuff_const(o2,v&a[12]));
 327   }
 328 }
 329
 330 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 331 {
 332   addr-=pos+8;
 333   addr/=4;
 334   if(addr>=0x1000000 || addr<-0x1000000) {
 335     if(fail)
 336       tcc_error("FIXME: function bigger than 32MB");
 337     return 0;
 338   }
 339   return 0x0A000000|(addr&0xffffff);
 340 }
 341
 342 int decbranch(int pos)
 343 {
 344   int x;
 345   x=*(uint32_t *)(cur_text_section->data + pos);
 346   x&=0x00ffffff;
 347   if(x&0x800000)
 348     x-=0x1000000;
 349   return x*4+pos+8;
 350 }
 351
 352 /* output a symbol and patch all calls to it */
 353 void gsym_addr(int t, int a)
 354 {
 355   uint32_t *x;
 356   int lt;
 357   while(t) {
 358     x=(uint32_t *)(cur_text_section->data + t);
 359     t=decbranch(lt=t);
 360     if(a==lt+4)
 361       *x=0xE1A00000; // nop
 362     else {
 363       *x &= 0xff000000;
 364       *x |= encbranch(lt,a,1);
 365     }
 366   }
 367 }
 368
 369 void gsym(int t)
 370 {
 371   gsym_addr(t, ind);
 372 }
 373
 374 #ifdef TCC_ARM_VFP
 375 static uint32_t vfpr(int r)
 376 {
 377   if(r<TREG_F0 || r>TREG_F7)
 378     tcc_error("compiler error! register %i is no vfp register",r);
 379   return r-5;
 380 }
 381 #else
 382 static uint32_t fpr(int r)
 383 {
 384   if(r<TREG_F0 || r>TREG_F3)
 385     tcc_error("compiler error! register %i is no fpa register",r);
 386   return r-5;
 387 }
 388 #endif
 389
 390 static uint32_t intr(int r)
 391 {
 392   if(r==4)
 393     return 12;
 394   if((r<0 || r>4) && r!=14)
 395     tcc_error("compiler error! register %i is no int register",r);
 396   return r;
 397 }
 398
 399 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 400 {
 401   if(*off>maxoff || *off&((1<<shift)-1)) {
 402     uint32_t x, y;
 403     x=0xE280E000;
 404     if(*sgn)
 405       x=0xE240E000;
 406     x|=(*base)<<16;
 407     *base=14; // lr
 408     y=stuff_const(x,*off&~maxoff);
 409     if(y) {
 410       o(y);
 411       *off&=maxoff;
 412       return;
 413     }
 414     y=stuff_const(x,(*off+maxoff)&~maxoff);
 415     if(y) {
 416       o(y);
 417       *sgn=!*sgn;
 418       *off=((*off+maxoff)&~maxoff)-*off;
 419       return;
 420     }
 421     stuff_const_harder(x,*off&~maxoff);
 422     *off&=maxoff;
 423   }
 424 }
 425
 426 static uint32_t mapcc(int cc)
 427 {
 428   switch(cc)
 429   {
 430     case TOK_ULT:
 431       return 0x30000000; /* CC/LO */
 432     case TOK_UGE:
 433       return 0x20000000; /* CS/HS */
 434     case TOK_EQ:
 435       return 0x00000000; /* EQ */
 436     case TOK_NE:
 437       return 0x10000000; /* NE */
 438     case TOK_ULE:
 439       return 0x90000000; /* LS */
 440     case TOK_UGT:
 441       return 0x80000000; /* HI */
 442     case TOK_Nset:
 443       return 0x40000000; /* MI */
 444     case TOK_Nclear:
 445       return 0x50000000; /* PL */
 446     case TOK_LT:
 447       return 0xB0000000; /* LT */
 448     case TOK_GE:
 449       return 0xA0000000; /* GE */
 450     case TOK_LE:
 451       return 0xD0000000; /* LE */
 452     case TOK_GT:
 453       return 0xC0000000; /* GT */
 454   }
 455   tcc_error("unexpected condition code");
 456   return 0xE0000000; /* AL */
 457 }
 458
 459 static int negcc(int cc)
 460 {
 461   switch(cc)
 462   {
 463     case TOK_ULT:
 464       return TOK_UGE;
 465     case TOK_UGE:
 466       return TOK_ULT;
 467     case TOK_EQ:
 468       return TOK_NE;
 469     case TOK_NE:
 470       return TOK_EQ;
 471     case TOK_ULE:
 472       return TOK_UGT;
 473     case TOK_UGT:
 474       return TOK_ULE;
 475     case TOK_Nset:
 476       return TOK_Nclear;
 477     case TOK_Nclear:
 478       return TOK_Nset;
 479     case TOK_LT:
 480       return TOK_GE;
 481     case TOK_GE:
 482       return TOK_LT;
 483     case TOK_LE:
 484       return TOK_GT;
 485     case TOK_GT:
 486       return TOK_LE;
 487   }
 488   tcc_error("unexpected condition code");
 489   return TOK_NE;
 490 }
 491
  492 /* load 'r' from value 'sv'
         Emits the code that brings the value described by 'sv' into the
         register with index 'r' (a TREG_* value).  sv->r decides what is
         loaded: a memory operand (VT_LVAL), a constant or symbol address,
         a frame-relative address, the result of a comparison or of a jump
         chain, or another register.  Any other kind is reported with
         tcc_error("load unimplemented!"). */
  493 void load(int r, SValue *sv)
  494 {
  495   int v, ft, fc, fr, sign;
  496   uint32_t op;
  497   SValue v1;
  498
  499   fr = sv->r;
  500   ft = sv->type.t;
  501   fc = sv->c.ul;
  502
      /* split the constant into a magnitude (fc) and a direction
         (sign = 1: the offset is subtracted from the base) */
  503   if(fc>=0)
  504     sign=0;
  505   else {
  506     sign=1;
  507     fc=-fc;
  508   }
  509
  510   v = fr & VT_VALMASK;
  511   if (fr & VT_LVAL) {
      /* memory operand: first reduce every case to "base register + offset" */
  512     uint32_t base = 0xB; // fp
  513     if(v == VT_LLOCAL) {
      /* the frame slot holds a pointer: fetch it into lr, then read through it */
  514       v1.type.t = VT_PTR;
  515       v1.r = VT_LOCAL | VT_LVAL;
  516       v1.c.ul = sv->c.ul;
  517       load(base=14 /* lr */, &v1);
  518       fc=sign=0;
  519       v=VT_LOCAL;
  520     } else if(v == VT_CONST) {
      /* absolute or symbol address: materialise it in lr first */
  521       v1.type.t = VT_PTR;
  522       v1.r = fr&~VT_LVAL;
  523       v1.c.ul = sv->c.ul;
  524       v1.sym=sv->sym;
  525       load(base=14, &v1);
  526       fc=sign=0;
  527       v=VT_LOCAL;
  528     } else if(v < VT_CONST) {
      /* the address is already held in register v */
  529       base=intr(v);
  530       fc=sign=0;
  531       v=VT_LOCAL;
  532     }
  533     if(v == VT_LOCAL) {
  534       if(is_float(ft)) {
      /* float load: offset limited to 1020 with the low 2 bits clear, stored divided by 4 */
  535         calcaddr(&base,&fc,&sign,1020,2);
  536 #ifdef TCC_ARM_VFP
  537         op=0xED100A00; /* flds */
  538         if(!sign)
  539           op|=0x800000;
  540         if ((ft & VT_BTYPE) != VT_FLOAT)
  541           op|=0x100;   /* flds -> fldd */
  542         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
  543 #else
  544         op=0xED100100;
  545         if(!sign)
  546           op|=0x800000;
  547 #if LDOUBLE_SIZE == 8
  548         if ((ft & VT_BTYPE) != VT_FLOAT)
  549           op|=0x8000;
  550 #else
  551         if ((ft & VT_BTYPE) == VT_DOUBLE)
  552           op|=0x8000;
  553         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
  554           op|=0x400000;
  555 #endif
  556         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
  557 #endif
  558       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
  559                 || (ft & VT_BTYPE) == VT_SHORT) {
      /* signed byte and (un)signed halfword loads: the offset is limited to
         255 and is split into two nibbles in the instruction word */
  560         calcaddr(&base,&fc,&sign,255,0);
  561         op=0xE1500090;
  562         if ((ft & VT_BTYPE) == VT_SHORT)
  563           op|=0x20;
  564         if ((ft & VT_UNSIGNED) == 0)
  565           op|=0x40;
  566         if(!sign)
  567           op|=0x800000;
  568         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
  569       } else {
      /* word or unsigned byte load: the offset is limited to 4095 */
  570         calcaddr(&base,&fc,&sign,4095,0);
  571         op=0xE5100000;
  572         if(!sign)
  573           op|=0x800000;
  574         if ((ft & VT_BTYPE) == VT_BYTE)
  575           op|=0x400000;
  576         o(op|(intr(r)<<12)|fc|(base<<16));
  577       }
  578       return;
  579     }
  580   } else {
  581     if (v == VT_CONST) {
      /* constant: a single mov/mvn when encodable; otherwise (and always for
         symbols) the word is placed in the code stream, read pc-relative and
         jumped over */
  582       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
  583       if (fr & VT_SYM || !op) {
  584         o(0xE59F0000|(intr(r)<<12));
  585         o(0xEA000000);
  586         if(fr & VT_SYM)
  587           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
  588         o(sv->c.ul);
  589       } else
  590         o(op);
  591       return;
  592     } else if (v == VT_LOCAL) {
      /* address of a frame slot: fp plus constant, with the same inline-word
         fallback followed by adding fp (register 0xB) to the loaded value */
  593       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
  594       if (fr & VT_SYM || !op) {
  595         o(0xE59F0000|(intr(r)<<12));
  596         o(0xEA000000);
  597         if(fr & VT_SYM) // needed ?
  598           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
  599         o(sv->c.ul);
  600         o(0xE08B0000|(intr(r)<<12)|intr(r));
  601       } else
  602         o(op);
  603       return;
  604     } else if(v == VT_CMP) {
      /* flags to value: r = 1 under the condition, r = 0 under its negation */
  605       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
  606       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
  607       return;
  608     } else if (v == VT_JMP || v == VT_JMPI) {
      /* jump chain to value: the fall-through path sets r to (v & 1) and
         skips the next instruction; the pending jumps in sv->c.ul are
         resolved to that next instruction, which sets the opposite value */
  609       int t;
  610       t = v & 1;
  611       o(0xE3A00000|(intr(r)<<12)|t);
  612       o(0xEA000000);
  613       gsym(sv->c.ul);
  614       o(0xE3A00000|(intr(r)<<12)|(t^1));
  615       return;
  616     } else if (v < VT_CONST) {
      /* register to register copy */
  617       if(is_float(ft))
  618 #ifdef TCC_ARM_VFP
  619         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
  620 #else
  621         o(0xEE008180|(fpr(r)<<12)|fpr(v));
  622 #endif
  623       else
  624         o(0xE1A00000|(intr(r)<<12)|intr(v));
  625       return;
  626     }
  627   }
  628   tcc_error("load unimplemented!");
  629 }
 630
  631 /* store register 'r' in lvalue 'v'
         Emits the code that writes register 'r' (a TREG_* index) to the
         memory location described by 'sv': a frame slot, an address held in
         a register, or a constant/symbol address.  Any other kind is
         reported with tcc_error("store unimplemented"). */
  632 void store(int r, SValue *sv)
  633 {
  634   SValue v1;
  635   int v, ft, fc, fr, sign;
  636   uint32_t op;
  637
  638   fr = sv->r;
  639   ft = sv->type.t;
  640   fc = sv->c.ul;
  641
      /* split the constant into a magnitude (fc) and a direction
         (sign = 1: the offset is subtracted from the base) */
  642   if(fc>=0)
  643     sign=0;
  644   else {
  645     sign=1;
  646     fc=-fc;
  647   }
  648
  649   v = fr & VT_VALMASK;
  650   if (fr & VT_LVAL || fr == VT_LOCAL) {
      /* reduce every case to "base register + offset"; the default base is fp */
  651     uint32_t base = 0xb;
  652     if(v < VT_CONST) {
      /* the address is held in register v */
  653       base=intr(v);
  654       v=VT_LOCAL;
  655       fc=sign=0;
  656     } else if(v == VT_CONST) {
      /* constant or symbol address: materialise it in lr (14) with load() */
  657       v1.type.t = ft;
  658       v1.r = fr&~VT_LVAL;
  659       v1.c.ul = sv->c.ul;
  660       v1.sym=sv->sym;
  661       load(base=14, &v1);
  662       fc=sign=0;
  663       v=VT_LOCAL;
  664     }
  665     if(v == VT_LOCAL) {
  666        if(is_float(ft)) {
      /* float store: offset limited to 1020 with the low 2 bits clear, stored divided by 4 */
  667         calcaddr(&base,&fc,&sign,1020,2);
  668 #ifdef TCC_ARM_VFP
  669         op=0xED000A00; /* fsts */
  670         if(!sign)
  671           op|=0x800000;
  672         if ((ft & VT_BTYPE) != VT_FLOAT)
  673           op|=0x100;   /* fsts -> fstd */
  674         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
  675 #else
  676         op=0xED000100;
  677         if(!sign)
  678           op|=0x800000;
  679 #if LDOUBLE_SIZE == 8
  680         if ((ft & VT_BTYPE) != VT_FLOAT)
  681           op|=0x8000;
  682 #else
  683         if ((ft & VT_BTYPE) == VT_DOUBLE)
  684           op|=0x8000;
  685         if ((ft & VT_BTYPE) == VT_LDOUBLE)
  686           op|=0x400000;
  687 #endif
  688         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
  689 #endif
  690         return;
  691       } else if((ft & VT_BTYPE) == VT_SHORT) {
      /* halfword store: the offset is limited to 255 and is split into two
         nibbles in the instruction word */
  692         calcaddr(&base,&fc,&sign,255,0);
  693         op=0xE14000B0;
  694         if(!sign)
  695           op|=0x800000;
  696         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
  697       } else {
      /* word or byte store: the offset is limited to 4095; 0x400000 selects
         the byte form */
  698         calcaddr(&base,&fc,&sign,4095,0);
  699         op=0xE5000000;
  700         if(!sign)
  701           op|=0x800000;
  702         if ((ft & VT_BTYPE) == VT_BYTE)
  703           op|=0x400000;
  704         o(op|(intr(r)<<12)|fc|(base<<16));
  705       }
  706       return;
  707     }
  708   }
  709   tcc_error("store unimplemented");
  710 }
 711
 712 static void gadd_sp(int val)
 713 {
 714   stuff_const_harder(0xE28DD000,val);
 715 }
 716
 717 /* 'is_jmp' is '1' if it is a jump */
 718 static void gcall_or_jmp(int is_jmp)
 719 {
 720   int r;
 721   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 722     uint32_t x;
 723     /* constant case */
 724     x=encbranch(ind,ind+vtop->c.ul,0);
 725     if(x) {
 726       if (vtop->r & VT_SYM) {
 727         /* relocation case */
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 729       } else
 730         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 731       o(x|(is_jmp?0xE0000000:0xE1000000));
 732     } else {
 733       if(!is_jmp)
 734         o(0xE28FE004); // add lr,pc,#4
 735       o(0xE51FF004);   // ldr pc,[pc,#-4]
 736       if (vtop->r & VT_SYM)
 737         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 738       o(vtop->c.ul);
 739     }
 740   } else {
 741     /* otherwise, indirect call */
 742     r = gv(RC_INT);
 743     if(!is_jmp)
 744       o(0xE1A0E00F);       // mov lr,pc
 745     o(0xE1A0F000|intr(r)); // mov pc,r
 746   }
 747 }
 748
 749 #ifdef TCC_ARM_HARDFLOAT
 750 static int is_float_hgen_aggr(CType *type)
 751 {
 752   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 753     struct Sym *ref;
 754     int btype, nb_fields = 0;
 755
 756     ref = type->ref;
 757     btype = ref->type.t & VT_BTYPE;
 758     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 759       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 760       return !ref && nb_fields <= 4;
 761     }
 762   }
 763   return 0;
 764 }
 765
 766 struct avail_regs {
 767   /* worst case: f(float, double, 3 float struct, double, 3 float struct, double) */
 768   signed char avail[3];
 769   int first_hole;
 770   int last_hole;
 771   int first_free_reg;
 772 };
 773
 774 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 775
 776 /* Assign a register for a CPRC param with correct size and alignment
 777  * size and align are in bytes, as returned by type_size */
 778 int assign_fpreg(struct avail_regs *avregs, int align, int size)
 779 {
 780   int first_reg = 0;
 781
 782   if (avregs->first_free_reg == -1)
 783     return -1;
 784   if (align >> 3) { // alignment needed (base type: double)
 785     first_reg = avregs->first_free_reg;
 786     if (first_reg & 1)
 787       avregs->avail[avregs->last_hole++] = first_reg++;
 788   } else {
 789     if (size == 4 && avregs->first_hole != avregs->last_hole)
 790       return avregs->avail[avregs->first_hole++];
 791     else
 792       first_reg = avregs->first_free_reg;
 793   }
 794   if (first_reg + size / 4 <= 16) {
 795     avregs->first_free_reg = first_reg + size / 4;
 796     return first_reg;
 797   }
 798   avregs->first_free_reg = -1;
 799   return -1;
 800 }
 801 #endif
 802
 803 /* Generate function call. The function address is pushed first, then
 804    all the parameters in call order. This functions pops all the
 805    parameters and the function address. */
 806 void gfunc_call(int nb_args)
 807 {
 808   int size, align, r, args_size, i, ncrn, ncprn, argno, vfp_argno;
 809   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 810   SValue *before_stack = NULL; /* SValue before first on stack argument */
 811   SValue *before_vfpreg_hfa = NULL; /* SValue before first in VFP reg hfa argument */
 812 #ifdef TCC_ARM_HARDFLOAT
 813   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 814   signed char vfp_plan[16];
 815   int plan2[4+16];
 816   int variadic;
 817 #else
 818   int plan2[4]={0,0,0,0};
 819 #endif
 820   int vfp_todo=0;
 821   int todo=0, keep;
 822
 823 #ifdef TCC_ARM_HARDFLOAT
 824   memset(vfp_plan, -1, sizeof(vfp_plan));
 825   memset(plan2, 0, sizeof(plan2));
 826   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
 827 #endif
 828   r = vtop->r & VT_VALMASK;
 829   if (r == VT_CMP || (r & ~1) == VT_JMP)
 830     gv(RC_INT);
 831 #ifdef TCC_ARM_EABI
 832   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 833      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
 834     SValue tmp;
 835     tmp=vtop[-nb_args];
 836     vtop[-nb_args]=vtop[-nb_args+1];
 837     vtop[-nb_args+1]=tmp;
 838     --nb_args;
 839   }
 840
 841   vpushi(0), nb_args++;
 842   vtop->type.t = VT_LLONG;
 843 #endif
 844   ncrn = ncprn = argno = vfp_argno = args_size = 0;
 845   /* Assign argument to registers and stack with alignment.
 846      If, considering alignment constraints, enough registers of the correct type
 847      (core or VFP) are free for the current argument, assign them to it, else
 848      allocate on stack with correct alignment. Whenever a structure is allocated
 849      in registers or on stack, it is always put on the stack at this stage. The
 850      stack is divided in 3 zones. The zone are, from low addresses to high
 851      addresses: structures to be loaded in core registers, structures to be
 852      loaded in VFP registers, argument allocated to stack. SValue's representing
 853      structures in the first zone are moved just after the SValue pointed by
 854      before_vfpreg_hfa. SValue's representing structures in the second zone are
 855      moved just after the SValue pointer by before_stack. */
 856   for(i = nb_args; i-- ;) {
 857     int j, assigned_vfpreg = 0;
 858     size = type_size(&vtop[-i].type, &align);
 859     switch(vtop[-i].type.t & VT_BTYPE) {
 860       case VT_STRUCT:
 861       case VT_FLOAT:
 862       case VT_DOUBLE:
 863       case VT_LDOUBLE:
 864 #ifdef TCC_ARM_HARDFLOAT
 865       if (!variadic) {
 866         int hfa = 0; /* Homogeneous float aggregate */
 867
 868         if (is_float(vtop[-i].type.t)
 869             || (hfa = is_float_hgen_aggr(&vtop[-i].type))) {
 870           int end_reg;
 871
 872           assigned_vfpreg = assign_fpreg(&avregs, align, size);
 873           end_reg = assigned_vfpreg + (size - 1) / 4;
 874           if (assigned_vfpreg >= 0) {
 875             vfp_plan[vfp_argno++]=TREG_F0 + assigned_vfpreg/2;
 876             if (hfa) {
 877               /* before_stack can only have been set because all core registers
 878                  are assigned, so no need to care about before_vfpreg_hfa if
 879                  before_stack is set */
 880               if (before_stack) {
 881                 vrote(&vtop[-i], &vtop[-i] - before_stack);
 882                 before_stack++;
 883               } else if (!before_vfpreg_hfa)
 884                 before_vfpreg_hfa = &vtop[-i-1];
 885               for (j = assigned_vfpreg; j <= end_reg; j++)
 886                 vfp_todo|=(1<<j);
 887             }
 888             continue;
 889           } else {
 890             if (!hfa)
 891               vfp_argno++;
 892             /* No need to update before_stack as no more hfa can be allocated in
 893                VFP regs */
 894             if (!before_vfpreg_hfa)
 895               before_vfpreg_hfa = &vtop[-i-1];
 896             break;
 897           }
 898         }
 899       }
 900 #endif
 901       ncrn = (ncrn + (align-1)/4) & -(align/4);
 902       size = (size + 3) & -4;
 903       if (ncrn + size/4 <= 4 || (ncrn < 4 && assigned_vfpreg != -1)) {
 904         /* Either there is HFA in VFP registers, or there is arguments on stack,
 905            it cannot be both. Hence either before_stack already points after
 906            the slot where the vtop[-i] SValue is moved, or before_stack will not
 907            be used */
 908         if (before_vfpreg_hfa) {
 909           vrote(&vtop[-i], &vtop[-i] - before_vfpreg_hfa);
 910           before_vfpreg_hfa++;
 911         }
 912         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 913           todo|=(1<<j);
 914         ncrn+=size/4;
 915         if (ncrn > 4) {
 916           args_size = (ncrn - 4) * 4;
 917           if (!before_stack)
 918             before_stack = &vtop[-i-1];
 919         }
 920       }
 921       else {
 922         ncrn = 4;
 923         /* No need to set before_vfpreg_hfa if not set since there will no
 924            longer be any structure assigned to core registers */
 925         if (!before_stack)
 926           before_stack = &vtop[-i-1];
 927         break;
 928       }
 929       continue;
 930       default:
 931 #ifdef TCC_ARM_EABI
 932       if (!i) {
 933         break;
 934       }
 935 #endif
 936       if (ncrn < 4) {
 937         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 938
 939         if (is_long) {
 940           ncrn = (ncrn + 1) & -2;
 941           if (ncrn == 4) {
 942             argno++;
 943             break;
 944           }
 945         }
 946         plan[argno++][0]=ncrn++;
 947         if (is_long) {
 948           plan[argno-1][1]=ncrn++;
 949         }
 950         continue;
 951       }
 952       argno++;
 953     }
 954 #ifdef TCC_ARM_EABI
 955     if(args_size & (align-1)) {
 956       vpushi(0);
 957       vtop->type.t = VT_VOID; /* padding */
 958       vrott(i+2);
 959       args_size += 4;
 960       nb_args++;
 961       argno++;
 962     }
 963 #endif
 964     args_size += (size + 3) & -4;
 965   }
 966 #ifdef TCC_ARM_EABI
 967   vtop--, nb_args--;
 968 #endif
 969   args_size = keep = 0;
 970   for(i = 0;i < nb_args; i++) {
 971     vrotb(keep+1);
 972     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 973       size = type_size(&vtop->type, &align);
 974       /* align to stack align size */
 975       size = (size + 3) & -4;
 976       /* allocate the necessary size on stack */
 977       gadd_sp(-size);
 978       /* generate structure store */
 979       r = get_reg(RC_INT);
 980       o(0xE1A0000D|(intr(r)<<12));
 981       vset(&vtop->type, r | VT_LVAL, 0);
 982       vswap();
 983       vstore();
 984       vtop--;
 985       args_size += size;
 986     } else if (is_float(vtop->type.t)) {
 987 #ifdef TCC_ARM_HARDFLOAT
 988       if (!variadic && --vfp_argno<16 && vfp_plan[vfp_argno]!=-1) {
 989         plan2[keep++]=vfp_plan[vfp_argno];
 990         continue;
 991       }
 992 #endif
 993 #ifdef TCC_ARM_VFP
 994       r=vfpr(gv(RC_FLOAT))<<12;
 995       size=4;
 996       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 997       {
 998         size=8;
 999         r|=0x101; /* fstms -> fstmd */
1000       }
1001       o(0xED2D0A01+r);
1002 #else
1003       r=fpr(gv(RC_FLOAT))<<12;
1004       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
1005         size = 4;
1006       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1007         size = 8;
1008       else
1009         size = LDOUBLE_SIZE;
1010
1011       if (size == 12)
1012         r|=0x400000;
1013       else if(size == 8)
1014         r|=0x8000;
1015
1016       o(0xED2D0100|r|(size>>2));
1017 #endif
1018       vtop--;
1019       args_size += size;
1020     } else {
1021       int s;
1022       /* simple type (currently always same size) */
1023       /* XXX: implicit cast ? */
1024       size=4;
1025       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1026         lexpand_nr();
1027         s=-1;
1028         if(--argno<4 && plan[argno][1]!=-1)
1029           s=plan[argno][1];
1030         argno++;
1031         size = 8;
1032         if(s==-1) {
1033           r = gv(RC_INT);
1034           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1035           vtop--;
1036         } else {
1037           size=0;
1038           plan2[keep]=s;
1039           keep++;
1040           vswap();
1041         }
1042       }
1043       s=-1;
1044       if(--argno<4 && plan[argno][0]!=-1)
1045         s=plan[argno][0];
1046 #ifdef TCC_ARM_EABI
1047       if(vtop->type.t == VT_VOID) {
1048         if(s == -1)
1049           o(0xE24DD004); /* sub sp,sp,#4 */
1050         vtop--;
1051       } else
1052 #endif
1053       if(s == -1) {
1054         r = gv(RC_INT);
1055         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1056         vtop--;
1057       } else {
1058         size=0;
1059         plan2[keep]=s;
1060         keep++;
1061       }
1062       args_size += size;
1063     }
1064   }
1065   for(i = 0; i < keep; i++) {
1066     vrotb(keep);
1067     gv(regmask(plan2[i]));
1068 #ifdef TCC_ARM_HARDFLOAT
1069     /* arg is in s(2d+1): plan2[i]<plan2[i+1] => alignment occured (ex f,d,f) */
1070     if (i < keep - 1 && is_float(vtop->type.t) && (plan2[i] <= plan2[i + 1])) {
1071       o(0xEEF00A40|(vfpr(plan2[i])<<12)|vfpr(plan2[i]));
1072     }
1073 #endif
1074   }
1075 save_regs(keep); /* save used temporary registers */
1076   keep++;
1077   if(ncrn) {
1078     int nb_regs=0;
1079     if (ncrn>4)
1080       ncrn=4;
1081     todo&=((1<<ncrn)-1);
1082     if(todo) {
1083       int i;
1084       o(0xE8BD0000|todo);
1085       for(i=0;i<4;i++)
1086         if(todo&(1<<i)) {
1087           vpushi(0);
1088           vtop->r=i;
1089           keep++;
1090           nb_regs++;
1091         }
1092     }
1093     args_size-=nb_regs*4;
1094   }
1095   if(vfp_todo) {
1096     int nb_fregs=0;
1097
1098     for(i=0;i<16;i++)
1099       if(vfp_todo&(1<<i)) {
1100         o(0xED9D0A00|(i&1)<<22|(i>>1)<<12|nb_fregs);
1101         vpushi(0);
1102         /* There might be 2 floats in a double VFP reg but that doesn't seem
1103            to matter */
1104         if (!(i%2))
1105           vtop->r=TREG_F0+i/2;
1106         keep++;
1107         nb_fregs++;
1108       }
1109     if (nb_fregs) {
1110       gadd_sp(nb_fregs*4);
1111       args_size-=nb_fregs*4;
1112     }
1113   }
1114   vrotb(keep);
1115   gcall_or_jmp(0);
1116   if (args_size)
1117       gadd_sp(args_size);
1118 #ifdef TCC_ARM_EABI
1119   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1120      && type_size(&vtop->type.ref->type, &align) <= 4)
1121   {
1122     store(REG_IRET,vtop-keep);
1123     ++keep;
1124   }
1125 #ifdef TCC_ARM_VFP
1126 #ifdef TCC_ARM_HARDFLOAT
1127   else if(variadic && is_float(vtop->type.ref->type.t)) {
1128 #else
1129   else if(is_float(vtop->type.ref->type.t)) {
1130 #endif
1131     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1132       o(0xEE000A10); /* fmsr s0,r0 */
1133     } else {
1134       o(0xEE000B10); /* fmdlr d0,r0 */
1135       o(0xEE201B10); /* fmdhr d0,r1 */
1136     }
1137   }
1138 #endif
1139 #endif
1140   vtop-=keep;
1141   leaffunc = 0;
1142 }
1143
1144 /* generate function prolog of type 't' */
1145 void gfunc_prolog(CType *func_type)
1146 {
1147   Sym *sym,*sym2;
1148   int n,nf,size,align, variadic, struct_ret = 0;
1149 #ifdef TCC_ARM_HARDFLOAT
1150   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1151 #endif
1152
1153   sym = func_type->ref;
1154   func_vt = sym->type;
1155
1156   n = nf = 0;
1157   variadic = (func_type->ref->c == FUNC_ELLIPSIS);
1158   if((func_vt.t & VT_BTYPE) == VT_STRUCT
1159      && type_size(&func_vt,&align) > 4)
1160   {
1161     n++;
1162     struct_ret = 1;
1163     func_vc = 12; /* Offset from fp of the place to store the result */
1164   }
1165   for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1166     size = type_size(&sym2->type, &align);
1167 #ifdef TCC_ARM_HARDFLOAT
1168     if (!variadic && (is_float(sym2->type.t)
1169         || is_float_hgen_aggr(&sym2->type))) {
1170       int tmpnf = assign_fpreg(&avregs, align, size) + 1;
1171       nf = (tmpnf > nf) ? tmpnf : nf;
1172     } else
1173 #endif
1174     if (n < 4)
1175       n += (size + 3) / 4;
1176   }
1177   o(0xE1A0C00D); /* mov ip,sp */
1178   if(variadic)
1179     n=4;
1180   if(n) {
1181     if(n>4)
1182       n=4;
1183 #ifdef TCC_ARM_EABI
1184     n=(n+1)&-2;
1185 #endif
1186     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1187   }
1188   if (nf) {
1189     if (nf>16)
1190       nf=16;
1191     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1192     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1193   }
1194   o(0xE92D5800); /* save fp, ip, lr */
1195   o(0xE1A0B00D); /* mov fp, sp */
1196   func_sub_sp_offset = ind;
1197   o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1198   {
1199     int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1200
1201 #ifdef TCC_ARM_HARDFLOAT
1202     avregs = AVAIL_REGS_INITIALIZER;
1203 #endif
1204     while ((sym = sym->next)) {
1205       CType *type;
1206       type = &sym->type;
1207       size = type_size(type, &align);
1208       size = (size + 3) >> 2;
1209       align = (align + 3) & ~3;
1210 #ifdef TCC_ARM_HARDFLOAT
1211       if (!variadic && (is_float(sym->type.t)
1212           || is_float_hgen_aggr(&sym->type))) {
1213         int fpn = assign_fpreg(&avregs, align, size << 2);
1214         if (fpn >= 0) {
1215           addr = fpn * 4;
1216         } else
1217           goto from_stack;
1218       } else
1219 #endif
1220       if (pn < 4) {
1221 #ifdef TCC_ARM_EABI
1222         pn = (pn + (align-1)/4) & -(align/4);
1223 #endif
1224         addr = (nf + pn) * 4;
1225         pn += size;
1226         if (!sn && pn > 4)
1227           sn = (pn - 4);
1228       } else {
1229 #ifdef TCC_ARM_HARDFLOAT
1230 from_stack:
1231 #endif
1232 #ifdef TCC_ARM_EABI
1233         sn = (sn + (align-1)/4) & -(align/4);
1234 #endif
1235         addr = (n + nf + sn) * 4;
1236         sn += size;
1237       }
1238       sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
1239     }
1240   }
1241   last_itod_magic=0;
1242   leaffunc = 1;
1243   loc = 0;
1244 }
1245
1246 /* generate function epilog */
1247 void gfunc_epilog(void)
1248 {
1249   uint32_t x;
1250   int diff;
1251 #ifdef TCC_ARM_EABI
1252   /* Useless but harmless copy of the float result into main register(s) in case
1253      of variadic function in the hardfloat variant */
1254   if(is_float(func_vt.t)) {
1255     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1256       o(0xEE100A10); /* fmrs r0, s0 */
1257     else {
1258       o(0xEE100B10); /* fmrdl r0, d0 */
1259       o(0xEE301B10); /* fmrdh r1, d0 */
1260     }
1261   }
1262 #endif
1263   o(0xE89BA800); /* restore fp, sp, pc */
1264   diff = (-loc + 3) & -4;
1265 #ifdef TCC_ARM_EABI
1266   if(!leaffunc)
1267     diff = ((diff + 11) & -8) - 4;
1268 #endif
1269   if(diff > 0) {
1270     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1271     if(x)
1272       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1273     else {
1274       int addr;
1275       addr=ind;
1276       o(0xE59FC004); /* ldr ip,[pc+4] */
1277       o(0xE04BD00C); /* sub sp,fp,ip  */
1278       o(0xE1A0F00E); /* mov pc,lr */
1279       o(diff);
1280       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1281     }
1282   }
1283 }
1284
1285 /* generate a jump to a label */
1286 int gjmp(int t)
1287 {
1288   int r;
1289   r=ind;
1290   o(0xE0000000|encbranch(r,t,1));
1291   return r;
1292 }
1293
/* Emit a jump to the fixed address 'a'; the position returned by
   gjmp() is not needed here. */
void gjmp_addr(int a)
{
  (void) gjmp(a);
}
1299
1300 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1301 int gtst(int inv, int t)
1302 {
1303   int v, r;
1304   uint32_t op;
1305   v = vtop->r & VT_VALMASK;
1306   r=ind;
1307   if (v == VT_CMP) {
1308     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1309     op|=encbranch(r,t,1);
1310     o(op);
1311     t=r;
1312   } else if (v == VT_JMP || v == VT_JMPI) {
1313     if ((v & 1) == inv) {
1314       if(!vtop->c.i)
1315         vtop->c.i=t;
1316       else {
1317         uint32_t *x;
1318         int p,lp;
1319         if(t) {
1320           p = vtop->c.i;
1321           do {
1322             p = decbranch(lp=p);
1323           } while(p);
1324           x = (uint32_t *)(cur_text_section->data + lp);
1325           *x &= 0xff000000;
1326           *x |= encbranch(lp,t,1);
1327         }
1328         t = vtop->c.i;
1329       }
1330     } else {
1331       t = gjmp(t);
1332       gsym(vtop->c.i);
1333     }
1334   } else {
1335     if (is_float(vtop->type.t)) {
1336       r=gv(RC_FLOAT);
1337 #ifdef TCC_ARM_VFP
1338       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1339       o(0xEEF1FA10); /* fmstat */
1340 #else
1341       o(0xEE90F118|(fpr(r)<<16));
1342 #endif
1343       vtop->r = VT_CMP;
1344       vtop->c.i = TOK_NE;
1345       return gtst(inv, t);
1346     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1347       /* constant jmp optimization */
1348       if ((vtop->c.i != 0) != inv)
1349         t = gjmp(t);
1350     } else {
1351       v = gv(RC_INT);
1352       o(0xE3300000|(intr(v)<<16));
1353       vtop->r = VT_CMP;
1354       vtop->c.i = TOK_NE;
1355       return gtst(inv, t);
1356     }
1357   }
1358   vtop--;
1359   return t;
1360 }
1361
1362 /* generate an integer binary operation */
1363 void gen_opi(int op)
1364 {
1365   int c, func = 0;
1366   uint32_t opc = 0, r, fr;
1367   unsigned short retreg = REG_IRET;
1368
1369   c=0;
1370   switch(op) {
1371     case '+':
1372       opc = 0x8;
1373       c=1;
1374       break;
1375     case TOK_ADDC1: /* add with carry generation */
1376       opc = 0x9;
1377       c=1;
1378       break;
1379     case '-':
1380       opc = 0x4;
1381       c=1;
1382       break;
1383     case TOK_SUBC1: /* sub with carry generation */
1384       opc = 0x5;
1385       c=1;
1386       break;
1387     case TOK_ADDC2: /* add with carry use */
1388       opc = 0xA;
1389       c=1;
1390       break;
1391     case TOK_SUBC2: /* sub with carry use */
1392       opc = 0xC;
1393       c=1;
1394       break;
1395     case '&':
1396       opc = 0x0;
1397       c=1;
1398       break;
1399     case '^':
1400       opc = 0x2;
1401       c=1;
1402       break;
1403     case '|':
1404       opc = 0x18;
1405       c=1;
1406       break;
1407     case '*':
1408       gv2(RC_INT, RC_INT);
1409       r = vtop[-1].r;
1410       fr = vtop[0].r;
1411       vtop--;
1412       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1413       return;
1414     case TOK_SHL:
1415       opc = 0;
1416       c=2;
1417       break;
1418     case TOK_SHR:
1419       opc = 1;
1420       c=2;
1421       break;
1422     case TOK_SAR:
1423       opc = 2;
1424       c=2;
1425       break;
1426     case '/':
1427     case TOK_PDIV:
1428       func=TOK___divsi3;
1429       c=3;
1430       break;
1431     case TOK_UDIV:
1432       func=TOK___udivsi3;
1433       c=3;
1434       break;
1435     case '%':
1436 #ifdef TCC_ARM_EABI
1437       func=TOK___aeabi_idivmod;
1438       retreg=REG_LRET;
1439 #else
1440       func=TOK___modsi3;
1441 #endif
1442       c=3;
1443       break;
1444     case TOK_UMOD:
1445 #ifdef TCC_ARM_EABI
1446       func=TOK___aeabi_uidivmod;
1447       retreg=REG_LRET;
1448 #else
1449       func=TOK___umodsi3;
1450 #endif
1451       c=3;
1452       break;
1453     case TOK_UMULL:
1454       gv2(RC_INT, RC_INT);
1455       r=intr(vtop[-1].r2=get_reg(RC_INT));
1456       c=vtop[-1].r;
1457       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1458       vtop--;
1459       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1460       return;
1461     default:
1462       opc = 0x15;
1463       c=1;
1464       break;
1465   }
1466   switch(c) {
1467     case 1:
1468       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1469         if(opc == 4 || opc == 5 || opc == 0xc) {
1470           vswap();
1471           opc|=2; // sub -> rsb
1472         }
1473       }
1474       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1475           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1476         gv(RC_INT);
1477       vswap();
1478       c=intr(gv(RC_INT));
1479       vswap();
1480       opc=0xE0000000|(opc<<20)|(c<<16);
1481       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1482         uint32_t x;
1483         x=stuff_const(opc|0x2000000,vtop->c.i);
1484         if(x) {
1485           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1486           o(x|(r<<12));
1487           goto done;
1488         }
1489       }
1490       fr=intr(gv(RC_INT));
1491       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1492       o(opc|(r<<12)|fr);
1493 done:
1494       vtop--;
1495       if (op >= TOK_ULT && op <= TOK_GT) {
1496         vtop->r = VT_CMP;
1497         vtop->c.i = op;
1498       }
1499       break;
1500     case 2:
1501       opc=0xE1A00000|(opc<<5);
1502       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1503           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1504         gv(RC_INT);
1505       vswap();
1506       r=intr(gv(RC_INT));
1507       vswap();
1508       opc|=r;
1509       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1510         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1511         c = vtop->c.i & 0x1f;
1512         o(opc|(c<<7)|(fr<<12));
1513       } else {
1514         fr=intr(gv(RC_INT));
1515         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1516         o(opc|(c<<12)|(fr<<8)|0x10);
1517       }
1518       vtop--;
1519       break;
1520     case 3:
1521       vpush_global_sym(&func_old_type, func);
1522       vrott(3);
1523       gfunc_call(2);
1524       vpushi(0);
1525       vtop->r = retreg;
1526       break;
1527     default:
1528       tcc_error("gen_opi %i unimplemented!",op);
1529   }
1530 }
1531
1532 #ifdef TCC_ARM_VFP
1533 static int is_zero(int i)
1534 {
1535   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1536     return 0;
1537   if (vtop[i].type.t == VT_FLOAT)
1538     return (vtop[i].c.f == 0.f);
1539   else if (vtop[i].type.t == VT_DOUBLE)
1540     return (vtop[i].c.d == 0.0);
1541   return (vtop[i].c.ld == 0.l);
1542 }
1543
1544 /* generate a floating point operation 'v = t1 op t2' instruction. The
1545  *    two operands are guaranted to have the same floating point type */
1546 void gen_opf(int op)
1547 {
1548   uint32_t x;
1549   int fneg=0,r;
1550   x=0xEE000A00|T2CPR(vtop->type.t);
1551   switch(op) {
1552     case '+':
1553       if(is_zero(-1))
1554         vswap();
1555       if(is_zero(0)) {
1556         vtop--;
1557         return;
1558       }
1559       x|=0x300000;
1560       break;
1561     case '-':
1562       x|=0x300040;
1563       if(is_zero(0)) {
1564         vtop--;
1565         return;
1566       }
1567       if(is_zero(-1)) {
1568         x|=0x810000; /* fsubX -> fnegX */
1569         vswap();
1570         vtop--;
1571         fneg=1;
1572       }
1573       break;
1574     case '*':
1575       x|=0x200000;
1576       break;
1577     case '/':
1578       x|=0x800000;
1579       break;
1580     default:
1581       if(op < TOK_ULT || op > TOK_GT) {
1582         tcc_error("unknown fp op %x!",op);
1583         return;
1584       }
1585       if(is_zero(-1)) {
1586         vswap();
1587         switch(op) {
1588           case TOK_LT: op=TOK_GT; break;
1589           case TOK_GE: op=TOK_ULE; break;
1590           case TOK_LE: op=TOK_GE; break;
1591           case TOK_GT: op=TOK_ULT; break;
1592         }
1593       }
1594       x|=0xB40040; /* fcmpX */
1595       if(op!=TOK_EQ && op!=TOK_NE)
1596         x|=0x80; /* fcmpX -> fcmpeX */
1597       if(is_zero(0)) {
1598         vtop--;
1599         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1600       } else {
1601         x|=vfpr(gv(RC_FLOAT));
1602         vswap();
1603         o(x|(vfpr(gv(RC_FLOAT))<<12));
1604         vtop--;
1605       }
1606       o(0xEEF1FA10); /* fmstat */
1607
1608       switch(op) {
1609         case TOK_LE: op=TOK_ULE; break;
1610         case TOK_LT: op=TOK_ULT; break;
1611         case TOK_UGE: op=TOK_GE; break;
1612         case TOK_UGT: op=TOK_GT; break;
1613       }
1614
1615       vtop->r = VT_CMP;
1616       vtop->c.i = op;
1617       return;
1618   }
1619   r=gv(RC_FLOAT);
1620   x|=vfpr(r);
1621   r=regmask(r);
1622   if(!fneg) {
1623     int r2;
1624     vswap();
1625     r2=gv(RC_FLOAT);
1626     x|=vfpr(r2)<<16;
1627     r|=regmask(r2);
1628   }
1629   vtop->r=get_reg_ex(RC_FLOAT,r);
1630   if(!fneg)
1631     vtop--;
1632   o(x|(vfpr(vtop->r)<<12));
1633 }
1634
1635 #else
1636 static uint32_t is_fconst()
1637 {
1638   long double f;
1639   uint32_t r;
1640   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1641     return 0;
1642   if (vtop->type.t == VT_FLOAT)
1643     f = vtop->c.f;
1644   else if (vtop->type.t == VT_DOUBLE)
1645     f = vtop->c.d;
1646   else
1647     f = vtop->c.ld;
1648   if(!ieee_finite(f))
1649     return 0;
1650   r=0x8;
1651   if(f<0.0) {
1652     r=0x18;
1653     f=-f;
1654   }
1655   if(f==0.0)
1656     return r;
1657   if(f==1.0)
1658     return r|1;
1659   if(f==2.0)
1660     return r|2;
1661   if(f==3.0)
1662     return r|3;
1663   if(f==4.0)
1664     return r|4;
1665   if(f==5.0)
1666     return r|5;
1667   if(f==0.5)
1668     return r|6;
1669   if(f==10.0)
1670     return r|7;
1671   return 0;
1672 }
1673
1674 /* generate a floating point operation 'v = t1 op t2' instruction. The
1675    two operands are guaranted to have the same floating point type */
1676 void gen_opf(int op)
1677 {
1678   uint32_t x, r, r2, c1, c2;
1679   //fputs("gen_opf\n",stderr);
1680   vswap();
1681   c1 = is_fconst();
1682   vswap();
1683   c2 = is_fconst();
1684   x=0xEE000100;
1685 #if LDOUBLE_SIZE == 8
1686   if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1687     x|=0x80;
1688 #else
1689   if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1690     x|=0x80;
1691   else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1692     x|=0x80000;
1693 #endif
1694   switch(op)
1695   {
1696     case '+':
1697       if(!c2) {
1698         vswap();
1699         c2=c1;
1700       }
1701       vswap();
1702       r=fpr(gv(RC_FLOAT));
1703       vswap();
1704       if(c2) {
1705         if(c2>0xf)
1706           x|=0x200000; // suf
1707         r2=c2&0xf;
1708       } else {
1709         r2=fpr(gv(RC_FLOAT));
1710       }
1711       break;
1712     case '-':
1713       if(c2) {
1714         if(c2<=0xf)
1715           x|=0x200000; // suf
1716         r2=c2&0xf;
1717         vswap();
1718         r=fpr(gv(RC_FLOAT));
1719         vswap();
1720       } else if(c1 && c1<=0xf) {
1721         x|=0x300000; // rsf
1722         r2=c1;
1723         r=fpr(gv(RC_FLOAT));
1724         vswap();
1725       } else {
1726         x|=0x200000; // suf
1727         vswap();
1728         r=fpr(gv(RC_FLOAT));
1729         vswap();
1730         r2=fpr(gv(RC_FLOAT));
1731       }
1732       break;
1733     case '*':
1734       if(!c2 || c2>0xf) {
1735         vswap();
1736         c2=c1;
1737       }
1738       vswap();
1739       r=fpr(gv(RC_FLOAT));
1740       vswap();
1741       if(c2 && c2<=0xf)
1742         r2=c2;
1743       else
1744         r2=fpr(gv(RC_FLOAT));
1745       x|=0x100000; // muf
1746       break;
1747     case '/':
1748       if(c2 && c2<=0xf) {
1749         x|=0x400000; // dvf
1750         r2=c2;
1751         vswap();
1752         r=fpr(gv(RC_FLOAT));
1753         vswap();
1754       } else if(c1 && c1<=0xf) {
1755         x|=0x500000; // rdf
1756         r2=c1;
1757         r=fpr(gv(RC_FLOAT));
1758         vswap();
1759       } else {
1760         x|=0x400000; // dvf
1761         vswap();
1762         r=fpr(gv(RC_FLOAT));
1763         vswap();
1764         r2=fpr(gv(RC_FLOAT));
1765       }
1766       break;
1767     default:
1768       if(op >= TOK_ULT && op <= TOK_GT) {
1769         x|=0xd0f110; // cmfe
1770 /* bug (intention?) in Linux FPU emulator
1771    doesn't set carry if equal */
1772         switch(op) {
1773           case TOK_ULT:
1774           case TOK_UGE:
1775           case TOK_ULE:
1776           case TOK_UGT:
1777             tcc_error("unsigned comparision on floats?");
1778             break;
1779           case TOK_LT:
1780             op=TOK_Nset;
1781             break;
1782           case TOK_LE:
1783             op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1784             break;
1785           case TOK_EQ:
1786           case TOK_NE:
1787             x&=~0x400000; // cmfe -> cmf
1788             break;
1789         }
1790         if(c1 && !c2) {
1791           c2=c1;
1792           vswap();
1793           switch(op) {
1794             case TOK_Nset:
1795               op=TOK_GT;
1796               break;
1797             case TOK_GE:
1798               op=TOK_ULE;
1799               break;
1800             case TOK_ULE:
1801               op=TOK_GE;
1802               break;
1803             case TOK_GT:
1804               op=TOK_Nset;
1805               break;
1806           }
1807         }
1808         vswap();
1809         r=fpr(gv(RC_FLOAT));
1810         vswap();
1811         if(c2) {
1812           if(c2>0xf)
1813             x|=0x200000;
1814           r2=c2&0xf;
1815         } else {
1816           r2=fpr(gv(RC_FLOAT));
1817         }
1818         vtop[-1].r = VT_CMP;
1819         vtop[-1].c.i = op;
1820       } else {
1821         tcc_error("unknown fp op %x!",op);
1822         return;
1823       }
1824   }
1825   if(vtop[-1].r == VT_CMP)
1826     c1=15;
1827   else {
1828     c1=vtop->r;
1829     if(r2&0x8)
1830       c1=vtop[-1].r;
1831     vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1832     c1=fpr(vtop[-1].r);
1833   }
1834   vtop--;
1835   o(x|(r<<16)|(c1<<12)|r2);
1836 }
1837 #endif
1838
1839 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1840    and 'long long' cases. */
1841 ST_FUNC void gen_cvt_itof1(int t)
1842 {
1843   uint32_t r, r2;
1844   int bt;
1845   bt=vtop->type.t & VT_BTYPE;
1846   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1847 #ifndef TCC_ARM_VFP
1848     uint32_t dsize = 0;
1849 #endif
1850     r=intr(gv(RC_INT));
1851 #ifdef TCC_ARM_VFP
1852     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1853     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1854     r2|=r2<<12;
1855     if(!(vtop->type.t & VT_UNSIGNED))
1856       r2|=0x80;                /* fuitoX -> fsituX */
1857     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1858 #else
1859     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1860     if((t & VT_BTYPE) != VT_FLOAT)
1861       dsize=0x80;    /* flts -> fltd */
1862     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1863     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1864       uint32_t off = 0;
1865       o(0xE3500000|(r<<12));        /* cmp */
1866       r=fpr(get_reg(RC_FLOAT));
1867       if(last_itod_magic) {
1868         off=ind+8-last_itod_magic;
1869         off/=4;
1870         if(off>255)
1871           off=0;
1872       }
1873       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1874       if(!off) {
1875         o(0xEA000000);              /* b */
1876         last_itod_magic=ind;
1877         o(0x4F800000);              /* 4294967296.0f */
1878       }
1879       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1880     }
1881 #endif
1882     return;
1883   } else if(bt == VT_LLONG) {
1884     int func;
1885     CType *func_type = 0;
1886     if((t & VT_BTYPE) == VT_FLOAT) {
1887       func_type = &func_float_type;
1888       if(vtop->type.t & VT_UNSIGNED)
1889         func=TOK___floatundisf;
1890       else
1891         func=TOK___floatdisf;
1892 #if LDOUBLE_SIZE != 8
1893     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1894       func_type = &func_ldouble_type;
1895       if(vtop->type.t & VT_UNSIGNED)
1896         func=TOK___floatundixf;
1897       else
1898         func=TOK___floatdixf;
1899     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1900 #else
1901     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1902 #endif
1903       func_type = &func_double_type;
1904       if(vtop->type.t & VT_UNSIGNED)
1905         func=TOK___floatundidf;
1906       else
1907         func=TOK___floatdidf;
1908     }
1909     if(func_type) {
1910       vpush_global_sym(func_type, func);
1911       vswap();
1912       gfunc_call(1);
1913       vpushi(0);
1914       vtop->r=TREG_F0;
1915       return;
1916     }
1917   }
1918   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1919 }
1920
1921 /* convert fp to int 't' type */
1922 void gen_cvt_ftoi(int t)
1923 {
1924   uint32_t r, r2;
1925   int u, func = 0;
1926   u=t&VT_UNSIGNED;
1927   t&=VT_BTYPE;
1928   r2=vtop->type.t & VT_BTYPE;
1929   if(t==VT_INT) {
1930 #ifdef TCC_ARM_VFP
1931     r=vfpr(gv(RC_FLOAT));
1932     u=u?0:0x10000;
1933     o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
1934     r2=intr(vtop->r=get_reg(RC_INT));
1935     o(0xEE100A10|(r<<16)|(r2<<12));
1936     return;
1937 #else
1938     if(u) {
1939       if(r2 == VT_FLOAT)
1940         func=TOK___fixunssfsi;
1941 #if LDOUBLE_SIZE != 8
1942       else if(r2 == VT_LDOUBLE)
1943         func=TOK___fixunsxfsi;
1944       else if(r2 == VT_DOUBLE)
1945 #else
1946       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1947 #endif
1948         func=TOK___fixunsdfsi;
1949     } else {
1950       r=fpr(gv(RC_FLOAT));
1951       r2=intr(vtop->r=get_reg(RC_INT));
1952       o(0xEE100170|(r2<<12)|r);
1953       return;
1954     }
1955 #endif
1956   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1957     if(r2 == VT_FLOAT)
1958       func=TOK___fixsfdi;
1959 #if LDOUBLE_SIZE != 8
1960     else if(r2 == VT_LDOUBLE)
1961       func=TOK___fixxfdi;
1962     else if(r2 == VT_DOUBLE)
1963 #else
1964     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1965 #endif
1966       func=TOK___fixdfdi;
1967   }
1968   if(func) {
1969     vpush_global_sym(&func_old_type, func);
1970     vswap();
1971     gfunc_call(1);
1972     vpushi(0);
1973     if(t == VT_LLONG)
1974       vtop->r2 = REG_LRET;
1975     vtop->r = REG_IRET;
1976     return;
1977   }
1978   tcc_error("unimplemented gen_cvt_ftoi!");
1979 }
1980
1981 /* convert from one floating point type to another */
1982 void gen_cvt_ftof(int t)
1983 {
1984 #ifdef TCC_ARM_VFP
1985   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1986     uint32_t r = vfpr(gv(RC_FLOAT));
1987     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1988   }
1989 #else
1990   /* all we have to do on i386 and FPA ARM is to put the float in a register */
1991   gv(RC_FLOAT);
1992 #endif
1993 }
1994
1995 /* computed goto support */
1996 void ggoto(void)
1997 {
1998   gcall_or_jmp(1);
1999   vtop--;
2000 }
2001
2002 /* end of ARM code generator */
2003 /*************************************************************/
2004 #endif
2005 /*************************************************************/