arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #ifdef TARGET_DEFS_ONLY
  24
  25 #ifdef TCC_ARM_EABI
  26 #ifndef TCC_ARM_VFP // Avoid useless warning
  27 #define TCC_ARM_VFP
  28 #endif
  29 #endif
  30
  31 /* number of available registers */
  32 #ifdef TCC_ARM_VFP
  33 #define NB_REGS            13
  34 #else
  35 #define NB_REGS             9
  36 #endif
  37
  38 /* a register can belong to several classes. The classes must be
  39    sorted from more general to more precise (see gv2() code which does
  40    assumptions on it). */
  41 #define RC_INT     0x0001 /* generic integer register */
  42 #define RC_FLOAT   0x0002 /* generic float register */
  43 #define RC_R0      0x0004
  44 #define RC_R1      0x0008
  45 #define RC_R2      0x0010
  46 #define RC_R3      0x0020
  47 #define RC_R12     0x0040
  48 #define RC_F0      0x0080
  49 #define RC_F1      0x0100
  50 #define RC_F2      0x0200
  51 #define RC_F3      0x0400
  52 #ifdef TCC_ARM_VFP
  53 #define RC_F4      0x0800
  54 #define RC_F5      0x1000
  55 #define RC_F6      0x2000
  56 #define RC_F7      0x4000
  57 #endif
  58 #define RC_IRET    RC_R0  /* function return: integer register */
  59 #define RC_LRET    RC_R1  /* function return: second integer register */
  60 #define RC_FRET    RC_F0  /* function return: float register */
  61
  62 /* pretty names for the registers */
  63 enum {
  64     TREG_R0 = 0,
  65     TREG_R1,
  66     TREG_R2,
  67     TREG_R3,
  68     TREG_R12,
  69     TREG_F0,
  70     TREG_F1,
  71     TREG_F2,
  72     TREG_F3,
  73 #ifdef TCC_ARM_VFP
  74     TREG_F4,
  75     TREG_F5,
  76     TREG_F6,
  77     TREG_F7,
  78 #endif
  79 };
  80
  81 #ifdef TCC_ARM_VFP
  82 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  83 #endif
  84
  85 /* return registers for function */
  86 #define REG_IRET TREG_R0 /* single word int return register */
  87 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  88 #define REG_FRET TREG_F0 /* float return register */
  89
  90 #ifdef TCC_ARM_EABI
  91 #define TOK___divdi3 TOK___aeabi_ldivmod
  92 #define TOK___moddi3 TOK___aeabi_ldivmod
  93 #define TOK___udivdi3 TOK___aeabi_uldivmod
  94 #define TOK___umoddi3 TOK___aeabi_uldivmod
  95 #endif
  96
  97 /* defined if function parameters must be evaluated in reverse order */
  98 #define INVERT_FUNC_PARAMS
  99
 100 /* defined if structures are passed as pointers. Otherwise structures
 101    are directly pushed on stack. */
 102 //#define FUNC_STRUCT_PARAM_AS_PTR
 103
 104 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 105 ST_DATA CType float_type, double_type, func_float_type, func_double_type;
 106 #define func_ldouble_type func_double_type
 107 #else
 108 #define func_float_type func_old_type
 109 #define func_double_type func_old_type
 110 #define func_ldouble_type func_old_type
 111 #endif
 112
 113 /* pointer size, in bytes */
 114 #define PTR_SIZE 4
 115
 116 /* long double size and alignment, in bytes */
 117 #ifdef TCC_ARM_VFP
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifndef LDOUBLE_SIZE
 122 #define LDOUBLE_SIZE  8
 123 #endif
 124
 125 #ifdef TCC_ARM_EABI
 126 #define LDOUBLE_ALIGN 8
 127 #else
 128 #define LDOUBLE_ALIGN 4
 129 #endif
 130
 131 /* maximum alignment (for aligned attribute support) */
 132 #define MAX_ALIGN     8
 133
 134 #define CHAR_IS_UNSIGNED
 135
 136 /******************************************************/
 137 /* ELF defines */
 138
 139 #define EM_TCC_TARGET EM_ARM
 140
 141 /* relocation type for 32 bit data relocation */
 142 #define R_DATA_32   R_ARM_ABS32
 143 #define R_DATA_PTR  R_ARM_ABS32
 144 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 145 #define R_COPY      R_ARM_COPY
 146
 147 #define ELF_START_ADDR 0x00008000
 148 #define ELF_PAGE_SIZE  0x1000
 149
 150 /******************************************************/
 151 #else /* ! TARGET_DEFS_ONLY */
 152 /******************************************************/
 153 #include "tcc.h"
 154
 155 ST_DATA const int reg_classes[NB_REGS] = {
 156     /* r0 */ RC_INT | RC_R0,
 157     /* r1 */ RC_INT | RC_R1,
 158     /* r2 */ RC_INT | RC_R2,
 159     /* r3 */ RC_INT | RC_R3,
 160     /* r12 */ RC_INT | RC_R12,
 161     /* f0 */ RC_FLOAT | RC_F0,
 162     /* f1 */ RC_FLOAT | RC_F1,
 163     /* f2 */ RC_FLOAT | RC_F2,
 164     /* f3 */ RC_FLOAT | RC_F3,
 165 #ifdef TCC_ARM_VFP
 166  /* d4/s8 */ RC_FLOAT | RC_F4,
 167 /* d5/s10 */ RC_FLOAT | RC_F5,
 168 /* d6/s12 */ RC_FLOAT | RC_F6,
 169 /* d7/s14 */ RC_FLOAT | RC_F7,
 170 #endif
 171 };
 172
 173 /* keep in sync with line 104 above */
 174 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 175 ST_DATA CType float_type, double_type, func_float_type, func_double_type;
 176 #endif
 177
 178 static int func_sub_sp_offset, last_itod_magic;
 179 static int leaffunc;
 180
 181 static int two2mask(int a,int b) {
 182   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 183 }
 184
 185 static int regmask(int r) {
 186   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 187 }
 188
 189 /******************************************************/
 190
 191 void o(uint32_t i)
 192 {
 193   /* this is a good place to start adding big-endian support*/
 194   int ind1;
 195
 196   ind1 = ind + 4;
 197   if (!cur_text_section)
 198     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 199          "can't evaluate constant expressions outside of a function.");
 200   if (ind1 > cur_text_section->data_allocated)
 201     section_realloc(cur_text_section, ind1);
 202   cur_text_section->data[ind++] = i&255;
 203   i>>=8;
 204   cur_text_section->data[ind++] = i&255;
 205   i>>=8;
 206   cur_text_section->data[ind++] = i&255;
 207   i>>=8;
 208   cur_text_section->data[ind++] = i;
 209 }
 210
 211 static uint32_t stuff_const(uint32_t op, uint32_t c)
 212 {
 213   int try_neg=0;
 214   uint32_t nc = 0, negop = 0;
 215
 216   switch(op&0x1F00000)
 217   {
 218     case 0x800000: //add
 219     case 0x400000: //sub
 220       try_neg=1;
 221       negop=op^0xC00000;
 222       nc=-c;
 223       break;
 224     case 0x1A00000: //mov
 225     case 0x1E00000: //mvn
 226       try_neg=1;
 227       negop=op^0x400000;
 228       nc=~c;
 229       break;
 230     case 0x200000: //xor
 231       if(c==~0)
 232         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 233       break;
 234     case 0x0: //and
 235       if(c==~0)
 236         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 237     case 0x1C00000: //bic
 238       try_neg=1;
 239       negop=op^0x1C00000;
 240       nc=~c;
 241       break;
 242     case 0x1800000: //orr
 243       if(c==~0)
 244         return (op&0xFFF0FFFF)|0x1E00000;
 245       break;
 246   }
 247   do {
 248     uint32_t m;
 249     int i;
 250     if(c<256) /* catch undefined <<32 */
 251       return op|c;
 252     for(i=2;i<32;i+=2) {
 253       m=(0xff>>i)|(0xff<<(32-i));
 254       if(!(c&~m))
 255         return op|(i<<7)|(c<<i)|(c>>(32-i));
 256     }
 257     op=negop;
 258     c=nc;
 259   } while(try_neg--);
 260   return 0;
 261 }
 262
 263
 264 //only add,sub
 265 void stuff_const_harder(uint32_t op, uint32_t v) {
 266   uint32_t x;
 267   x=stuff_const(op,v);
 268   if(x)
 269     o(x);
 270   else {
 271     uint32_t a[16], nv, no, o2, n2;
 272     int i,j,k;
 273     a[0]=0xff;
 274     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 275     for(i=1;i<16;i++)
 276       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 277     for(i=0;i<12;i++)
 278       for(j=i<4?i+12:15;j>=i+4;j--)
 279         if((v&(a[i]|a[j]))==v) {
 280           o(stuff_const(op,v&a[i]));
 281           o(stuff_const(o2,v&a[j]));
 282           return;
 283         }
 284     no=op^0xC00000;
 285     n2=o2^0xC00000;
 286     nv=-v;
 287     for(i=0;i<12;i++)
 288       for(j=i<4?i+12:15;j>=i+4;j--)
 289         if((nv&(a[i]|a[j]))==nv) {
 290           o(stuff_const(no,nv&a[i]));
 291           o(stuff_const(n2,nv&a[j]));
 292           return;
 293         }
 294     for(i=0;i<8;i++)
 295       for(j=i+4;j<12;j++)
 296         for(k=i<4?i+12:15;k>=j+4;k--)
 297           if((v&(a[i]|a[j]|a[k]))==v) {
 298             o(stuff_const(op,v&a[i]));
 299             o(stuff_const(o2,v&a[j]));
 300             o(stuff_const(o2,v&a[k]));
 301             return;
 302           }
 303     no=op^0xC00000;
 304     nv=-v;
 305     for(i=0;i<8;i++)
 306       for(j=i+4;j<12;j++)
 307         for(k=i<4?i+12:15;k>=j+4;k--)
 308           if((nv&(a[i]|a[j]|a[k]))==nv) {
 309             o(stuff_const(no,nv&a[i]));
 310             o(stuff_const(n2,nv&a[j]));
 311             o(stuff_const(n2,nv&a[k]));
 312             return;
 313           }
 314     o(stuff_const(op,v&a[0]));
 315     o(stuff_const(o2,v&a[4]));
 316     o(stuff_const(o2,v&a[8]));
 317     o(stuff_const(o2,v&a[12]));
 318   }
 319 }
 320
 321 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 322 {
 323   addr-=pos+8;
 324   addr/=4;
 325   if(addr>=0x1000000 || addr<-0x1000000) {
 326     if(fail)
 327       tcc_error("FIXME: function bigger than 32MB");
 328     return 0;
 329   }
 330   return 0x0A000000|(addr&0xffffff);
 331 }
 332
 333 int decbranch(int pos)
 334 {
 335   int x;
 336   x=*(uint32_t *)(cur_text_section->data + pos);
 337   x&=0x00ffffff;
 338   if(x&0x800000)
 339     x-=0x1000000;
 340   return x*4+pos+8;
 341 }
 342
 343 /* output a symbol and patch all calls to it */
 344 void gsym_addr(int t, int a)
 345 {
 346   uint32_t *x;
 347   int lt;
 348   while(t) {
 349     x=(uint32_t *)(cur_text_section->data + t);
 350     t=decbranch(lt=t);
 351     if(a==lt+4)
 352       *x=0xE1A00000; // nop
 353     else {
 354       *x &= 0xff000000;
 355       *x |= encbranch(lt,a,1);
 356     }
 357   }
 358 }
 359
 360 void gsym(int t)
 361 {
 362   gsym_addr(t, ind);
 363 }
 364
 365 #ifdef TCC_ARM_VFP
 366 static uint32_t vfpr(int r)
 367 {
 368   if(r<TREG_F0 || r>TREG_F7)
 369     tcc_error("compiler error! register %i is no vfp register",r);
 370   return r-5;
 371 }
 372 #else
 373 static uint32_t fpr(int r)
 374 {
 375   if(r<TREG_F0 || r>TREG_F3)
 376     tcc_error("compiler error! register %i is no fpa register",r);
 377   return r-5;
 378 }
 379 #endif
 380
 381 static uint32_t intr(int r)
 382 {
 383   if(r==4)
 384     return 12;
 385   if((r<0 || r>4) && r!=14)
 386     tcc_error("compiler error! register %i is no int register",r);
 387   return r;
 388 }
 389
 390 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 391 {
 392   if(*off>maxoff || *off&((1<<shift)-1)) {
 393     uint32_t x, y;
 394     x=0xE280E000;
 395     if(*sgn)
 396       x=0xE240E000;
 397     x|=(*base)<<16;
 398     *base=14; // lr
 399     y=stuff_const(x,*off&~maxoff);
 400     if(y) {
 401       o(y);
 402       *off&=maxoff;
 403       return;
 404     }
 405     y=stuff_const(x,(*off+maxoff)&~maxoff);
 406     if(y) {
 407       o(y);
 408       *sgn=!*sgn;
 409       *off=((*off+maxoff)&~maxoff)-*off;
 410       return;
 411     }
 412     stuff_const_harder(x,*off&~maxoff);
 413     *off&=maxoff;
 414   }
 415 }
 416
 417 static uint32_t mapcc(int cc)
 418 {
 419   switch(cc)
 420   {
 421     case TOK_ULT:
 422       return 0x30000000; /* CC/LO */
 423     case TOK_UGE:
 424       return 0x20000000; /* CS/HS */
 425     case TOK_EQ:
 426       return 0x00000000; /* EQ */
 427     case TOK_NE:
 428       return 0x10000000; /* NE */
 429     case TOK_ULE:
 430       return 0x90000000; /* LS */
 431     case TOK_UGT:
 432       return 0x80000000; /* HI */
 433     case TOK_Nset:
 434       return 0x40000000; /* MI */
 435     case TOK_Nclear:
 436       return 0x50000000; /* PL */
 437     case TOK_LT:
 438       return 0xB0000000; /* LT */
 439     case TOK_GE:
 440       return 0xA0000000; /* GE */
 441     case TOK_LE:
 442       return 0xD0000000; /* LE */
 443     case TOK_GT:
 444       return 0xC0000000; /* GT */
 445   }
 446   tcc_error("unexpected condition code");
 447   return 0xE0000000; /* AL */
 448 }
 449
 450 static int negcc(int cc)
 451 {
 452   switch(cc)
 453   {
 454     case TOK_ULT:
 455       return TOK_UGE;
 456     case TOK_UGE:
 457       return TOK_ULT;
 458     case TOK_EQ:
 459       return TOK_NE;
 460     case TOK_NE:
 461       return TOK_EQ;
 462     case TOK_ULE:
 463       return TOK_UGT;
 464     case TOK_UGT:
 465       return TOK_ULE;
 466     case TOK_Nset:
 467       return TOK_Nclear;
 468     case TOK_Nclear:
 469       return TOK_Nset;
 470     case TOK_LT:
 471       return TOK_GE;
 472     case TOK_GE:
 473       return TOK_LT;
 474     case TOK_LE:
 475       return TOK_GT;
 476     case TOK_GT:
 477       return TOK_LE;
 478   }
 479   tcc_error("unexpected condition code");
 480   return TOK_NE;
 481 }
 482
 483 /* load 'r' from value 'sv' */
 484 void load(int r, SValue *sv)
 485 {
 486   int v, ft, fc, fr, sign;
 487   uint32_t op;
 488   SValue v1;
 489
 490   fr = sv->r;
 491   ft = sv->type.t;
 492   fc = sv->c.ul;
 493
 494   if(fc>=0)
 495     sign=0;
 496   else {
 497     sign=1;
 498     fc=-fc;
 499   }
 500
 501   v = fr & VT_VALMASK;
 502   if (fr & VT_LVAL) {
 503     uint32_t base = 0xB; // fp
 504     if(v == VT_LLOCAL) {
 505       v1.type.t = VT_PTR;
 506       v1.r = VT_LOCAL | VT_LVAL;
 507       v1.c.ul = sv->c.ul;
 508       load(base=14 /* lr */, &v1);
 509       fc=sign=0;
 510       v=VT_LOCAL;
 511     } else if(v == VT_CONST) {
 512       v1.type.t = VT_PTR;
 513       v1.r = fr&~VT_LVAL;
 514       v1.c.ul = sv->c.ul;
 515       v1.sym=sv->sym;
 516       load(base=14, &v1);
 517       fc=sign=0;
 518       v=VT_LOCAL;
 519     } else if(v < VT_CONST) {
 520       base=intr(v);
 521       fc=sign=0;
 522       v=VT_LOCAL;
 523     }
 524     if(v == VT_LOCAL) {
 525       if(is_float(ft)) {
 526         calcaddr(&base,&fc,&sign,1020,2);
 527 #ifdef TCC_ARM_VFP
 528         op=0xED100A00; /* flds */
 529         if(!sign)
 530           op|=0x800000;
 531         if ((ft & VT_BTYPE) != VT_FLOAT)
 532           op|=0x100;   /* flds -> fldd */
 533         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 534 #else
 535         op=0xED100100;
 536         if(!sign)
 537           op|=0x800000;
 538 #if LDOUBLE_SIZE == 8
 539         if ((ft & VT_BTYPE) != VT_FLOAT)
 540           op|=0x8000;
 541 #else
 542         if ((ft & VT_BTYPE) == VT_DOUBLE)
 543           op|=0x8000;
 544         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 545           op|=0x400000;
 546 #endif
 547         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 548 #endif
 549       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 550                 || (ft & VT_BTYPE) == VT_SHORT) {
 551         calcaddr(&base,&fc,&sign,255,0);
 552         op=0xE1500090;
 553         if ((ft & VT_BTYPE) == VT_SHORT)
 554           op|=0x20;
 555         if ((ft & VT_UNSIGNED) == 0)
 556           op|=0x40;
 557         if(!sign)
 558           op|=0x800000;
 559         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 560       } else {
 561         calcaddr(&base,&fc,&sign,4095,0);
 562         op=0xE5100000;
 563         if(!sign)
 564           op|=0x800000;
 565         if ((ft & VT_BTYPE) == VT_BYTE)
 566           op|=0x400000;
 567         o(op|(intr(r)<<12)|fc|(base<<16));
 568       }
 569       return;
 570     }
 571   } else {
 572     if (v == VT_CONST) {
 573       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 574       if (fr & VT_SYM || !op) {
 575         o(0xE59F0000|(intr(r)<<12));
 576         o(0xEA000000);
 577         if(fr & VT_SYM)
 578           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 579         o(sv->c.ul);
 580       } else
 581         o(op);
 582       return;
 583     } else if (v == VT_LOCAL) {
 584       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 585       if (fr & VT_SYM || !op) {
 586         o(0xE59F0000|(intr(r)<<12));
 587         o(0xEA000000);
 588         if(fr & VT_SYM) // needed ?
 589           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 590         o(sv->c.ul);
 591         o(0xE08B0000|(intr(r)<<12)|intr(r));
 592       } else
 593         o(op);
 594       return;
 595     } else if(v == VT_CMP) {
 596       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 597       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 598       return;
 599     } else if (v == VT_JMP || v == VT_JMPI) {
 600       int t;
 601       t = v & 1;
 602       o(0xE3A00000|(intr(r)<<12)|t);
 603       o(0xEA000000);
 604       gsym(sv->c.ul);
 605       o(0xE3A00000|(intr(r)<<12)|(t^1));
 606       return;
 607     } else if (v < VT_CONST) {
 608       if(is_float(ft))
 609 #ifdef TCC_ARM_VFP
 610         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 611 #else
 612         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 613 #endif
 614       else
 615         o(0xE1A00000|(intr(r)<<12)|intr(v));
 616       return;
 617     }
 618   }
 619   tcc_error("load unimplemented!");
 620 }
 621
 622 /* store register 'r' in lvalue 'v' */
 623 void store(int r, SValue *sv)
 624 {
 625   SValue v1;
 626   int v, ft, fc, fr, sign;
 627   uint32_t op;
 628
 629   fr = sv->r;
 630   ft = sv->type.t;
 631   fc = sv->c.ul;
 632
 633   if(fc>=0)
 634     sign=0;
 635   else {
 636     sign=1;
 637     fc=-fc;
 638   }
 639
 640   v = fr & VT_VALMASK;
 641   if (fr & VT_LVAL || fr == VT_LOCAL) {
 642     uint32_t base = 0xb;
 643     if(v < VT_CONST) {
 644       base=intr(v);
 645       v=VT_LOCAL;
 646       fc=sign=0;
 647     } else if(v == VT_CONST) {
 648       v1.type.t = ft;
 649       v1.r = fr&~VT_LVAL;
 650       v1.c.ul = sv->c.ul;
 651       v1.sym=sv->sym;
 652       load(base=14, &v1);
 653       fc=sign=0;
 654       v=VT_LOCAL;
 655     }
 656     if(v == VT_LOCAL) {
 657        if(is_float(ft)) {
 658         calcaddr(&base,&fc,&sign,1020,2);
 659 #ifdef TCC_ARM_VFP
 660         op=0xED000A00; /* fsts */
 661         if(!sign)
 662           op|=0x800000;
 663         if ((ft & VT_BTYPE) != VT_FLOAT)
 664           op|=0x100;   /* fsts -> fstd */
 665         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 666 #else
 667         op=0xED000100;
 668         if(!sign)
 669           op|=0x800000;
 670 #if LDOUBLE_SIZE == 8
 671         if ((ft & VT_BTYPE) != VT_FLOAT)
 672           op|=0x8000;
 673 #else
 674         if ((ft & VT_BTYPE) == VT_DOUBLE)
 675           op|=0x8000;
 676         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 677           op|=0x400000;
 678 #endif
 679         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 680 #endif
 681         return;
 682       } else if((ft & VT_BTYPE) == VT_SHORT) {
 683         calcaddr(&base,&fc,&sign,255,0);
 684         op=0xE14000B0;
 685         if(!sign)
 686           op|=0x800000;
 687         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 688       } else {
 689         calcaddr(&base,&fc,&sign,4095,0);
 690         op=0xE5000000;
 691         if(!sign)
 692           op|=0x800000;
 693         if ((ft & VT_BTYPE) == VT_BYTE)
 694           op|=0x400000;
 695         o(op|(intr(r)<<12)|fc|(base<<16));
 696       }
 697       return;
 698     }
 699   }
 700   tcc_error("store unimplemented");
 701 }
 702
 703 static void gadd_sp(int val)
 704 {
 705   stuff_const_harder(0xE28DD000,val);
 706 }
 707
 708 /* 'is_jmp' is '1' if it is a jump */
 709 static void gcall_or_jmp(int is_jmp)
 710 {
 711   int r;
 712   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 713     uint32_t x;
 714     /* constant case */
 715     x=encbranch(ind,ind+vtop->c.ul,0);
 716     if(x) {
 717       if (vtop->r & VT_SYM) {
 718         /* relocation case */
 719         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 720       } else
 721         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 722       o(x|(is_jmp?0xE0000000:0xE1000000));
 723     } else {
 724       if(!is_jmp)
 725         o(0xE28FE004); // add lr,pc,#4
 726       o(0xE51FF004);   // ldr pc,[pc,#-4]
 727       if (vtop->r & VT_SYM)
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 729       o(vtop->c.ul);
 730     }
 731   } else {
 732     /* otherwise, indirect call */
 733     r = gv(RC_INT);
 734     if(!is_jmp)
 735       o(0xE1A0E00F);       // mov lr,pc
 736     o(0xE1A0F000|intr(r)); // mov pc,r
 737   }
 738 }
 739
 740 #ifdef TCC_ARM_HARDFLOAT
 741 static int is_float_hgen_aggr(CType *type)
 742 {
 743   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 744     struct Sym *ref;
 745     int btype, nb_fields = 0;
 746
 747     ref = type->ref;
 748     btype = ref->type.t & VT_BTYPE;
 749     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 750       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 751       return !ref && nb_fields <= 4;
 752     }
 753   }
 754   return 0;
 755 }
 756
 757 struct avail_regs {
 758   /* worst case: f(float, double, 3 float struct, double, 3 float struct, double) */
 759   signed char avail[3];
 760   int first_hole;
 761   int last_hole;
 762   int first_free_reg;
 763 };
 764
 765 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 766
 767 /* Assign a register for a CPRC param with correct size and alignment
 768  * size and align are in bytes, as returned by type_size */
 769 int assign_fpreg(struct avail_regs *avregs, int align, int size)
 770 {
 771   int first_reg = 0;
 772
 773   if (avregs->first_free_reg == -1)
 774     return -1;
 775   if (align >> 3) { // alignment needed (base type: double)
 776     first_reg = avregs->first_free_reg;
 777     if (first_reg & 1)
 778       avregs->avail[avregs->last_hole++] = first_reg++;
 779   } else {
 780     if (size == 4 && avregs->first_hole != avregs->last_hole)
 781       return avregs->avail[avregs->first_hole++];
 782     else
 783       first_reg = avregs->first_free_reg;
 784   }
 785   if (first_reg + size / 4 <= 16) {
 786     avregs->first_free_reg = first_reg + size / 4;
 787     return first_reg;
 788   }
 789   avregs->first_free_reg = -1;
 790   return -1;
 791 }
 792 #endif
 793
  794 /* Generate function call. The function address is pushed first, then
  795    all the parameters in call order. This function pops all the
  796    parameters and the function address. */
 797 void gfunc_call(int nb_args)
 798 {
 799   int size, align, r, args_size, i, ncrn, ncprn, argno, vfp_argno;
 800   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 801   SValue *before_stack = NULL; /* SValue before first on stack argument */
 802   SValue *before_vfpreg_hfa = NULL; /* SValue before first in VFP reg hfa argument */
 803 #ifdef TCC_ARM_HARDFLOAT
 804   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 805   signed char vfp_plan[16];
 806   int plan2[4+16];
 807   int variadic;
 808 #else
 809   int plan2[4]={0,0,0,0};
 810 #endif
 811   int vfp_todo=0;
 812   int todo=0, keep;
 813
 814 #ifdef TCC_ARM_HARDFLOAT
 815   memset(vfp_plan, -1, sizeof(vfp_plan));
 816   memset(plan2, 0, sizeof(plan2));
 817   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
 818 #endif
 819   r = vtop->r & VT_VALMASK;
 820   if (r == VT_CMP || (r & ~1) == VT_JMP)
 821     gv(RC_INT);
 822 #ifdef TCC_ARM_EABI
 823   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 824      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
 825     SValue tmp;
 826     tmp=vtop[-nb_args];
 827     vtop[-nb_args]=vtop[-nb_args+1];
 828     vtop[-nb_args+1]=tmp;
 829     --nb_args;
 830   }
 831
 832   vpushi(0), nb_args++;
 833   vtop->type.t = VT_LLONG;
 834   args_size = 0;
 835 #endif
 836   ncrn = ncprn = argno = vfp_argno = 0;
 837   /* Assign argument to registers and stack with alignment.
 838      If, considering alignment constraints, enough registers of the correct type
 839      (core or VFP) are free for the current argument, assign them to it, else
 840      allocate on stack with correct alignment. Whenever a structure is allocated
 841      in registers or on stack, it is always put on the stack at this stage. The
 842      stack is divided in 3 zones. The zone are, from low addresses to high
 843      addresses: structures to be loaded in core registers, structures to be
 844      loaded in VFP registers, argument allocated to stack. SValue's representing
 845      structures in the first zone are moved just after the SValue pointed by
 846      before_vfpreg_hfa. SValue's representing structures in the second zone are
 847      moved just after the SValue pointer by before_stack. */
 848   for(i = nb_args; i-- ;) {
 849     int j, assigned_vfpreg = 0;
 850     size = type_size(&vtop[-i].type, &align);
 851     switch(vtop[-i].type.t & VT_BTYPE) {
 852       case VT_STRUCT:
 853       case VT_FLOAT:
 854       case VT_DOUBLE:
 855       case VT_LDOUBLE:
 856 #ifdef TCC_ARM_HARDFLOAT
 857       if (!variadic) {
 858         int hfa = 0; /* Homogeneous float aggregate */
 859
 860         if (is_float(vtop[-i].type.t)
 861             || (hfa = is_float_hgen_aggr(&vtop[-i].type))) {
 862           int end_reg;
 863
 864           assigned_vfpreg = assign_fpreg(&avregs, align, size);
 865           end_reg = assigned_vfpreg + (size - 1) / 4;
 866           if (assigned_vfpreg >= 0) {
 867             vfp_plan[vfp_argno++]=TREG_F0 + assigned_vfpreg/2;
 868             if (hfa) {
 869               /* before_stack can only have been set because all core registers
 870                  are assigned, so no need to care about before_vfpreg_hfa if
 871                  before_stack is set */
 872               if (before_stack) {
 873                 vrote(&vtop[-i], &vtop[-i] - before_stack);
 874                 before_stack++;
 875               } else if (!before_vfpreg_hfa)
 876                 before_vfpreg_hfa = &vtop[-i-1];
 877               for (j = assigned_vfpreg; j <= end_reg; j++)
 878                 vfp_todo|=(1<<j);
 879             }
 880             continue;
 881           } else {
 882             if (!hfa)
 883               vfp_argno++;
 884             /* No need to update before_stack as no more hfa can be allocated in
 885                VFP regs */
 886             if (!before_vfpreg_hfa)
 887               before_vfpreg_hfa = &vtop[-i-1];
 888             break;
 889           }
 890         }
 891       }
 892 #endif
 893       ncrn = (ncrn + (align-1)/4) & -(align/4);
 894       size = (size + 3) & -4;
 895       if (ncrn + size/4 <= 4 || (ncrn < 4 && assigned_vfpreg != -1)) {
 896         /* Either there is HFA in VFP registers, or there is arguments on stack,
 897            it cannot be both. Hence either before_stack already points after
 898            the slot where the vtop[-i] SValue is moved, or before_stack will not
 899            be used */
 900         if (before_vfpreg_hfa) {
 901           vrote(&vtop[-i], &vtop[-i] - before_vfpreg_hfa);
 902           before_vfpreg_hfa++;
 903         }
 904         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 905           todo|=(1<<j);
 906         ncrn+=size/4;
 907         if (ncrn > 4) {
 908           args_size = (ncrn - 4) * 4;
 909           if (!before_stack)
 910             before_stack = &vtop[-i-1];
 911         }
 912       }
 913       else {
 914         ncrn = 4;
 915         /* No need to set before_vfpreg_hfa if not set since there will no
 916            longer be any structure assigned to core registers */
 917         if (!before_stack)
 918           before_stack = &vtop[-i-1];
 919         break;
 920       }
 921       continue;
 922       default:
 923 #ifdef TCC_ARM_EABI
 924       if (!i) {
 925         break;
 926       }
 927 #endif
 928       if (ncrn < 4) {
 929         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 930
 931         if (is_long) {
 932           ncrn = (ncrn + 1) & -2;
 933           if (ncrn == 4) {
 934             argno++;
 935             break;
 936           }
 937         }
 938         plan[argno++][0]=ncrn++;
 939         if (is_long) {
 940           plan[argno-1][1]=ncrn++;
 941         }
 942         continue;
 943       }
 944       argno++;
 945     }
 946 #ifdef TCC_ARM_EABI
 947     if(args_size & (align-1)) {
 948       vpushi(0);
 949       vtop->type.t = VT_VOID; /* padding */
 950       vrott(i+2);
 951       args_size += 4;
 952       nb_args++;
 953       argno++;
 954     }
 955 #endif
 956     args_size += (size + 3) & -4;
 957   }
 958 #ifdef TCC_ARM_EABI
 959   vtop--, nb_args--;
 960 #endif
 961   args_size = keep = 0;
 962   for(i = 0;i < nb_args; i++) {
 963     vrotb(keep+1);
 964     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 965       size = type_size(&vtop->type, &align);
 966       /* align to stack align size */
 967       size = (size + 3) & -4;
 968       /* allocate the necessary size on stack */
 969       gadd_sp(-size);
 970       /* generate structure store */
 971       r = get_reg(RC_INT);
 972       o(0xE1A0000D|(intr(r)<<12));
 973       vset(&vtop->type, r | VT_LVAL, 0);
 974       vswap();
 975       vstore();
 976       vtop--;
 977       args_size += size;
 978     } else if (is_float(vtop->type.t)) {
 979 #ifdef TCC_ARM_HARDFLOAT
 980       if (!variadic && --vfp_argno<16 && vfp_plan[vfp_argno]!=-1) {
 981         plan2[keep++]=vfp_plan[vfp_argno];
 982         continue;
 983       }
 984 #endif
 985 #ifdef TCC_ARM_VFP
 986       r=vfpr(gv(RC_FLOAT))<<12;
 987       size=4;
 988       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 989       {
 990         size=8;
 991         r|=0x101; /* fstms -> fstmd */
 992       }
 993       o(0xED2D0A01+r);
 994 #else
 995       r=fpr(gv(RC_FLOAT))<<12;
 996       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
 997         size = 4;
 998       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 999         size = 8;
1000       else
1001         size = LDOUBLE_SIZE;
1002
1003       if (size == 12)
1004         r|=0x400000;
1005       else if(size == 8)
1006         r|=0x8000;
1007
1008       o(0xED2D0100|r|(size>>2));
1009 #endif
1010       vtop--;
1011       args_size += size;
1012     } else {
1013       int s;
1014       /* simple type (currently always same size) */
1015       /* XXX: implicit cast ? */
1016       size=4;
1017       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1018         lexpand_nr();
1019         s=-1;
1020         if(--argno<4 && plan[argno][1]!=-1)
1021           s=plan[argno][1];
1022         argno++;
1023         size = 8;
1024         if(s==-1) {
1025           r = gv(RC_INT);
1026           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1027           vtop--;
1028         } else {
1029           size=0;
1030           plan2[keep]=s;
1031           keep++;
1032           vswap();
1033         }
1034       }
1035       s=-1;
1036       if(--argno<4 && plan[argno][0]!=-1)
1037         s=plan[argno][0];
1038 #ifdef TCC_ARM_EABI
1039       if(vtop->type.t == VT_VOID) {
1040         if(s == -1)
1041           o(0xE24DD004); /* sub sp,sp,#4 */
1042         vtop--;
1043       } else
1044 #endif
1045       if(s == -1) {
1046         r = gv(RC_INT);
1047         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1048         vtop--;
1049       } else {
1050         size=0;
1051         plan2[keep]=s;
1052         keep++;
1053       }
1054       args_size += size;
1055     }
1056   }
1057   for(i = 0; i < keep; i++) {
1058     vrotb(keep);
1059     gv(regmask(plan2[i]));
1060 #ifdef TCC_ARM_HARDFLOAT
1061     /* arg is in s(2d+1): plan2[i]<plan2[i+1] => alignment occured (ex f,d,f) */
1062     if (i < keep - 1 && is_float(vtop->type.t) && (plan2[i] <= plan2[i + 1])) {
1063       o(0xEEF00A40|(vfpr(plan2[i])<<12)|vfpr(plan2[i]));
1064     }
1065 #endif
1066   }
1067 save_regs(keep); /* save used temporary registers */
1068   keep++;
1069   if(ncrn) {
1070     int nb_regs=0;
1071     if (ncrn>4)
1072       ncrn=4;
1073     todo&=((1<<ncrn)-1);
1074     if(todo) {
1075       int i;
1076       o(0xE8BD0000|todo);
1077       for(i=0;i<4;i++)
1078         if(todo&(1<<i)) {
1079           vpushi(0);
1080           vtop->r=i;
1081           keep++;
1082           nb_regs++;
1083         }
1084     }
1085     args_size-=nb_regs*4;
1086   }
1087   if(vfp_todo) {
1088     int nb_fregs=0;
1089
1090     for(i=0;i<16;i++)
1091       if(vfp_todo&(1<<i)) {
1092         o(0xED9D0A00|(i&1)<<22|(i>>1)<<12|nb_fregs);
1093         vpushi(0);
1094         /* There might be 2 floats in a double VFP reg but that doesn't seem
1095            to matter */
1096         if (!(i%2))
1097           vtop->r=TREG_F0+i/2;
1098         keep++;
1099         nb_fregs++;
1100       }
1101     if (nb_fregs) {
1102       gadd_sp(nb_fregs*4);
1103       args_size-=nb_fregs*4;
1104     }
1105   }
1106   vrotb(keep);
1107   gcall_or_jmp(0);
1108   if (args_size)
1109       gadd_sp(args_size);
1110 #ifdef TCC_ARM_EABI
1111   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1112      && type_size(&vtop->type.ref->type, &align) <= 4)
1113   {
1114     store(REG_IRET,vtop-keep);
1115     ++keep;
1116   }
1117 #ifdef TCC_ARM_VFP
1118 #ifdef TCC_ARM_HARDFLOAT
1119   else if(variadic && is_float(vtop->type.ref->type.t)) {
1120 #else
1121   else if(is_float(vtop->type.ref->type.t)) {
1122 #endif
1123     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1124       o(0xEE000A10); /* fmsr s0,r0 */
1125     } else {
1126       o(0xEE000B10); /* fmdlr d0,r0 */
1127       o(0xEE201B10); /* fmdhr d0,r1 */
1128     }
1129   }
1130 #endif
1131 #endif
1132   vtop-=keep;
1133   leaffunc = 0;
1134 }
1135
/* Generate the function prolog for a function of type 'func_type'.
 *
 * The prolog saves the incoming argument registers on the stack, saves
 * fp/ip/lr, sets up fp and reserves one instruction slot that
 * gfunc_epilog() later patches with the stack adjustment.  It then pushes
 * one local symbol per parameter, giving each the fp-relative offset at
 * which the parameter was saved (or was passed on the stack).
 *
 * Also resets the per-function state: last_itod_magic, leaffunc and loc.
 */
void gfunc_prolog(CType *func_type)
{
  Sym *sym,*sym2;
  int n,nf,size,align, variadic, struct_ret = 0;
#ifdef TCC_ARM_HARDFLOAT
  struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
#endif

  sym = func_type->ref;
  func_vt = sym->type;

  /* n: number of core argument registers to save, nf: number of
     single-precision VFP argument registers to save */
  n = nf = 0;
  variadic = (func_type->ref->c == FUNC_ELLIPSIS);
  /* a struct result larger than 4 bytes takes one extra core register
     slot (presumably the hidden result pointer -- see func_vc below) */
  if((func_vt.t & VT_BTYPE) == VT_STRUCT
     && type_size(&func_vt,&align) > 4)
  {
    n++;
    struct_ret = 1;
    func_vc = 12; /* Offset from fp of the place to store the result */
  }
  /* first pass over the parameters: only count how many argument
     registers are in use; stops once both register files are exhausted */
  for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
    size = type_size(&sym2->type, &align);
#ifdef TCC_ARM_HARDFLOAT
    if (!variadic && (is_float(sym2->type.t)
        || is_float_hgen_aggr(&sym2->type))) {
      /* nf tracks the highest VFP register index handed out so far, plus
         one (assign_fpreg() is assumed to return -1 when none is left) */
      int tmpnf = assign_fpreg(&avregs, align, size) + 1;
      nf = (tmpnf > nf) ? tmpnf : nf;
    } else
#endif
    if (n < 4)
      n += (size + 3) / 4;
  }
  o(0xE1A0C00D); /* mov ip,sp */
  /* variadic functions always save all four core argument registers */
  if(variadic)
    n=4;
  if(n) {
    if(n>4)
      n=4;
#ifdef TCC_ARM_EABI
    /* round up to an even number of registers */
    n=(n+1)&-2;
#endif
    o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
  }
  if (nf) {
    if (nf>16)
      nf=16;
    nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
    o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
  }
  o(0xE92D5800); /* save fp, ip, lr */
  o(0xE1A0B00D); /* mov fp, sp */
  /* remember where the placeholder lives so the epilog can patch it */
  func_sub_sp_offset = ind;
  o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
  {
    int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */

#ifdef TCC_ARM_HARDFLOAT
    /* restart the VFP allocation so this pass hands out the same
       registers as the counting pass above */
    avregs = AVAIL_REGS_INITIALIZER;
#endif
    /* second pass: compute the save-area offset of every parameter and
       declare it as a local variable */
    while ((sym = sym->next)) {
      CType *type;
      type = &sym->type;
      size = type_size(type, &align);
      size = (size + 3) >> 2; /* size in 4-byte words from here on */
#ifdef TCC_ARM_HARDFLOAT
      if (!variadic && (is_float(sym->type.t)
          || is_float_hgen_aggr(&sym->type))) {
        int fpn = assign_fpreg(&avregs, align, size << 2);
        if (fpn >= 0) {
          /* the saved VFP registers are the lowest part of the save area */
          addr = fpn * 4;
        } else
          goto from_stack;
      } else
#endif
      if (pn < 4) {
#ifdef TCC_ARM_EABI
        /* align the core register number to the parameter's alignment */
        pn = (pn + (align-1)/4) & -(align/4);
#endif
        /* saved core registers follow the nf saved VFP registers */
        addr = (nf + pn) * 4;
        pn += size;
        /* parameter split between registers and stack: the overflow
           words already occupy the start of the caller's stack area */
        if (!sn && pn > 4)
          sn = (pn - 4);
      } else {
#ifdef TCC_ARM_HARDFLOAT
from_stack:
#endif
#ifdef TCC_ARM_EABI
        sn = (sn + (align-1)/4) & -(align/4);
#endif
        /* stack-passed parameters sit above both register save areas */
        addr = (n + nf + sn) * 4;
        sn += size;
      }
      /* +12 skips the three words (fp, ip, lr) pushed just below */
      sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
    }
  }
  last_itod_magic=0;
  leaffunc = 1;
  loc = -12;
}
1236
1237 /* generate function epilog */
1238 void gfunc_epilog(void)
1239 {
1240   uint32_t x;
1241   int diff;
1242 #ifdef TCC_ARM_EABI
1243   /* Useless but harmless copy of the float result into main register(s) in case
1244      of variadic function in the hardfloat variant */
1245   if(is_float(func_vt.t)) {
1246     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1247       o(0xEE100A10); /* fmrs r0, s0 */
1248     else {
1249       o(0xEE100B10); /* fmrdl r0, d0 */
1250       o(0xEE301B10); /* fmrdh r1, d0 */
1251     }
1252   }
1253 #endif
1254   o(0xE89BA800); /* restore fp, sp, pc */
1255   diff = (-loc + 3) & -4;
1256 #ifdef TCC_ARM_EABI
1257   if(!leaffunc)
1258     diff = (diff + 7) & -8;
1259 #endif
1260   if(diff > 12) {
1261     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1262     if(x)
1263       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1264     else {
1265       int addr;
1266       addr=ind;
1267       o(0xE59FC004); /* ldr ip,[pc+4] */
1268       o(0xE04BD00C); /* sub sp,fp,ip  */
1269       o(0xE1A0F00E); /* mov pc,lr */
1270       o(diff);
1271       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1272     }
1273   }
1274 }
1275
1276 /* generate a jump to a label */
1277 int gjmp(int t)
1278 {
1279   int r;
1280   r=ind;
1281   o(0xE0000000|encbranch(r,t,1));
1282   return r;
1283 }
1284
/* Emit a jump to the fixed address 'a'; the position of the emitted
   branch is not needed by the caller and is discarded. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1290
1291 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1292 int gtst(int inv, int t)
1293 {
1294   int v, r;
1295   uint32_t op;
1296   v = vtop->r & VT_VALMASK;
1297   r=ind;
1298   if (v == VT_CMP) {
1299     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1300     op|=encbranch(r,t,1);
1301     o(op);
1302     t=r;
1303   } else if (v == VT_JMP || v == VT_JMPI) {
1304     if ((v & 1) == inv) {
1305       if(!vtop->c.i)
1306         vtop->c.i=t;
1307       else {
1308         uint32_t *x;
1309         int p,lp;
1310         if(t) {
1311           p = vtop->c.i;
1312           do {
1313             p = decbranch(lp=p);
1314           } while(p);
1315           x = (uint32_t *)(cur_text_section->data + lp);
1316           *x &= 0xff000000;
1317           *x |= encbranch(lp,t,1);
1318         }
1319         t = vtop->c.i;
1320       }
1321     } else {
1322       t = gjmp(t);
1323       gsym(vtop->c.i);
1324     }
1325   } else {
1326     if (is_float(vtop->type.t)) {
1327       r=gv(RC_FLOAT);
1328 #ifdef TCC_ARM_VFP
1329       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1330       o(0xEEF1FA10); /* fmstat */
1331 #else
1332       o(0xEE90F118|(fpr(r)<<16));
1333 #endif
1334       vtop->r = VT_CMP;
1335       vtop->c.i = TOK_NE;
1336       return gtst(inv, t);
1337     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1338       /* constant jmp optimization */
1339       if ((vtop->c.i != 0) != inv)
1340         t = gjmp(t);
1341     } else {
1342       v = gv(RC_INT);
1343       o(0xE3300000|(intr(v)<<16));
1344       vtop->r = VT_CMP;
1345       vtop->c.i = TOK_NE;
1346       return gtst(inv, t);
1347     }
1348   }
1349   vtop--;
1350   return t;
1351 }
1352
/* Generate the integer binary operation 'op' on the two topmost stack
 * entries (vtop[-1] op vtop[0]); both are replaced by the result.
 *
 * The first switch classifies 'op' and selects, through 'c', how it is
 * emitted by the second switch:
 *   c == 1: two-operand ALU instruction; 'opc' ends up at bit 20 of the
 *           instruction word
 *   c == 2: shift, 'opc' is the shift type
 *   c == 3: call of the runtime helper 'func'; the result is taken from
 *           'retreg'
 * '*' and TOK_UMULL are emitted directly and return early.
 */
void gen_opi(int op)
{
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  unsigned short retreg = REG_IRET;

  c=0;
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      /* both operands in registers; the product overwrites vtop[-1].r */
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* the EABI helper's result is read from REG_LRET instead of
         REG_IRET (presumably the remainder of a quotient/remainder pair) */
      func=TOK___aeabi_idivmod;
      retreg=REG_LRET;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      func=TOK___aeabi_uidivmod;
      retreg=REG_LRET;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* the result occupies two registers: a fresh one stored in r2 and a
         new one in r; 'c' temporarily holds the old first operand */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      /* every other operator is emitted as a compare; the result is
         turned into VT_CMP below when 'op' is a comparison token */
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* constant first operand of a subtraction: swap the operands and
         use the reversed form so the constant may become an immediate */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* materialize a pending comparison/jump result into a register
         before the first operand is loaded */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20)|(c<<16);
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        uint32_t x;
        /* try the immediate form; stuff_const() returns 0 when the
           constant cannot be encoded */
        x=stuff_const(opc|0x2000000,vtop->c.i);
        if(x) {
          r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
          o(x|(r<<12));
          goto done;
        }
      }
      /* register form */
      fr=intr(gv(RC_INT));
      r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
      o(opc|(r<<12)|fr);
done:
      vtop--;
      if (op >= TOK_ULT && op <= TOK_GT) {
        /* comparison: the result lives in the condition flags */
        vtop->r = VT_CMP;
        vtop->c.i = op;
      }
      break;
    case 2:
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      opc|=r;
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* constant shift amount, reduced modulo 32 and encoded in the
           instruction itself */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|(c<<7)|(fr<<12));
      } else {
        /* shift amount taken from a register */
        fr=intr(gv(RC_INT));
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* call func(a, b): push the helper and rotate it below its two
         arguments, then expose the result register as a new stack entry */
      vpush_global_sym(&func_old_type, func);
      vrott(3);
      gfunc_call(2);
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1522
1523 #ifdef TCC_ARM_VFP
1524 static int is_zero(int i)
1525 {
1526   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1527     return 0;
1528   if (vtop[i].type.t == VT_FLOAT)
1529     return (vtop[i].c.f == 0.f);
1530   else if (vtop[i].type.t == VT_DOUBLE)
1531     return (vtop[i].c.d == 0.0);
1532   return (vtop[i].c.ld == 0.l);
1533 }
1534
/* Generate a floating point operation 'v = t1 op t2' instruction (VFP
 * variant).  The two operands are guaranteed to have the same floating
 * point type.
 *
 * t1 is vtop[-1] and t2 is vtop[0]; both are replaced by the result.
 * Arithmetic with a constant zero operand is simplified where possible
 * and comparisons leave their result in the flags (VT_CMP).
 */
void gen_opf(int op)
{
  uint32_t x;
  int fneg=0,r;
  /* common part of the instruction; T2CPR() adds the precision bit */
  x=0xEE000A00|T2CPR(vtop->type.t);
  switch(op) {
    case '+':
      /* x + 0 and 0 + x: keep x and emit nothing */
      if(is_zero(-1))
        vswap();
      if(is_zero(0)) {
        vtop--;
        return;
      }
      x|=0x300000;
      break;
    case '-':
      x|=0x300040;
      /* x - 0: keep x and emit nothing */
      if(is_zero(0)) {
        vtop--;
        return;
      }
      /* 0 - x: single-operand negate of x */
      if(is_zero(-1)) {
        x|=0x810000; /* fsubX -> fnegX */
        vswap();
        vtop--;
        fneg=1;
      }
      break;
    case '*':
      x|=0x200000;
      break;
    case '/':
      x|=0x800000;
      break;
    default:
      if(op < TOK_ULT || op > TOK_GT) {
        tcc_error("unknown fp op %x!",op);
        return;
      }
      /* zero on the left: swap the operands so the compare-with-zero
         form can be used, and mirror the condition accordingly */
      if(is_zero(-1)) {
        vswap();
        switch(op) {
          case TOK_LT: op=TOK_GT; break;
          case TOK_GE: op=TOK_ULE; break;
          case TOK_LE: op=TOK_GE; break;
          case TOK_GT: op=TOK_ULT; break;
        }
      }
      x|=0xB40040; /* fcmpX */
      if(op!=TOK_EQ && op!=TOK_NE)
        x|=0x80; /* fcmpX -> fcmpeX */
      if(is_zero(0)) {
        vtop--;
        o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
      } else {
        /* second operand first, then the first operand as destination
           field of the compare */
        x|=vfpr(gv(RC_FLOAT));
        vswap();
        o(x|(vfpr(gv(RC_FLOAT))<<12));
        vtop--;
      }
      o(0xEEF1FA10); /* fmstat */

      /* remap the condition token to the one matching the flags that
         fmstat transfers */
      switch(op) {
        case TOK_LE: op=TOK_ULE; break;
        case TOK_LT: op=TOK_ULT; break;
        case TOK_UGE: op=TOK_GE; break;
        case TOK_UGT: op=TOK_GT; break;
      }

      vtop->r = VT_CMP;
      vtop->c.i = op;
      return;
  }
  /* arithmetic: load the top operand (t2, or the only operand of a
     negate) */
  r=gv(RC_FLOAT);
  x|=vfpr(r);
  r=regmask(r);
  if(!fneg) {
    /* load t1 as well; 'r' accumulates the mask of both source
       registers */
    int r2;
    vswap();
    r2=gv(RC_FLOAT);
    x|=vfpr(r2)<<16;
    r|=regmask(r2);
  }
  /* pick the destination register, given the source register mask */
  vtop->r=get_reg_ex(RC_FLOAT,r);
  if(!fneg)
    vtop--;
  o(x|(vfpr(vtop->r)<<12));
}
1625
1626 #else
1627 static uint32_t is_fconst()
1628 {
1629   long double f;
1630   uint32_t r;
1631   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1632     return 0;
1633   if (vtop->type.t == VT_FLOAT)
1634     f = vtop->c.f;
1635   else if (vtop->type.t == VT_DOUBLE)
1636     f = vtop->c.d;
1637   else
1638     f = vtop->c.ld;
1639   if(!ieee_finite(f))
1640     return 0;
1641   r=0x8;
1642   if(f<0.0) {
1643     r=0x18;
1644     f=-f;
1645   }
1646   if(f==0.0)
1647     return r;
1648   if(f==1.0)
1649     return r|1;
1650   if(f==2.0)
1651     return r|2;
1652   if(f==3.0)
1653     return r|3;
1654   if(f==4.0)
1655     return r|4;
1656   if(f==5.0)
1657     return r|5;
1658   if(f==0.5)
1659     return r|6;
1660   if(f==10.0)
1661     return r|7;
1662   return 0;
1663 }
1664
/* Generate a floating point operation 'v = t1 op t2' instruction (FPA
 * variant).  The two operands are guaranteed to have the same floating
 * point type.
 *
 * c1 / c2 are the is_fconst() codes of t1 (vtop[-1]) and t2 (vtop[0]):
 * 0 when the operand is not an encodable constant, a value <= 0xf for a
 * non-negative constant and a value > 0xf for a negative one.  Constants
 * are folded into the instruction as immediates where the operation
 * allows it.
 */
void gen_opf(int op)
{
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  /* is_fconst() only looks at vtop, hence the swaps */
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* select the precision of the operation from the operand type */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  switch(op)
  {
    case '+':
      /* addition commutes: make sure a constant, if any, is on top */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* adding a negative constant = subtracting its magnitude */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* subtracting a negative constant = adding its magnitude */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* constant - x: reverse subtract with the constant immediate */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* only a non-negative constant can be used as immediate; try the
         other operand when the top one does not qualify */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* constant / x: reverse divide with the constant immediate */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* pick the condition token that matches the flags produced by
           the compare */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparision on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only t1 is a constant: swap the operands so that it becomes
           the immediate, and mirror the condition */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative constant: same bit as set for negative constants
             in the '+' case */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* c1 is reused from here on as the destination register field */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* second operand is an immediate (bit 0x8 set): no register of its
       own to take into account */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1828 #endif
1829
1830 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1831    and 'long long' cases. */
1832 ST_FUNC void gen_cvt_itof1(int t)
1833 {
1834   uint32_t r, r2;
1835   int bt;
1836   bt=vtop->type.t & VT_BTYPE;
1837   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1838 #ifndef TCC_ARM_VFP
1839     uint32_t dsize = 0;
1840 #endif
1841     r=intr(gv(RC_INT));
1842 #ifdef TCC_ARM_VFP
1843     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1844     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1845     r2<<=12;
1846     if(!(vtop->type.t & VT_UNSIGNED))
1847       r2|=0x80;                /* fuitoX -> fsituX */
1848     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1849 #else
1850     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1851     if((t & VT_BTYPE) != VT_FLOAT)
1852       dsize=0x80;    /* flts -> fltd */
1853     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1854     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1855       uint32_t off = 0;
1856       o(0xE3500000|(r<<12));        /* cmp */
1857       r=fpr(get_reg(RC_FLOAT));
1858       if(last_itod_magic) {
1859         off=ind+8-last_itod_magic;
1860         off/=4;
1861         if(off>255)
1862           off=0;
1863       }
1864       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1865       if(!off) {
1866         o(0xEA000000);              /* b */
1867         last_itod_magic=ind;
1868         o(0x4F800000);              /* 4294967296.0f */
1869       }
1870       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1871     }
1872 #endif
1873     return;
1874   } else if(bt == VT_LLONG) {
1875     int func;
1876     CType *func_type = 0;
1877     if((t & VT_BTYPE) == VT_FLOAT) {
1878       func_type = &func_float_type;
1879       if(vtop->type.t & VT_UNSIGNED)
1880         func=TOK___floatundisf;
1881       else
1882         func=TOK___floatdisf;
1883 #if LDOUBLE_SIZE != 8
1884     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1885       func_type = &func_ldouble_type;
1886       if(vtop->type.t & VT_UNSIGNED)
1887         func=TOK___floatundixf;
1888       else
1889         func=TOK___floatdixf;
1890     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1891 #else
1892     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1893 #endif
1894       func_type = &func_double_type;
1895       if(vtop->type.t & VT_UNSIGNED)
1896         func=TOK___floatundidf;
1897       else
1898         func=TOK___floatdidf;
1899     }
1900     if(func_type) {
1901       vpush_global_sym(func_type, func);
1902       vswap();
1903       gfunc_call(1);
1904       vpushi(0);
1905       vtop->r=TREG_F0;
1906       return;
1907     }
1908   }
1909   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1910 }
1911
1912 /* convert fp to int 't' type */
1913 void gen_cvt_ftoi(int t)
1914 {
1915   uint32_t r, r2;
1916   int u, func = 0;
1917   u=t&VT_UNSIGNED;
1918   t&=VT_BTYPE;
1919   r2=vtop->type.t & VT_BTYPE;
1920   if(t==VT_INT) {
1921 #ifdef TCC_ARM_VFP
1922     r=vfpr(gv(RC_FLOAT));
1923     u=u?0:0x10000;
1924     o(0xEEBC0A40|(r<<12)|r|T2CPR(r2)); /* ftoXiY */
1925     r2=intr(vtop->r=get_reg(RC_INT));
1926     o(0xEE100A10|(r<<16)|(r2<<12));
1927     return;
1928 #else
1929     if(u) {
1930       if(r2 == VT_FLOAT)
1931         func=TOK___fixunssfsi;
1932 #if LDOUBLE_SIZE != 8
1933       else if(r2 == VT_LDOUBLE)
1934         func=TOK___fixunsxfsi;
1935       else if(r2 == VT_DOUBLE)
1936 #else
1937       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1938 #endif
1939         func=TOK___fixunsdfsi;
1940     } else {
1941       r=fpr(gv(RC_FLOAT));
1942       r2=intr(vtop->r=get_reg(RC_INT));
1943       o(0xEE100170|(r2<<12)|r);
1944       return;
1945     }
1946 #endif
1947   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1948     if(r2 == VT_FLOAT)
1949       func=TOK___fixsfdi;
1950 #if LDOUBLE_SIZE != 8
1951     else if(r2 == VT_LDOUBLE)
1952       func=TOK___fixxfdi;
1953     else if(r2 == VT_DOUBLE)
1954 #else
1955     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1956 #endif
1957       func=TOK___fixdfdi;
1958   }
1959   if(func) {
1960     vpush_global_sym(&func_old_type, func);
1961     vswap();
1962     gfunc_call(1);
1963     vpushi(0);
1964     if(t == VT_LLONG)
1965       vtop->r2 = REG_LRET;
1966     vtop->r = REG_IRET;
1967     return;
1968   }
1969   tcc_error("unimplemented gen_cvt_ftoi!");
1970 }
1971
1972 /* convert from one floating point type to another */
1973 void gen_cvt_ftof(int t)
1974 {
1975 #ifdef TCC_ARM_VFP
1976   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1977     uint32_t r = vfpr(gv(RC_FLOAT));
1978     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1979   }
1980 #else
1981   /* all we have to do on i386 and FPA ARM is to put the float in a register */
1982   gv(RC_FLOAT);
1983 #endif
1984 }
1985
1986 /* computed goto support */
1987 void ggoto(void)
1988 {
1989   gcall_or_jmp(1);
1990   vtop--;
1991 }
1992
1993 /* end of ARM code generator */
1994 /*************************************************************/
1995 #endif
1996 /*************************************************************/