arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #ifdef TARGET_DEFS_ONLY
  24
/* Selecting the EABI configuration also turns on VFP support. */
#ifdef TCC_ARM_EABI
#ifndef TCC_ARM_VFP // Avoid useless warning
#define TCC_ARM_VFP
#endif
#endif

/* number of available registers: 5 integer registers plus 4 float
   registers, or plus 8 float registers when TCC_ARM_VFP is defined
   (this matches the TREG_* enum and the reg_classes[] table) */
#ifdef TCC_ARM_VFP
#define NB_REGS            13
#else
#define NB_REGS             9
#endif

/* A register can belong to several classes. The classes must be
   sorted from more general to more precise (see the gv2() code, which
   makes assumptions about this ordering).
   RC_INT and RC_FLOAT are the generic classes; every other RC_* bit
   names exactly one register. */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_R0      0x0004
#define RC_R1      0x0008
#define RC_R2      0x0010
#define RC_R3      0x0020
#define RC_R12     0x0040
#define RC_F0      0x0080
#define RC_F1      0x0100
#define RC_F2      0x0200
#define RC_F3      0x0400
#ifdef TCC_ARM_VFP
/* the four extra float registers only exist in the VFP configuration */
#define RC_F4      0x0800
#define RC_F5      0x1000
#define RC_F6      0x2000
#define RC_F7      0x4000
#endif
#define RC_IRET    RC_R0  /* function return: integer register */
#define RC_LRET    RC_R1  /* function return: second integer register */
#define RC_FRET    RC_F0  /* function return: float register */
  61
/* Pretty names for the registers. The values are indices into
   reg_classes[]; note that TREG_R12 is index 4, not 12 (intr()
   translates it to the hardware register number). */
enum {
    TREG_R0 = 0,
    TREG_R1,
    TREG_R2,
    TREG_R3,
    TREG_R12,
    TREG_F0,
    TREG_F1,
    TREG_F2,
    TREG_F3,
#ifdef TCC_ARM_VFP
    /* only present in the VFP configuration (NB_REGS == 13) */
    TREG_F4,
    TREG_F5,
    TREG_F6,
    TREG_F7,
#endif
};
  80
#ifdef TCC_ARM_VFP
/* Precision bit for a VFP opcode: 0x100 for every basic type other
   than VT_FLOAT, 0 for VT_FLOAT. load() ORs the same 0x100 bit into
   flds to obtain fldd. */
#define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
#endif

/* return registers for function */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_LRET TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */

#ifdef TCC_ARM_EABI
/* With EABI both the 64-bit division and the 64-bit modulo token map
   onto the same __aeabi_*ldivmod token. */
#define TOK___divdi3 TOK___aeabi_ldivmod
#define TOK___moddi3 TOK___aeabi_ldivmod
#define TOK___udivdi3 TOK___aeabi_uldivmod
#define TOK___umoddi3 TOK___aeabi_uldivmod
#endif

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. */
//#define FUNC_STRUCT_PARAM_AS_PTR

/* With EABI and VFP, dedicated float/double function types exist (the
   objects are defined further down in this file); otherwise all three
   func_*_type names alias func_old_type. */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
ST_DATA CType float_type, double_type, func_float_type, func_double_type;
#define func_ldouble_type func_double_type
#else
#define func_float_type func_old_type
#define func_double_type func_old_type
#define func_ldouble_type func_old_type
#endif

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes.
   Both the VFP and the fallback definition below currently yield 8. */
#ifdef TCC_ARM_VFP
#define LDOUBLE_SIZE  8
#endif

#ifndef LDOUBLE_SIZE
#define LDOUBLE_SIZE  8
#endif

#ifdef TCC_ARM_EABI
#define LDOUBLE_ALIGN 8
#else
#define LDOUBLE_ALIGN 4
#endif

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* plain 'char' is unsigned on this target */
#define CHAR_IS_UNSIGNED

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_ARM

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_ARM_ABS32
#define R_DATA_PTR  R_ARM_ABS32
#define R_JMP_SLOT  R_ARM_JUMP_SLOT
#define R_COPY      R_ARM_COPY

#define ELF_START_ADDR 0x00008000
#define ELF_PAGE_SIZE  0x1000
 149
 150 /******************************************************/
 151 #else /* ! TARGET_DEFS_ONLY */
 152 /******************************************************/
 153 #include "tcc.h"
 154
/* Register classes, indexed by TREG_* value. Every entry combines the
   generic class (RC_INT or RC_FLOAT) with the bit that identifies the
   individual register. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* r0 */ RC_INT | RC_R0,
    /* r1 */ RC_INT | RC_R1,
    /* r2 */ RC_INT | RC_R2,
    /* r3 */ RC_INT | RC_R3,
    /* r12 */ RC_INT | RC_R12,
    /* f0 */ RC_FLOAT | RC_F0,
    /* f1 */ RC_FLOAT | RC_F1,
    /* f2 */ RC_FLOAT | RC_F2,
    /* f3 */ RC_FLOAT | RC_F3,
#ifdef TCC_ARM_VFP
 /* d4/s8 */ RC_FLOAT | RC_F4,
/* d5/s10 */ RC_FLOAT | RC_F5,
/* d6/s12 */ RC_FLOAT | RC_F6,
/* d7/s14 */ RC_FLOAT | RC_F7,
#endif
};
 172
/* keep this condition in sync with the matching
   '#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)' declaration in
   the TARGET_DEFS_ONLY section above */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
ST_DATA CType float_type, double_type, func_float_type, func_double_type;
#endif

/* Per-file code generation state.
   NOTE(review): none of these three variables is referenced by the
   functions between here and gfunc_call(); presumably they belong to
   prolog/epilog and int-to-double code further down -- confirm. */
static int func_sub_sp_offset, last_itod_magic;
static int leaffunc;
 180
 181 static int two2mask(int a,int b) {
 182   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 183 }
 184
 185 static int regmask(int r) {
 186   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 187 }
 188
 189 /******************************************************/
 190
 191 void o(uint32_t i)
 192 {
 193   /* this is a good place to start adding big-endian support*/
 194   int ind1;
 195
 196   ind1 = ind + 4;
 197   if (!cur_text_section)
 198     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 199          "can't evaluate constant expressions outside of a function.");
 200   if (ind1 > cur_text_section->data_allocated)
 201     section_realloc(cur_text_section, ind1);
 202   cur_text_section->data[ind++] = i&255;
 203   i>>=8;
 204   cur_text_section->data[ind++] = i&255;
 205   i>>=8;
 206   cur_text_section->data[ind++] = i&255;
 207   i>>=8;
 208   cur_text_section->data[ind++] = i;
 209 }
 210
 211 static uint32_t stuff_const(uint32_t op, uint32_t c)
 212 {
 213   int try_neg=0;
 214   uint32_t nc = 0, negop = 0;
 215
 216   switch(op&0x1F00000)
 217   {
 218     case 0x800000: //add
 219     case 0x400000: //sub
 220       try_neg=1;
 221       negop=op^0xC00000;
 222       nc=-c;
 223       break;
 224     case 0x1A00000: //mov
 225     case 0x1E00000: //mvn
 226       try_neg=1;
 227       negop=op^0x400000;
 228       nc=~c;
 229       break;
 230     case 0x200000: //xor
 231       if(c==~0)
 232         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 233       break;
 234     case 0x0: //and
 235       if(c==~0)
 236         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 237     case 0x1C00000: //bic
 238       try_neg=1;
 239       negop=op^0x1C00000;
 240       nc=~c;
 241       break;
 242     case 0x1800000: //orr
 243       if(c==~0)
 244         return (op&0xFFF0FFFF)|0x1E00000;
 245       break;
 246   }
 247   do {
 248     uint32_t m;
 249     int i;
 250     if(c<256) /* catch undefined <<32 */
 251       return op|c;
 252     for(i=2;i<32;i+=2) {
 253       m=(0xff>>i)|(0xff<<(32-i));
 254       if(!(c&~m))
 255         return op|(i<<7)|(c<<i)|(c>>(32-i));
 256     }
 257     op=negop;
 258     c=nc;
 259   } while(try_neg--);
 260   return 0;
 261 }
 262
 263
 264 //only add,sub
 265 void stuff_const_harder(uint32_t op, uint32_t v) {
 266   uint32_t x;
 267   x=stuff_const(op,v);
 268   if(x)
 269     o(x);
 270   else {
 271     uint32_t a[16], nv, no, o2, n2;
 272     int i,j,k;
 273     a[0]=0xff;
 274     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 275     for(i=1;i<16;i++)
 276       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 277     for(i=0;i<12;i++)
 278       for(j=i<4?i+12:15;j>=i+4;j--)
 279         if((v&(a[i]|a[j]))==v) {
 280           o(stuff_const(op,v&a[i]));
 281           o(stuff_const(o2,v&a[j]));
 282           return;
 283         }
 284     no=op^0xC00000;
 285     n2=o2^0xC00000;
 286     nv=-v;
 287     for(i=0;i<12;i++)
 288       for(j=i<4?i+12:15;j>=i+4;j--)
 289         if((nv&(a[i]|a[j]))==nv) {
 290           o(stuff_const(no,nv&a[i]));
 291           o(stuff_const(n2,nv&a[j]));
 292           return;
 293         }
 294     for(i=0;i<8;i++)
 295       for(j=i+4;j<12;j++)
 296         for(k=i<4?i+12:15;k>=j+4;k--)
 297           if((v&(a[i]|a[j]|a[k]))==v) {
 298             o(stuff_const(op,v&a[i]));
 299             o(stuff_const(o2,v&a[j]));
 300             o(stuff_const(o2,v&a[k]));
 301             return;
 302           }
 303     no=op^0xC00000;
 304     nv=-v;
 305     for(i=0;i<8;i++)
 306       for(j=i+4;j<12;j++)
 307         for(k=i<4?i+12:15;k>=j+4;k--)
 308           if((nv&(a[i]|a[j]|a[k]))==nv) {
 309             o(stuff_const(no,nv&a[i]));
 310             o(stuff_const(n2,nv&a[j]));
 311             o(stuff_const(n2,nv&a[k]));
 312             return;
 313           }
 314     o(stuff_const(op,v&a[0]));
 315     o(stuff_const(o2,v&a[4]));
 316     o(stuff_const(o2,v&a[8]));
 317     o(stuff_const(o2,v&a[12]));
 318   }
 319 }
 320
 321 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 322 {
 323   addr-=pos+8;
 324   addr/=4;
 325   if(addr>=0x1000000 || addr<-0x1000000) {
 326     if(fail)
 327       tcc_error("FIXME: function bigger than 32MB");
 328     return 0;
 329   }
 330   return 0x0A000000|(addr&0xffffff);
 331 }
 332
 333 int decbranch(int pos)
 334 {
 335   int x;
 336   x=*(uint32_t *)(cur_text_section->data + pos);
 337   x&=0x00ffffff;
 338   if(x&0x800000)
 339     x-=0x1000000;
 340   return x*4+pos+8;
 341 }
 342
 343 /* output a symbol and patch all calls to it */
 344 void gsym_addr(int t, int a)
 345 {
 346   uint32_t *x;
 347   int lt;
 348   while(t) {
 349     x=(uint32_t *)(cur_text_section->data + t);
 350     t=decbranch(lt=t);
 351     if(a==lt+4)
 352       *x=0xE1A00000; // nop
 353     else {
 354       *x &= 0xff000000;
 355       *x |= encbranch(lt,a,1);
 356     }
 357   }
 358 }
 359
 360 void gsym(int t)
 361 {
 362   gsym_addr(t, ind);
 363 }
 364
 365 #ifdef TCC_ARM_VFP
 366 static uint32_t vfpr(int r)
 367 {
 368   if(r<TREG_F0 || r>TREG_F7)
 369     tcc_error("compiler error! register %i is no vfp register",r);
 370   return r-5;
 371 }
 372 #else
 373 static uint32_t fpr(int r)
 374 {
 375   if(r<TREG_F0 || r>TREG_F3)
 376     tcc_error("compiler error! register %i is no fpa register",r);
 377   return r-5;
 378 }
 379 #endif
 380
 381 static uint32_t intr(int r)
 382 {
 383   if(r==4)
 384     return 12;
 385   if((r<0 || r>4) && r!=14)
 386     tcc_error("compiler error! register %i is no int register",r);
 387   return r;
 388 }
 389
 390 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 391 {
 392   if(*off>maxoff || *off&((1<<shift)-1)) {
 393     uint32_t x, y;
 394     x=0xE280E000;
 395     if(*sgn)
 396       x=0xE240E000;
 397     x|=(*base)<<16;
 398     *base=14; // lr
 399     y=stuff_const(x,*off&~maxoff);
 400     if(y) {
 401       o(y);
 402       *off&=maxoff;
 403       return;
 404     }
 405     y=stuff_const(x,(*off+maxoff)&~maxoff);
 406     if(y) {
 407       o(y);
 408       *sgn=!*sgn;
 409       *off=((*off+maxoff)&~maxoff)-*off;
 410       return;
 411     }
 412     stuff_const_harder(x,*off&~maxoff);
 413     *off&=maxoff;
 414   }
 415 }
 416
 417 static uint32_t mapcc(int cc)
 418 {
 419   switch(cc)
 420   {
 421     case TOK_ULT:
 422       return 0x30000000; /* CC/LO */
 423     case TOK_UGE:
 424       return 0x20000000; /* CS/HS */
 425     case TOK_EQ:
 426       return 0x00000000; /* EQ */
 427     case TOK_NE:
 428       return 0x10000000; /* NE */
 429     case TOK_ULE:
 430       return 0x90000000; /* LS */
 431     case TOK_UGT:
 432       return 0x80000000; /* HI */
 433     case TOK_Nset:
 434       return 0x40000000; /* MI */
 435     case TOK_Nclear:
 436       return 0x50000000; /* PL */
 437     case TOK_LT:
 438       return 0xB0000000; /* LT */
 439     case TOK_GE:
 440       return 0xA0000000; /* GE */
 441     case TOK_LE:
 442       return 0xD0000000; /* LE */
 443     case TOK_GT:
 444       return 0xC0000000; /* GT */
 445   }
 446   tcc_error("unexpected condition code");
 447   return 0xE0000000; /* AL */
 448 }
 449
 450 static int negcc(int cc)
 451 {
 452   switch(cc)
 453   {
 454     case TOK_ULT:
 455       return TOK_UGE;
 456     case TOK_UGE:
 457       return TOK_ULT;
 458     case TOK_EQ:
 459       return TOK_NE;
 460     case TOK_NE:
 461       return TOK_EQ;
 462     case TOK_ULE:
 463       return TOK_UGT;
 464     case TOK_UGT:
 465       return TOK_ULE;
 466     case TOK_Nset:
 467       return TOK_Nclear;
 468     case TOK_Nclear:
 469       return TOK_Nset;
 470     case TOK_LT:
 471       return TOK_GE;
 472     case TOK_GE:
 473       return TOK_LT;
 474     case TOK_LE:
 475       return TOK_GT;
 476     case TOK_GT:
 477       return TOK_LE;
 478   }
 479   tcc_error("unexpected condition code");
 480   return TOK_NE;
 481 }
 482
/* Load register 'r' from value 'sv'.
 *
 * Handles lvalues (memory reads relative to fp, to another register
 * or to a constant/symbol address) as well as non-lvalues: constants,
 * addresses of locals, comparison results, pending jump chains and
 * register-to-register copies. Any other combination ends in
 * tcc_error("load unimplemented!").
 *
 * r:  destination register (TREG_* index; the recursive calls below
 *     also pass 14 for lr)
 * sv: value to load
 */
void load(int r, SValue *sv)
{
  int v, ft, fc, fr, sign;
  uint32_t op;
  SValue v1;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the offset into magnitude and direction: the load
     instructions below take an unsigned offset plus an add/sub bit */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL) {
    uint32_t base = 0xB; // fp
    /* every lvalue form is reduced to "base register + offset" and
       then handled by the VT_LOCAL code below */
    if(v == VT_LLOCAL) {
      /* first fetch the pointer stored in the local slot into lr */
      v1.type.t = VT_PTR;
      v1.r = VT_LOCAL | VT_LVAL;
      v1.c.ul = sv->c.ul;
      load(base=14 /* lr */, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v == VT_CONST) {
      /* materialize the constant/symbol address in lr */
      v1.type.t = VT_PTR;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v < VT_CONST) {
      /* the address is already in an integer register */
      base=intr(v);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
      if(is_float(ft)) {
        /* offset field: multiple of 4, at most 1020 */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED100A00; /* flds */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* flds -> fldd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED100100; /* FPA load, single precision */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000; /* everything but float uses the double form */
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        else if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
      } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
                || (ft & VT_BTYPE) == VT_SHORT) {
        /* signed byte and all halfword loads: 8-bit offset split
           into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE1500090;
        if ((ft & VT_BTYPE) == VT_SHORT)
          op|=0x20; /* halfword */
        if ((ft & VT_UNSIGNED) == 0)
          op|=0x40; /* sign-extending */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word and unsigned byte loads: 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5100000; /* ldr */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) == VT_BYTE)
          op|=0x400000; /* ldr -> ldrb */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  } else {
    if (v == VT_CONST) {
      /* try a single mov/mvn with immediate first */
      op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        /* otherwise place the value as a literal in the code stream:
           ldr r,[pc] ; b over the literal ; <literal> */
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM)
          /* the relocation must be recorded before the literal is
             emitted so that 'ind' still points at it */
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
      } else
        o(op);
      return;
    } else if (v == VT_LOCAL) {
      /* address of a local: add r,fp,#offset */
      op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        /* offset not encodable: load it as a literal, then add fp */
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM) // needed ?
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
        o(0xE08B0000|(intr(r)<<12)|intr(r)); /* add r,fp,r */
      } else
        o(op);
      return;
    } else if(v == VT_CMP) {
      /* turn the flags into 0/1: mov<cc> r,#1 ; mov<!cc> r,#0 */
      o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
      o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
      return;
    } else if (v == VT_JMP || v == VT_JMPI) {
      int t;
      /* the low bit of v selects which value the fall-through path
         produces; the resolved jump chain produces the other one */
      t = v & 1;
      o(0xE3A00000|(intr(r)<<12)|t); /* mov r,#t */
      o(0xEA000000);                 /* skip the next instruction */
      gsym(sv->c.ul);
      o(0xE3A00000|(intr(r)<<12)|(t^1)); /* mov r,#(t^1) */
      return;
    } else if (v < VT_CONST) {
      /* register to register copy */
      if(is_float(ft))
#ifdef TCC_ARM_VFP
        o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
#else
        o(0xEE008180|(fpr(r)<<12)|fpr(v));
#endif
      else
        o(0xE1A00000|(intr(r)<<12)|intr(v)); /* mov r,v */
      return;
    }
  }
  tcc_error("load unimplemented!");
}
 621
/* Store register 'r' into the lvalue described by 'sv'.
 *
 * The destination is reduced to "base register + offset": fp-relative
 * for locals, an integer register that already holds the address, or
 * lr after the constant/symbol address has been loaded into it.
 * Anything else ends in tcc_error("store unimplemented").
 *
 * r:  source register (TREG_* index)
 * sv: destination lvalue
 */
void store(int r, SValue *sv)
{
  SValue v1;
  int v, ft, fc, fr, sign;
  uint32_t op;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the offset into magnitude and direction: the store
     instructions below take an unsigned offset plus an add/sub bit */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL || fr == VT_LOCAL) {
    uint32_t base = 0xb; /* fp */
    if(v < VT_CONST) {
      /* the address is already in an integer register */
      base=intr(v);
      v=VT_LOCAL;
      fc=sign=0;
    } else if(v == VT_CONST) {
      /* materialize the constant/symbol address in lr */
      v1.type.t = ft;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
       if(is_float(ft)) {
        /* offset field: multiple of 4, at most 1020 */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED000A00; /* fsts */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* fsts -> fstd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        op=0xED000100; /* FPA store, single precision */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000; /* everything but float uses the double form */
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
        return;
      } else if((ft & VT_BTYPE) == VT_SHORT) {
        /* halfword store: 8-bit offset split into two nibbles */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE14000B0; /* strh */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word and byte stores: 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5000000; /* str */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) == VT_BYTE)
          op|=0x400000; /* str -> strb */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  }
  tcc_error("store unimplemented");
}
 702
 703 static void gadd_sp(int val)
 704 {
 705   stuff_const_harder(0xE28DD000,val);
 706 }
 707
 708 /* 'is_jmp' is '1' if it is a jump */
 709 static void gcall_or_jmp(int is_jmp)
 710 {
 711   int r;
 712   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 713     uint32_t x;
 714     /* constant case */
 715     x=encbranch(ind,ind+vtop->c.ul,0);
 716     if(x) {
 717       if (vtop->r & VT_SYM) {
 718         /* relocation case */
 719         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 720       } else
 721         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 722       o(x|(is_jmp?0xE0000000:0xE1000000));
 723     } else {
 724       if(!is_jmp)
 725         o(0xE28FE004); // add lr,pc,#4
 726       o(0xE51FF004);   // ldr pc,[pc,#-4]
 727       if (vtop->r & VT_SYM)
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 729       o(vtop->c.ul);
 730     }
 731   } else {
 732     /* otherwise, indirect call */
 733     r = gv(RC_INT);
 734     if(!is_jmp)
 735       o(0xE1A0E00F);       // mov lr,pc
 736     o(0xE1A0F000|intr(r)); // mov pc,r
 737   }
 738 }
 739
 740 #ifdef TCC_ARM_HARDFLOAT
 741 static int is_float_hgen_aggr(CType *type)
 742 {
 743   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 744     struct Sym *ref;
 745     int btype, nb_fields = 0;
 746
 747     ref = type->ref;
 748     btype = ref->type.t & VT_BTYPE;
 749     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 750       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 751       return !ref && nb_fields <= 4;
 752     }
 753   }
 754   return 0;
 755 }
 756
 757 struct avail_regs {
 758   /* worst case: f(float, double, 3 float struct, double, 3 float struct, double) */
 759   signed char avail[3];
 760   int first_hole;
 761   int last_hole;
 762   int first_free_reg;
 763 };
 764
 765 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 766
 767 /* Assign a register for a CPRC param with correct size and alignment
 768  * size and align are in bytes, as returned by type_size */
 769 int assign_fpreg(struct avail_regs *avregs, int align, int size)
 770 {
 771   int first_reg = 0;
 772
 773   if (avregs->first_free_reg == -1)
 774     return -1;
 775   if (align >> 3) { // alignment needed (base type: double)
 776     first_reg = avregs->first_free_reg;
 777     if (first_reg & 1)
 778       avregs->avail[avregs->last_hole++] = first_reg++;
 779   } else {
 780     if (size == 4 && avregs->first_hole != avregs->last_hole)
 781       return avregs->avail[avregs->first_hole++];
 782     else
 783       first_reg = avregs->first_free_reg;
 784   }
 785   if (first_reg + size / 4 <= 16) {
 786     avregs->first_free_reg = first_reg + size / 4;
 787     return first_reg;
 788   }
 789   avregs->first_free_reg = -1;
 790   return -1;
 791 }
 792 #endif
 793
 794 /* Generate function call. The function address is pushed first, then
 795    all the parameters in call order. This functions pops all the
 796    parameters and the function address. */
 797 void gfunc_call(int nb_args)
 798 {
 799   int size, align, r, args_size, i, ncrn, ncprn, argno, vfp_argno;
 800   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 801   SValue *before_stack = NULL; /* SValue before first on stack argument */
 802   SValue *before_vfpreg_hfa = NULL; /* SValue before first in VFP reg hfa argument */
 803 #ifdef TCC_ARM_HARDFLOAT
 804   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 805   signed char vfp_plan[16];
 806   int plan2[4+16];
 807   int variadic;
 808 #else
 809   int plan2[4]={0,0,0,0};
 810 #endif
 811   int vfp_todo=0;
 812   int todo=0, keep;
 813
 814 #ifdef TCC_ARM_HARDFLOAT
 815   memset(vfp_plan, -1, sizeof(vfp_plan));
 816   memset(plan2, 0, sizeof(plan2));
 817   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
 818 #endif
 819   r = vtop->r & VT_VALMASK;
 820   if (r == VT_CMP || (r & ~1) == VT_JMP)
 821     gv(RC_INT);
 822 #ifdef TCC_ARM_EABI
 823   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 824      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
 825     SValue tmp;
 826     tmp=vtop[-nb_args];
 827     vtop[-nb_args]=vtop[-nb_args+1];
 828     vtop[-nb_args+1]=tmp;
 829     --nb_args;
 830   }
 831
 832   vpushi(0);
 833   vtop->type.t = VT_LLONG;
 834   args_size = 0;
 835 #endif
 836   ncrn = ncprn = argno = vfp_argno = 0;
 837   /* Assign argument to registers and stack with alignment.
 838      If, considering alignment constraints, enough registers of the correct type
 839      (core or VFP) are free for the current argument, assign them to it, else
 840      allocate on stack with correct alignment. Whenever a structure is allocated
 841      in registers or on stack, it is always put on the stack at this stage. The
 842      stack is divided in 3 zones. The zone are, from low addresses to high
 843      addresses: structures to be loaded in core registers, structures to be
 844      loaded in VFP registers, argument allocated to stack. SValue's representing
 845      structures in the first zone are moved just after the SValue pointed by
 846      before_vfpreg_hfa. SValue's representing structures in the second zone are
 847      moved just after the SValue pointer by before_stack. */
 848   for(i = nb_args + 1 ; i-- ;) {
 849     int j, assigned_vfpreg = 0;
 850     size = type_size(&vtop[-i].type, &align);
 851     switch(vtop[-i].type.t & VT_BTYPE) {
 852       case VT_STRUCT:
 853       case VT_FLOAT:
 854       case VT_DOUBLE:
 855       case VT_LDOUBLE:
 856 #ifdef TCC_ARM_HARDFLOAT
 857       if (!variadic) {
 858         int hfa = 0; /* Homogeneous float aggregate */
 859
 860         if (is_float(vtop[-i].type.t)
 861             || (hfa = is_float_hgen_aggr(&vtop[-i].type))) {
 862           int end_reg;
 863
 864           assigned_vfpreg = assign_fpreg(&avregs, align, size);
 865           end_reg = assigned_vfpreg + (size - 1) / 4;
 866           if (assigned_vfpreg >= 0) {
 867             vfp_plan[vfp_argno++]=TREG_F0 + assigned_vfpreg/2;
 868             if (hfa) {
 869               /* before_stack can only have been set because all core registers
 870                  are assigned, so no need to care about before_vfpreg_hfa if
 871                  before_stack is set */
 872               if (before_stack) {
 873                 vrote(&vtop[-i], &vtop[-i] - before_stack);
 874                 before_stack++;
 875               } else if (!before_vfpreg_hfa)
 876                 before_vfpreg_hfa = &vtop[-i-1];
 877               for (j = assigned_vfpreg; j <= end_reg; j++)
 878                 vfp_todo|=(1<<j);
 879             }
 880             continue;
 881           } else {
 882             if (!hfa)
 883               vfp_argno++;
 884             /* No need to update before_stack as no more hfa can be allocated in
 885                VFP regs */
 886             if (!before_vfpreg_hfa)
 887               before_vfpreg_hfa = &vtop[-i-1];
 888             break;
 889           }
 890         }
 891       }
 892 #endif
 893       ncrn = (ncrn + (align-1)/4) & -(align/4);
 894       size = (size + 3) & -4;
 895       if (ncrn + size/4 <= 4 || (ncrn < 4 && assigned_vfpreg != -1)) {
 896         /* Either there is HFA in VFP registers, or there is arguments on stack,
 897            it cannot be both. Hence either before_stack already points after
 898            the slot where the vtop[-i] SValue is moved, or before_stack will not
 899            be used */
 900         if (before_vfpreg_hfa) {
 901           vrote(&vtop[-i], &vtop[-i] - before_vfpreg_hfa);
 902           before_vfpreg_hfa++;
 903         }
 904         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 905           todo|=(1<<j);
 906         ncrn+=size/4;
 907         if (ncrn > 4) {
 908           args_size = (ncrn - 4) * 4;
 909           if (!before_stack)
 910             before_stack = &vtop[-i-1];
 911         }
 912       }
 913       else {
 914         ncrn = 4;
 915         /* No need to set before_vfpreg_hfa if not set since there will no
 916            longer be any structure assigned to core registers */
 917         if (!before_stack)
 918           before_stack = &vtop[-i-1];
 919         break;
 920       }
 921       continue;
 922       default:
 923       if (!i) {
 924         break;
 925       }
 926       if (ncrn < 4) {
 927         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 928
 929         if (is_long) {
 930           ncrn = (ncrn + 1) & -2;
 931           if (ncrn == 4) {
 932             argno++;
 933             break;
 934           }
 935         }
 936         plan[argno++][0]=ncrn++;
 937         if (is_long) {
 938           plan[argno-1][1]=ncrn++;
 939         }
 940         continue;
 941       }
 942       argno++;
 943     }
 944 #ifdef TCC_ARM_EABI
 945     if(args_size & (align-1)) {
 946       vpushi(0);
 947       vtop->type.t = VT_VOID; /* padding */
 948       vrott(i+2);
 949       args_size += 4;
 950       nb_args++;
 951       argno++;
 952     }
 953 #endif
 954     args_size += (size + 3) & -4;
 955   }
 956   vtop--;
 957   args_size = keep = 0;
 958   for(i = 0;i < nb_args; i++) {
 959     vrotb(keep+1);
 960     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 961       size = type_size(&vtop->type, &align);
 962       /* align to stack align size */
 963       size = (size + 3) & -4;
 964       /* allocate the necessary size on stack */
 965       gadd_sp(-size);
 966       /* generate structure store */
 967       r = get_reg(RC_INT);
 968       o(0xE1A0000D|(intr(r)<<12));
 969       vset(&vtop->type, r | VT_LVAL, 0);
 970       vswap();
 971       vstore();
 972       vtop--;
 973       args_size += size;
 974     } else if (is_float(vtop->type.t)) {
 975 #ifdef TCC_ARM_HARDFLOAT
 976       if (!variadic && --vfp_argno<16 && vfp_plan[vfp_argno]!=-1) {
 977         plan2[keep++]=vfp_plan[vfp_argno];
 978         continue;
 979       }
 980 #endif
 981 #ifdef TCC_ARM_VFP
 982       r=vfpr(gv(RC_FLOAT))<<12;
 983       size=4;
 984       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 985       {
 986         size=8;
 987         r|=0x101; /* fstms -> fstmd */
 988       }
 989       o(0xED2D0A01+r);
 990 #else
 991       r=fpr(gv(RC_FLOAT))<<12;
 992       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
 993         size = 4;
 994       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 995         size = 8;
 996       else
 997         size = LDOUBLE_SIZE;
 998
 999       if (size == 12)
1000         r|=0x400000;
1001       else if(size == 8)
1002         r|=0x8000;
1003
1004       o(0xED2D0100|r|(size>>2));
1005 #endif
1006       vtop--;
1007       args_size += size;
1008     } else {
1009       int s;
1010       /* simple type (currently always same size) */
1011       /* XXX: implicit cast ? */
1012       size=4;
1013       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1014         lexpand_nr();
1015         s=-1;
1016         if(--argno<4 && plan[argno][1]!=-1)
1017           s=plan[argno][1];
1018         argno++;
1019         size = 8;
1020         if(s==-1) {
1021           r = gv(RC_INT);
1022           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1023           vtop--;
1024         } else {
1025           size=0;
1026           plan2[keep]=s;
1027           keep++;
1028           vswap();
1029         }
1030       }
1031       s=-1;
1032       if(--argno<4 && plan[argno][0]!=-1)
1033         s=plan[argno][0];
1034 #ifdef TCC_ARM_EABI
1035       if(vtop->type.t == VT_VOID) {
1036         if(s == -1)
1037           o(0xE24DD004); /* sub sp,sp,#4 */
1038         vtop--;
1039       } else
1040 #endif
1041       if(s == -1) {
1042         r = gv(RC_INT);
1043         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1044         vtop--;
1045       } else {
1046         size=0;
1047         plan2[keep]=s;
1048         keep++;
1049       }
1050       args_size += size;
1051     }
1052   }
1053   for(i = 0; i < keep; i++) {
1054     vrotb(keep);
1055     gv(regmask(plan2[i]));
1056 #ifdef TCC_ARM_HARDFLOAT
1057     /* arg is in s(2d+1): plan2[i]<plan2[i+1] => alignment occured (ex f,d,f) */
1058     if (i < keep - 1 && is_float(vtop->type.t) && (plan2[i] <= plan2[i + 1])) {
1059       o(0xEEF00A40|(vfpr(plan2[i])<<12)|vfpr(plan2[i]));
1060     }
1061 #endif
1062   }
1063 save_regs(keep); /* save used temporary registers */
1064   keep++;
1065   if(ncrn) {
1066     int nb_regs=0;
1067     if (ncrn>4)
1068       ncrn=4;
1069     todo&=((1<<ncrn)-1);
1070     if(todo) {
1071       int i;
1072       o(0xE8BD0000|todo);
1073       for(i=0;i<4;i++)
1074         if(todo&(1<<i)) {
1075           vpushi(0);
1076           vtop->r=i;
1077           keep++;
1078           nb_regs++;
1079         }
1080     }
1081     args_size-=nb_regs*4;
1082   }
1083   if(vfp_todo) {
1084     int nb_fregs=0;
1085
1086     for(i=0;i<16;i++)
1087       if(vfp_todo&(1<<i)) {
1088         o(0xED9D0A00|(i&1)<<22|(i>>1)<<12|nb_fregs);
1089         vpushi(0);
1090         /* There might be 2 floats in a double VFP reg but that doesn't seem
1091            to matter */
1092         if (!(i%2))
1093           vtop->r=TREG_F0+i/2;
1094         keep++;
1095         nb_fregs++;
1096       }
1097     if (nb_fregs) {
1098       gadd_sp(nb_fregs*4);
1099       args_size-=nb_fregs*4;
1100     }
1101   }
1102   vrotb(keep);
1103   gcall_or_jmp(0);
1104   if (args_size)
1105       gadd_sp(args_size);
1106 #ifdef TCC_ARM_EABI
1107   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1108      && type_size(&vtop->type.ref->type, &align) <= 4)
1109   {
1110     store(REG_IRET,vtop-keep);
1111     ++keep;
1112   }
1113 #ifdef TCC_ARM_VFP
1114 #ifdef TCC_ARM_HARDFLOAT
1115   else if(variadic && is_float(vtop->type.ref->type.t)) {
1116 #else
1117   else if(is_float(vtop->type.ref->type.t)) {
1118 #endif
1119     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1120       o(0xEE000A10); /* fmsr s0,r0 */
1121     } else {
1122       o(0xEE000B10); /* fmdlr d0,r0 */
1123       o(0xEE201B10); /* fmdhr d0,r1 */
1124     }
1125   }
1126 #endif
1127 #endif
1128   vtop-=keep;
1129   leaffunc = 0;
1130 }
1131
1132 /* generate function prolog of type 't' */
1133 void gfunc_prolog(CType *func_type)
1134 {
1135   Sym *sym,*sym2;
1136   int n,nf,size,align, variadic, struct_ret = 0;
1137 #ifdef TCC_ARM_HARDFLOAT
1138   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1139 #endif
1140
1141   sym = func_type->ref;
1142   func_vt = sym->type;
1143
1144   n = nf = 0;
1145   variadic = (func_type->ref->c == FUNC_ELLIPSIS);
1146   if((func_vt.t & VT_BTYPE) == VT_STRUCT
1147      && type_size(&func_vt,&align) > 4)
1148   {
1149     n++;
1150     struct_ret = 1;
1151   }
1152   for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1153     size = type_size(&sym2->type, &align);
1154 #ifdef TCC_ARM_HARDFLOAT
1155     if (!variadic && (is_float(sym2->type.t)
1156         || is_float_hgen_aggr(&sym2->type))) {
1157       int tmpnf = assign_fpreg(&avregs, align, size) + 1;
1158       nf = (tmpnf > nf) ? tmpnf : nf;
1159     } else
1160 #endif
1161     if (n < 4)
1162       n += (size + 3) / 4;
1163   }
1164   if (struct_ret)
1165     func_vc = nf * 4;
1166   o(0xE1A0C00D); /* mov ip,sp */
1167   if(variadic)
1168     n=4;
1169   if(n) {
1170     if(n>4)
1171       n=4;
1172 #ifdef TCC_ARM_EABI
1173     n=(n+1)&-2;
1174 #endif
1175     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1176   }
1177   if (nf) {
1178     if (nf>16)
1179       nf=16;
1180     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1181     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1182   }
1183   o(0xE92D5800); /* save fp, ip, lr */
1184   o(0xE28DB00C); /* add fp, sp, #12 */
1185   func_sub_sp_offset = ind;
1186   o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1187   {
1188     int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1189
1190 #ifdef TCC_ARM_HARDFLOAT
1191     avregs = AVAIL_REGS_INITIALIZER;
1192 #endif
1193     while ((sym = sym->next)) {
1194       CType *type;
1195       type = &sym->type;
1196       size = type_size(type, &align);
1197       size = (size + 3) >> 2;
1198 #ifdef TCC_ARM_HARDFLOAT
1199       if (!variadic && (is_float(sym->type.t)
1200           || is_float_hgen_aggr(&sym->type))) {
1201         int fpn = assign_fpreg(&avregs, align, size << 2);
1202         if (fpn >= 0) {
1203           addr = fpn * 4;
1204         } else
1205           goto from_stack;
1206       } else
1207 #endif
1208       if (pn < 4) {
1209 #ifdef TCC_ARM_EABI
1210         pn = (pn + (align-1)/4) & -(align/4);
1211 #endif
1212         addr = (nf + pn) * 4;
1213         pn += size;
1214         if (!sn && pn > 4)
1215           sn = (pn - 4);
1216       } else {
1217 #ifdef TCC_ARM_HARDFLOAT
1218 from_stack:
1219 #endif
1220 #ifdef TCC_ARM_EABI
1221         sn = (sn + (align-1)/4) & -(align/4);
1222 #endif
1223         addr = (n + nf + sn) * 4;
1224         sn += size;
1225       }
1226       sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr);
1227     }
1228   }
1229   last_itod_magic=0;
1230   leaffunc = 1;
1231   loc = -12;
1232 }
1233
1234 /* generate function epilog */
1235 void gfunc_epilog(void)
1236 {
1237   uint32_t x;
1238   int diff;
1239 #ifdef TCC_ARM_EABI
1240   /* Useless but harmless copy of the float result into main register(s) in case
1241      of variadic function in the hardfloat variant */
1242   if(is_float(func_vt.t)) {
1243     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1244       o(0xEE100A10); /* fmrs r0, s0 */
1245     else {
1246       o(0xEE100B10); /* fmrdl r0, d0 */
1247       o(0xEE301B10); /* fmrdh r1, d0 */
1248     }
1249   }
1250 #endif
1251   o(0xE91BA800); /* restore fp, sp, pc */
1252   diff = (-loc + 3) & -4;
1253 #ifdef TCC_ARM_EABI
1254   if(!leaffunc)
1255     diff = (diff + 7) & -8;
1256 #endif
1257   if(diff > 12) {
1258     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1259     if(x)
1260       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1261     else {
1262       int addr;
1263       addr=ind;
1264       o(0xE59FC004); /* ldr ip,[pc+4] */
1265       o(0xE04BD00C); /* sub sp,fp,ip  */
1266       o(0xE1A0F00E); /* mov pc,lr */
1267       o(diff);
1268       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1269     }
1270   }
1271 }
1272
1273 /* generate a jump to a label */
1274 int gjmp(int t)
1275 {
1276   int r;
1277   r=ind;
1278   o(0xE0000000|encbranch(r,t,1));
1279   return r;
1280 }
1281
/* Emit an unconditional branch to the fixed address 'a'; the position of
   the emitted branch is not needed by the caller. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1287
1288 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1289 int gtst(int inv, int t)
1290 {
1291   int v, r;
1292   uint32_t op;
1293   v = vtop->r & VT_VALMASK;
1294   r=ind;
1295   if (v == VT_CMP) {
1296     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1297     op|=encbranch(r,t,1);
1298     o(op);
1299     t=r;
1300   } else if (v == VT_JMP || v == VT_JMPI) {
1301     if ((v & 1) == inv) {
1302       if(!vtop->c.i)
1303         vtop->c.i=t;
1304       else {
1305         uint32_t *x;
1306         int p,lp;
1307         if(t) {
1308           p = vtop->c.i;
1309           do {
1310             p = decbranch(lp=p);
1311           } while(p);
1312           x = (uint32_t *)(cur_text_section->data + lp);
1313           *x &= 0xff000000;
1314           *x |= encbranch(lp,t,1);
1315         }
1316         t = vtop->c.i;
1317       }
1318     } else {
1319       t = gjmp(t);
1320       gsym(vtop->c.i);
1321     }
1322   } else {
1323     if (is_float(vtop->type.t)) {
1324       r=gv(RC_FLOAT);
1325 #ifdef TCC_ARM_VFP
1326       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1327       o(0xEEF1FA10); /* fmstat */
1328 #else
1329       o(0xEE90F118|(fpr(r)<<16));
1330 #endif
1331       vtop->r = VT_CMP;
1332       vtop->c.i = TOK_NE;
1333       return gtst(inv, t);
1334     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1335       /* constant jmp optimization */
1336       if ((vtop->c.i != 0) != inv)
1337         t = gjmp(t);
1338     } else {
1339       v = gv(RC_INT);
1340       o(0xE3300000|(intr(v)<<16));
1341       vtop->r = VT_CMP;
1342       vtop->c.i = TOK_NE;
1343       return gtst(inv, t);
1344     }
1345   }
1346   vtop--;
1347   return t;
1348 }
1349
1350 /* generate an integer binary operation */
1351 void gen_opi(int op)
1352 {
1353   int c, func = 0;
1354   uint32_t opc = 0, r, fr;
1355   unsigned short retreg = REG_IRET;
1356
1357   c=0;
1358   switch(op) {
1359     case '+':
1360       opc = 0x8;
1361       c=1;
1362       break;
1363     case TOK_ADDC1: /* add with carry generation */
1364       opc = 0x9;
1365       c=1;
1366       break;
1367     case '-':
1368       opc = 0x4;
1369       c=1;
1370       break;
1371     case TOK_SUBC1: /* sub with carry generation */
1372       opc = 0x5;
1373       c=1;
1374       break;
1375     case TOK_ADDC2: /* add with carry use */
1376       opc = 0xA;
1377       c=1;
1378       break;
1379     case TOK_SUBC2: /* sub with carry use */
1380       opc = 0xC;
1381       c=1;
1382       break;
1383     case '&':
1384       opc = 0x0;
1385       c=1;
1386       break;
1387     case '^':
1388       opc = 0x2;
1389       c=1;
1390       break;
1391     case '|':
1392       opc = 0x18;
1393       c=1;
1394       break;
1395     case '*':
1396       gv2(RC_INT, RC_INT);
1397       r = vtop[-1].r;
1398       fr = vtop[0].r;
1399       vtop--;
1400       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1401       return;
1402     case TOK_SHL:
1403       opc = 0;
1404       c=2;
1405       break;
1406     case TOK_SHR:
1407       opc = 1;
1408       c=2;
1409       break;
1410     case TOK_SAR:
1411       opc = 2;
1412       c=2;
1413       break;
1414     case '/':
1415     case TOK_PDIV:
1416       func=TOK___divsi3;
1417       c=3;
1418       break;
1419     case TOK_UDIV:
1420       func=TOK___udivsi3;
1421       c=3;
1422       break;
1423     case '%':
1424 #ifdef TCC_ARM_EABI
1425       func=TOK___aeabi_idivmod;
1426       retreg=REG_LRET;
1427 #else
1428       func=TOK___modsi3;
1429 #endif
1430       c=3;
1431       break;
1432     case TOK_UMOD:
1433 #ifdef TCC_ARM_EABI
1434       func=TOK___aeabi_uidivmod;
1435       retreg=REG_LRET;
1436 #else
1437       func=TOK___umodsi3;
1438 #endif
1439       c=3;
1440       break;
1441     case TOK_UMULL:
1442       gv2(RC_INT, RC_INT);
1443       r=intr(vtop[-1].r2=get_reg(RC_INT));
1444       c=vtop[-1].r;
1445       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1446       vtop--;
1447       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1448       return;
1449     default:
1450       opc = 0x15;
1451       c=1;
1452       break;
1453   }
1454   switch(c) {
1455     case 1:
1456       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1457         if(opc == 4 || opc == 5 || opc == 0xc) {
1458           vswap();
1459           opc|=2; // sub -> rsb
1460         }
1461       }
1462       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1463           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1464         gv(RC_INT);
1465       vswap();
1466       c=intr(gv(RC_INT));
1467       vswap();
1468       opc=0xE0000000|(opc<<20)|(c<<16);
1469       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1470         uint32_t x;
1471         x=stuff_const(opc|0x2000000,vtop->c.i);
1472         if(x) {
1473           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1474           o(x|(r<<12));
1475           goto done;
1476         }
1477       }
1478       fr=intr(gv(RC_INT));
1479       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1480       o(opc|(r<<12)|fr);
1481 done:
1482       vtop--;
1483       if (op >= TOK_ULT && op <= TOK_GT) {
1484         vtop->r = VT_CMP;
1485         vtop->c.i = op;
1486       }
1487       break;
1488     case 2:
1489       opc=0xE1A00000|(opc<<5);
1490       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1491           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1492         gv(RC_INT);
1493       vswap();
1494       r=intr(gv(RC_INT));
1495       vswap();
1496       opc|=r;
1497       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1498         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1499         c = vtop->c.i & 0x1f;
1500         o(opc|(c<<7)|(fr<<12));
1501       } else {
1502         fr=intr(gv(RC_INT));
1503         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1504         o(opc|(c<<12)|(fr<<8)|0x10);
1505       }
1506       vtop--;
1507       break;
1508     case 3:
1509       vpush_global_sym(&func_old_type, func);
1510       vrott(3);
1511       gfunc_call(2);
1512       vpushi(0);
1513       vtop->r = retreg;
1514       break;
1515     default:
1516       tcc_error("gen_opi %i unimplemented!",op);
1517   }
1518 }
1519
1520 #ifdef TCC_ARM_VFP
1521 static int is_zero(int i)
1522 {
1523   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1524     return 0;
1525   if (vtop[i].type.t == VT_FLOAT)
1526     return (vtop[i].c.f == 0.f);
1527   else if (vtop[i].type.t == VT_DOUBLE)
1528     return (vtop[i].c.d == 0.0);
1529   return (vtop[i].c.ld == 0.l);
1530 }
1531
1532 /* generate a floating point operation 'v = t1 op t2' instruction. The
1533  *    two operands are guaranted to have the same floating point type */
1534 void gen_opf(int op)
1535 {
1536   uint32_t x;
1537   int fneg=0,r;
1538   x=0xEE000A00|T2CPR(vtop->type.t);
1539   switch(op) {
1540     case '+':
1541       if(is_zero(-1))
1542         vswap();
1543       if(is_zero(0)) {
1544         vtop--;
1545         return;
1546       }
1547       x|=0x300000;
1548       break;
1549     case '-':
1550       x|=0x300040;
1551       if(is_zero(0)) {
1552         vtop--;
1553         return;
1554       }
1555       if(is_zero(-1)) {
1556         x|=0x810000; /* fsubX -> fnegX */
1557         vswap();
1558         vtop--;
1559         fneg=1;
1560       }
1561       break;
1562     case '*':
1563       x|=0x200000;
1564       break;
1565     case '/':
1566       x|=0x800000;
1567       break;
1568     default:
1569       if(op < TOK_ULT || op > TOK_GT) {
1570         tcc_error("unknown fp op %x!",op);
1571         return;
1572       }
1573       if(is_zero(-1)) {
1574         vswap();
1575         switch(op) {
1576           case TOK_LT: op=TOK_GT; break;
1577           case TOK_GE: op=TOK_ULE; break;
1578           case TOK_LE: op=TOK_GE; break;
1579           case TOK_GT: op=TOK_ULT; break;
1580         }
1581       }
1582       x|=0xB40040; /* fcmpX */
1583       if(op!=TOK_EQ && op!=TOK_NE)
1584         x|=0x80; /* fcmpX -> fcmpeX */
1585       if(is_zero(0)) {
1586         vtop--;
1587         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1588       } else {
1589         x|=vfpr(gv(RC_FLOAT));
1590         vswap();
1591         o(x|(vfpr(gv(RC_FLOAT))<<12));
1592         vtop--;
1593       }
1594       o(0xEEF1FA10); /* fmstat */
1595
1596       switch(op) {
1597         case TOK_LE: op=TOK_ULE; break;
1598         case TOK_LT: op=TOK_ULT; break;
1599         case TOK_UGE: op=TOK_GE; break;
1600         case TOK_UGT: op=TOK_GT; break;
1601       }
1602
1603       vtop->r = VT_CMP;
1604       vtop->c.i = op;
1605       return;
1606   }
1607   r=gv(RC_FLOAT);
1608   x|=vfpr(r);
1609   r=regmask(r);
1610   if(!fneg) {
1611     int r2;
1612     vswap();
1613     r2=gv(RC_FLOAT);
1614     x|=vfpr(r2)<<16;
1615     r|=regmask(r2);
1616   }
1617   vtop->r=get_reg_ex(RC_FLOAT,r);
1618   if(!fneg)
1619     vtop--;
1620   o(x|(vfpr(vtop->r)<<12));
1621 }
1622
1623 #else
1624 static uint32_t is_fconst()
1625 {
1626   long double f;
1627   uint32_t r;
1628   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1629     return 0;
1630   if (vtop->type.t == VT_FLOAT)
1631     f = vtop->c.f;
1632   else if (vtop->type.t == VT_DOUBLE)
1633     f = vtop->c.d;
1634   else
1635     f = vtop->c.ld;
1636   if(!ieee_finite(f))
1637     return 0;
1638   r=0x8;
1639   if(f<0.0) {
1640     r=0x18;
1641     f=-f;
1642   }
1643   if(f==0.0)
1644     return r;
1645   if(f==1.0)
1646     return r|1;
1647   if(f==2.0)
1648     return r|2;
1649   if(f==3.0)
1650     return r|3;
1651   if(f==4.0)
1652     return r|4;
1653   if(f==5.0)
1654     return r|5;
1655   if(f==0.5)
1656     return r|6;
1657   if(f==10.0)
1658     return r|7;
1659   return 0;
1660 }
1661
/* generate a floating point operation 'v = t1 op t2' instruction (variant
   used when TCC_ARM_VFP is not defined). The two operands are guaranteed
   to have the same floating point type.

   Operands for which is_fconst() returns non-zero are encoded directly in
   the instruction instead of being loaded into a register. Comparisons
   leave their result in the flags (VT_CMP). */
void gen_opf(int op)
{
  uint32_t x, r, r2, c1, c2;
  /* is_fconst() looks at vtop only, so swap to classify the lower operand:
     c1 describes t1 (below the top), c2 describes t2 (top); 0 means "not
     usable as an immediate", a value > 0xf means a negative constant */
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* select the precision from the operand type */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  /* each case sets r (first operand register) and r2 (second operand:
     register number or immediate encoding) and the operation bits in x */
  switch(op)
  {
    case '+':
      /* commutative: if the top is not an immediate, swap so that a
         possible immediate t1 ends up on top */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* adding a negative immediate is done by subtracting its
           absolute value */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* subtracting a positive immediate: suf; subtracting a negative
           one keeps the base opcode (the one used for '+') with the
           absolute value */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* 'positive immediate - t2': reverse subtract with t2 in a
           register */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* commutative: only a positive immediate is used directly */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* 'positive immediate / t2': reverse divide with t2 in a
           register */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* translate the comparison into the condition that is tested on
           the flags afterwards */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparision on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only t1 is an immediate: swap the operands and mirror the
           condition accordingly */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative immediate: same 0x200000 bit as set for negative
             immediates in the '+' case */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        /* the result of a comparison lives in the flags */
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* pick the destination field: 15 for comparisons, otherwise a register
     obtained from get_reg_ex() which is given the operand registers as
     candidates */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* r2 is an immediate (bit 0x8 set): only vtop[-1].r is passed as a
       candidate */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1825 #endif
1826
1827 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1828    and 'long long' cases. */
1829 ST_FUNC void gen_cvt_itof1(int t)
1830 {
1831   uint32_t r, r2;
1832   int bt;
1833   bt=vtop->type.t & VT_BTYPE;
1834   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1835 #ifndef TCC_ARM_VFP
1836     uint32_t dsize = 0;
1837 #endif
1838     r=intr(gv(RC_INT));
1839 #ifdef TCC_ARM_VFP
1840     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1841     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1842     r2<<=12;
1843     if(!(vtop->type.t & VT_UNSIGNED))
1844       r2|=0x80;                /* fuitoX -> fsituX */
1845     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1846 #else
1847     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1848     if((t & VT_BTYPE) != VT_FLOAT)
1849       dsize=0x80;    /* flts -> fltd */
1850     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1851     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1852       uint32_t off = 0;
1853       o(0xE3500000|(r<<12));        /* cmp */
1854       r=fpr(get_reg(RC_FLOAT));
1855       if(last_itod_magic) {
1856         off=ind+8-last_itod_magic;
1857         off/=4;
1858         if(off>255)
1859           off=0;
1860       }
1861       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1862       if(!off) {
1863         o(0xEA000000);              /* b */
1864         last_itod_magic=ind;
1865         o(0x4F800000);              /* 4294967296.0f */
1866       }
1867       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1868     }
1869 #endif
1870     return;
1871   } else if(bt == VT_LLONG) {
1872     int func;
1873     CType *func_type = 0;
1874     if((t & VT_BTYPE) == VT_FLOAT) {
1875       func_type = &func_float_type;
1876       if(vtop->type.t & VT_UNSIGNED)
1877         func=TOK___floatundisf;
1878       else
1879         func=TOK___floatdisf;
1880 #if LDOUBLE_SIZE != 8
1881     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1882       func_type = &func_ldouble_type;
1883       if(vtop->type.t & VT_UNSIGNED)
1884         func=TOK___floatundixf;
1885       else
1886         func=TOK___floatdixf;
1887     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1888 #else
1889     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1890 #endif
1891       func_type = &func_double_type;
1892       if(vtop->type.t & VT_UNSIGNED)
1893         func=TOK___floatundidf;
1894       else
1895         func=TOK___floatdidf;
1896     }
1897     if(func_type) {
1898       vpush_global_sym(func_type, func);
1899       vswap();
1900       gfunc_call(1);
1901       vpushi(0);
1902       vtop->r=TREG_F0;
1903       return;
1904     }
1905   }
1906   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1907 }
1908
1909 /* convert fp to int 't' type */
1910 void gen_cvt_ftoi(int t)
1911 {
1912   uint32_t r, r2;
1913   int u, func = 0;
1914   u=t&VT_UNSIGNED;
1915   t&=VT_BTYPE;
1916   r2=vtop->type.t & VT_BTYPE;
1917   if(t==VT_INT) {
1918 #ifdef TCC_ARM_VFP
1919     r=vfpr(gv(RC_FLOAT));
1920     u=u?0:0x10000;
1921     o(0xEEBC0A40|(r<<12)|r|T2CPR(r2)); /* ftoXiY */
1922     r2=intr(vtop->r=get_reg(RC_INT));
1923     o(0xEE100A10|(r<<16)|(r2<<12));
1924     return;
1925 #else
1926     if(u) {
1927       if(r2 == VT_FLOAT)
1928         func=TOK___fixunssfsi;
1929 #if LDOUBLE_SIZE != 8
1930       else if(r2 == VT_LDOUBLE)
1931         func=TOK___fixunsxfsi;
1932       else if(r2 == VT_DOUBLE)
1933 #else
1934       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1935 #endif
1936         func=TOK___fixunsdfsi;
1937     } else {
1938       r=fpr(gv(RC_FLOAT));
1939       r2=intr(vtop->r=get_reg(RC_INT));
1940       o(0xEE100170|(r2<<12)|r);
1941       return;
1942     }
1943 #endif
1944   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1945     if(r2 == VT_FLOAT)
1946       func=TOK___fixsfdi;
1947 #if LDOUBLE_SIZE != 8
1948     else if(r2 == VT_LDOUBLE)
1949       func=TOK___fixxfdi;
1950     else if(r2 == VT_DOUBLE)
1951 #else
1952     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1953 #endif
1954       func=TOK___fixdfdi;
1955   }
1956   if(func) {
1957     vpush_global_sym(&func_old_type, func);
1958     vswap();
1959     gfunc_call(1);
1960     vpushi(0);
1961     if(t == VT_LLONG)
1962       vtop->r2 = REG_LRET;
1963     vtop->r = REG_IRET;
1964     return;
1965   }
1966   tcc_error("unimplemented gen_cvt_ftoi!");
1967 }
1968
1969 /* convert from one floating point type to another */
1970 void gen_cvt_ftof(int t)
1971 {
1972 #ifdef TCC_ARM_VFP
1973   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1974     uint32_t r = vfpr(gv(RC_FLOAT));
1975     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1976   }
1977 #else
1978   /* all we have to do on i386 and FPA ARM is to put the float in a register */
1979   gv(RC_FLOAT);
1980 #endif
1981 }
1982
1983 /* computed goto support */
1984 void ggoto(void)
1985 {
1986   gcall_or_jmp(1);
1987   vtop--;
1988 }
1989
1990 /* end of ARM code generator */
1991 /*************************************************************/
1992 #endif
1993 /*************************************************************/