arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #ifdef TARGET_DEFS_ONLY
  24
  25 #ifdef TCC_ARM_EABI
  26 #ifndef TCC_ARM_VFP // Avoid useless warning
  27 #define TCC_ARM_VFP
  28 #endif
  29 #endif
  30
  31 /* number of available registers */
  32 #ifdef TCC_ARM_VFP
  33 #define NB_REGS            13
  34 #else
  35 #define NB_REGS             9
  36 #endif
  37
  38 /* a register can belong to several classes. The classes must be
  39    sorted from more general to more precise (see gv2() code which does
  40    assumptions on it). */
  41 #define RC_INT     0x0001 /* generic integer register */
  42 #define RC_FLOAT   0x0002 /* generic float register */
  43 #define RC_R0      0x0004
  44 #define RC_R1      0x0008
  45 #define RC_R2      0x0010
  46 #define RC_R3      0x0020
  47 #define RC_R12     0x0040
  48 #define RC_F0      0x0080
  49 #define RC_F1      0x0100
  50 #define RC_F2      0x0200
  51 #define RC_F3      0x0400
  52 #ifdef TCC_ARM_VFP
  53 #define RC_F4      0x0800
  54 #define RC_F5      0x1000
  55 #define RC_F6      0x2000
  56 #define RC_F7      0x4000
  57 #endif
  58 #define RC_IRET    RC_R0  /* function return: integer register */
  59 #define RC_LRET    RC_R1  /* function return: second integer register */
  60 #define RC_FRET    RC_F0  /* function return: float register */
  61
  62 /* pretty names for the registers */
  63 enum {
  64     TREG_R0 = 0,
  65     TREG_R1,
  66     TREG_R2,
  67     TREG_R3,
  68     TREG_R12,
  69     TREG_F0,
  70     TREG_F1,
  71     TREG_F2,
  72     TREG_F3,
  73 #ifdef TCC_ARM_VFP
  74     TREG_F4,
  75     TREG_F5,
  76     TREG_F6,
  77     TREG_F7,
  78 #endif
  79 };
  80
  81 #ifdef TCC_ARM_VFP
  82 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  83 #endif
  84
  85 /* return registers for function */
  86 #define REG_IRET TREG_R0 /* single word int return register */
  87 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  88 #define REG_FRET TREG_F0 /* float return register */
  89
  90 #ifdef TCC_ARM_EABI
  91 #define TOK___divdi3 TOK___aeabi_ldivmod
  92 #define TOK___moddi3 TOK___aeabi_ldivmod
  93 #define TOK___udivdi3 TOK___aeabi_uldivmod
  94 #define TOK___umoddi3 TOK___aeabi_uldivmod
  95 #endif
  96
  97 /* defined if function parameters must be evaluated in reverse order */
  98 #define INVERT_FUNC_PARAMS
  99
 100 /* defined if structures are passed as pointers. Otherwise structures
 101    are directly pushed on stack. */
 102 //#define FUNC_STRUCT_PARAM_AS_PTR
 103
 104 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 105 ST_DATA CType float_type, double_type, func_float_type, func_double_type;
 106 #define func_ldouble_type func_double_type
 107 #else
 108 #define func_float_type func_old_type
 109 #define func_double_type func_old_type
 110 #define func_ldouble_type func_old_type
 111 #endif
 112
 113 /* pointer size, in bytes */
 114 #define PTR_SIZE 4
 115
 116 /* long double size and alignment, in bytes */
 117 #ifdef TCC_ARM_VFP
 118 #define LDOUBLE_SIZE  8
 119 #endif
 120
 121 #ifndef LDOUBLE_SIZE
 122 #define LDOUBLE_SIZE  8
 123 #endif
 124
 125 #ifdef TCC_ARM_EABI
 126 #define LDOUBLE_ALIGN 8
 127 #else
 128 #define LDOUBLE_ALIGN 4
 129 #endif
 130
 131 /* maximum alignment (for aligned attribute support) */
 132 #define MAX_ALIGN     8
 133
 134 #define CHAR_IS_UNSIGNED
 135
 136 /******************************************************/
 137 /* ELF defines */
 138
 139 #define EM_TCC_TARGET EM_ARM
 140
 141 /* relocation type for 32 bit data relocation */
 142 #define R_DATA_32   R_ARM_ABS32
 143 #define R_DATA_PTR  R_ARM_ABS32
 144 #define R_JMP_SLOT  R_ARM_JUMP_SLOT
 145 #define R_COPY      R_ARM_COPY
 146
 147 #define ELF_START_ADDR 0x00008000
 148 #define ELF_PAGE_SIZE  0x1000
 149
 150 /******************************************************/
 151 #else /* ! TARGET_DEFS_ONLY */
 152 /******************************************************/
 153 #include "tcc.h"
 154
 155 ST_DATA const int reg_classes[NB_REGS] = {
 156     /* r0 */ RC_INT | RC_R0,
 157     /* r1 */ RC_INT | RC_R1,
 158     /* r2 */ RC_INT | RC_R2,
 159     /* r3 */ RC_INT | RC_R3,
 160     /* r12 */ RC_INT | RC_R12,
 161     /* f0 */ RC_FLOAT | RC_F0,
 162     /* f1 */ RC_FLOAT | RC_F1,
 163     /* f2 */ RC_FLOAT | RC_F2,
 164     /* f3 */ RC_FLOAT | RC_F3,
 165 #ifdef TCC_ARM_VFP
 166  /* d4/s8 */ RC_FLOAT | RC_F4,
 167 /* d5/s10 */ RC_FLOAT | RC_F5,
 168 /* d6/s12 */ RC_FLOAT | RC_F6,
 169 /* d7/s14 */ RC_FLOAT | RC_F7,
 170 #endif
 171 };
 172
/* keep in sync with the matching float_type/double_type declaration in the TARGET_DEFS_ONLY section above */
 174 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 175 ST_DATA CType float_type, double_type, func_float_type, func_double_type;
 176 #endif
 177
 178 static int func_sub_sp_offset, last_itod_magic;
 179 static int leaffunc;
 180
 181 static int two2mask(int a,int b) {
 182   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 183 }
 184
 185 static int regmask(int r) {
 186   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 187 }
 188
 189 /******************************************************/
 190
 191 void o(uint32_t i)
 192 {
 193   /* this is a good place to start adding big-endian support*/
 194   int ind1;
 195
 196   ind1 = ind + 4;
 197   if (!cur_text_section)
 198     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 199          "can't evaluate constant expressions outside of a function.");
 200   if (ind1 > cur_text_section->data_allocated)
 201     section_realloc(cur_text_section, ind1);
 202   cur_text_section->data[ind++] = i&255;
 203   i>>=8;
 204   cur_text_section->data[ind++] = i&255;
 205   i>>=8;
 206   cur_text_section->data[ind++] = i&255;
 207   i>>=8;
 208   cur_text_section->data[ind++] = i;
 209 }
 210
 211 static uint32_t stuff_const(uint32_t op, uint32_t c)
 212 {
 213   int try_neg=0;
 214   uint32_t nc = 0, negop = 0;
 215
 216   switch(op&0x1F00000)
 217   {
 218     case 0x800000: //add
 219     case 0x400000: //sub
 220       try_neg=1;
 221       negop=op^0xC00000;
 222       nc=-c;
 223       break;
 224     case 0x1A00000: //mov
 225     case 0x1E00000: //mvn
 226       try_neg=1;
 227       negop=op^0x400000;
 228       nc=~c;
 229       break;
 230     case 0x200000: //xor
 231       if(c==~0)
 232         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 233       break;
 234     case 0x0: //and
 235       if(c==~0)
 236         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 237     case 0x1C00000: //bic
 238       try_neg=1;
 239       negop=op^0x1C00000;
 240       nc=~c;
 241       break;
 242     case 0x1800000: //orr
 243       if(c==~0)
 244         return (op&0xFFF0FFFF)|0x1E00000;
 245       break;
 246   }
 247   do {
 248     uint32_t m;
 249     int i;
 250     if(c<256) /* catch undefined <<32 */
 251       return op|c;
 252     for(i=2;i<32;i+=2) {
 253       m=(0xff>>i)|(0xff<<(32-i));
 254       if(!(c&~m))
 255         return op|(i<<7)|(c<<i)|(c>>(32-i));
 256     }
 257     op=negop;
 258     c=nc;
 259   } while(try_neg--);
 260   return 0;
 261 }
 262
 263
 264 //only add,sub
 265 void stuff_const_harder(uint32_t op, uint32_t v) {
 266   uint32_t x;
 267   x=stuff_const(op,v);
 268   if(x)
 269     o(x);
 270   else {
 271     uint32_t a[16], nv, no, o2, n2;
 272     int i,j,k;
 273     a[0]=0xff;
 274     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 275     for(i=1;i<16;i++)
 276       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 277     for(i=0;i<12;i++)
 278       for(j=i<4?i+12:15;j>=i+4;j--)
 279         if((v&(a[i]|a[j]))==v) {
 280           o(stuff_const(op,v&a[i]));
 281           o(stuff_const(o2,v&a[j]));
 282           return;
 283         }
 284     no=op^0xC00000;
 285     n2=o2^0xC00000;
 286     nv=-v;
 287     for(i=0;i<12;i++)
 288       for(j=i<4?i+12:15;j>=i+4;j--)
 289         if((nv&(a[i]|a[j]))==nv) {
 290           o(stuff_const(no,nv&a[i]));
 291           o(stuff_const(n2,nv&a[j]));
 292           return;
 293         }
 294     for(i=0;i<8;i++)
 295       for(j=i+4;j<12;j++)
 296         for(k=i<4?i+12:15;k>=j+4;k--)
 297           if((v&(a[i]|a[j]|a[k]))==v) {
 298             o(stuff_const(op,v&a[i]));
 299             o(stuff_const(o2,v&a[j]));
 300             o(stuff_const(o2,v&a[k]));
 301             return;
 302           }
 303     no=op^0xC00000;
 304     nv=-v;
 305     for(i=0;i<8;i++)
 306       for(j=i+4;j<12;j++)
 307         for(k=i<4?i+12:15;k>=j+4;k--)
 308           if((nv&(a[i]|a[j]|a[k]))==nv) {
 309             o(stuff_const(no,nv&a[i]));
 310             o(stuff_const(n2,nv&a[j]));
 311             o(stuff_const(n2,nv&a[k]));
 312             return;
 313           }
 314     o(stuff_const(op,v&a[0]));
 315     o(stuff_const(o2,v&a[4]));
 316     o(stuff_const(o2,v&a[8]));
 317     o(stuff_const(o2,v&a[12]));
 318   }
 319 }
 320
 321 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 322 {
 323   addr-=pos+8;
 324   addr/=4;
 325   if(addr>=0x1000000 || addr<-0x1000000) {
 326     if(fail)
 327       tcc_error("FIXME: function bigger than 32MB");
 328     return 0;
 329   }
 330   return 0x0A000000|(addr&0xffffff);
 331 }
 332
 333 int decbranch(int pos)
 334 {
 335   int x;
 336   x=*(uint32_t *)(cur_text_section->data + pos);
 337   x&=0x00ffffff;
 338   if(x&0x800000)
 339     x-=0x1000000;
 340   return x*4+pos+8;
 341 }
 342
 343 /* output a symbol and patch all calls to it */
 344 void gsym_addr(int t, int a)
 345 {
 346   uint32_t *x;
 347   int lt;
 348   while(t) {
 349     x=(uint32_t *)(cur_text_section->data + t);
 350     t=decbranch(lt=t);
 351     if(a==lt+4)
 352       *x=0xE1A00000; // nop
 353     else {
 354       *x &= 0xff000000;
 355       *x |= encbranch(lt,a,1);
 356     }
 357   }
 358 }
 359
 360 void gsym(int t)
 361 {
 362   gsym_addr(t, ind);
 363 }
 364
 365 #ifdef TCC_ARM_VFP
 366 static uint32_t vfpr(int r)
 367 {
 368   if(r<TREG_F0 || r>TREG_F7)
 369     tcc_error("compiler error! register %i is no vfp register",r);
 370   return r-5;
 371 }
 372 #else
 373 static uint32_t fpr(int r)
 374 {
 375   if(r<TREG_F0 || r>TREG_F3)
 376     tcc_error("compiler error! register %i is no fpa register",r);
 377   return r-5;
 378 }
 379 #endif
 380
 381 static uint32_t intr(int r)
 382 {
 383   if(r==4)
 384     return 12;
 385   if((r<0 || r>4) && r!=14)
 386     tcc_error("compiler error! register %i is no int register",r);
 387   return r;
 388 }
 389
 390 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
 391 {
 392   if(*off>maxoff || *off&((1<<shift)-1)) {
 393     uint32_t x, y;
 394     x=0xE280E000;
 395     if(*sgn)
 396       x=0xE240E000;
 397     x|=(*base)<<16;
 398     *base=14; // lr
 399     y=stuff_const(x,*off&~maxoff);
 400     if(y) {
 401       o(y);
 402       *off&=maxoff;
 403       return;
 404     }
 405     y=stuff_const(x,(*off+maxoff)&~maxoff);
 406     if(y) {
 407       o(y);
 408       *sgn=!*sgn;
 409       *off=((*off+maxoff)&~maxoff)-*off;
 410       return;
 411     }
 412     stuff_const_harder(x,*off&~maxoff);
 413     *off&=maxoff;
 414   }
 415 }
 416
 417 static uint32_t mapcc(int cc)
 418 {
 419   switch(cc)
 420   {
 421     case TOK_ULT:
 422       return 0x30000000; /* CC/LO */
 423     case TOK_UGE:
 424       return 0x20000000; /* CS/HS */
 425     case TOK_EQ:
 426       return 0x00000000; /* EQ */
 427     case TOK_NE:
 428       return 0x10000000; /* NE */
 429     case TOK_ULE:
 430       return 0x90000000; /* LS */
 431     case TOK_UGT:
 432       return 0x80000000; /* HI */
 433     case TOK_Nset:
 434       return 0x40000000; /* MI */
 435     case TOK_Nclear:
 436       return 0x50000000; /* PL */
 437     case TOK_LT:
 438       return 0xB0000000; /* LT */
 439     case TOK_GE:
 440       return 0xA0000000; /* GE */
 441     case TOK_LE:
 442       return 0xD0000000; /* LE */
 443     case TOK_GT:
 444       return 0xC0000000; /* GT */
 445   }
 446   tcc_error("unexpected condition code");
 447   return 0xE0000000; /* AL */
 448 }
 449
 450 static int negcc(int cc)
 451 {
 452   switch(cc)
 453   {
 454     case TOK_ULT:
 455       return TOK_UGE;
 456     case TOK_UGE:
 457       return TOK_ULT;
 458     case TOK_EQ:
 459       return TOK_NE;
 460     case TOK_NE:
 461       return TOK_EQ;
 462     case TOK_ULE:
 463       return TOK_UGT;
 464     case TOK_UGT:
 465       return TOK_ULE;
 466     case TOK_Nset:
 467       return TOK_Nclear;
 468     case TOK_Nclear:
 469       return TOK_Nset;
 470     case TOK_LT:
 471       return TOK_GE;
 472     case TOK_GE:
 473       return TOK_LT;
 474     case TOK_LE:
 475       return TOK_GT;
 476     case TOK_GT:
 477       return TOK_LE;
 478   }
 479   tcc_error("unexpected condition code");
 480   return TOK_NE;
 481 }
 482
 483 /* load 'r' from value 'sv' */
 484 void load(int r, SValue *sv)
 485 {
 486   int v, ft, fc, fr, sign;
 487   uint32_t op;
 488   SValue v1;
 489
 490   fr = sv->r;
 491   ft = sv->type.t;
 492   fc = sv->c.ul;
 493
 494   if(fc>=0)
 495     sign=0;
 496   else {
 497     sign=1;
 498     fc=-fc;
 499   }
 500
 501   v = fr & VT_VALMASK;
 502   if (fr & VT_LVAL) {
 503     uint32_t base = 0xB; // fp
 504     if(v == VT_LLOCAL) {
 505       v1.type.t = VT_PTR;
 506       v1.r = VT_LOCAL | VT_LVAL;
 507       v1.c.ul = sv->c.ul;
 508       load(base=14 /* lr */, &v1);
 509       fc=sign=0;
 510       v=VT_LOCAL;
 511     } else if(v == VT_CONST) {
 512       v1.type.t = VT_PTR;
 513       v1.r = fr&~VT_LVAL;
 514       v1.c.ul = sv->c.ul;
 515       v1.sym=sv->sym;
 516       load(base=14, &v1);
 517       fc=sign=0;
 518       v=VT_LOCAL;
 519     } else if(v < VT_CONST) {
 520       base=intr(v);
 521       fc=sign=0;
 522       v=VT_LOCAL;
 523     }
 524     if(v == VT_LOCAL) {
 525       if(is_float(ft)) {
 526         calcaddr(&base,&fc,&sign,1020,2);
 527 #ifdef TCC_ARM_VFP
 528         op=0xED100A00; /* flds */
 529         if(!sign)
 530           op|=0x800000;
 531         if ((ft & VT_BTYPE) != VT_FLOAT)
 532           op|=0x100;   /* flds -> fldd */
 533         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 534 #else
 535         op=0xED100100;
 536         if(!sign)
 537           op|=0x800000;
 538 #if LDOUBLE_SIZE == 8
 539         if ((ft & VT_BTYPE) != VT_FLOAT)
 540           op|=0x8000;
 541 #else
 542         if ((ft & VT_BTYPE) == VT_DOUBLE)
 543           op|=0x8000;
 544         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 545           op|=0x400000;
 546 #endif
 547         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 548 #endif
 549       } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
 550                 || (ft & VT_BTYPE) == VT_SHORT) {
 551         calcaddr(&base,&fc,&sign,255,0);
 552         op=0xE1500090;
 553         if ((ft & VT_BTYPE) == VT_SHORT)
 554           op|=0x20;
 555         if ((ft & VT_UNSIGNED) == 0)
 556           op|=0x40;
 557         if(!sign)
 558           op|=0x800000;
 559         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 560       } else {
 561         calcaddr(&base,&fc,&sign,4095,0);
 562         op=0xE5100000;
 563         if(!sign)
 564           op|=0x800000;
 565         if ((ft & VT_BTYPE) == VT_BYTE)
 566           op|=0x400000;
 567         o(op|(intr(r)<<12)|fc|(base<<16));
 568       }
 569       return;
 570     }
 571   } else {
 572     if (v == VT_CONST) {
 573       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 574       if (fr & VT_SYM || !op) {
 575         o(0xE59F0000|(intr(r)<<12));
 576         o(0xEA000000);
 577         if(fr & VT_SYM)
 578           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 579         o(sv->c.ul);
 580       } else
 581         o(op);
 582       return;
 583     } else if (v == VT_LOCAL) {
 584       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 585       if (fr & VT_SYM || !op) {
 586         o(0xE59F0000|(intr(r)<<12));
 587         o(0xEA000000);
 588         if(fr & VT_SYM) // needed ?
 589           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 590         o(sv->c.ul);
 591         o(0xE08B0000|(intr(r)<<12)|intr(r));
 592       } else
 593         o(op);
 594       return;
 595     } else if(v == VT_CMP) {
 596       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 597       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 598       return;
 599     } else if (v == VT_JMP || v == VT_JMPI) {
 600       int t;
 601       t = v & 1;
 602       o(0xE3A00000|(intr(r)<<12)|t);
 603       o(0xEA000000);
 604       gsym(sv->c.ul);
 605       o(0xE3A00000|(intr(r)<<12)|(t^1));
 606       return;
 607     } else if (v < VT_CONST) {
 608       if(is_float(ft))
 609 #ifdef TCC_ARM_VFP
 610         o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
 611 #else
 612         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 613 #endif
 614       else
 615         o(0xE1A00000|(intr(r)<<12)|intr(v));
 616       return;
 617     }
 618   }
 619   tcc_error("load unimplemented!");
 620 }
 621
 622 /* store register 'r' in lvalue 'v' */
 623 void store(int r, SValue *sv)
 624 {
 625   SValue v1;
 626   int v, ft, fc, fr, sign;
 627   uint32_t op;
 628
 629   fr = sv->r;
 630   ft = sv->type.t;
 631   fc = sv->c.ul;
 632
 633   if(fc>=0)
 634     sign=0;
 635   else {
 636     sign=1;
 637     fc=-fc;
 638   }
 639
 640   v = fr & VT_VALMASK;
 641   if (fr & VT_LVAL || fr == VT_LOCAL) {
 642     uint32_t base = 0xb;
 643     if(v < VT_CONST) {
 644       base=intr(v);
 645       v=VT_LOCAL;
 646       fc=sign=0;
 647     } else if(v == VT_CONST) {
 648       v1.type.t = ft;
 649       v1.r = fr&~VT_LVAL;
 650       v1.c.ul = sv->c.ul;
 651       v1.sym=sv->sym;
 652       load(base=14, &v1);
 653       fc=sign=0;
 654       v=VT_LOCAL;
 655     }
 656     if(v == VT_LOCAL) {
 657        if(is_float(ft)) {
 658         calcaddr(&base,&fc,&sign,1020,2);
 659 #ifdef TCC_ARM_VFP
 660         op=0xED000A00; /* fsts */
 661         if(!sign)
 662           op|=0x800000;
 663         if ((ft & VT_BTYPE) != VT_FLOAT)
 664           op|=0x100;   /* fsts -> fstd */
 665         o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
 666 #else
 667         op=0xED000100;
 668         if(!sign)
 669           op|=0x800000;
 670 #if LDOUBLE_SIZE == 8
 671         if ((ft & VT_BTYPE) != VT_FLOAT)
 672           op|=0x8000;
 673 #else
 674         if ((ft & VT_BTYPE) == VT_DOUBLE)
 675           op|=0x8000;
 676         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 677           op|=0x400000;
 678 #endif
 679         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 680 #endif
 681         return;
 682       } else if((ft & VT_BTYPE) == VT_SHORT) {
 683         calcaddr(&base,&fc,&sign,255,0);
 684         op=0xE14000B0;
 685         if(!sign)
 686           op|=0x800000;
 687         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 688       } else {
 689         calcaddr(&base,&fc,&sign,4095,0);
 690         op=0xE5000000;
 691         if(!sign)
 692           op|=0x800000;
 693         if ((ft & VT_BTYPE) == VT_BYTE)
 694           op|=0x400000;
 695         o(op|(intr(r)<<12)|fc|(base<<16));
 696       }
 697       return;
 698     }
 699   }
 700   tcc_error("store unimplemented");
 701 }
 702
 703 static void gadd_sp(int val)
 704 {
 705   stuff_const_harder(0xE28DD000,val);
 706 }
 707
 708 /* 'is_jmp' is '1' if it is a jump */
 709 static void gcall_or_jmp(int is_jmp)
 710 {
 711   int r;
 712   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 713     uint32_t x;
 714     /* constant case */
 715     x=encbranch(ind,ind+vtop->c.ul,0);
 716     if(x) {
 717       if (vtop->r & VT_SYM) {
 718         /* relocation case */
 719         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 720       } else
 721         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 722       o(x|(is_jmp?0xE0000000:0xE1000000));
 723     } else {
 724       if(!is_jmp)
 725         o(0xE28FE004); // add lr,pc,#4
 726       o(0xE51FF004);   // ldr pc,[pc,#-4]
 727       if (vtop->r & VT_SYM)
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 729       o(vtop->c.ul);
 730     }
 731   } else {
 732     /* otherwise, indirect call */
 733     r = gv(RC_INT);
 734     if(!is_jmp)
 735       o(0xE1A0E00F);       // mov lr,pc
 736     o(0xE1A0F000|intr(r)); // mov pc,r
 737   }
 738 }
 739
 740 /* Generate function call. The function address is pushed first, then
 741    all the parameters in call order. This functions pops all the
 742    parameters and the function address. */
 743 void gfunc_call(int nb_args)
 744 {
 745   int size, align, r, args_size, i;
 746   Sym *func_sym;
 747   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 748   int todo=0xf, keep, plan2[4]={0,0,0,0};
 749
 750   r = vtop->r & VT_VALMASK;
 751   if (r == VT_CMP || (r & ~1) == VT_JMP)
 752     gv(RC_INT);
 753 #ifdef TCC_ARM_EABI
 754   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 755      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
 756     SValue tmp;
 757     tmp=vtop[-nb_args];
 758     vtop[-nb_args]=vtop[-nb_args+1];
 759     vtop[-nb_args+1]=tmp;
 760     --nb_args;
 761   }
 762
 763   vpushi(0);
 764   vtop->type.t = VT_LLONG;
 765   args_size = 0;
 766   for(i = nb_args + 1 ; i-- ;) {
 767     size = type_size(&vtop[-i].type, &align);
 768     if(args_size & (align-1)) {
 769       vpushi(0);
 770       vtop->type.t = VT_VOID; /* padding */
 771       vrott(i+2);
 772       args_size += 4;
 773       ++nb_args;
 774     }
 775     args_size += (size + 3) & -4;
 776   }
 777   vtop--;
 778 #endif
 779   args_size = 0;
 780   for(i = nb_args ; i-- && args_size < 16 ;) {
 781     switch(vtop[-i].type.t & VT_BTYPE) {
 782       case VT_STRUCT:
 783       case VT_FLOAT:
 784       case VT_DOUBLE:
 785       case VT_LDOUBLE:
 786       size = type_size(&vtop[-i].type, &align);
 787         size = (size + 3) & -4;
 788       args_size += size;
 789         break;
 790       default:
 791       plan[nb_args-1-i][0]=args_size/4;
 792       args_size += 4;
 793       if ((vtop[-i].type.t & VT_BTYPE) == VT_LLONG && args_size < 16) {
 794         plan[nb_args-1-i][1]=args_size/4;
 795         args_size += 4;
 796       }
 797     }
 798   }
 799   args_size = keep = 0;
 800   for(i = 0;i < nb_args; i++) {
 801     vnrott(keep+1);
 802     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 803       size = type_size(&vtop->type, &align);
 804       /* align to stack align size */
 805       size = (size + 3) & -4;
 806       /* allocate the necessary size on stack */
 807       gadd_sp(-size);
 808       /* generate structure store */
 809       r = get_reg(RC_INT);
 810       o(0xE1A0000D|(intr(r)<<12));
 811       vset(&vtop->type, r | VT_LVAL, 0);
 812       vswap();
 813       vstore();
 814       vtop--;
 815       args_size += size;
 816     } else if (is_float(vtop->type.t)) {
 817 #ifdef TCC_ARM_VFP
 818       r=vfpr(gv(RC_FLOAT))<<12;
 819       size=4;
 820       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 821       {
 822         size=8;
 823         r|=0x101; /* fstms -> fstmd */
 824       }
 825       o(0xED2D0A01+r);
 826 #else
 827       r=fpr(gv(RC_FLOAT))<<12;
 828       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
 829         size = 4;
 830       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 831         size = 8;
 832       else
 833         size = LDOUBLE_SIZE;
 834
 835       if (size == 12)
 836         r|=0x400000;
 837       else if(size == 8)
 838         r|=0x8000;
 839
 840       o(0xED2D0100|r|(size>>2));
 841 #endif
 842       vtop--;
 843       args_size += size;
 844     } else {
 845       int s;
 846       /* simple type (currently always same size) */
 847       /* XXX: implicit cast ? */
 848       size=4;
 849       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
 850         lexpand_nr();
 851         s=RC_INT;
 852         if(nb_args-i<5 && plan[nb_args-i-1][1]!=-1) {
 853           s=regmask(plan[nb_args-i-1][1]);
 854           todo&=~(1<<plan[nb_args-i-1][1]);
 855         }
 856         if(s==RC_INT) {
 857           r = gv(s);
 858           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
 859           vtop--;
 860         } else {
 861           plan2[keep]=s;
 862           keep++;
 863           vswap();
 864         }
 865         size = 8;
 866       }
 867       s=RC_INT;
 868       if(nb_args-i<5 && plan[nb_args-i-1][0]!=-1) {
 869         s=regmask(plan[nb_args-i-1][0]);
 870         todo&=~(1<<plan[nb_args-i-1][0]);
 871       }
 872 #ifdef TCC_ARM_EABI
 873       if(vtop->type.t == VT_VOID) {
 874         if(s == RC_INT)
 875           o(0xE24DD004); /* sub sp,sp,#4 */
 876         vtop--;
 877       } else
 878 #endif
 879       if(s == RC_INT) {
 880         r = gv(s);
 881         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
 882         vtop--;
 883       } else {
 884         plan2[keep]=s;
 885         keep++;
 886       }
 887       args_size += size;
 888     }
 889   }
 890   for(i=keep;i--;) {
 891     gv(plan2[i]);
 892     vrott(keep);
 893   }
 894 save_regs(keep); /* save used temporary registers */
 895   keep++;
 896   if(args_size) {
 897     int n;
 898     n=args_size/4;
 899     if(n>4)
 900       n=4;
 901     todo&=((1<<n)-1);
 902     if(todo) {
 903       int i;
 904       o(0xE8BD0000|todo);
 905       for(i=0;i<4;i++)
 906         if(todo&(1<<i)) {
 907           vpushi(0);
 908           vtop->r=i;
 909           keep++;
 910         }
 911     }
 912     args_size-=n*4;
 913   }
 914   vnrott(keep);
 915   func_sym = vtop->type.ref;
 916   gcall_or_jmp(0);
 917   if (args_size)
 918       gadd_sp(args_size);
 919 #ifdef TCC_ARM_EABI
 920   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
 921      && type_size(&vtop->type.ref->type, &align) <= 4)
 922   {
 923     store(REG_IRET,vtop-keep);
 924     ++keep;
 925   }
 926 #ifdef TCC_ARM_VFP
 927   else if(is_float(vtop->type.ref->type.t)) {
 928     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
 929       o(0xEE000A10); /* fmsr s0,r0 */
 930     } else {
 931       o(0xEE000B10); /* fmdlr d0,r0 */
 932       o(0xEE201B10); /* fmdhr d0,r1 */
 933     }
 934   }
 935 #endif
 936 #endif
 937   vtop-=keep;
 938   leaffunc = 0;
 939 }
 940
 941 /* generate function prolog of type 't' */
 942 void gfunc_prolog(CType *func_type)
 943 {
 944   Sym *sym,*sym2;
 945   int n,addr,size,align;
 946
 947   sym = func_type->ref;
 948   func_vt = sym->type;
 949
 950   n = 0;
 951   addr = 0;
 952   if((func_vt.t & VT_BTYPE) == VT_STRUCT
 953      && type_size(&func_vt,&align) > 4)
 954   {
 955     func_vc = addr;
 956     addr += 4;
 957     n++;
 958   }
 959   for(sym2=sym->next;sym2 && n<4;sym2=sym2->next) {
 960     size = type_size(&sym2->type, &align);
 961     n += (size + 3) / 4;
 962   }
 963   o(0xE1A0C00D); /* mov ip,sp */
 964   if(func_type->ref->c == FUNC_ELLIPSIS)
 965     n=4;
 966   if(n) {
 967     if(n>4)
 968       n=4;
 969 #ifdef TCC_ARM_EABI
 970     n=(n+1)&-2;
 971 #endif
 972     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
 973   }
 974   o(0xE92D5800); /* save fp, ip, lr */
 975   o(0xE28DB00C); /* add fp, sp, #12 */
 976   func_sub_sp_offset = ind;
 977   o(0xE1A00000); /* nop, leave space for stack adjustment */
 978   while ((sym = sym->next)) {
 979     CType *type;
 980     type = &sym->type;
 981     size = type_size(type, &align);
 982     size = (size + 3) & -4;
 983 #ifdef TCC_ARM_EABI
 984     addr = (addr + align - 1) & -align;
 985 #endif
 986     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr);
 987     addr += size;
 988   }
 989   last_itod_magic=0;
 990   leaffunc = 1;
 991   loc = -12;
 992 }
 993
 994 /* generate function epilog */
 995 void gfunc_epilog(void)
 996 {
 997   uint32_t x;
 998   int diff;
 999 #ifdef TCC_ARM_EABI
1000   if(is_float(func_vt.t)) {
1001     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1002       o(0xEE100A10); /* fmrs r0, s0 */
1003     else {
1004       o(0xEE100B10); /* fmrdl r0, d0 */
1005       o(0xEE301B10); /* fmrdh r1, d0 */
1006     }
1007   }
1008 #endif
1009   o(0xE91BA800); /* restore fp, sp, pc */
1010   diff = (-loc + 3) & -4;
1011 #ifdef TCC_ARM_EABI
1012   if(!leaffunc)
1013     diff = (diff + 7) & -8;
1014 #endif
1015   if(diff > 12) {
1016     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1017     if(x)
1018       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1019     else {
1020       int addr;
1021       addr=ind;
1022       o(0xE59FC004); /* ldr ip,[pc+4] */
1023       o(0xE04BD00C); /* sub sp,fp,ip  */
1024       o(0xE1A0F00E); /* mov pc,lr */
1025       o(diff);
1026       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1027     }
1028   }
1029 }
1030
1031 /* generate a jump to a label */
1032 int gjmp(int t)
1033 {
1034   int r;
1035   r=ind;
1036   o(0xE0000000|encbranch(r,t,1));
1037   return r;
1038 }
1039
/* generate a jump to a fixed address */
/* Same as gjmp(), with the position of the branch discarded. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1045
1046 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1047 int gtst(int inv, int t)
1048 {
1049   int v, r;
1050   uint32_t op;
1051   v = vtop->r & VT_VALMASK;
1052   r=ind;
1053   if (v == VT_CMP) {
1054     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1055     op|=encbranch(r,t,1);
1056     o(op);
1057     t=r;
1058   } else if (v == VT_JMP || v == VT_JMPI) {
1059     if ((v & 1) == inv) {
1060       if(!vtop->c.i)
1061         vtop->c.i=t;
1062       else {
1063         uint32_t *x;
1064         int p,lp;
1065         if(t) {
1066           p = vtop->c.i;
1067           do {
1068             p = decbranch(lp=p);
1069           } while(p);
1070           x = (uint32_t *)(cur_text_section->data + lp);
1071           *x &= 0xff000000;
1072           *x |= encbranch(lp,t,1);
1073         }
1074         t = vtop->c.i;
1075       }
1076     } else {
1077       t = gjmp(t);
1078       gsym(vtop->c.i);
1079     }
1080   } else {
1081     if (is_float(vtop->type.t)) {
1082       r=gv(RC_FLOAT);
1083 #ifdef TCC_ARM_VFP
1084       o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1085       o(0xEEF1FA10); /* fmstat */
1086 #else
1087       o(0xEE90F118|(fpr(r)<<16));
1088 #endif
1089       vtop->r = VT_CMP;
1090       vtop->c.i = TOK_NE;
1091       return gtst(inv, t);
1092     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1093       /* constant jmp optimization */
1094       if ((vtop->c.i != 0) != inv)
1095         t = gjmp(t);
1096     } else {
1097       v = gv(RC_INT);
1098       o(0xE3300000|(intr(v)<<16));
1099       vtop->r = VT_CMP;
1100       vtop->c.i = TOK_NE;
1101       return gtst(inv, t);
1102     }
1103   }
1104   vtop--;
1105   return t;
1106 }
1107
/* generate an integer binary operation
 *
 * op: operator token ('+', '-', TOK_SHL, TOK_UDIV, a comparison token, ...).
 * The two operands are the two topmost value stack entries; they are
 * replaced by a single entry describing the result.
 *
 * The first switch classifies the operator into 'c':
 *   1 - ARM data-processing instruction; 'opc' is placed at bit 20 of the
 *       instruction word, i.e. it holds the opcode together with the S bit
 *   2 - shift, emitted as a mov with a shifted operand; 'opc' is the shift type
 *   3 - call to a runtime library helper named by 'func'
 * '*' and TOK_UMULL are emitted directly and return early.
 */
void gen_opi(int op)
{
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  /* register the result of a library call is read from */
  unsigned short retreg = REG_IRET;

  c=0;
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      /* both operands in integer registers; the result overwrites the
         register of the first operand */
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* with EABI the combined div/mod helper is called and the result
         is taken from REG_LRET instead of REG_IRET */
      func=TOK___aeabi_idivmod;
      retreg=REG_LRET;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      func=TOK___aeabi_uidivmod;
      retreg=REG_LRET;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* 32x32 -> 64 bit multiply: a fresh register is allocated for the
         second result word (r2) and another one for the first (r) */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      /* every other token (the comparisons) is emitted as a compare */
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* a constant first operand of a subtraction: swap the operands and
         use the reversed form so the constant can become the immediate */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* a second operand that still lives in the flags or in a jump chain
         is materialised first, before the first operand is loaded */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* load the first operand into an integer register */
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20)|(c<<16);
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* try to encode the constant as an immediate operand;
           stuff_const() yields 0 when that is not possible */
        uint32_t x;
        x=stuff_const(opc|0x2000000,vtop->c.i);
        if(x) {
          r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
          o(x|(r<<12));
          goto done;
        }
      }
      /* register/register form */
      fr=intr(gv(RC_INT));
      r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
      o(opc|(r<<12)|fr);
done:
      vtop--;
      /* for comparisons the result is the condition, not a register */
      if (op >= TOK_ULT && op <= TOK_GT) {
        vtop->r = VT_CMP;
        vtop->c.i = op;
      }
      break;
    case 2:
      /* shift: mov with a shifted operand, shift type at bit 5 */
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* value to be shifted goes into a register */
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      opc|=r;
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* constant shift amount: only the low 5 bits are encoded */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|(c<<7)|(fr<<12));
      } else {
        /* shift amount taken from a register */
        fr=intr(gv(RC_INT));
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* library call: push the helper below the two operands, call it
         with two arguments and describe the result register */
      vpush_global_sym(&func_old_type, func);
      vrott(3);
      gfunc_call(2);
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1277
1278 #ifdef TCC_ARM_VFP
1279 static int is_zero(int i)
1280 {
1281   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1282     return 0;
1283   if (vtop[i].type.t == VT_FLOAT)
1284     return (vtop[i].c.f == 0.f);
1285   else if (vtop[i].type.t == VT_DOUBLE)
1286     return (vtop[i].c.d == 0.0);
1287   return (vtop[i].c.ld == 0.l);
1288 }
1289
1290 /* generate a floating point operation 'v = t1 op t2' instruction. The
1291  *    two operands are guaranted to have the same floating point type */
1292 void gen_opf(int op)
1293 {
1294   uint32_t x;
1295   int fneg=0,r;
1296   x=0xEE000A00|T2CPR(vtop->type.t);
1297   switch(op) {
1298     case '+':
1299       if(is_zero(-1))
1300         vswap();
1301       if(is_zero(0)) {
1302         vtop--;
1303         return;
1304       }
1305       x|=0x300000;
1306       break;
1307     case '-':
1308       x|=0x300040;
1309       if(is_zero(0)) {
1310         vtop--;
1311         return;
1312       }
1313       if(is_zero(-1)) {
1314         x|=0x810000; /* fsubX -> fnegX */
1315         vswap();
1316         vtop--;
1317         fneg=1;
1318       }
1319       break;
1320     case '*':
1321       x|=0x200000;
1322       break;
1323     case '/':
1324       x|=0x800000;
1325       break;
1326     default:
1327       if(op < TOK_ULT && op > TOK_GT) {
1328         tcc_error("unknown fp op %x!",op);
1329         return;
1330       }
1331       if(is_zero(-1)) {
1332         vswap();
1333         switch(op) {
1334           case TOK_LT: op=TOK_GT; break;
1335           case TOK_GE: op=TOK_ULE; break;
1336           case TOK_LE: op=TOK_GE; break;
1337           case TOK_GT: op=TOK_ULT; break;
1338         }
1339       }
1340       x|=0xB40040; /* fcmpX */
1341       if(op!=TOK_EQ && op!=TOK_NE)
1342         x|=0x80; /* fcmpX -> fcmpeX */
1343       if(is_zero(0)) {
1344         vtop--;
1345         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1346       } else {
1347         x|=vfpr(gv(RC_FLOAT));
1348         vswap();
1349         o(x|(vfpr(gv(RC_FLOAT))<<12));
1350         vtop--;
1351       }
1352       o(0xEEF1FA10); /* fmstat */
1353
1354       switch(op) {
1355         case TOK_LE: op=TOK_ULE; break;
1356         case TOK_LT: op=TOK_ULT; break;
1357         case TOK_UGE: op=TOK_GE; break;
1358         case TOK_UGT: op=TOK_GT; break;
1359       }
1360
1361       vtop->r = VT_CMP;
1362       vtop->c.i = op;
1363       return;
1364   }
1365   r=gv(RC_FLOAT);
1366   x|=vfpr(r);
1367   r=regmask(r);
1368   if(!fneg) {
1369     int r2;
1370     vswap();
1371     r2=gv(RC_FLOAT);
1372     x|=vfpr(r2)<<16;
1373     r|=regmask(r2);
1374   }
1375   vtop->r=get_reg_ex(RC_FLOAT,r);
1376   if(!fneg)
1377     vtop--;
1378   o(x|(vfpr(vtop->r)<<12));
1379 }
1380
1381 #else
1382 static uint32_t is_fconst()
1383 {
1384   long double f;
1385   uint32_t r;
1386   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1387     return 0;
1388   if (vtop->type.t == VT_FLOAT)
1389     f = vtop->c.f;
1390   else if (vtop->type.t == VT_DOUBLE)
1391     f = vtop->c.d;
1392   else
1393     f = vtop->c.ld;
1394   if(!ieee_finite(f))
1395     return 0;
1396   r=0x8;
1397   if(f<0.0) {
1398     r=0x18;
1399     f=-f;
1400   }
1401   if(f==0.0)
1402     return r;
1403   if(f==1.0)
1404     return r|1;
1405   if(f==2.0)
1406     return r|2;
1407   if(f==3.0)
1408     return r|3;
1409   if(f==4.0)
1410     return r|4;
1411   if(f==5.0)
1412     return r|5;
1413   if(f==0.5)
1414     return r|6;
1415   if(f==10.0)
1416     return r|7;
1417   return 0;
1418 }
1419
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type.

   FPA variant.  op is '+', '-', '*', '/' or a comparison token in the range
   TOK_ULT..TOK_GT.  The operands are the two topmost value stack entries;
   they are replaced by a single entry (a float register, or VT_CMP for
   comparisons).

   c1 / c2 are the is_fconst() results for the first and second operand:
   0 when the operand cannot be used as an immediate, otherwise a value with
   bit 3 set, the constant index in the low bits and bit 4 set for a
   negative constant (hence the 'c > 0xf' tests below). */
void gen_opf(int op)
{
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  /* classify both operands; the stack order is restored afterwards */
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  /* base instruction word; precision bits are selected from the type */
  x=0xEE000100;
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  /* Each case leaves in 'r' the register used as first instruction operand
     and in 'r2' either a register number or an immediate code (bit 3 set)
     used as second instruction operand. */
  switch(op)
  {
    case '+':
      /* addition commutes: make sure a usable constant ends up on top */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* adding a negative constant = subtracting its magnitude */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* constant subtrahend: subtract a non-negative constant, otherwise
           the opcode bits stay those of the base word and the magnitude
           is used */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* non-negative constant minuend: reverse subtract from the register
           holding the second operand */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* multiplication commutes: prefer a non-negative constant on top */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* non-negative constant dividend: reverse divide */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        /* choose the condition to test after the compare */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparision on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        if(c1 && !c2) {
          /* only the first operand is a constant: swap the operands so it
             becomes the immediate, and mirror the condition accordingly */
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative constant: set bit 21 of the compare and use the
             magnitude as immediate */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        /* the result of a comparison is the condition code */
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* destination field: 15 for compares, otherwise a float register chosen
     with the operand registers passed to get_reg_ex() as a mask */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* second operand is an immediate: only vtop[-1] holds a register */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1583 #endif
1584
1585 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1586    and 'long long' cases. */
1587 ST_FUNC void gen_cvt_itof1(int t)
1588 {
1589   uint32_t r, r2;
1590   int bt;
1591   bt=vtop->type.t & VT_BTYPE;
1592   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1593 #ifndef TCC_ARM_VFP
1594     uint32_t dsize = 0;
1595 #endif
1596     r=intr(gv(RC_INT));
1597 #ifdef TCC_ARM_VFP
1598     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1599     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1600     r2<<=12;
1601     if(!(vtop->type.t & VT_UNSIGNED))
1602       r2|=0x80;                /* fuitoX -> fsituX */
1603     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1604 #else
1605     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1606     if((t & VT_BTYPE) != VT_FLOAT)
1607       dsize=0x80;    /* flts -> fltd */
1608     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1609     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1610       uint32_t off = 0;
1611       o(0xE3500000|(r<<12));        /* cmp */
1612       r=fpr(get_reg(RC_FLOAT));
1613       if(last_itod_magic) {
1614         off=ind+8-last_itod_magic;
1615         off/=4;
1616         if(off>255)
1617           off=0;
1618       }
1619       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1620       if(!off) {
1621         o(0xEA000000);              /* b */
1622         last_itod_magic=ind;
1623         o(0x4F800000);              /* 4294967296.0f */
1624       }
1625       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1626     }
1627 #endif
1628     return;
1629   } else if(bt == VT_LLONG) {
1630     int func;
1631     CType *func_type = 0;
1632     if((t & VT_BTYPE) == VT_FLOAT) {
1633       func_type = &func_float_type;
1634       if(vtop->type.t & VT_UNSIGNED)
1635         func=TOK___floatundisf;
1636       else
1637         func=TOK___floatdisf;
1638 #if LDOUBLE_SIZE != 8
1639     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1640       func_type = &func_ldouble_type;
1641       if(vtop->type.t & VT_UNSIGNED)
1642         func=TOK___floatundixf;
1643       else
1644         func=TOK___floatdixf;
1645     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1646 #else
1647     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1648 #endif
1649       func_type = &func_double_type;
1650       if(vtop->type.t & VT_UNSIGNED)
1651         func=TOK___floatundidf;
1652       else
1653         func=TOK___floatdidf;
1654     }
1655     if(func_type) {
1656       vpush_global_sym(func_type, func);
1657       vswap();
1658       gfunc_call(1);
1659       vpushi(0);
1660       vtop->r=TREG_F0;
1661       return;
1662     }
1663   }
1664   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1665 }
1666
1667 /* convert fp to int 't' type */
1668 void gen_cvt_ftoi(int t)
1669 {
1670   uint32_t r, r2;
1671   int u, func = 0;
1672   u=t&VT_UNSIGNED;
1673   t&=VT_BTYPE;
1674   r2=vtop->type.t & VT_BTYPE;
1675   if(t==VT_INT) {
1676 #ifdef TCC_ARM_VFP
1677     r=vfpr(gv(RC_FLOAT));
1678     u=u?0:0x10000;
1679     o(0xEEBC0A40|(r<<12)|r|T2CPR(r2)); /* ftoXiY */
1680     r2=intr(vtop->r=get_reg(RC_INT));
1681     o(0xEE100A10|(r<<16)|(r2<<12));
1682     return;
1683 #else
1684     if(u) {
1685       if(r2 == VT_FLOAT)
1686         func=TOK___fixunssfsi;
1687 #if LDOUBLE_SIZE != 8
1688       else if(r2 == VT_LDOUBLE)
1689         func=TOK___fixunsxfsi;
1690       else if(r2 == VT_DOUBLE)
1691 #else
1692       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1693 #endif
1694         func=TOK___fixunsdfsi;
1695     } else {
1696       r=fpr(gv(RC_FLOAT));
1697       r2=intr(vtop->r=get_reg(RC_INT));
1698       o(0xEE100170|(r2<<12)|r);
1699       return;
1700     }
1701 #endif
1702   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1703     if(r2 == VT_FLOAT)
1704       func=TOK___fixsfdi;
1705 #if LDOUBLE_SIZE != 8
1706     else if(r2 == VT_LDOUBLE)
1707       func=TOK___fixxfdi;
1708     else if(r2 == VT_DOUBLE)
1709 #else
1710     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1711 #endif
1712       func=TOK___fixdfdi;
1713   }
1714   if(func) {
1715     vpush_global_sym(&func_old_type, func);
1716     vswap();
1717     gfunc_call(1);
1718     vpushi(0);
1719     if(t == VT_LLONG)
1720       vtop->r2 = REG_LRET;
1721     vtop->r = REG_IRET;
1722     return;
1723   }
1724   tcc_error("unimplemented gen_cvt_ftoi!");
1725 }
1726
1727 /* convert from one floating point type to another */
1728 void gen_cvt_ftof(int t)
1729 {
1730 #ifdef TCC_ARM_VFP
1731   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1732     uint32_t r = vfpr(gv(RC_FLOAT));
1733     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1734   }
1735 #else
1736   /* all we have to do on i386 and FPA ARM is to put the float in a register */
1737   gv(RC_FLOAT);
1738 #endif
1739 }
1740
1741 /* computed goto support */
1742 void ggoto(void)
1743 {
1744   gcall_or_jmp(1);
1745   vtop--;
1746 }
1747
1748 /* end of ARM code generator */
1749 /*************************************************************/
1750 #endif
1751 /*************************************************************/