arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 #ifdef TARGET_DEFS_ONLY
  24
/* Building for the EABI always enables VFP code generation: TCC_ARM_VFP
   is defined here unless it was already defined. */
#ifdef TCC_ARM_EABI
#ifndef TCC_ARM_VFP // guard avoids a macro-redefinition warning
#define TCC_ARM_VFP
#endif
#endif

/* number of available registers: 5 integer registers plus 8 float
   registers with VFP, or plus 4 float registers without it (see the
   TREG_* enum and reg_classes[]) */
#ifdef TCC_ARM_VFP
#define NB_REGS            13
#else
#define NB_REGS             9
#endif

/* A register can belong to several classes. The classes must be
   sorted from more general to more precise (see the gv2() code, which
   makes assumptions about this ordering). Each value is a distinct bit
   so that classes can be combined into a mask. */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_R0      0x0004
#define RC_R1      0x0008
#define RC_R2      0x0010
#define RC_R3      0x0020
#define RC_R12     0x0040
#define RC_F0      0x0080
#define RC_F1      0x0100
#define RC_F2      0x0200
#define RC_F3      0x0400
#ifdef TCC_ARM_VFP
/* the four additional float registers only exist in the VFP build */
#define RC_F4      0x0800
#define RC_F5      0x1000
#define RC_F6      0x2000
#define RC_F7      0x4000
#endif
#define RC_IRET    RC_R0  /* function return: integer register */
#define RC_LRET    RC_R1  /* function return: second integer register */
#define RC_FRET    RC_F0  /* function return: float register */
  61
  62 /* pretty names for the registers */
/* Compiler-internal register numbers. They index reg_classes[], so the
   order here must match that table. Note that TREG_R12 has the value 4,
   not 12: intr() translates it to the hardware register number. */
enum {
    TREG_R0 = 0,
    TREG_R1,
    TREG_R2,
    TREG_R3,
    TREG_R12,
    TREG_F0,   /* first float register; vfpr()/fpr() subtract this base */
    TREG_F1,
    TREG_F2,
    TREG_F3,
#ifdef TCC_ARM_VFP
    /* only available in the VFP build (NB_REGS == 13) */
    TREG_F4,
    TREG_F5,
    TREG_F6,
    TREG_F7,
#endif
};
  80
#ifdef TCC_ARM_VFP
/* Precision selector for a VFP instruction: yields 0x100 for every type
   whose basic type is not VT_FLOAT (the same bit that turns flds into
   fldd in load()), and 0 for VT_FLOAT. */
#define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
#endif

/* return registers for function */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_LRET TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */

#ifdef TCC_ARM_EABI
/* Under the EABI the 64-bit division and modulo helper tokens are
   aliased to the __aeabi_*ldivmod tokens; division and modulo share the
   same helper. */
#define TOK___divdi3 TOK___aeabi_ldivmod
#define TOK___moddi3 TOK___aeabi_ldivmod
#define TOK___udivdi3 TOK___aeabi_uldivmod
#define TOK___umoddi3 TOK___aeabi_uldivmod
#endif

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on stack. (Deliberately left undefined for ARM.) */
//#define FUNC_STRUCT_PARAM_AS_PTR

/* With EABI + VFP the float/double function types are real objects
   (defined in the code-generator half of this file); otherwise they are
   aliases of func_old_type. long double always shares the double
   function type. */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
ST_DATA CType float_type, double_type, func_float_type, func_double_type;
#define func_ldouble_type func_double_type
#else
#define func_float_type func_old_type
#define func_double_type func_old_type
#define func_ldouble_type func_old_type
#endif

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
#ifdef TCC_ARM_VFP
#define LDOUBLE_SIZE  8
#endif

/* fallback when not set above; both paths currently yield 8 */
#ifndef LDOUBLE_SIZE
#define LDOUBLE_SIZE  8
#endif

#ifdef TCC_ARM_EABI
#define LDOUBLE_ALIGN 8
#else
#define LDOUBLE_ALIGN 4
#endif

/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* plain 'char' is unsigned on this target */
#define CHAR_IS_UNSIGNED

/******************************************************/
/* ELF defines */

#define EM_TCC_TARGET EM_ARM

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_ARM_ABS32
#define R_DATA_PTR  R_ARM_ABS32
#define R_JMP_SLOT  R_ARM_JUMP_SLOT
#define R_COPY      R_ARM_COPY

/* start address and page size used for generated ELF images
   (consumed outside this file) */
#define ELF_START_ADDR 0x00008000
#define ELF_PAGE_SIZE  0x1000
 149
 150 /******************************************************/
 151 #else /* ! TARGET_DEFS_ONLY */
 152 /******************************************************/
 153 #include "tcc.h"
 154
/* Register classes of every compiler register, indexed by the TREG_*
   value. Each entry combines the generic class (RC_INT or RC_FLOAT) with
   the bit that identifies that one register, which lets callers request
   either "any register of this kind" or one specific register. */
ST_DATA const int reg_classes[NB_REGS] = {
    /* r0 */ RC_INT | RC_R0,
    /* r1 */ RC_INT | RC_R1,
    /* r2 */ RC_INT | RC_R2,
    /* r3 */ RC_INT | RC_R3,
    /* r12 */ RC_INT | RC_R12,
    /* f0 */ RC_FLOAT | RC_F0,
    /* f1 */ RC_FLOAT | RC_F1,
    /* f2 */ RC_FLOAT | RC_F2,
    /* f3 */ RC_FLOAT | RC_F3,
#ifdef TCC_ARM_VFP
 /* d4/s8 */ RC_FLOAT | RC_F4,
/* d5/s10 */ RC_FLOAT | RC_F5,
/* d6/s12 */ RC_FLOAT | RC_F6,
/* d7/s14 */ RC_FLOAT | RC_F7,
#endif
};
 172
/* keep in sync with the matching ST_DATA declaration (same #if
   condition) in the TARGET_DEFS_ONLY half of this file */
#if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
ST_DATA CType float_type, double_type, func_float_type, func_double_type;
#endif

/* File-local code generator state.
   NOTE(review): none of these are referenced in the part of the file
   reviewed here; judging by the names they track the prologue's stack
   adjustment, an int-to-double helper constant and whether the current
   function is a leaf -- confirm against their users further down. */
static int func_sub_sp_offset, last_itod_magic;
static int leaffunc;
 180
 181 static int two2mask(int a,int b) {
 182   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
 183 }
 184
 185 static int regmask(int r) {
 186   return reg_classes[r]&~(RC_INT|RC_FLOAT);
 187 }
 188
 189 /******************************************************/
 190
 191 void o(uint32_t i)
 192 {
 193   /* this is a good place to start adding big-endian support*/
 194   int ind1;
 195
 196   ind1 = ind + 4;
 197   if (!cur_text_section)
 198     tcc_error("compiler error! This happens f.ex. if the compiler\n"
 199          "can't evaluate constant expressions outside of a function.");
 200   if (ind1 > cur_text_section->data_allocated)
 201     section_realloc(cur_text_section, ind1);
 202   cur_text_section->data[ind++] = i&255;
 203   i>>=8;
 204   cur_text_section->data[ind++] = i&255;
 205   i>>=8;
 206   cur_text_section->data[ind++] = i&255;
 207   i>>=8;
 208   cur_text_section->data[ind++] = i;
 209 }
 210
 211 static uint32_t stuff_const(uint32_t op, uint32_t c)
 212 {
 213   int try_neg=0;
 214   uint32_t nc = 0, negop = 0;
 215
 216   switch(op&0x1F00000)
 217   {
 218     case 0x800000: //add
 219     case 0x400000: //sub
 220       try_neg=1;
 221       negop=op^0xC00000;
 222       nc=-c;
 223       break;
 224     case 0x1A00000: //mov
 225     case 0x1E00000: //mvn
 226       try_neg=1;
 227       negop=op^0x400000;
 228       nc=~c;
 229       break;
 230     case 0x200000: //xor
 231       if(c==~0)
 232         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 233       break;
 234     case 0x0: //and
 235       if(c==~0)
 236         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 237     case 0x1C00000: //bic
 238       try_neg=1;
 239       negop=op^0x1C00000;
 240       nc=~c;
 241       break;
 242     case 0x1800000: //orr
 243       if(c==~0)
 244         return (op&0xFFF0FFFF)|0x1E00000;
 245       break;
 246   }
 247   do {
 248     uint32_t m;
 249     int i;
 250     if(c<256) /* catch undefined <<32 */
 251       return op|c;
 252     for(i=2;i<32;i+=2) {
 253       m=(0xff>>i)|(0xff<<(32-i));
 254       if(!(c&~m))
 255         return op|(i<<7)|(c<<i)|(c>>(32-i));
 256     }
 257     op=negop;
 258     c=nc;
 259   } while(try_neg--);
 260   return 0;
 261 }
 262
 263
/* Emit an add/sub of the arbitrary 32-bit constant 'v', splitting it
   over several instructions when a single immediate cannot hold it.
   only add,sub: 'op' must be an add or sub with immediate operand.

   The first instruction is 'op' itself; the following ones use 'o2',
   which is 'op' with its Rd field copied into the Rn field, so they
   keep accumulating into the destination register. The search order
   is: one instruction, two with v, two with -v and the opposite
   operation, three with v, three with -v, and finally a fixed
   four-instruction sequence (one byte of v each). */
void stuff_const_harder(uint32_t op, uint32_t v) {
  uint32_t x;
  x=stuff_const(op,v);
  if(x)
    o(x);
  else {
    uint32_t a[16], nv, no, o2, n2;
    int i,j,k;
    /* a[i] is the byte mask 0xff rotated right by 2*i bits, i.e. every
       window an ARM immediate can cover */
    a[0]=0xff;
    o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
    for(i=1;i<16;i++)
      a[i]=(a[i-1]>>2)|(a[i-1]<<30);
    /* two instructions: v covered by two non-overlapping windows */
    for(i=0;i<12;i++)
      for(j=i<4?i+12:15;j>=i+4;j--)
        if((v&(a[i]|a[j]))==v) {
          o(stuff_const(op,v&a[i]));
          o(stuff_const(o2,v&a[j]));
          return;
        }
    /* same with the negated constant and add<->sub swapped */
    no=op^0xC00000;
    n2=o2^0xC00000;
    nv=-v;
    for(i=0;i<12;i++)
      for(j=i<4?i+12:15;j>=i+4;j--)
        if((nv&(a[i]|a[j]))==nv) {
          o(stuff_const(no,nv&a[i]));
          o(stuff_const(n2,nv&a[j]));
          return;
        }
    /* three instructions: v covered by three windows */
    for(i=0;i<8;i++)
      for(j=i+4;j<12;j++)
        for(k=i<4?i+12:15;k>=j+4;k--)
          if((v&(a[i]|a[j]|a[k]))==v) {
            o(stuff_const(op,v&a[i]));
            o(stuff_const(o2,v&a[j]));
            o(stuff_const(o2,v&a[k]));
            return;
          }
    /* three instructions with the negated constant (no and nv are
       recomputed here with the same values as above) */
    no=op^0xC00000;
    nv=-v;
    for(i=0;i<8;i++)
      for(j=i+4;j<12;j++)
        for(k=i<4?i+12:15;k>=j+4;k--)
          if((nv&(a[i]|a[j]|a[k]))==nv) {
            o(stuff_const(no,nv&a[i]));
            o(stuff_const(n2,nv&a[j]));
            o(stuff_const(n2,nv&a[k]));
            return;
          }
    /* last resort: one instruction per byte of v (a[0], a[4], a[8] and
       a[12] are the four byte-aligned windows) */
    o(stuff_const(op,v&a[0]));
    o(stuff_const(o2,v&a[4]));
    o(stuff_const(o2,v&a[8]));
    o(stuff_const(o2,v&a[12]));
  }
}
 320
 321 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 322 {
 323   addr-=pos+8;
 324   addr/=4;
 325   if(addr>=0x1000000 || addr<-0x1000000) {
 326     if(fail)
 327       tcc_error("FIXME: function bigger than 32MB");
 328     return 0;
 329   }
 330   return 0x0A000000|(addr&0xffffff);
 331 }
 332
 333 int decbranch(int pos)
 334 {
 335   int x;
 336   x=*(uint32_t *)(cur_text_section->data + pos);
 337   x&=0x00ffffff;
 338   if(x&0x800000)
 339     x-=0x1000000;
 340   return x*4+pos+8;
 341 }
 342
 343 /* output a symbol and patch all calls to it */
 344 void gsym_addr(int t, int a)
 345 {
 346   uint32_t *x;
 347   int lt;
 348   while(t) {
 349     x=(uint32_t *)(cur_text_section->data + t);
 350     t=decbranch(lt=t);
 351     if(a==lt+4)
 352       *x=0xE1A00000; // nop
 353     else {
 354       *x &= 0xff000000;
 355       *x |= encbranch(lt,a,1);
 356     }
 357   }
 358 }
 359
/* Resolve the chain of pending branches starting at text offset 't' to
   the current output position 'ind' (see gsym_addr()). */
void gsym(int t)
{
  gsym_addr(t, ind);
}
 364
 365 #ifdef TCC_ARM_VFP
 366 static uint32_t vfpr(int r)
 367 {
 368   if(r<TREG_F0 || r>TREG_F7)
 369     tcc_error("compiler error! register %i is no vfp register",r);
 370   return r-5;
 371 }
 372 #else
 373 static uint32_t fpr(int r)
 374 {
 375   if(r<TREG_F0 || r>TREG_F3)
 376     tcc_error("compiler error! register %i is no fpa register",r);
 377   return r-5;
 378 }
 379 #endif
 380
 381 static uint32_t intr(int r)
 382 {
 383   if(r==4)
 384     return 12;
 385   if((r<0 || r>4) && r!=14)
 386     tcc_error("compiler error! register %i is no int register",r);
 387   return r;
 388 }
 389
/* Make the address base +/- offset usable by a load/store whose offset
   field can only hold values up to 'maxoff' that are multiples of
   1<<shift.

   *base: base register number (in/out)
   *off:  offset magnitude (in/out)
   *sgn:  non-zero if the offset is to be subtracted (in/out)

   If the offset already fits, nothing is emitted. Otherwise code is
   emitted that computes an intermediate address into lr (r14), *base is
   changed to 14, and *off / *sgn are updated to the remaining offset
   from lr. */
static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
{
  if(*off>maxoff || *off&((1<<shift)-1)) {
    uint32_t x, y;
    x=0xE280E000;             /* add lr, <base>, #imm */
    if(*sgn)
      x=0xE240E000;           /* sub lr, <base>, #imm */
    x|=(*base)<<16;
    *base=14; // lr
    /* first choice: move every bit that does not fit the offset field
       into lr with a single instruction */
    y=stuff_const(x,*off&~maxoff);
    if(y) {
      o(y);
      *off&=maxoff;
      return;
    }
    /* second choice: overshoot by rounding up, then access with the
       opposite sign and the difference as offset */
    y=stuff_const(x,(*off+maxoff)&~maxoff);
    if(y) {
      o(y);
      *sgn=!*sgn;
      *off=((*off+maxoff)&~maxoff)-*off;
      return;
    }
    /* otherwise build the constant with several instructions */
    stuff_const_harder(x,*off&~maxoff);
    *off&=maxoff;
  }
}
 416
 417 static uint32_t mapcc(int cc)
 418 {
 419   switch(cc)
 420   {
 421     case TOK_ULT:
 422       return 0x30000000; /* CC/LO */
 423     case TOK_UGE:
 424       return 0x20000000; /* CS/HS */
 425     case TOK_EQ:
 426       return 0x00000000; /* EQ */
 427     case TOK_NE:
 428       return 0x10000000; /* NE */
 429     case TOK_ULE:
 430       return 0x90000000; /* LS */
 431     case TOK_UGT:
 432       return 0x80000000; /* HI */
 433     case TOK_Nset:
 434       return 0x40000000; /* MI */
 435     case TOK_Nclear:
 436       return 0x50000000; /* PL */
 437     case TOK_LT:
 438       return 0xB0000000; /* LT */
 439     case TOK_GE:
 440       return 0xA0000000; /* GE */
 441     case TOK_LE:
 442       return 0xD0000000; /* LE */
 443     case TOK_GT:
 444       return 0xC0000000; /* GT */
 445   }
 446   tcc_error("unexpected condition code");
 447   return 0xE0000000; /* AL */
 448 }
 449
 450 static int negcc(int cc)
 451 {
 452   switch(cc)
 453   {
 454     case TOK_ULT:
 455       return TOK_UGE;
 456     case TOK_UGE:
 457       return TOK_ULT;
 458     case TOK_EQ:
 459       return TOK_NE;
 460     case TOK_NE:
 461       return TOK_EQ;
 462     case TOK_ULE:
 463       return TOK_UGT;
 464     case TOK_UGT:
 465       return TOK_ULE;
 466     case TOK_Nset:
 467       return TOK_Nclear;
 468     case TOK_Nclear:
 469       return TOK_Nset;
 470     case TOK_LT:
 471       return TOK_GE;
 472     case TOK_GE:
 473       return TOK_LT;
 474     case TOK_LE:
 475       return TOK_GT;
 476     case TOK_GT:
 477       return TOK_LE;
 478   }
 479   tcc_error("unexpected condition code");
 480   return TOK_NE;
 481 }
 482
/* load 'r' from value 'sv'

   'r' is a compiler register number (TREG_*, or 14 for lr when used
   internally as an address scratch register). The kind of value is
   taken from sv->r:
     - with VT_LVAL: the value is read from memory (local slot, address
       held in a register, constant/symbol address, or a pointer stored
       in a local slot for VT_LLOCAL);
     - without VT_LVAL: a constant, the address of a local, the result of
       a comparison, a pending jump chain, or another register.
   Any other combination ends in the "load unimplemented!" error. */
void load(int r, SValue *sv)
{
  int v, ft, fc, fr, sign;
  uint32_t op;
  SValue v1;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL) {
    uint32_t base = 0xB; // fp
    /* reduce every addressing form to "base register + offset" */
    if(v == VT_LLOCAL) {
      /* the address itself lives in a local slot: fetch it into lr */
      v1.type.t = VT_PTR;
      v1.r = VT_LOCAL | VT_LVAL;
      v1.c.ul = sv->c.ul;
      load(base=14 /* lr */, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v == VT_CONST) {
      /* constant or symbol address: materialize it in lr first */
      v1.type.t = VT_PTR;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    } else if(v < VT_CONST) {
      /* address already held in an integer register */
      base=intr(v);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
      if(is_float(ft)) {
        /* float loads take a word-aligned offset of at most 1020,
           encoded divided by 4 */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED100A00; /* flds */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* flds -> fldd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        /* non-VFP float load; the bits set below select the precision */
        op=0xED100100;
        if(!sign)
          op|=0x800000;
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        else if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
      } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
                || (ft & VT_BTYPE) == VT_SHORT) {
        /* signed bytes and all shorts use the load form with a split
           8-bit offset (high nibble in bits 11:8, low nibble in 3:0) */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE1500090;
        if ((ft & VT_BTYPE) == VT_SHORT)
          op|=0x20;     /* halfword instead of byte */
        if ((ft & VT_UNSIGNED) == 0)
          op|=0x40;     /* sign-extending variant */
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word or unsigned byte load with a 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5100000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  } else {
    if (v == VT_CONST) {
      /* try a single mov/mvn with immediate operand */
      op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        /* otherwise place the word inline in the code: load it
           pc-relative, branch over it, then emit the word (with an
           absolute relocation when a symbol is involved) */
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM)
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
      } else
        o(op);
      return;
    } else if (v == VT_LOCAL) {
      /* address of a local: fp + constant, as a single add if the
         constant can be encoded */
      op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
      if (fr & VT_SYM || !op) {
        /* inline literal as above, then add fp to it */
        o(0xE59F0000|(intr(r)<<12));
        o(0xEA000000);
        if(fr & VT_SYM) // needed ?
          greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
        o(sv->c.ul);
        o(0xE08B0000|(intr(r)<<12)|intr(r));
      } else
        o(op);
      return;
    } else if(v == VT_CMP) {
      /* turn the flags into 0/1: conditional mov #1 followed by a mov
         #0 under the negated condition */
      o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
      o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
      return;
    } else if (v == VT_JMP || v == VT_JMPI) {
      /* pending jump chain: the fall-through path loads t and skips
         the next instruction, the resolved jumps land on the load of
         t^1 */
      int t;
      t = v & 1;
      o(0xE3A00000|(intr(r)<<12)|t);
      o(0xEA000000);
      gsym(sv->c.ul);
      o(0xE3A00000|(intr(r)<<12)|(t^1));
      return;
    } else if (v < VT_CONST) {
      /* register to register move */
      if(is_float(ft))
#ifdef TCC_ARM_VFP
        o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
#else
        o(0xEE008180|(fpr(r)<<12)|fpr(v));
#endif
      else
        o(0xE1A00000|(intr(r)<<12)|intr(v));
      return;
    }
  }
  tcc_error("load unimplemented!");
}
 621
/* store register 'r' in lvalue 'v'

   The destination described by 'sv' may be a local stack slot, a
   location whose address is held in an integer register, or a
   constant/symbol address (which is first loaded into lr). Anything
   else ends in the "store unimplemented" error. */
void store(int r, SValue *sv)
{
  SValue v1;
  int v, ft, fc, fr, sign;
  uint32_t op;

  fr = sv->r;
  ft = sv->type.t;
  fc = sv->c.ul;

  /* split the offset into magnitude (fc) and direction (sign) */
  if(fc>=0)
    sign=0;
  else {
    sign=1;
    fc=-fc;
  }

  v = fr & VT_VALMASK;
  if (fr & VT_LVAL || fr == VT_LOCAL) {
    uint32_t base = 0xb; /* fp */
    /* reduce every addressing form to "base register + offset" */
    if(v < VT_CONST) {
      /* address already held in an integer register */
      base=intr(v);
      v=VT_LOCAL;
      fc=sign=0;
    } else if(v == VT_CONST) {
      /* constant or symbol address: materialize it in lr first */
      v1.type.t = ft;
      v1.r = fr&~VT_LVAL;
      v1.c.ul = sv->c.ul;
      v1.sym=sv->sym;
      load(base=14, &v1);
      fc=sign=0;
      v=VT_LOCAL;
    }
    if(v == VT_LOCAL) {
       if(is_float(ft)) {
        /* float stores take a word-aligned offset of at most 1020,
           encoded divided by 4 */
        calcaddr(&base,&fc,&sign,1020,2);
#ifdef TCC_ARM_VFP
        op=0xED000A00; /* fsts */
        if(!sign)
          op|=0x800000; /* add the offset instead of subtracting it */
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x100;   /* fsts -> fstd */
        o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
#else
        /* non-VFP float store; the bits set below select the precision */
        op=0xED000100;
        if(!sign)
          op|=0x800000;
#if LDOUBLE_SIZE == 8
        if ((ft & VT_BTYPE) != VT_FLOAT)
          op|=0x8000;
#else
        if ((ft & VT_BTYPE) == VT_DOUBLE)
          op|=0x8000;
        if ((ft & VT_BTYPE) == VT_LDOUBLE)
          op|=0x400000;
#endif
        o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
#endif
        return;
      } else if((ft & VT_BTYPE) == VT_SHORT) {
        /* halfword store with a split 8-bit offset (high nibble in
           bits 11:8, low nibble in bits 3:0) */
        calcaddr(&base,&fc,&sign,255,0);
        op=0xE14000B0;
        if(!sign)
          op|=0x800000;
        o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
      } else {
        /* word or byte store with a 12-bit offset */
        calcaddr(&base,&fc,&sign,4095,0);
        op=0xE5000000;
        if(!sign)
          op|=0x800000;
        if ((ft & VT_BTYPE) == VT_BYTE)
          op|=0x400000; /* byte access */
        o(op|(intr(r)<<12)|fc|(base<<16));
      }
      return;
    }
  }
  tcc_error("store unimplemented");
}
 702
/* Emit code adding the constant 'val' (which may be negative) to the
   stack pointer; 0xE28DD000 is "add sp,sp,#imm" and
   stuff_const_harder() picks a sub or a multi-instruction sequence when
   required. */
static void gadd_sp(int val)
{
  stuff_const_harder(0xE28DD000,val);
}
 707
 708 /* 'is_jmp' is '1' if it is a jump */
 709 static void gcall_or_jmp(int is_jmp)
 710 {
 711   int r;
 712   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 713     uint32_t x;
 714     /* constant case */
 715     x=encbranch(ind,ind+vtop->c.ul,0);
 716     if(x) {
 717       if (vtop->r & VT_SYM) {
 718         /* relocation case */
 719         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 720       } else
 721         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 722       o(x|(is_jmp?0xE0000000:0xE1000000));
 723     } else {
 724       if(!is_jmp)
 725         o(0xE28FE004); // add lr,pc,#4
 726       o(0xE51FF004);   // ldr pc,[pc,#-4]
 727       if (vtop->r & VT_SYM)
 728         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 729       o(vtop->c.ul);
 730     }
 731   } else {
 732     /* otherwise, indirect call */
 733     r = gv(RC_INT);
 734     if(!is_jmp)
 735       o(0xE1A0E00F);       // mov lr,pc
 736     o(0xE1A0F000|intr(r)); // mov pc,r
 737   }
 738 }
 739
 740 #ifdef TCC_ARM_HARDFLOAT
 741 static int is_float_hgen_aggr(CType *type)
 742 {
 743   if ((type->t & VT_BTYPE) == VT_STRUCT) {
 744     struct Sym *ref;
 745     int btype, nb_fields = 0;
 746
 747     ref = type->ref;
 748     btype = ref->type.t & VT_BTYPE;
 749     if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 750       for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
 751       return !ref && nb_fields <= 4;
 752     }
 753   }
 754   return 0;
 755 }
 756
/* Allocation state of the 16 single-precision VFP argument registers,
   as maintained by assign_fpreg(). */
struct avail_regs {
  /* registers skipped to align a double and still free for a later
     4-byte argument ("holes"), in the order they were skipped.
     worst case: f(float, double, 3 float struct, double, 3 float struct, double) */
  signed char avail[3];
  int first_hole;     /* index in avail[] of the next hole to hand out */
  int last_hole;      /* index in avail[] where the next hole is recorded */
  int first_free_reg; /* next never-used register, or -1 once an
                         argument no longer fitted */
};

/* initial state: no holes, allocation starts at register 0 */
#define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 766
 767 /* Assign a register for a CPRC param with correct size and alignment
 768  * size and align are in bytes, as returned by type_size */
 769 int assign_fpreg(struct avail_regs *avregs, int align, int size)
 770 {
 771   int first_reg = 0;
 772
 773   if (avregs->first_free_reg == -1)
 774     return -1;
 775   if (align >> 3) { // alignment needed (base type: double)
 776     first_reg = avregs->first_free_reg;
 777     if (first_reg & 1)
 778       avregs->avail[avregs->last_hole++] = first_reg++;
 779   } else {
 780     if (size == 4 && avregs->first_hole != avregs->last_hole)
 781       return avregs->avail[avregs->first_hole++];
 782     else
 783       first_reg = avregs->first_free_reg;
 784   }
 785   if (first_reg + size / 4 <= 16) {
 786     avregs->first_free_reg = first_reg + size / 4;
 787     return first_reg;
 788   }
 789   avregs->first_free_reg = -1;
 790   return -1;
 791 }
 792 #endif
 793
/* Generate a function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
 797 void gfunc_call(int nb_args)
 798 {
 799   int size, align, r, args_size, i, ncrn, ncprn, argno, vfp_argno;
 800   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 801   SValue *before_stack = NULL; /* SValue before first on stack argument */
 802   SValue *before_vfpreg_hfa = NULL; /* SValue before first in VFP reg hfa argument */
 803 #ifdef TCC_ARM_HARDFLOAT
 804   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 805   signed char vfp_plan[16];
 806   int plan2[4+16];
 807   int variadic;
 808 #else
 809   int plan2[4]={0,0,0,0};
 810 #endif
 811   int vfp_todo=0;
 812   int todo=0, keep;
 813
 814 #ifdef TCC_ARM_HARDFLOAT
 815   memset(vfp_plan, -1, sizeof(vfp_plan));
 816   memset(plan2, 0, sizeof(plan2));
 817   variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
 818 #endif
 819   r = vtop->r & VT_VALMASK;
 820   if (r == VT_CMP || (r & ~1) == VT_JMP)
 821     gv(RC_INT);
 822 #ifdef TCC_ARM_EABI
 823   if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
 824      && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
 825     SValue tmp;
 826     tmp=vtop[-nb_args];
 827     vtop[-nb_args]=vtop[-nb_args+1];
 828     vtop[-nb_args+1]=tmp;
 829     --nb_args;
 830   }
 831
 832   vpushi(0);
 833   vtop->type.t = VT_LLONG;
 834   args_size = 0;
 835 #endif
 836   ncrn = ncprn = argno = vfp_argno = 0;
 837   /* Assign argument to registers and stack with alignment.
 838      If, considering alignment constraints, enough registers of the correct type
 839      (core or VFP) are free for the current argument, assign them to it, else
 840      allocate on stack with correct alignment. Whenever a structure is allocated
 841      in registers or on stack, it is always put on the stack at this stage. The
 842      stack is divided in 3 zones. The zone are, from low addresses to high
 843      addresses: structures to be loaded in core registers, structures to be
 844      loaded in VFP registers, argument allocated to stack. SValue's representing
 845      structures in the first zone are moved just after the SValue pointed by
 846      before_vfpreg_hfa. SValue's representing structures in the second zone are
 847      moved just after the SValue pointer by before_stack. */
 848   for(i = nb_args + 1 ; i-- ;) {
 849     int j, assigned_vfpreg = 0;
 850     size = type_size(&vtop[-i].type, &align);
 851     switch(vtop[-i].type.t & VT_BTYPE) {
 852       case VT_STRUCT:
 853       case VT_FLOAT:
 854       case VT_DOUBLE:
 855       case VT_LDOUBLE:
 856 #ifdef TCC_ARM_HARDFLOAT
 857       if (!variadic) {
 858         int hfa = 0; /* Homogeneous float aggregate */
 859
 860         if (is_float(vtop[-i].type.t)
 861             || (hfa = is_float_hgen_aggr(&vtop[-i].type))) {
 862           int end_reg;
 863
 864           assigned_vfpreg = assign_fpreg(&avregs, align, size);
 865           end_reg = assigned_vfpreg + (size - 1) / 4;
 866           if (assigned_vfpreg >= 0) {
 867             vfp_plan[vfp_argno++]=TREG_F0 + assigned_vfpreg/2;
 868             if (hfa) {
 869               /* before_stack can only have been set because all core registers
 870                  are assigned, so no need to care about before_vfpreg_hfa if
 871                  before_stack is set */
 872               if (before_stack) {
 873                 vrote(&vtop[-i], &vtop[-i] - before_stack);
 874                 before_stack++;
 875               } else if (!before_vfpreg_hfa)
 876                 before_vfpreg_hfa = &vtop[-i-1];
 877               for (j = assigned_vfpreg; j <= end_reg; j++)
 878                 vfp_todo|=(1<<j);
 879             }
 880             continue;
 881           } else {
 882             if (!hfa)
 883               vfp_argno++;
 884             /* No need to update before_stack as no more hfa can be allocated in
 885                VFP regs */
 886             if (!before_vfpreg_hfa)
 887               before_vfpreg_hfa = &vtop[-i-1];
 888             break;
 889           }
 890         }
 891       }
 892 #endif
 893       ncrn = (ncrn + (align-1)/4) & -(align/4);
 894       size = (size + 3) & -4;
 895       if (ncrn + size/4 <= 4 || (ncrn < 4 && assigned_vfpreg != -1)) {
 896         /* Either there is HFA in VFP registers, or there is arguments on stack,
 897            it cannot be both. Hence either before_stack already points after
 898            the slot where the vtop[-i] SValue is moved, or before_stack will not
 899            be used */
 900         if (before_vfpreg_hfa) {
 901           vrote(&vtop[-i], &vtop[-i] - before_vfpreg_hfa);
 902           before_vfpreg_hfa++;
 903         }
 904         for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
 905           todo|=(1<<j);
 906         ncrn+=size/4;
 907         if (ncrn > 4) {
 908           args_size = (ncrn - 4) * 4;
 909           if (!before_stack)
 910             before_stack = &vtop[-i-1];
 911         }
 912       }
 913       else {
 914         ncrn = 4;
 915         /* No need to set before_vfpreg_hfa if not set since there will no
 916            longer be any structure assigned to core registers */
 917         if (!before_stack)
 918           before_stack = &vtop[-i-1];
 919         break;
 920       }
 921       continue;
 922       default:
 923       if (!i) {
 924         break;
 925       }
 926       if (ncrn < 4) {
 927         int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
 928
 929         if (is_long) {
 930           ncrn = (ncrn + 1) & -2;
 931           if (ncrn == 4) {
 932             argno++;
 933             break;
 934           }
 935         }
 936         plan[argno++][0]=ncrn++;
 937         if (is_long) {
 938           plan[argno-1][1]=ncrn++;
 939         }
 940         continue;
 941       }
 942       argno++;
 943     }
 944 #ifdef TCC_ARM_EABI
 945     if(args_size & (align-1)) {
 946       vpushi(0);
 947       vtop->type.t = VT_VOID; /* padding */
 948       vrott(i+2);
 949       args_size += 4;
 950       nb_args++;
 951       argno++;
 952     }
 953 #endif
 954     args_size += (size + 3) & -4;
 955   }
 956   vtop--;
 957   args_size = keep = 0;
 958   for(i = 0;i < nb_args; i++) {
 959     vrotb(keep+1);
 960     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 961       size = type_size(&vtop->type, &align);
 962       /* align to stack align size */
 963       size = (size + 3) & -4;
 964       /* allocate the necessary size on stack */
 965       gadd_sp(-size);
 966       /* generate structure store */
 967       r = get_reg(RC_INT);
 968       o(0xE1A0000D|(intr(r)<<12));
 969       vset(&vtop->type, r | VT_LVAL, 0);
 970       vswap();
 971       vstore();
 972       vtop--;
 973       args_size += size;
 974     } else if (is_float(vtop->type.t)) {
 975 #ifdef TCC_ARM_HARDFLOAT
 976       if (!variadic && --vfp_argno<16 && vfp_plan[vfp_argno]!=-1) {
 977         plan2[keep++]=vfp_plan[vfp_argno];
 978         continue;
 979       }
 980 #endif
 981 #ifdef TCC_ARM_VFP
 982       r=vfpr(gv(RC_FLOAT))<<12;
 983       size=4;
 984       if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
 985       {
 986         size=8;
 987         r|=0x101; /* fstms -> fstmd */
 988       }
 989       o(0xED2D0A01+r);
 990 #else
 991       r=fpr(gv(RC_FLOAT))<<12;
 992       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
 993         size = 4;
 994       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 995         size = 8;
 996       else
 997         size = LDOUBLE_SIZE;
 998
 999       if (size == 12)
1000         r|=0x400000;
1001       else if(size == 8)
1002         r|=0x8000;
1003
1004       o(0xED2D0100|r|(size>>2));
1005 #endif
1006       vtop--;
1007       args_size += size;
1008     } else {
1009       int s;
1010       /* simple type (currently always same size) */
1011       /* XXX: implicit cast ? */
1012       size=4;
1013       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
1014         lexpand_nr();
1015         s=-1;
1016         if(--argno<4 && plan[argno][1]!=-1)
1017           s=plan[argno][1];
1018         argno++;
1019         size = 8;
1020         if(s==-1) {
1021           r = gv(RC_INT);
1022           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1023           vtop--;
1024         } else {
1025           size=0;
1026           plan2[keep]=s;
1027           keep++;
1028           vswap();
1029         }
1030       }
1031       s=-1;
1032       if(--argno<4 && plan[argno][0]!=-1)
1033         s=plan[argno][0];
1034 #ifdef TCC_ARM_EABI
1035       if(vtop->type.t == VT_VOID) {
1036         if(s == -1)
1037           o(0xE24DD004); /* sub sp,sp,#4 */
1038         vtop--;
1039       } else
1040 #endif
1041       if(s == -1) {
1042         r = gv(RC_INT);
1043         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1044         vtop--;
1045       } else {
1046         size=0;
1047         plan2[keep]=s;
1048         keep++;
1049       }
1050       args_size += size;
1051     }
1052   }
1053   for(i = 0; i < keep; i++) {
1054     vrotb(keep);
1055     gv(regmask(plan2[i]));
1056     /* arg is in s(2d+1): plan2[i]<plan2[i+1] => alignment occured (ex f,d,f) */
1057     if (i < keep - 1 && is_float(vtop->type.t) && (plan2[i] <= plan2[i + 1])) {
1058       o(0xEEF00A40|(vfpr(plan2[i])<<12)|vfpr(plan2[i]));
1059     }
1060   }
1061 save_regs(keep); /* save used temporary registers */
1062   keep++;
1063   if(ncrn) {
1064     int nb_regs=0;
1065     if (ncrn>4)
1066       ncrn=4;
1067     todo&=((1<<ncrn)-1);
1068     if(todo) {
1069       int i;
1070       o(0xE8BD0000|todo);
1071       for(i=0;i<4;i++)
1072         if(todo&(1<<i)) {
1073           vpushi(0);
1074           vtop->r=i;
1075           keep++;
1076           nb_regs++;
1077         }
1078     }
1079     args_size-=nb_regs*4;
1080   }
1081   if(vfp_todo) {
1082     int nb_fregs=0;
1083
1084     for(i=0;i<16;i++)
1085       if(vfp_todo&(1<<i)) {
1086         o(0xED9D0A00|(i&1)<<22|(i>>1)<<12|nb_fregs);
1087         vpushi(0);
1088         /* There might be 2 floats in a double VFP reg but that doesn't seem
1089            to matter */
1090         if (!(i%2))
1091           vtop->r=TREG_F0+i/2;
1092         keep++;
1093         nb_fregs++;
1094       }
1095     if (nb_fregs) {
1096       gadd_sp(nb_fregs*4);
1097       args_size-=nb_fregs*4;
1098     }
1099   }
1100   vrotb(keep);
1101   gcall_or_jmp(0);
1102   if (args_size)
1103       gadd_sp(args_size);
1104 #ifdef TCC_ARM_EABI
1105   if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1106      && type_size(&vtop->type.ref->type, &align) <= 4)
1107   {
1108     store(REG_IRET,vtop-keep);
1109     ++keep;
1110   }
1111 #ifdef TCC_ARM_VFP
1112 #ifdef TCC_ARM_HARDFLOAT
1113   else if(variadic && is_float(vtop->type.ref->type.t)) {
1114 #else
1115   else if(is_float(vtop->type.ref->type.t)) {
1116 #endif
1117     if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1118       o(0xEE000A10); /* fmsr s0,r0 */
1119     } else {
1120       o(0xEE000B10); /* fmdlr d0,r0 */
1121       o(0xEE201B10); /* fmdhr d0,r1 */
1122     }
1123   }
1124 #endif
1125 #endif
1126   vtop-=keep;
1127   leaffunc = 0;
1128 }
1129
1130 /* generate function prolog of type 't' */
1131 void gfunc_prolog(CType *func_type)
1132 {
1133   Sym *sym,*sym2;
1134   int n,nf,size,align, variadic, struct_ret = 0;
1135 #ifdef TCC_ARM_HARDFLOAT
1136   struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1137 #endif
1138
1139   sym = func_type->ref;
1140   func_vt = sym->type;
1141
1142   n = nf = 0;
1143   variadic = (func_type->ref->c == FUNC_ELLIPSIS);
1144   if((func_vt.t & VT_BTYPE) == VT_STRUCT
1145      && type_size(&func_vt,&align) > 4)
1146   {
1147     n++;
1148     struct_ret = 1;
1149   }
1150   for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1151     size = type_size(&sym2->type, &align);
1152 #ifdef TCC_ARM_HARDFLOAT
1153     if (!variadic && (is_float(sym2->type.t)
1154         || is_float_hgen_aggr(&sym2->type))) {
1155       int tmpnf = assign_fpreg(&avregs, align, size) + 1;
1156       nf = (tmpnf > nf) ? tmpnf : nf;
1157     } else
1158 #endif
1159     if (n < 4)
1160       n += (size + 3) / 4;
1161   }
1162   if (struct_ret)
1163     func_vc = nf * 4;
1164   o(0xE1A0C00D); /* mov ip,sp */
1165   if(variadic)
1166     n=4;
1167   if(n) {
1168     if(n>4)
1169       n=4;
1170 #ifdef TCC_ARM_EABI
1171     n=(n+1)&-2;
1172 #endif
1173     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1174   }
1175   if (nf) {
1176     if (nf>16)
1177       nf=16;
1178     nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1179     o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1180   }
1181   o(0xE92D5800); /* save fp, ip, lr */
1182   o(0xE28DB00C); /* add fp, sp, #12 */
1183   func_sub_sp_offset = ind;
1184   o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1185   {
1186     int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1187
1188 #ifdef TCC_ARM_HARDFLOAT
1189     avregs = AVAIL_REGS_INITIALIZER;
1190 #endif
1191     while ((sym = sym->next)) {
1192       CType *type;
1193       type = &sym->type;
1194       size = type_size(type, &align);
1195       size = (size + 3) >> 2;
1196 #ifdef TCC_ARM_HARDFLOAT
1197       if (!variadic && (is_float(sym->type.t)
1198           || is_float_hgen_aggr(&sym->type))) {
1199         int fpn = assign_fpreg(&avregs, align, size << 2);
1200         if (fpn >= 0) {
1201           addr = fpn * 4;
1202         } else
1203           goto from_stack;
1204       } else
1205 #endif
1206       if (pn < 4) {
1207 #ifdef TCC_ARM_EABI
1208         pn = (pn + (align-1)/4) & -(align/4);
1209 #endif
1210         addr = (nf + pn) * 4;
1211         pn += size;
1212         if (!sn && pn > 4)
1213           sn = (pn - 4);
1214       } else {
1215 #ifdef TCC_ARM_HARDFLOAT
1216 from_stack:
1217 #endif
1218 #ifdef TCC_ARM_EABI
1219         sn = (sn + (align-1)/4) & -(align/4);
1220 #endif
1221         addr = (n + nf + sn) * 4;
1222         sn += size;
1223       }
1224       sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr);
1225     }
1226   }
1227   last_itod_magic=0;
1228   leaffunc = 1;
1229   loc = -12;
1230 }
1231
1232 /* generate function epilog */
1233 void gfunc_epilog(void)
1234 {
1235   uint32_t x;
1236   int diff;
1237 #ifdef TCC_ARM_EABI
1238   /* Useless but harmless copy of the float result into main register(s) in case
1239      of variadic function in the hardfloat variant */
1240   if(is_float(func_vt.t)) {
1241     if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1242       o(0xEE100A10); /* fmrs r0, s0 */
1243     else {
1244       o(0xEE100B10); /* fmrdl r0, d0 */
1245       o(0xEE301B10); /* fmrdh r1, d0 */
1246     }
1247   }
1248 #endif
1249   o(0xE91BA800); /* restore fp, sp, pc */
1250   diff = (-loc + 3) & -4;
1251 #ifdef TCC_ARM_EABI
1252   if(!leaffunc)
1253     diff = (diff + 7) & -8;
1254 #endif
1255   if(diff > 12) {
1256     x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1257     if(x)
1258       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1259     else {
1260       int addr;
1261       addr=ind;
1262       o(0xE59FC004); /* ldr ip,[pc+4] */
1263       o(0xE04BD00C); /* sub sp,fp,ip  */
1264       o(0xE1A0F00E); /* mov pc,lr */
1265       o(diff);
1266       *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1267     }
1268   }
1269 }
1270
1271 /* generate a jump to a label */
1272 int gjmp(int t)
1273 {
1274   int r;
1275   r=ind;
1276   o(0xE0000000|encbranch(r,t,1));
1277   return r;
1278 }
1279
/* Emit a jump to the fixed address 'a'; the position returned by gjmp()
   is not needed here. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1285
/* Generate a test on the value at the top of the value stack and pop that
   entry.

   inv: non-zero to invert the test.
   t:   jump target / chain value that gets encoded into the emitted branch.

   Returns the updated value of 't': the position of the branch emitted
   here, the head of the value's own jump list, or 't' unchanged when no
   branch had to be emitted. */
int gtst(int inv, int t)
{
  int v, r;
  uint32_t op;
  v = vtop->r & VT_VALMASK;
  r=ind; /* position of the instruction emitted next */
  if (v == VT_CMP) {
    /* the condition flags are already set: emit one conditional branch,
       using the negated condition when 'inv' is set */
    op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
    op|=encbranch(r,t,1);
    o(op);
    t=r;
  } else if (v == VT_JMP || v == VT_JMPI) {
    /* the value is a list of pending jumps starting at vtop->c.i */
    if ((v & 1) == inv) {
      if(!vtop->c.i)
        vtop->c.i=t;
      else {
        uint32_t *x;
        int p,lp;
        if(t) {
          /* follow the decbranch() links until the last entry (lp), then
             rewrite the low 24 bits of that instruction with the branch
             encoding for 't' */
          p = vtop->c.i;
          do {
            p = decbranch(lp=p);
          } while(p);
          x = (uint32_t *)(cur_text_section->data + lp);
          *x &= 0xff000000;
          *x |= encbranch(lp,t,1);
        }
        t = vtop->c.i;
      }
    } else {
      /* opposite sense: emit a jump for 't' and resolve the value's own
         jump list with gsym() */
      t = gjmp(t);
      gsym(vtop->c.i);
    }
  } else {
    if (is_float(vtop->type.t)) {
      /* compare the float against zero, then retry as a VT_CMP value */
      r=gv(RC_FLOAT);
#ifdef TCC_ARM_VFP
      o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
      o(0xEEF1FA10); /* fmstat */
#else
      o(0xEE90F118|(fpr(r)<<16));
#endif
      vtop->r = VT_CMP;
      vtop->c.i = TOK_NE;
      return gtst(inv, t);
    } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
      /* constant jmp optimization */
      if ((vtop->c.i != 0) != inv)
        t = gjmp(t);
    } else {
      /* load the integer into a register, emit the flag-setting test on
         it, then retry as a VT_CMP value */
      v = gv(RC_INT);
      o(0xE3300000|(intr(v)<<16));
      vtop->r = VT_CMP;
      vtop->c.i = TOK_NE;
      return gtst(inv, t);
    }
  }
  vtop--;
  return t;
}
1347
/* Generate the integer binary operation 'op' on the two topmost entries of
   the value stack (vtop[-1] op vtop[0]); both operands are replaced by a
   single result entry.

   The first switch classifies the operator through 'c':
     1: data processing instruction, 'opc' is shifted to bit 20 of the
        instruction word when the opcode is assembled
     2: shift, 'opc' selects the shift type
     3: call to the runtime function 'func', result read from 'retreg'
   '*' and TOK_UMULL are emitted directly and return early. Any operator
   not listed is handled by the default case with opc 0x15; the result is
   turned into a VT_CMP value when 'op' lies in TOK_ULT..TOK_GT. */
void gen_opi(int op)
{
  int c, func = 0;
  uint32_t opc = 0, r, fr;
  unsigned short retreg = REG_IRET;

  c=0;
  switch(op) {
    case '+':
      opc = 0x8;
      c=1;
      break;
    case TOK_ADDC1: /* add with carry generation */
      opc = 0x9;
      c=1;
      break;
    case '-':
      opc = 0x4;
      c=1;
      break;
    case TOK_SUBC1: /* sub with carry generation */
      opc = 0x5;
      c=1;
      break;
    case TOK_ADDC2: /* add with carry use */
      opc = 0xA;
      c=1;
      break;
    case TOK_SUBC2: /* sub with carry use */
      opc = 0xC;
      c=1;
      break;
    case '&':
      opc = 0x0;
      c=1;
      break;
    case '^':
      opc = 0x2;
      c=1;
      break;
    case '|':
      opc = 0x18;
      c=1;
      break;
    case '*':
      /* both operands in registers; the result reuses the register of the
         first operand */
      gv2(RC_INT, RC_INT);
      r = vtop[-1].r;
      fr = vtop[0].r;
      vtop--;
      o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
      return;
    case TOK_SHL:
      opc = 0;
      c=2;
      break;
    case TOK_SHR:
      opc = 1;
      c=2;
      break;
    case TOK_SAR:
      opc = 2;
      c=2;
      break;
    case '/':
    case TOK_PDIV:
      func=TOK___divsi3;
      c=3;
      break;
    case TOK_UDIV:
      func=TOK___udivsi3;
      c=3;
      break;
    case '%':
#ifdef TCC_ARM_EABI
      /* the EABI helper is a combined div/mod routine; the result is read
         from REG_LRET instead of REG_IRET */
      func=TOK___aeabi_idivmod;
      retreg=REG_LRET;
#else
      func=TOK___modsi3;
#endif
      c=3;
      break;
    case TOK_UMOD:
#ifdef TCC_ARM_EABI
      func=TOK___aeabi_uidivmod;
      retreg=REG_LRET;
#else
      func=TOK___umodsi3;
#endif
      c=3;
      break;
    case TOK_UMULL:
      /* two-register result: a fresh register goes into r2 of the result
         entry, and r gets a register different from the first operand's */
      gv2(RC_INT, RC_INT);
      r=intr(vtop[-1].r2=get_reg(RC_INT));
      c=vtop[-1].r;
      vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
      vtop--;
      o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
      return;
    default:
      opc = 0x15;
      c=1;
      break;
  }
  switch(c) {
    case 1:
      /* a constant first operand of a subtraction is moved to the second
         position and the reversed form of the instruction is used */
      if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        if(opc == 4 || opc == 5 || opc == 0xc) {
          vswap();
          opc|=2; // sub -> rsb
        }
      }
      /* a comparison or jump result is first materialized in a register */
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* load the first operand */
      vswap();
      c=intr(gv(RC_INT));
      vswap();
      opc=0xE0000000|(opc<<20)|(c<<16);
      if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* use the immediate form when stuff_const() can encode the
           constant */
        uint32_t x;
        x=stuff_const(opc|0x2000000,vtop->c.i);
        if(x) {
          r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
          o(x|(r<<12));
          goto done;
        }
      }
      /* register form */
      fr=intr(gv(RC_INT));
      r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
      o(opc|(r<<12)|fr);
done:
      vtop--;
      /* for comparison operators the result lives in the condition flags */
      if (op >= TOK_ULT && op <= TOK_GT) {
        vtop->r = VT_CMP;
        vtop->c.i = op;
      }
      break;
    case 2:
      opc=0xE1A00000|(opc<<5);
      if ((vtop->r & VT_VALMASK) == VT_CMP ||
          (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
        gv(RC_INT);
      /* load the value to shift */
      vswap();
      r=intr(gv(RC_INT));
      vswap();
      opc|=r;
      if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
        /* constant shift count: only its low 5 bits are encoded */
        fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
        c = vtop->c.i & 0x1f;
        o(opc|(c<<7)|(fr<<12));
      } else {
        /* shift count taken from a register */
        fr=intr(gv(RC_INT));
        c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
        o(opc|(c<<12)|(fr<<8)|0x10);
      }
      vtop--;
      break;
    case 3:
      /* push the helper below its two arguments, call it, then push an
         entry describing the register that holds the result */
      vpush_global_sym(&func_old_type, func);
      vrott(3);
      gfunc_call(2);
      vpushi(0);
      vtop->r = retreg;
      break;
    default:
      tcc_error("gen_opi %i unimplemented!",op);
  }
}
1517
1518 #ifdef TCC_ARM_VFP
1519 static int is_zero(int i)
1520 {
1521   if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1522     return 0;
1523   if (vtop[i].type.t == VT_FLOAT)
1524     return (vtop[i].c.f == 0.f);
1525   else if (vtop[i].type.t == VT_DOUBLE)
1526     return (vtop[i].c.d == 0.0);
1527   return (vtop[i].c.ld == 0.l);
1528 }
1529
1530 /* generate a floating point operation 'v = t1 op t2' instruction. The
1531  *    two operands are guaranted to have the same floating point type */
1532 void gen_opf(int op)
1533 {
1534   uint32_t x;
1535   int fneg=0,r;
1536   x=0xEE000A00|T2CPR(vtop->type.t);
1537   switch(op) {
1538     case '+':
1539       if(is_zero(-1))
1540         vswap();
1541       if(is_zero(0)) {
1542         vtop--;
1543         return;
1544       }
1545       x|=0x300000;
1546       break;
1547     case '-':
1548       x|=0x300040;
1549       if(is_zero(0)) {
1550         vtop--;
1551         return;
1552       }
1553       if(is_zero(-1)) {
1554         x|=0x810000; /* fsubX -> fnegX */
1555         vswap();
1556         vtop--;
1557         fneg=1;
1558       }
1559       break;
1560     case '*':
1561       x|=0x200000;
1562       break;
1563     case '/':
1564       x|=0x800000;
1565       break;
1566     default:
1567       if(op < TOK_ULT && op > TOK_GT) {
1568         tcc_error("unknown fp op %x!",op);
1569         return;
1570       }
1571       if(is_zero(-1)) {
1572         vswap();
1573         switch(op) {
1574           case TOK_LT: op=TOK_GT; break;
1575           case TOK_GE: op=TOK_ULE; break;
1576           case TOK_LE: op=TOK_GE; break;
1577           case TOK_GT: op=TOK_ULT; break;
1578         }
1579       }
1580       x|=0xB40040; /* fcmpX */
1581       if(op!=TOK_EQ && op!=TOK_NE)
1582         x|=0x80; /* fcmpX -> fcmpeX */
1583       if(is_zero(0)) {
1584         vtop--;
1585         o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1586       } else {
1587         x|=vfpr(gv(RC_FLOAT));
1588         vswap();
1589         o(x|(vfpr(gv(RC_FLOAT))<<12));
1590         vtop--;
1591       }
1592       o(0xEEF1FA10); /* fmstat */
1593
1594       switch(op) {
1595         case TOK_LE: op=TOK_ULE; break;
1596         case TOK_LT: op=TOK_ULT; break;
1597         case TOK_UGE: op=TOK_GE; break;
1598         case TOK_UGT: op=TOK_GT; break;
1599       }
1600
1601       vtop->r = VT_CMP;
1602       vtop->c.i = op;
1603       return;
1604   }
1605   r=gv(RC_FLOAT);
1606   x|=vfpr(r);
1607   r=regmask(r);
1608   if(!fneg) {
1609     int r2;
1610     vswap();
1611     r2=gv(RC_FLOAT);
1612     x|=vfpr(r2)<<16;
1613     r|=regmask(r2);
1614   }
1615   vtop->r=get_reg_ex(RC_FLOAT,r);
1616   if(!fneg)
1617     vtop--;
1618   o(x|(vfpr(vtop->r)<<12));
1619 }
1620
1621 #else
1622 static uint32_t is_fconst()
1623 {
1624   long double f;
1625   uint32_t r;
1626   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1627     return 0;
1628   if (vtop->type.t == VT_FLOAT)
1629     f = vtop->c.f;
1630   else if (vtop->type.t == VT_DOUBLE)
1631     f = vtop->c.d;
1632   else
1633     f = vtop->c.ld;
1634   if(!ieee_finite(f))
1635     return 0;
1636   r=0x8;
1637   if(f<0.0) {
1638     r=0x18;
1639     f=-f;
1640   }
1641   if(f==0.0)
1642     return r;
1643   if(f==1.0)
1644     return r|1;
1645   if(f==2.0)
1646     return r|2;
1647   if(f==3.0)
1648     return r|3;
1649   if(f==4.0)
1650     return r|4;
1651   if(f==5.0)
1652     return r|5;
1653   if(f==0.5)
1654     return r|6;
1655   if(f==10.0)
1656     return r|7;
1657   return 0;
1658 }
1659
/* Generate a floating point operation 'v = t1 op t2' instruction (FPA
   variant).  The two operands are guaranteed to have the same floating
   point type.

   c1 and c2 are the is_fconst() codes of the first (vtop[-1]) and second
   (vtop[0]) operand: 0 when the operand cannot be used as an immediate,
   a value <= 0xf for a non-negative immediate and a value > 0xf for a
   negative one.  Immediates replace the second register operand (r2) of
   the emitted instruction; where needed the operands are swapped or the
   reversed/opposite instruction is selected so that the immediate ends up
   in that position.

   op: '+', '-', '*', '/' or a comparison token in TOK_ULT..TOK_GT; any
       other value is reported through tcc_error(). */
void gen_opf(int op)
{
  uint32_t x, r, r2, c1, c2;
  //fputs("gen_opf\n",stderr);
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* select the precision of the instruction from the operand type */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  switch(op)
  {
    case '+':
      /* commutative: bring a constant first operand to the top */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* adding a negative immediate is done by subtracting its
           magnitude */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* a negative immediate leaves the base opcode in place instead of
           selecting suf */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* constant - x: reverse subtract with the immediate */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* commutative: prefer a non-negative immediate on top */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* constant / x: reverse divide with the immediate */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
/* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            tcc_error("unsigned comparision on floats?");
            break;
          case TOK_LT:
            op=TOK_Nset;
            break;
          case TOK_LE:
            op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only the first operand is an immediate: swap the operands and
           mirror the condition */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_Nset:
              op=TOK_GT;
              break;
            case TOK_GE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_GE;
              break;
            case TOK_GT:
              op=TOK_Nset;
              break;
          }
        }
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* same opcode bit as set for a negative immediate in '+' */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        /* the result of a comparison lives in the condition flags */
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        tcc_error("unknown fp op %x!",op);
        return;
      }
  }
  /* choose the destination field: 15 for comparisons, otherwise a float
     register, preferring the registers of the operands */
  if(vtop[-1].r == VT_CMP)
    c1=15;
  else {
    c1=vtop->r;
    /* r2 holds an immediate: only the first operand owns a register */
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1823 #endif
1824
1825 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1826    and 'long long' cases. */
1827 ST_FUNC void gen_cvt_itof1(int t)
1828 {
1829   uint32_t r, r2;
1830   int bt;
1831   bt=vtop->type.t & VT_BTYPE;
1832   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1833 #ifndef TCC_ARM_VFP
1834     uint32_t dsize = 0;
1835 #endif
1836     r=intr(gv(RC_INT));
1837 #ifdef TCC_ARM_VFP
1838     r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1839     o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1840     r2<<=12;
1841     if(!(vtop->type.t & VT_UNSIGNED))
1842       r2|=0x80;                /* fuitoX -> fsituX */
1843     o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1844 #else
1845     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1846     if((t & VT_BTYPE) != VT_FLOAT)
1847       dsize=0x80;    /* flts -> fltd */
1848     o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1849     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1850       uint32_t off = 0;
1851       o(0xE3500000|(r<<12));        /* cmp */
1852       r=fpr(get_reg(RC_FLOAT));
1853       if(last_itod_magic) {
1854         off=ind+8-last_itod_magic;
1855         off/=4;
1856         if(off>255)
1857           off=0;
1858       }
1859       o(0xBD1F0100|(r<<12)|off);    /* ldflts */
1860       if(!off) {
1861         o(0xEA000000);              /* b */
1862         last_itod_magic=ind;
1863         o(0x4F800000);              /* 4294967296.0f */
1864       }
1865       o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1866     }
1867 #endif
1868     return;
1869   } else if(bt == VT_LLONG) {
1870     int func;
1871     CType *func_type = 0;
1872     if((t & VT_BTYPE) == VT_FLOAT) {
1873       func_type = &func_float_type;
1874       if(vtop->type.t & VT_UNSIGNED)
1875         func=TOK___floatundisf;
1876       else
1877         func=TOK___floatdisf;
1878 #if LDOUBLE_SIZE != 8
1879     } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1880       func_type = &func_ldouble_type;
1881       if(vtop->type.t & VT_UNSIGNED)
1882         func=TOK___floatundixf;
1883       else
1884         func=TOK___floatdixf;
1885     } else if((t & VT_BTYPE) == VT_DOUBLE) {
1886 #else
1887     } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1888 #endif
1889       func_type = &func_double_type;
1890       if(vtop->type.t & VT_UNSIGNED)
1891         func=TOK___floatundidf;
1892       else
1893         func=TOK___floatdidf;
1894     }
1895     if(func_type) {
1896       vpush_global_sym(func_type, func);
1897       vswap();
1898       gfunc_call(1);
1899       vpushi(0);
1900       vtop->r=TREG_F0;
1901       return;
1902     }
1903   }
1904   tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1905 }
1906
1907 /* convert fp to int 't' type */
1908 void gen_cvt_ftoi(int t)
1909 {
1910   uint32_t r, r2;
1911   int u, func = 0;
1912   u=t&VT_UNSIGNED;
1913   t&=VT_BTYPE;
1914   r2=vtop->type.t & VT_BTYPE;
1915   if(t==VT_INT) {
1916 #ifdef TCC_ARM_VFP
1917     r=vfpr(gv(RC_FLOAT));
1918     u=u?0:0x10000;
1919     o(0xEEBC0A40|(r<<12)|r|T2CPR(r2)); /* ftoXiY */
1920     r2=intr(vtop->r=get_reg(RC_INT));
1921     o(0xEE100A10|(r<<16)|(r2<<12));
1922     return;
1923 #else
1924     if(u) {
1925       if(r2 == VT_FLOAT)
1926         func=TOK___fixunssfsi;
1927 #if LDOUBLE_SIZE != 8
1928       else if(r2 == VT_LDOUBLE)
1929         func=TOK___fixunsxfsi;
1930       else if(r2 == VT_DOUBLE)
1931 #else
1932       else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1933 #endif
1934         func=TOK___fixunsdfsi;
1935     } else {
1936       r=fpr(gv(RC_FLOAT));
1937       r2=intr(vtop->r=get_reg(RC_INT));
1938       o(0xEE100170|(r2<<12)|r);
1939       return;
1940     }
1941 #endif
1942   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1943     if(r2 == VT_FLOAT)
1944       func=TOK___fixsfdi;
1945 #if LDOUBLE_SIZE != 8
1946     else if(r2 == VT_LDOUBLE)
1947       func=TOK___fixxfdi;
1948     else if(r2 == VT_DOUBLE)
1949 #else
1950     else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1951 #endif
1952       func=TOK___fixdfdi;
1953   }
1954   if(func) {
1955     vpush_global_sym(&func_old_type, func);
1956     vswap();
1957     gfunc_call(1);
1958     vpushi(0);
1959     if(t == VT_LLONG)
1960       vtop->r2 = REG_LRET;
1961     vtop->r = REG_IRET;
1962     return;
1963   }
1964   tcc_error("unimplemented gen_cvt_ftoi!");
1965 }
1966
1967 /* convert from one floating point type to another */
1968 void gen_cvt_ftof(int t)
1969 {
1970 #ifdef TCC_ARM_VFP
1971   if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1972     uint32_t r = vfpr(gv(RC_FLOAT));
1973     o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1974   }
1975 #else
1976   /* all we have to do on i386 and FPA ARM is to put the float in a register */
1977   gv(RC_FLOAT);
1978 #endif
1979 }
1980
1981 /* computed goto support */
1982 void ggoto(void)
1983 {
1984   gcall_or_jmp(1);
1985   vtop--;
1986 }
1987
1988 /* end of ARM code generator */
1989 /*************************************************************/
1990 #endif
1991 /*************************************************************/