arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *
   6  *  Based on i386-gen.c by Fabrice Bellard
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 /* number of available registers */
  24 #define NB_REGS             9
  25
  26 /* a register can belong to several classes. The classes must be
  27    sorted from more general to more precise (see gv2() code which does
  28    assumptions on it). */
  29 #define RC_INT     0x0001 /* generic integer register */
  30 #define RC_FLOAT   0x0002 /* generic float register */
  31 #define RC_R0      0x0004
  32 #define RC_R1      0x0008
  33 #define RC_R2      0x0010
  34 #define RC_R3      0x0020
  35 #define RC_R12     0x0040
  36 #define RC_F0      0x0080
  37 #define RC_F1      0x0100
  38 #define RC_F2      0x0200
  39 #define RC_F3      0x0400
  40 #define RC_IRET    RC_R0  /* function return: integer register */
  41 #define RC_LRET    RC_R1  /* function return: second integer register */
  42 #define RC_FRET    RC_F0  /* function return: float register */
  43
  44 /* pretty names for the registers */
  45 enum {
  46     TREG_R0 = 0,
  47     TREG_R1,
  48     TREG_R2,
  49     TREG_R3,
  50     TREG_R12,
  51     TREG_F0,
  52     TREG_F1,
  53     TREG_F2,
  54     TREG_F3,
  55 };
  56
  57 int reg_classes[NB_REGS] = {
  58     /* r0 */ RC_INT | RC_R0,
  59     /* r1 */ RC_INT | RC_R1,
  60     /* r2 */ RC_INT | RC_R2,
  61     /* r3 */ RC_INT | RC_R3,
  62     /* r12 */ RC_INT | RC_R12,
  63     /* f0 */ RC_FLOAT | RC_F0,
  64     /* f1 */ RC_FLOAT | RC_F1,
  65     /* f2 */ RC_FLOAT | RC_F2,
  66     /* f3 */ RC_FLOAT | RC_F3,
  67 };
  68
  69 static int two2mask(int a,int b) {
  70   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
  71 }
  72
  73 static int regmask(int r) {
  74   return reg_classes[r]&~(RC_INT|RC_FLOAT);
  75 }
  76
/* registers in which a function returns its value */
#define REG_IRET TREG_R0 /* single word int return register */
#define REG_LRET TREG_R1 /* second word return register (for long long) */
#define REG_FRET TREG_F0 /* float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* defined if structures are passed as pointers. Otherwise structures
   are directly pushed on the stack. Left disabled for this target. */
//#define FUNC_STRUCT_PARAM_AS_PTR

/* pointer size, in bytes */
#define PTR_SIZE 4

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  8
#define LDOUBLE_ALIGN 4
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     8

/* plain 'char' is unsigned on this target */
#define CHAR_IS_UNSIGNED

/******************************************************/
/* ELF defines */

/* ELF machine type of the generated objects */
#define EM_TCC_TARGET EM_ARM

/* relocation type for 32 bit data relocation */
#define R_DATA_32   R_ARM_ABS32
/* relocation types for jump slot and copy relocations */
#define R_JMP_SLOT  R_ARM_JUMP_SLOT
#define R_COPY      R_ARM_COPY

/* start address and page size used for the ELF output */
#define ELF_START_ADDR 0x00008000
#define ELF_PAGE_SIZE  0x1000
 112
 113 /******************************************************/
 114 static unsigned long func_sub_sp_offset,last_itod_magic;
 115
 116 void o(unsigned long i)
 117 {
 118   /* this is a good place to start adding big-endian support*/
 119   int ind1;
 120
 121   ind1 = ind + 4;
 122   if (!cur_text_section)
 123     error("compiler error! This happens f.ex. if the compiler\n"
 124          "can't evaluate constant expressions outside of a function.");
 125   if (ind1 > cur_text_section->data_allocated)
 126     section_realloc(cur_text_section, ind1);
 127   cur_text_section->data[ind++] = i&255;
 128   i>>=8;
 129   cur_text_section->data[ind++] = i&255;
 130   i>>=8;
 131   cur_text_section->data[ind++] = i&255;
 132   i>>=8;
 133   cur_text_section->data[ind++] = i;
 134 }
 135
 136 static unsigned long stuff_const(unsigned long op,unsigned long c)
 137 {
 138   int try_neg=0;
 139   unsigned long nc,negop;
 140   switch(op&0x1F00000)
 141   {
 142     case 0x800000: //add
 143     case 0x400000: //sub
 144       try_neg=1;
 145       negop=op^0xC00000;
 146       nc=-c;
 147       break;
 148     case 0x1A00000: //mov
 149     case 0x1E00000: //mvn
 150       try_neg=1;
 151       negop=op^0x400000;
 152       nc=~c;
 153       break;
 154     case 0x200000: //xor
 155       if(c==~0)
 156         return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
 157       break;
 158     case 0x0: //and
 159       if(c==~0)
 160         return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
 161     case 0x1C00000: //bic
 162       try_neg=1;
 163       negop=op^0x1C00000;
 164       nc=~c;
 165       break;
 166     case 0x1800000: //orr
 167       if(c==~0)
 168         return (op&0xFFF0FFFF)|0x1E00000;
 169       break;
 170   }
 171   do {
 172     unsigned long m;
 173     int i;
 174     if(c<256) /* catch undefined <<32 */
 175       return op|c;
 176     for(i=2;i<32;i+=2) {
 177       m=(0xff>>i)|(0xff<<(32-i));
 178       if(!(c&~m))
 179         return op|(i<<7)|(c<<i)|(c>>(32-i));
 180     }
 181     op=negop;
 182     c=nc;
 183   } while(try_neg--);
 184   return 0;
 185 }
 186
 187
 188 //only add,sub
 189 void stuff_const_harder(unsigned long op,unsigned long v) {
 190   unsigned long x;
 191   x=stuff_const(op,v);
 192   if(x)
 193     o(x);
 194   else {
 195     unsigned long a[16],nv,no,o2,n2;
 196     int i,j,k;
 197     a[0]=0xff;
 198     o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
 199     for(i=1;i<16;i++)
 200       a[i]=(a[i-1]>>2)|(a[i-1]<<30);
 201     for(i=0;i<12;i++)
 202       for(j=i+4;i<13+i;i++)
 203         if((v&(a[i]|a[j]))==v) {
 204           o(stuff_const(op,v&a[i]));
 205           o(stuff_const(o2,v&a[j]));
 206           return;
 207         }
 208     no=op^0xC00000;
 209     n2=o2^0xC00000;
 210     nv=-v;
 211     for(i=0;i<12;i++)
 212       for(j=i+4;i<13+i;i++)
 213         if((nv&(a[i]|a[j]))==nv) {
 214           o(stuff_const(no,nv&a[i]));
 215           o(stuff_const(n2,nv&a[j]));
 216           return;
 217         }
 218     for(i=0;i<8;i++)
 219       for(j=i+4;i<12;i++)
 220         for(k=j+4;k<13+i;i++)
 221           if((v&(a[i]|a[j]|a[k]))==v) {
 222             o(stuff_const(op,v&a[i]));
 223             o(stuff_const(o2,v&a[j]));
 224             o(stuff_const(o2,v&a[k]));
 225             return;
 226           }
 227     no=op^0xC00000;
 228     nv=-v;
 229     for(i=0;i<8;i++)
 230       for(j=i+4;i<12;i++)
 231         for(k=j+4;k<13+i;i++)
 232           if((nv&(a[i]|a[j]|a[k]))==nv) {
 233             o(stuff_const(no,nv&a[i]));
 234             o(stuff_const(n2,nv&a[j]));
 235             o(stuff_const(n2,nv&a[k]));
 236             return;
 237           }
 238     o(stuff_const(op,v&a[0]));
 239     o(stuff_const(o2,v&a[4]));
 240     o(stuff_const(o2,v&a[8]));
 241     o(stuff_const(o2,v&a[12]));
 242   }
 243 }
 244
 245 unsigned long encbranch(int pos,int addr,int fail)
 246 {
 247   addr-=pos+8;
 248   addr/=4;
 249   if(addr>=0x1000000 || addr<-0x1000000) {
 250     if(fail)
 251       error("FIXME: function bigger than 32MB");
 252     return 0;
 253   }
 254   return 0x0A000000|(addr&0xffffff);
 255 }
 256
 257 int decbranch(int pos)
 258 {
 259   int x;
 260   x=*(int *)(cur_text_section->data + pos);
 261   x&=0x00ffffff;
 262   if(x&0x800000)
 263     x-=0x1000000;
 264   return x*4+pos+8;
 265 }
 266
 267 /* output a symbol and patch all calls to it */
 268 void gsym_addr(int t, int a)
 269 {
 270   unsigned long *x;
 271   int lt;
 272   while(t) {
 273     x=(unsigned long *)(cur_text_section->data + t);
 274     t=decbranch(lt=t);
 275     if(a==lt+4)
 276       *x=0xE1A00000; // nop
 277     else {
 278       *x &= 0xff000000;
 279       *x |= encbranch(lt,a,1);
 280     }
 281   }
 282 }
 283
 284 void gsym(int t)
 285 {
 286   gsym_addr(t, ind);
 287 }
 288
 289 static unsigned long fpr(int r)
 290 {
 291   if(r<TREG_F0 || r>TREG_F3)
 292     error("compiler error! register %i is no fp register\n",r);
 293   return r-5;
 294 }
 295
 296 static unsigned long intr(int r)
 297 {
 298   if(r==4)
 299     return 12;
 300   if((r<0 || r>4) && r!=14)
 301     error("compiler error! register %i is no int register\n",r);
 302   return r;
 303 }
 304
 305 static void calcaddr(unsigned long *base,int *off,int *sgn,int maxoff,unsigned shift)
 306 {
 307   if(*off>maxoff || *off&((1<<shift)-1)) {
 308     unsigned long x,y;
 309     x=0xE280E000;
 310     if(*sgn)
 311       x=0xE240E000;
 312     x|=(*base)<<16;
 313     *base=14; // lr
 314     y=stuff_const(x,*off&~maxoff);
 315     if(y) {
 316       o(y);
 317       *off&=maxoff;
 318       return;
 319     }
 320     y=stuff_const(x,(*off+maxoff)&~maxoff);
 321     if(y) {
 322       o(y);
 323       *sgn=!*sgn;
 324       *off=((*off+maxoff)&~maxoff)-*off;
 325       return;
 326     }
 327     stuff_const_harder(x,*off&~maxoff);
 328     *off&=maxoff;
 329   }
 330 }
 331
 332 static unsigned long mapcc(int cc)
 333 {
 334   switch(cc)
 335   {
 336     case TOK_ULT:
 337       return 0x30000000;
 338     case TOK_UGE:
 339       return 0x20000000;
 340     case TOK_EQ:
 341       return 0x00000000;
 342     case TOK_NE:
 343       return 0x10000000;
 344     case TOK_ULE:
 345       return 0x90000000;
 346     case TOK_UGT:
 347       return 0x80000000;
 348     case TOK_LT:
 349       return 0xB0000000;
 350     case TOK_GE:
 351       return 0xA0000000;
 352     case TOK_LE:
 353       return 0xD0000000;
 354     case TOK_GT:
 355       return 0xC0000000;
 356   }
 357   error("unexpected condition code");
 358   return 0xE0000000;
 359 }
 360
 361 static int negcc(int cc)
 362 {
 363   switch(cc)
 364   {
 365     case TOK_ULT:
 366       return TOK_UGE;
 367     case TOK_UGE:
 368       return TOK_ULT;
 369     case TOK_EQ:
 370       return TOK_NE;
 371     case TOK_NE:
 372       return TOK_EQ;
 373     case TOK_ULE:
 374       return TOK_UGT;
 375     case TOK_UGT:
 376       return TOK_ULE;
 377     case TOK_LT:
 378       return TOK_GE;
 379     case TOK_GE:
 380       return TOK_LT;
 381     case TOK_LE:
 382       return TOK_GT;
 383     case TOK_GT:
 384       return TOK_LE;
 385   }
 386   error("unexpected condition code");
 387   return TOK_NE;
 388 }
 389
 390 /* load 'r' from value 'sv' */
 391 void load(int r, SValue *sv)
 392 {
 393   int v, ft, fc, fr, sign;
 394   unsigned long op;
 395   SValue v1;
 396
 397   fr = sv->r;
 398   ft = sv->type.t;
 399   fc = sv->c.ul;
 400
 401   if(fc>=0)
 402     sign=0;
 403   else {
 404     sign=1;
 405     fc=-fc;
 406   }
 407
 408   v = fr & VT_VALMASK;
 409   if (fr & VT_LVAL) {
 410     unsigned long base=0xB; // fp
 411     if(v == VT_LLOCAL) {
 412       v1.type.t = VT_PTR;
 413       v1.r = VT_LOCAL | VT_LVAL;
 414       v1.c.ul = sv->c.ul;
 415       load(base=14 /* lr */, &v1);
 416       fc=sign=0;
 417       v=VT_LOCAL;
 418     } else if(v == VT_CONST) {
 419       v1.type.t = VT_PTR;
 420       v1.r = fr&~VT_LVAL;
 421       v1.c.ul = sv->c.ul;
 422       v1.sym=sv->sym;
 423       load(base=14, &v1);
 424       fc=sign=0;
 425       v=VT_LOCAL;
 426     } else if(v < VT_CONST) {
 427       base=intr(v);
 428       fc=sign=0;
 429       v=VT_LOCAL;
 430     }
 431     if(v == VT_LOCAL) {
 432       if(is_float(ft)) {
 433         calcaddr(&base,&fc,&sign,1020,2);
 434         op=0xED100100;
 435         if(!sign)
 436           op|=0x800000;
 437 #if LDOUBLE_SIZE == 8
 438         if ((ft & VT_BTYPE) != VT_FLOAT)
 439           op|=0x8000;
 440 #else
 441         if ((ft & VT_BTYPE) == VT_DOUBLE)
 442           op|=0x8000;
 443         else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 444           op|=0x400000;
 445 #endif
 446         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 447       } else if((ft & VT_TYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_SHORT) {
 448         calcaddr(&base,&fc,&sign,255,0);
 449         op=0xE1500090;
 450         if ((ft & VT_BTYPE) == VT_SHORT)
 451           op|=0x20;
 452         if ((ft & VT_UNSIGNED) == 0)
 453           op|=0x40;
 454         if(!sign)
 455           op|=0x800000;
 456         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 457       } else {
 458         calcaddr(&base,&fc,&sign,4095,0);
 459         op=0xE5100000;
 460         if(!sign)
 461           op|=0x800000;
 462         if ((ft & VT_BTYPE) == VT_BYTE)
 463           op|=0x400000;
 464         o(op|(intr(r)<<12)|fc|(base<<16));
 465       }
 466       return;
 467     }
 468   } else {
 469     if (v == VT_CONST) {
 470       op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
 471       if (fr & VT_SYM || !op) {
 472         o(0xE59F0000|(intr(r)<<12));
 473         o(0xEA000000);
 474         if(fr & VT_SYM)
 475           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 476         o(sv->c.ul);
 477       } else
 478         o(op);
 479       return;
 480     } else if (v == VT_LOCAL) {
 481       op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
 482       if (fr & VT_SYM || !op) {
 483         o(0xE59F0000|(intr(r)<<12));
 484         o(0xEA000000);
 485         if(fr & VT_SYM) // needed ?
 486           greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 487         o(sv->c.ul);
 488         o(0xE08B0000|(intr(r)<<12)|intr(r));
 489       } else
 490         o(op);
 491       return;
 492     } else if(v == VT_CMP) {
 493       o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
 494       o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
 495       return;
 496     } else if (v == VT_JMP || v == VT_JMPI) {
 497       int t;
 498       t = v & 1;
 499       o(0xE3A00000|(intr(r)<<12)|t);
 500       o(0xEA000000);
 501       gsym(sv->c.ul);
 502       o(0xE3A00000|(intr(r)<<12)|(t^1));
 503       return;
 504     } else if (v < VT_CONST) {
 505       if(is_float(ft))
 506         o(0xEE008180|(fpr(r)<<12)|fpr(v));
 507       else
 508         o(0xE1A00000|(intr(r)<<12)|intr(v));
 509       return;
 510     }
 511   }
 512   error("load unimplemented!");
 513 }
 514
 515 /* store register 'r' in lvalue 'v' */
 516 void store(int r, SValue *sv)
 517 {
 518   SValue v1;
 519   int v, ft, fc, fr, sign;
 520   unsigned long op;
 521
 522   fr = sv->r;
 523   ft = sv->type.t;
 524   fc = sv->c.ul;
 525
 526   if(fc>=0)
 527     sign=0;
 528   else {
 529     sign=1;
 530     fc=-fc;
 531   }
 532
 533   v = fr & VT_VALMASK;
 534   if (fr & VT_LVAL || fr == VT_LOCAL) {
 535     unsigned long base=0xb;
 536     if(v < VT_CONST) {
 537       base=intr(v);
 538       v=VT_LOCAL;
 539       fc=sign=0;
 540     } else if(v == VT_CONST) {
 541       v1.type.t = ft;
 542       v1.r = fr&~VT_LVAL;
 543       v1.c.ul = sv->c.ul;
 544       v1.sym=sv->sym;
 545       load(base=14, &v1);
 546       fc=sign=0;
 547       v=VT_LOCAL;
 548     }
 549     if(v == VT_LOCAL) {
 550        if(is_float(ft)) {
 551         calcaddr(&base,&fc,&sign,1020,2);
 552         op=0xED000100;
 553         if(!sign)
 554           op|=0x800000;
 555 #if LDOUBLE_SIZE == 8
 556         if ((ft & VT_BTYPE) != VT_FLOAT)
 557           op|=0x8000;
 558 #else
 559         if ((ft & VT_BTYPE) == VT_DOUBLE)
 560           op|=0x8000;
 561         if ((ft & VT_BTYPE) == VT_LDOUBLE)
 562           op|=0x400000;
 563 #endif
 564         o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
 565         return;
 566       } else if((ft & VT_BTYPE) == VT_SHORT) {
 567         calcaddr(&base,&fc,&sign,255,0);
 568         op=0xE14000B0;
 569         if(!sign)
 570           op|=0x800000;
 571         o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
 572       } else {
 573         calcaddr(&base,&fc,&sign,4095,0);
 574         op=0xE5000000;
 575         if(!sign)
 576           op|=0x800000;
 577         if ((ft & VT_BTYPE) == VT_BYTE)
 578           op|=0x400000;
 579         o(op|(intr(r)<<12)|fc|(base<<16));
 580       }
 581       return;
 582     }
 583   }
 584   error("store unimplemented");
 585 }
 586
 587 static void gadd_sp(int val)
 588 {
 589   stuff_const_harder(0xE28DD000,val);
 590 }
 591
 592 /* 'is_jmp' is '1' if it is a jump */
 593 static void gcall_or_jmp(int is_jmp)
 594 {
 595   int r;
 596   if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 597     unsigned long x;
 598     /* constant case */
 599     x=encbranch(ind,ind+vtop->c.ul,0);
 600     if(x) {
 601       if (vtop->r & VT_SYM) {
 602         /* relocation case */
 603         greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 604       } else
 605         put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
 606       o(x|(is_jmp?0xE0000000:0xE1000000));
 607     } else {
 608       if(!is_jmp)
 609         o(0xE28FE004); // add lr,pc,#4
 610       o(0xE51FF004);   // ldr pc,[pc,#-4]
 611       if (vtop->r & VT_SYM)
 612         greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 613       o(vtop->c.ul);
 614     }
 615   } else {
 616     /* otherwise, indirect call */
 617     r = gv(RC_INT);
 618     if(!is_jmp)
 619       o(0xE1A0E00F);       // mov lr,pc
 620     o(0xE1A0F000|intr(r)); // mov pc,r
 621   }
 622 }
 623
 624 /* Generate function call. The function address is pushed first, then
 625    all the parameters in call order. This functions pops all the
 626    parameters and the function address. */
 627 void gfunc_call(int nb_args)
 628 {
 629   int size, align, r, args_size, i;
 630   Sym *func_sym;
 631   signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
 632   int todo=0xf, keep, plan2[4]={0,0,0,0};
 633
 634   r = vtop->r & VT_VALMASK;
 635   if (r == VT_CMP || (r & ~1) == VT_JMP)
 636     gv(RC_INT);
 637   args_size = 0;
 638   for(i = nb_args ; i-- && args_size < 16 ;) {
 639     if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) {
 640       size = type_size(&vtop[-i].type, &align);
 641       size = (size + 3) & ~3;
 642       args_size += size;
 643     } else if ((vtop[-i].type.t & VT_BTYPE) == VT_FLOAT)
 644       args_size += 4;
 645     else if ((vtop[-i].type.t & VT_BTYPE) == VT_DOUBLE)
 646       args_size += 8;
 647     else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE)
 648       args_size += LDOUBLE_SIZE;
 649     else {
 650       plan[nb_args-1-i][0]=args_size/4;
 651       args_size += 4;
 652       if ((vtop[-i].type.t & VT_BTYPE) == VT_LLONG && args_size < 16) {
 653         plan[nb_args-1-i][1]=args_size/4;
 654         args_size += 4;
 655       }
 656     }
 657   }
 658   args_size = keep = 0;
 659   for(i = 0;i < nb_args; i++) {
 660     vnrott(keep+1);
 661     if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
 662       size = type_size(&vtop->type, &align);
 663       /* align to stack align size */
 664       size = (size + 3) & ~3;
 665       /* allocate the necessary size on stack */
 666       gadd_sp(-size);
 667       /* generate structure store */
 668       r = get_reg(RC_INT);
 669       o(0xE1A0000D|(intr(r)<<12));
 670       vset(&vtop->type, r | VT_LVAL, 0);
 671       vswap();
 672       vstore();
 673       vtop--;
 674       args_size += size;
 675     } else if (is_float(vtop->type.t)) {
 676       r=fpr(gv(RC_FLOAT))<<12;
 677       if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
 678         size = 4;
 679       else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
 680         size = 8;
 681       else
 682         size = LDOUBLE_SIZE;
 683
 684       if (size == 12)
 685         r|=0x400000;
 686       else if(size == 8)
 687         r|=0x8000;
 688
 689       o(0xED2D0100|r|(size>>2));
 690       vtop--;
 691       args_size += size;
 692     } else {
 693       int s;
 694       /* simple type (currently always same size) */
 695       /* XXX: implicit cast ? */
 696       size=4;
 697       if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
 698         lexpand_nr();
 699         s=RC_INT;
 700         if(nb_args-i<5 && plan[nb_args-i-1][1]!=-1) {
 701           s=regmask(plan[nb_args-i-1][1]);
 702           todo&=~(1<<plan[nb_args-i-1][1]);
 703         }
 704         if(s==RC_INT) {
 705           r = gv(s);
 706           o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
 707           vtop--;
 708         } else {
 709           plan2[keep]=s;
 710           keep++;
 711           vswap();
 712         }
 713         size = 8;
 714       }
 715       s=RC_INT;
 716       if(nb_args-i<5 && plan[nb_args-i-1][0]!=-1) {
 717         s=regmask(plan[nb_args-i-1][0]);
 718         todo&=~(1<<plan[nb_args-i-1][0]);
 719       }
 720       if(s==RC_INT) {
 721         r = gv(s);
 722         o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
 723         vtop--;
 724       } else {
 725         plan2[keep]=s;
 726         keep++;
 727       }
 728       args_size += size;
 729     }
 730   }
 731   for(i=keep;i--;) {
 732     gv(plan2[i]);
 733     vrott(keep);
 734   }
 735   save_regs(keep); /* save used temporary registers */
 736   keep++;
 737   if(args_size) {
 738     int n;
 739     n=args_size/4;
 740     if(n>4)
 741       n=4;
 742     todo&=((1<<n)-1);
 743     if(todo) {
 744       int i;
 745       o(0xE8BD0000|todo);
 746       for(i=0;i<4;i++)
 747         if(todo&(1<<i)) {
 748           vpushi(0);
 749           vtop->r=i;
 750           keep++;
 751         }
 752     }
 753     args_size-=n*4;
 754   }
 755   vnrott(keep);
 756   func_sym = vtop->type.ref;
 757   gcall_or_jmp(0);
 758   if (args_size)
 759       gadd_sp(args_size);
 760   vtop-=keep;
 761 }
 762
 763 /* generate function prolog of type 't' */
 764 void gfunc_prolog(CType *func_type)
 765 {
 766   Sym *sym,*sym2;
 767   int n,addr,size,align;
 768
 769   sym = func_type->ref;
 770   func_vt = sym->type;
 771
 772   n=0;
 773   addr=12;
 774   if((func_vt.t & VT_BTYPE) == VT_STRUCT) {
 775     func_vc = addr;
 776     addr += 4;
 777     n++;
 778   }
 779   for(sym2=sym->next;sym2 && n<4;sym2=sym2->next) {
 780     size = type_size(&sym2->type, &align);
 781     size = (size + 3) & ~3;
 782     n+=size/4;
 783   }
 784   o(0xE1A0C00D); /* mov ip,sp */
 785   if(func_type->ref->c == FUNC_ELLIPSIS)
 786     n=4;
 787   if(n) {
 788     if(n>4)
 789       n=4;
 790     o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
 791   }
 792   o(0xE92D5800); /* save fp, ip, lr*/
 793   o(0xE1A0B00D); /* mov fp,sp */
 794   func_sub_sp_offset = ind;
 795   o(0xE1A00000); /* nop, leave space for stack adjustment */
 796   while ((sym = sym->next)) {
 797     CType *type;
 798     type = &sym->type;
 799     sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
 800     size = type_size(type, &align);
 801     size = (size + 3) & ~3;
 802     addr += size;
 803   }
 804   last_itod_magic=0;
 805   loc = 0;
 806 }
 807
 808 /* generate function epilog */
 809 void gfunc_epilog(void)
 810 {
 811   unsigned long x;
 812   o(0xE89BA800); /* restore fp, sp, pc */
 813   if(loc) {
 814     x=stuff_const(0xE24DD000, (-loc + 3) & -4); /* sub sp,sp,# */
 815     if(x)
 816       *(unsigned long *)(cur_text_section->data + func_sub_sp_offset) = x;
 817     else {
 818       unsigned long addr;
 819       addr=ind;
 820       o(0xE59FC004); /* ldr ip,[pc+4] */
 821       o(0xE04DD00C); /* sub sp,sp,ip  */
 822       o(0xE1A0F00E); /* mov pc,lr */
 823       o((-loc + 3) & -4);
 824       *(unsigned long *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
 825     }
 826   }
 827 }
 828
 829 /* generate a jump to a label */
 830 int gjmp(int t)
 831 {
 832   int r;
 833   r=ind;
 834   o(0xE0000000|encbranch(r,t,1));
 835   return r;
 836 }
 837
 838 /* generate a jump to a fixed address */
 839 void gjmp_addr(int a)
 840 {
 841   gjmp(a);
 842 }
 843
 844 /* generate a test. set 'inv' to invert test. Stack entry is popped */
 845 int gtst(int inv, int t)
 846 {
 847   int v, r;
 848   unsigned long op;
 849   v = vtop->r & VT_VALMASK;
 850   r=ind;
 851   if (v == VT_CMP) {
 852     op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
 853     op|=encbranch(r,t,1);
 854     o(op);
 855     t=r;
 856   } else if (v == VT_JMP || v == VT_JMPI) {
 857     if ((v & 1) == inv) {
 858       if(!vtop->c.i)
 859         vtop->c.i=t;
 860       else {
 861         unsigned long *x;
 862         int p,lp;
 863         if(t) {
 864           p = vtop->c.i;
 865           do {
 866             p = decbranch(lp=p);
 867           } while(p);
 868           x = (unsigned long *)(cur_text_section->data + lp);
 869           *x &= 0xff000000;
 870           *x |= encbranch(lp,t,1);
 871         }
 872         t = vtop->c.i;
 873       }
 874     } else {
 875       t = gjmp(t);
 876       gsym(vtop->c.i);
 877     }
 878   } else {
 879     if (is_float(vtop->type.t)) {
 880       r=gv(RC_FLOAT);
 881       o(0xEE90F118|fpr(r)<<16);
 882       vtop->r = VT_CMP;
 883       vtop->c.i = TOK_NE;
 884       return gtst(inv, t);
 885     } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 886       /* constant jmp optimization */
 887       if ((vtop->c.i != 0) != inv)
 888         t = gjmp(t);
 889     } else {
 890       v = gv(RC_INT);
 891       o(0xE3300000|(intr(v)<<16));
 892       vtop->r = VT_CMP;
 893       vtop->c.i = TOK_NE;
 894       return gtst(inv, t);
 895     }
 896   }
 897   vtop--;
 898   return t;
 899 }
 900
 901 /* generate an integer binary operation */
 902 void gen_opi(int op)
 903 {
 904   int c, func;
 905   unsigned long opc,r,fr;
 906
 907   c=0;
 908   switch(op) {
 909     case '+':
 910       opc = 0x8;
 911       c=1;
 912       break;
 913     case TOK_ADDC1: /* add with carry generation */
 914       opc = 0x9;
 915       c=1;
 916       break;
 917     case '-':
 918       opc = 0x4;
 919       c=1;
 920       break;
 921     case TOK_SUBC1: /* sub with carry generation */
 922       opc = 0x5;
 923       c=1;
 924       break;
 925     case TOK_ADDC2: /* add with carry use */
 926       opc = 0xA;
 927       c=1;
 928       break;
 929     case TOK_SUBC2: /* sub with carry use */
 930       opc = 0xC;
 931       c=1;
 932       break;
 933     case '&':
 934       opc = 0x0;
 935       c=1;
 936       break;
 937     case '^':
 938       opc = 0x2;
 939       c=1;
 940       break;
 941     case '|':
 942       opc = 0x18;
 943       c=1;
 944       break;
 945     case '*':
 946       gv2(RC_INT, RC_INT);
 947       r = vtop[-1].r;
 948       fr = vtop[0].r;
 949       vtop--;
 950       o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
 951       return;
 952     case TOK_SHL:
 953       opc = 0;
 954       c=2;
 955       break;
 956     case TOK_SHR:
 957       opc = 1;
 958       c=2;
 959       break;
 960     case TOK_SAR:
 961       opc = 2;
 962       c=2;
 963       break;
 964     case '/':
 965     case TOK_PDIV:
 966       func=TOK___divsi3;
 967       c=3;
 968       break;
 969     case TOK_UDIV:
 970       func=TOK___udivsi3;
 971       c=3;
 972       break;
 973     case '%':
 974       func=TOK___modsi3;
 975       c=3;
 976       break;
 977     case TOK_UMOD:
 978       func=TOK___umodsi3;
 979       c=3;
 980       break;
 981     case TOK_UMULL:
 982       gv2(RC_INT, RC_INT);
 983       r=intr(vtop[-1].r2=get_reg(RC_INT));
 984       c=vtop[-1].r;
 985       vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
 986       vtop--;
 987       o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
 988       return;
 989     default:
 990       opc = 0x15;
 991       c=1;
 992   }
 993   switch(c) {
 994     case 1:
 995       if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
 996         if(opc == 4 || opc == 5 || opc == 0xc) {
 997           vswap();
 998           opc|=2; // sub -> rsb
 999         }
1000       }
1001       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1002           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1003         gv(RC_INT);
1004       vswap();
1005       c=intr(gv(RC_INT));
1006       vswap();
1007       opc=0xE0000000|(opc<<20)|(c<<16);
1008       if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1009         unsigned long x;
1010         x=stuff_const(opc|0x2000000,vtop->c.i);
1011         if(x) {
1012           r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1013           o(x|(r<<12));
1014           goto done;
1015         }
1016       }
1017       fr=intr(gv(RC_INT));
1018       r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1019       o(opc|(r<<12)|fr);
1020 done:
1021       vtop--;
1022       if (op >= TOK_ULT && op <= TOK_GT) {
1023         vtop->r = VT_CMP;
1024         vtop->c.i = op;
1025       }
1026       break;
1027     case 2:
1028       opc=0xE1A00000|(opc<<5);
1029       if ((vtop->r & VT_VALMASK) == VT_CMP ||
1030           (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1031         gv(RC_INT);
1032       vswap();
1033       r=intr(gv(RC_INT));
1034       vswap();
1035       opc|=r;
1036       if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1037         fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1038         c = vtop->c.i & 0x1f;
1039         o(opc|(c<<7)|(fr<<12));
1040       } else {
1041         fr=intr(gv(RC_INT));
1042         c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1043         o(opc|(c<<12)|(fr<<8)|0x10);
1044       }
1045       vtop--;
1046       break;
1047     case 3:
1048       vpush_global_sym(&func_old_type, func);
1049       vrott(3);
1050       gfunc_call(2);
1051       vpushi(0);
1052       vtop->r = REG_IRET;
1053       break;
1054     default:
1055       error("gen_opi %i unimplemented!",op);
1056   }
1057 }
1058
1059 static int is_fconst()
1060 {
1061   long double f;
1062   int r;
1063   if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1064     return 0;
1065   if (vtop->type.t == VT_FLOAT)
1066     f = vtop->c.f;
1067   else if (vtop->type.t == VT_DOUBLE)
1068     f = vtop->c.d;
1069   else
1070     f = vtop->c.ld;
1071   if(!ieee_finite(f))
1072     return 0;
1073   r=0x8;
1074   if(f<0.0) {
1075     r=0x18;
1076     f=-f;
1077   }
1078   if(f==0.0)
1079     return r;
1080   if(f==1.0)
1081     return r|1;
1082   if(f==2.0)
1083     return r|2;
1084   if(f==3.0)
1085     return r|3;
1086   if(f==4.0)
1087     return r|4;
1088   if(f==5.0)
1089     return r|5;
1090   if(f==0.5)
1091     return r|6;
1092   if(f==10.0)
1093     return r|7;
1094   return 0;
1095 }
1096
/* Generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type.

   t1 is vtop[-1] and t2 is vtop on entry. 'op' is '+', '-', '*', '/' or a
   comparison token in the range TOK_ULT..TOK_GT; anything else is reported
   through error(). Exactly one instruction word is emitted at the end:
   x | (r << 16) | (c1 << 12) | r2, where r is the register of the first
   instruction operand, r2 is either a register number or a small-constant
   code from is_fconst(), and c1 is the destination (15 for comparisons).
   On return one value has been popped; the new vtop holds either the
   result register or VT_CMP with the condition token in c.i. */
void gen_opf(int op)
{
  unsigned long x;  /* instruction word being assembled */
  int r,r2,c1,c2;
  //fputs("gen_opf\n",stderr);
  /* c1 / c2: is_fconst() result for t1 / t2. 0 means "not a small
     constant"; otherwise 0x8|index, and a value > 0xf means the constant
     is negative (0x10 flag set by is_fconst()). */
  vswap();
  c1 = is_fconst();
  vswap();
  c2 = is_fconst();
  x=0xEE000100;
  /* select the operand size from the (common) operand type */
#if LDOUBLE_SIZE == 8
  if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
    x|=0x80;
#else
  if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
    x|=0x80;
  else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
    x|=0x80000;
#endif
  switch(op)
  {
    case '+':
      /* commutative: if t2 is not a small constant, swap the operands so
         that a constant t1 (if any) ends up in the second position */
      if(!c2) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2) {
        /* adding a negative constant == subtracting its magnitude */
        if(c2>0xf)
          x|=0x200000; // suf
        r2=c2&0xf;  /* drop the sign flag, keep 0x8|index */
      } else {
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '-':
      if(c2) {
        /* t1 - const: subtract a positive constant; for a negative one
           the opcode bits stay clear and its magnitude is used instead */
        if(c2<=0xf)
          x|=0x200000; // suf
        r2=c2&0xf;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* positive const - t2: reverse subtract with t2 in a register */
        x|=0x300000; // rsf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        /* general case: both operands in registers */
        x|=0x200000; // suf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    case '*':
      /* commutative, but only non-negative small constants can be used:
         if t2 does not qualify, swap and try t1 instead */
      if(!c2 || c2>0xf) {
        vswap();
        c2=c1;
      }
      vswap();
      r=fpr(gv(RC_FLOAT));
      vswap();
      if(c2 && c2<=0xf)
        r2=c2;
      else
        r2=fpr(gv(RC_FLOAT));
      x|=0x100000; // muf
      break;
    case '/':
      if(c2 && c2<=0xf) {
        /* t1 / positive const */
        x|=0x400000; // dvf
        r2=c2;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else if(c1 && c1<=0xf) {
        /* positive const / t2: reverse divide with t2 in a register */
        x|=0x500000; // rdf
        r2=c1;
        r=fpr(gv(RC_FLOAT));
        vswap();
      } else {
        /* general case: both operands in registers */
        x|=0x400000; // dvf
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        r2=fpr(gv(RC_FLOAT));
      }
      break;
    default:
      if(op >= TOK_ULT && op <= TOK_GT) {
        x|=0xd0f110; // cmfe
        /* rewrite the signed comparison tokens to their unsigned
           counterparts; the unsigned tokens themselves are unexpected
           here and only produce a diagnostic on stderr */
        switch(op) {
          case TOK_ULT:
          case TOK_UGE:
          case TOK_ULE:
          case TOK_UGT:
            fputs("unsigned comparision on floats?\n",stderr);
            break;
          case TOK_LT:
            op=TOK_ULT;
            break;
          case TOK_GE:
            op=TOK_UGE;
            break;
          case TOK_LE:
            op=TOK_ULE;
            break;
          case TOK_GT:
            op=TOK_UGT;
            break;
          case TOK_EQ:
          case TOK_NE:
            x&=~0x400000; // cmfe -> cmf
            break;
        }
        /* only t1 is a small constant: swap the operands so the constant
           comes second, and mirror the condition accordingly */
        if(c1 && !c2) {
          c2=c1;
          vswap();
          switch(op) {
            case TOK_ULT:
              op=TOK_UGT;
              break;
            case TOK_UGE:
              op=TOK_ULE;
              break;
            case TOK_ULE:
              op=TOK_UGE;
              break;
            case TOK_UGT:
              op=TOK_ULT;
              break;
          }
        }
// bug (intention?) in Linux FPU emulator
// doesn't set carry if equal
        /* hence ULT/UGE are turned back into the signed LT/GE tokens */
        if(op==TOK_ULT)
          op=TOK_LT;
        else if(op==TOK_UGE)
          op=TOK_GE;
        vswap();
        r=fpr(gv(RC_FLOAT));
        vswap();
        if(c2) {
          /* negative constant: set 0x200000 in the compare opcode and
             use the magnitude (presumably the compare-negated form --
             TODO confirm against the FPA opcode table) */
          if(c2>0xf)
            x|=0x200000;
          r2=c2&0xf;
        } else {
          r2=fpr(gv(RC_FLOAT));
        }
        /* the result lives in the flags: record the condition to test */
        vtop[-1].r = VT_CMP;
        vtop[-1].c.i = op;
      } else {
        error("unknown fp op %x!\n",op);
        return;
      }
  }
  if(vtop[-1].r == VT_CMP)
    c1=15;  /* comparisons put 15 in the destination field */
  else {
    /* pick the result register. r2&0x8 means the second instruction
       operand is a small constant, so vtop holds no register and
       vtop[-1].r is passed twice. NOTE(review): two2mask()/get_reg_ex()
       are defined elsewhere; presumably they let the result reuse one of
       the operand registers -- confirm. */
    c1=vtop->r;
    if(r2&0x8)
      c1=vtop[-1].r;
    vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
    c1=fpr(vtop[-1].r);
  }
  vtop--;
  o(x|(r<<16)|(c1<<12)|r2);
}
1271
1272 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1273    and 'long long' cases. */
1274 void gen_cvt_itof(int t)
1275 {
1276   int r,r2,bt;
1277   bt=vtop->type.t & VT_BTYPE;
1278   if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1279     r=intr(gv(RC_INT));
1280     r2=fpr(vtop->r=get_reg(RC_FLOAT));
1281     o(0xEE000190|(r2<<16)|(r<<12));
1282     if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1283       unsigned int off=0;
1284       o(0xE3500000|(r<<12));
1285       r=fpr(get_reg(RC_FLOAT));
1286       if(last_itod_magic) {
1287         off=ind+8-last_itod_magic;
1288         off/=4;
1289         if(off>255)
1290           off=0;
1291       }
1292       o(0xBD1F8100|(r<<12)|off);
1293       if(!off) {
1294         o(0xEA000001);
1295         last_itod_magic=ind;
1296         o(0x41F00000);
1297         o(0);
1298       }
1299       o(0xBE000180|(r2<<16)|(r2<<12)|r);
1300     }
1301     return;
1302   } else if(bt == VT_LLONG) {
1303     int func;
1304     if(vtop->type.t & VT_UNSIGNED)
1305       func=TOK___ulltold;
1306     else
1307       func=TOK___slltold;
1308     vpush_global_sym(&func_old_type, func);
1309     vswap();
1310     gfunc_call(1);
1311     vpushi(0);
1312     vtop->r=TREG_F0;
1313     return;
1314   }
1315   error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1316 }
1317
1318 /* convert fp to int 't' type */
1319 void gen_cvt_ftoi(int t)
1320 {
1321   int r,r2,u,func=0;
1322   u=t&VT_UNSIGNED;
1323   t&=VT_BTYPE;
1324   r2=vtop->type.t & VT_BTYPE;
1325   if(t==VT_INT) {
1326     if(u) {
1327       if(r2 == VT_FLOAT)
1328         func=TOK___fixunssfsi;
1329       else if(r2 == VT_DOUBLE)
1330         func=TOK___fixunsdfsi;
1331       else if(r2 == VT_LDOUBLE)
1332 #if LDOUBLE_SIZE == 8
1333         func=TOK___fixunsdfsi;
1334 #else
1335         func=TOK___fixunsxfsi;
1336 #endif
1337     } else {
1338       r=fpr(gv(RC_FLOAT));
1339       r2=intr(vtop->r=get_reg(RC_INT));
1340       o(0xEE100170|(r2<<12)|r);
1341     return;
1342     }
1343   } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1344     if(r2 == VT_FLOAT)
1345       func=TOK___fixsfdi;
1346     else if(r2 == VT_DOUBLE)
1347       func=TOK___fixdfdi;
1348     else if(r2 == VT_LDOUBLE)
1349 #if LDOUBLE_SIZE == 8
1350       func=TOK___fixdfdi;
1351 #else
1352       func=TOK___fixxfdi;
1353 #endif
1354     }
1355   if(func) {
1356     vpush_global_sym(&func_old_type, func);
1357     vswap();
1358     gfunc_call(1);
1359     vpushi(0);
1360     if(t == VT_LLONG)
1361       vtop->r2 = REG_LRET;
1362     vtop->r = REG_IRET;
1363     return;
1364   }
1365   error("unimplemented gen_cvt_ftoi!");
1366 }
1367
1368 /* convert from one floating point type to another */
1369 void gen_cvt_ftof(int t)
1370 {
1371   /* all we have to do on i386 and ARM is to put the float in a register */
1372   gv(RC_FLOAT);
1373 }
1374
1375 /* computed goto support */
1376 void ggoto(void)
1377 {
1378   gcall_or_jmp(1);
1379   vtop--;
1380 }
1381
1382 /* end of ARM code generator */
1383 /*************************************************************/
1384