arm-gen.c

   1 /*
   2  *  ARMv4 code generator for TCC
   3  *
   4  *  Copyright (c) 2003 Daniel Glöckner
   5  *  Copyright (c) 2012 Thomas Preud'homme
   6  *
   7  *  Based on i386-gen.c by Fabrice Bellard
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22  */
  23
  24 #ifdef TARGET_DEFS_ONLY
  25
  26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
  27 #error "Currently TinyCC only supports float computation with VFP instructions"
  28 #endif
  29
  30 /* number of available registers */
  31 #ifdef TCC_ARM_VFP
  32 #define NB_REGS 13
  33 #else
  34 #define NB_REGS 9
  35 #endif
  36
  37 typedef int RegArgs;
  38
  39 #ifndef TCC_ARM_VERSION
  40 #define TCC_ARM_VERSION 5
  41 #endif
  42
  43 /* a register can belong to several classes. The classes must be
  44    sorted from more general to more precise (see gv2() code which does
  45    assumptions on it). */
  46 #define RC_INT 0x0001   /* generic integer register */
  47 #define RC_FLOAT 0x0002 /* generic float register */
  48 #define RC_R0 0x0004
  49 #define RC_R1 0x0008
  50 #define RC_R2 0x0010
  51 #define RC_R3 0x0020
  52 #define RC_R12 0x0040
  53 #define RC_F0 0x0080
  54 #define RC_F1 0x0100
  55 #define RC_F2 0x0200
  56 #define RC_F3 0x0400
  57 #ifdef TCC_ARM_VFP
  58 #define RC_F4 0x0800
  59 #define RC_F5 0x1000
  60 #define RC_F6 0x2000
  61 #define RC_F7 0x4000
  62 #endif
  63 #define RC_IRET RC_R0 /* function return: integer register */
  64 #define RC_LRET RC_R1 /* function return: second integer register */
  65 #define RC_FRET RC_F0 /* function return: float register */
  66
  67 /* pretty names for the registers */
  68 enum {
  69     TREG_R0 = 0,
  70     TREG_R1,
  71     TREG_R2,
  72     TREG_R3,
  73     TREG_R12,
  74     TREG_F0,
  75     TREG_F1,
  76     TREG_F2,
  77     TREG_F3,
  78 #ifdef TCC_ARM_VFP
  79     TREG_F4,
  80     TREG_F5,
  81     TREG_F6,
  82     TREG_F7,
  83 #endif
  84 };
  85
  86 #ifdef TCC_ARM_VFP
  87 #define T2CPR(t) (((t)&VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
  88 #endif
  89
  90 /* return registers for function */
  91 #define REG_IRET TREG_R0 /* single word int return register */
  92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
  93 #define REG_FRET TREG_F0 /* float return register */
  94
  95 #ifdef TCC_ARM_EABI
  96 #define TOK___divdi3 TOK___aeabi_ldivmod
  97 #define TOK___moddi3 TOK___aeabi_ldivmod
  98 #define TOK___udivdi3 TOK___aeabi_uldivmod
  99 #define TOK___umoddi3 TOK___aeabi_uldivmod
 100 #endif
 101
 102 /* defined if function parameters must be evaluated in reverse order */
 103 #define INVERT_FUNC_PARAMS
 104
 105 /* defined if structures are passed as pointers. Otherwise structures
 106    are directly pushed on stack. */
 107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
 108
 109 /* pointer size, in bytes */
 110 #define PTR_SIZE 4
 111
 112 /* long double size and alignment, in bytes */
 113 #ifdef TCC_ARM_VFP
 114 #define LDOUBLE_SIZE 8
 115 #endif
 116
 117 #ifndef LDOUBLE_SIZE
 118 #define LDOUBLE_SIZE 8
 119 #endif
 120
 121 #ifdef TCC_ARM_EABI
 122 #define LDOUBLE_ALIGN 8
 123 #else
 124 #define LDOUBLE_ALIGN 4
 125 #endif
 126
 127 /* maximum alignment (for aligned attribute support) */
 128 #define MAX_ALIGN 8
 129
 130 #define CHAR_IS_UNSIGNED
 131
 132 /******************************************************/
 133 /* ELF defines */
 134
 135 #define EM_TCC_TARGET EM_ARM
 136
 137 /* relocation type for 32 bit data relocation */
 138 #define R_DATA_32 R_ARM_ABS32
 139 #define R_DATA_PTR R_ARM_ABS32
 140 #define R_JMP_SLOT R_ARM_JUMP_SLOT
 141 #define R_COPY R_ARM_COPY
 142
 143 #define ELF_START_ADDR 0x00008000
 144 #define ELF_PAGE_SIZE 0x1000
 145
 146 enum float_abi {
 147     ARM_SOFTFP_FLOAT,
 148     ARM_HARD_FLOAT,
 149 };
 150
 151 /******************************************************/
 152 #else /* ! TARGET_DEFS_ONLY */
 153 /******************************************************/
 154 #include "tcc.h"
 155
 156 enum float_abi float_abi;
 157
 158 ST_DATA const int reg_classes[NB_REGS] = {
 159     /* r0 */ RC_INT | RC_R0,
 160     /* r1 */ RC_INT | RC_R1,
 161     /* r2 */ RC_INT | RC_R2,
 162     /* r3 */ RC_INT | RC_R3,
 163     /* r12 */ RC_INT | RC_R12,
 164     /* f0 */ RC_FLOAT | RC_F0,
 165     /* f1 */ RC_FLOAT | RC_F1,
 166     /* f2 */ RC_FLOAT | RC_F2,
 167     /* f3 */ RC_FLOAT | RC_F3,
 168 #ifdef TCC_ARM_VFP
 169     /* d4/s8 */ RC_FLOAT | RC_F4,
 170     /* d5/s10 */ RC_FLOAT | RC_F5,
 171     /* d6/s12 */ RC_FLOAT | RC_F6,
 172     /* d7/s14 */ RC_FLOAT | RC_F7,
 173 #endif
 174 };
 175
 176 static int func_sub_sp_offset, last_itod_magic;
 177 static int leaffunc;
 178
 179 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
 180 static CType float_type, double_type, func_float_type, func_double_type;
 181 ST_FUNC void arm_init(struct TCCState* s)
 182 {
 183     float_type.t = VT_FLOAT;
 184     double_type.t = VT_DOUBLE;
 185     func_float_type.t = VT_FUNC;
 186     func_float_type.ref =
 187         sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
 188     func_double_type.t = VT_FUNC;
 189     func_double_type.ref =
 190         sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
 191
 192     float_abi = s->float_abi;
 193 #ifndef TCC_ARM_HARDFLOAT
 194     tcc_warning("soft float ABI currently not supported: default to softfp");
 195 #endif
 196 }
 197 #else
 198 #define func_float_type func_old_type
 199 #define func_double_type func_old_type
 200 #define func_ldouble_type func_old_type
 201 ST_FUNC void arm_init(struct TCCState* s)
 202 {
 203 #if !defined(TCC_ARM_VFP)
 204     tcc_warning("Support for FPA is deprecated and will be removed in next"
 205                 " release");
 206 #endif
 207 #if !defined(TCC_ARM_EABI)
 208     tcc_warning("Support for OABI is deprecated and will be removed in next"
 209                 " release");
 210 #endif
 211 }
 212 #endif
 213
 214 static int two2mask(int a, int b)
 215 {
 216     return (reg_classes[a] | reg_classes[b]) & ~(RC_INT | RC_FLOAT);
 217 }
 218
 219 static int regmask(int r)
 220 {
 221     return reg_classes[r] & ~(RC_INT | RC_FLOAT);
 222 }
 223
 224 /******************************************************/
 225
 226 #if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
 227 char* default_elfinterp(struct TCCState* s)
 228 {
 229     if (s->float_abi == ARM_HARD_FLOAT)
 230         return "/lib/ld-linux-armhf.so.3";
 231     else
 232         return "/lib/ld-linux.so.3";
 233 }
 234 #endif
 235
 236 void o(uint32_t i)
 237 {
 238     /* this is a good place to start adding big-endian support*/
 239     int ind1;
 240
 241     ind1 = ind + 4;
 242     if (!cur_text_section)
 243         tcc_error("compiler error! This happens f.ex. if the compiler\n"
 244                   "can't evaluate constant expressions outside of a function.");
 245     if (ind1 > cur_text_section->data_allocated)
 246         section_realloc(cur_text_section, ind1);
 247     cur_text_section->data[ind++] = i & 255;
 248     i >>= 8;
 249     cur_text_section->data[ind++] = i & 255;
 250     i >>= 8;
 251     cur_text_section->data[ind++] = i & 255;
 252     i >>= 8;
 253     cur_text_section->data[ind++] = i;
 254 }
 255
 256 static uint32_t stuff_const(uint32_t op, uint32_t c)
 257 {
 258     int try_neg = 0;
 259     uint32_t nc = 0, negop = 0;
 260
 261     switch (op & 0x1F00000) {
 262     case 0x800000: // add
 263     case 0x400000: // sub
 264         try_neg = 1;
 265         negop = op ^ 0xC00000;
 266         nc = -c;
 267         break;
 268     case 0x1A00000: // mov
 269     case 0x1E00000: // mvn
 270         try_neg = 1;
 271         negop = op ^ 0x400000;
 272         nc = ~c;
 273         break;
 274     case 0x200000: // xor
 275         if (c == ~0)
 276             return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1E00000;
 277         break;
 278     case 0x0: // and
 279         if (c == ~0)
 280             return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1A00000;
 281     case 0x1C00000: // bic
 282         try_neg = 1;
 283         negop = op ^ 0x1C00000;
 284         nc = ~c;
 285         break;
 286     case 0x1800000: // orr
 287         if (c == ~0)
 288             return (op & 0xFFF0FFFF) | 0x1E00000;
 289         break;
 290     }
 291     do {
 292         uint32_t m;
 293         int i;
 294         if (c < 256) /* catch undefined <<32 */
 295             return op | c;
 296         for (i = 2; i < 32; i += 2) {
 297             m = (0xff >> i) | (0xff << (32 - i));
 298             if (!(c & ~m))
 299                 return op | (i << 7) | (c << i) | (c >> (32 - i));
 300         }
 301         op = negop;
 302         c = nc;
 303     } while (try_neg--);
 304     return 0;
 305 }
 306
 307 // only add,sub
 308 void stuff_const_harder(uint32_t op, uint32_t v)
 309 {
 310     uint32_t x;
 311     x = stuff_const(op, v);
 312     if (x)
 313         o(x);
 314     else {
 315         uint32_t a[16], nv, no, o2, n2;
 316         int i, j, k;
 317         a[0] = 0xff;
 318         o2 = (op & 0xfff0ffff) | ((op & 0xf000) << 4);
 319         ;
 320         for (i = 1; i < 16; i++)
 321             a[i] = (a[i - 1] >> 2) | (a[i - 1] << 30);
 322         for (i = 0; i < 12; i++)
 323             for (j = i < 4 ? i + 12 : 15; j >= i + 4; j--)
 324                 if ((v & (a[i] | a[j])) == v) {
 325                     o(stuff_const(op, v & a[i]));
 326                     o(stuff_const(o2, v & a[j]));
 327                     return;
 328                 }
 329         no = op ^ 0xC00000;
 330         n2 = o2 ^ 0xC00000;
 331         nv = -v;
 332         for (i = 0; i < 12; i++)
 333             for (j = i < 4 ? i + 12 : 15; j >= i + 4; j--)
 334                 if ((nv & (a[i] | a[j])) == nv) {
 335                     o(stuff_const(no, nv & a[i]));
 336                     o(stuff_const(n2, nv & a[j]));
 337                     return;
 338                 }
 339         for (i = 0; i < 8; i++)
 340             for (j = i + 4; j < 12; j++)
 341                 for (k = i < 4 ? i + 12 : 15; k >= j + 4; k--)
 342                     if ((v & (a[i] | a[j] | a[k])) == v) {
 343                         o(stuff_const(op, v & a[i]));
 344                         o(stuff_const(o2, v & a[j]));
 345                         o(stuff_const(o2, v & a[k]));
 346                         return;
 347                     }
 348         no = op ^ 0xC00000;
 349         nv = -v;
 350         for (i = 0; i < 8; i++)
 351             for (j = i + 4; j < 12; j++)
 352                 for (k = i < 4 ? i + 12 : 15; k >= j + 4; k--)
 353                     if ((nv & (a[i] | a[j] | a[k])) == nv) {
 354                         o(stuff_const(no, nv & a[i]));
 355                         o(stuff_const(n2, nv & a[j]));
 356                         o(stuff_const(n2, nv & a[k]));
 357                         return;
 358                     }
 359         o(stuff_const(op, v & a[0]));
 360         o(stuff_const(o2, v & a[4]));
 361         o(stuff_const(o2, v & a[8]));
 362         o(stuff_const(o2, v & a[12]));
 363     }
 364 }
 365
 366 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
 367 {
 368     addr -= pos + 8;
 369     addr /= 4;
 370     if (addr >= 0x1000000 || addr < -0x1000000) {
 371         if (fail)
 372             tcc_error("FIXME: function bigger than 32MB");
 373         return 0;
 374     }
 375     return 0x0A000000 | (addr & 0xffffff);
 376 }
 377
 378 int decbranch(int pos)
 379 {
 380     int x;
 381     x = *(uint32_t*)(cur_text_section->data + pos);
 382     x &= 0x00ffffff;
 383     if (x & 0x800000)
 384         x -= 0x1000000;
 385     return x * 4 + pos + 8;
 386 }
 387
 388 /* output a symbol and patch all calls to it */
 389 void gsym_addr(int t, int a)
 390 {
 391     uint32_t* x;
 392     int lt;
 393     while (t) {
 394         x = (uint32_t*)(cur_text_section->data + t);
 395         t = decbranch(lt = t);
 396         if (a == lt + 4)
 397             *x = 0xE1A00000; // nop
 398         else {
 399             *x &= 0xff000000;
 400             *x |= encbranch(lt, a, 1);
 401         }
 402     }
 403 }
 404
 405 void gsym(int t)
 406 {
 407     gsym_addr(t, ind);
 408 }
 409
 410 #ifdef TCC_ARM_VFP
 411 static uint32_t vfpr(int r)
 412 {
 413     if (r < TREG_F0 || r > TREG_F7)
 414         tcc_error("compiler error! register %i is no vfp register", r);
 415     return r - 5;
 416 }
 417 #else
 418 static uint32_t fpr(int r)
 419 {
 420     if (r < TREG_F0 || r > TREG_F3)
 421         tcc_error("compiler error! register %i is no fpa register", r);
 422     return r - 5;
 423 }
 424 #endif
 425
 426 static uint32_t intr(int r)
 427 {
 428     if (r == 4)
 429         return 12;
 430     if ((r < 0 || r > 4) && r != 14)
 431         tcc_error("compiler error! register %i is no int register", r);
 432     return r;
 433 }
 434
 435 static void calcaddr(uint32_t* base, int* off, int* sgn, int maxoff,
 436                      unsigned shift)
 437 {
 438     if (*off > maxoff || *off & ((1 << shift) - 1)) {
 439         uint32_t x, y;
 440         x = 0xE280E000;
 441         if (*sgn)
 442             x = 0xE240E000;
 443         x |= (*base) << 16;
 444         *base = 14; // lr
 445         y = stuff_const(x, *off & ~maxoff);
 446         if (y) {
 447             o(y);
 448             *off &= maxoff;
 449             return;
 450         }
 451         y = stuff_const(x, (*off + maxoff) & ~maxoff);
 452         if (y) {
 453             o(y);
 454             *sgn = !*sgn;
 455             *off = ((*off + maxoff) & ~maxoff) - *off;
 456             return;
 457         }
 458         stuff_const_harder(x, *off & ~maxoff);
 459         *off &= maxoff;
 460     }
 461 }
 462
 463 static uint32_t mapcc(int cc)
 464 {
 465     switch (cc) {
 466     case TOK_ULT:
 467         return 0x30000000; /* CC/LO */
 468     case TOK_UGE:
 469         return 0x20000000; /* CS/HS */
 470     case TOK_EQ:
 471         return 0x00000000; /* EQ */
 472     case TOK_NE:
 473         return 0x10000000; /* NE */
 474     case TOK_ULE:
 475         return 0x90000000; /* LS */
 476     case TOK_UGT:
 477         return 0x80000000; /* HI */
 478     case TOK_Nset:
 479         return 0x40000000; /* MI */
 480     case TOK_Nclear:
 481         return 0x50000000; /* PL */
 482     case TOK_LT:
 483         return 0xB0000000; /* LT */
 484     case TOK_GE:
 485         return 0xA0000000; /* GE */
 486     case TOK_LE:
 487         return 0xD0000000; /* LE */
 488     case TOK_GT:
 489         return 0xC0000000; /* GT */
 490     }
 491     tcc_error("unexpected condition code");
 492     return 0xE0000000; /* AL */
 493 }
 494
 495 static int negcc(int cc)
 496 {
 497     switch (cc) {
 498     case TOK_ULT:
 499         return TOK_UGE;
 500     case TOK_UGE:
 501         return TOK_ULT;
 502     case TOK_EQ:
 503         return TOK_NE;
 504     case TOK_NE:
 505         return TOK_EQ;
 506     case TOK_ULE:
 507         return TOK_UGT;
 508     case TOK_UGT:
 509         return TOK_ULE;
 510     case TOK_Nset:
 511         return TOK_Nclear;
 512     case TOK_Nclear:
 513         return TOK_Nset;
 514     case TOK_LT:
 515         return TOK_GE;
 516     case TOK_GE:
 517         return TOK_LT;
 518     case TOK_LE:
 519         return TOK_GT;
 520     case TOK_GT:
 521         return TOK_LE;
 522     }
 523     tcc_error("unexpected condition code");
 524     return TOK_NE;
 525 }
 526
 527 /* load 'r' from value 'sv' */
 528 void load(int r, SValue* sv)
 529 {
 530     int v, ft, fc, fr, sign;
 531     uint32_t op;
 532     SValue v1;
 533
 534     fr = sv->r;
 535     ft = sv->type.t;
 536     fc = sv->c.ul;
 537
 538     if (fc >= 0)
 539         sign = 0;
 540     else {
 541         sign = 1;
 542         fc = -fc;
 543     }
 544
 545     v = fr & VT_VALMASK;
 546     if (fr & VT_LVAL) {
 547         uint32_t base = 0xB; // fp
 548         if (v == VT_LLOCAL) {
 549             v1.type.t = VT_PTR;
 550             v1.r = VT_LOCAL | VT_LVAL;
 551             v1.c.ul = sv->c.ul;
 552             load(base = 14 /* lr */, &v1);
 553             fc = sign = 0;
 554             v = VT_LOCAL;
 555         } else if (v == VT_CONST) {
 556             v1.type.t = VT_PTR;
 557             v1.r = fr & ~VT_LVAL;
 558             v1.c.ul = sv->c.ul;
 559             v1.sym = sv->sym;
 560             load(base = 14, &v1);
 561             fc = sign = 0;
 562             v = VT_LOCAL;
 563         } else if (v < VT_CONST) {
 564             base = intr(v);
 565             fc = sign = 0;
 566             v = VT_LOCAL;
 567         }
 568         if (v == VT_LOCAL) {
 569             if (is_float(ft)) {
 570                 calcaddr(&base, &fc, &sign, 1020, 2);
 571 #ifdef TCC_ARM_VFP
 572                 op = 0xED100A00; /* flds */
 573                 if (!sign)
 574                     op |= 0x800000;
 575                 if ((ft & VT_BTYPE) != VT_FLOAT)
 576                     op |= 0x100; /* flds -> fldd */
 577                 o(op | (vfpr(r) << 12) | (fc >> 2) | (base << 16));
 578 #else
 579                 op = 0xED100100;
 580                 if (!sign)
 581                     op |= 0x800000;
 582 #if LDOUBLE_SIZE == 8
 583                 if ((ft & VT_BTYPE) != VT_FLOAT)
 584                     op |= 0x8000;
 585 #else
 586                 if ((ft & VT_BTYPE) == VT_DOUBLE)
 587                     op |= 0x8000;
 588                 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
 589                     op |= 0x400000;
 590 #endif
 591                 o(op | (fpr(r) << 12) | (fc >> 2) | (base << 16));
 592 #endif
 593             } else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == VT_BYTE ||
 594                        (ft & VT_BTYPE) == VT_SHORT) {
 595                 calcaddr(&base, &fc, &sign, 255, 0);
 596                 op = 0xE1500090;
 597                 if ((ft & VT_BTYPE) == VT_SHORT)
 598                     op |= 0x20;
 599                 if ((ft & VT_UNSIGNED) == 0)
 600                     op |= 0x40;
 601                 if (!sign)
 602                     op |= 0x800000;
 603                 o(op | (intr(r) << 12) | (base << 16) | ((fc & 0xf0) << 4) |
 604                   (fc & 0xf));
 605             } else {
 606                 calcaddr(&base, &fc, &sign, 4095, 0);
 607                 op = 0xE5100000;
 608                 if (!sign)
 609                     op |= 0x800000;
 610                 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 611                     op |= 0x400000;
 612                 o(op | (intr(r) << 12) | fc | (base << 16));
 613             }
 614             return;
 615         }
 616     } else {
 617         if (v == VT_CONST) {
 618             op = stuff_const(0xE3A00000 | (intr(r) << 12), sv->c.ul);
 619             if (fr & VT_SYM || !op) {
 620                 o(0xE59F0000 | (intr(r) << 12));
 621                 o(0xEA000000);
 622                 if (fr & VT_SYM)
 623                     greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 624                 o(sv->c.ul);
 625             } else
 626                 o(op);
 627             return;
 628         } else if (v == VT_LOCAL) {
 629             op = stuff_const(0xE28B0000 | (intr(r) << 12), sv->c.ul);
 630             if (fr & VT_SYM || !op) {
 631                 o(0xE59F0000 | (intr(r) << 12));
 632                 o(0xEA000000);
 633                 if (fr & VT_SYM) // needed ?
 634                     greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
 635                 o(sv->c.ul);
 636                 o(0xE08B0000 | (intr(r) << 12) | intr(r));
 637             } else
 638                 o(op);
 639             return;
 640         } else if (v == VT_CMP) {
 641             o(mapcc(sv->c.ul) | 0x3A00001 | (intr(r) << 12));
 642             o(mapcc(negcc(sv->c.ul)) | 0x3A00000 | (intr(r) << 12));
 643             return;
 644         } else if (v == VT_JMP || v == VT_JMPI) {
 645             int t;
 646             t = v & 1;
 647             o(0xE3A00000 | (intr(r) << 12) | t);
 648             o(0xEA000000);
 649             gsym(sv->c.ul);
 650             o(0xE3A00000 | (intr(r) << 12) | (t ^ 1));
 651             return;
 652         } else if (v < VT_CONST) {
 653             if (is_float(ft))
 654 #ifdef TCC_ARM_VFP
 655                 o(0xEEB00A40 | (vfpr(r) << 12) | vfpr(v) |
 656                   T2CPR(ft)); /* fcpyX */
 657 #else
 658                 o(0xEE008180 | (fpr(r) << 12) | fpr(v));
 659 #endif
 660             else
 661                 o(0xE1A00000 | (intr(r) << 12) | intr(v));
 662             return;
 663         }
 664     }
 665     tcc_error("load unimplemented!");
 666 }
 667
 668 /* store register 'r' in lvalue 'v' */
 669 void store(int r, SValue* sv)
 670 {
 671     SValue v1;
 672     int v, ft, fc, fr, sign;
 673     uint32_t op;
 674
 675     fr = sv->r;
 676     ft = sv->type.t;
 677     fc = sv->c.ul;
 678
 679     if (fc >= 0)
 680         sign = 0;
 681     else {
 682         sign = 1;
 683         fc = -fc;
 684     }
 685
 686     v = fr & VT_VALMASK;
 687     if (fr & VT_LVAL || fr == VT_LOCAL) {
 688         uint32_t base = 0xb;
 689         if (v < VT_CONST) {
 690             base = intr(v);
 691             v = VT_LOCAL;
 692             fc = sign = 0;
 693         } else if (v == VT_CONST) {
 694             v1.type.t = ft;
 695             v1.r = fr & ~VT_LVAL;
 696             v1.c.ul = sv->c.ul;
 697             v1.sym = sv->sym;
 698             load(base = 14, &v1);
 699             fc = sign = 0;
 700             v = VT_LOCAL;
 701         }
 702         if (v == VT_LOCAL) {
 703             if (is_float(ft)) {
 704                 calcaddr(&base, &fc, &sign, 1020, 2);
 705 #ifdef TCC_ARM_VFP
 706                 op = 0xED000A00; /* fsts */
 707                 if (!sign)
 708                     op |= 0x800000;
 709                 if ((ft & VT_BTYPE) != VT_FLOAT)
 710                     op |= 0x100; /* fsts -> fstd */
 711                 o(op | (vfpr(r) << 12) | (fc >> 2) | (base << 16));
 712 #else
 713                 op = 0xED000100;
 714                 if (!sign)
 715                     op |= 0x800000;
 716 #if LDOUBLE_SIZE == 8
 717                 if ((ft & VT_BTYPE) != VT_FLOAT)
 718                     op |= 0x8000;
 719 #else
 720                 if ((ft & VT_BTYPE) == VT_DOUBLE)
 721                     op |= 0x8000;
 722                 if ((ft & VT_BTYPE) == VT_LDOUBLE)
 723                     op |= 0x400000;
 724 #endif
 725                 o(op | (fpr(r) << 12) | (fc >> 2) | (base << 16));
 726 #endif
 727                 return;
 728             } else if ((ft & VT_BTYPE) == VT_SHORT) {
 729                 calcaddr(&base, &fc, &sign, 255, 0);
 730                 op = 0xE14000B0;
 731                 if (!sign)
 732                     op |= 0x800000;
 733                 o(op | (intr(r) << 12) | (base << 16) | ((fc & 0xf0) << 4) |
 734                   (fc & 0xf));
 735             } else {
 736                 calcaddr(&base, &fc, &sign, 4095, 0);
 737                 op = 0xE5000000;
 738                 if (!sign)
 739                     op |= 0x800000;
 740                 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
 741                     op |= 0x400000;
 742                 o(op | (intr(r) << 12) | fc | (base << 16));
 743             }
 744             return;
 745         }
 746     }
 747     tcc_error("store unimplemented");
 748 }
 749
 750 static void gadd_sp(int val)
 751 {
 752     stuff_const_harder(0xE28DD000, val);
 753 }
 754
 755 /* 'is_jmp' is '1' if it is a jump */
 756 static void gcall_or_jmp(int is_jmp)
 757 {
 758     int r;
 759     if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
 760         uint32_t x;
 761         /* constant case */
 762         x = encbranch(ind, ind + vtop->c.ul, 0);
 763         if (x) {
 764             if (vtop->r & VT_SYM) {
 765                 /* relocation case */
 766                 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
 767             } else
 768                 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24,
 769                               0);
 770             o(x | (is_jmp ? 0xE0000000 : 0xE1000000));
 771         } else {
 772             if (!is_jmp)
 773                 o(0xE28FE004); // add lr,pc,#4
 774             o(0xE51FF004);     // ldr pc,[pc,#-4]
 775             if (vtop->r & VT_SYM)
 776                 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
 777             o(vtop->c.ul);
 778         }
 779     } else {
 780         /* otherwise, indirect call */
 781         r = gv(RC_INT);
 782         if (!is_jmp)
 783             o(0xE1A0E00F);       // mov lr,pc
 784         o(0xE1A0F000 | intr(r)); // mov pc,r
 785     }
 786 }
 787
 788 /* Return whether a structure is an homogeneous float aggregate or not.
 789    The answer is true if all the elements of the structure are of the same
 790    primitive float type and there is less than 4 elements.
 791
 792    type: the type corresponding to the structure to be tested */
 793 static int is_hgen_float_aggr(CType* type)
 794 {
 795     if ((type->t & VT_BTYPE) == VT_STRUCT) {
 796         struct Sym* ref;
 797         int btype, nb_fields = 0;
 798
 799         ref = type->ref->next;
 800         btype = ref->type.t & VT_BTYPE;
 801         if (btype == VT_FLOAT || btype == VT_DOUBLE) {
 802             for (; ref && btype == (ref->type.t & VT_BTYPE);
 803                  ref = ref->next, nb_fields++)
 804                 ;
 805             return !ref && nb_fields <= 4;
 806         }
 807     }
 808     return 0;
 809 }
 810
 811 struct avail_regs {
 812     signed char
 813         avail[3];       /* 3 holes max with only float and double alignments */
 814     int first_hole;     /* first available hole */
 815     int last_hole;      /* last available hole (none if equal to first_hole) */
 816     int first_free_reg; /* next free register in the sequence, hole excluded */
 817 };
 818
 819 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
 820
 821 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
 822    param) according to the rules described in the procedure call standard for
 823    the ARM architecture (AAPCS). If found, the registers are assigned to this
 824    VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
 825    and the parameter is a single float.
 826
 827    avregs: opaque structure to keep track of available VFP co-processor regs
 828    align: alignment contraints for the param, as returned by type_size()
 829    size: size of the parameter, as returned by type_size() */
 830 int assign_vfpreg(struct avail_regs* avregs, int align, int size)
 831 {
 832     int first_reg = 0;
 833
 834     if (avregs->first_free_reg == -1)
 835         return -1;
 836     if (align >> 3) { /* double alignment */
 837         first_reg = avregs->first_free_reg;
 838         /* alignment contraint not respected so use next reg and record hole */
 839         if (first_reg & 1)
 840             avregs->avail[avregs->last_hole++] = first_reg++;
 841     } else { /* no special alignment (float or array of float) */
 842         /* if single float and a hole is available, assign the param to it */
 843         if (size == 4 && avregs->first_hole != avregs->last_hole)
 844             return avregs->avail[avregs->first_hole++];
 845         else
 846             first_reg = avregs->first_free_reg;
 847     }
 848     if (first_reg + size / 4 <= 16) {
 849         avregs->first_free_reg = first_reg + size / 4;
 850         return first_reg;
 851     }
 852     avregs->first_free_reg = -1;
 853     return -1;
 854 }
 855
 856 /* Returns whether all params need to be passed in core registers or not.
 857    This is the case for function part of the runtime ABI. */
 858 int floats_in_core_regs(SValue* sval)
 859 {
 860     if (!sval->sym)
 861         return 0;
 862
 863     switch (sval->sym->v) {
 864     case TOK___floatundisf:
 865     case TOK___floatundidf:
 866     case TOK___fixunssfdi:
 867     case TOK___fixunsdfdi:
 868 #ifndef TCC_ARM_VFP
 869     case TOK___fixunsxfdi:
 870 #endif
 871     case TOK___floatdisf:
 872     case TOK___floatdidf:
 873     case TOK___fixsfdi:
 874     case TOK___fixdfdi:
 875         return 1;
 876
 877     default:
 878         return 0;
 879     }
 880 }
 881
 882 ST_FUNC int regargs_nregs(RegArgs* args)
 883 {
 884     return *args;
 885 }
 886
 887 /* Return the number of registers needed to return the struct, or 0 if
 888    returning via struct pointer. */
 889 ST_FUNC int gfunc_sret(CType* vt, int variadic, CType* ret, int* ret_align,
 890                        int* regsize, RegArgs* args)
 891 {
 892 #ifdef TCC_ARM_EABI
 893     int size, align;
 894     size = type_size(vt, &align);
 895     if (float_abi == ARM_HARD_FLOAT && !variadic &&
 896         (is_float(vt->t) || is_hgen_float_aggr(vt))) {
 897         *ret_align = 8;
 898         *regsize = 8;
 899         ret->ref = NULL;
 900         ret->t = VT_DOUBLE;
 901         *args = (size + 7) >> 3;
 902     } else if (size <= 4) {
 903         *ret_align = 4;
 904         *regsize = 4;
 905         ret->ref = NULL;
 906         ret->t = VT_INT;
 907         *args = 1;
 908     } else
 909         *args = 0;
 910 #else
 911     *args = 0;
 912 #endif
 913
 914     return *args != 0;
 915 }
 916
 917 /* Parameters are classified according to how they are copied to their final
 918    destination for the function call. Because the copying is performed class
 919    after class according to the order in the union below, it is important that
 920    some constraints about the order of the members of this union are respected:
 921    - CORE_STRUCT_CLASS must come after STACK_CLASS;
 922    - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
 923      VFP_STRUCT_CLASS;
 924    - VFP_STRUCT_CLASS must come after VFP_CLASS.
 925    See the comment for the main loop in copy_params() for the reason. */
 926 enum reg_class {
 927     STACK_CLASS = 0,
 928     CORE_STRUCT_CLASS,
 929     VFP_CLASS,
 930     VFP_STRUCT_CLASS,
 931     CORE_CLASS,
 932     NB_CLASSES
 933 };
 934
 935 struct param_plan {
 936     int start;    /* first reg or addr used depending on the class */
 937     int end;      /* last reg used or next free addr depending on the class */
 938     SValue* sval; /* pointer to SValue on the value stack */
 939     struct param_plan* prev; /*  previous element in this class */
 940 };
 941
 942 struct plan {
 943     struct param_plan* pplans; /* array of all the param plans */
 944     struct param_plan*
 945         clsplans[NB_CLASSES]; /* per class lists of param plans */
 946 };
 947
 948 #define add_param_plan(plan, pplan, class)                                     \
 949     do {                                                                       \
 950         pplan.prev = plan->clsplans[class];                                    \
 951         plan->pplans[plan##_nb] = pplan;                                       \
 952         plan->clsplans[class] = &plan->pplans[plan##_nb++];                    \
 953     } while (0)
 954
 955 /* Assign parameters to registers and stack with alignment according to the
 956    rules in the procedure call standard for the ARM architecture (AAPCS).
 957    The overall assignment is recorded in an array of per parameter structures
 958    called parameter plans. The parameter plans are also further organized in a
 959    number of linked lists, one per class of parameter (see the comment for the
 960    definition of union reg_class).
 961
 962    nb_args: number of parameters of the function for which a call is generated
 963    float_abi: float ABI in use for this function call
 964    plan: the structure where the overall assignment is recorded
 965    todo: a bitmap that record which core registers hold a parameter
 966
 967    Returns the amount of stack space needed for parameter passing
 968
 969    Note: this function allocated an array in plan->pplans with tcc_malloc. It
 970    is the responsibility of the caller to free this array once used (ie not
 971    before copy_params). */
 972 static int assign_regs(int nb_args, int float_abi, struct plan* plan, int* todo)
 973 {
 974     int i, size, align;
 975     int ncrn /* next core register number */,
 976         nsaa /* next stacked argument address*/;
 977     int plan_nb = 0;
 978     struct param_plan pplan;
 979     struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
 980
 981     ncrn = nsaa = 0;
 982     *todo = 0;
 983     plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
 984     memset(plan->clsplans, 0, sizeof(plan->clsplans));
 985     for (i = nb_args; i--;) {
 986         int j, start_vfpreg = 0;
 987         CType type = vtop[-i].type;
 988         type.t &= ~VT_ARRAY;
 989         size = type_size(&type, &align);
 990         size = (size + 3) & ~3;
 991         align = (align + 3) & ~3;
 992         switch (vtop[-i].type.t & VT_BTYPE) {
 993         case VT_STRUCT:
 994         case VT_FLOAT:
 995         case VT_DOUBLE:
 996         case VT_LDOUBLE:
 997             if (float_abi == ARM_HARD_FLOAT) {
 998                 int is_hfa = 0; /* Homogeneous float aggregate */
 999
1000                 if (is_float(vtop[-i].type.t) ||
1001                     (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
1002                     int end_vfpreg;
1003
1004                     start_vfpreg = assign_vfpreg(&avregs, align, size);
1005                     end_vfpreg = start_vfpreg + ((size - 1) >> 2);
1006                     if (start_vfpreg >= 0) {
1007                         pplan = (struct param_plan){start_vfpreg, end_vfpreg,
1008                                                     &vtop[-i]};
1009                         if (is_hfa)
1010                             add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
1011                         else
1012                             add_param_plan(plan, pplan, VFP_CLASS);
1013                         continue;
1014                     } else
1015                         break;
1016                 }
1017             }
1018             ncrn = (ncrn + (align - 1) / 4) & ~((align / 4) - 1);
1019             if (ncrn + size / 4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
1020                 /* The parameter is allocated both in core register and on
1021              * stack. As
1022              * such, it can be of either class: it would either be the last of
1023              * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1024                 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
1025                     *todo |= (1 << j);
1026                 pplan = (struct param_plan){ncrn, j, &vtop[-i]};
1027                 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1028                 ncrn += size / 4;
1029                 if (ncrn > 4)
1030                     nsaa = (ncrn - 4) * 4;
1031             } else {
1032                 ncrn = 4;
1033                 break;
1034             }
1035             continue;
1036         default:
1037             if (ncrn < 4) {
1038                 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1039
1040                 if (is_long) {
1041                     ncrn = (ncrn + 1) & -2;
1042                     if (ncrn == 4)
1043                         break;
1044                 }
1045                 pplan = (struct param_plan){ncrn, ncrn, &vtop[-i]};
1046                 ncrn++;
1047                 if (is_long)
1048                     pplan.end = ncrn++;
1049                 add_param_plan(plan, pplan, CORE_CLASS);
1050                 continue;
1051             }
1052         }
1053         nsaa = (nsaa + (align - 1)) & ~(align - 1);
1054         pplan = (struct param_plan){nsaa, nsaa + size, &vtop[-i]};
1055         add_param_plan(plan, pplan, STACK_CLASS);
1056         nsaa += size; /* size already rounded up before */
1057     }
1058     return nsaa;
1059 }
1060
1061 #undef add_param_plan
1062
1063 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1064    function call.
1065
1066    nb_args: number of parameters the function take
1067    plan: the overall assignment plan for parameters
1068    todo: a bitmap indicating what core reg will hold a parameter
1069
1070    Returns the number of SValue added by this function on the value stack */
1071 static int copy_params(int nb_args, struct plan* plan, int todo)
1072 {
1073     int size, align, r, i, nb_extra_sval = 0;
1074     struct param_plan* pplan;
1075
1076     /* Several constraints require parameters to be copied in a specific order:
1077        - structures are copied to the stack before being loaded in a reg;
1078        - floats loaded to an odd numbered VFP reg are first copied to the
1079          preceding even numbered VFP reg and then moved to the next VFP reg.
1080
1081        It is thus important that:
1082        - structures assigned to core regs must be copied after parameters
1083          assigned to the stack but before structures assigned to VFP regs
1084          because a structure can lie partly in core registers and partly on
1085          the stack;
1086        - parameters assigned to the stack and all structures be copied before
1087          parameters assigned to a core reg since copying a parameter to the
1088          stack require using a core reg;
1089        - parameters assigned to VFP regs be copied before structures assigned to
1090          VFP regs as the copy might use an even numbered VFP reg that already
1091          holds part of a structure. */
1092     for (i = 0; i < NB_CLASSES; i++) {
1093         for (pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1094             vpushv(pplan->sval);
1095             pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1096             switch (i) {
1097             case STACK_CLASS:
1098             case CORE_STRUCT_CLASS:
1099             case VFP_STRUCT_CLASS:
1100                 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1101                     int padding = 0;
1102                     size = type_size(&pplan->sval->type, &align);
1103                     /* align to stack align size */
1104                     size = (size + 3) & ~3;
1105                     if (i == STACK_CLASS && pplan->prev)
1106                         padding = pplan->start - pplan->prev->end;
1107                     size += padding; /* Add padding if any */
1108                     /* allocate the necessary size on stack */
1109                     gadd_sp(-size);
1110                     /* generate structure store */
1111                     r = get_reg(RC_INT);
1112                     o(0xE28D0000 | (intr(r) << 12) |
1113                       padding); /* add r, sp, padding */
1114                     vset(&vtop->type, r | VT_LVAL, 0);
1115                     vswap();
1116                     vstore(); /* memcpy to current sp + potential padding */
1117
1118                     /* Homogeneous float aggregate are loaded to VFP registers
1119                        immediately since there is no way of loading data in
1120                        multiple non consecutive VFP registers as what is done
1121                        for other structures (see the use of todo). */
1122                     if (i == VFP_STRUCT_CLASS) {
1123                         int first = pplan->start, nb = pplan->end - first + 1;
1124                         /* vpop.32 {pplan->start, ..., pplan->end} */
1125                         o(0xECBD0A00 | (first & 1) << 22 | (first >> 1) << 12 |
1126                           nb);
1127                         /* No need to write the register used to a SValue since
1128                            VFP regs cannot be used for gcall_or_jmp */
1129                     }
1130                 } else {
1131                     if (is_float(pplan->sval->type.t)) {
1132 #ifdef TCC_ARM_VFP
1133                         r = vfpr(gv(RC_FLOAT)) << 12;
1134                         if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1135                             size = 4;
1136                         else {
1137                             size = 8;
1138                             r |= 0x101; /* vpush.32 -> vpush.64 */
1139                         }
1140                         o(0xED2D0A01 + r); /* vpush */
1141 #else
1142                         r = fpr(gv(RC_FLOAT)) << 12;
1143                         if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1144                             size = 4;
1145                         else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1146                             size = 8;
1147                         else
1148                             size = LDOUBLE_SIZE;
1149
1150                         if (size == 12)
1151                             r |= 0x400000;
1152                         else if (size == 8)
1153                             r |= 0x8000;
1154
1155                         o(0xED2D0100 | r | (size >> 2)); /* some kind of vpush for FPA */
1156 #endif
1157                     } else {
1158                         /* simple type (currently always same size) */
1159                         /* XXX: implicit cast ? */
1160                         size = 4;
1161                         if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1162                             lexpand_nr();
1163                             size = 8;
1164                             r = gv(RC_INT);
1165                             o(0xE52D0004 | (intr(r) << 12)); /* push r */
1166                             vtop--;
1167                         }
1168                         r = gv(RC_INT);
1169                         o(0xE52D0004 | (intr(r) << 12)); /* push r */
1170                     }
1171                     if (i == STACK_CLASS && pplan->prev)
1172                         gadd_sp(pplan->prev->end -
1173                                 pplan->start); /* Add padding if any */
1174                 }
1175                 break;
1176
1177             case VFP_CLASS:
1178                 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1179                 if (pplan->start &
1180                     1) { /* Must be in upper part of double register */
1181                     o(0xEEF00A40 | ((pplan->start >> 1) << 12) |
1182                       (pplan->start >> 1)); /* vmov.f32 s(n+1), sn */
1183                     vtop->r =
1184                         VT_CONST; /* avoid being saved on stack by gv for next
1185                                      float */
1186                 }
1187                 break;
1188
1189             case CORE_CLASS:
1190                 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1191                     lexpand_nr();
1192                     gv(regmask(pplan->end));
1193                     pplan->sval->r2 = vtop->r;
1194                     vtop--;
1195                 }
1196                 gv(regmask(pplan->start));
1197                 /* Mark register as used so that gcall_or_jmp use another one
1198                    (regs >=4 are free as never used to pass parameters) */
1199                 pplan->sval->r = vtop->r;
1200                 break;
1201             }
1202             vtop--;
1203         }
1204     }
1205
1206     /* Manually free remaining registers since next parameters are loaded
1207      * manually, without the help of gv(int). */
1208     save_regs(nb_args);
1209
1210     if (todo) {
1211         o(0xE8BD0000 | todo); /* pop {todo} */
1212         for (pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan;
1213              pplan = pplan->prev) {
1214             int r;
1215             pplan->sval->r = pplan->start;
1216             /* An SValue can only pin 2 registers at best (r and r2) but a
1217               structure can occupy more than 2 registers. Thus, we need to
1218               push on the value stack some fake parameter to have on SValue
1219               for each registers used by a structure (r2 is not used). */
1220             for (r = pplan->start + 1; r <= pplan->end; r++) {
1221                 if (todo & (1 << r)) {
1222                     nb_extra_sval++;
1223                     vpushi(0);
1224                     vtop->r = r;
1225                 }
1226             }
1227         }
1228     }
1229     return nb_extra_sval;
1230 }
1231
1232 /* Generate function call. The function address is pushed first, then
1233    all the parameters in call order. This functions pops all the
1234    parameters and the function address. */
1235 void gfunc_call(int nb_args)
1236 {
1237     int r, args_size;
1238     int def_float_abi = float_abi;
1239     int todo;
1240     struct plan plan;
1241
1242 #ifdef TCC_ARM_EABI
1243     int variadic;
1244
1245     if (float_abi == ARM_HARD_FLOAT) {
1246         variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1247         if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1248             float_abi = ARM_SOFTFP_FLOAT;
1249     }
1250 #endif
1251     /* cannot let cpu flags if other instruction are generated. Also avoid
1252        leaving VT_JMP anywhere except on the top of the stack because it
1253        would complicate the code generator. */
1254     r = vtop->r & VT_VALMASK;
1255     if (r == VT_CMP || (r & ~1) == VT_JMP)
1256         gv(RC_INT);
1257
1258     args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1259
1260 #ifdef TCC_ARM_EABI
1261     if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1262         args_size = (args_size + 7) & ~7;
1263         o(0xE24DD004); /* sub sp, sp, #4 */
1264     }
1265 #endif
1266
1267     nb_args += copy_params(nb_args, &plan, todo);
1268     tcc_free(plan.pplans);
1269
1270     /* Move fct SValue on top as required by gcall_or_jmp */
1271     vrotb(nb_args + 1);
1272     gcall_or_jmp(0);
1273     if (args_size)
1274         gadd_sp(args_size); /* pop all parameters passed on the stack */
1275 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1276     if (float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1277         if ((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1278             o(0xEE000A10); /*vmov s0, r0 */
1279         } else {
1280             o(0xEE000B10); /* vmov.32 d0[0], r0 */
1281             o(0xEE201B10); /* vmov.32 d0[1], r1 */
1282         }
1283     }
1284 #endif
1285     vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1286     leaffunc =
1287         0; /* we are calling a function, so we aren't in a leaf function */
1288     float_abi = def_float_abi;
1289 }
1290
1291 /* generate function prolog of type 't' */
1292 void gfunc_prolog(CType* func_type)
1293 {
1294     Sym* sym, *sym2;
1295     int n, nf, size, align, rs, struct_ret = 0;
1296     int addr, pn, sn; /* pn=core, sn=stack */
1297     CType ret_type;
1298     RegArgs dummy;
1299
1300 #ifdef TCC_ARM_EABI
1301     struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1302 #endif
1303
1304     sym = func_type->ref;
1305     func_vt = sym->type;
1306     func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1307
1308     n = nf = 0;
1309     if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1310         !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs, &dummy)) {
1311         n++;
1312         struct_ret = 1;
1313         func_vc = 12; /* Offset from fp of the place to store the result */
1314     }
1315     for (sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1316         size = type_size(&sym2->type, &align);
1317 #ifdef TCC_ARM_EABI
1318         if (float_abi == ARM_HARD_FLOAT && !func_var &&
1319             (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1320             int tmpnf = assign_vfpreg(&avregs, align, size);
1321             tmpnf += (size + 3) / 4;
1322             nf = (tmpnf > nf) ? tmpnf : nf;
1323         } else
1324 #endif
1325             if (n < 4)
1326             n += (size + 3) / 4;
1327     }
1328     o(0xE1A0C00D); /* mov ip,sp */
1329     if (func_var)
1330         n = 4;
1331     if (n) {
1332         if (n > 4)
1333             n = 4;
1334 #ifdef TCC_ARM_EABI
1335         n = (n + 1) & -2;
1336 #endif
1337         o(0xE92D0000 | ((1 << n) - 1)); /* save r0-r4 on stack if needed */
1338     }
1339     if (nf) {
1340         if (nf > 16)
1341             nf = 16;
1342         nf = (nf + 1) & -2; /* nf => HARDFLOAT => EABI */
1343         o(0xED2D0A00 | nf); /* save s0-s15 on stack if needed */
1344     }
1345     o(0xE92D5800); /* save fp, ip, lr */
1346     o(0xE1A0B00D); /* mov fp, sp */
1347     func_sub_sp_offset = ind;
1348     o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1349
1350 #ifdef TCC_ARM_EABI
1351     if (float_abi == ARM_HARD_FLOAT) {
1352         func_vc += nf * 4;
1353         avregs = AVAIL_REGS_INITIALIZER;
1354     }
1355 #endif
1356     pn = struct_ret, sn = 0;
1357     while ((sym = sym->next)) {
1358         CType* type;
1359         type = &sym->type;
1360         size = type_size(type, &align);
1361         size = (size + 3) >> 2;
1362         align = (align + 3) & ~3;
1363 #ifdef TCC_ARM_EABI
1364         if (float_abi == ARM_HARD_FLOAT && !func_var &&
1365             (is_float(sym->type.t) || is_hgen_float_aggr(&sym->type))) {
1366             int fpn = assign_vfpreg(&avregs, align, size << 2);
1367             if (fpn >= 0)
1368                 addr = fpn * 4;
1369             else
1370                 goto from_stack;
1371         } else
1372 #endif
1373             if (pn < 4) {
1374 #ifdef TCC_ARM_EABI
1375             pn = (pn + (align - 1) / 4) & -(align / 4);
1376 #endif
1377             addr = (nf + pn) * 4;
1378             pn += size;
1379             if (!sn && pn > 4)
1380                 sn = (pn - 4);
1381         } else {
1382 #ifdef TCC_ARM_EABI
1383         from_stack:
1384             sn = (sn + (align - 1) / 4) & -(align / 4);
1385 #endif
1386             addr = (n + nf + sn) * 4;
1387             sn += size;
1388         }
1389         sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1390                  addr + 12);
1391     }
1392     last_itod_magic = 0;
1393     leaffunc = 1;
1394     loc = 0;
1395 }
1396
1397 /* generate function epilog */
1398 void gfunc_epilog(void)
1399 {
1400     uint32_t x;
1401     int diff;
1402 /* Copy float return value to core register if base standard is used and
1403    float computation is made with VFP */
1404 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1405     if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1406         if ((func_vt.t & VT_BTYPE) == VT_FLOAT)
1407             o(0xEE100A10); /* fmrs r0, s0 */
1408         else {
1409             o(0xEE100B10); /* fmrdl r0, d0 */
1410             o(0xEE301B10); /* fmrdh r1, d0 */
1411         }
1412     }
1413 #endif
1414     o(0xE89BA800); /* restore fp, sp, pc */
1415     diff = (-loc + 3) & -4;
1416 #ifdef TCC_ARM_EABI
1417     if (!leaffunc)
1418         diff = ((diff + 11) & -8) - 4;
1419 #endif
1420     if (diff > 0) {
1421         x = stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1422         if (x)
1423             *(uint32_t*)(cur_text_section->data + func_sub_sp_offset) = x;
1424         else {
1425             int addr;
1426             addr = ind;
1427             o(0xE59FC004); /* ldr ip,[pc+4] */
1428             o(0xE04BD00C); /* sub sp,fp,ip  */
1429             o(0xE1A0F00E); /* mov pc,lr */
1430             o(diff);
1431             *(uint32_t*)(cur_text_section->data + func_sub_sp_offset) =
1432                 0xE1000000 | encbranch(func_sub_sp_offset, addr, 1);
1433         }
1434     }
1435 }
1436
1437 /* generate a jump to a label */
1438 int gjmp(int t)
1439 {
1440     int r;
1441     r = ind;
1442     o(0xE0000000 | encbranch(r, t, 1));
1443     return r;
1444 }
1445
1446 /* generate a jump to a fixed address */
1447 void gjmp_addr(int a)
1448 {
1449     gjmp(a);
1450 }
1451
1452 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1453 int gtst(int inv, int t)
1454 {
1455     int v, r;
1456     uint32_t op;
1457     v = vtop->r & VT_VALMASK;
1458     r = ind;
1459     if (v == VT_CMP) {
1460         op = mapcc(inv ? negcc(vtop->c.i) : vtop->c.i);
1461         op |= encbranch(r, t, 1);
1462         o(op);
1463         t = r;
1464     } else if (v == VT_JMP || v == VT_JMPI) {
1465         if ((v & 1) == inv) {
1466             if (!vtop->c.i)
1467                 vtop->c.i = t;
1468             else {
1469                 uint32_t* x;
1470                 int p, lp;
1471                 if (t) {
1472                     p = vtop->c.i;
1473                     do {
1474                         p = decbranch(lp = p);
1475                     } while (p);
1476                     x = (uint32_t*)(cur_text_section->data + lp);
1477                     *x &= 0xff000000;
1478                     *x |= encbranch(lp, t, 1);
1479                 }
1480                 t = vtop->c.i;
1481             }
1482         } else {
1483             t = gjmp(t);
1484             gsym(vtop->c.i);
1485         }
1486     }
1487     vtop--;
1488     return t;
1489 }
1490
1491 /* generate an integer binary operation */
1492 void gen_opi(int op)
1493 {
1494     int c, func = 0;
1495     uint32_t opc = 0, r, fr;
1496     unsigned short retreg = REG_IRET;
1497
1498     c = 0;
1499     switch (op) {
1500     case '+':
1501         opc = 0x8;
1502         c = 1;
1503         break;
1504     case TOK_ADDC1: /* add with carry generation */
1505         opc = 0x9;
1506         c = 1;
1507         break;
1508     case '-':
1509         opc = 0x4;
1510         c = 1;
1511         break;
1512     case TOK_SUBC1: /* sub with carry generation */
1513         opc = 0x5;
1514         c = 1;
1515         break;
1516     case TOK_ADDC2: /* add with carry use */
1517         opc = 0xA;
1518         c = 1;
1519         break;
1520     case TOK_SUBC2: /* sub with carry use */
1521         opc = 0xC;
1522         c = 1;
1523         break;
1524     case '&':
1525         opc = 0x0;
1526         c = 1;
1527         break;
1528     case '^':
1529         opc = 0x2;
1530         c = 1;
1531         break;
1532     case '|':
1533         opc = 0x18;
1534         c = 1;
1535         break;
1536     case '*':
1537         gv2(RC_INT, RC_INT);
1538         r = vtop[-1].r;
1539         fr = vtop[0].r;
1540         vtop--;
1541         o(0xE0000090 | (intr(r) << 16) | (intr(r) << 8) | intr(fr));
1542         return;
1543     case TOK_SHL:
1544         opc = 0;
1545         c = 2;
1546         break;
1547     case TOK_SHR:
1548         opc = 1;
1549         c = 2;
1550         break;
1551     case TOK_SAR:
1552         opc = 2;
1553         c = 2;
1554         break;
1555     case '/':
1556     case TOK_PDIV:
1557         func = TOK___divsi3;
1558         c = 3;
1559         break;
1560     case TOK_UDIV:
1561         func = TOK___udivsi3;
1562         c = 3;
1563         break;
1564     case '%':
1565 #ifdef TCC_ARM_EABI
1566         func = TOK___aeabi_idivmod;
1567         retreg = REG_LRET;
1568 #else
1569         func = TOK___modsi3;
1570 #endif
1571         c = 3;
1572         break;
1573     case TOK_UMOD:
1574 #ifdef TCC_ARM_EABI
1575         func = TOK___aeabi_uidivmod;
1576         retreg = REG_LRET;
1577 #else
1578         func = TOK___umodsi3;
1579 #endif
1580         c = 3;
1581         break;
1582     case TOK_UMULL:
1583         gv2(RC_INT, RC_INT);
1584         r = intr(vtop[-1].r2 = get_reg(RC_INT));
1585         c = vtop[-1].r;
1586         vtop[-1].r = get_reg_ex(RC_INT, regmask(c));
1587         vtop--;
1588         o(0xE0800090 | (r << 16) | (intr(vtop->r) << 12) | (intr(c) << 8) |
1589           intr(vtop[1].r));
1590         return;
1591     default:
1592         opc = 0x15;
1593         c = 1;
1594         break;
1595     }
1596     switch (c) {
1597     case 1:
1598         if ((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1599             if (opc == 4 || opc == 5 || opc == 0xc) {
1600                 vswap();
1601                 opc |= 2; // sub -> rsb
1602             }
1603         }
1604         if ((vtop->r & VT_VALMASK) == VT_CMP ||
1605             (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1606             gv(RC_INT);
1607         vswap();
1608         c = intr(gv(RC_INT));
1609         vswap();
1610         opc = 0xE0000000 | (opc << 20) | (c << 16);
1611         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1612             uint32_t x;
1613             x = stuff_const(opc | 0x2000000, vtop->c.i);
1614             if (x) {
1615                 r = intr(vtop[-1].r = get_reg_ex(RC_INT, regmask(vtop[-1].r)));
1616                 o(x | (r << 12));
1617                 goto done;
1618             }
1619         }
1620         fr = intr(gv(RC_INT));
1621         r = intr(vtop[-1].r =
1622                      get_reg_ex(RC_INT, two2mask(vtop->r, vtop[-1].r)));
1623         o(opc | (r << 12) | fr);
1624     done:
1625         vtop--;
1626         if (op >= TOK_ULT && op <= TOK_GT) {
1627             vtop->r = VT_CMP;
1628             vtop->c.i = op;
1629         }
1630         break;
1631     case 2:
1632         opc = 0xE1A00000 | (opc << 5);
1633         if ((vtop->r & VT_VALMASK) == VT_CMP ||
1634             (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1635             gv(RC_INT);
1636         vswap();
1637         r = intr(gv(RC_INT));
1638         vswap();
1639         opc |= r;
1640         if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1641             fr = intr(vtop[-1].r = get_reg_ex(RC_INT, regmask(vtop[-1].r)));
1642             c = vtop->c.i & 0x1f;
1643             o(opc | (c << 7) | (fr << 12));
1644         } else {
1645             fr = intr(gv(RC_INT));
1646             c = intr(vtop[-1].r =
1647                          get_reg_ex(RC_INT, two2mask(vtop->r, vtop[-1].r)));
1648             o(opc | (c << 12) | (fr << 8) | 0x10);
1649         }
1650         vtop--;
1651         break;
1652     case 3:
1653         vpush_global_sym(&func_old_type, func);
1654         vrott(3);
1655         gfunc_call(2);
1656         vpushi(0);
1657         vtop->r = retreg;
1658         break;
1659     default:
1660         tcc_error("gen_opi %i unimplemented!", op);
1661     }
1662 }
1663
1664 #ifdef TCC_ARM_VFP
1665 static int is_zero(int i)
1666 {
1667     if ((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1668         return 0;
1669     if (vtop[i].type.t == VT_FLOAT)
1670         return (vtop[i].c.f == 0.f);
1671     else if (vtop[i].type.t == VT_DOUBLE)
1672         return (vtop[i].c.d == 0.0);
1673     return (vtop[i].c.ld == 0.l);
1674 }
1675
1676 /* generate a floating point operation 'v = t1 op t2' instruction. The
1677  *    two operands are guaranted to have the same floating point type */
1678 void gen_opf(int op)
1679 {
1680     uint32_t x;
1681     int fneg = 0, r;
1682     x = 0xEE000A00 | T2CPR(vtop->type.t);
1683     switch (op) {
1684     case '+':
1685         if (is_zero(-1))
1686             vswap();
1687         if (is_zero(0)) {
1688             vtop--;
1689             return;
1690         }
1691         x |= 0x300000;
1692         break;
1693     case '-':
1694         x |= 0x300040;
1695         if (is_zero(0)) {
1696             vtop--;
1697             return;
1698         }
1699         if (is_zero(-1)) {
1700             x |= 0x810000; /* fsubX -> fnegX */
1701             vswap();
1702             vtop--;
1703             fneg = 1;
1704         }
1705         break;
1706     case '*':
1707         x |= 0x200000;
1708         break;
1709     case '/':
1710         x |= 0x800000;
1711         break;
1712     default:
1713         if (op < TOK_ULT || op > TOK_GT) {
1714             tcc_error("unknown fp op %x!", op);
1715             return;
1716         }
1717         if (is_zero(-1)) {
1718             vswap();
1719         switch(op) {
1720           case TOK_LT: op = TOK_GT; break;
1721           case TOK_GE: op = TOK_ULE; break;
1722           case TOK_LE: op = TOK_GE; break;
1723           case TOK_GT: op = TOK_ULT; break;
1724         }
1725         }
1726         x |= 0xB40040; /* fcmpX */
1727         if (op != TOK_EQ && op != TOK_NE)
1728             x |= 0x80; /* fcmpX -> fcmpeX */
1729         if (is_zero(0)) {
1730             vtop--;
1731             o(x | 0x10000 |
1732               (vfpr(gv(RC_FLOAT)) << 12)); /* fcmp(e)X -> fcmp(e)zX */
1733         } else {
1734             x |= vfpr(gv(RC_FLOAT));
1735             vswap();
1736             o(x | (vfpr(gv(RC_FLOAT)) << 12));
1737             vtop--;
1738         }
1739         o(0xEEF1FA10); /* fmstat */
1740
1741       switch(op) {
1742         case TOK_LE:  op = TOK_ULE; break;
1743         case TOK_LT:  op = TOK_ULT; break;
1744         case TOK_UGE: op = TOK_GE; break;
1745         case TOK_UGT: op = TOK_GT; break;
1746       }
1747
1748         vtop->r = VT_CMP;
1749         vtop->c.i = op;
1750         return;
1751     }
1752     r = gv(RC_FLOAT);
1753     x |= vfpr(r);
1754     r = regmask(r);
1755     if (!fneg) {
1756         int r2;
1757         vswap();
1758         r2 = gv(RC_FLOAT);
1759         x |= vfpr(r2) << 16;
1760         r |= regmask(r2);
1761     }
1762     vtop->r = get_reg_ex(RC_FLOAT, r);
1763     if (!fneg)
1764         vtop--;
1765     o(x | (vfpr(vtop->r) << 12));
1766 }
1767
1768 #else
1769 static uint32_t is_fconst()
1770 {
1771     long double f;
1772     uint32_t r;
1773     if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1774         return 0;
1775     if (vtop->type.t == VT_FLOAT)
1776         f = vtop->c.f;
1777     else if (vtop->type.t == VT_DOUBLE)
1778         f = vtop->c.d;
1779     else
1780         f = vtop->c.ld;
1781     if (!ieee_finite(f))
1782         return 0;
1783     r = 0x8;
1784     if (f < 0.0) {
1785         r = 0x18;
1786         f = -f;
1787     }
1788     if (f == 0.0)
1789         return r;
1790     if (f == 1.0)
1791         return r | 1;
1792     if (f == 2.0)
1793         return r | 2;
1794     if (f == 3.0)
1795         return r | 3;
1796     if (f == 4.0)
1797         return r | 4;
1798     if (f == 5.0)
1799         return r | 5;
1800     if (f == 0.5)
1801         return r | 6;
1802     if (f == 10.0)
1803         return r | 7;
1804     return 0;
1805 }
1806
1807 /* generate a floating point operation 'v = t1 op t2' instruction. The
1808    two operands are guaranted to have the same floating point type */
1809 void gen_opf(int op)
1810 {
1811     uint32_t x, r, r2, c1, c2;
1812     // fputs("gen_opf\n",stderr);
1813     vswap();
1814     c1 = is_fconst();
1815     vswap();
1816     c2 = is_fconst();
1817     x = 0xEE000100;
1818 #if LDOUBLE_SIZE == 8
1819     if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1820         x |= 0x80;
1821 #else
1822     if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1823         x |= 0x80;
1824     else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1825         x |= 0x80000;
1826 #endif
1827     switch (op) {
1828     case '+':
1829         if (!c2) {
1830             vswap();
1831             c2 = c1;
1832         }
1833         vswap();
1834         r = fpr(gv(RC_FLOAT));
1835         vswap();
1836         if (c2) {
1837             if (c2 > 0xf)
1838                 x |= 0x200000; // suf
1839             r2 = c2 & 0xf;
1840         } else {
1841             r2 = fpr(gv(RC_FLOAT));
1842         }
1843         break;
1844     case '-':
1845         if (c2) {
1846             if (c2 <= 0xf)
1847                 x |= 0x200000; // suf
1848             r2 = c2 & 0xf;
1849             vswap();
1850             r = fpr(gv(RC_FLOAT));
1851             vswap();
1852         } else if (c1 && c1 <= 0xf) {
1853             x |= 0x300000; // rsf
1854             r2 = c1;
1855             r = fpr(gv(RC_FLOAT));
1856             vswap();
1857         } else {
1858             x |= 0x200000; // suf
1859             vswap();
1860             r = fpr(gv(RC_FLOAT));
1861             vswap();
1862             r2 = fpr(gv(RC_FLOAT));
1863         }
1864         break;
1865     case '*':
1866         if (!c2 || c2 > 0xf) {
1867             vswap();
1868             c2 = c1;
1869         }
1870         vswap();
1871         r = fpr(gv(RC_FLOAT));
1872         vswap();
1873         if (c2 && c2 <= 0xf)
1874             r2 = c2;
1875         else
1876             r2 = fpr(gv(RC_FLOAT));
1877         x |= 0x100000; // muf
1878         break;
1879     case '/':
1880         if (c2 && c2 <= 0xf) {
1881             x |= 0x400000; // dvf
1882             r2 = c2;
1883             vswap();
1884             r = fpr(gv(RC_FLOAT));
1885             vswap();
1886         } else if (c1 && c1 <= 0xf) {
1887             x |= 0x500000; // rdf
1888             r2 = c1;
1889             r = fpr(gv(RC_FLOAT));
1890             vswap();
1891         } else {
1892             x |= 0x400000; // dvf
1893             vswap();
1894             r = fpr(gv(RC_FLOAT));
1895             vswap();
1896             r2 = fpr(gv(RC_FLOAT));
1897         }
1898         break;
1899     default:
1900         if (op >= TOK_ULT && op <= TOK_GT) {
1901             x |= 0xd0f110; // cmfe
1902                            /* bug (intention?) in Linux FPU emulator
1903                               doesn't set carry if equal */
1904             switch (op) {
1905             case TOK_ULT:
1906             case TOK_UGE:
1907             case TOK_ULE:
1908             case TOK_UGT:
1909                 tcc_error("unsigned comparison on floats?");
1910                 break;
1911             case TOK_LT:
1912                 op = TOK_Nset;
1913                 break;
1914             case TOK_LE:
1915                 op = TOK_ULE; /* correct in unordered case only if AC bit in
1916                                  FPSR set */
1917                 break;
1918             case TOK_EQ:
1919             case TOK_NE:
1920                 x &= ~0x400000; // cmfe -> cmf
1921                 break;
1922             }
1923             if (c1 && !c2) {
1924                 c2 = c1;
1925                 vswap();
1926                 switch (op) {
1927                 case TOK_Nset:
1928                     op = TOK_GT;
1929                     break;
1930                 case TOK_GE:
1931                     op = TOK_ULE;
1932                     break;
1933                 case TOK_ULE:
1934                     op = TOK_GE;
1935                     break;
1936                 case TOK_GT:
1937                     op = TOK_Nset;
1938                     break;
1939                 }
1940             }
1941             vswap();
1942             r = fpr(gv(RC_FLOAT));
1943             vswap();
1944             if (c2) {
1945                 if (c2 > 0xf)
1946                     x |= 0x200000;
1947                 r2 = c2 & 0xf;
1948             } else {
1949                 r2 = fpr(gv(RC_FLOAT));
1950             }
1951             vtop[-1].r = VT_CMP;
1952             vtop[-1].c.i = op;
1953         } else {
1954             tcc_error("unknown fp op %x!", op);
1955             return;
1956         }
1957     }
1958     if (vtop[-1].r == VT_CMP)
1959         c1 = 15;
1960     else {
1961         c1 = vtop->r;
1962         if (r2 & 0x8)
1963             c1 = vtop[-1].r;
1964         vtop[-1].r = get_reg_ex(RC_FLOAT, two2mask(vtop[-1].r, c1));
1965         c1 = fpr(vtop[-1].r);
1966     }
1967     vtop--;
1968     o(x | (r << 16) | (c1 << 12) | r2);
1969 }
1970 #endif
1971
1972 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1973    and 'long long' cases. */
1974 ST_FUNC void gen_cvt_itof1(int t)
1975 {
1976     uint32_t r, r2;
1977     int bt;
1978     bt = vtop->type.t & VT_BTYPE;
1979     if (bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1980 #ifndef TCC_ARM_VFP
1981         uint32_t dsize = 0;
1982 #endif
1983         r = intr(gv(RC_INT));
1984 #ifdef TCC_ARM_VFP
1985         r2 = vfpr(vtop->r = get_reg(RC_FLOAT));
1986         o(0xEE000A10 | (r << 12) | (r2 << 16)); /* fmsr */
1987         r2 |= r2 << 12;
1988         if (!(vtop->type.t & VT_UNSIGNED))
1989             r2 |= 0x80;                /* fuitoX -> fsituX */
1990         o(0xEEB80A40 | r2 | T2CPR(t)); /* fYitoX*/
1991 #else
1992         r2 = fpr(vtop->r = get_reg(RC_FLOAT));
1993         if ((t & VT_BTYPE) != VT_FLOAT)
1994             dsize = 0x80;                               /* flts -> fltd */
1995         o(0xEE000110 | dsize | (r2 << 16) | (r << 12)); /* flts */
1996         if ((vtop->type.t & (VT_UNSIGNED | VT_BTYPE)) ==
1997             (VT_UNSIGNED | VT_INT)) {
1998             uint32_t off = 0;
1999             o(0xE3500000 | (r << 12)); /* cmp */
2000             r = fpr(get_reg(RC_FLOAT));
2001             if (last_itod_magic) {
2002                 off = ind + 8 - last_itod_magic;
2003                 off /= 4;
2004                 if (off > 255)
2005                     off = 0;
2006             }
2007             o(0xBD1F0100 | (r << 12) | off); /* ldflts */
2008             if (!off) {
2009                 o(0xEA000000); /* b */
2010                 last_itod_magic = ind;
2011                 o(0x4F800000); /* 4294967296.0f */
2012             }
2013             o(0xBE000100 | dsize | (r2 << 16) | (r2 << 12) | r); /* adflt */
2014         }
2015 #endif
2016         return;
2017     } else if (bt == VT_LLONG) {
2018         int func;
2019         CType* func_type = 0;
2020         if ((t & VT_BTYPE) == VT_FLOAT) {
2021             func_type = &func_float_type;
2022             if (vtop->type.t & VT_UNSIGNED)
2023                 func = TOK___floatundisf;
2024             else
2025                 func = TOK___floatdisf;
2026 #if LDOUBLE_SIZE != 8
2027         } else if ((t & VT_BTYPE) == VT_LDOUBLE) {
2028             func_type = &func_ldouble_type;
2029             if (vtop->type.t & VT_UNSIGNED)
2030                 func = TOK___floatundixf;
2031             else
2032                 func = TOK___floatdixf;
2033         } else if ((t & VT_BTYPE) == VT_DOUBLE) {
2034 #else
2035         } else if ((t & VT_BTYPE) == VT_DOUBLE ||
2036                    (t & VT_BTYPE) == VT_LDOUBLE) {
2037 #endif
2038             func_type = &func_double_type;
2039             if (vtop->type.t & VT_UNSIGNED)
2040                 func = TOK___floatundidf;
2041             else
2042                 func = TOK___floatdidf;
2043         }
2044         if (func_type) {
2045             vpush_global_sym(func_type, func);
2046             vswap();
2047             gfunc_call(1);
2048             vpushi(0);
2049             vtop->r = TREG_F0;
2050             return;
2051         }
2052     }
2053     tcc_error("unimplemented gen_cvt_itof %x!", vtop->type.t);
2054 }
2055
2056 /* convert fp to int 't' type */
2057 void gen_cvt_ftoi(int t)
2058 {
2059     uint32_t r, r2;
2060     int u, func = 0;
2061     u = t & VT_UNSIGNED;
2062     t &= VT_BTYPE;
2063     r2 = vtop->type.t & VT_BTYPE;
2064     if (t == VT_INT) {
2065 #ifdef TCC_ARM_VFP
2066         r = vfpr(gv(RC_FLOAT));
2067         u = u ? 0 : 0x10000;
2068         o(0xEEBC0AC0 | (r << 12) | r | T2CPR(r2) | u); /* ftoXizY */
2069         r2 = intr(vtop->r = get_reg(RC_INT));
2070         o(0xEE100A10 | (r << 16) | (r2 << 12));
2071         return;
2072 #else
2073         if (u) {
2074             if (r2 == VT_FLOAT)
2075                 func = TOK___fixunssfsi;
2076 #if LDOUBLE_SIZE != 8
2077             else if (r2 == VT_LDOUBLE)
2078                 func = TOK___fixunsxfsi;
2079             else if (r2 == VT_DOUBLE)
2080 #else
2081             else if (r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2082 #endif
2083                 func = TOK___fixunsdfsi;
2084         } else {
2085             r = fpr(gv(RC_FLOAT));
2086             r2 = intr(vtop->r = get_reg(RC_INT));
2087             o(0xEE100170 | (r2 << 12) | r);
2088             return;
2089         }
2090 #endif
2091     } else if (t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2092         if (r2 == VT_FLOAT)
2093             func = TOK___fixsfdi;
2094 #if LDOUBLE_SIZE != 8
2095         else if (r2 == VT_LDOUBLE)
2096             func = TOK___fixxfdi;
2097         else if (r2 == VT_DOUBLE)
2098 #else
2099         else if (r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2100 #endif
2101             func = TOK___fixdfdi;
2102     }
2103     if (func) {
2104         vpush_global_sym(&func_old_type, func);
2105         vswap();
2106         gfunc_call(1);
2107         vpushi(0);
2108         if (t == VT_LLONG)
2109             vtop->r2 = REG_LRET;
2110         vtop->r = REG_IRET;
2111         return;
2112     }
2113     tcc_error("unimplemented gen_cvt_ftoi!");
2114 }
2115
2116 /* convert from one floating point type to another */
2117 void gen_cvt_ftof(int t)
2118 {
2119 #ifdef TCC_ARM_VFP
2120     if (((vtop->type.t & VT_BTYPE) == VT_FLOAT) !=
2121         ((t & VT_BTYPE) == VT_FLOAT)) {
2122         uint32_t r = vfpr(gv(RC_FLOAT));
2123         o(0xEEB70AC0 | (r << 12) | r | T2CPR(vtop->type.t));
2124     }
2125 #else
2126     /* all we have to do on i386 and FPA ARM is to put the float in a register
2127      */
2128     gv(RC_FLOAT);
2129 #endif
2130 }
2131
2132 /* computed goto support */
2133 void ggoto(void)
2134 {
2135     gcall_or_jmp(1);
2136     vtop--;
2137 }
2138
2139 /* Save the stack pointer onto the stack and return the location of its address
2140  */
2141 ST_FUNC void gen_vla_sp_save(int addr)
2142 {
2143     tcc_error("variable length arrays unsupported for this target");
2144 }
2145
2146 /* Restore the SP from a location on the stack */
2147 ST_FUNC void gen_vla_sp_restore(int addr)
2148 {
2149     tcc_error("variable length arrays unsupported for this target");
2150 }
2151
2152 /* Subtract from the stack pointer, and push the resulting value onto the stack
2153  */
2154 ST_FUNC void gen_vla_alloc(CType* type, int align)
2155 {
2156     tcc_error("variable length arrays unsupported for this target");
2157 }
2158
2159 /* end of ARM code generator */
2160 /*************************************************************/
2161 #endif
2162 /*************************************************************/