libffi/src/powerpc/ffi_linux64.c

   1 /* -----------------------------------------------------------------------
   2    ffi_linux64.c - Copyright (C) 2013 IBM
   3                    Copyright (C) 2011 Anthony Green
   4                    Copyright (C) 2011 Kyle Moffett
   5                    Copyright (C) 2008 Red Hat, Inc
   6                    Copyright (C) 2007, 2008 Free Software Foundation, Inc
   7                    Copyright (c) 1998 Geoffrey Keating
   8
   9    PowerPC Foreign Function Interface
  10
  11    Permission is hereby granted, free of charge, to any person obtaining
  12    a copy of this software and associated documentation files (the
  13    ``Software''), to deal in the Software without restriction, including
  14    without limitation the rights to use, copy, modify, merge, publish,
  15    distribute, sublicense, and/or sell copies of the Software, and to
  16    permit persons to whom the Software is furnished to do so, subject to
  17    the following conditions:
  18
  19    The above copyright notice and this permission notice shall be included
  20    in all copies or substantial portions of the Software.
  21
  22    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
  23    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  25    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
  26    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  27    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28    OTHER DEALINGS IN THE SOFTWARE.
  29    ----------------------------------------------------------------------- */
  30
  31 #include "ffi.h"
  32
  33 #ifdef POWERPC64
  34 #include "ffi_common.h"
  35 #include "ffi_powerpc.h"
  36
  37
  38 /* About the LINUX64 ABI.  */
  39 enum {
  40   NUM_GPR_ARG_REGISTERS64 = 8,
  41   NUM_FPR_ARG_REGISTERS64 = 13,
  42   NUM_VEC_ARG_REGISTERS64 = 12,
  43 };
  44 enum { ASM_NEEDS_REGISTERS64 = 4 };
  45
  46
  47 #if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
  48 /* Adjust size of ffi_type_longdouble.  */
  49 void FFI_HIDDEN
  50 ffi_prep_types_linux64 (ffi_abi abi)
  51 {
  52   if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX)
  53     {
  54       ffi_type_longdouble.size = 8;
  55       ffi_type_longdouble.alignment = 8;
  56     }
  57   else
  58     {
  59       ffi_type_longdouble.size = 16;
  60       ffi_type_longdouble.alignment = 16;
  61     }
  62 }
  63 #endif
  64
  65
  66 static unsigned int
  67 discover_homogeneous_aggregate (ffi_abi abi,
  68                                 const ffi_type *t,
  69                                 unsigned int *elnum)
  70 {
  71   switch (t->type)
  72     {
  73 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
  74     case FFI_TYPE_LONGDOUBLE:
  75       /* 64-bit long doubles are equivalent to doubles. */
  76       if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0)
  77         {
  78           *elnum = 1;
  79           return FFI_TYPE_DOUBLE;
  80         }
  81       /* IBM extended precision values use unaligned pairs
  82          of FPRs, but according to the ABI must be considered
  83          distinct from doubles. They are also limited to a
  84          maximum of four members in a homogeneous aggregate. */
  85       else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0)
  86         {
  87           *elnum = 2;
  88           return FFI_TYPE_LONGDOUBLE;
  89         }
  90       /* Fall through. */
  91 #endif
  92     case FFI_TYPE_FLOAT:
  93     case FFI_TYPE_DOUBLE:
  94       *elnum = 1;
  95       return (int) t->type;
  96
  97     case FFI_TYPE_STRUCT:;
  98       {
  99         unsigned int base_elt = 0, total_elnum = 0;
 100         ffi_type **el = t->elements;
 101         while (*el)
 102           {
 103             unsigned int el_elt, el_elnum = 0;
 104             el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum);
 105             if (el_elt == 0
 106                 || (base_elt && base_elt != el_elt))
 107               return 0;
 108             base_elt = el_elt;
 109             total_elnum += el_elnum;
 110 #if _CALL_ELF == 2
 111             if (total_elnum > 8)
 112               return 0;
 113 #else
 114             if (total_elnum > 1)
 115               return 0;
 116 #endif
 117             el++;
 118           }
 119         *elnum = total_elnum;
 120         return base_elt;
 121       }
 122
 123     default:
 124       return 0;
 125     }
 126 }
 127
 128
/* Perform machine dependent cif processing.

   Checks the long double bits of cif->abi against the way this file
   was compiled, then walks the return type and the argument types to
   compute cif->flags and cif->bytes.  Flag bits already present in
   cif->flags on entry are preserved; cif->bytes is recomputed from
   scratch and rounded up to a multiple of 16.

   Returns FFI_BAD_ABI when the requested long double variant cannot be
   handled by this build, FFI_OK otherwise.  */
static ffi_status
ffi_prep_cif_linux64_core (ffi_cif *cif)
{
  ffi_type **ptr;
  unsigned bytes;
  /* intarg_count counts 8-byte parameter slots; fparg_count and
     vecarg_count count FP and vector register uses respectively.  */
  unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0;
  unsigned flags = cif->flags;
  unsigned elt, elnum, rtype;

#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
  /* If compiled without long double support... */
  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 ||
      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
    return FFI_BAD_ABI;
#elif !defined(__VEC__)
  /* If compiled without vector register support (used by assembly)... */
  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
    return FFI_BAD_ABI;
#else
  /* If the IEEE128 flag is set, but long double is only 64 bits wide... */
  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 &&
      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
    return FFI_BAD_ABI;
#endif

  /* The machine-independent calculation of cif->bytes doesn't work
     for us.  Redo the calculation.  */
#if _CALL_ELF == 2
  /* Space for backchain, CR, LR, TOC and the asm's temp regs.  */
  bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);

  /* Space for the general registers.  */
  bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
#else
  /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
     regs.  */
  bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);

  /* Space for the mandatory parm save area and general registers.  */
  bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
#endif

  /* Return value handling.  */
  rtype = cif->rtype->type;
#if _CALL_ELF == 2
  /* Re-entered from the FFI_TYPE_STRUCT case below with rtype replaced
     by the element type of a homogeneous aggregate, so the aggregate
     picks up the same FP/vector flags as a scalar of that type.  */
homogeneous:
#endif
  switch (rtype)
    {
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
    case FFI_TYPE_LONGDOUBLE:
      /* IEEE128 long double is flagged as a vector return and gets
         none of the FP return flags.  */
      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
        {
          flags |= FLAG_RETURNS_VEC;
          break;
        }
      /* 128-bit (non-IEEE128) long double adds the 128-bit flag on top
         of the double flags; 64-bit long double is just a double.  */
      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
        flags |= FLAG_RETURNS_128BITS;
      /* Fall through.  */
#endif
    case FFI_TYPE_DOUBLE:
      flags |= FLAG_RETURNS_64BITS;
      /* Fall through.  */
    case FFI_TYPE_FLOAT:
      flags |= FLAG_RETURNS_FP;
      break;

    case FFI_TYPE_UINT128:
      flags |= FLAG_RETURNS_128BITS;
      /* Fall through.  */
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
    case FFI_TYPE_POINTER:
      flags |= FLAG_RETURNS_64BITS;
      break;

    case FFI_TYPE_STRUCT:
#if _CALL_ELF == 2
      /* Homogeneous FP aggregates are flagged as small structs and then
         classified again by their element type.  */
      elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum);
      if (elt)
        {
          flags |= FLAG_RETURNS_SMST;
          rtype = elt;
          goto homogeneous;
        }
      /* Other structs of at most 16 bytes are also flagged as small
         structs, with no further return flags.  */
      if (cif->rtype->size <= 16)
        {
          flags |= FLAG_RETURNS_SMST;
          break;
        }
#endif
      /* Everything else is returned by reference: the result pointer
         takes up one parameter slot ahead of the real arguments.  */
      intarg_count++;
      flags |= FLAG_RETVAL_REFERENCE;
      /* Fall through.  */
    case FFI_TYPE_VOID:
      flags |= FLAG_RETURNS_NOTHING;
      break;

    default:
      /* Returns 32-bit integer, or similar.  Nothing to do here.  */
      break;
    }

  /* Argument handling: count the slots and registers each argument
     uses and note whether a parameter save area will be needed.  */
  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
    {
      unsigned int align;

      switch ((*ptr)->type)
        {
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
        case FFI_TYPE_LONGDOUBLE:
          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              vecarg_count++;
              /* Align to 16 bytes, plus the 16-byte argument.  That is:
                 round the slot count up to an even number, then add the
                 two slots the value itself occupies.  */
              intarg_count = (intarg_count + 3) & ~0x1;
              if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
                flags |= FLAG_ARG_NEEDS_PSAVE;
              break;
            }
          /* A 128-bit (non-IEEE128) long double is counted as two
             doubles: once here and once more by falling through.  */
          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
            {
              fparg_count++;
              intarg_count++;
            }
          /* Fall through.  */
#endif
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_FLOAT:
          /* Each FP argument uses one FP register and also one
             parameter slot.  */
          fparg_count++;
          intarg_count++;
          if (fparg_count > NUM_FPR_ARG_REGISTERS64)
            flags |= FLAG_ARG_NEEDS_PSAVE;
          break;

        case FFI_TYPE_STRUCT:
          if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
            {
              /* Convert the struct's alignment, capped at 16 bytes,
                 into slots and align the slot count when that is more
                 than a single slot.  */
              align = (*ptr)->alignment;
              if (align > 16)
                align = 16;
              align = align / 8;
              if (align > 1)
                intarg_count = FFI_ALIGN (intarg_count, align);
            }
          /* The struct takes its size rounded up to whole slots.  */
          intarg_count += ((*ptr)->size + 7) / 8;
          elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
          /* Homogeneous aggregate of IEEE128 long doubles: counted
             against the vector registers.  */
          if (elt == FFI_TYPE_LONGDOUBLE &&
              (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              vecarg_count += elnum;
              if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
                flags |= FLAG_ARG_NEEDS_PSAVE;
              break;
            }
          else
#endif
          /* Any other homogeneous aggregate: counted against the FP
             registers.  */
          if (elt)
            {
              fparg_count += elnum;
              if (fparg_count > NUM_FPR_ARG_REGISTERS64)
                flags |= FLAG_ARG_NEEDS_PSAVE;
            }
          else
            {
              if (intarg_count > NUM_GPR_ARG_REGISTERS64)
                flags |= FLAG_ARG_NEEDS_PSAVE;
            }
          break;

        case FFI_TYPE_POINTER:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_INT:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
          /* Everything else is passed as a 8-byte word in a GPR, either
             the object itself or a pointer to it.  */
          intarg_count++;
          if (intarg_count > NUM_GPR_ARG_REGISTERS64)
            flags |= FLAG_ARG_NEEDS_PSAVE;
          break;
        default:
          /* Any other type code is not expected here.  */
          FFI_ASSERT (0);
        }
    }

  /* Summarise the counts as flag bits.  */
  if (fparg_count != 0)
    flags |= FLAG_FP_ARGUMENTS;
  if (intarg_count > 4)
    flags |= FLAG_4_GPR_ARGUMENTS;
  if (vecarg_count != 0)
    flags |= FLAG_VEC_ARGUMENTS;

  /* Space for the FPR registers, if needed.  */
  if (fparg_count != 0)
    bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
  /* Space for the vector registers, if needed, aligned to 16 bytes. */
  if (vecarg_count != 0) {
    bytes = (bytes + 15) & ~0xF;
    bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128);
  }

  /* Stack space.  */
#if _CALL_ELF == 2
  /* No parameter save area has been counted yet for ELFv2; when one is
     needed, reserve a slot for every counted argument slot.  */
  if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
    bytes += intarg_count * sizeof (long);
#else
  /* The mandatory eight-slot area is already included above; only the
     slots beyond it need adding.  */
  if (intarg_count > NUM_GPR_ARG_REGISTERS64)
    bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
#endif

  /* The stack space allocated needs to be a multiple of 16 bytes.  */
  bytes = (bytes + 15) & ~0xF;

  cif->flags = flags;
  cif->bytes = bytes;

  return FFI_OK;
}
 355
 356 ffi_status FFI_HIDDEN
 357 ffi_prep_cif_linux64 (ffi_cif *cif)
 358 {
 359   if ((cif->abi & FFI_LINUX) != 0)
 360     cif->nfixedargs = cif->nargs;
 361 #if _CALL_ELF != 2
 362   else if (cif->abi == FFI_COMPAT_LINUX64)
 363     {
 364       /* This call is from old code.  Don't touch cif->nfixedargs
 365          since old code will be using a smaller cif.  */
 366       cif->flags |= FLAG_COMPAT;
 367       /* Translate to new abi value.  */
 368       cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
 369     }
 370 #endif
 371   else
 372     return FFI_BAD_ABI;
 373   return ffi_prep_cif_linux64_core (cif);
 374 }
 375
 376 ffi_status FFI_HIDDEN
 377 ffi_prep_cif_linux64_var (ffi_cif *cif,
 378                           unsigned int nfixedargs,
 379                           unsigned int ntotalargs MAYBE_UNUSED)
 380 {
 381   if ((cif->abi & FFI_LINUX) != 0)
 382     cif->nfixedargs = nfixedargs;
 383 #if _CALL_ELF != 2
 384   else if (cif->abi == FFI_COMPAT_LINUX64)
 385     {
 386       /* This call is from old code.  Don't touch cif->nfixedargs
 387          since old code will be using a smaller cif.  */
 388       cif->flags |= FLAG_COMPAT;
 389       /* Translate to new abi value.  */
 390       cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
 391     }
 392 #endif
 393   else
 394     return FFI_BAD_ABI;
 395 #if _CALL_ELF == 2
 396   cif->flags |= FLAG_ARG_NEEDS_PSAVE;
 397 #endif
 398   return ffi_prep_cif_linux64_core (cif);
 399 }
 400
 401
 402 /* ffi_prep_args64 is called by the assembly routine once stack space
 403    has been allocated for the function's arguments.
 404
 405    The stack layout we want looks like this:
 406
 407    |   Ret addr from ffi_call_LINUX64   8bytes  |       higher addresses
 408    |--------------------------------------------|
 409    |   CR save area                     8bytes  |
 410    |--------------------------------------------|
 411    |   Previous backchain pointer       8       |       stack pointer here
 412    |--------------------------------------------|<+ <<< on entry to
 413    |   Saved r28-r31                    4*8     | |     ffi_call_LINUX64
 414    |--------------------------------------------| |
 415    |   GPR registers r3-r10             8*8     | |
 416    |--------------------------------------------| |
 417    |   FPR registers f1-f13 (optional)  13*8    | |
 418    |--------------------------------------------| |
 419    |   VEC registers v2-v13 (optional)  12*16   | |
 420    |--------------------------------------------| |
 421    |   Parameter save area                      | |
 422    |--------------------------------------------| |
 423    |   TOC save area                    8       | |
 424    |--------------------------------------------| |     stack   |
 425    |   Linker doubleword                8       | |     grows   |
 426    |--------------------------------------------| |     down    V
 427    |   Compiler doubleword              8       | |
 428    |--------------------------------------------| |     lower addresses
 429    |   Space for callee's LR            8       | |
 430    |--------------------------------------------| |
 431    |   CR save area                     8       | |
 432    |--------------------------------------------| |     stack pointer here
 433    |   Current backchain pointer        8       |-/     during
 434    |--------------------------------------------|   <<< ffi_call_LINUX64
 435
 436 */
 437
/* Write the outgoing arguments of a call into the frame at STACK.

   ECIF supplies the cif (flags, bytes, abi, argument types) together
   with the argument value pointers (avalue) and the return value
   pointer (rvalue).  STACK is the low end of the frame; the frame is
   ecif->cif->bytes long.  Argument values are stored into the GPR,
   FPR and vector register images near the top of the frame, and into
   the parameter area starting at 'rest' once the GPR image is full.  */
void FFI_HIDDEN
ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
{
  const unsigned long bytes = ecif->cif->bytes;
  const unsigned long flags = ecif->cif->flags;

  /* One address viewed as several pointer types (and as an integer,
     for alignment arithmetic).  */
  typedef union
  {
    char *c;
    unsigned long *ul;
    float *f;
    double *d;
    float128 *f128;
    size_t p;
  } valp;

  /* 'stacktop' points at the previous backchain pointer.  */
  valp stacktop;

  /* 'next_arg' points at the space for gpr3, and grows upwards as
     we use GPR registers, then continues at rest.
     'gpr_base' and 'gpr_end' delimit the eight-slot GPR image;
     'rest' is where arguments go after those eight slots.  */
  valp gpr_base;
  valp gpr_end;
  valp rest;
  valp next_arg;

  /* 'fpr_base' points at the space for f1, and grows upwards as
     we use FPR registers.  */
  valp fpr_base;
  unsigned int fparg_count;

  /* 'vec_base' points at the space for v2, and grows upwards as
     we use vector registers.  */
  valp vec_base;
  unsigned int vecarg_count;

  unsigned int i, words, nargs, nfixedargs;
  ffi_type **ptr;
  double double_tmp;
  /* Cursor over ecif->avalue, viewed as a pointer to each kind of
     argument value pointer.  */
  union
  {
    void **v;
    char **c;
    signed char **sc;
    unsigned char **uc;
    signed short **ss;
    unsigned short **us;
    signed int **si;
    unsigned int **ui;
    unsigned long **ul;
    float **f;
    double **d;
    float128 **f128;
  } p_argv;
  unsigned long gprvalue;
  unsigned long align;

  /* The register images sit directly below the slots reserved for the
     assembly code at the top of the frame: GPRs first, FPRs below.  */
  stacktop.c = (char *) stack + bytes;
  gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
  gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
  /* 'rest' skips the frame header (4 doublewords for ELFv2, 6
     otherwise) and the first eight parameter slots.  */
#if _CALL_ELF == 2
  rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
#else
  rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
#endif
  fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
  fparg_count = 0;
  /* Place the vector args below the FPRs, if used, else the GPRs.
     The FPR base is rounded down to a 16-byte boundary first.  */
  if (ecif->cif->flags & FLAG_FP_ARGUMENTS)
    vec_base.p = fpr_base.p & ~0xF;
  else
    vec_base.p = gpr_base.p;
  vec_base.f128 -= NUM_VEC_ARG_REGISTERS64;
  vecarg_count = 0;
  next_arg.ul = gpr_base.ul;

  /* Check that everything starts aligned properly.  */
  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
  FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0);
  FFI_ASSERT (((unsigned long) gpr_end.c  & 0xF) == 0);
  FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0);
  FFI_ASSERT ((bytes & 0xF) == 0);

  /* Deal with return values that are actually pass-by-reference.  */
  if (flags & FLAG_RETVAL_REFERENCE)
    *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;

  /* Now for the arguments.  */
  p_argv.v = ecif->avalue;
  nargs = ecif->cif->nargs;
  /* In compatibility mode cif->nfixedargs is not read; the maximum
     value makes the 'i < nfixedargs' tests below always true.  */
#if _CALL_ELF != 2
  nfixedargs = (unsigned) -1;
  if ((flags & FLAG_COMPAT) == 0)
#endif
    nfixedargs = ecif->cif->nfixedargs;
  for (ptr = ecif->cif->arg_types, i = 0;
       i < nargs;
       i++, ptr++, p_argv.v++)
    {
      unsigned int elt, elnum;

      switch ((*ptr)->type)
        {
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
        case FFI_TYPE_LONGDOUBLE:
          if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              /* IEEE128: align the slot cursor to 16 bytes, then copy
                 the value either to the vector register image (fixed
                 argument with a vector register still free) or to its
                 parameter slots.  The cursor advances in both cases.  */
              next_arg.p = FFI_ALIGN (next_arg.p, 16);
              if (next_arg.ul == gpr_end.ul)
                next_arg.ul = rest.ul;
              if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs)
                memcpy (vec_base.f128++, *p_argv.f128, sizeof (float128));
              else
                memcpy (next_arg.f128, *p_argv.f128, sizeof (float128));
              if (++next_arg.f128 == gpr_end.f128)
                next_arg.f128 = rest.f128;
              vecarg_count++;
              FFI_ASSERT (__LDBL_MANT_DIG__ == 113);
              FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS);
              break;
            }
          if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
            {
              /* 128-bit (non-IEEE128) long double: handled as two
                 consecutive doubles, each following the same rules as
                 a plain double argument.  */
              double_tmp = (*p_argv.d)[0];
              if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
                {
                  *fpr_base.d++ = double_tmp;
# if _CALL_ELF != 2
                  if ((flags & FLAG_COMPAT) != 0)
                    *next_arg.d = double_tmp;
# endif
                }
              else
                *next_arg.d = double_tmp;
              if (++next_arg.ul == gpr_end.ul)
                next_arg.ul = rest.ul;
              fparg_count++;
              double_tmp = (*p_argv.d)[1];
              if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
                {
                  *fpr_base.d++ = double_tmp;
# if _CALL_ELF != 2
                  if ((flags & FLAG_COMPAT) != 0)
                    *next_arg.d = double_tmp;
# endif
                }
              else
                *next_arg.d = double_tmp;
              if (++next_arg.ul == gpr_end.ul)
                next_arg.ul = rest.ul;
              fparg_count++;
              FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
              FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
              break;
            }
          /* Fall through.  */
#endif
        case FFI_TYPE_DOUBLE:
#if _CALL_ELF != 2
        /* Also the target for homogeneous structs of double when not
           building for ELFv2.  */
        do_double:
#endif
          double_tmp = **p_argv.d;
          /* A fixed argument with an FP register still free goes to
             the FPR image (and, in compatibility mode, to its slot as
             well); anything else goes to its slot only.  The slot is
             consumed either way.  */
          if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
            {
              *fpr_base.d++ = double_tmp;
#if _CALL_ELF != 2
              if ((flags & FLAG_COMPAT) != 0)
                *next_arg.d = double_tmp;
#endif
            }
          else
            *next_arg.d = double_tmp;
          if (++next_arg.ul == gpr_end.ul)
            next_arg.ul = rest.ul;
          fparg_count++;
          FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
          break;

        case FFI_TYPE_FLOAT:
#if _CALL_ELF != 2
        /* Also the target for homogeneous structs of float when not
           building for ELFv2.  */
        do_float:
#endif
          /* Floats are widened to double for the FPR image.  When the
             value is stored in a slot it is stored as a float: in the
             second word of the doubleword unless __LITTLE_ENDIAN__ is
             defined, in the first word otherwise.  */
          double_tmp = **p_argv.f;
          if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
            {
              *fpr_base.d++ = double_tmp;
#if _CALL_ELF != 2
              if ((flags & FLAG_COMPAT) != 0)
                {
# ifndef __LITTLE_ENDIAN__
                  next_arg.f[1] = (float) double_tmp;
# else
                  next_arg.f[0] = (float) double_tmp;
# endif
                }
#endif
            }
          else
            {
# ifndef __LITTLE_ENDIAN__
              next_arg.f[1] = (float) double_tmp;
# else
              next_arg.f[0] = (float) double_tmp;
# endif
            }
          if (++next_arg.ul == gpr_end.ul)
            next_arg.ul = rest.ul;
          fparg_count++;
          FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
          break;

        case FFI_TYPE_STRUCT:
          if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
            {
              /* Align the slot cursor to the struct's alignment,
                 capped at 16 bytes.  */
              align = (*ptr)->alignment;
              if (align > 16)
                align = 16;
              if (align > 1)
                {
                  next_arg.p = FFI_ALIGN (next_arg.p, align);
                  if (next_arg.ul == gpr_end.ul)
                    next_arg.ul = rest.ul;
                }
            }
          elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum);
          if (elt)
            {
#if _CALL_ELF == 2
              /* Cursor over the struct's elements.  */
              union {
                void *v;
                float *f;
                double *d;
                float128 *f128;
              } arg;

              arg.v = *p_argv.v;
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
              if (elt == FFI_TYPE_LONGDOUBLE &&
                  (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
                {
                  /* One 16-byte element per iteration, to a vector
                     register image entry or to the slots.  */
                  do
                    {
                      if (vecarg_count < NUM_VEC_ARG_REGISTERS64
                          && i < nfixedargs)
                        memcpy (vec_base.f128++, arg.f128++, sizeof (float128));
                      else
                        memcpy (next_arg.f128, arg.f128++, sizeof (float128));
                      if (++next_arg.f128 == gpr_end.f128)
                        next_arg.f128 = rest.f128;
                      vecarg_count++;
                    }
                  while (--elnum != 0);
                }
              else
#endif
              if (elt == FFI_TYPE_FLOAT)
                {
                  /* Float elements advance the slot cursor by four
                     bytes each, so two share one doubleword.  */
                  do
                    {
                      double_tmp = *arg.f++;
                      if (fparg_count < NUM_FPR_ARG_REGISTERS64
                          && i < nfixedargs)
                        *fpr_base.d++ = double_tmp;
                      else
                        *next_arg.f = (float) double_tmp;
                      if (++next_arg.f == gpr_end.f)
                        next_arg.f = rest.f;
                      fparg_count++;
                    }
                  while (--elnum != 0);
                  /* After an odd number of floats, skip the unused
                     half so the cursor is doubleword aligned again.  */
                  if ((next_arg.p & 7) != 0)
                    if (++next_arg.f == gpr_end.f)
                      next_arg.f = rest.f;
                }
              else
                /* Remaining element types are read as doubles, one
                   slot and one FP register use per element.  */
                do
                  {
                    double_tmp = *arg.d++;
                    if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
                      *fpr_base.d++ = double_tmp;
                    else
                      *next_arg.d = double_tmp;
                    if (++next_arg.d == gpr_end.d)
                      next_arg.d = rest.d;
                    fparg_count++;
                  }
                while (--elnum != 0);
#else
              /* Not ELFv2: treat the struct as the scalar it wraps.  */
              if (elt == FFI_TYPE_FLOAT)
                goto do_float;
              else
                goto do_double;
#endif
            }
          else
            {
              words = ((*ptr)->size + 7) / 8;
              if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
                {
                  /* The struct straddles the end of the GPR image:
                     fill what is left of the image, then continue at
                     'rest'.  */
                  size_t first = gpr_end.c - next_arg.c;
                  memcpy (next_arg.c, *p_argv.c, first);
                  memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
                  next_arg.c = rest.c + words * 8 - first;
                }
              else
                {
                  char *where = next_arg.c;

#ifndef __LITTLE_ENDIAN__
                  /* Structures with size less than eight bytes are passed
                     left-padded.  */
                  if ((*ptr)->size < 8)
                    where += 8 - (*ptr)->size;
#endif
                  memcpy (where, *p_argv.c, (*ptr)->size);
                  next_arg.ul += words;
                  if (next_arg.ul == gpr_end.ul)
                    next_arg.ul = rest.ul;
                }
            }
          break;

        /* Integer and pointer arguments: load the value at its own
           width into gprvalue, then store it as a full doubleword.  */
        case FFI_TYPE_UINT8:
          gprvalue = **p_argv.uc;
          goto putgpr;
        case FFI_TYPE_SINT8:
          gprvalue = **p_argv.sc;
          goto putgpr;
        case FFI_TYPE_UINT16:
          gprvalue = **p_argv.us;
          goto putgpr;
        case FFI_TYPE_SINT16:
          gprvalue = **p_argv.ss;
          goto putgpr;
        case FFI_TYPE_UINT32:
          gprvalue = **p_argv.ui;
          goto putgpr;
        case FFI_TYPE_INT:
        case FFI_TYPE_SINT32:
          gprvalue = **p_argv.si;
          goto putgpr;

        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_POINTER:
          gprvalue = **p_argv.ul;
        putgpr:
          *next_arg.ul++ = gprvalue;
          if (next_arg.ul == gpr_end.ul)
            next_arg.ul = rest.ul;
          break;
        }
    }

  /* Unless FLAG_4_GPR_ARGUMENTS is set, the slot cursor must still be
     within the first four GPR slots.  */
  FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
              || (next_arg.ul >= gpr_base.ul
                  && next_arg.ul <= gpr_base.ul + 4));
}
 797
 798
 799 #if _CALL_ELF == 2
 800 #define MIN_CACHE_LINE_SIZE 8
 801
 802 static void
 803 flush_icache (char *wraddr, char *xaddr, int size)
 804 {
 805   int i;
 806   for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
 807     __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
 808                       : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
 809   __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
 810                     : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
 811                     : "memory");
 812 }
 813 #endif
 814
 815
 816 ffi_status FFI_HIDDEN
 817 ffi_prep_closure_loc_linux64 (ffi_closure *closure,
 818                               ffi_cif *cif,
 819                               void (*fun) (ffi_cif *, void *, void **, void *),
 820                               void *user_data,
 821                               void *codeloc)
 822 {
 823 #if _CALL_ELF == 2
 824   unsigned int *tramp = (unsigned int *) &closure->tramp[0];
 825
 826   if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
 827     return FFI_BAD_ABI;
 828
 829   tramp[0] = 0xe96c0018;        /* 0:   ld      11,2f-0b(12)    */
 830   tramp[1] = 0xe98c0010;        /*      ld      12,1f-0b(12)    */
 831   tramp[2] = 0x7d8903a6;        /*      mtctr   12              */
 832   tramp[3] = 0x4e800420;        /*      bctr                    */
 833                                 /* 1:   .quad   function_addr   */
 834                                 /* 2:   .quad   context         */
 835   *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
 836   *(void **) &tramp[6] = codeloc;
 837   flush_icache ((char *) tramp, (char *) codeloc, 4 * 4);
 838 #else
 839   void **tramp = (void **) &closure->tramp[0];
 840
 841   if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
 842     return FFI_BAD_ABI;
 843
 844   /* Copy function address and TOC from ffi_closure_LINUX64 OPD.  */
 845   memcpy (&tramp[0], (void **) ffi_closure_LINUX64, sizeof (void *));
 846   tramp[1] = codeloc;
 847   memcpy (&tramp[2], (void **) ffi_closure_LINUX64 + 1, sizeof (void *));
 848 #endif
 849
 850   closure->cif = cif;
 851   closure->fun = fun;
 852   closure->user_data = user_data;
 853
 854   return FFI_OK;
 855 }
 856
 857
 858 int FFI_HIDDEN
 859 ffi_closure_helper_LINUX64 (ffi_cif *cif,
 860                             void (*fun) (ffi_cif *, void *, void **, void *),
 861                             void *user_data,
 862                             void *rvalue,
 863                             unsigned long *pst,
 864                             ffi_dblfl *pfr,
 865                             float128 *pvec)
 866 {
       /* C helper for the ffi_closure_LINUX64 assembly stub.  Builds an
          array holding one pointer per argument (cif->nargs entries), each
          pointing at that argument's value in one of the save areas below,
          then invokes FUN with it and returns a code telling the stub how
          to load the return value.

          cif, fun and user_data are handed to FUN as given; this function
          itself only reads cif.  */
 867   /* rvalue is the pointer to space for return value in closure assembly */
 868   /* pst is the pointer to parameter save area
 869      (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
 870   /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
 871   /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */
 872
 873   void **avalue;
 874   ffi_type **arg_types;
 875   unsigned long i, avn, nfixedargs;
       /* One-past-the-end markers for the saved FP and vector registers;
          once pfr/pvec reach them, further values come from pst.  */
 876   ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
 877   float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64;
 878   unsigned long align;
 879
       /* Argument pointer array lives on this function's stack frame.  */
 880   avalue = alloca (cif->nargs * sizeof (void *));
 881
 882   /* Copy the caller's structure return value address so that the
 883      closure returns the data directly to the caller.  The address
          occupies the first parameter slot, so the real arguments start
          one slot later.  Structs flagged FLAG_RETURNS_SMST keep using
          the rvalue buffer supplied by the assembly stub.  */
 884   if (cif->rtype->type == FFI_TYPE_STRUCT
 885       && (cif->flags & FLAG_RETURNS_SMST) == 0)
 886     {
 887       rvalue = (void *) *pst;
 888       pst++;
 889     }
 890
 891   i = 0;
 892   avn = cif->nargs;
       /* nfixedargs limits which arguments may be fetched from the FP or
          vector register save areas (the "i < nfixedargs" tests below).
          When _CALL_ELF != 2 it stays at a huge value, so every argument
          qualifies, unless FLAG_COMPAT is clear; in that case, and always
          when _CALL_ELF == 2, cif->nfixedargs is used.  */
 893 #if _CALL_ELF != 2
 894   nfixedargs = (unsigned) -1;
 895   if ((cif->flags & FLAG_COMPAT) == 0)
 896 #endif
 897     nfixedargs = cif->nfixedargs;
 898   arg_types = cif->arg_types;
 899
 900   /* Grab the addresses of the arguments from the stack frame.  */
 901   while (i < avn)
 902     {
 903       unsigned int elt, elnum;
 904
 905       switch (arg_types[i]->type)
 906         {
             /* Integer types narrower than 64 bits each take one 8-byte
                slot.  On big-endian builds the pointer is offset to the
                last 1, 2 or 4 bytes of the slot.  On little-endian builds
                the bodies are compiled out and these cases fall through
                to the 64-bit case, which uses the slot address itself.  */
 907         case FFI_TYPE_SINT8:
 908         case FFI_TYPE_UINT8:
 909 #ifndef __LITTLE_ENDIAN__
 910           avalue[i] = (char *) pst + 7;
 911           pst++;
 912           break;
 913 #endif
 914
 915         case FFI_TYPE_SINT16:
 916         case FFI_TYPE_UINT16:
 917 #ifndef __LITTLE_ENDIAN__
 918           avalue[i] = (char *) pst + 6;
 919           pst++;
 920           break;
 921 #endif
 922
 923         case FFI_TYPE_SINT32:
 924         case FFI_TYPE_UINT32:
 925 #ifndef __LITTLE_ENDIAN__
 926           avalue[i] = (char *) pst + 4;
 927           pst++;
 928           break;
 929 #endif
 930
 931         case FFI_TYPE_SINT64:
 932         case FFI_TYPE_UINT64:
 933         case FFI_TYPE_POINTER:
 934           avalue[i] = pst;
 935           pst++;
 936           break;
 937
 938         case FFI_TYPE_STRUCT:
               /* With FFI_LINUX_STRUCT_ALIGN set, round pst up to the
                  struct's alignment, capped at 16 bytes.  */
 939           if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
 940             {
 941               align = arg_types[i]->alignment;
 942               if (align > 16)
 943                 align = 16;
 944               if (align > 1)
 945                 pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align);
 946             }
               /* A non-zero elt is used below as the element type code of
                  a homogeneous aggregate and elnum as its element count
                  (helper is defined outside this view; presumably elt is 0
                  for any other struct).  */
 947           elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum);
 948           if (elt)
 949             {
 950 #if _CALL_ELF == 2
                   /* Typed views of a single address: `to' is where the
                      next element is written, `from' is that element's
                      position in the parameter save area.  */
 951               union {
 952                 void *v;
 953                 unsigned long *ul;
 954                 float *f;
 955                 double *d;
 956                 float128 *f128;
 957                 size_t p;
 958               } to, from;
 959
 960               /* Repackage the aggregate from its parts.  The
 961                  aggregate size is not greater than the space taken by
 962                  the registers so store back to the register/parameter
 963                  save arrays.  */
                   /* Choose the destination: the matching register save
                      area if all elnum elements fit in what remains of it,
                      otherwise the parameter save area.  */
 964 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 965               if (elt == FFI_TYPE_LONGDOUBLE &&
 966                   (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
 967                 {
 968                   if (pvec + elnum <= end_pvec)
 969                     to.v = pvec;
 970                   else
 971                     to.v = pst;
 972                 }
 973               else
 974 #endif
 975               if (pfr + elnum <= end_pfr)
 976                 to.v = pfr;
 977               else
 978                 to.v = pst;
 979
 980               avalue[i] = to.v;
 981               from.ul = pst;
                   /* Copy element by element.  Each one comes from the
                      next saved register while registers remain and this
                      is a fixed argument, otherwise from the parameter
                      save area.  `from' advances together with `to' in
                      either case so it keeps tracking the element's stack
                      position.  */
 982 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
 983               if (elt == FFI_TYPE_LONGDOUBLE &&
 984                   (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
 985                 {
 986                   do
 987                     {
 988                       if (pvec < end_pvec && i < nfixedargs)
 989                         memcpy (to.f128, pvec++, sizeof (float128));
 990                       else
 991                         memcpy (to.f128, from.f128, sizeof (float128));
 992                       to.f128++;
 993                       from.f128++;
 994                     }
 995                   while (--elnum != 0);
 996                 }
 997               else
 998 #endif
 999               if (elt == FFI_TYPE_FLOAT)
1000                 {
                       /* Each saved FPR slot is read as a double and
                          narrowed to float, so the floats end up packed
                          contiguously at `to'.  */
1001                   do
1002                     {
1003                       if (pfr < end_pfr && i < nfixedargs)
1004                         {
1005                           *to.f = (float) pfr->d;
1006                           pfr++;
1007                         }
1008                       else
1009                         *to.f = *from.f;
1010                       to.f++;
1011                       from.f++;
1012                     }
1013                   while (--elnum != 0);
1014                 }
1015               else
1016                 {
                       /* Remaining case: elements are copied as doubles.  */
1017                   do
1018                     {
1019                       if (pfr < end_pfr && i < nfixedargs)
1020                         {
1021                           *to.d = pfr->d;
1022                           pfr++;
1023                         }
1024                       else
1025                         *to.d = *from.d;
1026                       to.d++;
1027                       from.d++;
1028                     }
1029                   while (--elnum != 0);
1030                 }
1031 #else
                   /* Not ELFv2: handle the aggregate with the scalar
                      float/double code further down.  These jumps bypass
                      the "pst += ..." after this if/else; the scalar code
                      advances pst by one slot instead.  */
1032               if (elt == FFI_TYPE_FLOAT)
1033                 goto do_float;
1034               else
1035                 goto do_double;
1036 #endif
1037             }
1038           else
1039             {
1040 #ifndef __LITTLE_ENDIAN__
1041               /* Structures with size less than eight bytes are passed
1042                  left-padded.  */
1043               if (arg_types[i]->size < 8)
1044                 avalue[i] = (char *) pst + 8 - arg_types[i]->size;
1045               else
1046 #endif
1047                 avalue[i] = pst;
1048             }
               /* Step over the aggregate's parameter save area space,
                  rounded up to whole 8-byte slots.  */
1049           pst += (arg_types[i]->size + 7) / 8;
1050           break;
1051
1052 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
1053         case FFI_TYPE_LONGDOUBLE:
1054           if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
1055             {
                   /* Skip one slot if pst is not 16-byte aligned, then
                      take the value from the next saved vector register
                      when one is left and this is a fixed argument, else
                      from the stack.  Two slots are consumed either
                      way.  */
1056               if (((unsigned long) pst & 0xF) != 0)
1057                 ++pst;
1058               if (pvec < end_pvec && i < nfixedargs)
1059                 avalue[i] = pvec++;
1060               else
1061                 avalue[i] = pst;
1062               pst += 2;
1063               break;
1064             }
1065           else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
1066             {
                   /* The value spans two consecutive saved FPR slots when
                      at least two remain.
                      NOTE(review): the second test compares i + 1, not i,
                      against nfixedargs, unlike every other register test
                      in this function.  For the last fixed argument this
                      branch is therefore skipped and the else path below
                      is used -- confirm this is intended.  */
1067               if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
1068                 {
1069                   avalue[i] = pfr;
1070                   pfr += 2;
1071                 }
1072               else
1073                 {
1074                   if (pfr < end_pfr && i < nfixedargs)
1075                     {
1076                       /* Passed partly in f13 and partly on the stack.
1077                          Move it all to the stack.  Only the first
                              8 bytes are copied here; the second half is
                              read from the following stack slot as is.  */
1078                       *pst = *(unsigned long *) pfr;
1079                       pfr++;
1080                     }
1081                   avalue[i] = pst;
1082                 }
1083               pst += 2;
1084               break;
1085             }
               /* Neither 128-bit long double flag is set: handle it as a
                  double.  */
1086           /* Fall through.  */
1087 #endif
1088         case FFI_TYPE_DOUBLE:
1089 #if _CALL_ELF != 2
1090         do_double:
1091 #endif
1092           /* On the outgoing stack all values are aligned to 8 */
1093           /* there are 13 64bit floating point registers */
1094
               /* pst advances even when the value comes from a saved FPR,
                  so the slot for this argument is stepped over either
                  way.  */
1095           if (pfr < end_pfr && i < nfixedargs)
1096             {
1097               avalue[i] = pfr;
1098               pfr++;
1099             }
1100           else
1101             avalue[i] = pst;
1102           pst++;
1103           break;
1104
1105         case FFI_TYPE_FLOAT:
1106 #if _CALL_ELF != 2
1107         do_float:
1108 #endif
1109           if (pfr < end_pfr && i < nfixedargs)
1110             {
1111               /* Float values are stored as doubles in the
1112                  ffi_closure_LINUX64 code.  Fix them here.  The
                      narrowed value overwrites the same save slot.  */
1113               pfr->f = (float) pfr->d;
1114               avalue[i] = pfr;
1115               pfr++;
1116             }
1117           else
1118             {
                   /* Stack-passed float: on big-endian builds it is read
                      from the last 4 bytes of its 8-byte slot.  */
1119 #ifndef __LITTLE_ENDIAN__
1120               avalue[i] = (char *) pst + 4;
1121 #else
1122               avalue[i] = pst;
1123 #endif
1124             }
1125           pst++;
1126           break;
1127
1128         default:
               /* Unexpected type code; avalue[i] is not assigned.  */
1129           FFI_ASSERT (0);
1130         }
1131
1132       i++;
1133     }
1134
       /* Invoke the user's handler with the collected argument pointers.  */
1135   (*fun) (cif, rvalue, avalue, user_data);
1136
1137   /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
       /* For FLAG_RETURNS_SMST structs: if neither the FP nor the vector
          return flag is set, the code encodes the struct size as
          FFI_V2_TYPE_SMALL_STRUCT + size - 1; otherwise one of the
          homogeneous codes (vector, double or float) is returned.  */
1138   if ((cif->flags & FLAG_RETURNS_SMST) != 0)
1139     {
1140       if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0)
1141         return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
1142       else if ((cif->flags & FLAG_RETURNS_VEC) != 0)
1143         return FFI_V2_TYPE_VECTOR_HOMOG;
1144       else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
1145         return FFI_V2_TYPE_DOUBLE_HOMOG;
1146       else
1147         return FFI_V2_TYPE_FLOAT_HOMOG;
1148     }
       /* Otherwise: a vector return, or simply the declared return type
          code.  */
1149   if ((cif->flags & FLAG_RETURNS_VEC) != 0)
1150     return FFI_V2_TYPE_VECTOR;
1151   return cif->rtype->type;
1152 }
1153 #endif