PR c++/85963 - -Wunused-but-set with ?: in template.
[official-gcc.git] / libffi / src / aarch64 / ffi.c
blobf79602bb7b8b7611be975b25337484a2331b81ba
1 /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
3 Permission is hereby granted, free of charge, to any person obtaining
4 a copy of this software and associated documentation files (the
5 ``Software''), to deal in the Software without restriction, including
6 without limitation the rights to use, copy, modify, merge, publish,
7 distribute, sublicense, and/or sell copies of the Software, and to
8 permit persons to whom the Software is furnished to do so, subject to
9 the following conditions:
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdint.h>
25 #include <ffi.h>
26 #include <ffi_common.h>
27 #include "internal.h"
29 /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
30 all further uses in this file will refer to the 128-bit type. */
31 #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
32 # if FFI_TYPE_LONGDOUBLE != 4
33 # error FFI_TYPE_LONGDOUBLE out of date
34 # endif
35 #else
36 # undef FFI_TYPE_LONGDOUBLE
37 # define FFI_TYPE_LONGDOUBLE 4
38 #endif
/* Overlay of one 64-bit register save slot, viewable either as a whole
   doubleword or as a pair of 32-bit words.  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};
/* One 128-bit vector register save slot; 16-byte aligned so that
   q-register load/store instructions can address it directly.  */
struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};
/* Register state exchanged with the assembly entry points: the vector
   argument registers followed by the general-purpose argument
   registers (counts come from internal.h).  */
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
#if defined (__clang__) && defined (__APPLE__)
/* Darwin system routine to invalidate the instruction cache.  */
extern void sys_icache_invalidate (void *start, size_t len);
#endif

/* Flush the instruction cache for the half-open byte range
   [start, end), after writing freshly generated trampoline code.  */
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
73 /* A subroutine of is_vfp_type. Given a structure type, return the type code
74 of the first non-structure element. Recurse for structure elements.
75 Return -1 if the structure is in fact empty, i.e. no nested elements. */
77 static int
78 is_hfa0 (const ffi_type *ty)
80 ffi_type **elements = ty->elements;
81 int i, ret = -1;
83 if (elements != NULL)
84 for (i = 0; elements[i]; ++i)
86 ret = elements[i]->type;
87 if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
89 ret = is_hfa0 (elements[i]);
90 if (ret < 0)
91 continue;
93 break;
96 return ret;
99 /* A subroutine of is_vfp_type. Given a structure type, return true if all
100 of the non-structure elements are the same as CANDIDATE. */
102 static int
103 is_hfa1 (const ffi_type *ty, int candidate)
105 ffi_type **elements = ty->elements;
106 int i;
108 if (elements != NULL)
109 for (i = 0; elements[i]; ++i)
111 int t = elements[i]->type;
112 if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
114 if (!is_hfa1 (elements[i], candidate))
115 return 0;
117 else if (t != candidate)
118 return 0;
121 return 1;
/* Determine if TY may be allocated to the FP registers.  This is both an
   fp scalar type as well as an homogenous floating point aggregate (HFA).
   That is, a structure consisting of 1 to 4 members of all the same type,
   where that type is an fp scalar.

   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_*
   constant for the type.  */
static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first.  */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      /* A lone fp scalar is a one-element HFA.  */
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      /* A complex value is a two-element HFA of its base type.  */
      candidate = ty->elements[0]->type;
      switch (candidate)
        {
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
          ele_count = 2;
          goto done;
        }
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      /* Skip over leading empty nested aggregates, for which is_hfa0
         returns -1, until a scalar type code is found.  */
      for (i = 0; ; ++i)
        {
          candidate = is_hfa0 (elements[i]);
          if (candidate >= 0)
            break;
        }
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
        return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
        return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
        return 0;
      break;
    default:
      return 0;
    }
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type.  */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
        {
          if (!is_hfa1 (elements[i], candidate))
            return 0;
        }
      else if (t != candidate)
        return 0;
    }

  /* All tests succeeded.  Encode the result: this arithmetic is assumed
     to match the AARCH64_RET_* layout in internal.h (base code for the
     scalar kind plus 4 - element-count) -- verify against that header.  */
 done:
  return candidate * 4 + (4 - ele_count);
}
/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS. */
struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number. */
  unsigned nsrn;                /* Next vector register number. */
  size_t nsaa;                  /* Next stack offset. */
#if defined (__APPLE__)
  unsigned allocating_variadic; /* Nonzero once laying out variadic args. */
#endif
};
243 /* Initialize a procedure call argument marshalling state. */
244 static void
245 arg_init (struct arg_state *state)
247 state->ngrn = 0;
248 state->nsrn = 0;
249 state->nsaa = 0;
250 #if defined (__APPLE__)
251 state->allocating_variadic = 0;
252 #endif
/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack,
                   size_t alignment, size_t size)
{
  size_t nsaa = state->nsaa;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  On Apple targets the minimum
     of 8 applies only to variadic arguments; fixed stack arguments
     keep their natural alignment (see the iOS note in ffi_call_int).  */
#if defined (__APPLE__)
  if (state->allocating_variadic && alignment < 8)
    alignment = 8;
#else
  if (alignment < 8)
    alignment = 8;
#endif

  nsaa = ALIGN (nsaa, alignment);
  state->nsaa = nsaa + size;   /* Reserve SIZE bytes past the aligned slot. */

  return (char *)stack + nsaa;
}
278 static ffi_arg
279 extend_integer_type (void *source, int type)
281 switch (type)
283 case FFI_TYPE_UINT8:
284 return *(UINT8 *) source;
285 case FFI_TYPE_SINT8:
286 return *(SINT8 *) source;
287 case FFI_TYPE_UINT16:
288 return *(UINT16 *) source;
289 case FFI_TYPE_SINT16:
290 return *(SINT16 *) source;
291 case FFI_TYPE_UINT32:
292 return *(UINT32 *) source;
293 case FFI_TYPE_INT:
294 case FFI_TYPE_SINT32:
295 return *(SINT32 *) source;
296 case FFI_TYPE_UINT64:
297 case FFI_TYPE_SINT64:
298 return *(UINT64 *) source;
299 break;
300 case FFI_TYPE_POINTER:
301 return *(uintptr_t *) source;
302 default:
303 abort();
/* Expand an HFA argument from its packed layout at SRC into the
   consecutive vector register save slots at DEST.  H is the
   AARCH64_RET_* encoding of the HFA.  A computed branch jumps into the
   table of load sequences below: each table row is three instructions
   (12 bytes), hence the `f * 12' byte offset from label 0.  The loads
   then fall through to the shared stores at labels 4..1, which spill
   as many q-registers as the element count requires.  */
static void
extend_hfa_type (void *dest, void *src, int h)
{
  /* Row index into the instruction table; AARCH64_RET_S4 is assumed to
     be the first (lowest-numbered) HFA return code.  */
  int f = h - AARCH64_RET_S4;
  void *x0;

  asm volatile (
	"adr %0, 0f\n"
"	add %0, %0, %1\n"
"	br %0\n"
"0:	ldp s16, s17, [%3]\n"		/* S4 */
"	ldp s18, s19, [%3, #8]\n"
"	b 4f\n"
"	ldp s16, s17, [%3]\n"		/* S3 */
"	ldr s18, [%3, #8]\n"
"	b 3f\n"
"	ldp s16, s17, [%3]\n"		/* S2 */
"	b 2f\n"
"	nop\n"
"	ldr s16, [%3]\n"		/* S1 */
"	b 1f\n"
"	nop\n"
"	ldp d16, d17, [%3]\n"		/* D4 */
"	ldp d18, d19, [%3, #16]\n"
"	b 4f\n"
"	ldp d16, d17, [%3]\n"		/* D3 */
"	ldr d18, [%3, #16]\n"
"	b 3f\n"
"	ldp d16, d17, [%3]\n"		/* D2 */
"	b 2f\n"
"	nop\n"
"	ldr d16, [%3]\n"		/* D1 */
"	b 1f\n"
"	nop\n"
"	ldp q16, q17, [%3]\n"		/* Q4 */
"	ldp q18, q19, [%3, #16]\n"
"	b 4f\n"
"	ldp q16, q17, [%3]\n"		/* Q3 */
"	ldr q18, [%3, #16]\n"
"	b 3f\n"
"	ldp q16, q17, [%3]\n"		/* Q2 */
"	b 2f\n"
"	nop\n"
"	ldr q16, [%3]\n"		/* Q1 */
"	b 1f\n"
"4:	str q19, [%2, #48]\n"
"3:	str q18, [%2, #32]\n"
"2:	str q17, [%2, #16]\n"
"1:	str q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
/* The inverse of extend_hfa_type: gather the elements of an HFA that
   occupy consecutive 128-bit register save slots at REG into their
   packed in-memory layout at DEST.  H is the AARCH64_RET_* encoding.
   Returns the address where the packed value now lives (DEST, possibly
   adjusted for big-endian in-place single-element cases).  */
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
	{
	  /* In-place: on big-endian the float sits in the high bytes
	     of the 16-byte slot, so step the pointer to it.  */
#ifdef __AARCH64EB__
	  dest += 12;
#endif
	}
      else
	*(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      /* st2 interleaves lane 0 of each source register, packing the
	 two floats contiguously at dest.  */
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.s, v17.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
	{
	  /* In-place big-endian: the double is in the high 8 bytes.  */
#ifdef __AARCH64EB__
	  dest += 8;
#endif
	}
      else
	*(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.d, v17.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      /* Q-sized elements are already packed in the register slots;
	 copy 16 bytes per element ((h & 3) encodes 4 - count).  */
      if (dest != reg)
	return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
430 /* Either allocate an appropriate register for the argument type, or if
431 none are available, allocate a stack slot and return a pointer
432 to the allocated space. */
434 static void *
435 allocate_int_to_reg_or_stack (struct call_context *context,
436 struct arg_state *state,
437 void *stack, size_t size)
439 if (state->ngrn < N_X_ARG_REG)
440 return &context->x[state->ngrn++];
442 state->ngrn = N_X_ARG_REG;
443 return allocate_to_stack (state, stack, size, size);
/* Machine-dependent cif preparation: encode the return-type handling
   into cif->flags as an AARCH64_RET_* value (plus AARCH64_FLAG_ARG_V
   when any argument lives in a vector register) and round up the
   argument stack size.  */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      /* Pointer width decides between a 32- and 64-bit return.  */
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
	{
	  /* Not an HFA: classify by size per the PCS.  */
	  size_t s = rtype->size;
	  if (s > 16)
	    {
	      /* Returned via a hidden pointer; reserve a slot for it.  */
	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
	      bytes += 8;
	    }
	  else if (s == 16)
	    flags = AARCH64_RET_INT128;
	  else if (s == 8)
	    flags = AARCH64_RET_INT64;
	  else
	    /* Small aggregate: returned in registers, then copied out
	       to the caller's (smaller) buffer.  */
	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
	}
      break;

    default:
      abort();
    }

  /* The assembly stubs save the vector registers only when at least
     one argument needs them.  */
  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
	flags |= AARCH64_FLAG_ARG_V;
	break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement. */
  cif->bytes = ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls.
   NFIXEDARGS is the number of named (fixed) parameters; arguments at
   or after that index are laid out under the variadic rules (see
   ffi_call_int).  NTOTALARGS is not needed here but is part of the
   common interface.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
				    unsigned int nfixedargs,
				    unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);

  (void) ntotalargs;  /* Unused; silence -Wunused-parameter.  */
  cif->aarch64_nfixedargs = nfixedargs;

  return status;
}
#endif /* __APPLE__ */
extern void ffi_call_SYSV (struct call_context *context, void *frame,
			   void (*fn)(void), void *rvalue, int flags,
			   void *closure) FFI_HIDDEN;

/* Call a function with the provided arguments and capture the return
   value.  Marshals AVALUE into a freshly alloca'd call_context +
   argument stack, then hands off to the assembly stub ffi_call_SYSV.  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
	      void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
	rsize = rtype_size;   /* Provide scratch space for the callee.  */
    }
  else if (orig_rvalue == NULL)
    flags &= AARCH64_FLAG_ARG_V;   /* Keep only the arg-V bit: void return. */
  else if (flags & AARCH64_RET_NEED_COPY)
    rsize = 16;   /* Stage the register-sized return, copy out later.  */

  /* Allocate consecutive stack for everything we'll need:
     [context][argument stack][32-byte frame][optional rvalue scratch].  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = stack + stack_bytes;
  rvalue = (rsize ? frame + 32 : orig_rvalue);

  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	do_pointer:
	  {
	    ffi_arg ext = extend_integer_type (a, t);
	    if (state.ngrn < N_X_ARG_REG)
	      context->x[state.ngrn++] = ext;
	    else
	      {
		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
		state.ngrn = N_X_ARG_REG;
		/* Note that the default abi extends each argument
		   to a full 64-bit slot, while the iOS abi allocates
		   only enough space. */
#ifdef __APPLE__
		memcpy(d, a, s);
#else
		*(ffi_arg *)d = ext;
#endif
	      }
	  }
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  {
	    void *dest;

	    h = is_vfp_type (ty);
	    if (h)
	      {
		/* HFA: needs 4 - (h & 3) consecutive V registers.  */
		int elems = 4 - (h & 3);
		if (state.nsrn + elems <= N_V_ARG_REG)
		  {
		    dest = &context->v[state.nsrn];
		    state.nsrn += elems;
		    extend_hfa_type (dest, a, h);
		    break;
		  }
		state.nsrn = N_V_ARG_REG;
		dest = allocate_to_stack (&state, stack, ty->alignment, s);
	      }
	    else if (s > 16)
	      {
		/* If the argument is a composite type that is larger than 16
		   bytes, then the argument has been copied to memory, and
		   the argument is replaced by a pointer to the copy.  */
		a = &avalue[i];
		t = FFI_TYPE_POINTER;
		goto do_pointer;
	      }
	    else
	      {
		size_t n = (s + 7) / 8;
		if (state.ngrn + n <= N_X_ARG_REG)
		  {
		    /* If the argument is a composite type and the size in
		       double-words is not more than the number of available
		       X registers, then the argument is copied into
		       consecutive X registers.  */
		    dest = &context->x[state.ngrn];
		    state.ngrn += n;
		  }
		else
		  {
		    /* Otherwise, there are insufficient X registers.  Further
		       X register allocations are prevented, the NSAA is
		       adjusted and the argument is copied to memory at the
		       adjusted NSAA.  */
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
	      }
	    memcpy (dest, a, s);
	  }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      /* Past the last fixed argument, everything goes to the stack
	 under Apple's variadic convention.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  /* Small aggregates were staged in the scratch slot; copy the real
     rtype_size bytes out to the caller's buffer.  */
  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}
/* Public entry point: invoke FN according to CIF, reading arguments
   from AVALUE and storing the return value at RVALUE.  */
void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
#ifdef FFI_GO_CLOSURES
/* Like ffi_call, but additionally passes CLOSURE through to the
   assembly stub for Go closure support.  */
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */
723 /* Build a trampoline. */
725 extern void ffi_closure_SYSV (void) FFI_HIDDEN;
726 extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
728 #if FFI_EXEC_TRAMPOLINE_TABLE
730 #include <mach/mach.h>
731 #include <pthread.h>
732 #include <stdio.h>
733 #include <stdlib.h>
/* Start of the prebuilt page of trampoline code, provided by the
   assembly.  */
extern void *ffi_closure_trampoline_table_page;

typedef struct ffi_trampoline_table ffi_trampoline_table;
typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;

/* One allocation unit of trampolines: a writable config page directly
   followed by a remapped page of executable trampoline code.  Tables
   are kept on a doubly-linked list headed by ffi_trampoline_tables.  */
struct ffi_trampoline_table
{
  /* contiguous writable and executable pages */
  vm_address_t config_page;
  vm_address_t trampoline_page;

  /* free list tracking */
  uint16_t free_count;
  ffi_trampoline_table_entry *free_list;
  ffi_trampoline_table_entry *free_list_pool;

  ffi_trampoline_table *prev;
  ffi_trampoline_table *next;
};

/* One trampoline slot within a table, chained on the table's free
   list while unused.  */
struct ffi_trampoline_table_entry
{
  void *(*trampoline) ();
  ffi_trampoline_table_entry *next;
};
/* The trampoline configuration is placed a page prior to the trampoline's
   entry point.  (The spurious trailing semicolon inside the original
   definition has been removed: it only compiled because every use site
   was a full declaration statement, and would break the macro in any
   expression context.)  */
#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) \
  ((void **) (((uint8_t *) codeloc) - PAGE_SIZE))

/* Total number of trampolines that fit in one trampoline table.  */
#define FFI_TRAMPOLINE_COUNT (PAGE_SIZE / FFI_TRAMPOLINE_SIZE)
/* Serializes all trampoline-table bookkeeping below.  */
static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
/* Head of the doubly-linked list of trampoline tables; the table with
   available entries (if any) is kept at the front.  */
static ffi_trampoline_table *ffi_trampoline_tables = NULL;
770 static ffi_trampoline_table *
771 ffi_trampoline_table_alloc ()
773 ffi_trampoline_table *table = NULL;
775 /* Loop until we can allocate two contiguous pages */
776 while (table == NULL)
778 vm_address_t config_page = 0x0;
779 kern_return_t kt;
781 /* Try to allocate two pages */
782 kt =
783 vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2,
784 VM_FLAGS_ANYWHERE);
785 if (kt != KERN_SUCCESS)
787 fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt,
788 __FILE__, __LINE__);
789 break;
792 /* Now drop the second half of the allocation to make room for the trampoline table */
793 vm_address_t trampoline_page = config_page + PAGE_SIZE;
794 kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
795 if (kt != KERN_SUCCESS)
797 fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
798 __FILE__, __LINE__);
799 break;
802 /* Remap the trampoline table to directly follow the config page */
803 vm_prot_t cur_prot;
804 vm_prot_t max_prot;
806 kt =
807 vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE,
808 mach_task_self (),
809 (vm_address_t) & ffi_closure_trampoline_table_page, FALSE,
810 &cur_prot, &max_prot, VM_INHERIT_SHARE);
812 /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
813 if (kt != KERN_SUCCESS)
815 /* Log unexpected failures */
816 if (kt != KERN_NO_SPACE)
818 fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt,
819 __FILE__, __LINE__);
822 vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
823 continue;
826 /* We have valid trampoline and config pages */
827 table = calloc (1, sizeof (ffi_trampoline_table));
828 table->free_count = FFI_TRAMPOLINE_COUNT;
829 table->config_page = config_page;
830 table->trampoline_page = trampoline_page;
832 /* Create and initialize the free list */
833 table->free_list_pool =
834 calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
836 uint16_t i;
837 for (i = 0; i < table->free_count; i++)
839 ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
840 entry->trampoline =
841 (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
843 if (i < table->free_count - 1)
844 entry->next = &table->free_list_pool[i + 1];
847 table->free_list = table->free_list_pool;
850 return table;
853 void *
854 ffi_closure_alloc (size_t size, void **code)
856 /* Create the closure */
857 ffi_closure *closure = malloc (size);
858 if (closure == NULL)
859 return NULL;
861 pthread_mutex_lock (&ffi_trampoline_lock);
863 /* Check for an active trampoline table with available entries. */
864 ffi_trampoline_table *table = ffi_trampoline_tables;
865 if (table == NULL || table->free_list == NULL)
867 table = ffi_trampoline_table_alloc ();
868 if (table == NULL)
870 free (closure);
871 return NULL;
874 /* Insert the new table at the top of the list */
875 table->next = ffi_trampoline_tables;
876 if (table->next != NULL)
877 table->next->prev = table;
879 ffi_trampoline_tables = table;
882 /* Claim the free entry */
883 ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
884 ffi_trampoline_tables->free_list = entry->next;
885 ffi_trampoline_tables->free_count--;
886 entry->next = NULL;
888 pthread_mutex_unlock (&ffi_trampoline_lock);
890 /* Initialize the return values */
891 *code = entry->trampoline;
892 closure->trampoline_table = table;
893 closure->trampoline_table_entry = entry;
895 return closure;
898 void
899 ffi_closure_free (void *ptr)
901 ffi_closure *closure = ptr;
903 pthread_mutex_lock (&ffi_trampoline_lock);
905 /* Fetch the table and entry references */
906 ffi_trampoline_table *table = closure->trampoline_table;
907 ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
909 /* Return the entry to the free list */
910 entry->next = table->free_list;
911 table->free_list = entry;
912 table->free_count++;
914 /* If all trampolines within this table are free, and at least one other table exists, deallocate
915 * the table */
916 if (table->free_count == FFI_TRAMPOLINE_COUNT
917 && ffi_trampoline_tables != table)
919 /* Remove from the list */
920 if (table->prev != NULL)
921 table->prev->next = table->next;
923 if (table->next != NULL)
924 table->next->prev = table->prev;
926 /* Deallocate pages */
927 kern_return_t kt;
928 kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
929 if (kt != KERN_SUCCESS)
930 fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
931 __FILE__, __LINE__);
933 kt =
934 vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
935 if (kt != KERN_SUCCESS)
936 fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
937 __FILE__, __LINE__);
939 /* Deallocate free list */
940 free (table->free_list_pool);
941 free (table);
943 else if (ffi_trampoline_tables != table)
945 /* Otherwise, bump this table to the top of the list */
946 table->prev = NULL;
947 table->next = ffi_trampoline_tables;
948 if (ffi_trampoline_tables != NULL)
949 ffi_trampoline_tables->prev = table;
951 ffi_trampoline_tables = table;
954 pthread_mutex_unlock (&ffi_trampoline_lock);
956 /* Free the closure */
957 free (closure);
960 #endif
/* Initialize CLOSURE so that calls through CODELOC are routed to FUN
   with USER_DATA.  Depending on the build, either write the closure
   pointer and stub address into the trampoline's config page, or emit
   an inline 3-instruction trampoline followed by the stub address.  */
ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
		      ffi_cif* cif,
		      void (*fun)(ffi_cif*,void*,void**,void*),
		      void *user_data,
		      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  void (*start)(void);

  /* Pick the stub that also saves/restores the vector registers when
     any argument is passed in them.  */
  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
  /* The config slots live one page before the executable trampoline.  */
  void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc);
  config[0] = closure;
  config[1] = start;
#else
  /* Three fixed instructions; the stub address is stored at tramp+16
     and loaded pc-relative by the first instruction.  */
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;

  memcpy (tramp, trampoline, sizeof(trampoline));

  *(UINT64 *)(tramp + 16) = (uintptr_t)start;

  /* The trampoline is data until the caches agree.  */
  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
1005 #ifdef FFI_GO_CLOSURES
1006 extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
1007 extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
1009 ffi_status
1010 ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
1011 void (*fun)(ffi_cif*,void*,void**,void*))
1013 void (*start)(void);
1015 if (cif->abi != FFI_SYSV)
1016 return FFI_BAD_ABI;
1018 if (cif->flags & AARCH64_FLAG_ARG_V)
1019 start = ffi_go_closure_SYSV_V;
1020 else
1021 start = ffi_go_closure_SYSV;
1023 closure->tramp = start;
1024 closure->cif = cif;
1025 closure->fun = fun;
1027 return FFI_OK;
1029 #endif /* FFI_GO_CLOSURES */
/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshalls the call context into ffi value
   descriptors, invokes the wrapped function, then marshalls the return
   value back into the call context.  */
int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
			void (*fun)(ffi_cif*,void*,void**,void*),
			void *user_data,
			struct call_context *context,
			void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  /* Walk the argument list with the same allocation rules the caller
     used, so each avalue[i] points at where the caller placed it.  */
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  h = is_vfp_type (ty);
	  if (h)
	    {
	      /* HFA spread over 4 - (h & 3) vector registers.  */
	      n = 4 - (h & 3);
	      if (state.nsrn + n <= N_V_ARG_REG)
		{
		  void *reg = &context->v[state.nsrn];
		  state.nsrn += n;

		  /* Eeek! We need a pointer to the structure, however the
		     homogeneous float elements are being passed in individual
		     registers, therefore for float and double the structure
		     is not represented as a contiguous sequence of bytes in
		     our saved register context.  We don't need the original
		     contents of the register storage, so we reformat the
		     structure into the same memory.  */
		  avalue[i] = compress_hfa_type (reg, reg, h);
		}
	      else
		{
		  state.nsrn = N_V_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  else if (s > 16)
	    {
	      /* Replace Composite type of size greater than 16 with a
		 pointer.  */
	      avalue[i] = *(void **)
		allocate_int_to_reg_or_stack (context, &state, stack,
					      sizeof (void *));
	    }
	  else
	    {
	      n = (s + 7) / 8;
	      if (state.ngrn + n <= N_X_ARG_REG)
		{
		  /* Small composite passed in consecutive X registers.  */
		  avalue[i] = &context->x[state.ngrn];
		  state.ngrn += n;
		}
	      else
		{
		  state.ngrn = N_X_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  break;

	default:
	  abort();
	}
    }

  flags = cif->flags;
  /* Large aggregates are returned through the hidden pointer the
     caller supplied in x8, which the wrapper passed as struct_rvalue.  */
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  /* The assembler wrapper uses the returned flags to decide how to
     move rvalue back into the return registers.  */
  return flags;
}