2 * Support code for multithreading.
4 * Copyright: Copyright Mikola Lysenko 2005 - 2012.
5 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
6 * Authors: Mikola Lysenko, Martin Nowak, Kai Nacke
10 * Copyright Mikola Lysenko 2005 - 2012.
11 * Distributed under the Boost Software License, Version 1.0.
12 * (See accompanying file LICENSE_1_0.txt or copy at
13 * http://www.boost.org/LICENSE_1_0.txt)
16 /* NOTE: This file has been patched from the original DMD distribution to
17 * work with the GDC compiler.
19 #if (__linux__ || __FreeBSD__ || __NetBSD__ || __DragonFly__) && __ELF__
21 * Mark the resulting object file as not requiring execution permissions on
22 * stack memory. The absence of this section would mark the whole resulting
23 * library as requiring an executable stack, making it impossible to
24 * dynamically load druntime on several Linux platforms where this is
25 * forbidden due to security policies.
26 * Use %progbits instead of @progbits to support ARM and X86.
28 .section .note.GNU-stack,"",%progbits
31 /* Let preprocessor tell us if C symbols have a prefix: __USER_LABEL_PREFIX__ */
32 #ifdef __USER_LABEL_PREFIX__
/* Two-level paste: GLUE lets the preprocessor expand its arguments first
   (so __USER_LABEL_PREFIX__ becomes the actual prefix) and only then hands
   them to GLUE2, which joins them with ##. */
33 #define GLUE2(a, b) a ## b
34 #define GLUE(a, b) GLUE2(a, b)
/* CSYM(name): name with the compiler's C symbol prefix prepended. */
35 #define CSYM(name) GLUE(__USER_LABEL_PREFIX__, name)
/* Fallback when no prefix macro is provided: use the name unchanged. */
37 #define CSYM(name) name
40 /************************************************************************************
42 ************************************************************************************/
43 #if defined( __PPC64__ )
45 #if defined(_CALL_ELF) && _CALL_ELF == 2
/* Layout A: the frame is the linkage area plus 26 doublewords, which matches
   the 8 (r3-r10) + 18 (r14-r31) registers stored by the save code.
   r3-r10 start at GPR_OFS and r14-r31 follow 8 doublewords later. */
51 #define STACK_SZ (LINKAGE_SZ + 26*8)
52 #define OFS_R3_R10 GPR_OFS
53 #define OFS_R14_R31 (GPR_OFS+8*8)
/* Layout B: the frame is the linkage area plus 8 + 18 doublewords.
   r14-r31 start at GPR_OFS, while the r3-r10 offset is larger than STACK_SZ,
   i.e. it addresses memory beyond this frame's own STACK_SZ bytes
   (NOTE(review): presumably the caller's parameter save area - confirm). */
59 #define STACK_SZ (LINKAGE_SZ + 8*8 + 18*8)
60 #define OFS_R3_R10 (STACK_SZ+LINKAGE_SZ)
61 #define OFS_R14_R31 GPR_OFS
65 #if defined( USE_ABI_2 )
68 .globl _D4core6thread18callWithStackShellFNbMDFNbPvZvZv
70 .type _D4core6thread18callWithStackShellFNbMDFNbPvZvZv,@function
71 #if defined( USE_ABI_2 )
/* With USE_ABI_2 the entry label is defined in this .text.* section, so the
   section must be flagged executable as well as allocatable ("ax"). */
72 .section .text._D4core6thread18callWithStackShellFNbMDFNbPvZvZv,"ax",@progbits
/* Otherwise the global symbol lives in the writable .opd section. */
74 .section .opd,"aw",@progbits
76 _D4core6thread18callWithStackShellFNbMDFNbPvZvZv:
77 #if !defined( USE_ABI_2 )
/* Without USE_ABI_2 the global symbol begins with the address of the local
   code label .L.<name>. */
79 .quad .L._D4core6thread18callWithStackShellFNbMDFNbPvZvZv
87 * r4: pointer to function
89 .L._D4core6thread18callWithStackShellFNbMDFNbPvZvZv:
/* Local code label. The stores below write r14-r31 and then r3-r10 into the
   slots selected by OFS_R14_R31 and OFS_R3_R10, all addressed relative to r1. */
/* NOTE(review): 256 is hard-coded although STACK_SZ is defined differently for
   the two ABI layouts - confirm it matches the frame size actually allocated. */
94 .cfi_def_cfa_offset 256
97 /* Save r14-r31 in general register save area */
98 std 14, (OFS_R14_R31 + 0 * 8)(1)
99 std 15, (OFS_R14_R31 + 1 * 8)(1)
100 std 16, (OFS_R14_R31 + 2 * 8)(1)
101 std 17, (OFS_R14_R31 + 3 * 8)(1)
102 std 18, (OFS_R14_R31 + 4 * 8)(1)
103 std 19, (OFS_R14_R31 + 5 * 8)(1)
104 std 20, (OFS_R14_R31 + 6 * 8)(1)
105 std 21, (OFS_R14_R31 + 7 * 8)(1)
106 std 22, (OFS_R14_R31 + 8 * 8)(1)
107 std 23, (OFS_R14_R31 + 9 * 8)(1)
108 std 24, (OFS_R14_R31 + 10 * 8)(1)
109 std 25, (OFS_R14_R31 + 11 * 8)(1)
110 std 26, (OFS_R14_R31 + 12 * 8)(1)
111 std 27, (OFS_R14_R31 + 13 * 8)(1)
112 std 28, (OFS_R14_R31 + 14 * 8)(1)
113 std 29, (OFS_R14_R31 + 15 * 8)(1)
114 std 30, (OFS_R14_R31 + 16 * 8)(1)
115 std 31, (OFS_R14_R31 + 17 * 8)(1)
117 /* Save r3-r10 in parameter save area of caller */
/* Which memory OFS_R3_R10 addresses depends on the layout macros selected
   earlier; the restore sequence later reads back the same slots. */
118 std 3, (OFS_R3_R10 + 0 * 8)(1)
119 std 4, (OFS_R3_R10 + 1 * 8)(1)
120 std 5, (OFS_R3_R10 + 2 * 8)(1)
121 std 6, (OFS_R3_R10 + 3 * 8)(1)
122 std 7, (OFS_R3_R10 + 4 * 8)(1)
123 std 8, (OFS_R3_R10 + 5 * 8)(1)
124 std 9, (OFS_R3_R10 + 6 * 8)(1)
125 std 10, (OFS_R3_R10 + 7 * 8)(1)
127 /* Save r2 in TOC save area */
130 /* Do not save r11, r12 and r13. */
133 * r3: pointer to context
134 * r4: pointer to stack
145 /* Restore r2 from TOC save area */
148 /* Restore r3-r10 from the OFS_R3_R10 slots they were saved to above */
149 ld 3, (OFS_R3_R10 + 0 * 8)(1)
150 ld 4, (OFS_R3_R10 + 1 * 8)(1)
151 ld 5, (OFS_R3_R10 + 2 * 8)(1)
152 ld 6, (OFS_R3_R10 + 3 * 8)(1)
153 ld 7, (OFS_R3_R10 + 4 * 8)(1)
154 ld 8, (OFS_R3_R10 + 5 * 8)(1)
155 ld 9, (OFS_R3_R10 + 6 * 8)(1)
156 ld 10, (OFS_R3_R10 + 7 * 8)(1)
158 /* Restore r14-r31 from general register save area */
159 ld 14, (OFS_R14_R31 + 0 * 8)(1)
160 ld 15, (OFS_R14_R31 + 1 * 8)(1)
161 ld 16, (OFS_R14_R31 + 2 * 8)(1)
162 ld 17, (OFS_R14_R31 + 3 * 8)(1)
163 ld 18, (OFS_R14_R31 + 4 * 8)(1)
164 ld 19, (OFS_R14_R31 + 5 * 8)(1)
165 ld 20, (OFS_R14_R31 + 6 * 8)(1)
166 ld 21, (OFS_R14_R31 + 7 * 8)(1)
167 ld 22, (OFS_R14_R31 + 8 * 8)(1)
168 ld 23, (OFS_R14_R31 + 9 * 8)(1)
169 ld 24, (OFS_R14_R31 + 10 * 8)(1)
170 ld 25, (OFS_R14_R31 + 11 * 8)(1)
171 ld 26, (OFS_R14_R31 + 12 * 8)(1)
172 ld 27, (OFS_R14_R31 + 13 * 8)(1)
173 ld 28, (OFS_R14_R31 + 14 * 8)(1)
174 ld 29, (OFS_R14_R31 + 15 * 8)(1)
175 ld 30, (OFS_R14_R31 + 16 * 8)(1)
176 ld 31, (OFS_R14_R31 + 17 * 8)(1)
185 .size _D4core6thread18callWithStackShellFNbMDFNbPvZvZv, .Lend-.L._D4core6thread18callWithStackShellFNbMDFNbPvZvZv
188 #elif defined( __ppc__ ) || defined( __PPC__ ) || defined( __powerpc__ )
192 * Performs a context switch.
194 * r3 - old context pointer
195 * r4 - new context pointer
200 .globl _fiber_switchContext
201 _fiber_switchContext:
203 /* Save linkage area */
229 stwu 31, (-20 * 4)(1)
231 /* We update the stack pointer here, since we do not want the GC to
232 scan the floating point registers. */
244 stfd 23, (-10 * 8)(1)
245 stfd 24, (-11 * 8)(1)
246 stfd 25, (-12 * 8)(1)
247 stfd 26, (-13 * 8)(1)
248 stfd 27, (-14 * 8)(1)
249 stfd 28, (-15 * 8)(1)
250 stfd 29, (-16 * 8)(1)
251 stfd 30, (-17 * 8)(1)
252 stfd 31, (-18 * 8)(1)
254 /* Update the old stack pointer */
257 /* Set new stack pointer */
260 /* Restore linkage area */
307 /* Set condition and link register */
311 /* Return and switch context */
314 #elif defined(__mips__) && _MIPS_SIM == _ABIO32
315 /************************************************************************************
317 ************************************************************************************/
320 * Performs a context switch.
322 * $a0 - void** - ptr to old stack pointer
323 * $a1 - void* - new stack pointer
327 .globl fiber_switchContext
329 addiu $sp, $sp, -(10 * 4)
331 // fp regs and return address are stored below the stack
332 // because we don't want the GC to scan them.
334 #ifdef __mips_hard_float
/* ALIGN8(val): round val up to the next multiple of 8.
   The argument is parenthesised so that a compound expression such as
   6 * 8 + 4 is negated and masked as a whole. The expansion is evaluated by
   the assembler, where & binds tighter than +, so without the parentheses
   only part of the argument would be masked. */
335 #define ALIGN8(val) ((val) + (-(val) & 7))
/* BELOW: bytes addressed beneath $sp for the six 8-byte FP register slots
   plus one 4-byte slot (presumably the return address), rounded up to 8. */
336 #define BELOW (ALIGN8(6 * 8 + 4))
337 sdc1 $f20, (0 * 8 - BELOW)($sp)
338 sdc1 $f22, (1 * 8 - BELOW)($sp)
339 sdc1 $f24, (2 * 8 - BELOW)($sp)
340 sdc1 $f26, (3 * 8 - BELOW)($sp)
341 sdc1 $f28, (4 * 8 - BELOW)($sp)
342 sdc1 $f30, (5 * 8 - BELOW)($sp)
357 // swap stack pointer
361 #ifdef __mips_hard_float
362 ldc1 $f20, (0 * 8 - BELOW)($sp)
363 ldc1 $f22, (1 * 8 - BELOW)($sp)
364 ldc1 $f24, (2 * 8 - BELOW)($sp)
365 ldc1 $f26, (3 * 8 - BELOW)($sp)
366 ldc1 $f28, (4 * 8 - BELOW)($sp)
367 ldc1 $f30, (5 * 8 - BELOW)($sp)
382 addiu $sp, $sp, (10 * 4)
386 #elif defined(__arm__) && defined(__ARM_EABI__)
387 /************************************************************************************
389 ************************************************************************************/
392 * Performs a context switch.
395 * r0 - void** - ptr to old stack pointer
396 * r1 - void* - new stack pointer
398 * ARM EABI registers:
399 * r0-r3 : argument/scratch registers
400 * r4-r10 : callee-save registers
401 * r11 : frame pointer (or a callee save register if fp isn't needed)
402 * r12 =ip : inter procedure register. We can treat it like any other scratch register
403 * r13 =sp : stack pointer
404 * r14 =lr : link register, it contains the return address (belonging to the function which called us)
405 * r15 =pc : program counter
407 * For floating point registers:
408 * According to AAPCS (version 2.09, section 5.1.2) only the d8-d15 registers need to be preserved
409 * across method calls. This applies to all ARM FPU variants: whether they have 16 or 32 double registers,
410 * NEON support or not, half-float support or not, and so on does not matter.
412 * Note: If this file was compiled with -mfloat-abi=soft but the code runs on a softfp system with fpu the d8-d15
413 * registers won't be saved (we do not know that the system has got a fpu in that case) but the registers might actually
414 * be used by other code if it was compiled with -mfloat-abi=softfp.
416 * Interworking is only supported on ARMv5+, not on ARM v4T as ARM v4t requires special stubs when changing
417 * from thumb to arm mode or the other way round.
422 .global fiber_switchContext
423 #if defined(__ARM_PCS_VFP) || (defined(__ARM_PCS) && !defined(__SOFTFP__)) // ARM_HardFloat || ARM_SoftFP
426 .type fiber_switchContext, %function
430 // update the oldp pointer. Link register and floating point registers stored later to prevent the GC from
433 // push r0 (or any other register) as well to keep stack 8byte aligned
436 #if defined(__ARM_PCS_VFP) || (defined(__ARM_PCS) && !defined(__SOFTFP__)) // ARM_HardFloat || ARM_SoftFP
438 // now switch over to the new stack. Need to subtract (8*8[d8-d15]+2*4[r0, lr]) to position stack pointer
439 // below the last saved register. Remember we saved the SP before pushing [r0, lr, d8-d15]
446 // we don't really care about r0, we only used that for padding.
447 // r1 is now what used to be in the link register when saving.
450 * The link register for the initial jump to fiber_entryPoint must be zero: The jump actually
451 * looks like a normal method call as we jump to the start of the fiber_entryPoint function.
452 * Although fiber_entryPoint never returns and therefore never accesses lr, it saves lr to the stack.
453 * ARM unwinding will then look at the stack, find lr and think that fiber_entryPoint was called by
454 * the function in lr! So if we have some address in lr the unwinder will try to continue stack unwinding,
455 * although it's already at the stack base and crash.
456 * In all other cases the content of lr doesn't matter.
457 * Note: If we simply loaded into lr above and then moved lr into pc, the initial method call
458 * to fiber_entryPoint would look as if it was called from fiber_entryPoint itself, as the fiber_entryPoint
459 * address is in lr on the initial context switch.
462 // return by writing lr into pc
466 #elif defined(__aarch64__)
467 /************************************************************************************
468 * AArch64 (arm64) ASM BITS
469 ************************************************************************************/
471 * preserve/restore AAPCS64 registers
472 * x19-x28 5.1.1 64-bit callee saved
473 * x29 fp, or possibly callee saved reg - depends on platform choice 5.2.3)
475 * d8-d15 5.1.2 says callee only must save bottom 64-bits (the "d" regs)
477 * saved regs on stack will look like:
482 * 9: x29 (fp) <-- oldp / *newp save stack top
// AArch64 context switch.
//   x0 - oldp: location that receives the outgoing context pointer
//   x1 - newp: incoming context pointer
// A 20-doubleword save area (d8-d15, lr, fp, x19-x28) is pushed on the current
// stack, sp is moved to 9 doublewords below newp, and the same area is popped
// from the new stack.
489 .global CSYM(fiber_switchContext)
// Use CSYM here as well so .type names the same symbol that is exported and defined.
490 .type CSYM(fiber_switchContext), %function
492 CSYM(fiber_switchContext):
// Pre-decrement sp by 20*8 bytes; the remaining pairs fill the area upwards.
493 stp d15, d14, [sp, #-20*8]!
494 stp d13, d12, [sp, #2*8]
495 stp d11, d10, [sp, #4*8]
496 stp d9, d8, [sp, #6*8]
497 stp x30, x29, [sp, #8*8] // lr, fp
498 stp x28, x27, [sp, #10*8]
499 stp x26, x25, [sp, #12*8]
500 stp x24, x23, [sp, #14*8]
501 stp x22, x21, [sp, #16*8]
502 stp x20, x19, [sp, #18*8]
504 // oldp is set above saved lr (x30) to hide it and float regs
507 str x19, [x0] // *oldp tstack
// newp refers to slot 9 of the save area, so the area's base is 9*8 below it.
508 sub sp, x1, #9*8 // switch to newp sp
// Reload in the reverse order of the stores above.
510 ldp x20, x19, [sp, #18*8]
511 ldp x22, x21, [sp, #16*8]
512 ldp x24, x23, [sp, #14*8]
513 ldp x26, x25, [sp, #12*8]
514 ldp x28, x27, [sp, #10*8]
515 ldp x30, x29, [sp, #8*8] // lr, fp
516 ldp d9, d8, [sp, #6*8]
517 ldp d11, d10, [sp, #4*8]
518 ldp d13, d12, [sp, #2*8]
// Post-increment releases the whole 20*8-byte save area.
519 ldp d15, d14, [sp], #20*8
524 * When generating any kind of backtrace (gdb, exception handling) for
525 * a function called in a Fiber, we need to tell the unwinder to stop
526 * at our Fiber main entry point, i.e. we need to mark the bottom of
527 * the call stack. This can be done by clearing the link register lr
528 * prior to calling fiber_entryPoint (i.e. in fiber_switchContext) or
529 * using a .cfi_undefined directive for the link register in the
530 * Fiber entry point. cfi_undefined seems to yield better results in gdb.
531 * Unfortunately we can't place it into fiber_entryPoint using inline
532 * asm, so we use this trampoline instead.
// Trampoline entry symbol. .type uses CSYM so that it refers to the same
// symbol name as the .global directive and the label.
535 .global CSYM(fiber_trampoline)
537 .type CSYM(fiber_trampoline), %function
538 CSYM(fiber_trampoline):
541 // fiber_entryPoint never returns
545 #elif defined(__MINGW32__)
546 /************************************************************************************
548 ************************************************************************************/
549 #if defined(__x86_64__)
550 .global fiber_switchContext
565 // load newp to begin context switch
568 // load saved state from new stack
579 // 'return' to complete switch
583 .global _fiber_switchContext
584 _fiber_switchContext:
585 // Save current stack state.
586 // Standard CDECL prologue.
597 // store oldp again with more accurate address
600 // load newp to begin context switch
603 // load saved state from new stack
613 // 'return' to complete switch
617 // if POSIX boils down to this (reference http://nadeausoftware.com)
618 #elif !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)))
619 /************************************************************************************
620 * i386- and x86_64-apple-darwin POSIX ASM BITS
621 ************************************************************************************/
622 #if defined(__i386__)
625 .globl CSYM(fiber_switchContext)
626 CSYM(fiber_switchContext):
627 // save current stack state
635 // store oldp again with more accurate address
638 // load newp to begin context switch
641 // load saved state from new stack
648 // 'return' to complete switch
651 #elif defined(__x86_64__) && !defined(__ILP32__)
654 .globl CSYM(fiber_switchContext)
655 CSYM(fiber_switchContext):
656 // Save current stack state.
665 // store oldp again with more accurate address
667 // load newp to begin context switch
670 // load saved state from new stack
678 // 'return' to complete switch
680 #endif // __x86_64__ && !__ILP32__