1 # x86/x86_64 support for -fsplit-stack.
2 # Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 # Contributed by Ian Lance Taylor <iant@google.com>.
5 # This file is part of GCC.
7 # GCC is free software; you can redistribute it and/or modify it under
8 # the terms of the GNU General Public License as published by the Free
9 # Software Foundation; either version 3, or (at your option) any later
12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 # Under Section 7 of GPL version 3, you are granted additional
18 # permissions described in the GCC Runtime Library Exception, version
19 # 3.1, as published by the Free Software Foundation.
21 # You should have received a copy of the GNU General Public License and
22 # a copy of the GCC Runtime Library Exception along with this program;
23 # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 # <http://www.gnu.org/licenses/>.
26 #include "auto-host.h"
28 # Support for allocating more stack space when using -fsplit-stack.
29 # When a function discovers that it needs more stack space, it will
30 # call __morestack with the size of the stack frame and the size of
31 # the parameters to copy from the old stack frame to the new one.
32 # The __morestack function preserves the parameter registers and
33 # calls __generic_morestack to actually allocate the stack space.
35 # When this is called stack space is very low, but we ensure that
36 # there is enough space to push the parameter registers and to call
37 # __generic_morestack.
39 # When calling __generic_morestack, FRAME_SIZE points to the size of
40 # the desired frame when the function is called, and the function
41 # sets it to the size of the allocated stack. OLD_STACK points to
42 # the parameters on the old stack and PARAM_SIZE is the number of
43 # bytes of parameters to copy to the new stack. These are the
44 # parameters of the function that called __morestack. The
45 # __generic_morestack function returns the new stack pointer,
46 # pointing to the address of the first copied parameter. The return
47 # value minus the returned *FRAME_SIZE will be the first address on
48 # the stack which we should not use.
50 # void *__generic_morestack (size_t *frame_size, void *old_stack, size_t param_size);
53 # The __morestack routine has to arrange for the caller to return to a
54 # stub on the new stack. The stub is responsible for restoring the
55 # old stack pointer and returning to the caller's caller. This calls
56 # __generic_releasestack to retrieve the old stack pointer and release
57 # the newly allocated stack.
59 # void *__generic_releasestack (size_t *available);
61 # We do a little dance so that the processor's call/return return
62 # address prediction works out. The compiler arranges for the caller
# to look like this:
64 # call __generic_morestack
# ret
# L:
67 # // carry on with function
68 # After we allocate more stack, we call L, which is in our caller.
69 # When that returns (to the predicted instruction), we release the
70 # stack segment and reset the stack pointer. We then return to the
71 # predicted instruction, namely the ret instruction immediately after
72 # the call to __generic_morestack. That then returns to the caller of
73 # the original caller.
76 # The amount of extra space we ask for. In general this has to be
77 # enough for the dynamic loader to find a symbol and for a signal
# BACKOFF is added to the frame size requested from __generic_morestack
# and to every stack boundary that __morestack computes.
81 #define BACKOFF (1024)
83 #define BACKOFF (3584)
87 # The amount of space we ask for when calling non-split-stack code.
# 0x100000 bytes is 1 MiB.
88 #define NON_SPLIT_STACK 0x100000
90 # This entry point is for split-stack code which calls non-split-stack
91 # code. When the linker sees this case, it converts the call to
92 # __morestack to call __morestack_non_split instead. We just bump the
93 # requested stack space by NON_SPLIT_STACK + 0x1000 + BACKOFF bytes.
#
# The 32-bit version reads the new stack frame size from the stack and
# the 64-bit version reads it from %r10. When the current stack can
# already hold the frame plus NON_SPLIT_STACK bytes, we normally return
# straight to the caller, past its ret instruction. Otherwise the
# request is enlarged and control continues into __morestack.
97 .global __morestack_non_split
98 .hidden __morestack_non_split
101 .type __morestack_non_split,@function
104 __morestack_non_split:
109 # See below for an extended explanation of this.
112 pushl %eax # Save %eax in case it is a parameter.
114 .cfi_adjust_cfa_offset 4 # Account for pushed register.
116 movl %esp,%eax # Current stack,
117 subl 8(%esp),%eax # less required stack frame size,
118 subl $NON_SPLIT_STACK,%eax # less space for non-split code.
119 cmpl %gs:0x30,%eax # See if we have enough space.
120 jb 2f # Get more space if we need it.
123 # %esp + 20: stack pointer after two returns
124 # %esp + 16: return address of morestack caller's caller
125 # %esp + 12: size of parameters
126 # %esp + 8: new stack frame size
127 # %esp + 4: return address of this function
130 # Since we aren't doing a full split stack, we don't need to
131 # do anything when our caller returns. So we return to our
132 # caller rather than calling it, and let it return as usual.
133 # To make that work we adjust the return address.
135 # This breaks call/return address prediction for the call to
136 # this function. I can't figure out a way to make it work
137 # short of copying the parameters down the stack, which will
138 # probably take more clock cycles than we will lose breaking
139 # call/return address prediction. We will only break
140 # prediction for this call, not for our caller.
142 movl 4(%esp),%eax # Increment the return address
143 cmpb $0xc3,(%eax) # to skip the ret instruction;
148 # If the instruction that we return to is
149 # leal 20(%ebp),{%eax,%ecx,%edx}
150 # then we have been called by a varargs function that expects
151 # %ebp to hold a real value. That can only work if we do the
152 # full stack split routine. FIXME: This is fragile.
165 movl %eax,4(%esp) # Update return address.
167 popl %eax # Restore %eax and stack.
169 .cfi_adjust_cfa_offset -4 # Account for popped register.
171 ret $8 # Return to caller, popping args.
174 .cfi_adjust_cfa_offset 4 # Back to where we were.
176 popl %eax # Restore %eax and stack.
178 .cfi_adjust_cfa_offset -4 # Account for popped register.
180 # Increment space we request.
181 addl $NON_SPLIT_STACK+0x1000+BACKOFF,4(%esp)
183 # Fall through into morestack.
187 # See below for an extended explanation of this.
190 pushq %rax # Save %rax in case caller is using
191 # it to preserve original %r10.
192 .cfi_adjust_cfa_offset 8 # Adjust for pushed register.
194 movq %rsp,%rax # Current stack,
195 subq %r10,%rax # less required stack frame size,
196 subq $NON_SPLIT_STACK,%rax # less space for non-split code.
199 cmpq %fs:0x70,%rax # See if we have enough space.
204 jb 2f # Get more space if we need it.
206 # If the instruction that we return to is
207 # leaq 24(%rbp), %r11
208 # then we have been called by a varargs function that expects
209 # %rbp to hold a real value. That can only work if we do the
210 # full stack split routine. FIXME: This is fragile.
212 incq %rax # Skip ret instruction in caller.
# 0x185d8d4c is the little-endian encoding of leaq 24(%rbp),%r11
# (bytes 4c 8d 5d 18).
213 cmpl $0x185d8d4c,(%rax)
216 # This breaks call/return prediction, as described above.
217 incq 8(%rsp) # Increment the return address.
219 popq %rax # Restore register.
221 .cfi_adjust_cfa_offset -8 # Adjust for popped register.
223 ret # Return to caller.
226 popq %rax # Restore register.
228 .cfi_adjust_cfa_offset -8 # Adjust for popped register.
230 # Increment space we request.
231 addq $NON_SPLIT_STACK+0x1000+BACKOFF,%r10
233 # Fall through into morestack.
239 .size __morestack_non_split, . - __morestack_non_split
242 # __morestack_non_split falls through into __morestack.
245 # The __morestack function.
#
# Outline of the steps below (the 32-bit and 64-bit versions follow the
# same sequence): block signals, call __generic_morestack to obtain a
# new stack segment, switch to it and record the new stack boundary,
# unblock signals, and call back into our caller past its ret
# instruction. When the caller returns here, block signals again,
# call __generic_releasestack, record the old segment's stack boundary,
# restore the old stack pointer, unblock signals, and return.
251 .type __morestack,@function
262 # The 32-bit __morestack function.
264 # We use a cleanup to restore the stack guard if an exception
265 # is thrown through this code.
267 .cfi_personality 0,__gcc_personality_v0
270 .cfi_personality 0x9b,DW.ref.__gcc_personality_v0
271 .cfi_lsda 0x1b,.LLSDA1
274 # We return below with a ret $8. We will return to a single
275 # return instruction, which will return to the caller of our
276 # caller. We let the unwinder skip that single return
277 # instruction, and just return to the real caller.
279 # Here CFA points just past the return address on the stack,
280 # e.g., on function entry it is %esp + 4. The stack looks
282 # CFA + 12: stack pointer after two returns
283 # CFA + 8: return address of morestack caller's caller
284 # CFA + 4: size of parameters
285 # CFA: new stack frame size
286 # CFA - 4: return address of this function
287 # CFA - 8: previous value of %ebp; %ebp points here
288 # Setting the new CFA to be the current CFA + 12 (i.e., %esp +
289 # 16) will make the unwinder pick up the right return address.
294 .cfi_adjust_cfa_offset 4
295 .cfi_offset %ebp, -20
297 .cfi_def_cfa_register %ebp
299 # In 32-bit mode the parameters are pushed on the stack. The
300 # argument size is pushed then the new stack frame size is
303 # In the body of a non-leaf function, the stack pointer will
304 # be aligned to a 16-byte boundary. That is CFA + 12 in the
305 # stack picture above: (CFA + 12) % 16 == 0. At this point we
306 # have %esp == CFA - 8, so %esp % 16 == 12. We need some
307 # space for saving registers and passing parameters, and we
308 # need to wind up with %esp % 16 == 0.
311 # Because our cleanup code may need to clobber %ebx, we need
312 # to save it here so the unwinder can restore the value used
313 # by the caller. Note that we don't have to restore the
314 # register, since we don't change it, we just have to save it
317 .cfi_offset %ebx, -24
319 # In 32-bit mode the registers %eax, %edx, and %ecx may be
320 # used for parameters, depending on the regparm and fastcall
327 call __morestack_block_signals
329 movl 12(%ebp),%eax # The size of the parameters.
331 leal 20(%ebp),%eax # Address of caller's parameters.
333 addl $BACKOFF,8(%ebp) # Ask for backoff bytes.
334 leal 8(%ebp),%eax # The address of the new frame size.
337 call __generic_morestack
339 movl %eax,%esp # Switch to the new stack.
340 subl 8(%ebp),%eax # The end of the stack space.
341 addl $BACKOFF,%eax # Back off BACKOFF bytes.
344 # FIXME: The offset must match
345 # TARGET_THREAD_SPLIT_STACK_OFFSET in
346 # gcc/config/i386/linux.h.
347 movl %eax,%gs:0x30 # Save the new stack boundary.
349 call __morestack_unblock_signals
351 movl -12(%ebp),%edx # Restore registers.
354 movl 4(%ebp),%eax # Increment the return address
355 cmpb $0xc3,(%eax) # to skip the ret instruction;
360 movl %eax,-12(%ebp) # Store return address in an
363 movl -8(%ebp),%eax # Restore the last register.
365 call *-12(%ebp) # Call our caller!
367 # The caller will return here, as predicted.
369 # Save the registers which may hold a return value. We
370 # assume that __generic_releasestack does not touch any
371 # floating point or vector registers.
375 # Push the arguments to __generic_releasestack now so that the
376 # stack is at a 16-byte boundary for
377 # __morestack_block_signals.
378 pushl $0 # Where the available space is returned.
379 leal 0(%esp),%eax # Push its address.
382 call __morestack_block_signals
384 call __generic_releasestack
386 subl 4(%esp),%eax # Subtract available space.
387 addl $BACKOFF,%eax # Back off BACKOFF bytes.
389 movl %eax,%gs:0x30 # Save the new stack boundary.
391 addl $8,%esp # Remove values from stack.
393 # We need to restore the old stack pointer, which is in %ebp,
394 # before we unblock signals. We also need to restore %eax and
395 # %edx after we unblock signals but before we return. Do this
396 # by moving %eax and %edx from the current stack to the old
399 popl %edx # Pop return value from current stack.
402 movl %ebp,%esp # Restore stack pointer.
404 # As before, we now have %esp % 16 == 12.
406 pushl %eax # Push return value on old stack.
408 subl $4,%esp # Align stack to 16-byte boundary.
410 call __morestack_unblock_signals
413 popl %edx # Restore return value.
418 # We never changed %ebx, so we don't have to actually restore it.
423 .cfi_def_cfa %esp, 16
424 ret $8 # Return to caller, which will
425 # immediately return. Pop
426 # arguments as we go.
428 # This is the cleanup code called by the stack unwinder when unwinding
429 # through the code between .LEHB0 and .LEHE0 above.
433 subl $16,%esp # Maintain 16 byte alignment.
434 movl %eax,4(%esp) # Save exception header.
435 movl %ebp,(%esp) # Stack pointer after resume.
436 call __generic_findstack
437 movl %ebp,%ecx # Get the stack pointer.
438 subl %eax,%ecx # Subtract available space.
439 addl $BACKOFF,%ecx # Back off BACKOFF bytes.
440 movl %ecx,%gs:0x30 # Save new stack boundary.
441 movl 4(%esp),%eax # Function argument.
444 call __x86.get_pc_thunk.bx # %ebx may not be set up for us.
445 addl $_GLOBAL_OFFSET_TABLE_, %ebx
446 call _Unwind_Resume@PLT # Resume unwinding.
451 #else /* defined(__x86_64__) */
454 # The 64-bit __morestack function.
456 # We use a cleanup to restore the stack guard if an exception
457 # is thrown through this code.
459 .cfi_personality 0x3,__gcc_personality_v0
460 .cfi_lsda 0x3,.LLSDA1
462 .cfi_personality 0x9b,DW.ref.__gcc_personality_v0
463 .cfi_lsda 0x1b,.LLSDA1
466 # We will return to a single return instruction, which will
467 # return to the caller of our caller. Let the unwinder skip
468 # that single return instruction, and just return to the real
472 # Set up a normal backtrace.
474 .cfi_adjust_cfa_offset 8
475 .cfi_offset %rbp, -24
477 .cfi_def_cfa_register %rbp
479 # In 64-bit mode the new stack frame size is passed in r10
480 # and the argument size is passed in r11.
482 addq $BACKOFF,%r10 # Ask for backoff bytes.
483 pushq %r10 # Save new frame size.
485 # In 64-bit mode the registers %rdi, %rsi, %rdx, %rcx, %r8,
486 # and %r9 may be used for parameters. We also preserve %rax
487 # which the caller may use to hold %r10.
499 # We entered morestack with the stack pointer aligned to a
500 # 16-byte boundary (the call to morestack's caller used 8
501 # bytes, and the call to morestack used 8 bytes). We have now
502 # pushed 10 registers, so we are still aligned to a 16-byte
505 call __morestack_block_signals
507 leaq -8(%rbp),%rdi # Address of new frame size.
508 leaq 24(%rbp),%rsi # The caller's parameters.
509 popq %rdx # The size of the parameters.
511 subq $8,%rsp # Align stack.
513 call __generic_morestack
515 movq -8(%rbp),%r10 # Reload modified frame size
516 movq %rax,%rsp # Switch to the new stack.
517 subq %r10,%rax # The end of the stack space.
518 addq $BACKOFF,%rax # Back off BACKOFF bytes.
521 # FIXME: The offset must match
522 # TARGET_THREAD_SPLIT_STACK_OFFSET in
523 # gcc/config/i386/linux64.h.
524 # Macro to save the new stack boundary.
# The argument is a register name without its size prefix: one
# definition stores %r<reg> to %fs:0x70, the other %e<reg> to %fs:0x40.
526 #define X86_64_SAVE_NEW_STACK_BOUNDARY(reg) movq %r##reg,%fs:0x70
528 #define X86_64_SAVE_NEW_STACK_BOUNDARY(reg) movl %e##reg,%fs:0x40
530 X86_64_SAVE_NEW_STACK_BOUNDARY (ax)
532 call __morestack_unblock_signals
534 movq -24(%rbp),%rdi # Restore registers.
541 movq 8(%rbp),%r10 # Increment the return address
542 incq %r10 # to skip the ret instruction;
545 movq -16(%rbp),%rax # Restore caller's %rax.
547 call *%r10 # Call our caller!
549 # The caller will return here, as predicted.
551 # Save the registers which may hold a return value. We
552 # assume that __generic_releasestack does not touch any
553 # floating point or vector registers.
557 call __morestack_block_signals
559 pushq $0 # For alignment.
560 pushq $0 # Where the available space is returned.
561 leaq 0(%rsp),%rdi # Pass its address.
563 call __generic_releasestack
565 subq 0(%rsp),%rax # Subtract available space.
566 addq $BACKOFF,%rax # Back off BACKOFF bytes.
568 X86_64_SAVE_NEW_STACK_BOUNDARY (ax)
570 addq $16,%rsp # Remove values from stack.
572 # We need to restore the old stack pointer, which is in %rbp,
573 # before we unblock signals. We also need to restore %rax and
574 # %rdx after we unblock signals but before we return. Do this
575 # by moving %rax and %rdx from the current stack to the old
578 popq %rdx # Pop return value from current stack.
581 movq %rbp,%rsp # Restore stack pointer.
583 # Now (%rsp % 16) == 8.
585 subq $8,%rsp # For alignment.
586 pushq %rax # Push return value on old stack.
589 call __morestack_unblock_signals
591 popq %rdx # Restore return value.
598 .cfi_def_cfa %rsp, 16
599 ret # Return to caller, which will
600 # immediately return.
602 # This is the cleanup code called by the stack unwinder when unwinding
603 # through the code between .LEHB0 and .LEHE0 above.
607 subq $16,%rsp # Maintain 16 byte alignment.
608 movq %rax,(%rsp) # Save exception header.
609 movq %rbp,%rdi # Stack pointer after resume.
610 call __generic_findstack
611 movq %rbp,%rcx # Get the stack pointer.
612 subq %rax,%rcx # Subtract available space.
613 addq $BACKOFF,%rcx # Back off BACKOFF bytes.
614 X86_64_SAVE_NEW_STACK_BOUNDARY (cx)
615 movq (%rsp),%rdi # Restore exception data for call.
617 call _Unwind_Resume@PLT # Resume unwinding.
619 call _Unwind_Resume # Resume unwinding.
622 #endif /* defined(__x86_64__) */
626 .size __morestack, . - __morestack
629 #if !defined(__x86_64__) && defined(__PIC__)
630 # Output the thunk to get PC into bx, since we use it above.
# The thunk is placed in its own comdat group section and is exported
# as a global but hidden function symbol. The 32-bit cleanup code in
# __morestack calls it and then adds $_GLOBAL_OFFSET_TABLE_ to %ebx.
631 .section .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat
632 .globl __x86.get_pc_thunk.bx
633 .hidden __x86.get_pc_thunk.bx
635 .type __x86.get_pc_thunk.bx, @function
637 __x86.get_pc_thunk.bx:
643 .size __x86.get_pc_thunk.bx, . - __x86.get_pc_thunk.bx
647 # The exception table. This tells the personality routine to execute
648 # the exception handler.
#
# The table has a single call-site record: the region from .LEHB0 to
# .LEHE0, with .L1 as its landing pad. The region start and the landing
# pad are both given as offsets from .LFB1.
650 .section .gcc_except_table,"a",@progbits
653 .byte 0xff # @LPStart format (omit)
654 .byte 0xff # @TType format (omit)
655 .byte 0x1 # call-site format (uleb128)
656 .uleb128 .LLSDACSE1-.LLSDACSB1 # Call-site table length
658 .uleb128 .LEHB0-.LFB1 # region 0 start
659 .uleb128 .LEHE0-.LEHB0 # length
660 .uleb128 .L1-.LFB1 # landing pad
665 .global __gcc_personality_v0
667 # Build a position independent reference to the basic
668 # personality function.
#
# DW.ref.__gcc_personality_v0 is a weak, hidden data object in its own
# comdat group. It holds the address of __gcc_personality_v0, emitted
# as a 4-byte .long or as an 8-byte .quad, and is the symbol named by
# the .cfi_personality 0x9b directives in __morestack.
669 .hidden DW.ref.__gcc_personality_v0
670 .weak DW.ref.__gcc_personality_v0
671 .section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat
672 .type DW.ref.__gcc_personality_v0, @object
673 DW.ref.__gcc_personality_v0:
676 .size DW.ref.__gcc_personality_v0, 4
677 .long __gcc_personality_v0
680 .size DW.ref.__gcc_personality_v0, 8
681 .quad __gcc_personality_v0
685 #if defined __x86_64__ && defined __LP64__
687 # This entry point is used for the large model. With this entry point
688 # the upper 32 bits of %r10 hold the argument size and the lower 32
689 # bits hold the new stack frame size. There doesn't seem to be a way
690 # to know in the assembler code that we are assembling for the large
691 # model, and there doesn't seem to be a large model multilib anyhow.
692 # If one is developed, then the non-PIC code is probably OK since we
693 # will probably be close to the morestack code, but the PIC code
694 # almost certainly needs to be changed. FIXME.
697 .global __morestack_large_model
698 .hidden __morestack_large_model
701 .type __morestack_large_model,@function
704 __morestack_large_model:
# Keep only the low 32 bits of %r10, the new stack frame size. A write
# to the 32-bit register %r10d clears bits 63:32 of %r10.
710 andl $0xffffffff, %r10d
716 .size __morestack_large_model, . - __morestack_large_model
719 #endif /* __x86_64__ && __LP64__ */
721 # Initialize the stack test value when the program starts or when a
722 # new thread starts. We don't know how large the main stack is, so we
723 # guess conservatively. We might be able to use getrlimit here.
#
# Both versions compute an address 16000 bytes below the current stack
# pointer as the stack test value (the 64-bit version stores it with
# X86_64_SAVE_NEW_STACK_BOUNDARY) and then call
# __generic_morestack_set_initial_sp, either through the PLT or
# directly.
726 .global __stack_split_initialize
727 .hidden __stack_split_initialize
730 .type __stack_split_initialize, @function
733 __stack_split_initialize:
738 leal -16000(%esp),%eax # We should have at least 16K.
740 subl $4,%esp # Align stack.
744 call __generic_morestack_set_initial_sp@PLT
746 call __generic_morestack_set_initial_sp
751 #else /* defined(__x86_64__) */
753 leaq -16000(%rsp),%rax # We should have at least 16K.
754 X86_64_SAVE_NEW_STACK_BOUNDARY (ax)
755 subq $8,%rsp # Align stack.
759 call __generic_morestack_set_initial_sp@PLT
761 call __generic_morestack_set_initial_sp
766 #endif /* defined(__x86_64__) */
769 .size __stack_split_initialize, . - __stack_split_initialize
772 # Routines to get and set the guard, for __splitstack_getcontext,
773 # __splitstack_setcontext, and __splitstack_makecontext.
775 # void *__morestack_get_guard (void) returns the current stack guard.
# The symbol is exported as a global but hidden function.
# NOTE(review): presumably the guard is the stack boundary word that
# __morestack keeps at %gs:0x30 (32-bit) or via
# X86_64_SAVE_NEW_STACK_BOUNDARY (64-bit) -- confirm against the body.
777 .global __morestack_get_guard
778 .hidden __morestack_get_guard
781 .type __morestack_get_guard,@function
784 __morestack_get_guard:
798 .size __morestack_get_guard, . - __morestack_get_guard
801 # void __morestack_set_guard (void *) sets the stack guard.
# The symbol is exported as a global but hidden function.
802 .global __morestack_set_guard
803 .hidden __morestack_set_guard
806 .type __morestack_set_guard,@function
809 __morestack_set_guard:
# Store %rdi (or %edi, depending on which macro definition is active)
# as the new stack boundary.
815 X86_64_SAVE_NEW_STACK_BOUNDARY (di)
820 .size __morestack_set_guard, . - __morestack_set_guard
823 # void *__morestack_make_guard (void *, size_t) returns the stack
824 # guard value for a stack.
# The symbol is exported as a global but hidden function.
# NOTE(review): presumably the arguments are the stack pointer and the
# stack size, and the result includes the BACKOFF adjustment that
# __morestack applies to its boundaries -- confirm against the body.
825 .global __morestack_make_guard
826 .hidden __morestack_make_guard
829 .type __morestack_make_guard,@function
832 __morestack_make_guard:
846 .size __morestack_make_guard, . - __morestack_make_guard
849 # Make __stack_split_initialize a high priority constructor. FIXME:
850 # This is ELF specific.
#
# The constructor table receives two entries, __stack_split_initialize
# followed by __morestack_load_mmap. They are emitted as .long or as
# .quad values, in an .init_array.00000 or a .ctors.65535 section.
852 #if HAVE_INITFINI_ARRAY_SUPPORT
853 .section .init_array.00000,"aw",@init_array
855 .section .ctors.65535,"aw",@progbits
860 .long __stack_split_initialize
861 .long __morestack_load_mmap
864 .quad __stack_split_initialize
865 .quad __morestack_load_mmap
# Marker sections declared with no flags.
# NOTE(review): presumably read by the linker to learn the stack and
# split-stack properties of this object -- confirm.
869 .section .note.GNU-stack,"",@progbits
870 .section .note.GNU-split-stack,"",@progbits
871 .section .note.GNU-no-split-stack,"",@progbits