1 /* PLT trampolines. x86-64 version.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
21 #include <link-defines.h>
/* NOTE(review): the preprocessor structure is incomplete in this
   excerpt -- the #endif closing the alignment check below, and the
   #ifdef __ILP32__ / #else / #endif pair that must select between the
   two REGISTER_SAVE_AREA layouts, are not visible.  Confirm against
   the full file before assembling.  */
23 #if (RTLD_SAVESPACE_SSE % 32) != 0
24 # error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
27 /* Area on stack to save and restore registers used for parameter
28 passing when calling _dl_fixup. */
/* Layout 1 (x32): seven 8-byte GPR slots with RAX at offset 0.  */
30 /* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX. */
31 # define REGISTER_SAVE_AREA (8 * 7)
32 # define REGISTER_SAVE_RAX 0
/* Layout 2 (x86-64): four 16-byte MPX bound-register slots first,
   then the seven 8-byte GPR slots.  */
34 /* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0,
   BND1, BND2 and BND3.  */
36 # define REGISTER_SAVE_AREA (8 * 7 + 16 * 4)
37 /* Align bound register save area to 16 bytes. */
38 # define REGISTER_SAVE_BND0 0
39 # define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
40 # define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
41 # define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
42 # define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
/* GPR slots common to both layouts; each sits 8 bytes above the
   previous one, starting from REGISTER_SAVE_RAX.  */
44 #define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
45 #define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
46 #define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
47 #define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8)
48 #define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8)
49 #define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
/* _dl_runtime_resolve: lazy-binding trampoline entered from a PLT slot.
   The PLT pushed two 8-byte words before jumping here (hence the
   cfi_adjust_cfa_offset(16)); after the subq below they sit at
   REGISTER_SAVE_AREA(%rsp)   = struct link_map *  -> %rdi
   REGISTER_SAVE_AREA+8(%rsp) = reloc_index        -> %rsi
   All argument-passing registers (and, with MPX, the bound registers)
   are preserved around the call to _dl_fixup; the resolved entry point
   returned in %rax is then tail-jumped to via %r11.
   NOTE(review): the "_dl_runtime_resolve:" label, cfi_startproc and
   several #else/#endif lines are missing from this excerpt -- confirm
   against the full file.  */
52 .globl _dl_runtime_resolve
53 .type _dl_runtime_resolve, @function
57 cfi_adjust_cfa_offset(16) # Incorporate PLT
58 subq $REGISTER_SAVE_AREA,%rsp
59 cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
60 # Preserve registers otherwise clobbered.
61 movq %rax, REGISTER_SAVE_RAX(%rsp)
62 movq %rcx, REGISTER_SAVE_RCX(%rsp)
63 movq %rdx, REGISTER_SAVE_RDX(%rsp)
64 movq %rsi, REGISTER_SAVE_RSI(%rsp)
65 movq %rdi, REGISTER_SAVE_RDI(%rsp)
66 movq %r8, REGISTER_SAVE_R8(%rsp)
67 movq %r9, REGISTER_SAVE_R9(%rsp)
69 # We also have to preserve bound registers. These are nops if
70 # Intel MPX isn't available or disabled.
71 # ifdef HAVE_MPX_SUPPORT
72 bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
73 bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
74 bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
75 bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
# Hand-assembled BNDMOV stores (66 0F 1B, disp8 ModRM 0x44/0x4c/0x54/
# 0x5c = bnd0..bnd3) for assemblers without MPX support.
# NOTE(review): the #else that should precede these .byte lines is not
# visible in this excerpt.
77 .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
78 .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
79 .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
80 .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
83 # Copy args pushed by PLT in register.
84 # %rdi: link_map, %rsi: reloc_index
85 movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
86 movq REGISTER_SAVE_AREA(%rsp), %rdi
87 call _dl_fixup # Call resolver.
88 movq %rax, %r11 # Save return value
90 # Restore bound registers. These are nops if Intel MPX isn't
91 # available or disabled.
92 # ifdef HAVE_MPX_SUPPORT
93 bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
94 bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
95 bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
96 bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
# Hand-assembled BNDMOV loads (opcode 0x1a = register <- memory form).
98 .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
99 .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
100 .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
101 .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
104 # Get register content back.
105 movq REGISTER_SAVE_R9(%rsp), %r9
106 movq REGISTER_SAVE_R8(%rsp), %r8
107 movq REGISTER_SAVE_RDI(%rsp), %rdi
108 movq REGISTER_SAVE_RSI(%rsp), %rsi
109 movq REGISTER_SAVE_RDX(%rsp), %rdx
110 movq REGISTER_SAVE_RCX(%rsp), %rcx
111 movq REGISTER_SAVE_RAX(%rsp), %rax
112 # Adjust stack(PLT did 2 pushes)
113 addq $(REGISTER_SAVE_AREA + 16), %rsp
114 cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
115 jmp *%r11 # Jump to function address.
117 .size _dl_runtime_resolve, .-_dl_runtime_resolve
/* _dl_runtime_profile: auditing/profiling variant of the PLT
   trampoline.  Builds a 16-byte-aligned La_x86_64_regs structure on a
   dynamically sized stack frame so LD_AUDIT modules can inspect the
   register state; %rbx anchors the fixed-size top half of the frame.
   FIX: the hand-assembled BNDMOV stores for %bnd2/%bnd3 used ModRM
   bytes 0x84/0x8c (which encode bnd0/bnd1 again), so bnd2/bnd3 were
   never saved.  Corrected to 0x94/0x9c per the BNDMOV encoding
   (66 0F 1B /r, mod=10 rm=100: reg field selects bnd0..bnd3).
   NOTE(review): this excerpt is missing many lines (entry label, the
   %rbx save, the cpuid/xgetbv AVX detection, branch labels, several
   #if/#else/#endif) -- confirm against the full file.  */
121 .globl _dl_runtime_profile
122 .type _dl_runtime_profile, @function
127 cfi_adjust_cfa_offset(16) # Incorporate PLT
128 /* The La_x86_64_regs data structure pointed to by the
129 fourth parameter must be 16-byte aligned. This must
130 be explicitly enforced. We have to set up a dynamically
131 sized stack frame. %rbx points to the top half which
132 has a fixed size and preserves the original stack pointer. */
134 subq $32, %rsp # Allocate the local storage.
135 cfi_adjust_cfa_offset(32)
137 cfi_rel_offset(%rbx, 0)
/* Partial map of the %rbx-anchored frame (other slots are on lines
   not visible in this excerpt):  */
140 56(%rbx) parameter #1
141 48(%rbx) return address
146 24(%rbx) La_x86_64_regs pointer
154 cfi_def_cfa_register(%rbx)
156 /* Actively align the La_x86_64_regs structure. */
157 andq $0xfffffffffffffff0, %rsp
158 # if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
159 /* sizeof(La_x86_64_regs). Need extra space for 8 SSE registers
160 to detect if any xmm0-xmm7 registers are changed by audit
   module.  */
162 subq $(LR_SIZE + XMM_SIZE*8), %rsp
164 subq $LR_SIZE, %rsp # sizeof(La_x86_64_regs)
168 /* Fill the La_x86_64_regs structure. */
169 movq %rdx, LR_RDX_OFFSET(%rsp)
170 movq %r8, LR_R8_OFFSET(%rsp)
171 movq %r9, LR_R9_OFFSET(%rsp)
172 movq %rcx, LR_RCX_OFFSET(%rsp)
173 movq %rsi, LR_RSI_OFFSET(%rsp)
174 movq %rdi, LR_RDI_OFFSET(%rsp)
175 movq %rbp, LR_RBP_OFFSET(%rsp)
# NOTE(review): %rax is presumably loaded with the caller's original
# %rsp on a line not visible here -- confirm against the full file.
178 movq %rax, LR_RSP_OFFSET(%rsp)
180 /* We always store the XMM registers even if AVX is available.
181 This is to provide backward binary compatibility for existing
   audit modules.  */
183 movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
184 movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
185 movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
186 movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
187 movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
188 movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
189 movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
190 movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
193 # ifdef HAVE_MPX_SUPPORT
194 bndmov %bnd0, (LR_BND_OFFSET)(%rsp) # Preserve bound
195 bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp) # registers. Nops if
196 bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
197 bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
# Hand-assembled BNDMOV stores (66 0F 1B, mod=10 rm=100/SIB + disp32);
# ModRM 0x84/0x8c/0x94/0x9c select bnd0/bnd1/bnd2/bnd3 respectively.
# NOTE(review): the #else preceding these lines is not visible here.
199 .byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
200 .byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
201 .byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
202 .byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
206 # if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
# Runtime AVX/AVX512 detection, cached in L(have_avx).  Most of the
# cpuid/xgetbv sequence and its branches are missing from this excerpt.
213 cmpl $0, L(have_avx)(%rip)
215 movq %rbx, %r11 # Save rbx
218 movq %r11,%rbx # Restore rbx
220 // AVX and XSAVE supported?
221 andl $((1 << 28) | (1 << 27)), %ecx
222 cmpl $((1 << 28) | (1 << 27)), %ecx
224 # ifdef HAVE_AVX512_ASM_SUPPORT
225 // AVX512 supported in processor?
226 movq %rbx, %r11 # Save rbx
230 andl $(1 << 16), %ebx
233 // Get XFEATURE_ENABLED_MASK
235 # ifdef HAVE_AVX512_ASM_SUPPORT
237 movq %r11, %rbx # Restore rbx
239 // Verify that XCR0[7:5] = '111b' and
240 // XCR0[2:1] = '11b' which means
241 // that zmm state is enabled
245 movl %eax, L(have_avx)(%rip)
// zmm variant of the shared trampoline body.
248 # define VMOV vmovdqu64
249 # define VEC(i) zmm##i
251 # include "dl-trampoline.h"
258 movl %eax, L(have_avx)(%rip)
263 # ifdef HAVE_AVX512_ASM_SUPPORT
264 cmpl $0xe6, L(have_avx)(%rip)
// ymm variant of the shared trampoline body.
269 # define VMOV vmovdqu
270 # define VEC(i) ymm##i
272 # include "dl-trampoline.h"
// Plain SSE variant (no VMOV/VEC defined).
279 # include "dl-trampoline.h"
282 .size _dl_runtime_profile, .-_dl_runtime_profile
/* _dl_x86_64_save_sse: save vector argument registers 0-7 into the
   thread-local RTLD_SAVESPACE_SSE area (addressed via %fs), using the
   widest form the CPU supports: zmm (AVX512), ymm (AVX) or xmm (SSE).
   NOTE(review): the entry label, the cpuid/xgetbv detection
   instructions, branch labels and several #if/#else/#endif lines are
   missing from this excerpt -- confirm against the full file.  */
287 .globl _dl_x86_64_save_sse
288 .type _dl_x86_64_save_sse, @function
292 # if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
# L(have_avx) appears to cache the detection result; the conditional
# jumps consuming this compare are not visible in this excerpt.
293 cmpl $0, L(have_avx)(%rip)
295 movq %rbx, %r11 # Save rbx
298 movq %r11,%rbx # Restore rbx
300 // AVX and XSAVE supported?
301 andl $((1 << 28) | (1 << 27)), %ecx
302 cmpl $((1 << 28) | (1 << 27)), %ecx
304 # ifdef HAVE_AVX512_ASM_SUPPORT
305 // AVX512 supported in a processor?
306 movq %rbx, %r11 # Save rbx
310 andl $(1 << 16), %ebx
313 // Get XFEATURE_ENABLED_MASK
315 # ifdef HAVE_AVX512_ASM_SUPPORT
317 movq %r11, %rbx # Restore rbx
319 // Verify that XCR0[7:5] = '111b' and
320 // XCR0[2:1] = '11b' which means
321 // that zmm state is enabled
323 movl %eax, L(have_avx)(%rip)
330 movl %eax, L(have_avx)(%rip)
335 # ifdef HAVE_AVX512_ASM_SUPPORT
336 cmpl $0xe6, L(have_avx)(%rip)
# AVX path: save ymm0-ymm7 (aligned 32-byte stores).
340 vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
341 vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
342 vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
343 vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
344 vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
345 vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
346 vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
347 vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
349 # ifdef HAVE_AVX512_ASM_SUPPORT
# AVX512 path: save zmm0-zmm7 (vmovdqu64 = unaligned 64-bit element
# form, so no 64-byte alignment requirement on the save area).
351 vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
352 vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
353 vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
354 vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
355 vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
356 vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
357 vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
358 vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
# SSE fallback: save xmm0-xmm7 (aligned 16-byte stores).
363 movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
364 movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
365 movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
366 movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
367 movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
368 movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
369 movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
370 movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
373 .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
/* _dl_x86_64_restore_sse: reload vector registers 0-7 from the
   thread-local RTLD_SAVESPACE_SSE area (addressed via %fs), the
   mirror image of _dl_x86_64_save_sse.  The path taken matches the
   one used to save: zmm (AVX512), ymm (AVX) or xmm (SSE).
   NOTE(review): the conditional jumps after the two compares below,
   the branch labels and several #else/#endif lines are missing from
   this excerpt -- confirm against the full file.  */
376 .globl _dl_x86_64_restore_sse
377 .type _dl_x86_64_restore_sse, @function
380 _dl_x86_64_restore_sse:
381 # if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
# L(have_avx) holds the cached CPU-capability result written by the
# save/detection code.
382 cmpl $0, L(have_avx)(%rip)
384 # ifdef HAVE_AVX512_ASM_SUPPORT
385 cmpl $0xe6, L(have_avx)(%rip)
# AVX path: restore ymm0-ymm7.
389 vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
390 vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
391 vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
392 vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
393 vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
394 vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
395 vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
396 vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
398 # ifdef HAVE_AVX512_ASM_SUPPORT
# AVX512 path: restore zmm0-zmm7.
400 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
401 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
402 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
403 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
404 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
405 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
406 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
407 vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
# SSE fallback: restore xmm0-xmm7.
412 movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
413 movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
414 movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
415 movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
416 movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
417 movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
418 movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
419 movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
422 .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse