/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include <sysdep.h>
#include <link-defines.h>

#if (RTLD_SAVESPACE_SSE % 32) != 0
# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
#endif
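
/* (The 32-byte alignment matters because the vmovdqa stores of %ymm
   registers into %fs:RTLD_SAVESPACE_SSE below fault on addresses that
   are not 32-byte aligned.)  */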

/* Area on stack to save and restore registers used for parameter
   passing when calling _dl_fixup.  */
#ifdef __ILP32__
/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX.  */
# define REGISTER_SAVE_AREA	(8 * 7)
# define REGISTER_SAVE_RAX	0
# define PRESERVE_BND_REGS_PREFIX
#else
/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0,
   BND1, BND2 and BND3.  */
# define REGISTER_SAVE_AREA	(8 * 7 + 16 * 4)
/* Align bound register save area to 16 bytes.  */
# define REGISTER_SAVE_BND0	0
# define REGISTER_SAVE_BND1	(REGISTER_SAVE_BND0 + 16)
# define REGISTER_SAVE_BND2	(REGISTER_SAVE_BND1 + 16)
# define REGISTER_SAVE_BND3	(REGISTER_SAVE_BND2 + 16)
# define REGISTER_SAVE_RAX	(REGISTER_SAVE_BND3 + 16)
# ifdef HAVE_MPX_SUPPORT
#  define PRESERVE_BND_REGS_PREFIX bnd
# else
#  define PRESERVE_BND_REGS_PREFIX .byte 0xf2
# endif
#endif
#define REGISTER_SAVE_RCX	(REGISTER_SAVE_RAX + 8)
#define REGISTER_SAVE_RDX	(REGISTER_SAVE_RCX + 8)
#define REGISTER_SAVE_RSI	(REGISTER_SAVE_RDX + 8)
#define REGISTER_SAVE_RDI	(REGISTER_SAVE_RSI + 8)
#define REGISTER_SAVE_R8	(REGISTER_SAVE_RDI + 8)
#define REGISTER_SAVE_R9	(REGISTER_SAVE_R8 + 8)
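
/* For reference, in the non-X32 case the definitions above work out to
   the following layout (offsets from %rsp after the subq below):
   BND0..BND3 at 0, 16, 32 and 48; RAX at 64; then RCX, RDX, RSI, RDI,
   R8 and R9 at 72..112; REGISTER_SAVE_AREA = 8 * 7 + 16 * 4 = 120
   bytes in total.  */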

	.text
	.globl _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	subq $REGISTER_SAVE_AREA,%rsp
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
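	# (These are the six integer argument registers plus %rax, which
	# carries the vector-register count for variadic callees; all of
	# them are call-clobbered, and the call interrupted by the PLT
	# must see them unchanged once _dl_fixup returns.)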
#ifndef __ILP32__
	# We also have to preserve bound registers.  These are nops if
	# Intel MPX isn't available or disabled.
# ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
	bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
	bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
	bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
# else
	.byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
	.byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
	.byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
	.byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
#endif
	# Copy the args pushed by the PLT into registers.
	# %rdi: link_map, %rsi: reloc_index
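	# (The standard x86-64 PLT sequence pushed reloc_index in the
	# per-symbol entry and link_map, taken from GOT+8, in PLT0, so
	# after the subq above they sit just beyond the save area.)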
	movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
	movq REGISTER_SAVE_AREA(%rsp), %rdi
	call _dl_fixup		# Call resolver.
	movq %rax, %r11		# Save return value
#ifndef __ILP32__
	# Restore bound registers.  These are nops if Intel MPX isn't
	# available or disabled.
# ifdef HAVE_MPX_SUPPORT
	bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
	bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
	bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
	bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
# else
	.byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
	.byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
	.byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
	.byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
# endif
#endif
	# Get register content back.
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
	# Adjust stack (PLT did 2 pushes).
	addq $(REGISTER_SAVE_AREA + 16), %rsp
	cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
	# Preserve bound registers: the bnd (0xf2) prefix keeps
	# BND0-BND3 valid across the indirect jump on MPX hardware.
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve
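
/* _dl_runtime_profile is the trampoline variant installed when auditing
   or profiling (LD_AUDIT, LD_PROFILE) is in effect; it captures the
   full register state in an La_x86_64_regs structure so that audit
   modules can inspect and possibly modify it.  */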

#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	cfi_startproc

_dl_runtime_profile:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be 16-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	subq $32, %rsp		# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	movq %rsp, %rbx
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	andq $0xfffffffffffffff0, %rsp
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by an audit
	   module.  */
	subq $(LR_SIZE + XMM_SIZE*8), %rsp
# else
	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
# endif
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	# 48(%rbx) holds the return address, i.e. the value %rsp had
	# at function entry.
	leaq 48(%rbx), %rax
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

#ifndef __ILP32__
# ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, (LR_BND_OFFSET)(%rsp)		 # Preserve bound
	bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp)	 # registers.  Nops if
	bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
	bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
# else
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
# endif
#endif
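	/* (The .byte sequences hand-assemble "bndmov %bndN, disp32(%rsp)",
	   opcode 66 0F 1B /r, for assemblers without MPX support; the
	   ModRM bytes 0x84, 0x8c, 0x94 and 0x9c select bnd0..bnd3 with an
	   %rsp-based SIB byte and a 32-bit displacement.)  */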

# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	.data
L(have_avx):
	.long	0
	.previous

	cmpl	$0, L(have_avx)(%rip)
	jne	1f
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid
	movq	%r11, %rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	10f
# ifdef HAVE_AVX512_ASM_SUPPORT
	// AVX512 supported in processor?
	movq	%rbx, %r11		# Save rbx
	xorl	%ecx, %ecx
	mov	$0x7, %eax
	cpuid
	andl	$(1 << 16), %ebx
# endif
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
# ifdef HAVE_AVX512_ASM_SUPPORT
	test	%ebx, %ebx
	movq	%r11, %rbx		# Restore rbx
	je	20f
	// Verify that XCR0[7:5] = '111b' and
	// XCR0[2:1] = '11b' which means
	// that zmm state is enabled
	andl	$0xe6, %eax
	cmpl	$0xe6, %eax
	jne	20f
	movl	%eax, L(have_avx)(%rip)
L(avx512):
#  define RESTORE_AVX
#  define VMOV		vmovdqu64
#  define VEC(i)	zmm##i
#  define MORE_CODE
#  include "dl-trampoline.h"
#  undef VMOV
#  undef VEC
#  undef RESTORE_AVX
# endif
20:	andl	$0x6, %eax
10:	subl	$0x5, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

1:	js	L(no_avx)
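	/* (At this point L(have_avx) caches the detection result: a
	   negative value means AVX cannot be used, so branch to the SSE
	   code; a positive value means the OS enables the ymm state; and
	   exactly 0xe6 means the zmm state is enabled too.  CPUID leaf 1
	   ECX bit 27 is OSXSAVE and bit 28 is AVX; xgetbv with %ecx = 0
	   returns XCR0, whose bits 1-2 cover the SSE/AVX state and bits
	   5-7 the AVX-512 state.)  */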
# ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl	$0xe6, L(have_avx)(%rip)
	je	L(avx512)
# endif

#  define RESTORE_AVX
#  define VMOV		vmovdqu
#  define VEC(i)	ymm##i
#  define MORE_CODE
#  include "dl-trampoline.h"

	.align 16
L(no_avx):
# endif

# undef RESTORE_AVX
# include "dl-trampoline.h"

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif


#ifdef SHARED
	.globl _dl_x86_64_save_sse
	.type _dl_x86_64_save_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_save_sse:
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	cmpl	$0, L(have_avx)(%rip)
	jne	1f
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid
	movq	%r11, %rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	2f
# ifdef HAVE_AVX512_ASM_SUPPORT
	// AVX512 supported in a processor?
	movq	%rbx, %r11		# Save rbx
	xorl	%ecx, %ecx
	mov	$0x7, %eax
	cpuid
	andl	$(1 << 16), %ebx
# endif
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
# ifdef HAVE_AVX512_ASM_SUPPORT
	test	%ebx, %ebx
	movq	%r11, %rbx		# Restore rbx
	je	3f
	// Verify that XCR0[7:5] = '111b' and
	// XCR0[2:1] = '11b' which means
	// that zmm state is enabled
	andl	$0xe6, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0xe6, %eax
	je	L(avx512_5)
# endif

3:	andl	$0x6, %eax
2:	subl	$0x5, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

1:	js	L(no_avx5)
# ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl	$0xe6, L(have_avx)(%rip)
	je	L(avx512_5)
# endif
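
	# (The probe above repeats the CPUID/XGETBV sequence from
	# _dl_runtime_profile and shares the cached L(have_avx) value,
	# so whichever routine runs first initializes it for both.)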
	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
	ret
# ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_5):
	vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
	vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
	vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
	vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
	vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
	vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
	vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
	vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
	ret
# endif
L(no_avx5):
# endif
	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
	ret
	cfi_endproc
	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
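
/* _dl_x86_64_save_sse stores the incoming vector registers in a
   per-thread scratch area at %fs:RTLD_SAVESPACE_SSE inside the rtld
   TCB, and _dl_x86_64_restore_sse brings them back, so the dynamic
   linker can run code that may clobber the vector registers without
   disturbing the interrupted call.  */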

	.globl _dl_x86_64_restore_sse
	.type _dl_x86_64_restore_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_restore_sse:
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	cmpl	$0, L(have_avx)(%rip)
	js	L(no_avx6)
# ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl	$0xe6, L(have_avx)(%rip)
	je	L(avx512_6)
# endif

	vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
	vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
	vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
	vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
	vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
	vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
	vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
	vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
	ret
# ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_6):
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
	ret
# endif
L(no_avx6):
# endif
	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
	movdqa	%fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
	movdqa	%fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
	movdqa	%fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
	movdqa	%fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
	movdqa	%fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
	movdqa	%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
	movdqa	%fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
	ret
	cfi_endproc
	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
#endif