/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
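
/* Note: this header is included several times by dl-trampoline.S, which
   defines _dl_runtime_resolve (together with USE_FXSAVE, USE_XSAVE or
   USE_XSAVEC) to build the fxsave/xsave/xsavec variants of the lazy
   resolver, and _dl_runtime_profile (together with RESTORE_AVX,
   VEC_SIZE, VMOVA and VEC) to build the profiling/audit trampolines.  */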
	.text
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA	8
#  define BASE			rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: all saved
   registers.  */
#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
#  define BASE			rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif
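
/* Rationale for the checks above: on entry %rsp is 8 mod 16 (the
   caller's call pushed the return address and the PLT pushed two more
   words), so when the stack is not explicitly realigned the area
   subtracted below must itself be 8 mod 16 to leave %rsp 16-byte
   aligned for fxsave and for the call to _dl_fixup.  */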

	.globl _dl_runtime_resolve
	.hidden _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
	_CET_ENDBR
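	/* On entry the PLT has left three words on the stack:
	   (%rsp) the link_map pushed by PLT0, 8(%rsp) the relocation
	   index pushed by the PLT entry, and 16(%rsp) the return
	   address in the caller.  They are picked up below once the
	   register state has been saved.  */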
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
	pushq %rbx			# push decrements %rsp by 8.
	cfi_adjust_cfa_offset(8)
	cfi_rel_offset(%rbx, 0)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)
	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
# ifdef REGISTER_SAVE_AREA
	sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
	# Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
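	/* When REGISTER_SAVE_AREA is not a build-time constant (xsave
	   and xsavec), the amount subtracted above is the xsave state
	   size computed at startup from CPUID and stored in the cpu
	   features block, since it depends on which state components
	   the CPU and kernel enable.  */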
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
	fxsave STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	# Clear the XSAVE Header.
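	/* The save area is freshly allocated stack, so the 64-byte XSAVE
	   header must have a defined value: xrstor expects the reserved
	   header bytes to be zero, and plain xsave also reads XSTATE_BV.
	   xsavec writes XSTATE_BV and XCOMP_BV itself, which is why only
	   the reserved bytes are cleared in that case.  */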
#  ifdef USE_XSAVE
	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
	xsave STATE_SAVE_OFFSET(%rsp)
#  else
	xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
	# Copy args pushed by PLT in register.
	# %rdi: link_map, %rsi: reloc_index
	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
	call _dl_fixup		# Call resolver.
	mov %RAX_LP, %R11_LP	# Save return value.
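	/* %r11 is neither callee-saved nor used for argument passing,
	   and the psABI allows the dynamic linker to clobber it, so it
	   can carry the resolved address across the register restores
	   below until the final jump.  */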
	# Get register content back.
# ifdef USE_FXSAVE
	fxrstor STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	xrstor STATE_SAVE_OFFSET(%rsp)
# endif
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
	mov %RBX_LP, %RSP_LP
	cfi_def_cfa_register(%rsp)
	movq (%rsp), %rbx
	cfi_restore(%rbx)
# endif
	# Adjust stack (PLT did 2 pushes).
	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
	# Preserve bound registers.
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif
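
/* _dl_runtime_profile is installed in the GOT instead of
   _dl_runtime_resolve when auditing (LD_AUDIT) or profiling is enabled.
   It calls _dl_profile_fixup, which lets audit modules inspect and
   modify the register arguments (la_pltenter); if a non-negative frame
   size is returned, the target is called from here so that
   _dl_call_pltexit can report the return value (la_pltexit).  */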

#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

	.globl _dl_runtime_profile
	.hidden _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
_dl_runtime_profile:
	cfi_startproc
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
	_CET_ENDBR
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be VEC_SIZE-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	sub $32, %RSP_LP		# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	and $-VEC_SIZE, %RSP_LP
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by audit
	   module.  */
	sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8, LR_R8_OFFSET(%rsp)
	movq %r9, LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	lea 48(%rbx), %RAX_LP
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, (LR_BND_OFFSET)(%rsp)		# Preserve bound
	bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp)	# registers.  Nops if
	bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
	bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
#  else
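	/* Hand-coded encodings of the same bndmov stores (66 0F 1B is
	   bndmov from a bound register to memory, %rsp-relative with a
	   32-bit displacement) for assemblers without MPX support; like
	   the mnemonics above they execute as NOPs when MPX is not
	   available or disabled.  */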
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
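	/* VEC(i) and VMOVA are defined by the including file: with AVX
	   they name the ymm registers, with AVX-512 the zmm registers,
	   so the full vector state reaching the PLT is preserved across
	   the audit hook.  */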
	VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by audit module.  */
	vmovdqa %xmm0, (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

	mov %RSP_LP, %RCX_LP	# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %RDX_LP	# Load return address if needed.
	mov 40(%rbx), %RSI_LP	# Copy args pushed by PLT in register.
	mov 32(%rbx), %RDI_LP	# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %R8_LP	# Address of framesize
	call _dl_profile_fixup	# Call resolver.
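	/* _dl_profile_fixup returns the resolved target address and
	   stores the frame size requested by the la_pltenter hooks at
	   16(%rbx) (the address passed in %r8).  A negative frame size
	   means la_pltexit does not have to be called.  */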

	mov %RAX_LP, %R11_LP	# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq LR_R8_OFFSET(%rsp), %r8
	movq LR_R9_OFFSET(%rsp), %r9

	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
	/* Check if any xmm0-xmm7 registers are changed by audit
	   module.  */
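	/* For each of xmm0-xmm7: compare the current value with the
	   pristine copy saved above at LR_SIZE + i*XMM_SIZE.  If the
	   audit hook left it untouched (mask 0xffff), reload the full
	   vector register from the saved VEC slot and refresh the xmm
	   slot of La_x86_64_regs; otherwise keep the modified xmm value
	   and record it in the vector slot instead.  */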
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
	vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
	vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
	vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
	vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
	vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
	vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
	vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
	vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov (LR_BND_OFFSET)(%rsp), %bnd0		# Restore bound
	bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1	# registers.
	bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2
	bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif

	mov 16(%rbx), %R10_LP	# Anything in framesize?
	test %R10_LP, %R10_LP
	PRESERVE_BND_REGS_PREFIX
	jns 3f

	/* The framesize is negative, so _dl_call_pltexit will not be
	   called and we can tail-jump to the resolved function.  */

	/* Get back registers content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.
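
	/* Taken when the frame size is non-negative: the target has to
	   be called from here rather than tail-jumped to, so that
	   _dl_call_pltexit can run afterwards with its return value.  */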
3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack for the function
	   which has to be called.  We copy the original stack to a
	   temporary buffer of the size specified by the 'framesize'
	   returned from _dl_profile_fixup.  */

	lea LR_RSP_OFFSET(%rbx), %RSI_LP	# stack
	add $8, %R10_LP
	and $-16, %R10_LP
	mov %R10_LP, %RCX_LP
	sub %R10_LP, %RSP_LP
	mov %RSP_LP, %RDI_LP
	shr $3, %RCX_LP
	rep
	movsq

	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	PRESERVE_BND_REGS_PREFIX
	call *%r11

	mov 24(%rbx), %RSP_LP	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_call_pltexit.  %rsp points to the La_x86_64_regs structure
	   again, so we just need to allocate sizeof(La_x86_64_retval)
	   bytes on the stack, since the alignment has already been taken
	   care of.  */
# ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if xmm0/xmm1 registers are changed
	   by audit module.  Since rsp is aligned to VEC_SIZE, we
	   need to make sure that the address of La_x86_64_retval +
	   LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
	sub $LRV_SPACE, %RSP_LP
#  else
	sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
# endif
	mov %RSP_LP, %RCX_LP	# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
	VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by audit module.  */
	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, LRV_BND0_OFFSET(%rcx)	# Preserve returned bounds.
	bndmov %bnd1, LRV_BND1_OFFSET(%rcx)
#  else
	.byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET)
#  endif
# endif
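
	/* fstpt pops the x87 stack: st0 and st1 may hold long double
	   (and complex long double) return values, which are saved for
	   la_pltexit and reloaded in reverse order below.  */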
	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit
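	/* _dl_call_pltexit runs the audit modules' la_pltexit hooks with
	   the saved register and return value structures; the hooks may
	   modify the return values, so they are reloaded from the
	   structure below.  */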

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
	/* Check if xmm0/xmm1 registers are changed by audit module.  */
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov LRV_BND0_OFFSET(%rsp), %bnd0	# Restore bound registers.
	bndmov LRV_BND1_OFFSET(%rsp), %bnd1
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif