1 /* PLT trampolines. ia64 version.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 This code is used in dl-runtime.c to call the `_dl_fixup' function
25 and then redirect to the address it returns. `_dl_fixup()' takes two
26 arguments, however _dl_profile_fixup() takes five.
28 The ABI specifies that we will never see more than 8 input
29 registers to a function call, thus it is safe to simply allocate
30 those, and simpler than playing stack games. */
32 /* Bytes of stack that _dl_runtime_resolve reserves to save and restore
   the 8 incoming fp registers (f8-f15), one 16-byte slot per register. */
33 #define RESOLVE_FRAME_SIZE (16*8)
/* _dl_runtime_resolve: PLT trampoline that calls _dl_fixup.
   It preserves r8-r11 in local registers and the incoming fp argument
   registers on the stack across the call, then puts everything back so
   that the caller's argument state is intact again afterwards. */
35 ENTRY(_dl_runtime_resolve)
39 alloc loc0 = ar.pfs, 8, 6, 2, 0 /* 8 inputs, 6 locals, 2 outputs, no rotating regs; old ar.pfs goes to loc0 */
40 /* Use the 16 byte scratch area. r2 will start at the f8 slot and
41 r3 will start at the f9 slot. Both are computed from the old sp;
   relative to the lowered sp they are sp + 16 and sp + 32. */
42 adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
43 adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
46 .fframe RESOLVE_FRAME_SIZE /* unwind info: fixed-size frame */
47 adds r12 = -RESOLVE_FRAME_SIZE, r12 /* allocate the save area */
51 mov loc2 = r8 /* preserve struct value register */
55 mov loc3 = r9 /* preserve language specific register */
56 mov loc4 = r10 /* preserve language specific register */
57 mov loc5 = r11 /* preserve language specific register */
/* r2 and r3 leapfrog each other: each stores one 16-byte slot and then
   post-increments by 32, so together they fill consecutive slots. */
60 stf.spill [r2] = f8, 32
61 stf.spill [r3] = f9, 32
66 stf.spill [r2] = f10, 32
67 stf.spill [r3] = f11, 32
72 stf.spill [r2] = f12, 32
73 stf.spill [r3] = f13, 32
74 /* Relocation record is 24 byte. */
/* out1 = (r15 << 3) + out1.  This gives r15 * 24 only if out1 already
   holds r15 * 16; that setup is not visible here - TODO confirm. */
75 shladd out1 = r15, 3, out1
81 br.call.sptk.many b0 = _dl_fixup
84 /* Skip the 16byte scratch area. */
/* Reload the fp argument registers, again using r2/r3 alternately with
   a post-increment of 32. */
91 ldf.fill f8 = [r2], 32
92 ldf.fill f9 = [r3], 32
97 ldf.fill f10 = [r2], 32
98 ldf.fill f11 = [r3], 32
103 ldf.fill f12 = [r2], 32
104 ldf.fill f13 = [r3], 32
109 ldf.fill f14 = [r2], 32
110 ldf.fill f15 = [r3], 32
111 .restore sp /* pop the unwind frame state */
112 adds r12 = RESOLVE_FRAME_SIZE, r12 /* release the save area */
116 mov r9 = loc3 /* restore language specific register */
117 mov r10 = loc4 /* restore language specific register */
118 mov r11 = loc5 /* restore language specific register */
121 mov r8 = loc2 /* restore struct value register */
124 /* An alloc is needed for the break system call to work.
125 We don't care about the old value of the pfs register. */
129 alloc r2 = ar.pfs, 0, 0, 8, 0 /* frame becomes 8 output registers only; r2 just receives the unwanted pfs value */
133 END(_dl_runtime_resolve)
136 /* The fourth argument to _dl_profile_fixup and the third one to
137 _dl_call_pltexit are a pointer to La_ia64_regs:
162 The fifth argument to _dl_profile_fixup is a pointer to long int.
163 The fourth argument to _dl_call_pltexit is a pointer to
179 Since the stack has to be 16-byte aligned, stack space is allocated in
180 16-byte increments. Before calling _dl_profile_fixup, the stack will
/* Stack frame that _dl_runtime_profile builds before calling
   _dl_profile_fixup.  The terms of the sum are, in order:
     4*8   r8-r11 (struct value and language specific GRs)
     8*8   the 8 incoming GRs in0-in7
     8*16  the 8 incoming fp registers f8-f15
     2*8   ar.unat and the original sp
     16    the scratch area at the bottom of the frame. */
189 #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
/* Frame used around the call to _dl_call_pltexit: the frame above plus
   4*8 + 8*16 bytes.  Presumably that extra space is the La_ia64_retval
   buffer (4 GRs and 8 fp registers) - TODO confirm against the
   structure definition. */
190 #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
/* _dl_runtime_profile: PLT trampoline that calls _dl_profile_fixup.
   It records the incoming register state in a La_ia64_regs block on the
   stack, calls _dl_profile_fixup, and then inspects the frame size that
   call stored through its fifth argument.  A size of -1 takes the
   .Lresolved path.  Otherwise the resolved function is called from here
   (through b6) and its return values are handed to _dl_call_pltexit
   before returning to the original caller. */
193 ENTRY(_dl_runtime_profile)
197 alloc loc0 = ar.pfs, 8, 12, 8, 0 /* 8 inputs, 12 locals, 8 outputs, no rotating regs; old ar.pfs goes to loc0 */
212 /* There is a 16 byte scratch area. r2 will start at r8 and
213 r3 will start at r9 for La_ia64_regs. Relative to the lowered sp
   these are sp + 16 and sp + 24. */
214 adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
215 adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
216 adds r12 = -PLTENTER_FRAME_SIZE, r12 /* allocate the frame */
222 mov out2 = b0 /* 3rd argument of _dl_profile_fixup */
228 adds out3 = 16, r12 /* 4th argument: pointer to La_ia64_regs */
/* Save the incoming GRs.  r2 and r3 address alternating 8-byte slots
   and each post-increments by 16. */
233 st8.spill [r2] = in0, 16
235 st8.spill [r3] = in1, 16
236 mov out4 = loc10 /* 5th argument: pointer to new frame size */
241 st8.spill [r2] = in2, 16
243 st8.spill [r3] = in3, 16
244 mov loc2 = r8 /* preserve struct value register */
249 st8.spill [r2] = in4, 16
251 st8.spill [r3] = in5, 16
252 mov loc3 = r9 /* preserve language specific register */
259 st8 [r3] = in7, 24 /* step by 24, not 16, so r3 lands on the f9 slot */
260 mov loc4 = r10 /* preserve language specific register */
264 mov r18 = ar.unat /* save it in La_ia64_regs */
265 mov loc7 = out3 /* save it for _dl_call_pltexit */
266 mov loc5 = r11 /* preserve language specific register */
/* Save the incoming fp registers in 16-byte slots; r2 and r3 alternate
   with a post-increment of 32. */
269 stf.spill [r2] = f8, 32
270 stf.spill [r3] = f9, 32
271 mov out0 = r16 /* 1st argument of _dl_profile_fixup */
275 mov ar.unat = r17 /* restore it for function call; r17 is set outside the lines shown here */
276 mov loc8 = r16 /* save it for _dl_call_pltexit */
280 stf.spill [r2] = f10, 32
281 stf.spill [r3] = f11, 32
286 stf.spill [r2] = f12, 32
287 stf.spill [r3] = f13, 32
288 /* Relocation record is 24 byte. */
/* out1 = (r15 << 3) + out1.  This gives r15 * 24 only if out1 already
   holds r15 * 16; that setup is not visible here - TODO confirm. */
289 shladd out1 = r15, 3, out1
293 stf.spill [r2] = f14, 32
294 stf.spill [r3] = f15, 24 /* step by 24 so r3 ends 8 bytes past r2 for the two 8-byte stores below */
295 mov loc9 = out1 /* save it for _dl_call_pltexit */
299 st8 [r2] = r18 /* store ar.unat */
300 st8 [r3] = loc10 /* store sp */
301 br.call.sptk.many b0 = _dl_profile_fixup
304 /* Skip the 16byte scratch area, 4 language specific GRs and
305 8 incoming GRs to restore incoming fp registers. */
306 adds r2 = (4*8 + 8*8 + 16), r12 /* address of the f8 slot */
307 adds r3 = (4*8 + 8*8 + 32), r12 /* address of the f9 slot */
312 ldf.fill f8 = [r2], 32
313 ldf.fill f9 = [r3], 32
318 ldf.fill f10 = [r2], 32
319 ldf.fill f11 = [r3], 32
320 mov r8 = loc2 /* restore struct value register */
324 ldf.fill f12 = [r2], 32
325 ldf.fill f13 = [r3], 32
326 mov r9 = loc3 /* restore language specific register */
330 ldf.fill f14 = [r2], 32
331 ldf.fill f15 = [r3], 32
332 mov r10 = loc4 /* restore language specific register */
336 ld8 r15 = [loc10] /* load the new frame size */
337 mov r11 = loc5 /* restore language specific register */
339 cmp.eq p6, p7 = -1, r15 /* p6: frame size is -1; p7: it is not */
343 (p7) cmp.eq p8, p9 = 0, r15 /* only when p7: p8 = size is 0, p9 = size is nonzero */
345 (p6) mov ar.lc = loc6
349 (p6) mov ar.pfs = loc0
350 (p6) br.cond.dptk.many .Lresolved
354 /* At this point, the stack looks like
360 We need to keep the current stack and call the resolved
361 function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE
362 + 16 (scratch area) to sp + 16 (scratch area). Since the stack
363 has to be 16-byte aligned, we round r15 up to a multiple of 16 bytes. */
366 (p9) adds r15 = 15, r15 /* first step of rounding up to 16 */
367 (p8) br.cond.dptk.many .Lno_new_frame
374 /* We don't copy the 16byte scratch area. Prepare r16/r17 as
383 sub r12 = r12, r15 /* Adjust stack */
394 /* Skip the 16byte scratch area. Prepare r2/r3 as source. */
410 br.cloop.sptk.few .Lcopy /* counted loop: repeats while ar.lc is nonzero */
426 /* Call the resolved function */
427 br.call.sptk.many b0 = b6
430 /* Prepare stack for _dl_call_pltexit. Loc10 has the original
432 adds r12 = -PLTEXIT_FRAME_SIZE, loc10
433 adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
434 adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
438 /* Store all possible return values into the buffer. */
451 stf.spill [r2] = f8, 32
452 stf.spill [r3] = f9, 32
453 mov out2 = loc7 /* Pointer to La_ia64_regs */
457 stf.spill [r2] = f10, 32
458 stf.spill [r3] = f11, 32
459 adds out3 = 16, r12 /* Pointer to La_ia64_retval */
463 stf.spill [r2] = f12, 32
464 stf.spill [r3] = f13, 32
465 /* We need to restore gp for _dl_call_pltexit. */
472 br.call.sptk.many b0 = _dl_call_pltexit
475 /* Load all the non-floating and floating return values. Skip
476 the 16byte scratch area. */
495 ldf.fill f8 = [r2], 32
496 ldf.fill f9 = [r3], 32
501 ldf.fill f10 = [r2], 32
502 ldf.fill f11 = [r3], 32
507 ldf.fill f12 = [r2], 32
508 ldf.fill f13 = [r3], 32
515 /* We know that the previous stack pointer, loc10, isn't 0.
516 We use it to reload p7. */
517 cmp.ne p7, p0 = 0, loc10 /* p7 = (loc10 != 0), so always true here */
524 (p7) br.ret.sptk.many b0 /* return to the original caller */
527 /* An alloc is needed for the break system call to work. We
528 don't care about the old value of the pfs register. After
529 this alloc, we can't use any rotating registers. Otherwise
530 assembler won't be happy. This has to be at the end. */
534 alloc r2 = ar.pfs, 0, 0, 8, 0 /* frame becomes 8 output registers only; r2 just receives the unwanted pfs value */
538 END(_dl_runtime_profile)