1 /* Machine-dependent ELF dynamic relocation functions. PowerPC version.
2 Copyright (C) 1995-2001, 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 #include <sys/param.h>
25 #include <elf/dynamic-link.h>
26 #include <dl-machine.h>
27 #include <stdio-common/_itoa.h>
29 /* Because ld.so is now versioned, these functions can be in their own file;
30 no relocations need to be done to call them.
31 Of course, if ld.so is not versioned... */
32 #if !(DO_VERSIONING - 0)
33 #error This will not work with versioning turned off, sorry.
37 /* Stuff for the PLT. */
/* Word index of the first PLT entry: the lazy-entry setup loop starts
   writing at plt[PLT_INITIAL_ENTRY_WORDS].  */
38 #define PLT_INITIAL_ENTRY_WORDS 18
/* Word index in the PLT at which the long-branch stub
   (addis / lwz / mtctr / bctr) is stored.  */
39 #define PLT_LONGBRANCH_ENTRY_WORDS 0
/* Word index in the PLT at which the lazy-resolution trampoline is
   stored.  */
40 #define PLT_TRAMPOLINE_ENTRY_WORDS 6
/* Number of leading PLT slots (8192) that use the short entry form;
   slots at or beyond this index are set up with the long form.  */
41 #define PLT_DOUBLE_SIZE (1<<13)
42 #define PLT_ENTRY_START_WORDS(entry_number) \
43 (PLT_INITIAL_ENTRY_WORDS + (entry_number)*2 \
44 + ((entry_number) > PLT_DOUBLE_SIZE \
45 ? ((entry_number) - PLT_DOUBLE_SIZE)*2 \
/* Word offset from the start of the PLT to its data words, i.e. the
   position where entry number NUM_ENTRIES would start (just past the
   code of the last real entry).  */
47 #define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries)
/* Macros to build PowerPC opcode words.

   Each macro ORs its operands into a fixed 32-bit instruction template.
   Register numbers and the rlwinm shift/mask fields are shifted into
   place but NOT masked, so callers must pass values that fit in five
   bits (0..31).  16-bit immediates/displacements are masked with 0xffff
   and branch targets with 0x03fffffc.  Arguments may be evaluated more
   than once; pass expressions without side effects.  */

/* addi rd,ra,simm */
#define OPCODE_ADDI(rd,ra,simm) \
  (0x38000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
/* addis rd,ra,simm */
#define OPCODE_ADDIS(rd,ra,simm) \
  (0x3c000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
/* add rd,ra,rb */
#define OPCODE_ADD(rd,ra,rb) \
  (0x7c000214 | (rd) << 21 | (ra) << 16 | (rb) << 11)
/* b target (relative) and ba target (absolute); TARGET is a byte
   displacement/address whose low two bits are discarded.  */
#define OPCODE_B(target) (0x48000000 | ((target) & 0x03fffffc))
#define OPCODE_BA(target) (0x48000002 | ((target) & 0x03fffffc))
/* bctr */
#define OPCODE_BCTR() 0x4e800420
/* lwz rd,d(ra) */
#define OPCODE_LWZ(rd,d,ra) \
  (0x80000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
/* lwzu rd,d(ra) */
#define OPCODE_LWZU(rd,d,ra) \
  (0x84000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
/* mtctr rd */
#define OPCODE_MTCTR(rd) (0x7C0903A6 | (rd) << 21)
/* rlwinm ra,rs,sh,mb,me */
#define OPCODE_RLWINM(ra,rs,sh,mb,me) \
  (0x54000000 | (rs) << 21 | (ra) << 16 | (sh) << 11 | (mb) << 6 | (me) << 1)

/* li rd,simm == addi rd,0,simm */
#define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm)
/* addis with the "high adjusted" half of VALUE: 0x8000 is added before
   taking the upper 16 bits so that a following instruction which adds
   the sign-extended low 16 bits of VALUE reproduces VALUE.  */
#define OPCODE_ADDIS_HI(rd,ra,value) \
  OPCODE_ADDIS(rd,ra,((value) + 0x8000) >> 16)
#define OPCODE_LIS_HI(rd,value) OPCODE_ADDIS_HI(rd,0,value)
/* slwi ra,rs,sh == rlwinm ra,rs,sh,0,31-sh.  SH is parenthesized in the
   mask-end computation so that an expression argument such as `n + 1'
   yields 31-(n+1) rather than 31-n+1.  */
#define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-(sh))
/* Inline-asm wrappers for individual PowerPC instructions used when
   patching code.  Each is a complete asm statement; all except PPC_DIE
   declare a "memory" clobber.  */
/* Issue `dcbst' on the address WHERE (passed in a register).  Used
   below as the "write the change out of the data cache" step.  */
74 #define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory")
/* Issue `sync'.  */
75 #define PPC_SYNC asm volatile ("sync" : : : "memory")
/* Issue `sync' followed by `isync' (note: both, not isync alone).  */
76 #define PPC_ISYNC asm volatile ("sync; isync" : : : "memory")
/* Issue `icbi' on the address WHERE (passed in a register).  Used
   below as the "discard stale instruction-cache contents" step.  */
77 #define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory")
/* Execute `tweq 0,0'.  NOTE(review): presumably an unconditional trap
   used to abort; it is not referenced in the visible part of the file.  */
78 #define PPC_DIE asm volatile ("tweq 0,0")
80 /* Use this when you've modified some code, but it won't be in the
81 instruction fetch queue (or when it doesn't matter if it is). */
/* Expands to, in order: PPC_DCBST (where); PPC_SYNC; PPC_ICBI (where).
   It acts on the single address WHERE, and WHERE is expanded twice, so
   it must not have side effects.  */
82 #define MODIFIED_CODE_NOQUEUE(where) \
83 do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0)
84 /* Use this when it might be in the instruction queue. */
/* Same sequence as MODIFIED_CODE_NOQUEUE with a trailing PPC_ISYNC
   (sync; isync) appended.  */
85 #define MODIFIED_CODE(where) \
86 do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0)
89 /* The idea here is that to conform to the ABI, we are supposed to try
90 to load dynamic objects between 0x10000 (we actually use 0x40000 as
91 the lower bound, to increase the chance of a memory reference from
92 a null pointer giving a segfault) and the program's load address;
93 this may allow us to use a branch instruction in the PLT rather
94 than a computed jump. The address is only used as a preference for
95 mmap, so if we get it wrong the worst that happens is that it gets
96 mapped somewhere else. */
99 __elf_preferred_address(struct link_map
*loader
, size_t maplength
,
100 ElfW(Addr
) mapstartpref
)
102 ElfW(Addr
) low
, high
;
105 /* If the object has a preference, load it there! */
106 if (mapstartpref
!= 0)
109 /* Otherwise, quickly look for a suitable gap between 0x3FFFF and
110 0x70000000. 0x3FFFF is so that references off NULL pointers will
111 cause a segfault, 0x70000000 is just paranoia (it should always
112 be superseded by the program's load address). */
115 for (l
= GL(dl_loaded
); l
; l
= l
->l_next
)
117 ElfW(Addr
) mapstart
, mapend
;
118 mapstart
= l
->l_map_start
& ~(GL(dl_pagesize
) - 1);
119 mapend
= l
->l_map_end
| (GL(dl_pagesize
) - 1);
120 assert (mapend
> mapstart
);
122 /* Prefer gaps below the main executable, note that l ==
123 _dl_loaded does not work for static binaries loading
125 if ((mapend
>= high
|| l
->l_type
== lt_executable
)
128 else if (mapend
>= low
&& low
>= mapstart
)
130 else if (high
>= mapend
&& mapstart
>= low
)
132 if (high
- mapend
>= mapstart
- low
)
139 high
-= 0x10000; /* Allow some room between objects. */
140 maplength
= (maplength
| (GL(dl_pagesize
) - 1)) + 1;
141 if (high
<= low
|| high
- low
< maplength
)
143 return high
- maplength
; /* Both high and maplength are page-aligned. */
146 /* Set up the loaded object described by L so its unrelocated PLT
147 entries will jump to the on-demand fixup code in dl-runtime.c.
148 Also install a small trampoline to be used by entries that have
149 been relocated to an address too far away for a single branch. */
151 /* There are many kinds of PLT entries:
153 (1) A direct jump to the actual routine, either a relative or
154 absolute branch. These are set up in __elf_machine_fixup_plt.
156 (2) Short lazy entries. These cover the first 8192 slots in
157 the PLT, and look like (where 'index' goes from 0 to 8191):
160 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS+1]
162 (3) Short indirect jumps. These replace (2) when a direct jump
163 wouldn't reach. They look the same except that the branch
164 is 'b &plt[PLT_LONGBRANCH_ENTRY_WORDS]'.
166 (4) Long lazy entries. These cover the slots when a short entry
167 won't fit ('index*4' overflows its field), and look like:
169 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
170 lwzu %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
171 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS]
174 (5) Long indirect jumps. These replace (4) when a direct jump
175 wouldn't reach. They look like:
177 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
178 lwz %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
182 (6) Long direct jumps. These are used when thread-safety is not
183 required. They look like:
185 lis %r12, %hi(finaladdr)
186 addi %r12, %r12, %lo(finaladdr)
191 The lazy entries, (2) and (4), are set up here in
192 __elf_machine_runtime_setup. (1), (3), and (5) are set up in
193 __elf_machine_fixup_plt. (1), (3), and (6) can also be constructed
194 in __process_machine_rela.
196 The reason for the somewhat strange construction of the long
197 entries, (4) and (5), is that we need to ensure thread-safety. For
198 (1) and (3), this is obvious because only one instruction is
199 changed and the PPC architecture guarantees that aligned stores are
200 atomic. For (5), this is more tricky. When changing (4) to (5),
201 the `b' instruction is first changed to `mtctr'; this is safe
202 and is why the `lwzu' instruction is not just a simple `addi'.
203 Once this is done, and is visible to all processors, the `lwzu' can
204 safely be changed to a `lwz'. */
206 __elf_machine_runtime_setup (struct link_map
*map
, int lazy
, int profile
)
208 if (map
->l_info
[DT_JMPREL
])
211 Elf32_Word
*plt
= (Elf32_Word
*) D_PTR (map
, l_info
[DT_PLTGOT
]);
212 Elf32_Word num_plt_entries
= (map
->l_info
[DT_PLTRELSZ
]->d_un
.d_val
213 / sizeof (Elf32_Rela
));
214 Elf32_Word rel_offset_words
= PLT_DATA_START_WORDS (num_plt_entries
);
215 Elf32_Word data_words
= (Elf32_Word
) (plt
+ rel_offset_words
);
216 Elf32_Word size_modified
;
218 extern void _dl_runtime_resolve (void);
219 extern void _dl_prof_resolve (void);
221 /* Convert the index in r11 into an actual address, and get the
222 word at that address. */
223 plt
[PLT_LONGBRANCH_ENTRY_WORDS
] = OPCODE_ADDIS_HI (11, 11, data_words
);
224 plt
[PLT_LONGBRANCH_ENTRY_WORDS
+ 1] = OPCODE_LWZ (11, data_words
, 11);
226 /* Call the procedure at that address. */
227 plt
[PLT_LONGBRANCH_ENTRY_WORDS
+ 2] = OPCODE_MTCTR (11);
228 plt
[PLT_LONGBRANCH_ENTRY_WORDS
+ 3] = OPCODE_BCTR ();
232 Elf32_Word
*tramp
= plt
+ PLT_TRAMPOLINE_ENTRY_WORDS
;
233 Elf32_Word dlrr
= (Elf32_Word
)(profile
235 : _dl_runtime_resolve
);
238 if (profile
&& _dl_name_match_p (GL(dl_profile
), map
))
239 /* This is the object we are looking for. Say that we really
240 want profiling and the timers are started. */
241 GL(dl_profile_map
) = map
;
243 /* For the long entries, subtract off data_words. */
244 tramp
[0] = OPCODE_ADDIS_HI (11, 11, -data_words
);
245 tramp
[1] = OPCODE_ADDI (11, 11, -data_words
);
247 /* Multiply index of entry by 3 (in r11). */
248 tramp
[2] = OPCODE_SLWI (12, 11, 1);
249 tramp
[3] = OPCODE_ADD (11, 12, 11);
250 if (dlrr
<= 0x01fffffc || dlrr
>= 0xfe000000)
252 /* Load address of link map in r12. */
253 tramp
[4] = OPCODE_LI (12, (Elf32_Word
) map
);
254 tramp
[5] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word
) map
);
256 /* Call _dl_runtime_resolve. */
257 tramp
[6] = OPCODE_BA (dlrr
);
261 /* Get address of _dl_runtime_resolve in CTR. */
262 tramp
[4] = OPCODE_LI (12, dlrr
);
263 tramp
[5] = OPCODE_ADDIS_HI (12, 12, dlrr
);
264 tramp
[6] = OPCODE_MTCTR (12);
266 /* Load address of link map in r12. */
267 tramp
[7] = OPCODE_LI (12, (Elf32_Word
) map
);
268 tramp
[8] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word
) map
);
270 /* Call _dl_runtime_resolve. */
271 tramp
[9] = OPCODE_BCTR ();
274 /* Set up the lazy PLT entries. */
275 offset
= PLT_INITIAL_ENTRY_WORDS
;
277 while (i
< num_plt_entries
&& i
< PLT_DOUBLE_SIZE
)
279 plt
[offset
] = OPCODE_LI (11, i
* 4);
280 plt
[offset
+1] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS
+ 2
286 while (i
< num_plt_entries
)
288 plt
[offset
] = OPCODE_LIS_HI (11, i
* 4 + data_words
);
289 plt
[offset
+1] = OPCODE_LWZU (12, i
* 4 + data_words
, 11);
290 plt
[offset
+2] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS
293 plt
[offset
+3] = OPCODE_BCTR ();
299 /* Now, we've modified code. We need to write the changes from
300 the data cache to a second-level unified cache, then make
301 sure that stale data in the instruction cache is removed.
302 (In a multiprocessor system, the effect is more complex.)
303 Most of the PLT shouldn't be in the instruction cache, but
304 there may be a little overlap at the start and the end.
306 Assumes that dcbst and icbi apply to lines of 16 bytes or
307 more. Current known line sizes are 16, 32, and 128 bytes. */
309 size_modified
= lazy
? rel_offset_words
: 6;
310 for (i
= 0; i
< size_modified
; i
+= 4)
312 PPC_DCBST (plt
+ size_modified
- 1);
315 PPC_ICBI (plt
+ size_modified
- 1);
323 __elf_machine_fixup_plt(struct link_map
*map
, const Elf32_Rela
*reloc
,
324 Elf32_Addr
*reloc_addr
, Elf32_Addr finaladdr
)
326 Elf32_Sword delta
= finaladdr
- (Elf32_Word
) reloc_addr
;
327 if (delta
<< 6 >> 6 == delta
)
328 *reloc_addr
= OPCODE_B (delta
);
329 else if (finaladdr
<= 0x01fffffc || finaladdr
>= 0xfe000000)
330 *reloc_addr
= OPCODE_BA (finaladdr
);
333 Elf32_Word
*plt
, *data_words
;
334 Elf32_Word index
, offset
, num_plt_entries
;
336 num_plt_entries
= (map
->l_info
[DT_PLTRELSZ
]->d_un
.d_val
337 / sizeof(Elf32_Rela
));
338 plt
= (Elf32_Word
*) D_PTR (map
, l_info
[DT_PLTGOT
]);
339 offset
= reloc_addr
- plt
;
340 index
= (offset
- PLT_INITIAL_ENTRY_WORDS
)/2;
341 data_words
= plt
+ PLT_DATA_START_WORDS (num_plt_entries
);
345 if (index
< PLT_DOUBLE_SIZE
)
347 data_words
[index
] = finaladdr
;
349 *reloc_addr
= OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS
- (offset
+1))
354 index
-= (index
- PLT_DOUBLE_SIZE
)/2;
356 data_words
[index
] = finaladdr
;
359 reloc_addr
[1] = OPCODE_MTCTR (12);
360 MODIFIED_CODE_NOQUEUE (reloc_addr
+ 1);
363 reloc_addr
[0] = OPCODE_LWZ (12,
364 (Elf32_Word
) (data_words
+ index
), 11);
367 MODIFIED_CODE (reloc_addr
);
372 dl_reloc_overflow (struct link_map
*map
,
374 Elf32_Addr
*const reloc_addr
,
375 const Elf32_Sym
*sym
,
376 const Elf32_Sym
*refsym
)
380 const Elf32_Sym
*errsym
= sym
?: refsym
;
381 t
= stpcpy (buffer
, name
);
382 t
= stpcpy (t
, " relocation at 0x00000000");
383 _itoa_word ((unsigned) reloc_addr
, t
, 16, 0);
388 strtab
= (const void *) D_PTR (map
, l_info
[DT_STRTAB
]);
389 t
= stpcpy (t
, " for symbol `");
390 t
= stpcpy (t
, strtab
+ errsym
->st_name
);
393 t
= stpcpy (t
, " out of range");
394 _dl_signal_error (0, map
->l_name
, NULL
, buffer
);
398 __process_machine_rela (struct link_map
*map
,
399 const Elf32_Rela
*reloc
,
400 const Elf32_Sym
*sym
,
401 const Elf32_Sym
*refsym
,
402 Elf32_Addr
*const reloc_addr
,
403 Elf32_Addr
const finaladdr
,
415 *reloc_addr
= finaladdr
;
419 if (finaladdr
> 0x01fffffc && finaladdr
< 0xfe000000)
420 dl_reloc_overflow (map
, "R_PPC_ADDR24", reloc_addr
, sym
, refsym
);
421 *reloc_addr
= (*reloc_addr
& 0xfc000003) | (finaladdr
& 0x3fffffc);
426 if (finaladdr
> 0x7fff && finaladdr
< 0x8000)
427 dl_reloc_overflow (map
, "R_PPC_ADDR16", reloc_addr
, sym
, refsym
);
428 *(Elf32_Half
*) reloc_addr
= finaladdr
;
431 case R_PPC_ADDR16_LO
:
432 *(Elf32_Half
*) reloc_addr
= finaladdr
;
435 case R_PPC_ADDR16_HI
:
436 *(Elf32_Half
*) reloc_addr
= finaladdr
>> 16;
439 case R_PPC_ADDR16_HA
:
440 *(Elf32_Half
*) reloc_addr
= (finaladdr
+ 0x8000) >> 16;
444 case R_PPC_ADDR14_BRTAKEN
:
445 case R_PPC_ADDR14_BRNTAKEN
:
446 if (finaladdr
> 0x7fff && finaladdr
< 0x8000)
447 dl_reloc_overflow (map
, "R_PPC_ADDR14", reloc_addr
, sym
, refsym
);
448 *reloc_addr
= (*reloc_addr
& 0xffff0003) | (finaladdr
& 0xfffc);
449 if (rinfo
!= R_PPC_ADDR14
)
450 *reloc_addr
= ((*reloc_addr
& 0xffdfffff)
451 | ((rinfo
== R_PPC_ADDR14_BRTAKEN
)
452 ^ (finaladdr
>> 31)) << 21);
457 Elf32_Sword delta
= finaladdr
- (Elf32_Word
) reloc_addr
;
458 if (delta
<< 6 >> 6 != delta
)
459 dl_reloc_overflow (map
, "R_PPC_REL24", reloc_addr
, sym
, refsym
);
460 *reloc_addr
= (*reloc_addr
& 0xfc000003) | (delta
& 0x3fffffc);
466 /* This can happen in trace mode when an object could not be
469 if (sym
->st_size
> refsym
->st_size
470 || (GL(dl_verbose
) && sym
->st_size
< refsym
->st_size
))
474 strtab
= (const void *) D_PTR (map
, l_info
[DT_STRTAB
]);
476 %s: Symbol `%s' has different size in shared object, onsider re-linking\n",
477 rtld_progname
?: "<program name unknown>",
478 strtab
+ refsym
->st_name
);
480 memcpy (reloc_addr
, (char *) finaladdr
, MIN (sym
->st_size
,
485 *reloc_addr
= finaladdr
- (Elf32_Word
) reloc_addr
;
489 /* It used to be that elf_machine_fixup_plt was used here,
490 but that doesn't work when ld.so relocates itself
491 for the second time. On the bright side, there's
492 no need to worry about thread-safety here. */
494 Elf32_Sword delta
= finaladdr
- (Elf32_Word
) reloc_addr
;
495 if (delta
<< 6 >> 6 == delta
)
496 *reloc_addr
= OPCODE_B (delta
);
497 else if (finaladdr
<= 0x01fffffc || finaladdr
>= 0xfe000000)
498 *reloc_addr
= OPCODE_BA (finaladdr
);
501 Elf32_Word
*plt
, *data_words
;
502 Elf32_Word index
, offset
, num_plt_entries
;
504 plt
= (Elf32_Word
*) D_PTR (map
, l_info
[DT_PLTGOT
]);
505 offset
= reloc_addr
- plt
;
507 if (offset
< PLT_DOUBLE_SIZE
*2 + PLT_INITIAL_ENTRY_WORDS
)
509 index
= (offset
- PLT_INITIAL_ENTRY_WORDS
)/2;
510 num_plt_entries
= (map
->l_info
[DT_PLTRELSZ
]->d_un
.d_val
511 / sizeof(Elf32_Rela
));
512 data_words
= plt
+ PLT_DATA_START_WORDS (num_plt_entries
);
513 data_words
[index
] = finaladdr
;
514 reloc_addr
[0] = OPCODE_LI (11, index
* 4);
515 reloc_addr
[1] = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS
518 MODIFIED_CODE_NOQUEUE (reloc_addr
+ 1);
522 reloc_addr
[0] = OPCODE_LIS_HI (12, finaladdr
);
523 reloc_addr
[1] = OPCODE_ADDI (12, 12, finaladdr
);
524 reloc_addr
[2] = OPCODE_MTCTR (12);
525 reloc_addr
[3] = OPCODE_BCTR ();
526 MODIFIED_CODE_NOQUEUE (reloc_addr
+ 3);
533 _dl_reloc_bad_type (map
, rinfo
, 0);
537 MODIFIED_CODE_NOQUEUE (reloc_addr
);