Adjusted.
[glibc.git] / sysdeps / powerpc / dl-machine.c
bloba76f1f092c331d64fa55bf1660fb57641c3eac9b
1 /* Machine-dependent ELF dynamic relocation functions. PowerPC version.
2 Copyright (C) 1995,96,97,98,99,2000,2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #include <unistd.h>
21 #include <string.h>
22 #include <sys/param.h>
23 #include <link.h>
24 #include <ldsodefs.h>
25 #include <elf/dynamic-link.h>
26 #include <dl-machine.h>
27 #include <stdio-common/_itoa.h>
29 /* Because ld.so is now versioned, these functions can be in their own file;
30 no relocations need to be done to call them.
31 Of course, if ld.so is not versioned... */
32 #if !(DO_VERSIONING - 0)
33 #error This will not work with versioning turned off, sorry.
34 #endif
/* Stuff for the PLT.  All quantities are counted in 32-bit words from
   the start of the PLT.  */

/* Number of words reserved at the start of the PLT before the first
   per-symbol entry.  */
#define PLT_INITIAL_ENTRY_WORDS 18
/* Word offset of the shared `long branch' stub that loads a target
   address from the data area and jumps to it.  */
#define PLT_LONGBRANCH_ENTRY_WORDS 0
/* Word offset of the lazy-resolution trampoline.  */
#define PLT_TRAMPOLINE_ENTRY_WORDS 6
/* Entries with an index below this use two words each; entries from
   this index onwards use four words each.  */
#define PLT_DOUBLE_SIZE (1<<13)
/* Word offset of the PLT entry with index ENTRY_NUMBER: two words per
   entry, plus two more for every entry beyond PLT_DOUBLE_SIZE.  */
#define PLT_ENTRY_START_WORDS(entry_number) \
  (PLT_INITIAL_ENTRY_WORDS + (entry_number)*2 \
   + ((entry_number) > PLT_DOUBLE_SIZE \
      ? ((entry_number) - PLT_DOUBLE_SIZE)*2 \
      : 0))
/* Word offset of the data area, which starts right after the last of
   NUM_ENTRIES entries.  */
#define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries)
/* Macros to build PowerPC opcode words.  Each one ORs the register
   numbers and immediate fields into the fixed opcode bits; 16-bit
   immediates are masked to their field width.  */
#define OPCODE_ADDI(rd,ra,simm) \
  (0x38000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
#define OPCODE_ADDIS(rd,ra,simm) \
  (0x3c000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff))
#define OPCODE_ADD(rd,ra,rb) \
  (0x7c000214 | (rd) << 21 | (ra) << 16 | (rb) << 11)
/* Relative and absolute branches; TARGET is masked to the 24-bit,
   word-aligned branch field.  */
#define OPCODE_B(target) (0x48000000 | ((target) & 0x03fffffc))
#define OPCODE_BA(target) (0x48000002 | ((target) & 0x03fffffc))
#define OPCODE_BCTR() 0x4e800420
#define OPCODE_LWZ(rd,d,ra) \
  (0x80000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
#define OPCODE_LWZU(rd,d,ra) \
  (0x84000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff))
#define OPCODE_MTCTR(rd) (0x7C0903A6 | (rd) << 21)
#define OPCODE_RLWINM(ra,rs,sh,mb,me) \
  (0x54000000 | (rs) << 21 | (ra) << 16 | (sh) << 11 | (mb) << 6 | (me) << 1)

/* Convenience forms built from the macros above.  */
#define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm)
/* The _HI forms add 0x8000 before taking the upper half, so that a
   following instruction using the sign-extended low half of VALUE
   reconstructs VALUE exactly.  */
#define OPCODE_ADDIS_HI(rd,ra,value) \
  OPCODE_ADDIS(rd,ra,((value) + 0x8000) >> 16)
#define OPCODE_LIS_HI(rd,value) OPCODE_ADDIS_HI(rd,0,value)
/* SH is parenthesized so that an expression argument such as `n + 1'
   still yields the mask end 31 - (n + 1).  */
#define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-(sh))
/* Single-instruction wrappers used when patching code at run time.
   All but PPC_DIE carry a "memory" clobber so the compiler does not
   move memory accesses across them.  */
#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory")
#define PPC_SYNC asm volatile ("sync" : : : "memory")
/* Note that this issues `sync' before `isync'.  */
#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory")
#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory")
/* Executes `tweq 0,0'.  */
#define PPC_DIE asm volatile ("tweq 0,0")

/* Use this when you've modified some code, but it won't be in the
   instruction fetch queue (or when it doesn't matter if it is).  */
#define MODIFIED_CODE_NOQUEUE(where) \
     do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0)
/* Use this when it might be in the instruction queue.  */
#define MODIFIED_CODE(where) \
     do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0)
89 /* The idea here is that to conform to the ABI, we are supposed to try
90 to load dynamic objects between 0x10000 (we actually use 0x40000 as
91 the lower bound, to increase the chance of a memory reference from
92 a null pointer giving a segfault) and the program's load address;
93 this may allow us to use a branch instruction in the PLT rather
94 than a computed jump. The address is only used as a preference for
95 mmap, so if we get it wrong the worst that happens is that it gets
96 mapped somewhere else. */
98 ElfW(Addr)
99 __elf_preferred_address(struct link_map *loader, size_t maplength,
100 ElfW(Addr) mapstartpref)
102 ElfW(Addr) low, high;
103 struct link_map *l;
105 /* If the object has a preference, load it there! */
106 if (mapstartpref != 0)
107 return mapstartpref;
109 /* Otherwise, quickly look for a suitable gap between 0x3FFFF and
110 0x70000000. 0x3FFFF is so that references off NULL pointers will
111 cause a segfault, 0x70000000 is just paranoia (it should always
112 be superceded by the program's load address). */
113 low = 0x0003FFFF;
114 high = 0x70000000;
115 for (l = _dl_loaded; l; l = l->l_next)
117 ElfW(Addr) mapstart, mapend;
118 mapstart = l->l_map_start & ~(_dl_pagesize - 1);
119 mapend = l->l_map_end | (_dl_pagesize - 1);
120 assert (mapend > mapstart);
122 if (mapend >= high && high >= mapstart)
123 high = mapstart;
124 else if (mapend >= low && low >= mapstart)
125 low = mapend;
126 else if (high >= mapend && mapstart >= low)
128 if (high - mapend >= mapstart - low)
129 low = mapend;
130 else
131 high = mapstart;
135 high -= 0x10000; /* Allow some room between objects. */
136 maplength = (maplength | (_dl_pagesize-1)) + 1;
137 if (high <= low || high - low < maplength )
138 return 0;
139 return high - maplength; /* Both high and maplength are page-aligned. */
142 /* Set up the loaded object described by L so its unrelocated PLT
143 entries will jump to the on-demand fixup code in dl-runtime.c.
144 Also install a small trampoline to be used by entries that have
145 been relocated to an address too far away for a single branch. */
147 /* There are many kinds of PLT entries:
149 (1) A direct jump to the actual routine, either a relative or
150 absolute branch. These are set up in __elf_machine_fixup_plt.
152 (2) Short lazy entries. These cover the first 8192 slots in
153 the PLT, and look like (where 'index' goes from 0 to 8191):
155 li %r11, index*4
156 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS+1]
158 (3) Short indirect jumps. These replace (2) when a direct jump
159 wouldn't reach. They look the same except that the branch
160 is 'b &plt[PLT_LONGBRANCH_ENTRY_WORDS]'.
162 (4) Long lazy entries. These cover the slots when a short entry
163 won't fit ('index*4' overflows its field), and look like:
165 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
166 lwzu %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
167 b &plt[PLT_TRAMPOLINE_ENTRY_WORDS]
168 bctr
170 (5) Long indirect jumps. These replace (4) when a direct jump
171 wouldn't reach. They look like:
173 lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS])
174 lwz %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS])
175 mtctr %r12
176 bctr
178 (6) Long direct jumps. These are used when thread-safety is not
179 required. They look like:
181 lis %r12, %hi(finaladdr)
182 addi %r12, %r12, %lo(finaladdr)
183 mtctr %r12
184 bctr
187 The lazy entries, (2) and (4), are set up here in
188 __elf_machine_runtime_setup. (1), (3), and (5) are set up in
189 __elf_machine_fixup_plt. (1), (3), and (6) can also be constructed
190 in __process_machine_rela.
192 The reason for the somewhat strange construction of the long
193 entries, (4) and (5), is that we need to ensure thread-safety. For
194 (1) and (3), this is obvious because only one instruction is
195 changed and the PPC architecture guarantees that aligned stores are
196 atomic. For (5), this is more tricky. When changing (4) to (5),
197 the `b' instruction is first changed to to `mtctr'; this is safe
198 and is why the `lwzu' instruction is not just a simple `addi'.
199 Once this is done, and is visible to all processors, the `lwzu' can
200 safely be changed to a `lwz'. */
202 __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
204 if (map->l_info[DT_JMPREL])
206 Elf32_Word i;
207 Elf32_Word *plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]);
208 Elf32_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
209 / sizeof (Elf32_Rela));
210 Elf32_Word rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries);
211 Elf32_Word data_words = (Elf32_Word) (plt + rel_offset_words);
212 Elf32_Word size_modified;
214 extern void _dl_runtime_resolve (void);
215 extern void _dl_prof_resolve (void);
217 /* Convert the index in r11 into an actual address, and get the
218 word at that address. */
219 plt[PLT_LONGBRANCH_ENTRY_WORDS] = OPCODE_ADDIS_HI (11, 11, data_words);
220 plt[PLT_LONGBRANCH_ENTRY_WORDS + 1] = OPCODE_LWZ (11, data_words, 11);
222 /* Call the procedure at that address. */
223 plt[PLT_LONGBRANCH_ENTRY_WORDS + 2] = OPCODE_MTCTR (11);
224 plt[PLT_LONGBRANCH_ENTRY_WORDS + 3] = OPCODE_BCTR ();
226 if (lazy)
228 Elf32_Word *tramp = plt + PLT_TRAMPOLINE_ENTRY_WORDS;
229 Elf32_Word dlrr = (Elf32_Word)(profile
230 ? _dl_prof_resolve
231 : _dl_runtime_resolve);
232 Elf32_Word offset;
234 if (profile && _dl_name_match_p (_dl_profile, map))
235 /* This is the object we are looking for. Say that we really
236 want profiling and the timers are started. */
237 _dl_profile_map = map;
239 /* For the long entries, subtract off data_words. */
240 tramp[0] = OPCODE_ADDIS_HI (11, 11, -data_words);
241 tramp[1] = OPCODE_ADDI (11, 11, -data_words);
243 /* Multiply index of entry by 3 (in r11). */
244 tramp[2] = OPCODE_SLWI (12, 11, 1);
245 tramp[3] = OPCODE_ADD (11, 12, 11);
246 if (dlrr <= 0x01fffffc || dlrr >= 0xfe000000)
248 /* Load address of link map in r12. */
249 tramp[4] = OPCODE_LI (12, (Elf32_Word) map);
250 tramp[5] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map);
252 /* Call _dl_runtime_resolve. */
253 tramp[6] = OPCODE_BA (dlrr);
255 else
257 /* Get address of _dl_runtime_resolve in CTR. */
258 tramp[4] = OPCODE_LI (12, dlrr);
259 tramp[5] = OPCODE_ADDIS_HI (12, 12, dlrr);
260 tramp[6] = OPCODE_MTCTR (12);
262 /* Load address of link map in r12. */
263 tramp[7] = OPCODE_LI (12, (Elf32_Word) map);
264 tramp[8] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map);
266 /* Call _dl_runtime_resolve. */
267 tramp[9] = OPCODE_BCTR ();
270 /* Set up the lazy PLT entries. */
271 offset = PLT_INITIAL_ENTRY_WORDS;
272 i = 0;
273 while (i < num_plt_entries && i < PLT_DOUBLE_SIZE)
275 plt[offset ] = OPCODE_LI (11, i * 4);
276 plt[offset+1] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + 2
277 - (offset+1))
278 * 4);
279 i++;
280 offset += 2;
282 while (i < num_plt_entries)
284 plt[offset ] = OPCODE_LIS_HI (11, i * 4 + data_words);
285 plt[offset+1] = OPCODE_LWZU (12, i * 4 + data_words, 11);
286 plt[offset+2] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS
287 - (offset+2))
288 * 4);
289 plt[offset+3] = OPCODE_BCTR ();
290 i++;
291 offset += 4;
295 /* Now, we've modified code. We need to write the changes from
296 the data cache to a second-level unified cache, then make
297 sure that stale data in the instruction cache is removed.
298 (In a multiprocessor system, the effect is more complex.)
299 Most of the PLT shouldn't be in the instruction cache, but
300 there may be a little overlap at the start and the end.
302 Assumes that dcbst and icbi apply to lines of 16 bytes or
303 more. Current known line sizes are 16, 32, and 128 bytes. */
305 size_modified = lazy ? rel_offset_words : 6;
306 for (i = 0; i < size_modified; i += 4)
307 PPC_DCBST (plt + i);
308 PPC_DCBST (plt + size_modified - 1);
309 PPC_SYNC;
310 PPC_ICBI (plt);
311 PPC_ICBI (plt + size_modified - 1);
312 PPC_ISYNC;
315 return lazy;
318 Elf32_Addr
319 __elf_machine_fixup_plt(struct link_map *map, const Elf32_Rela *reloc,
320 Elf32_Addr *reloc_addr, Elf32_Addr finaladdr)
322 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
323 if (delta << 6 >> 6 == delta)
324 *reloc_addr = OPCODE_B (delta);
325 else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000)
326 *reloc_addr = OPCODE_BA (finaladdr);
327 else
329 Elf32_Word *plt, *data_words;
330 Elf32_Word index, offset, num_plt_entries;
332 num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
333 / sizeof(Elf32_Rela));
334 plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]);
335 offset = reloc_addr - plt;
336 index = (offset - PLT_INITIAL_ENTRY_WORDS)/2;
337 data_words = plt + PLT_DATA_START_WORDS (num_plt_entries);
339 reloc_addr += 1;
341 if (index < PLT_DOUBLE_SIZE)
343 data_words[index] = finaladdr;
344 PPC_SYNC;
345 *reloc_addr = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS - (offset+1))
346 * 4);
348 else
350 index -= (index - PLT_DOUBLE_SIZE)/2;
352 data_words[index] = finaladdr;
353 PPC_SYNC;
355 reloc_addr[1] = OPCODE_MTCTR (12);
356 MODIFIED_CODE_NOQUEUE (reloc_addr + 1);
357 PPC_SYNC;
359 reloc_addr[0] = OPCODE_LWZ (12,
360 (Elf32_Word) (data_words + index), 11);
363 MODIFIED_CODE (reloc_addr);
364 return finaladdr;
367 static void
368 dl_reloc_overflow (struct link_map *map,
369 const char *name,
370 Elf32_Addr *const reloc_addr,
371 const Elf32_Sym *sym,
372 const Elf32_Sym *refsym)
374 char buffer[128];
375 char *t;
376 const Elf32_Sym *errsym = sym ?: refsym;
377 t = stpcpy (buffer, name);
378 t = stpcpy (t, " relocation at 0x00000000");
379 _itoa_word ((unsigned) reloc_addr, t, 16, 0);
380 if (errsym)
382 const char *strtab;
384 strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
385 t = stpcpy (t, " for symbol `");
386 t = stpcpy (t, strtab + errsym->st_name);
387 t = stpcpy (t, "'");
389 t = stpcpy (t, " out of range");
390 _dl_signal_error (0, map->l_name, buffer);
393 void
394 __process_machine_rela (struct link_map *map,
395 const Elf32_Rela *reloc,
396 const Elf32_Sym *sym,
397 const Elf32_Sym *refsym,
398 Elf32_Addr *const reloc_addr,
399 Elf32_Addr const finaladdr,
400 int rinfo)
402 switch (rinfo)
404 case R_PPC_NONE:
405 return;
407 case R_PPC_ADDR32:
408 case R_PPC_UADDR32:
409 case R_PPC_GLOB_DAT:
410 case R_PPC_RELATIVE:
411 *reloc_addr = finaladdr;
412 return;
414 case R_PPC_ADDR24:
415 if (finaladdr > 0x01fffffc && finaladdr < 0xfe000000)
416 dl_reloc_overflow (map, "R_PPC_ADDR24", reloc_addr, sym, refsym);
417 *reloc_addr = (*reloc_addr & 0xfc000003) | (finaladdr & 0x3fffffc);
418 break;
420 case R_PPC_ADDR16:
421 case R_PPC_UADDR16:
422 if (finaladdr > 0x7fff && finaladdr < 0x8000)
423 dl_reloc_overflow (map, "R_PPC_ADDR16", reloc_addr, sym, refsym);
424 *(Elf32_Half*) reloc_addr = finaladdr;
425 break;
427 case R_PPC_ADDR16_LO:
428 *(Elf32_Half*) reloc_addr = finaladdr;
429 break;
431 case R_PPC_ADDR16_HI:
432 *(Elf32_Half*) reloc_addr = finaladdr >> 16;
433 break;
435 case R_PPC_ADDR16_HA:
436 *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16;
437 break;
439 case R_PPC_ADDR14:
440 case R_PPC_ADDR14_BRTAKEN:
441 case R_PPC_ADDR14_BRNTAKEN:
442 if (finaladdr > 0x7fff && finaladdr < 0x8000)
443 dl_reloc_overflow (map, "R_PPC_ADDR14", reloc_addr, sym, refsym);
444 *reloc_addr = (*reloc_addr & 0xffff0003) | (finaladdr & 0xfffc);
445 if (rinfo != R_PPC_ADDR14)
446 *reloc_addr = ((*reloc_addr & 0xffdfffff)
447 | ((rinfo == R_PPC_ADDR14_BRTAKEN)
448 ^ (finaladdr >> 31)) << 21);
449 break;
451 case R_PPC_REL24:
453 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
454 if (delta << 6 >> 6 != delta)
455 dl_reloc_overflow (map, "R_PPC_REL24", reloc_addr, sym, refsym);
456 *reloc_addr = (*reloc_addr & 0xfc000003) | (delta & 0x3fffffc);
458 break;
460 case R_PPC_COPY:
461 if (sym == NULL)
462 /* This can happen in trace mode when an object could not be
463 found. */
464 return;
465 if (sym->st_size > refsym->st_size
466 || (_dl_verbose && sym->st_size < refsym->st_size))
468 const char *strtab;
470 strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
471 _dl_error_printf ("\
472 %s: Symbol `%s' has different size in shared object, onsider re-linking\n",
473 _dl_argv[0] ?: "<program name unknown>",
474 strtab + refsym->st_name);
476 memcpy (reloc_addr, (char *) finaladdr, MIN (sym->st_size,
477 refsym->st_size));
478 return;
480 case R_PPC_REL32:
481 *reloc_addr = finaladdr - (Elf32_Word) reloc_addr;
482 return;
484 case R_PPC_JMP_SLOT:
485 /* It used to be that elf_machine_fixup_plt was used here,
486 but that doesn't work when ld.so relocates itself
487 for the second time. On the bright side, there's
488 no need to worry about thread-safety here. */
490 Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr;
491 if (delta << 6 >> 6 == delta)
492 *reloc_addr = OPCODE_B (delta);
493 else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000)
494 *reloc_addr = OPCODE_BA (finaladdr);
495 else
497 Elf32_Word *plt, *data_words;
498 Elf32_Word index, offset, num_plt_entries;
500 plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]);
501 offset = reloc_addr - plt;
503 if (offset < PLT_DOUBLE_SIZE*2 + PLT_INITIAL_ENTRY_WORDS)
505 index = (offset - PLT_INITIAL_ENTRY_WORDS)/2;
506 num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val
507 / sizeof(Elf32_Rela));
508 data_words = plt + PLT_DATA_START_WORDS (num_plt_entries);
509 data_words[index] = finaladdr;
510 reloc_addr[0] = OPCODE_LI (11, index * 4);
511 reloc_addr[1] = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS
512 - (offset+1))
513 * 4);
514 MODIFIED_CODE_NOQUEUE (reloc_addr + 1);
516 else
518 reloc_addr[0] = OPCODE_LIS_HI (12, finaladdr);
519 reloc_addr[1] = OPCODE_ADDI (12, 12, finaladdr);
520 reloc_addr[2] = OPCODE_MTCTR (12);
521 reloc_addr[3] = OPCODE_BCTR ();
522 MODIFIED_CODE_NOQUEUE (reloc_addr + 3);
526 break;
528 default:
529 _dl_reloc_bad_type (map, rinfo, 0);
530 return;
533 MODIFIED_CODE_NOQUEUE (reloc_addr);