Replace ksplice_mod_find_sym with module_on_each_symbol.
[ksplice.git] / objmanip.c
blobbe397820faf9f12ecd255e5494c1c3d16533049b
1 /* This file is based in part on objcopy.c from GNU Binutils v2.17.
3 * Copyright (C) 1991-2006 Free Software Foundation, Inc.
4 * Copyright (C) 2008 Jeffrey Brian Arnold <jbarnold@mit.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 /* objmanip performs various object file manipulations for Ksplice. Its first
21 * argument is always an object file, which is modified in-place during
22 * objmanip's execution. (objmanip's code is similar to objcopy from GNU
23 * binutils because every manipulation that objmanip performs is essentially a
24 * "copy" operation with certain changes which make the new version different
25 * from the old version). objmanip has four modes of operation:
27 * (1) keep mode
29 * This mode is the first objmanip step in processing the target object files.
31 * This mode can be broken down into two submodes, called "keep-primary" (which
32 * is used to prepare the primary kernel module) and "keep-helper" (which is
33 * used to prepare the helper kernel module):
35 * (a) keep-primary: "objmanip file.o keep-primary ADDSTR_ALL ADDSTR_SECT sect_1 ... sect_n"
37 * In this submode, only certain sections are kept; all other sections are
38 * discarded. Specifically, the following sections are kept: the listed
39 * sections (sect_1 ... sect_n), certain sections referenced by the listed
40 * sections, and certain special sections. The sections that are kept have
41 * ADDSTR_ALL followed by ADDSTR_SECT added to the end of their names.
43 * The sections that are kept have most of their ELF relocations removed.
44 * (Relocations that point to sections that are being kept are not removed; all
45 * other relocations are removed). Information about each of the removed ELF
46 * relocations is printed to STDOUT (ksplice-create will save this information
47 * into Ksplice-specific ELF sections for the primary kernel module to use
48 * later).
50 * Each line of the STDOUT output represents a single place within the ELF
51 * object file at which a relocation has been removed. Each line contains the
52 * following fields, separated by spaces: an ELF symbol name, the name of a
53 * section previously containing a relocation pointing to that symbol, the
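54 * offset (within that section) of the former relocation to that symbol (in
55 * hexadecimal), a bit representing whether that ELF relocation is PC-relative,
56 * the ELF addend value for that relocation (in hexadecimal), and the size of
56 * the relocation as reported by bfd_get_reloc_size.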
58 * (b) keep-helper: "objmanip file.o keep-helper ADDSTR_ALL ADDSTR_SECT"
60 * In this submode, essentially all sections are kept and have ADDSTR_ALL
61 * followed by ADDSTR_SECT added to the end of their names.
63 * The sections that are kept have all of their ELF relocations removed.
64 * Information about each of the removed ELF relocations is printed to STDOUT
65 * (ksplice-create will save this information into Ksplice-specific ELF
66 * sections for the helper kernel module to use later).
68 * The fields of the STDOUT output are the same as with keep-primary.
70 * (2) globalize mode: "objmanip file.o globalize GLOBALIZESTR"
72 * This mode is the second objmanip step in processing the target object files.
73 * In this mode, all symbols whose names end in GLOBALIZESTR will be
74 * duplicated, with the duplicate symbols differing slightly from the original
75 * symbols. The duplicate symbols will have the string "_global" added to the
76 * end of their symbol names, and they will be global ELF symbols, regardless
77 * of whether the corresponding original symbol was global.
79 * (3) sizelist mode: "objmanip file.o sizelist"
81 * After the target object files have been linked into a single collection
82 * object file, this mode is used in order to obtain a list of all of the
83 * functions in the collection object file. Each line of the STDOUT output
84 * contains an ELF section name and that section's size, as presented by BFD's
85 * bfd_print_symbol function.
87 * (4) rmsyms mode: "objmanip file.o rmsyms sym_1 ... sym_n"
89 * This mode is the final objmanip step in preparing the Ksplice kernel
90 * modules. In this mode, any ELF relocations involving the listed symbols
91 * (sym_1 ... sym_n) are removed, and information about each of the removed
92 * relocations is printed to STDOUT.
94 * The fields of the STDOUT output are the same as with keep-primary.
97 #include "objcommon.h"
98 #include "objmanip.h"
99 #include <stdint.h>
101 asymbol **isympp = NULL;
102 long symcount;
104 char **varargs;
105 int varargs_count;
106 char *modestr, *addstr_all = "", *addstr_sect = "", *globalizestr;
108 struct wsect *wanted_sections = NULL;
110 struct specsect special_sections[] = {
111 {".altinstructions", 1, ".altinstr_replacement",
112 2 * sizeof(void *) + 4},
113 {".smp_locks", 0, NULL, sizeof(void *)},
114 {".parainstructions", 0, NULL, sizeof(void *) + 4},
115 }, *const end_special_sections = *(&special_sections + 1);
117 #define mode(str) starts_with(modestr, str)
119 int main(int argc, char **argv)
121 char *debug_name = malloc(strlen(argv[1]) + 4 + strlen(argv[2]) + 1);
122 sprintf(debug_name, "%s.pre%s", argv[1], argv[2]);
123 rename(argv[1], debug_name);
125 bfd_init();
126 bfd *ibfd = bfd_openr(debug_name, NULL);
127 assert(ibfd);
129 char **matching;
130 assert(bfd_check_format_matches(ibfd, bfd_object, &matching));
132 const char *output_target = bfd_get_target(ibfd);
133 bfd *obfd = bfd_openw(argv[1], output_target);
134 assert(obfd);
136 symcount = get_syms(ibfd, &isympp);
138 modestr = argv[2];
139 if (mode("keep")) {
140 addstr_all = argv[3];
141 addstr_sect = argv[4];
142 varargs = &argv[5];
143 varargs_count = argc - 5;
144 } else if (mode("globalize")) {
145 globalizestr = argv[3];
146 varargs = &argv[4];
147 varargs_count = argc - 4;
148 } else {
149 varargs = &argv[3];
150 varargs_count = argc - 3;
153 if (mode("keep")) {
154 while (1) {
155 struct wsect *tmp = wanted_sections;
156 bfd_map_over_sections(ibfd, mark_wanted_if_referenced,
157 NULL);
158 if (tmp == wanted_sections)
159 break;
163 int i;
164 for (i = 0; mode("sizelist") && i < symcount; i++) {
165 if ((isympp[i]->flags & BSF_FUNCTION)
166 && isympp[i]->value == 0 && !(isympp[i]->flags & BSF_WEAK)) {
167 /* We call bfd_print_symbol in order to get access to
168 * the size associated with the function symbol, which
169 * is not otherwise available through the BFD API
171 bfd_print_symbol(ibfd, stdout, isympp[i],
172 bfd_print_symbol_all);
173 printf("\n");
177 asection *p;
178 for (p = ibfd->sections; p != NULL; p = p->next) {
179 if (is_special(p->name))
180 continue;
181 if (want_section(p->name, NULL) || mode("rmsyms"))
182 rm_some_relocs(ibfd, p);
185 struct specsect *ss;
186 if (mode("keep")) {
187 for (ss = special_sections; ss != end_special_sections; ss++)
188 rm_from_special(ibfd, ss);
191 copy_object(ibfd, obfd);
192 assert(bfd_close(obfd));
193 assert(bfd_close(ibfd));
194 return EXIT_SUCCESS;
197 void rm_some_relocs(bfd *ibfd, asection *isection)
199 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
200 arelent **orig_relocs = malloc(ss->num_relocs * sizeof(*orig_relocs));
201 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof(*orig_relocs));
202 int orig_num_relocs = ss->num_relocs;
203 ss->num_relocs = 0;
205 int i;
206 for (i = 0; i < orig_num_relocs; i++) {
207 int rm_reloc = 0;
208 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
210 if (mode("rmsyms") && match_varargs(sym_ptr->name))
211 rm_reloc = 1;
213 if (mode("keep"))
214 rm_reloc = 1;
216 if (mode("keep-primary") && want_section(sym_ptr->name, NULL))
217 rm_reloc = 0;
219 if (rm_reloc)
220 print_reloc(ibfd, isection, orig_relocs[i], ss);
221 else
222 ss->relocs[ss->num_relocs++] = orig_relocs[i];
226 void print_reloc(bfd *ibfd, asection *isection, arelent *orig_reloc,
227 struct supersect *ss)
229 asymbol *sym_ptr = *orig_reloc->sym_ptr_ptr;
231 char *new_sectname = strdup(isection->name);
232 if (mode("keep"))
233 want_section(isection->name, &new_sectname);
235 char *new_symname = strdup(sym_ptr->name);
236 if (mode("keep-primary"))
237 want_section(sym_ptr->name, &new_symname);
239 int addend = orig_reloc->addend;
240 reloc_howto_type *howto = orig_reloc->howto;
241 int size = bfd_get_reloc_size(howto);
242 int addend2 = blot_section(ibfd, isection, orig_reloc->address, size);
243 assert(addend == 0 || addend2 == 0);
244 if (addend == 0)
245 addend = addend2;
247 printf("%s%s ", new_symname, addstr_all);
248 printf("%s%s%s ", canonical_sym(new_sectname), addstr_all, addstr_sect);
249 printf("%08x ", (int)orig_reloc->address);
250 printf("%d %08x %d\n", howto->pc_relative, addend, size);
253 int blot_section(bfd *abfd, asection *sect, int offset, int size)
255 struct supersect *ss = fetch_supersect(abfd, sect, isympp);
256 long address = (long)ss->contents + offset;
257 int tmp;
258 if (size == 4) {
259 tmp = *(int *)address;
260 *((int *)address) = 0x77777777;
261 } else if (size == 8) {
262 tmp = *(long long *)address;
263 *((long long *)address) = 0x7777777777777777ll;
264 } else {
265 printf("ksplice: Unsupported size %d\n", size);
266 DIE;
268 return tmp;
271 const char *canonical_sym(const char *sect_wlabel)
273 const char *sect = sect_wlabel;
274 if (!mode("sizelist"))
275 sect = dup_wolabel(sect_wlabel);
277 if (starts_with(sect, ".rodata"))
278 return sect;
280 int i;
281 for (i = 0; i < symcount; i++) {
282 const char *cur_sectname = isympp[i]->section->name;
283 if (!mode("sizelist"))
284 cur_sectname = dup_wolabel(cur_sectname);
286 if (strlen(isympp[i]->name) != 0 &&
287 !starts_with(isympp[i]->name, ".text") &&
288 strcmp(cur_sectname, sect) == 0 && isympp[i]->value == 0)
289 return isympp[i]->name;
291 printf("ksplice: Failed to canonicalize %s\n", sect);
292 DIE;
295 void rm_from_special(bfd *ibfd, struct specsect *s)
297 asection *isection = bfd_get_section_by_name(ibfd, s->sectname);
298 if (isection == NULL)
299 return;
301 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
302 int contents_size = align(ss->contents_size, ss->alignment);
303 void *orig_buffer = calloc(1, contents_size);
304 memcpy(orig_buffer, ss->contents, ss->contents_size);
305 arelent **orig_relocs = malloc(ss->num_relocs * sizeof(*orig_relocs));
306 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof(*orig_relocs));
308 int entry_size = align(s->entry_size, ss->alignment);
309 assert(contents_size % entry_size == 0);
310 if (s->odd_relocs)
311 assert((contents_size / entry_size) * 2 == ss->num_relocs);
312 else
313 assert(contents_size / entry_size == ss->num_relocs);
315 int orig_num_relocs = ss->num_relocs;
316 ss->num_relocs = 0;
317 int new_num_entries = 0;
318 int i, orig_buffer_index, end_last_entry = 0, modifier = 0;
319 for (i = 0; i < orig_num_relocs; i++) {
320 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
321 if (s->odd_relocs && i % 2 == 1) {
322 assert(strcmp(sym_ptr->name, s->odd_relocname) == 0);
323 continue;
325 asection *p;
326 for (p = ibfd->sections; p != NULL; p = p->next) {
327 if (strcmp(sym_ptr->name, p->name) == 0
328 && !is_special(p->name)
329 && !want_section(p->name, NULL))
330 break;
332 if (p != NULL)
333 continue;
335 if (s->odd_relocs)
336 orig_buffer_index = i / 2;
337 else
338 orig_buffer_index = i;
339 memcpy(ss->contents + (new_num_entries++) * entry_size,
340 orig_buffer + orig_buffer_index * entry_size,
341 entry_size);
342 modifier += orig_buffer_index * entry_size - end_last_entry;
343 ss->relocs[ss->num_relocs] = orig_relocs[i];
344 ss->relocs[ss->num_relocs++]->address -= modifier;
345 if (s->odd_relocs) {
346 ss->relocs[ss->num_relocs] = orig_relocs[i + 1];
347 ss->relocs[ss->num_relocs++]->address -= modifier;
349 end_last_entry = orig_buffer_index * entry_size + entry_size;
351 ss->contents_size = new_num_entries * entry_size;
354 void mark_wanted_if_referenced(bfd *abfd, asection *sect, void *ignored)
356 if (want_section(sect->name, NULL))
357 return;
358 if (!starts_with(sect->name, ".text")
359 && !starts_with(sect->name, ".rodata"))
360 return;
362 bfd_map_over_sections(abfd, check_for_ref_to_section, sect);
365 void check_for_ref_to_section(bfd *abfd, asection *looking_at,
366 void *looking_for)
368 if (!want_section(looking_at->name, NULL))
369 return;
371 struct supersect *ss = fetch_supersect(abfd, looking_at, isympp);
372 int i;
373 for (i = 0; i < ss->num_relocs; i++) {
374 asymbol *sym_ptr = *ss->relocs[i]->sym_ptr_ptr;
375 if (sym_ptr->section == (asection *)looking_for) {
376 struct wsect *w = malloc(sizeof(*w));
377 w->name = strdup(((asection *)looking_for)->name);
378 w->next = wanted_sections;
379 wanted_sections = w;
384 /* Modified function from GNU Binutils objcopy.c */
385 bfd_boolean copy_object(bfd *ibfd, bfd *obfd)
387 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
389 bfd_vma start = bfd_get_start_address(ibfd);
391 flagword flags = bfd_get_file_flags(ibfd);
392 flags &= bfd_applicable_file_flags(obfd);
394 assert(bfd_set_start_address(obfd, start)
395 && bfd_set_file_flags(obfd, flags));
397 enum bfd_architecture iarch = bfd_get_arch(ibfd);
398 unsigned int imach = bfd_get_mach(ibfd);
399 assert(bfd_set_arch_mach(obfd, iarch, imach));
400 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
402 /* BFD mandates that all output sections be created and sizes set before
403 any output is done. Thus, we traverse all sections multiple times. */
404 bfd_map_over_sections(ibfd, setup_section, obfd);
406 assert(bfd_count_sections(obfd));
408 /* Mark symbols used in output relocations so that they
409 are kept, even if they are local labels or static symbols.
411 Note we iterate over the input sections examining their
412 relocations since the relocations for the output sections
413 haven't been set yet. mark_symbols_used_in_relocations will
414 ignore input sections which have no corresponding output
415 section. */
417 bfd_map_over_sections(ibfd, mark_symbols_used_in_relocations, isympp);
418 asymbol **osympp = (void *)malloc((2 * symcount + 1) * sizeof(*osympp));
419 symcount = filter_symbols(ibfd, obfd, osympp, isympp, symcount);
421 bfd_set_symtab(obfd, osympp, symcount);
423 /* This has to happen after the symbol table has been set. */
424 bfd_map_over_sections(ibfd, copy_section, obfd);
426 /* Allow the BFD backend to copy any private data it understands
427 from the input BFD to the output BFD. This is done last to
428 permit the routine to look at the filtered symbol table, which is
429 important for the ECOFF code at least. */
430 assert(bfd_copy_private_bfd_data(ibfd, obfd));
432 return TRUE;
435 /* Modified function from GNU Binutils objcopy.c */
436 void setup_section(bfd *ibfd, asection *isection, void *obfdarg)
438 bfd *obfd = obfdarg;
439 bfd_vma vma;
441 char *name = strdup(isection->name);
442 if (!want_section(isection->name, &name))
443 return;
445 asection *osection = bfd_make_section_anyway(obfd, name);
446 assert(osection != NULL);
448 flagword flags = bfd_get_section_flags(ibfd, isection);
449 bfd_set_section_flags(obfd, osection, flags);
451 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
452 assert(bfd_set_section_size(obfd, osection, ss->contents_size));
454 vma = bfd_section_vma(ibfd, isection);
455 assert(bfd_set_section_vma(obfd, osection, vma));
457 osection->lma = isection->lma;
458 assert(bfd_set_section_alignment(obfd,
459 osection,
460 bfd_section_alignment(ibfd,
461 isection)));
462 osection->entsize = isection->entsize;
463 isection->output_section = osection;
464 isection->output_offset = 0;
465 return;
468 /* Modified function from GNU Binutils objcopy.c */
469 void copy_section(bfd *ibfd, asection *isection, void *obfdarg)
471 bfd *obfd = obfdarg;
473 char *name = strdup(isection->name);
474 if (!want_section(isection->name, &name))
475 return;
477 flagword flags = bfd_get_section_flags(ibfd, isection);
478 if ((flags & SEC_GROUP) != 0)
479 return;
481 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
482 asection *osection = isection->output_section;
483 if (ss->contents_size == 0 || osection == 0)
484 return;
486 bfd_set_reloc(obfd, osection,
487 ss->num_relocs == 0 ? NULL : ss->relocs, ss->num_relocs);
489 if (bfd_get_section_flags(ibfd, isection) & SEC_HAS_CONTENTS
490 && bfd_get_section_flags(obfd, osection) & SEC_HAS_CONTENTS)
491 assert(bfd_set_section_contents
492 (obfd, osection, ss->contents, 0, ss->contents_size));
495 /* Modified function from GNU Binutils objcopy.c
497 * Mark all the symbols which will be used in output relocations with
498 * the BSF_KEEP flag so that those symbols will not be stripped.
500 * Ignore relocations which will not appear in the output file.
502 void mark_symbols_used_in_relocations(bfd *ibfd, asection *isection,
503 void *symbolsarg)
505 if (isection->output_section == NULL)
506 return;
508 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
510 /* Examine each symbol used in a relocation. If it's not one of the
511 special bfd section symbols, then mark it with BSF_KEEP. */
512 int i;
513 for (i = 0; i < ss->num_relocs; i++) {
514 if (*ss->relocs[i]->sym_ptr_ptr != bfd_com_section_ptr->symbol
515 && *ss->relocs[i]->sym_ptr_ptr !=
516 bfd_abs_section_ptr->symbol
517 && *ss->relocs[i]->sym_ptr_ptr !=
518 bfd_und_section_ptr->symbol)
519 (*ss->relocs[i]->sym_ptr_ptr)->flags |= BSF_KEEP;
523 /* Modified function from GNU Binutils objcopy.c
525 * Choose which symbol entries to copy.
526 * We don't copy in place, because that confuses the relocs.
527 * Return the number of symbols to print.
529 unsigned int filter_symbols(bfd *abfd, bfd *obfd, asymbol **osyms,
530 asymbol **isyms, long symcount)
532 asymbol **from = isyms, **to = osyms;
533 long src_count = 0, dst_count = 0;
535 for (; src_count < symcount; src_count++) {
536 asymbol *sym = from[src_count];
537 flagword flags = sym->flags;
539 if (mode("keep") && want_section(sym->section->name, NULL)) {
540 char *newname =
541 malloc(strlen(sym->name) + strlen(addstr_all) +
542 strlen(addstr_sect) + 1);
543 sprintf(newname, "%s%s%s", sym->name, addstr_all,
544 addstr_sect);
545 sym->name = newname;
548 int keep;
549 if ((flags & BSF_KEEP) != 0 /* Used in relocation. */
550 || ((flags & BSF_SECTION_SYM) != 0
551 && ((*(sym->section)->symbol_ptr_ptr)->flags
552 & BSF_KEEP) != 0))
553 keep = 1;
554 else if ((flags & (BSF_GLOBAL | BSF_WEAK)) != 0)
555 keep = 1;
556 else if (bfd_decode_symclass(sym) == 'I')
557 /* Global symbols in $idata sections need to be retained.
558 External users of the library containing the $idata
559 section may reference these symbols. */
560 keep = 1;
561 else if ((flags & BSF_GLOBAL) != 0
562 || (flags & BSF_WEAK) != 0
563 || bfd_is_com_section(sym->section))
564 keep = 1;
565 else if ((flags & BSF_DEBUGGING) != 0)
566 keep = 1;
567 else
568 keep = !bfd_is_local_label(abfd, sym);
570 if (!want_section(sym->section->name, NULL))
571 keep = 0;
573 if (mode("rmsyms") && match_varargs(sym->name))
574 keep = 0;
576 if (keep)
577 to[dst_count++] = sym;
579 if (keep && mode("globalize")
580 && ends_with(sym->name, globalizestr)) {
581 asymbol *new = bfd_make_empty_symbol(obfd);
582 char *tmp =
583 malloc(strlen(sym->name) + strlen("_global") + 1);
584 sprintf(tmp, "%s_global", sym->name);
585 new->name = tmp;
586 new->value = sym->value;
587 new->flags = BSF_GLOBAL;
588 new->section = sym->section;
589 to[dst_count++] = new;
593 asection *p;
594 for (p = obfd->sections; mode("keep") && p != NULL; p = p->next) {
595 if (starts_with(p->name, ".rodata") &&
596 !exists_sym_with_name(from, symcount, p->name)) {
597 asymbol *new = bfd_make_empty_symbol(obfd);
598 new->name = p->name;
599 new->value = 0x0;
600 new->flags = BSF_GLOBAL;
601 new->section = p;
602 to[dst_count++] = new;
606 to[dst_count] = NULL;
607 return dst_count;
610 int exists_sym_with_name(asymbol **syms, int symcount, const char *desired)
612 int i;
613 for (i = 0; i < symcount; i++) {
614 if (strcmp(bfd_asymbol_name(syms[i]), desired) == 0)
615 return 1;
617 return 0;
620 int match_varargs(const char *str)
622 int i;
623 for (i = 0; i < varargs_count; i++) {
624 if (strcmp(str, varargs[i]) == 0)
625 return 1;
626 if (starts_with(str, varargs[i]) &&
627 strcmp(str + strlen(varargs[i]), "_global") == 0)
628 return 1;
630 return 0;
633 int want_section(const char *name, char **newname)
635 static const char *static_want[] = {
636 ".altinstructions",
637 ".altinstr_replacement",
638 ".smp_locks",
639 ".parainstructions",
640 NULL
643 if (!mode("keep"))
644 return 1;
646 struct wsect *w = wanted_sections;
647 for (; w != NULL; w = w->next) {
648 if (strcmp(w->name, name) == 0)
649 goto success;
652 if (starts_with(name, ".ksplice"))
653 goto success;
654 if (mode("keep-helper") && starts_with(name, ".text"))
655 goto success;
656 if (match_varargs(name))
657 goto success;
659 int i;
660 for (i = 0; static_want[i] != NULL; i++) {
661 if (strcmp(name, static_want[i]) == 0)
662 return 1;
664 return 0;
666 success:
668 if (newname != NULL) {
669 *newname =
670 malloc(strlen(name) + strlen(addstr_all) +
671 strlen(addstr_sect) + 1);
672 sprintf(*newname, "%s%s%s", name, addstr_all, addstr_sect);
674 return 1;
677 struct specsect *is_special(const char *name)
679 struct specsect *ss;
680 for (ss = special_sections; ss != end_special_sections; ss++) {
681 if (strcmp(ss->sectname, name) == 0)
682 return ss;
684 return NULL;