Convert symbol tables to vector interface.
[ksplice.git] / objmanip.c
blob2d0c8f4c81380bf1f2cb41b51d678a3a85334491
1 /* This file is based in part on objcopy.c from GNU Binutils v2.17.
3 * Copyright (C) 1991-2006 Free Software Foundation, Inc.
4 * Copyright (C) 2008 Jeffrey Brian Arnold <jbarnold@mit.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 /* objmanip performs various object file manipulations for Ksplice. Its first
21 * argument is always an object file, which is modified in-place during
22 * objmanip's execution. (objmanip's code is similar to objcopy from GNU
23 * binutils because every manipulation that objmanip performs is essentially a
24 * "copy" operation with certain changes which make the new version different
25 * from the old version). objmanip has four modes of operation:
27 * (1) keep mode
29 * This mode is the first objmanip step in processing the target object files.
31 * This mode can be broken down into two submodes, called "keep-primary" (which
32 * is used to prepare the primary kernel module) and "keep-helper" (which is
33 * used to prepare the helper kernel module):
35 * (a) keep-primary: "objmanip file.o keep-primary ADDSTR_ALL ADDSTR_SECT sect_1 ... sect_n"
37 * In this submode, only certain sections are kept; all other sections are
38 * discarded. Specifically, the following sections are kept: the listed
39 * sections (sect_1 ... sect_n), certain sections referenced by the listed
40 * sections, and certain special sections. The sections that are kept have
41 * ADDSTR_ALL followed by ADDSTR_SECT added to the end of their names.
43 * The sections that are kept have most of their ELF relocations removed.
44 * (Relocations that point to sections that are being kept are not removed; all
45 * other relocations are removed). Information about each of the removed ELF
46 * relocations is printed to STDOUT (ksplice-create will save this information
47 * into Ksplice-specific ELF sections for the primary kernel module to use
48 * later).
50 * Each line of the STDOUT output represents a single place within the ELF
51 * object file at which a relocation has been removed. Each line contains the
52 * following fields, separated by spaces: an ELF symbol name, the name of a
53 * section previously containing a relocation pointing to that symbol, the
54 * offset (within that section) of the former relocation to that symbol, a bit
55 * representing whether that ELF relocation is PC-relative, the ELF addend
56 * value for that relocation, and the size in bytes of the relocated field.
58 * (b) keep-helper: "objmanip file.o keep-helper ADDSTR_ALL ADDSTR_SECT"
60 * In this submode, essentially all sections are kept and have ADDSTR_ALL
61 * followed by ADDSTR_SECT added to the end of their names.
63 * The sections that are kept have all of their ELF relocations removed.
64 * Information about each of the removed ELF relocations is printed to STDOUT
65 * (ksplice-create will save this information into Ksplice-specific ELF
66 * sections for the helper kernel module to use later).
68 * The fields of the STDOUT output are the same as with keep-primary.
70 * (2) globalize mode: "objmanip file.o globalize GLOBALIZESTR"
72 * This mode is the second objmanip step in processing the target object files.
73 * In this mode, all symbols whose names end in GLOBALIZESTR will be
74 * duplicated, with the duplicate symbols differing slightly from the original
75 * symbols. The duplicate symbols will have the string "_global" added to the
76 * end of their symbol names, and they will be global ELF symbols, regardless
77 * of whether the corresponding original symbol was global.
79 * (3) sizelist mode: "objmanip file.o sizelist"
81 * After the target object files have been linked into a single collection
82 * object file, this mode is used in order to obtain a list of all of the
83 * functions in the collection object file. Each line of the STDOUT output
84 * contains an ELF section name and that section's size, as presented by BFD's
85 * bfd_print_symbol function.
87 * (4) rmsyms mode: "objmanip file.o rmsyms sym_1 ... sym_n"
89 * This mode is the final objmanip step in preparing the Ksplice kernel
90 * modules. In this mode, any ELF relocations involving the listed symbols
91 * (sym_1 ... sym_n) are removed, and information about each of the removed
92 * relocations is printed to STDOUT.
94 * The fields of the STDOUT output are the same as with keep-primary.
97 #include "objcommon.h"
98 #include "objmanip.h"
99 #include <stdint.h>
101 struct asymbolp_vec isyms;
103 char **varargs;
104 int varargs_count;
105 char *modestr, *addstr_all = "", *addstr_sect = "", *globalizestr;
107 struct wsect *wanted_sections = NULL;
109 struct specsect special_sections[] = {
110 {".altinstructions", 1, ".altinstr_replacement",
111 2 * sizeof(void *) + 4},
112 {".smp_locks", 0, NULL, sizeof(void *)},
113 {".parainstructions", 0, NULL, sizeof(void *) + 4},
114 }, *const end_special_sections = *(&special_sections + 1);
116 #define mode(str) starts_with(modestr, str)
118 int main(int argc, char **argv)
120 char *debug_name = malloc(strlen(argv[1]) + 4 + strlen(argv[2]) + 1);
121 sprintf(debug_name, "%s.pre%s", argv[1], argv[2]);
122 rename(argv[1], debug_name);
124 bfd_init();
125 bfd *ibfd = bfd_openr(debug_name, NULL);
126 assert(ibfd);
128 char **matching;
129 assert(bfd_check_format_matches(ibfd, bfd_object, &matching));
131 const char *output_target = bfd_get_target(ibfd);
132 bfd *obfd = bfd_openw(argv[1], output_target);
133 assert(obfd);
135 get_syms(ibfd, &isyms);
137 modestr = argv[2];
138 if (mode("keep")) {
139 addstr_all = argv[3];
140 addstr_sect = argv[4];
141 varargs = &argv[5];
142 varargs_count = argc - 5;
143 } else if (mode("globalize")) {
144 globalizestr = argv[3];
145 varargs = &argv[4];
146 varargs_count = argc - 4;
147 } else {
148 varargs = &argv[3];
149 varargs_count = argc - 3;
152 if (mode("keep")) {
153 while (1) {
154 struct wsect *tmp = wanted_sections;
155 bfd_map_over_sections(ibfd, mark_wanted_if_referenced,
156 NULL);
157 if (tmp == wanted_sections)
158 break;
162 asymbol **symp;
163 for (symp = isyms.data;
164 mode("sizelist") && symp < isyms.data + isyms.size; symp++) {
165 asymbol *sym = *symp;
166 if ((sym->flags & BSF_FUNCTION)
167 && sym->value == 0 && !(sym->flags & BSF_WEAK)) {
168 /* We call bfd_print_symbol in order to get access to
169 * the size associated with the function symbol, which
170 * is not otherwise available through the BFD API
172 bfd_print_symbol(ibfd, stdout, sym,
173 bfd_print_symbol_all);
174 printf("\n");
178 asection *p;
179 for (p = ibfd->sections; p != NULL; p = p->next) {
180 if (is_special(p->name))
181 continue;
182 if (want_section(p->name, NULL) || mode("rmsyms"))
183 rm_some_relocs(ibfd, p);
186 struct specsect *ss;
187 if (mode("keep")) {
188 for (ss = special_sections; ss != end_special_sections; ss++)
189 rm_from_special(ibfd, ss);
192 copy_object(ibfd, obfd);
193 assert(bfd_close(obfd));
194 assert(bfd_close(ibfd));
195 return EXIT_SUCCESS;
198 void rm_some_relocs(bfd *ibfd, asection *isection)
200 struct supersect *ss = fetch_supersect(ibfd, isection, &isyms);
201 struct arelentp_vec orig_relocs;
202 vec_move(&orig_relocs, &ss->relocs);
204 arelent **relocp;
205 for (relocp = orig_relocs.data;
206 relocp < orig_relocs.data + orig_relocs.size; ++relocp) {
207 int rm_reloc = 0;
208 asymbol *sym_ptr = *(*relocp)->sym_ptr_ptr;
210 if (mode("rmsyms") && match_varargs(sym_ptr->name))
211 rm_reloc = 1;
213 if (mode("keep"))
214 rm_reloc = 1;
216 if (mode("keep-primary") && want_section(sym_ptr->name, NULL))
217 rm_reloc = 0;
219 if (rm_reloc)
220 print_reloc(ibfd, isection, *relocp, ss);
221 else
222 *vec_grow(&ss->relocs, 1) = *relocp;
226 void print_reloc(bfd *ibfd, asection *isection, arelent *orig_reloc,
227 struct supersect *ss)
229 asymbol *sym_ptr = *orig_reloc->sym_ptr_ptr;
231 char *new_sectname = strdup(isection->name);
232 if (mode("keep"))
233 want_section(isection->name, &new_sectname);
235 char *new_symname = strdup(sym_ptr->name);
236 if (mode("keep-primary"))
237 want_section(sym_ptr->name, &new_symname);
239 int addend = orig_reloc->addend;
240 reloc_howto_type *howto = orig_reloc->howto;
241 int size = bfd_get_reloc_size(howto);
242 int addend2 = blot_section(ibfd, isection, orig_reloc->address, size);
243 assert(addend == 0 || addend2 == 0);
244 if (addend == 0)
245 addend = addend2;
247 printf("%s%s ", new_symname, addstr_all);
248 printf("%s%s%s ", canonical_sym(new_sectname), addstr_all, addstr_sect);
249 printf("%08x ", (int)orig_reloc->address);
250 printf("%d %08x %d\n", howto->pc_relative, addend, size);
253 int blot_section(bfd *abfd, asection *sect, int offset, int size)
255 struct supersect *ss = fetch_supersect(abfd, sect, &isyms);
256 void *address = ss->contents.data + offset;
257 int tmp;
258 if (size == 4) {
259 tmp = *(int *)address;
260 *((int *)address) = 0x77777777;
261 } else if (size == 8) {
262 tmp = *(long long *)address;
263 *((long long *)address) = 0x7777777777777777ll;
264 } else {
265 fprintf(stderr, "ksplice: Unsupported size %d\n", size);
266 DIE;
268 return tmp;
271 const char *canonical_sym(const char *sect_wlabel)
273 const char *sect = sect_wlabel;
274 if (!mode("sizelist"))
275 sect = dup_wolabel(sect_wlabel);
277 if (starts_with(sect, ".rodata"))
278 return sect;
280 asymbol **symp;
281 for (symp = isyms.data; symp < isyms.data + isyms.size; symp++) {
282 asymbol *sym = *symp;
283 const char *cur_sectname = sym->section->name;
284 if (!mode("sizelist"))
285 cur_sectname = dup_wolabel(cur_sectname);
287 if (strlen(sym->name) != 0 &&
288 !starts_with(sym->name, ".text") &&
289 strcmp(cur_sectname, sect) == 0 && sym->value == 0)
290 return sym->name;
292 printf("ksplice: Failed to canonicalize %s\n", sect);
293 DIE;
296 void rm_from_special(bfd *ibfd, struct specsect *s)
298 asection *isection = bfd_get_section_by_name(ibfd, s->sectname);
299 if (isection == NULL)
300 return;
302 struct supersect *ss = fetch_supersect(ibfd, isection, &isyms);
303 struct void_vec orig_contents;
304 vec_move(&orig_contents, &ss->contents);
305 size_t pad = align(orig_contents.size, ss->alignment) -
306 orig_contents.size;
307 memset(vec_grow(&orig_contents, pad), 0, pad);
308 struct arelentp_vec orig_relocs;
309 vec_move(&orig_relocs, &ss->relocs);
311 int entry_size = align(s->entry_size, ss->alignment);
312 int relocs_per_entry = s->odd_relocs ? 2 : 1;
313 assert((orig_contents.size / entry_size) * relocs_per_entry ==
314 orig_relocs.size);
316 void *orig_entry;
317 arelent **relocp;
318 for (orig_entry = orig_contents.data, relocp = orig_relocs.data;
319 orig_entry < orig_contents.data + orig_contents.size;
320 orig_entry += entry_size, relocp += relocs_per_entry) {
321 asymbol *sym = *(*relocp)->sym_ptr_ptr;
322 if (s->odd_relocs) {
323 asymbol *odd_sym = *(*(relocp + 1))->sym_ptr_ptr;
324 assert(strcmp(odd_sym->name, s->odd_relocname) == 0);
326 asection *p;
327 for (p = ibfd->sections; p != NULL; p = p->next) {
328 if (strcmp(sym->name, p->name) == 0
329 && !is_special(p->name)
330 && !want_section(p->name, NULL))
331 break;
333 if (p != NULL)
334 continue;
336 void *new_entry = vec_grow(&ss->contents, entry_size);
337 memcpy(new_entry, orig_entry, entry_size);
338 int modifier = (new_entry - ss->contents.data) -
339 (orig_entry - orig_contents.data);
340 arelent **new_relocp = vec_grow(&ss->relocs, 1);
341 *new_relocp = *relocp;
342 (*new_relocp)->address += modifier;
343 if (s->odd_relocs) {
344 new_relocp = vec_grow(&ss->relocs, 1);
345 *new_relocp = *(relocp + 1);
346 (*new_relocp)->address += modifier;
351 void mark_wanted_if_referenced(bfd *abfd, asection *sect, void *ignored)
353 if (want_section(sect->name, NULL))
354 return;
355 if (!starts_with(sect->name, ".text")
356 && !starts_with(sect->name, ".rodata"))
357 return;
359 bfd_map_over_sections(abfd, check_for_ref_to_section, sect);
362 void check_for_ref_to_section(bfd *abfd, asection *looking_at,
363 void *looking_for)
365 if (!want_section(looking_at->name, NULL))
366 return;
368 struct supersect *ss = fetch_supersect(abfd, looking_at, &isyms);
369 arelent **relocp;
370 for (relocp = ss->relocs.data;
371 relocp != ss->relocs.data + ss->relocs.size; ++relocp) {
372 asymbol *sym = *(*relocp)->sym_ptr_ptr;
373 if (sym->section == (asection *)looking_for) {
374 struct wsect *w = malloc(sizeof(*w));
375 w->name = strdup(((asection *)looking_for)->name);
376 w->next = wanted_sections;
377 wanted_sections = w;
382 /* Modified function from GNU Binutils objcopy.c */
383 bfd_boolean copy_object(bfd *ibfd, bfd *obfd)
385 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
387 bfd_vma start = bfd_get_start_address(ibfd);
389 flagword flags = bfd_get_file_flags(ibfd);
390 flags &= bfd_applicable_file_flags(obfd);
392 assert(bfd_set_start_address(obfd, start)
393 && bfd_set_file_flags(obfd, flags));
395 enum bfd_architecture iarch = bfd_get_arch(ibfd);
396 unsigned int imach = bfd_get_mach(ibfd);
397 assert(bfd_set_arch_mach(obfd, iarch, imach));
398 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
400 /* BFD mandates that all output sections be created and sizes set before
401 any output is done. Thus, we traverse all sections multiple times. */
402 bfd_map_over_sections(ibfd, setup_section, obfd);
404 assert(bfd_count_sections(obfd));
406 /* Mark symbols used in output relocations so that they
407 are kept, even if they are local labels or static symbols.
409 Note we iterate over the input sections examining their
410 relocations since the relocations for the output sections
411 haven't been set yet. mark_symbols_used_in_relocations will
412 ignore input sections which have no corresponding output
413 section. */
415 bfd_map_over_sections(ibfd, mark_symbols_used_in_relocations, &isyms);
416 struct asymbolp_vec osyms;
417 vec_init(&osyms);
418 filter_symbols(ibfd, obfd, &osyms, &isyms);
420 bfd_set_symtab(obfd, osyms.data, osyms.size);
422 /* This has to happen after the symbol table has been set. */
423 bfd_map_over_sections(ibfd, copy_section, obfd);
425 /* Allow the BFD backend to copy any private data it understands
426 from the input BFD to the output BFD. This is done last to
427 permit the routine to look at the filtered symbol table, which is
428 important for the ECOFF code at least. */
429 assert(bfd_copy_private_bfd_data(ibfd, obfd));
431 return TRUE;
434 /* Modified function from GNU Binutils objcopy.c */
435 void setup_section(bfd *ibfd, asection *isection, void *obfdarg)
437 bfd *obfd = obfdarg;
438 bfd_vma vma;
440 char *name = strdup(isection->name);
441 if (!want_section(isection->name, &name))
442 return;
444 asection *osection = bfd_make_section_anyway(obfd, name);
445 assert(osection != NULL);
447 flagword flags = bfd_get_section_flags(ibfd, isection);
448 bfd_set_section_flags(obfd, osection, flags);
450 struct supersect *ss = fetch_supersect(ibfd, isection, &isyms);
451 assert(bfd_set_section_size(obfd, osection, ss->contents.size));
453 vma = bfd_section_vma(ibfd, isection);
454 assert(bfd_set_section_vma(obfd, osection, vma));
456 osection->lma = isection->lma;
457 assert(bfd_set_section_alignment(obfd,
458 osection,
459 bfd_section_alignment(ibfd,
460 isection)));
461 osection->entsize = isection->entsize;
462 isection->output_section = osection;
463 isection->output_offset = 0;
464 return;
467 /* Modified function from GNU Binutils objcopy.c */
468 void copy_section(bfd *ibfd, asection *isection, void *obfdarg)
470 bfd *obfd = obfdarg;
472 char *name = strdup(isection->name);
473 if (!want_section(isection->name, &name))
474 return;
476 flagword flags = bfd_get_section_flags(ibfd, isection);
477 if ((flags & SEC_GROUP) != 0)
478 return;
480 struct supersect *ss = fetch_supersect(ibfd, isection, &isyms);
481 asection *osection = isection->output_section;
482 if (ss->contents.size == 0 || osection == 0)
483 return;
485 bfd_set_reloc(obfd, osection,
486 ss->relocs.size == 0 ? NULL : ss->relocs.data,
487 ss->relocs.size);
489 if (bfd_get_section_flags(ibfd, isection) & SEC_HAS_CONTENTS
490 && bfd_get_section_flags(obfd, osection) & SEC_HAS_CONTENTS)
491 assert(bfd_set_section_contents
492 (obfd, osection, ss->contents.data, 0,
493 ss->contents.size));
496 /* Modified function from GNU Binutils objcopy.c
498 * Mark all the symbols which will be used in output relocations with
499 * the BSF_KEEP flag so that those symbols will not be stripped.
501 * Ignore relocations which will not appear in the output file.
503 void mark_symbols_used_in_relocations(bfd *ibfd, asection *isection,
504 void *symbolsarg)
506 if (isection->output_section == NULL)
507 return;
509 struct supersect *ss = fetch_supersect(ibfd, isection, &isyms);
511 /* Examine each symbol used in a relocation. If it's not one of the
512 special bfd section symbols, then mark it with BSF_KEEP. */
513 arelent **relocp;
514 for (relocp = ss->relocs.data;
515 relocp < ss->relocs.data + ss->relocs.size; relocp++) {
516 asymbol *sym = *(*relocp)->sym_ptr_ptr;
517 if (sym != bfd_com_section_ptr->symbol
518 && sym != bfd_abs_section_ptr->symbol
519 && sym != bfd_und_section_ptr->symbol)
520 sym->flags |= BSF_KEEP;
524 /* Modified function from GNU Binutils objcopy.c
526 * Choose which symbol entries to copy.
527 * We don't copy in place, because that confuses the relocs.
528 * Return the number of symbols to print.
530 void filter_symbols(bfd *abfd, bfd *obfd, struct asymbolp_vec *osyms,
531 struct asymbolp_vec *isyms)
533 asymbol **symp;
534 for (symp = isyms->data; symp < isyms->data + isyms->size; symp++) {
535 asymbol *sym = *symp;
536 flagword flags = sym->flags;
538 if (mode("keep") && want_section(sym->section->name, NULL)) {
539 char *newname =
540 malloc(strlen(sym->name) + strlen(addstr_all) +
541 strlen(addstr_sect) + 1);
542 sprintf(newname, "%s%s%s", sym->name, addstr_all,
543 addstr_sect);
544 sym->name = newname;
547 int keep;
548 if ((flags & BSF_KEEP) != 0 /* Used in relocation. */
549 || ((flags & BSF_SECTION_SYM) != 0
550 && ((*(sym->section)->symbol_ptr_ptr)->flags
551 & BSF_KEEP) != 0))
552 keep = 1;
553 else if ((flags & (BSF_GLOBAL | BSF_WEAK)) != 0)
554 keep = 1;
555 else if (bfd_decode_symclass(sym) == 'I')
556 /* Global symbols in $idata sections need to be retained.
557 External users of the library containing the $idata
558 section may reference these symbols. */
559 keep = 1;
560 else if ((flags & BSF_GLOBAL) != 0
561 || (flags & BSF_WEAK) != 0
562 || bfd_is_com_section(sym->section))
563 keep = 1;
564 else if ((flags & BSF_DEBUGGING) != 0)
565 keep = 1;
566 else
567 keep = !bfd_is_local_label(abfd, sym);
569 if (!want_section(sym->section->name, NULL))
570 keep = 0;
572 if (mode("rmsyms") && match_varargs(sym->name))
573 keep = 0;
575 if (keep)
576 *vec_grow(osyms, 1) = sym;
578 if (keep && mode("globalize")
579 && ends_with(sym->name, globalizestr)) {
580 asymbol *new = bfd_make_empty_symbol(obfd);
581 char *tmp =
582 malloc(strlen(sym->name) + strlen("_global") + 1);
583 sprintf(tmp, "%s_global", sym->name);
584 new->name = tmp;
585 new->value = sym->value;
586 new->flags = BSF_GLOBAL;
587 new->section = sym->section;
588 *vec_grow(osyms, 1) = new;
592 asection *p;
593 for (p = obfd->sections; mode("keep") && p != NULL; p = p->next) {
594 if (starts_with(p->name, ".rodata") &&
595 !exists_sym_with_name(isyms, p->name)) {
596 asymbol *new = bfd_make_empty_symbol(obfd);
597 new->name = p->name;
598 new->value = 0x0;
599 new->flags = BSF_GLOBAL;
600 new->section = p;
601 *vec_grow(osyms, 1) = new;
606 int exists_sym_with_name(struct asymbolp_vec *syms, const char *desired)
608 asymbol **symp;
609 for (symp = syms->data; symp < syms->data + syms->size; symp++) {
610 if (strcmp(bfd_asymbol_name(*symp), desired) == 0)
611 return 1;
613 return 0;
616 int match_varargs(const char *str)
618 int i;
619 for (i = 0; i < varargs_count; i++) {
620 if (strcmp(str, varargs[i]) == 0)
621 return 1;
622 if (starts_with(str, varargs[i]) &&
623 strcmp(str + strlen(varargs[i]), "_global") == 0)
624 return 1;
626 return 0;
629 int want_section(const char *name, char **newname)
631 static const char *static_want[] = {
632 ".altinstructions",
633 ".altinstr_replacement",
634 ".smp_locks",
635 ".parainstructions",
636 NULL
639 if (!mode("keep"))
640 return 1;
642 struct wsect *w = wanted_sections;
643 for (; w != NULL; w = w->next) {
644 if (strcmp(w->name, name) == 0)
645 goto success;
648 if (starts_with(name, ".ksplice"))
649 goto success;
650 if (mode("keep-helper") && starts_with(name, ".text"))
651 goto success;
652 if (match_varargs(name))
653 goto success;
655 int i;
656 for (i = 0; static_want[i] != NULL; i++) {
657 if (strcmp(name, static_want[i]) == 0)
658 return 1;
660 return 0;
662 success:
664 if (newname != NULL) {
665 *newname =
666 malloc(strlen(name) + strlen(addstr_all) +
667 strlen(addstr_sect) + 1);
668 sprintf(*newname, "%s%s%s", name, addstr_all, addstr_sect);
670 return 1;
673 struct specsect *is_special(const char *name)
675 struct specsect *ss;
676 for (ss = special_sections; ss != end_special_sections; ss++) {
677 if (strcmp(ss->sectname, name) == 0)
678 return ss;
680 return NULL;