activate_primary no longer needs to be in module_pack.
[ksplice.git] / objmanip.c
blobcbb1d452946e4d3d41d638bee9f5e977d4cbad4f
1 /* This file is based in part on objcopy.c from GNU Binutils v2.17.
3 * Copyright (C) 1991-2006 Free Software Foundation, Inc.
4 * Copyright (C) 2008 Jeffrey Brian Arnold <jbarnold@mit.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 /* objmanip performs various object file manipulations for Ksplice. Its first
21 * argument is always an object file, which is modified in-place during
22 * objmanip's execution. (objmanip's code is similar to objcopy from GNU
23 * binutils because every manipulation that objmanip performs is essentially a
24 * "copy" operation with certain changes which make the new version different
25 * from the old version). objmanip has four modes of operation:
27 * (1) keep mode
29 * This mode is the first objmanip step in processing the target object files.
31 * This mode can be broken down into two submodes, called "keep-primary" (which
32 * is used to prepare the primary kernel module) and "keep-helper" (which is
33 * used to prepare the helper kernel module):
35 * (a) keep-primary: "objmanip file.o keep-primary ADDSTR_ALL ADDSTR_SECT sect_1 ... sect_n"
37 * In this submode, only certain sections are kept; all other sections are
38 * discarded. Specifically, the following sections are kept: the listed
39 * sections (sect_1 ... sect_n), certain sections referenced by the listed
40 * sections, and certain special sections. Kept non-special sections have
41 * ADDSTR_ALL and then ADDSTR_SECT added to the end of their names.
43 * The sections that are kept have most of their ELF relocations removed.
44 * (Relocations that point to sections that are being kept are not removed; all
45 * other relocations are removed). Information about each of the removed ELF
46 * relocations is printed to STDOUT (ksplice-create will save this information
47 * into Ksplice-specific ELF sections for the primary kernel module to use
48 * later).
50 * Each line of the STDOUT output represents a single place within the ELF
51 * object file at which a relocation has been removed. Each line contains the
52 * following fields, separated by spaces: an ELF symbol name, the name of a
53 * section previously containing a relocation pointing to that symbol, the
54 * offset (within that section) of the former relocation to that symbol, a bit
55 * representing whether that ELF relocation is PC-relative, the ELF addend
56 * value for that relocation, and the size in bytes of the relocated field.
58 * (b) keep-helper: "objmanip file.o keep-helper ADDSTR_ALL ADDSTR_SECT"
60 * In this submode, essentially all sections are kept and have ADDSTR_ALL and
61 * then ADDSTR_SECT added to the end of their names.
63 * The sections that are kept have all of their ELF relocations removed.
64 * Information about each of the removed ELF relocations is printed to STDOUT
65 * (ksplice-create will save this information into Ksplice-specific ELF
66 * sections for the helper kernel module to use later).
68 * The fields of the STDOUT output are the same as with keep-primary.
70 * (2) globalize mode: "objmanip file.o globalize GLOBALIZESTR"
72 * This mode is the second objmanip step in processing the target object files.
73 * In this mode, all symbols whose names end in GLOBALIZESTR will be
74 * duplicated, with the duplicate symbols differing slightly from the original
75 * symbols. The duplicate symbols will have the string "_global" added to the
76 * end of their symbol names, and they will be global ELF symbols, regardless
77 * of whether the corresponding original symbol was global.
79 * (3) sizelist mode: "objmanip file.o sizelist"
81 * After the target object files have been linked into a single collection
82 * object file, this mode is used in order to obtain a list of all of the
83 * functions in the collection object file. Each line of the STDOUT output
84 * contains an ELF section name and that section's size, as presented by BFD's
85 * bfd_print_symbol function.
87 * (4) rmsyms mode: "objmanip file.o rmsyms sym_1 ... sym_n"
89 * This mode is the final objmanip step in preparing the Ksplice kernel
90 * modules. In this mode, any ELF relocations involving the listed symbols
91 * (sym_1 ... sym_n) are removed, and information about each of the removed
92 * relocations is printed to STDOUT.
94 * The fields of the STDOUT output are the same as with keep-primary.
97 #include "objcommon.h"
98 #include "objmanip.h"
99 #include <stdint.h>
101 asymbol **isympp = NULL;
102 long symcount;
104 char **varargs;
105 int varargs_count;
106 char *modestr, *addstr_all = "", *addstr_sect = "", *globalizestr;
108 struct wsect *wanted_sections = NULL;
110 struct specsect special_sections[] =
111 { {".altinstructions", 1, ".altinstr_replacement",
112 2 * sizeof(char *) + 4 * sizeof(char)},
113 {".smp_locks", 0, NULL, sizeof(char *)},
114 {".parainstructions", 0, NULL, sizeof(char *) + 4 * sizeof(char)},
115 {NULL}
118 #define mode(str) starts_with(modestr, str)
120 int main(int argc, char **argv)
122 char *debug_name = malloc(strlen(argv[1]) + 4 + strlen(argv[2]) + 1);
123 sprintf(debug_name, "%s.pre%s", argv[1], argv[2]);
124 rename(argv[1], debug_name);
126 bfd_init();
127 bfd *ibfd = bfd_openr(debug_name, NULL);
128 assert(ibfd);
130 char **matching;
131 assert(bfd_check_format_matches(ibfd, bfd_object, &matching));
133 const char *output_target = bfd_get_target(ibfd);
134 bfd *obfd = bfd_openw(argv[1], output_target);
135 assert(obfd);
137 symcount = get_syms(ibfd, &isympp);
139 modestr = argv[2];
140 if (mode("keep")) {
141 addstr_all = argv[3];
142 addstr_sect = argv[4];
143 varargs = &argv[5];
144 varargs_count = argc - 5;
145 } else if (mode("globalize")) {
146 globalizestr = argv[3];
147 varargs = &argv[4];
148 varargs_count = argc - 4;
149 } else {
150 varargs = &argv[3];
151 varargs_count = argc - 3;
154 if (mode("keep")) {
155 while (1) {
156 struct wsect *tmp = wanted_sections;
157 bfd_map_over_sections(ibfd, mark_wanted_if_referenced,
158 NULL);
159 if (tmp == wanted_sections)
160 break;
164 int i;
165 for (i = 0; mode("sizelist") && i < symcount; i++) {
166 if ((isympp[i]->flags & BSF_FUNCTION)
167 && isympp[i]->value == 0 && !(isympp[i]->flags & BSF_WEAK)) {
168 /* We call bfd_print_symbol in order to get access to
169 * the size associated with the function symbol, which
170 * is not otherwise available through the BFD API
172 bfd_print_symbol(ibfd, stdout, isympp[i],
173 bfd_print_symbol_all);
174 printf("\n");
178 asection *p;
179 for (p = ibfd->sections; p != NULL; p = p->next) {
180 if (is_special(p->name))
181 continue;
182 if (want_section(p->name, NULL) || mode("rmsyms"))
183 rm_some_relocs(ibfd, p);
186 for (i = 0; mode("keep") && special_sections[i].sectname != NULL; i++) {
187 rm_from_special(ibfd, &special_sections[i]);
190 copy_object(ibfd, obfd);
191 assert(bfd_close(obfd));
192 assert(bfd_close(ibfd));
193 return EXIT_SUCCESS;
196 void rm_some_relocs(bfd *ibfd, asection *isection)
198 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
199 arelent **orig_relocs = malloc(ss->num_relocs * sizeof(*orig_relocs));
200 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof(*orig_relocs));
201 int orig_num_relocs = ss->num_relocs;
202 ss->num_relocs = 0;
204 int i;
205 for (i = 0; i < orig_num_relocs; i++) {
206 int rm_reloc = 0;
207 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
209 if (mode("rmsyms") && match_varargs(sym_ptr->name))
210 rm_reloc = 1;
212 if (mode("keep"))
213 rm_reloc = 1;
215 if (mode("keep-primary") && want_section(sym_ptr->name, NULL))
216 rm_reloc = 0;
218 if (rm_reloc) {
219 print_reloc(ibfd, isection, orig_relocs[i], ss);
220 } else {
221 ss->relocs[ss->num_relocs++] = orig_relocs[i];
226 void print_reloc(bfd *ibfd, asection *isection, arelent *orig_reloc,
227 struct supersect *ss)
229 asymbol *sym_ptr = *orig_reloc->sym_ptr_ptr;
231 char *new_sectname = strdup(isection->name);
232 if (mode("keep"))
233 want_section(isection->name, &new_sectname);
235 char *new_symname = strdup(sym_ptr->name);
236 if (mode("keep-primary"))
237 want_section(sym_ptr->name, &new_symname);
239 int addend = orig_reloc->addend;
240 reloc_howto_type *howto = orig_reloc->howto;
241 int size = bfd_get_reloc_size(howto);
242 int addend2 = blot_section(ibfd, isection, orig_reloc->address, size);
243 assert(addend == 0 || addend2 == 0);
244 if (addend == 0)
245 addend = addend2;
247 printf("%s%s ", new_symname, addstr_all);
248 printf("%s%s%s ", canonical_sym(new_sectname), addstr_all, addstr_sect);
249 printf("%08x ", (int)orig_reloc->address);
250 printf("%d %08x %d\n", howto->pc_relative, addend, size);
253 int blot_section(bfd *abfd, asection *sect, int offset, int size)
255 struct supersect *ss = fetch_supersect(abfd, sect, isympp);
256 long address = (long)ss->contents + offset;
257 int tmp;
258 if (size == 4) {
259 tmp = *(int *)address;
260 *((int *)address) = 0x77777777;
261 } else if (size == 8) {
262 tmp = *(long long *)address;
263 *((long long *)address) = 0x7777777777777777ll;
264 } else {
265 printf("ksplice: Unsupported size %d\n", size);
266 DIE;
268 return tmp;
271 const char *canonical_sym(const char *sect_wlabel)
273 const char *sect = sect_wlabel;
274 if (!mode("sizelist"))
275 sect = dup_wolabel(sect_wlabel);
277 if (starts_with(sect, ".rodata"))
278 return sect;
280 int i;
281 for (i = 0; i < symcount; i++) {
282 const char *cur_sectname = isympp[i]->section->name;
283 if (!mode("sizelist"))
284 cur_sectname = dup_wolabel(cur_sectname);
286 if (strlen(isympp[i]->name) != 0 &&
287 !starts_with(isympp[i]->name, ".text") &&
288 strcmp(cur_sectname, sect) == 0 && isympp[i]->value == 0) {
289 return isympp[i]->name;
292 printf("ksplice: Failed to canonicalize %s\n", sect);
293 DIE;
296 void rm_from_special(bfd *ibfd, struct specsect *s)
298 asection *isection = bfd_get_section_by_name(ibfd, s->sectname);
299 if (isection == NULL)
300 return;
302 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
303 void *orig_buffer = malloc(ss->contents_size);
304 memcpy(orig_buffer, ss->contents, ss->contents_size);
305 arelent **orig_relocs = malloc(ss->num_relocs * sizeof(*orig_relocs));
306 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof(*orig_relocs));
308 assert(align(ss->contents_size, 4) % s->entry_size == 0);
309 if (s->odd_relocs) {
310 assert(align(ss->contents_size, 4) / s->entry_size ==
311 ss->num_relocs / 2);
312 } else {
313 assert(align(ss->contents_size, 4) / s->entry_size ==
314 ss->num_relocs);
317 int orig_num_relocs = ss->num_relocs;
318 ss->num_relocs = 0;
319 int new_num_entries = 0;
320 int i, orig_buffer_index, end_last_entry = 0, modifier = 0;
321 for (i = 0; i < orig_num_relocs; i++) {
322 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
323 if (s->odd_relocs && i % 2 == 1) {
324 assert(strcmp(sym_ptr->name, s->odd_relocname) == 0);
325 continue;
327 asection *p;
328 for (p = ibfd->sections; p != NULL; p = p->next) {
329 if (strcmp(sym_ptr->name, p->name) == 0
330 && !is_special(p->name)
331 && !want_section(p->name, NULL))
332 break;
334 if (p != NULL)
335 continue;
337 if (s->odd_relocs)
338 orig_buffer_index = i / 2;
339 else
340 orig_buffer_index = i;
341 memcpy(ss->contents + (new_num_entries++) * s->entry_size,
342 orig_buffer + orig_buffer_index * s->entry_size,
343 s->entry_size);
344 modifier += orig_buffer_index * s->entry_size - end_last_entry;
345 ss->relocs[ss->num_relocs] = orig_relocs[i];
346 ss->relocs[ss->num_relocs++]->address -= modifier;
347 if (s->odd_relocs) {
348 ss->relocs[ss->num_relocs] = orig_relocs[i + 1];
349 ss->relocs[ss->num_relocs++]->address -= modifier;
351 end_last_entry =
352 orig_buffer_index * s->entry_size + s->entry_size;
354 ss->contents_size = new_num_entries * s->entry_size;
357 void mark_wanted_if_referenced(bfd *abfd, asection *sect, void *ignored)
359 if (want_section(sect->name, NULL))
360 return;
361 if (!starts_with(sect->name, ".text")
362 && !starts_with(sect->name, ".rodata"))
363 return;
365 bfd_map_over_sections(abfd, check_for_ref_to_section, sect);
368 void check_for_ref_to_section(bfd *abfd, asection *looking_at,
369 void *looking_for)
371 if (!want_section(looking_at->name, NULL))
372 return;
374 struct supersect *ss = fetch_supersect(abfd, looking_at, isympp);
375 int i;
376 for (i = 0; i < ss->num_relocs; i++) {
377 asymbol *sym_ptr = *ss->relocs[i]->sym_ptr_ptr;
378 if (sym_ptr->section == (asection *)looking_for) {
379 struct wsect *w = malloc(sizeof(*w));
380 w->name = strdup(((asection *)looking_for)->name);
381 w->next = wanted_sections;
382 wanted_sections = w;
387 /* Modified function from GNU Binutils objcopy.c */
388 bfd_boolean copy_object(bfd *ibfd, bfd *obfd)
390 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
392 bfd_vma start = bfd_get_start_address(ibfd);
394 flagword flags = bfd_get_file_flags(ibfd);
395 flags &= bfd_applicable_file_flags(obfd);
397 assert(bfd_set_start_address(obfd, start)
398 && bfd_set_file_flags(obfd, flags));
400 enum bfd_architecture iarch = bfd_get_arch(ibfd);
401 unsigned int imach = bfd_get_mach(ibfd);
402 assert(bfd_set_arch_mach(obfd, iarch, imach));
403 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
405 /* BFD mandates that all output sections be created and sizes set before
406 any output is done. Thus, we traverse all sections multiple times. */
407 bfd_map_over_sections(ibfd, setup_section, obfd);
409 assert(bfd_count_sections(obfd));
411 /* Mark symbols used in output relocations so that they
412 are kept, even if they are local labels or static symbols.
414 Note we iterate over the input sections examining their
415 relocations since the relocations for the output sections
416 haven't been set yet. mark_symbols_used_in_relocations will
417 ignore input sections which have no corresponding output
418 section. */
420 bfd_map_over_sections(ibfd, mark_symbols_used_in_relocations, isympp);
421 asymbol **osympp = (void *)malloc((2 * symcount + 1) * sizeof(*osympp));
422 symcount = filter_symbols(ibfd, obfd, osympp, isympp, symcount);
424 bfd_set_symtab(obfd, osympp, symcount);
426 /* This has to happen after the symbol table has been set. */
427 bfd_map_over_sections(ibfd, copy_section, obfd);
429 /* Allow the BFD backend to copy any private data it understands
430 from the input BFD to the output BFD. This is done last to
431 permit the routine to look at the filtered symbol table, which is
432 important for the ECOFF code at least. */
433 assert(bfd_copy_private_bfd_data(ibfd, obfd));
435 return TRUE;
438 /* Modified function from GNU Binutils objcopy.c */
439 void setup_section(bfd *ibfd, asection *isection, void *obfdarg)
441 bfd *obfd = obfdarg;
442 bfd_vma vma;
444 char *name = strdup(isection->name);
445 if (!want_section(isection->name, &name))
446 return;
448 asection *osection = bfd_make_section_anyway(obfd, name);
449 assert(osection != NULL);
451 flagword flags = bfd_get_section_flags(ibfd, isection);
452 bfd_set_section_flags(obfd, osection, flags);
454 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
455 assert(bfd_set_section_size(obfd, osection, ss->contents_size));
457 vma = bfd_section_vma(ibfd, isection);
458 assert(bfd_set_section_vma(obfd, osection, vma));
460 osection->lma = isection->lma;
461 assert(bfd_set_section_alignment(obfd,
462 osection,
463 bfd_section_alignment(ibfd,
464 isection)));
465 osection->entsize = isection->entsize;
466 isection->output_section = osection;
467 isection->output_offset = 0;
468 return;
471 /* Modified function from GNU Binutils objcopy.c */
472 void copy_section(bfd *ibfd, asection *isection, void *obfdarg)
474 bfd *obfd = obfdarg;
476 char *name = strdup(isection->name);
477 if (!want_section(isection->name, &name))
478 return;
480 flagword flags = bfd_get_section_flags(ibfd, isection);
481 if ((flags & SEC_GROUP) != 0)
482 return;
484 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
485 asection *osection = isection->output_section;
486 if (ss->contents_size == 0 || osection == 0)
487 return;
489 bfd_set_reloc(obfd, osection,
490 ss->num_relocs == 0 ? NULL : ss->relocs, ss->num_relocs);
492 if (bfd_get_section_flags(ibfd, isection) & SEC_HAS_CONTENTS
493 && bfd_get_section_flags(obfd, osection) & SEC_HAS_CONTENTS) {
494 assert(bfd_set_section_contents
495 (obfd, osection, ss->contents, 0, ss->contents_size));
499 /* Modified function from GNU Binutils objcopy.c
501 * Mark all the symbols which will be used in output relocations with
502 * the BSF_KEEP flag so that those symbols will not be stripped.
504 * Ignore relocations which will not appear in the output file.
506 void mark_symbols_used_in_relocations(bfd *ibfd, asection *isection,
507 void *symbolsarg)
509 if (isection->output_section == NULL)
510 return;
512 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
514 /* Examine each symbol used in a relocation. If it's not one of the
515 special bfd section symbols, then mark it with BSF_KEEP. */
516 int i;
517 for (i = 0; i < ss->num_relocs; i++) {
518 if (*ss->relocs[i]->sym_ptr_ptr != bfd_com_section_ptr->symbol
519 && *ss->relocs[i]->sym_ptr_ptr !=
520 bfd_abs_section_ptr->symbol
521 && *ss->relocs[i]->sym_ptr_ptr !=
522 bfd_und_section_ptr->symbol)
523 (*ss->relocs[i]->sym_ptr_ptr)->flags |= BSF_KEEP;
527 /* Modified function from GNU Binutils objcopy.c
529 * Choose which symbol entries to copy.
530 * We don't copy in place, because that confuses the relocs.
531 * Return the number of symbols to print.
533 unsigned int filter_symbols(bfd *abfd, bfd *obfd, asymbol **osyms,
534 asymbol **isyms, long symcount)
536 asymbol **from = isyms, **to = osyms;
537 long src_count = 0, dst_count = 0;
539 for (; src_count < symcount; src_count++) {
540 asymbol *sym = from[src_count];
541 flagword flags = sym->flags;
543 if (mode("keep") && want_section(sym->section->name, NULL)) {
544 char *newname =
545 malloc(strlen(sym->name) + strlen(addstr_all) +
546 strlen(addstr_sect) + 1);
547 sprintf(newname, "%s%s%s", sym->name, addstr_all,
548 addstr_sect);
549 sym->name = newname;
552 int keep;
553 if ((flags & BSF_KEEP) != 0 /* Used in relocation. */
554 || ((flags & BSF_SECTION_SYM) != 0
555 && ((*(sym->section)->symbol_ptr_ptr)->flags
556 & BSF_KEEP) != 0))
557 keep = 1;
558 else if ((flags & (BSF_GLOBAL | BSF_WEAK)) != 0)
559 keep = 1;
560 else if (bfd_decode_symclass(sym) == 'I')
561 /* Global symbols in $idata sections need to be retained.
562 External users of the library containing the $idata
563 section may reference these symbols. */
564 keep = 1;
565 else if ((flags & BSF_GLOBAL) != 0
566 || (flags & BSF_WEAK) != 0
567 || bfd_is_com_section(sym->section))
568 keep = 1;
569 else if ((flags & BSF_DEBUGGING) != 0)
570 keep = 1;
571 else
572 keep = !bfd_is_local_label(abfd, sym);
574 if (!want_section(sym->section->name, NULL))
575 keep = 0;
577 if (mode("rmsyms") && match_varargs(sym->name))
578 keep = 0;
580 if (keep)
581 to[dst_count++] = sym;
583 if (keep && mode("globalize")
584 && ends_with(sym->name, globalizestr)) {
585 asymbol *new = bfd_make_empty_symbol(obfd);
586 char *tmp =
587 malloc(strlen(sym->name) + strlen("_global") + 1);
588 sprintf(tmp, "%s_global", sym->name);
589 new->name = tmp;
590 new->value = sym->value;
591 new->flags = BSF_GLOBAL;
592 new->section = sym->section;
593 to[dst_count++] = new;
597 asection *p;
598 for (p = obfd->sections; mode("keep") && p != NULL; p = p->next) {
599 if (starts_with(p->name, ".rodata") &&
600 !exists_sym_with_name(from, symcount, p->name)) {
601 asymbol *new = bfd_make_empty_symbol(obfd);
602 new->name = p->name;
603 new->value = 0x0;
604 new->flags = BSF_GLOBAL;
605 new->section = p;
606 to[dst_count++] = new;
610 to[dst_count] = NULL;
611 return dst_count;
614 int exists_sym_with_name(asymbol **syms, int symcount, const char *desired)
616 int i;
617 for (i = 0; i < symcount; i++) {
618 if (strcmp(bfd_asymbol_name(syms[i]), desired) == 0)
619 return 1;
621 return 0;
624 int match_varargs(const char *str)
626 int i;
627 for (i = 0; i < varargs_count; i++) {
628 if (strcmp(str, varargs[i]) == 0)
629 return 1;
630 if (starts_with(str, varargs[i]) &&
631 strcmp(str + strlen(varargs[i]), "_global") == 0)
632 return 1;
634 return 0;
637 int want_section(const char *name, char **newname)
639 static const char *static_want[] =
640 { ".altinstructions", ".altinstr_replacement", ".smp_locks",
641 ".parainstructions", NULL
644 if (!mode("keep"))
645 return 1;
647 struct wsect *w = wanted_sections;
648 for (; w != NULL; w = w->next) {
649 if (strcmp(w->name, name) == 0)
650 goto success;
653 if (starts_with(name, ".ksplice"))
654 goto success;
655 if (mode("keep-helper") && starts_with(name, ".text"))
656 goto success;
657 if (match_varargs(name)) {
658 goto success;
661 int i;
662 for (i = 0; static_want[i] != NULL; i++) {
663 if (strcmp(name, static_want[i]) == 0)
664 return 1;
666 return 0;
668 success:
670 if (newname != NULL) {
671 *newname =
672 malloc(strlen(name) + strlen(addstr_all) +
673 strlen(addstr_sect) + 1);
674 sprintf(*newname, "%s%s%s", name, addstr_all, addstr_sect);
676 return 1;
679 struct specsect *is_special(const char *name)
681 int i;
682 for (i = 0; special_sections[i].sectname != NULL; i++) {
683 if (strcmp(special_sections[i].sectname, name) == 0) {
684 return &special_sections[i];
687 return NULL;