Change applied flag to a three-state enum.
[ksplice.git] / objmanip.c
blobdae63d3c40fa4070d458661f41de14e0e9049f44
1 /* This file is based in part on objcopy.c from GNU Binutils v2.17.
3 * Copyright (C) 1991-2006 Free Software Foundation, Inc.
4 * Copyright (C) 2008 Jeffrey Brian Arnold <jbarnold@mit.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 /* objmanip performs various object file manipulations for Ksplice. Its first
21 * argument is always an object file, which is modified in-place during
22 * objmanip's execution. (objmanip's code is similar to objcopy from GNU
23 * binutils because every manipulation that objmanip performs is essentially a
24 * "copy" operation with certain changes which make the new version different
25 * from the old version). objmanip has four modes of operation:
27 * (1) keep mode
29 * This mode is the first objmanip step in processing the target object files.
31 * This mode can be broken down into two submodes, called "keep-primary" (which
32 * is used to prepare the primary kernel module) and "keep-helper" (which is
33 * used to prepare the helper kernel module):
35 * (a) keep-primary: "objmanip file.o keep-primary ADDSTR_ALL ADDSTR_SECT sect_1 ... sect_n"
37 * In this submode, only certain sections are kept; all other sections are
38 * discarded. Specifically, the following sections are kept: the listed
39 * sections (sect_1 ... sect_n), certain sections referenced by the listed
40 * sections, and certain special sections. Apart from the special sections,
41 * the kept sections have ADDSTR_ALL and then ADDSTR_SECT added to the end of their names.
43 * The sections that are kept have most of their ELF relocations removed.
44 * (Relocations that point to sections that are being kept are not removed; all
45 * other relocations are removed). Information about each of the removed ELF
46 * relocations is printed to STDOUT (ksplice-create will save this information
47 * into Ksplice-specific ELF sections for the primary kernel module to use
48 * later).
50 * Each line of the STDOUT output represents a single place within the ELF
51 * object file at which a relocation has been removed. Each line contains the
52 * following fields, separated by spaces: an ELF symbol name, the name of a
53 * section previously containing a relocation pointing to that symbol, the
54 * offset (within that section) of the former relocation to that symbol, a bit
55 * representing whether that ELF relocation is PC-relative, and the ELF addend
56 * value for that relocation.
58 * (b) keep-helper: "objmanip file.o keep-helper ADDSTR_ALL ADDSTR_SECT"
60 * In this submode, essentially all sections are kept and have ADDSTR_ALL and
61 * then ADDSTR_SECT added to the end of their names.
63 * The sections that are kept have all of their ELF relocations removed.
64 * Information about each of the removed ELF relocations is printed to STDOUT
65 * (ksplice-create will save this information into Ksplice-specific ELF
66 * sections for the helper kernel module to use later).
68 * The fields of the STDOUT output are the same as with keep-primary.
70 * (2) globalize mode: "objmanip file.o globalize GLOBALIZESTR"
72 * This mode is the second objmanip step in processing the target object files.
73 * In this mode, all symbols whose names end in GLOBALIZESTR will be
74 * duplicated, with the duplicate symbols differing slightly from the original
75 * symbols. The duplicate symbols will have the string "_global" added to the
76 * end of their symbol names, and they will be global ELF symbols, regardless
77 * of whether the corresponding original symbol was global.
79 * (3) sizelist mode: "objmanip file.o sizelist"
81 * After the target object files have been linked into a single collection
82 * object file, this mode is used in order to obtain a list of all of the
83 * functions in the collection object file. Each line of the STDOUT output
84 * contains an ELF section name and that section's size, as presented by BFD's
85 * bfd_print_symbol function.
87 * (4) rmsyms mode: "objmanip file.o rmsyms sym_1 ... sym_n"
89 * This mode is the final objmanip step in preparing the Ksplice kernel
90 * modules. In this mode, any ELF relocations involving the listed symbols
91 * (sym_1 ... sym_n) are removed, and information about each of the removed
92 * relocations is printed to STDOUT.
94 * The fields of the STDOUT output are the same as with keep-primary.
97 #include "objcommon.h"
98 #include "objmanip.h"
100 asymbol **isympp = NULL;
101 long symcount;
103 char **varargs;
104 int varargs_count;
105 char *modestr, *addstr_all = "", *addstr_sect = "", *globalizestr;
107 struct wsect *wanted_sections = NULL;
109 struct specsect special_sections[] =
110 { {".altinstructions", 1, ".altinstr_replacement",
111 2 * sizeof (char *) + 4 * sizeof (char)},
112 {".smp_locks", 0, NULL, sizeof (char *)},
113 {".parainstructions", 0, NULL, sizeof (char *) + 4 * sizeof (char)},
114 {NULL}
117 #define mode(str) starts_with(modestr, str)
120 main(int argc, char **argv)
122 char *debug_name = malloc(strlen(argv[1]) + 4 + strlen(argv[2]) + 1);
123 sprintf(debug_name, "%s.pre%s", argv[1], argv[2]);
124 rename(argv[1], debug_name);
126 bfd_init();
127 bfd *ibfd = bfd_openr(debug_name, NULL);
128 assert(ibfd);
130 char **matching;
131 assert(bfd_check_format_matches(ibfd, bfd_object, &matching));
133 const char *output_target = bfd_get_target(ibfd);
134 bfd *obfd = bfd_openw(argv[1], output_target);
135 assert(obfd);
137 symcount = get_syms(ibfd, &isympp);
139 modestr = argv[2];
140 if (mode("keep")) {
141 addstr_all = argv[3];
142 addstr_sect = argv[4];
143 varargs = &argv[5];
144 varargs_count = argc - 5;
145 } else if (mode("globalize")) {
146 globalizestr = argv[3];
147 varargs = &argv[4];
148 varargs_count = argc - 4;
149 } else {
150 varargs = &argv[3];
151 varargs_count = argc - 3;
154 if (mode("keep")) {
155 while (1) {
156 struct wsect *tmp = wanted_sections;
157 bfd_map_over_sections(ibfd, mark_wanted_if_referenced,
158 NULL);
159 if (tmp == wanted_sections)
160 break;
164 int i;
165 for (i = 0; mode("sizelist") && i < symcount; i++) {
166 if ((isympp[i]->flags & BSF_FUNCTION)
167 && isympp[i]->value == 0 && !(isympp[i]->flags & BSF_WEAK)) {
168 /* We call bfd_print_symbol in order to get access to
169 * the size associated with the function symbol, which
170 * is not otherwise available through the BFD API
172 bfd_print_symbol(ibfd, stdout, isympp[i],
173 bfd_print_symbol_all);
174 printf("\n");
178 asection *p;
179 for (p = ibfd->sections; p != NULL; p = p->next) {
180 if (is_special(p->name))
181 continue;
182 if (want_section(p->name, NULL) || mode("rmsyms"))
183 rm_some_relocs(ibfd, p);
186 for (i = 0; mode("keep") && special_sections[i].sectname != NULL; i++) {
187 rm_from_special(ibfd, &special_sections[i]);
190 copy_object(ibfd, obfd);
191 assert(bfd_close(obfd));
192 assert(bfd_close(ibfd));
193 printf("ksplice: success\n");
194 return(EXIT_SUCCESS);
197 void
198 rm_some_relocs(bfd * ibfd, asection * isection)
200 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
201 arelent **orig_relocs = malloc(ss->num_relocs * sizeof (*orig_relocs));
202 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof (*orig_relocs));
203 int orig_num_relocs = ss->num_relocs;
204 ss->num_relocs = 0;
206 int i;
207 for (i = 0; i < orig_num_relocs; i++) {
208 int rm_reloc = 0;
209 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
211 if (mode("rmsyms") && match_varargs(sym_ptr->name))
212 rm_reloc = 1;
214 if (mode("keep"))
215 rm_reloc = 1;
217 if (mode("keep-primary") && want_section(sym_ptr->name, NULL))
218 rm_reloc = 0;
220 if (rm_reloc) {
221 print_reloc(ibfd, isection, orig_relocs[i], ss);
222 } else {
223 ss->relocs[ss->num_relocs++] = orig_relocs[i];
228 void
229 print_reloc(bfd * ibfd, asection * isection, arelent * orig_reloc,
230 struct supersect *ss)
232 asymbol *sym_ptr = *orig_reloc->sym_ptr_ptr;
234 char *new_sectname = strdup(isection->name);
235 if (mode("keep"))
236 want_section(isection->name, &new_sectname);
238 char *new_symname = strdup(sym_ptr->name);
239 if (mode("keep-primary"))
240 want_section(sym_ptr->name, &new_symname);
242 int addend = orig_reloc->addend;
243 int addend2 = blot_section(ibfd, isection, orig_reloc->address);
244 assert(addend == 0 || addend2 == 0);
245 if (addend == 0)
246 addend = addend2;
248 printf("%s%s ", new_symname, addstr_all);
249 printf("%s%s%s ", canonical_sym(new_sectname), addstr_all, addstr_sect);
250 printf("%08x ", (int)orig_reloc->address);
251 printf("%d %08x\n", orig_reloc->howto->pc_relative, addend);
255 blot_section(bfd * abfd, asection * sect, int offset)
257 struct supersect *ss = fetch_supersect(abfd, sect, isympp);
258 int tmp = *((int *) (ss->contents + offset));
259 *((int *) (ss->contents + offset)) = 0x77777777;
260 return tmp;
263 const char *
264 canonical_sym(const char *sect_wlabel)
266 const char *sect = sect_wlabel;
267 if (!mode("sizelist"))
268 sect = dup_wolabel(sect_wlabel);
270 if (starts_with(sect, ".rodata"))
271 return sect;
273 int i;
274 for (i = 0; i < symcount; i++) {
275 const char *cur_sectname = isympp[i]->section->name;
276 if (!mode("sizelist"))
277 cur_sectname = dup_wolabel(cur_sectname);
279 if (strlen(isympp[i]->name) != 0 &&
280 !starts_with(isympp[i]->name, ".text") &&
281 strcmp(cur_sectname, sect) == 0 && isympp[i]->value == 0) {
282 return isympp[i]->name;
285 printf("ksplice: Failed to canonicalize %s\n", sect);
286 DIE;
289 void
290 rm_from_special(bfd * ibfd, struct specsect *s)
292 asection *isection = bfd_get_section_by_name(ibfd, s->sectname);
293 if (isection == NULL)
294 return;
296 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
297 void *orig_buffer = malloc(ss->contents_size);
298 memcpy(orig_buffer, ss->contents, ss->contents_size);
299 arelent **orig_relocs = malloc(ss->num_relocs * sizeof (*orig_relocs));
300 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof (*orig_relocs));
302 assert(align(ss->contents_size, 4) % s->entry_size == 0);
303 if (s->odd_relocs) {
304 assert(align(ss->contents_size, 4) / s->entry_size ==
305 ss->num_relocs / 2);
306 } else {
307 assert(align(ss->contents_size, 4) / s->entry_size ==
308 ss->num_relocs);
311 int orig_num_relocs = ss->num_relocs;
312 ss->num_relocs = 0;
313 int new_num_entries = 0;
314 int i, orig_buffer_index, end_last_entry = 0, modifier = 0;
315 for (i = 0; i < orig_num_relocs; i++) {
316 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
317 if (s->odd_relocs && i % 2 == 1) {
318 assert(strcmp(sym_ptr->name, s->odd_relocname) == 0);
319 continue;
321 asection *p;
322 for (p = ibfd->sections; p != NULL; p = p->next) {
323 if (strcmp(sym_ptr->name, p->name) == 0
324 && !is_special(p->name)
325 && !want_section(p->name, NULL))
326 break;
328 if (p != NULL)
329 continue;
331 if (s->odd_relocs)
332 orig_buffer_index = i / 2;
333 else
334 orig_buffer_index = i;
335 memcpy(ss->contents + (new_num_entries++) * s->entry_size,
336 orig_buffer + orig_buffer_index * s->entry_size,
337 s->entry_size);
338 modifier += orig_buffer_index * s->entry_size - end_last_entry;
339 ss->relocs[ss->num_relocs] = orig_relocs[i];
340 ss->relocs[ss->num_relocs++]->address -= modifier;
341 if (s->odd_relocs) {
342 ss->relocs[ss->num_relocs] = orig_relocs[i + 1];
343 ss->relocs[ss->num_relocs++]->address -= modifier;
345 end_last_entry =
346 orig_buffer_index * s->entry_size + s->entry_size;
348 ss->contents_size = new_num_entries * s->entry_size;
351 void
352 mark_wanted_if_referenced(bfd * abfd, asection * sect, void *ignored)
354 if (want_section(sect->name, NULL))
355 return;
356 if (!starts_with(sect->name, ".text")
357 && !starts_with(sect->name, ".rodata"))
358 return;
360 bfd_map_over_sections(abfd, check_for_ref_to_section, sect);
363 void
364 check_for_ref_to_section(bfd * abfd, asection * looking_at, void *looking_for)
366 if (!want_section(looking_at->name, NULL))
367 return;
369 struct supersect *ss = fetch_supersect(abfd, looking_at, isympp);
370 int i;
371 for (i = 0; i < ss->num_relocs; i++) {
372 asymbol *sym_ptr = *ss->relocs[i]->sym_ptr_ptr;
373 if (sym_ptr->section == (asection *) looking_for) {
374 struct wsect *w = malloc(sizeof (*w));
375 w->name = strdup(((asection *) looking_for)->name);
376 w->next = wanted_sections;
377 wanted_sections = w;
382 /* Modified function from GNU Binutils objcopy.c */
383 bfd_boolean
384 copy_object(bfd * ibfd, bfd * obfd)
386 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
388 bfd_vma start = bfd_get_start_address(ibfd);
390 flagword flags = bfd_get_file_flags(ibfd);
391 flags &= bfd_applicable_file_flags(obfd);
393 assert(bfd_set_start_address(obfd, start)
394 && bfd_set_file_flags(obfd, flags));
396 enum bfd_architecture iarch = bfd_get_arch(ibfd);
397 unsigned int imach = bfd_get_mach(ibfd);
398 assert(bfd_set_arch_mach(obfd, iarch, imach));
399 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
401 /* BFD mandates that all output sections be created and sizes set before
402 any output is done. Thus, we traverse all sections multiple times. */
403 bfd_map_over_sections(ibfd, setup_section, obfd);
405 assert(bfd_count_sections(obfd));
407 /* Mark symbols used in output relocations so that they
408 are kept, even if they are local labels or static symbols.
410 Note we iterate over the input sections examining their
411 relocations since the relocations for the output sections
412 haven't been set yet. mark_symbols_used_in_relocations will
413 ignore input sections which have no corresponding output
414 section. */
416 bfd_map_over_sections(ibfd, mark_symbols_used_in_relocations, isympp);
417 asymbol **osympp =
418 (void *) malloc((2 * symcount + 1) * sizeof (*osympp));
419 symcount = filter_symbols(ibfd, obfd, osympp, isympp, symcount);
421 bfd_set_symtab(obfd, osympp, symcount);
423 /* This has to happen after the symbol table has been set. */
424 bfd_map_over_sections(ibfd, copy_section, obfd);
426 /* Allow the BFD backend to copy any private data it understands
427 from the input BFD to the output BFD. This is done last to
428 permit the routine to look at the filtered symbol table, which is
429 important for the ECOFF code at least. */
430 assert(bfd_copy_private_bfd_data(ibfd, obfd));
432 return TRUE;
435 /* Modified function from GNU Binutils objcopy.c */
436 void
437 setup_section(bfd * ibfd, asection * isection, void *obfdarg)
439 bfd *obfd = obfdarg;
440 bfd_vma vma;
442 char *name = strdup(isection->name);
443 if (!want_section(isection->name, &name))
444 return;
446 asection *osection = bfd_make_section_anyway(obfd, name);
447 assert(osection != NULL);
449 flagword flags = bfd_get_section_flags(ibfd, isection);
450 bfd_set_section_flags(obfd, osection, flags);
452 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
453 assert(bfd_set_section_size(obfd, osection, ss->contents_size));
455 vma = bfd_section_vma(ibfd, isection);
456 assert(bfd_set_section_vma(obfd, osection, vma));
458 osection->lma = isection->lma;
459 assert(bfd_set_section_alignment(obfd,
460 osection,
461 bfd_section_alignment(ibfd,
462 isection)));
463 osection->entsize = isection->entsize;
464 isection->output_section = osection;
465 isection->output_offset = 0;
466 return;
469 /* Modified function from GNU Binutils objcopy.c */
470 void
471 copy_section(bfd * ibfd, asection * isection, void *obfdarg)
473 bfd *obfd = obfdarg;
475 char *name = strdup(isection->name);
476 if (!want_section(isection->name, &name))
477 return;
479 flagword flags = bfd_get_section_flags(ibfd, isection);
480 if ((flags & SEC_GROUP) != 0)
481 return;
483 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
484 asection *osection = isection->output_section;
485 if (ss->contents_size == 0 || osection == 0)
486 return;
488 bfd_set_reloc(obfd, osection,
489 ss->num_relocs == 0 ? NULL : ss->relocs, ss->num_relocs);
491 if (bfd_get_section_flags(ibfd, isection) & SEC_HAS_CONTENTS
492 && bfd_get_section_flags(obfd, osection) & SEC_HAS_CONTENTS) {
493 assert(bfd_set_section_contents
494 (obfd, osection, ss->contents, 0, ss->contents_size));
498 /* Modified function from GNU Binutils objcopy.c
500 * Mark all the symbols which will be used in output relocations with
501 * the BSF_KEEP flag so that those symbols will not be stripped.
503 * Ignore relocations which will not appear in the output file.
505 void
506 mark_symbols_used_in_relocations(bfd * ibfd, asection * isection,
507 void *symbolsarg)
509 if (isection->output_section == NULL)
510 return;
512 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
514 /* Examine each symbol used in a relocation. If it's not one of the
515 special bfd section symbols, then mark it with BSF_KEEP. */
516 int i;
517 for (i = 0; i < ss->num_relocs; i++) {
518 if (*ss->relocs[i]->sym_ptr_ptr != bfd_com_section_ptr->symbol
519 && *ss->relocs[i]->sym_ptr_ptr !=
520 bfd_abs_section_ptr->symbol
521 && *ss->relocs[i]->sym_ptr_ptr !=
522 bfd_und_section_ptr->symbol)
523 (*ss->relocs[i]->sym_ptr_ptr)->flags |= BSF_KEEP;
527 /* Modified function from GNU Binutils objcopy.c
529 * Choose which symbol entries to copy.
530 * We don't copy in place, because that confuses the relocs.
531 * Return the number of symbols to print.
533 unsigned int
534 filter_symbols(bfd * abfd, bfd * obfd, asymbol ** osyms,
535 asymbol ** isyms, long symcount)
537 asymbol **from = isyms, **to = osyms;
538 long src_count = 0, dst_count = 0;
540 for (; src_count < symcount; src_count++) {
541 asymbol *sym = from[src_count];
542 flagword flags = sym->flags;
544 if (mode("keep") && want_section(sym->section->name, NULL)) {
545 char *newname =
546 malloc(strlen(sym->name) + strlen(addstr_all) +
547 strlen(addstr_sect) + 1);
548 sprintf(newname, "%s%s%s", sym->name, addstr_all,
549 addstr_sect);
550 sym->name = newname;
553 int keep;
554 if ((flags & BSF_KEEP) != 0 /* Used in relocation. */
555 || ((flags & BSF_SECTION_SYM) != 0
556 && ((*(sym->section)->symbol_ptr_ptr)->flags
557 & BSF_KEEP) != 0))
558 keep = 1;
559 else if ((flags & (BSF_GLOBAL | BSF_WEAK)) != 0)
560 keep = 1;
561 else if (bfd_decode_symclass(sym) == 'I')
562 /* Global symbols in $idata sections need to be retained.
563 External users of the library containing the $idata
564 section may reference these symbols. */
565 keep = 1;
566 else if ((flags & BSF_GLOBAL) != 0
567 || (flags & BSF_WEAK) != 0
568 || bfd_is_com_section(sym->section))
569 keep = 1;
570 else if ((flags & BSF_DEBUGGING) != 0)
571 keep = 1;
572 else
573 keep = !bfd_is_local_label(abfd, sym);
575 if (!want_section(sym->section->name, NULL))
576 keep = 0;
578 if (mode("rmsyms") && match_varargs(sym->name))
579 keep = 0;
581 if (keep)
582 to[dst_count++] = sym;
584 if (keep && mode("globalize")
585 && ends_with(sym->name, globalizestr)) {
586 asymbol *new = bfd_make_empty_symbol(obfd);
587 char *tmp =
588 malloc(strlen(sym->name) + strlen("_global") + 1);
589 sprintf(tmp, "%s_global", sym->name);
590 new->name = tmp;
591 new->value = sym->value;
592 new->flags = BSF_GLOBAL;
593 new->section = sym->section;
594 to[dst_count++] = new;
598 asection *p;
599 for (p = obfd->sections; mode("keep") && p != NULL; p = p->next) {
600 if (starts_with(p->name, ".rodata") &&
601 !exists_sym_with_name(from, symcount, p->name)) {
602 asymbol *new = bfd_make_empty_symbol(obfd);
603 new->name = p->name;
604 new->value = 0x0;
605 new->flags = BSF_GLOBAL;
606 new->section = p;
607 to[dst_count++] = new;
611 to[dst_count] = NULL;
612 return dst_count;
616 exists_sym_with_name(asymbol ** syms, int symcount, const char *desired)
618 int i;
619 for (i = 0; i < symcount; i++) {
620 if (strcmp(bfd_asymbol_name(syms[i]), desired) == 0)
621 return 1;
623 return 0;
627 match_varargs(const char *str)
629 int i;
630 for (i = 0; i < varargs_count; i++) {
631 if (strcmp(str, varargs[i]) == 0)
632 return 1;
633 if (starts_with(str, varargs[i]) &&
634 strcmp(str + strlen(varargs[i]), "_global") == 0)
635 return 1;
637 return 0;
641 want_section(const char *name, char **newname)
643 static const char *static_want[] =
644 { ".altinstructions", ".altinstr_replacement", ".smp_locks",
645 ".parainstructions", NULL
648 if (!mode("keep"))
649 return 1;
651 struct wsect *w = wanted_sections;
652 for (; w != NULL; w = w->next) {
653 if (strcmp(w->name, name) == 0)
654 goto success;
657 if (starts_with(name, ".ksplice"))
658 goto success;
659 if (mode("keep-helper") && starts_with(name, ".text"))
660 goto success;
661 if (match_varargs(name)) {
662 goto success;
665 int i;
666 for (i = 0; static_want[i] != NULL; i++) {
667 if (strcmp(name, static_want[i]) == 0)
668 return 1;
670 return 0;
672 success:
674 if (newname != NULL) {
675 *newname =
676 malloc(strlen(name) + strlen(addstr_all) +
677 strlen(addstr_sect) + 1);
678 sprintf(*newname, "%s%s%s", name, addstr_all, addstr_sect);
680 return 1;
683 struct specsect *
684 is_special(const char *name)
686 int i;
687 for (i = 0; special_sections[i].sectname != NULL; i++) {
688 if (strcmp(special_sections[i].sectname, name) == 0) {
689 return &special_sections[i];
692 return NULL;