Fix various off-by-one comparison bugs.
[ksplice.git] / objmanip.c
blobb9d48d30a1e38c507bef20bce371a30745f61f0a
1 /* This file is based in part on objcopy.c from GNU Binutils v2.17.
3 * Copyright (C) 1991-2006 Free Software Foundation, Inc.
4 * Copyright (C) 2008 Jeffrey Brian Arnold <jbarnold@mit.edu>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 * 02110-1301, USA.
20 /* objmanip performs various object file manipulations for Ksplice. Its first
21 * argument is always an object file, which is modified in-place during
22 * objmanip's execution. (objmanip's code is similar to objcopy from GNU
23 * binutils because every manipulation that objmanip performs is essentially a
24 * "copy" operation with certain changes which make the new version different
25 * from the old version). objmanip has four modes of operation:
27 * (1) keep mode
29 * This mode is the first objmanip step in processing the target object files.
31 * This mode can be broken down into two submodes, called "keep-primary" (which
32 * is used to prepare the primary kernel module) and "keep-helper" (which is
33 * used to prepare the helper kernel module):
35 * (a) keep-primary: "objmanip file.o keep-primary ADDSTR sect_1 ... sect_n"
37 * In this submode, only certain sections are kept; all other sections are
38 * discarded. Specifically, the following sections are kept: the listed
39 * sections (sect_1 ... sect_n), certain sections referenced by the listed
40 * sections, and certain special sections. The sections that are kept have
41 * ADDSTR added to the end of their names.
43 * The sections that are kept have most of their ELF relocations removed.
44 * (Relocations that point to sections that are being kept are not removed; all
45 * other relocations are removed). Information about each of the removed ELF
46 * relocations is printed to STDOUT (ksplice-create will save this information
47 * into Ksplice-specific ELF sections for the primary kernel module to use
48 * later).
50 * Each line of the STDOUT output represents a single place within the ELF
51 * object file at which a relocation has been removed. Each line contains the
52 * following fields, separated by spaces: an ELF symbol name, the name of a
53 * section previously containing a relocation pointing to that symbol, the
54 * offset (within that section) of the former relocation to that symbol, a bit
55 * representing whether that ELF relocation is PC-relative, and the ELF addend
56 * value for that relocation.
58 * (b) keep-helper: "objmanip file.o keep-helper ADDSTR"
60 * In this submode, essentially all sections are kept and have ADDSTR added to
61 * the end of their names.
63 * The sections that are kept have all of their ELF relocations removed.
64 * Information about each of the removed ELF relocations is printed to STDOUT
65 * (ksplice-create will save this information into Ksplice-specific ELF
66 * sections for the helper kernel module to use later).
68 * The fields of the STDOUT output are the same as with keep-primary.
70 * (2) globalize mode: "objmanip file.o globalize GLOBALIZESTR"
72 * This mode is the second objmanip step in processing the target object files.
73 * In this mode, all symbols whose names end in GLOBALIZESTR will be
74 * duplicated, with the duplicate symbols differing slightly from the original
75 * symbols. The duplicate symbols will have the string "_global" added to the
76 * end of their symbol names, and they will be global ELF symbols, regardless
77 * of whether the corresponding original symbol was global.
79 * (3) sizelist mode: "objmanip file.o sizelist"
81 * After the target object files have been linked into a single collection
82 * object file, this mode is used in order to obtain a list of all of the
83 * functions in the collection object file. Each line of the STDOUT output
84 * contains an ELF section name and that section's size, as presented by BFD's
85 * bfd_print_symbol function.
87 * (4) rmsyms mode: "objmanip file.o rmsyms sym_1 ... sym_n"
89 * This mode is the final objmanip step in preparing the Ksplice kernel
90 * modules. In this mode, any ELF relocations involving the listed symbols
91 * (sym_1 ... sym_n) are removed, and information about each of the removed
92 * relocations is printed to STDOUT.
94 * The fields of the STDOUT output are the same as with keep-primary.
97 #include "objcommon.h"
98 #include "objmanip.h"
99 #include <stdint.h>
/* Symbol table of the input object and its length; main() fills both in
 * through get_syms(ibfd, &isympp). */
101 asymbol **isympp = NULL;
102 long symcount;
/* Trailing command-line arguments (and how many there are) left over after
 * the mode-specific fixed arguments; set up in main(). */
104 char **varargs;
105 int varargs_count;
/* modestr is argv[2].  In "keep" modes addstr_all is argv[3] and addstr_sect
 * is argv[4]; in "globalize" mode globalizestr is argv[3]. */
106 char *modestr, *addstr_all = "", *addstr_sect = "", *globalizestr;
/* Head of the singly linked list of section names that were marked as wanted
 * by check_for_ref_to_section(); consulted by want_section(). */
108 struct wsect *wanted_sections = NULL;
/* NULL-terminated table of specially handled sections, walked by is_special()
 * and by main().  struct specsect is declared elsewhere; judging by how
 * rm_from_special() uses it, the initializers are presumably
 * { sectname, odd_relocs, odd_relocname, entry_size } -- TODO confirm against
 * objmanip.h. */
110 struct specsect special_sections[] =
111 { {".altinstructions", 1, ".altinstr_replacement",
112 2 * sizeof(char *) + 4 * sizeof(char)},
113 {".smp_locks", 0, NULL, sizeof(char *)},
114 {".parainstructions", 0, NULL, sizeof(char *) + 4 * sizeof(char)},
115 {NULL}
/* mode("x") tests modestr against the prefix "x" via starts_with(), so
 * mode("keep") is presumably true for both keep-primary and keep-helper. */
118 #define mode(str) starts_with(modestr, str)
120 int main(int argc, char **argv)
122 char *debug_name = malloc(strlen(argv[1]) + 4 + strlen(argv[2]) + 1);
123 sprintf(debug_name, "%s.pre%s", argv[1], argv[2]);
124 rename(argv[1], debug_name);
126 bfd_init();
127 bfd *ibfd = bfd_openr(debug_name, NULL);
128 assert(ibfd);
130 char **matching;
131 assert(bfd_check_format_matches(ibfd, bfd_object, &matching));
133 const char *output_target = bfd_get_target(ibfd);
134 bfd *obfd = bfd_openw(argv[1], output_target);
135 assert(obfd);
137 symcount = get_syms(ibfd, &isympp);
139 modestr = argv[2];
140 if (mode("keep")) {
141 addstr_all = argv[3];
142 addstr_sect = argv[4];
143 varargs = &argv[5];
144 varargs_count = argc - 5;
145 } else if (mode("globalize")) {
146 globalizestr = argv[3];
147 varargs = &argv[4];
148 varargs_count = argc - 4;
149 } else {
150 varargs = &argv[3];
151 varargs_count = argc - 3;
154 if (mode("keep")) {
155 while (1) {
156 struct wsect *tmp = wanted_sections;
157 bfd_map_over_sections(ibfd, mark_wanted_if_referenced,
158 NULL);
159 if (tmp == wanted_sections)
160 break;
164 int i;
165 for (i = 0; mode("sizelist") && i < symcount; i++) {
166 if ((isympp[i]->flags & BSF_FUNCTION)
167 && isympp[i]->value == 0 && !(isympp[i]->flags & BSF_WEAK)) {
168 /* We call bfd_print_symbol in order to get access to
169 * the size associated with the function symbol, which
170 * is not otherwise available through the BFD API
172 bfd_print_symbol(ibfd, stdout, isympp[i],
173 bfd_print_symbol_all);
174 printf("\n");
178 asection *p;
179 for (p = ibfd->sections; p != NULL; p = p->next) {
180 if (is_special(p->name))
181 continue;
182 if (want_section(p->name, NULL) || mode("rmsyms"))
183 rm_some_relocs(ibfd, p);
186 for (i = 0; mode("keep") && special_sections[i].sectname != NULL; i++)
187 rm_from_special(ibfd, &special_sections[i]);
189 copy_object(ibfd, obfd);
190 assert(bfd_close(obfd));
191 assert(bfd_close(ibfd));
192 return EXIT_SUCCESS;
195 void rm_some_relocs(bfd *ibfd, asection *isection)
197 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
198 arelent **orig_relocs = malloc(ss->num_relocs * sizeof(*orig_relocs));
199 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof(*orig_relocs));
200 int orig_num_relocs = ss->num_relocs;
201 ss->num_relocs = 0;
203 int i;
204 for (i = 0; i < orig_num_relocs; i++) {
205 int rm_reloc = 0;
206 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
208 if (mode("rmsyms") && match_varargs(sym_ptr->name))
209 rm_reloc = 1;
211 if (mode("keep"))
212 rm_reloc = 1;
214 if (mode("keep-primary") && want_section(sym_ptr->name, NULL))
215 rm_reloc = 0;
217 if (rm_reloc)
218 print_reloc(ibfd, isection, orig_relocs[i], ss);
219 else
220 ss->relocs[ss->num_relocs++] = orig_relocs[i];
224 void print_reloc(bfd *ibfd, asection *isection, arelent *orig_reloc,
225 struct supersect *ss)
227 asymbol *sym_ptr = *orig_reloc->sym_ptr_ptr;
229 char *new_sectname = strdup(isection->name);
230 if (mode("keep"))
231 want_section(isection->name, &new_sectname);
233 char *new_symname = strdup(sym_ptr->name);
234 if (mode("keep-primary"))
235 want_section(sym_ptr->name, &new_symname);
237 int addend = orig_reloc->addend;
238 reloc_howto_type *howto = orig_reloc->howto;
239 int size = bfd_get_reloc_size(howto);
240 int addend2 = blot_section(ibfd, isection, orig_reloc->address, size);
241 assert(addend == 0 || addend2 == 0);
242 if (addend == 0)
243 addend = addend2;
245 printf("%s%s ", new_symname, addstr_all);
246 printf("%s%s%s ", canonical_sym(new_sectname), addstr_all, addstr_sect);
247 printf("%08x ", (int)orig_reloc->address);
248 printf("%d %08x %d\n", howto->pc_relative, addend, size);
251 int blot_section(bfd *abfd, asection *sect, int offset, int size)
253 struct supersect *ss = fetch_supersect(abfd, sect, isympp);
254 long address = (long)ss->contents + offset;
255 int tmp;
256 if (size == 4) {
257 tmp = *(int *)address;
258 *((int *)address) = 0x77777777;
259 } else if (size == 8) {
260 tmp = *(long long *)address;
261 *((long long *)address) = 0x7777777777777777ll;
262 } else {
263 printf("ksplice: Unsupported size %d\n", size);
264 DIE;
266 return tmp;
269 const char *canonical_sym(const char *sect_wlabel)
271 const char *sect = sect_wlabel;
272 if (!mode("sizelist"))
273 sect = dup_wolabel(sect_wlabel);
275 if (starts_with(sect, ".rodata"))
276 return sect;
278 int i;
279 for (i = 0; i < symcount; i++) {
280 const char *cur_sectname = isympp[i]->section->name;
281 if (!mode("sizelist"))
282 cur_sectname = dup_wolabel(cur_sectname);
284 if (strlen(isympp[i]->name) != 0 &&
285 !starts_with(isympp[i]->name, ".text") &&
286 strcmp(cur_sectname, sect) == 0 && isympp[i]->value == 0)
287 return isympp[i]->name;
289 printf("ksplice: Failed to canonicalize %s\n", sect);
290 DIE;
293 void rm_from_special(bfd *ibfd, struct specsect *s)
295 asection *isection = bfd_get_section_by_name(ibfd, s->sectname);
296 if (isection == NULL)
297 return;
299 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
300 void *orig_buffer = malloc(ss->contents_size);
301 memcpy(orig_buffer, ss->contents, ss->contents_size);
302 arelent **orig_relocs = malloc(ss->num_relocs * sizeof(*orig_relocs));
303 memcpy(orig_relocs, ss->relocs, ss->num_relocs * sizeof(*orig_relocs));
305 assert(align(ss->contents_size, 4) % s->entry_size == 0);
306 if (s->odd_relocs)
307 assert(align(ss->contents_size, 4) / s->entry_size ==
308 ss->num_relocs / 2);
309 else
310 assert(align(ss->contents_size, 4) / s->entry_size ==
311 ss->num_relocs);
313 int orig_num_relocs = ss->num_relocs;
314 ss->num_relocs = 0;
315 int new_num_entries = 0;
316 int i, orig_buffer_index, end_last_entry = 0, modifier = 0;
317 for (i = 0; i < orig_num_relocs; i++) {
318 asymbol *sym_ptr = *orig_relocs[i]->sym_ptr_ptr;
319 if (s->odd_relocs && i % 2 == 1) {
320 assert(strcmp(sym_ptr->name, s->odd_relocname) == 0);
321 continue;
323 asection *p;
324 for (p = ibfd->sections; p != NULL; p = p->next) {
325 if (strcmp(sym_ptr->name, p->name) == 0
326 && !is_special(p->name)
327 && !want_section(p->name, NULL))
328 break;
330 if (p != NULL)
331 continue;
333 if (s->odd_relocs)
334 orig_buffer_index = i / 2;
335 else
336 orig_buffer_index = i;
337 memcpy(ss->contents + (new_num_entries++) * s->entry_size,
338 orig_buffer + orig_buffer_index * s->entry_size,
339 s->entry_size);
340 modifier += orig_buffer_index * s->entry_size - end_last_entry;
341 ss->relocs[ss->num_relocs] = orig_relocs[i];
342 ss->relocs[ss->num_relocs++]->address -= modifier;
343 if (s->odd_relocs) {
344 ss->relocs[ss->num_relocs] = orig_relocs[i + 1];
345 ss->relocs[ss->num_relocs++]->address -= modifier;
347 end_last_entry =
348 orig_buffer_index * s->entry_size + s->entry_size;
350 ss->contents_size = new_num_entries * s->entry_size;
353 void mark_wanted_if_referenced(bfd *abfd, asection *sect, void *ignored)
355 if (want_section(sect->name, NULL))
356 return;
357 if (!starts_with(sect->name, ".text")
358 && !starts_with(sect->name, ".rodata"))
359 return;
361 bfd_map_over_sections(abfd, check_for_ref_to_section, sect);
364 void check_for_ref_to_section(bfd *abfd, asection *looking_at,
365 void *looking_for)
367 if (!want_section(looking_at->name, NULL))
368 return;
370 struct supersect *ss = fetch_supersect(abfd, looking_at, isympp);
371 int i;
372 for (i = 0; i < ss->num_relocs; i++) {
373 asymbol *sym_ptr = *ss->relocs[i]->sym_ptr_ptr;
374 if (sym_ptr->section == (asection *)looking_for) {
375 struct wsect *w = malloc(sizeof(*w));
376 w->name = strdup(((asection *)looking_for)->name);
377 w->next = wanted_sections;
378 wanted_sections = w;
383 /* Modified function from GNU Binutils objcopy.c */
384 bfd_boolean copy_object(bfd *ibfd, bfd *obfd)
386 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
388 bfd_vma start = bfd_get_start_address(ibfd);
390 flagword flags = bfd_get_file_flags(ibfd);
391 flags &= bfd_applicable_file_flags(obfd);
393 assert(bfd_set_start_address(obfd, start)
394 && bfd_set_file_flags(obfd, flags));
396 enum bfd_architecture iarch = bfd_get_arch(ibfd);
397 unsigned int imach = bfd_get_mach(ibfd);
398 assert(bfd_set_arch_mach(obfd, iarch, imach));
399 assert(bfd_set_format(obfd, bfd_get_format(ibfd)));
401 /* BFD mandates that all output sections be created and sizes set before
402 any output is done. Thus, we traverse all sections multiple times. */
403 bfd_map_over_sections(ibfd, setup_section, obfd);
405 assert(bfd_count_sections(obfd));
407 /* Mark symbols used in output relocations so that they
408 are kept, even if they are local labels or static symbols.
410 Note we iterate over the input sections examining their
411 relocations since the relocations for the output sections
412 haven't been set yet. mark_symbols_used_in_relocations will
413 ignore input sections which have no corresponding output
414 section. */
416 bfd_map_over_sections(ibfd, mark_symbols_used_in_relocations, isympp);
417 asymbol **osympp = (void *)malloc((2 * symcount + 1) * sizeof(*osympp));
418 symcount = filter_symbols(ibfd, obfd, osympp, isympp, symcount);
420 bfd_set_symtab(obfd, osympp, symcount);
422 /* This has to happen after the symbol table has been set. */
423 bfd_map_over_sections(ibfd, copy_section, obfd);
425 /* Allow the BFD backend to copy any private data it understands
426 from the input BFD to the output BFD. This is done last to
427 permit the routine to look at the filtered symbol table, which is
428 important for the ECOFF code at least. */
429 assert(bfd_copy_private_bfd_data(ibfd, obfd));
431 return TRUE;
434 /* Modified function from GNU Binutils objcopy.c */
435 void setup_section(bfd *ibfd, asection *isection, void *obfdarg)
437 bfd *obfd = obfdarg;
438 bfd_vma vma;
440 char *name = strdup(isection->name);
441 if (!want_section(isection->name, &name))
442 return;
444 asection *osection = bfd_make_section_anyway(obfd, name);
445 assert(osection != NULL);
447 flagword flags = bfd_get_section_flags(ibfd, isection);
448 bfd_set_section_flags(obfd, osection, flags);
450 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
451 assert(bfd_set_section_size(obfd, osection, ss->contents_size));
453 vma = bfd_section_vma(ibfd, isection);
454 assert(bfd_set_section_vma(obfd, osection, vma));
456 osection->lma = isection->lma;
457 assert(bfd_set_section_alignment(obfd,
458 osection,
459 bfd_section_alignment(ibfd,
460 isection)));
461 osection->entsize = isection->entsize;
462 isection->output_section = osection;
463 isection->output_offset = 0;
464 return;
467 /* Modified function from GNU Binutils objcopy.c */
468 void copy_section(bfd *ibfd, asection *isection, void *obfdarg)
470 bfd *obfd = obfdarg;
472 char *name = strdup(isection->name);
473 if (!want_section(isection->name, &name))
474 return;
476 flagword flags = bfd_get_section_flags(ibfd, isection);
477 if ((flags & SEC_GROUP) != 0)
478 return;
480 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
481 asection *osection = isection->output_section;
482 if (ss->contents_size == 0 || osection == 0)
483 return;
485 bfd_set_reloc(obfd, osection,
486 ss->num_relocs == 0 ? NULL : ss->relocs, ss->num_relocs);
488 if (bfd_get_section_flags(ibfd, isection) & SEC_HAS_CONTENTS
489 && bfd_get_section_flags(obfd, osection) & SEC_HAS_CONTENTS)
490 assert(bfd_set_section_contents
491 (obfd, osection, ss->contents, 0, ss->contents_size));
494 /* Modified function from GNU Binutils objcopy.c
496 * Mark all the symbols which will be used in output relocations with
497 * the BSF_KEEP flag so that those symbols will not be stripped.
499 * Ignore relocations which will not appear in the output file.
501 void mark_symbols_used_in_relocations(bfd *ibfd, asection *isection,
502 void *symbolsarg)
504 if (isection->output_section == NULL)
505 return;
507 struct supersect *ss = fetch_supersect(ibfd, isection, isympp);
509 /* Examine each symbol used in a relocation. If it's not one of the
510 special bfd section symbols, then mark it with BSF_KEEP. */
511 int i;
512 for (i = 0; i < ss->num_relocs; i++) {
513 if (*ss->relocs[i]->sym_ptr_ptr != bfd_com_section_ptr->symbol
514 && *ss->relocs[i]->sym_ptr_ptr !=
515 bfd_abs_section_ptr->symbol
516 && *ss->relocs[i]->sym_ptr_ptr !=
517 bfd_und_section_ptr->symbol)
518 (*ss->relocs[i]->sym_ptr_ptr)->flags |= BSF_KEEP;
522 /* Modified function from GNU Binutils objcopy.c
524 * Choose which symbol entries to copy.
525 * We don't copy in place, because that confuses the relocs.
526 * Return the number of symbols to print.
528 unsigned int filter_symbols(bfd *abfd, bfd *obfd, asymbol **osyms,
529 asymbol **isyms, long symcount)
531 asymbol **from = isyms, **to = osyms;
532 long src_count = 0, dst_count = 0;
534 for (; src_count < symcount; src_count++) {
535 asymbol *sym = from[src_count];
536 flagword flags = sym->flags;
538 if (mode("keep") && want_section(sym->section->name, NULL)) {
539 char *newname =
540 malloc(strlen(sym->name) + strlen(addstr_all) +
541 strlen(addstr_sect) + 1);
542 sprintf(newname, "%s%s%s", sym->name, addstr_all,
543 addstr_sect);
544 sym->name = newname;
547 int keep;
548 if ((flags & BSF_KEEP) != 0 /* Used in relocation. */
549 || ((flags & BSF_SECTION_SYM) != 0
550 && ((*(sym->section)->symbol_ptr_ptr)->flags
551 & BSF_KEEP) != 0))
552 keep = 1;
553 else if ((flags & (BSF_GLOBAL | BSF_WEAK)) != 0)
554 keep = 1;
555 else if (bfd_decode_symclass(sym) == 'I')
556 /* Global symbols in $idata sections need to be retained.
557 External users of the library containing the $idata
558 section may reference these symbols. */
559 keep = 1;
560 else if ((flags & BSF_GLOBAL) != 0
561 || (flags & BSF_WEAK) != 0
562 || bfd_is_com_section(sym->section))
563 keep = 1;
564 else if ((flags & BSF_DEBUGGING) != 0)
565 keep = 1;
566 else
567 keep = !bfd_is_local_label(abfd, sym);
569 if (!want_section(sym->section->name, NULL))
570 keep = 0;
572 if (mode("rmsyms") && match_varargs(sym->name))
573 keep = 0;
575 if (keep)
576 to[dst_count++] = sym;
578 if (keep && mode("globalize")
579 && ends_with(sym->name, globalizestr)) {
580 asymbol *new = bfd_make_empty_symbol(obfd);
581 char *tmp =
582 malloc(strlen(sym->name) + strlen("_global") + 1);
583 sprintf(tmp, "%s_global", sym->name);
584 new->name = tmp;
585 new->value = sym->value;
586 new->flags = BSF_GLOBAL;
587 new->section = sym->section;
588 to[dst_count++] = new;
592 asection *p;
593 for (p = obfd->sections; mode("keep") && p != NULL; p = p->next) {
594 if (starts_with(p->name, ".rodata") &&
595 !exists_sym_with_name(from, symcount, p->name)) {
596 asymbol *new = bfd_make_empty_symbol(obfd);
597 new->name = p->name;
598 new->value = 0x0;
599 new->flags = BSF_GLOBAL;
600 new->section = p;
601 to[dst_count++] = new;
605 to[dst_count] = NULL;
606 return dst_count;
609 int exists_sym_with_name(asymbol **syms, int symcount, const char *desired)
611 int i;
612 for (i = 0; i < symcount; i++) {
613 if (strcmp(bfd_asymbol_name(syms[i]), desired) == 0)
614 return 1;
616 return 0;
619 int match_varargs(const char *str)
621 int i;
622 for (i = 0; i < varargs_count; i++) {
623 if (strcmp(str, varargs[i]) == 0)
624 return 1;
625 if (starts_with(str, varargs[i]) &&
626 strcmp(str + strlen(varargs[i]), "_global") == 0)
627 return 1;
629 return 0;
632 int want_section(const char *name, char **newname)
634 static const char *static_want[] =
635 { ".altinstructions", ".altinstr_replacement", ".smp_locks",
636 ".parainstructions", NULL
639 if (!mode("keep"))
640 return 1;
642 struct wsect *w = wanted_sections;
643 for (; w != NULL; w = w->next) {
644 if (strcmp(w->name, name) == 0)
645 goto success;
648 if (starts_with(name, ".ksplice"))
649 goto success;
650 if (mode("keep-helper") && starts_with(name, ".text"))
651 goto success;
652 if (match_varargs(name))
653 goto success;
655 int i;
656 for (i = 0; static_want[i] != NULL; i++) {
657 if (strcmp(name, static_want[i]) == 0)
658 return 1;
660 return 0;
662 success:
664 if (newname != NULL) {
665 *newname =
666 malloc(strlen(name) + strlen(addstr_all) +
667 strlen(addstr_sect) + 1);
668 sprintf(*newname, "%s%s%s", name, addstr_all, addstr_sect);
670 return 1;
673 struct specsect *is_special(const char *name)
675 int i;
676 for (i = 0; special_sections[i].sectname != NULL; i++) {
677 if (strcmp(special_sections[i].sectname, name) == 0)
678 return &special_sections[i];
680 return NULL;