128-2
[darwin-xtools.git] / ld64 / src / other / rebase.cpp
blob2255789436f2303f6ea371a1a8fe0941219a6228
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2006-2008 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/mman.h>
28 #include <mach/mach.h>
29 #include <limits.h>
30 #include <stdarg.h>
31 #include <stdio.h>
32 #include <fcntl.h>
33 #include <errno.h>
34 #include <unistd.h>
35 #include <vector>
36 #include <set>
39 #include "MachOFileAbstraction.hpp"
40 #include "Architectures.hpp"
// When true (set by the -v command line option), rebase() prints the old and
// new base address of every slice it slides.
static bool verbose = false;
//
// Formats a printf-style message into a heap allocated string and throws it
// as a `const char*`.  Never returns.  The string is intentionally not freed:
// ownership passes to whoever catches the exception.
//
// If the message cannot be formatted (vasprintf fails), a fixed string is
// thrown instead so the handler never receives an indeterminate pointer.
//
__attribute__((noreturn))
void throwf(const char* format, ...)
{
	va_list	list;
	char*	p = NULL;
	va_start(list, format);
	const int written = vasprintf(&p, format, list);
	va_end(list);

	// on failure the contents of p are undefined, so do not throw it
	if ( (written < 0) || (p == NULL) )
		throw "throwf: could not format error message";

	const char*	t = p;
	throw t;
}
58 class AbstractRebaser
60 public:
61 virtual cpu_type_t getArchitecture() const = 0;
62 virtual uint64_t getBaseAddress() const = 0;
63 virtual uint64_t getVMSize() const = 0;
64 virtual void setBaseAddress(uint64_t) = 0;
68 template <typename A>
69 class Rebaser : public AbstractRebaser
71 public:
72 Rebaser(const void* machHeader);
73 virtual ~Rebaser() {}
75 virtual cpu_type_t getArchitecture() const;
76 virtual uint64_t getBaseAddress() const;
77 virtual uint64_t getVMSize() const;
78 virtual void setBaseAddress(uint64_t);
80 private:
81 typedef typename A::P P;
82 typedef typename A::P::E E;
83 typedef typename A::P::uint_t pint_t;
85 struct vmmap { pint_t vmaddr; pint_t vmsize; pint_t fileoff; };
87 void setRelocBase();
88 void buildSectionTable();
89 void adjustLoadCommands();
90 void adjustSymbolTable();
91 void adjustDATA();
92 void doLocalRelocation(const macho_relocation_info<P>* reloc);
93 pint_t* mappedAddressForVMAddress(uint32_t vmaddress);
94 void rebaseAt(int segIndex, uint64_t offset, uint8_t type);
96 const macho_header<P>* fHeader;
97 pint_t fOrignalVMRelocBaseAddress;
98 pint_t fSlide;
99 std::vector<vmmap> fVMMApping;
104 class MultiArchRebaser
106 public:
107 MultiArchRebaser(const char* path, bool writable=false);
108 ~MultiArchRebaser();
110 const std::vector<AbstractRebaser*>& getArchs() const { return fRebasers; }
111 void commit();
113 private:
114 std::vector<AbstractRebaser*> fRebasers;
115 void* fMappingAddress;
116 uint64_t fFileSize;
121 MultiArchRebaser::MultiArchRebaser(const char* path, bool writable)
122 : fMappingAddress(0), fFileSize(0)
124 // map in whole file
125 int fd = ::open(path, (writable ? O_RDWR : O_RDONLY), 0);
126 if ( fd == -1 )
127 throwf("can't open file %s, errno=%d", path, errno);
128 struct stat stat_buf;
129 if ( fstat(fd, &stat_buf) == -1)
130 throwf("can't stat open file %s, errno=%d", path, errno);
131 if ( stat_buf.st_size < 20 )
132 throwf("file too small %s", path);
133 const int prot = writable ? (PROT_READ | PROT_WRITE) : PROT_READ;
134 const int flags = writable ? (MAP_FILE | MAP_SHARED) : (MAP_FILE | MAP_PRIVATE);
135 uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, prot, flags, fd, 0);
136 if ( p == (uint8_t*)(-1) )
137 throwf("can't map file %s, errno=%d", path, errno);
138 ::close(fd);
140 // if fat file, process each architecture
141 const fat_header* fh = (fat_header*)p;
142 const mach_header* mh = (mach_header*)p;
143 if ( fh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
144 // Fat header is always big-endian
145 const struct fat_arch* archs = (struct fat_arch*)(p + sizeof(struct fat_header));
146 for (unsigned long i=0; i < OSSwapBigToHostInt32(fh->nfat_arch); ++i) {
147 uint32_t fileOffset = OSSwapBigToHostInt32(archs[i].offset);
148 try {
149 switch ( OSSwapBigToHostInt32(archs[i].cputype) ) {
150 case CPU_TYPE_POWERPC:
151 fRebasers.push_back(new Rebaser<ppc>(&p[fileOffset]));
152 break;
153 case CPU_TYPE_POWERPC64:
154 fRebasers.push_back(new Rebaser<ppc64>(&p[fileOffset]));
155 break;
156 case CPU_TYPE_I386:
157 fRebasers.push_back(new Rebaser<x86>(&p[fileOffset]));
158 break;
159 case CPU_TYPE_X86_64:
160 fRebasers.push_back(new Rebaser<x86_64>(&p[fileOffset]));
161 break;
162 case CPU_TYPE_ARM:
163 fRebasers.push_back(new Rebaser<arm>(&p[fileOffset]));
164 break;
165 default:
166 throw "unknown file format";
169 catch (const char* msg) {
170 fprintf(stderr, "rebase warning: %s for %s\n", msg, path);
174 else {
175 try {
176 if ( (OSSwapBigToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapBigToHostInt32(mh->cputype) == CPU_TYPE_POWERPC)) {
177 fRebasers.push_back(new Rebaser<ppc>(mh));
179 else if ( (OSSwapBigToHostInt32(mh->magic) == MH_MAGIC_64) && (OSSwapBigToHostInt32(mh->cputype) == CPU_TYPE_POWERPC64)) {
180 fRebasers.push_back(new Rebaser<ppc64>(mh));
182 else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_I386)) {
183 fRebasers.push_back(new Rebaser<x86>(mh));
185 else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC_64) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_X86_64)) {
186 fRebasers.push_back(new Rebaser<x86_64>(mh));
188 else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_ARM)) {
189 fRebasers.push_back(new Rebaser<arm>(mh));
191 else {
192 throw "unknown file format";
195 catch (const char* msg) {
196 fprintf(stderr, "rebase warning: %s for %s\n", msg, path);
200 fMappingAddress = p;
201 fFileSize = stat_buf.st_size;
205 MultiArchRebaser::~MultiArchRebaser()
207 ::munmap(fMappingAddress, fFileSize);
210 void MultiArchRebaser::commit()
212 ::msync(fMappingAddress, fFileSize, MS_ASYNC);
217 template <typename A>
218 Rebaser<A>::Rebaser(const void* machHeader)
219 : fHeader((const macho_header<P>*)machHeader)
221 switch ( fHeader->filetype() ) {
222 case MH_DYLIB:
223 if ( (fHeader->flags() & MH_SPLIT_SEGS) != 0 )
224 throw "split-seg dylibs cannot be rebased";
225 break;
226 case MH_BUNDLE:
227 break;
228 default:
229 throw "file is not a dylib or bundle";
234 template <> cpu_type_t Rebaser<ppc>::getArchitecture() const { return CPU_TYPE_POWERPC; }
235 template <> cpu_type_t Rebaser<ppc64>::getArchitecture() const { return CPU_TYPE_POWERPC64; }
236 template <> cpu_type_t Rebaser<x86>::getArchitecture() const { return CPU_TYPE_I386; }
237 template <> cpu_type_t Rebaser<x86_64>::getArchitecture() const { return CPU_TYPE_X86_64; }
238 template <> cpu_type_t Rebaser<arm>::getArchitecture() const { return CPU_TYPE_ARM; }
240 template <typename A>
241 uint64_t Rebaser<A>::getBaseAddress() const
243 uint64_t lowestSegmentAddress = LLONG_MAX;
244 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
245 const uint32_t cmd_count = fHeader->ncmds();
246 const macho_load_command<P>* cmd = cmds;
247 for (uint32_t i = 0; i < cmd_count; ++i) {
248 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
249 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
250 if ( segCmd->vmaddr() < lowestSegmentAddress ) {
251 lowestSegmentAddress = segCmd->vmaddr();
254 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
256 return lowestSegmentAddress;
259 template <typename A>
260 uint64_t Rebaser<A>::getVMSize() const
262 const macho_segment_command<P>* highestSegmentCmd = NULL;
263 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
264 const uint32_t cmd_count = fHeader->ncmds();
265 const macho_load_command<P>* cmd = cmds;
266 for (uint32_t i = 0; i < cmd_count; ++i) {
267 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
268 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
269 if ( (highestSegmentCmd == NULL) || (segCmd->vmaddr() > highestSegmentCmd->vmaddr()) ) {
270 highestSegmentCmd = segCmd;
273 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
276 return ((highestSegmentCmd->vmaddr() + highestSegmentCmd->vmsize() - this->getBaseAddress() + 4095) & (-4096));
280 template <typename A>
281 void Rebaser<A>::setBaseAddress(uint64_t addr)
283 // calculate slide
284 fSlide = addr - this->getBaseAddress();
286 // compute base address for relocations
287 this->setRelocBase();
289 // build cache of section index to section
290 this->buildSectionTable();
292 // update load commands
293 this->adjustLoadCommands();
295 // update symbol table
296 this->adjustSymbolTable();
298 // update writable segments that have internal pointers
299 this->adjustDATA();
302 template <typename A>
303 void Rebaser<A>::adjustLoadCommands()
305 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
306 const uint32_t cmd_count = fHeader->ncmds();
307 const macho_load_command<P>* cmd = cmds;
308 for (uint32_t i = 0; i < cmd_count; ++i) {
309 switch ( cmd->cmd() ) {
310 case LC_ID_DYLIB:
311 if ( (fHeader->flags() & MH_PREBOUND) != 0 ) {
312 // clear timestamp so that any prebound clients are invalidated
313 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
314 dylib->set_timestamp(1);
316 break;
317 case LC_LOAD_DYLIB:
318 case LC_LOAD_WEAK_DYLIB:
319 case LC_REEXPORT_DYLIB:
320 case LC_LOAD_UPWARD_DYLIB:
321 if ( (fHeader->flags() & MH_PREBOUND) != 0 ) {
322 // clear expected timestamps so that this image will load with invalid prebinding
323 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
324 dylib->set_timestamp(2);
326 break;
327 case macho_routines_command<P>::CMD:
328 // update -init command
330 macho_routines_command<P>* routines = (macho_routines_command<P>*)cmd;
331 routines->set_init_address(routines->init_address() + fSlide);
333 break;
334 case macho_segment_command<P>::CMD:
335 // update segment commands
337 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
338 seg->set_vmaddr(seg->vmaddr() + fSlide);
339 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
340 macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
341 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
342 sect->set_addr(sect->addr() + fSlide);
345 break;
347 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
352 template <typename A>
353 void Rebaser<A>::buildSectionTable()
355 // build vector of sections
356 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
357 const uint32_t cmd_count = fHeader->ncmds();
358 const macho_load_command<P>* cmd = cmds;
359 for (uint32_t i = 0; i < cmd_count; ++i) {
360 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
361 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
362 vmmap mapping;
363 mapping.vmaddr = seg->vmaddr();
364 mapping.vmsize = seg->vmsize();
365 mapping.fileoff = seg->fileoff();
366 fVMMApping.push_back(mapping);
368 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
373 template <typename A>
374 void Rebaser<A>::adjustSymbolTable()
376 const macho_dysymtab_command<P>* dysymtab = NULL;
377 macho_nlist<P>* symbolTable = NULL;
378 const char* strings = NULL;
380 // get symbol table info
381 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
382 const uint32_t cmd_count = fHeader->ncmds();
383 const macho_load_command<P>* cmd = cmds;
384 for (uint32_t i = 0; i < cmd_count; ++i) {
385 switch (cmd->cmd()) {
386 case LC_SYMTAB:
388 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
389 symbolTable = (macho_nlist<P>*)(((uint8_t*)fHeader) + symtab->symoff());
390 strings = (char*)(((uint8_t*)fHeader) + symtab->stroff());
392 break;
393 case LC_DYSYMTAB:
394 dysymtab = (macho_dysymtab_command<P>*)cmd;
395 break;
397 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
400 // walk all exports and slide their n_value
401 macho_nlist<P>* lastExport = &symbolTable[dysymtab->iextdefsym()+dysymtab->nextdefsym()];
402 for (macho_nlist<P>* entry = &symbolTable[dysymtab->iextdefsym()]; entry < lastExport; ++entry) {
403 if ( (entry->n_type() & N_TYPE) == N_SECT )
404 entry->set_n_value(entry->n_value() + fSlide);
407 // walk all local symbols and slide their n_value
408 macho_nlist<P>* lastLocal = &symbolTable[dysymtab->ilocalsym()+dysymtab->nlocalsym()];
409 for (macho_nlist<P>* entry = &symbolTable[dysymtab->ilocalsym()]; entry < lastLocal; ++entry) {
410 if ( ((entry->n_type() & N_STAB) == 0) && ((entry->n_type() & N_TYPE) == N_SECT) ) {
411 entry->set_n_value(entry->n_value() + fSlide);
413 else if ( entry->n_type() & N_STAB ) {
414 // some stabs need to be slid too
415 switch ( entry->n_type() ) {
416 case N_FUN:
417 // don't slide end-of-function FUN which is FUN with no string
418 if ( (entry->n_strx() == 0) || (strings[entry->n_strx()] == '\0') )
419 break;
420 case N_BNSYM:
421 case N_STSYM:
422 case N_LCSYM:
423 entry->set_n_value(entry->n_value() + fSlide);
424 break;
429 // FIXME ¥¥¥ adjust dylib_module if it exists
432 static uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end)
434 uint64_t result = 0;
435 int bit = 0;
436 do {
437 if (p == end)
438 throwf("malformed uleb128");
440 uint64_t slice = *p & 0x7f;
442 if (bit >= 64 || slice << bit >> bit != slice)
443 throwf("uleb128 too big");
444 else {
445 result |= (slice << bit);
446 bit += 7;
449 while (*p++ & 0x80);
450 return result;
453 template <typename A>
454 void Rebaser<A>::rebaseAt(int segIndex, uint64_t offset, uint8_t type)
456 //fprintf(stderr, "rebaseAt(seg=%d, offset=0x%08llX, type=%d\n", segIndex, offset, type);
457 static int lastSegIndex = -1;
458 static uint8_t* lastSegMappedStart = NULL;
459 if ( segIndex != lastSegIndex ) {
460 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
461 const uint32_t cmd_count = fHeader->ncmds();
462 const macho_load_command<P>* cmd = cmds;
463 int segCount = 0;
464 for (uint32_t i = 0; i < cmd_count; ++i) {
465 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
466 if ( segIndex == segCount ) {
467 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
468 lastSegMappedStart = (uint8_t*)fHeader + seg->fileoff();
469 lastSegIndex = segCount;
470 break;
472 ++segCount;
474 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
478 pint_t* locationToFix = (pint_t*)(lastSegMappedStart+offset);
479 uint32_t* locationToFix32 = (uint32_t*)(lastSegMappedStart+offset);
480 switch (type) {
481 case REBASE_TYPE_POINTER:
482 P::setP(*locationToFix, A::P::getP(*locationToFix) + fSlide);
483 break;
484 case REBASE_TYPE_TEXT_ABSOLUTE32:
485 E::set32(*locationToFix32, E::get32(*locationToFix32) + fSlide);
486 break;
487 default:
488 throwf("bad rebase type %d", type);
493 template <typename A>
494 void Rebaser<A>::adjustDATA()
496 const macho_dysymtab_command<P>* dysymtab = NULL;
497 const macho_dyld_info_command<P>* dyldInfo = NULL;
499 // get symbol table info
500 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
501 const uint32_t cmd_count = fHeader->ncmds();
502 const macho_load_command<P>* cmd = cmds;
503 for (uint32_t i = 0; i < cmd_count; ++i) {
504 switch (cmd->cmd()) {
505 case LC_DYSYMTAB:
506 dysymtab = (macho_dysymtab_command<P>*)cmd;
507 break;
508 case LC_DYLD_INFO:
509 case LC_DYLD_INFO_ONLY:
510 dyldInfo = (macho_dyld_info_command<P>*)cmd;
511 break;
513 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
516 // use new encoding of rebase info if present
517 if ( dyldInfo != NULL ) {
518 if ( dyldInfo->rebase_size() != 0 ) {
519 const uint8_t* p = (uint8_t*)fHeader + dyldInfo->rebase_off();
520 const uint8_t* end = &p[dyldInfo->rebase_size()];
522 uint8_t type = 0;
523 uint64_t offset = 0;
524 uint32_t count;
525 uint32_t skip;
526 int segIndex;
527 bool done = false;
528 while ( !done && (p < end) ) {
529 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
530 uint8_t opcode = *p & REBASE_OPCODE_MASK;
531 ++p;
532 switch (opcode) {
533 case REBASE_OPCODE_DONE:
534 done = true;
535 break;
536 case REBASE_OPCODE_SET_TYPE_IMM:
537 type = immediate;
538 break;
539 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
540 segIndex = immediate;
541 offset = read_uleb128(p, end);
542 break;
543 case REBASE_OPCODE_ADD_ADDR_ULEB:
544 offset += read_uleb128(p, end);
545 break;
546 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
547 offset += immediate*sizeof(pint_t);
548 break;
549 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
550 for (int i=0; i < immediate; ++i) {
551 rebaseAt(segIndex, offset, type);
552 offset += sizeof(pint_t);
554 break;
555 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
556 count = read_uleb128(p, end);
557 for (uint32_t i=0; i < count; ++i) {
558 rebaseAt(segIndex, offset, type);
559 offset += sizeof(pint_t);
561 break;
562 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
563 rebaseAt(segIndex, offset, type);
564 offset += read_uleb128(p, end) + sizeof(pint_t);
565 break;
566 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
567 count = read_uleb128(p, end);
568 skip = read_uleb128(p, end);
569 for (uint32_t i=0; i < count; ++i) {
570 rebaseAt(segIndex, offset, type);
571 offset += skip + sizeof(pint_t);
573 break;
574 default:
575 throwf("bad rebase opcode %d", *p);
583 else {
584 // walk all local relocations and slide every pointer
585 const macho_relocation_info<P>* const relocsStart = (macho_relocation_info<P>*)(((uint8_t*)fHeader) + dysymtab->locreloff());
586 const macho_relocation_info<P>* const relocsEnd = &relocsStart[dysymtab->nlocrel()];
587 for (const macho_relocation_info<P>* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
588 this->doLocalRelocation(reloc);
591 // walk non-lazy-pointers and slide the ones that are LOCAL
592 cmd = cmds;
593 for (uint32_t i = 0; i < cmd_count; ++i) {
594 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
595 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
596 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
597 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
598 const uint32_t* const indirectTable = (uint32_t*)(((uint8_t*)fHeader) + dysymtab->indirectsymoff());
599 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
600 if ( (sect->flags() & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS ) {
601 const uint32_t indirectTableOffset = sect->reserved1();
602 uint32_t pointerCount = sect->size() / sizeof(pint_t);
603 pint_t* nonLazyPointer = (pint_t*)(((uint8_t*)fHeader) + sect->offset());
604 for (uint32_t i=0; i < pointerCount; ++i, ++nonLazyPointer) {
605 if ( E::get32(indirectTable[indirectTableOffset + i]) == INDIRECT_SYMBOL_LOCAL ) {
606 P::setP(*nonLazyPointer, A::P::getP(*nonLazyPointer) + fSlide);
612 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
618 template <typename A>
619 typename A::P::uint_t* Rebaser<A>::mappedAddressForVMAddress(uint32_t vmaddress)
621 for(typename std::vector<vmmap>::iterator it = fVMMApping.begin(); it != fVMMApping.end(); ++it) {
622 //fprintf(stderr, "vmaddr=0x%08lX, vmsize=0x%08lX\n", it->vmaddr, it->vmsize);
623 if ( (vmaddress >= it->vmaddr) && (vmaddress < (it->vmaddr+it->vmsize)) ) {
624 return (pint_t*)((vmaddress - it->vmaddr) + it->fileoff + (uint8_t*)fHeader);
627 throwf("reloc address 0x%08X not found", vmaddress);
631 template <>
632 void Rebaser<x86_64>::doLocalRelocation(const macho_relocation_info<x86_64::P>* reloc)
634 if ( reloc->r_type() == X86_64_RELOC_UNSIGNED ) {
635 pint_t* addr = mappedAddressForVMAddress(reloc->r_address() + fOrignalVMRelocBaseAddress);
636 P::setP(*addr, P::getP(*addr) + fSlide);
638 else {
639 throw "invalid relocation type";
643 template <>
644 void Rebaser<ppc>::doLocalRelocation(const macho_relocation_info<P>* reloc)
646 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
647 if ( reloc->r_type() == GENERIC_RELOC_VANILLA ) {
648 pint_t* addr = mappedAddressForVMAddress(reloc->r_address() + fOrignalVMRelocBaseAddress);
649 P::setP(*addr, P::getP(*addr) + fSlide);
652 else {
653 throw "cannot rebase final linked image with scattered relocations";
657 template <>
658 void Rebaser<x86>::doLocalRelocation(const macho_relocation_info<P>* reloc)
660 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
661 if ( reloc->r_type() == GENERIC_RELOC_VANILLA ) {
662 pint_t* addr = mappedAddressForVMAddress(reloc->r_address() + fOrignalVMRelocBaseAddress);
663 P::setP(*addr, P::getP(*addr) + fSlide);
666 else {
667 macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
668 if ( sreloc->r_type() == GENERIC_RELOC_PB_LA_PTR ) {
669 sreloc->set_r_value( sreloc->r_value() + fSlide );
671 else {
672 throw "cannot rebase final linked image with scattered relocations";
677 template <>
678 void Rebaser<arm>::doLocalRelocation(const macho_relocation_info<P>* reloc)
680 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
681 if ( reloc->r_type() == ARM_RELOC_VANILLA ) {
682 pint_t* addr = mappedAddressForVMAddress(reloc->r_address() + fOrignalVMRelocBaseAddress);
683 P::setP(*addr, P::getP(*addr) + fSlide);
686 else {
687 macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
688 if ( sreloc->r_type() == ARM_RELOC_PB_LA_PTR ) {
689 sreloc->set_r_value( sreloc->r_value() + fSlide );
691 else {
692 throw "cannot rebase final linked image with scattered relocations";
697 template <typename A>
698 void Rebaser<A>::doLocalRelocation(const macho_relocation_info<P>* reloc)
700 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
701 if ( reloc->r_type() == GENERIC_RELOC_VANILLA ) {
702 pint_t* addr = mappedAddressForVMAddress(reloc->r_address() + fOrignalVMRelocBaseAddress);
703 P::setP(*addr, P::getP(*addr) + fSlide);
706 else {
707 throw "cannot rebase final linked image with scattered relocations";
712 template <typename A>
713 void Rebaser<A>::setRelocBase()
715 // reloc addresses are from the start of the mapped file (base address)
716 fOrignalVMRelocBaseAddress = this->getBaseAddress();
717 //fprintf(stderr, "fOrignalVMRelocBaseAddress=0x%08X\n", fOrignalVMRelocBaseAddress);
720 template <>
721 void Rebaser<ppc64>::setRelocBase()
723 // reloc addresses either:
724 // 1) from the base address if no writable segment is > 4GB from base address
725 // 2) from start of first writable segment
726 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
727 const uint32_t cmd_count = fHeader->ncmds();
728 const macho_load_command<P>* cmd = cmds;
729 for (uint32_t i = 0; i < cmd_count; ++i) {
730 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
731 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
732 if ( segCmd->initprot() & VM_PROT_WRITE ) {
733 if ( (segCmd->vmaddr() + segCmd->vmsize() - this->getBaseAddress()) > 0x100000000ULL ) {
734 // found writable segment with address > 4GB past base address
735 fOrignalVMRelocBaseAddress = segCmd->vmaddr();
736 return;
740 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
742 // just use base address
743 fOrignalVMRelocBaseAddress = this->getBaseAddress();
746 template <>
747 void Rebaser<x86_64>::setRelocBase()
749 // reloc addresses are always based from the start of the first writable segment
750 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
751 const uint32_t cmd_count = fHeader->ncmds();
752 const macho_load_command<P>* cmd = cmds;
753 for (uint32_t i = 0; i < cmd_count; ++i) {
754 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
755 const macho_segment_command<P>* segCmd = (const macho_segment_command<P>*)cmd;
756 if ( segCmd->initprot() & VM_PROT_WRITE ) {
757 fOrignalVMRelocBaseAddress = segCmd->vmaddr();
758 return;
761 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
763 throw "no writable segment";
767 static void copyFile(const char* srcFile, const char* dstFile)
769 // open files
770 int src = open(srcFile, O_RDONLY);
771 if ( src == -1 )
772 throwf("can't open file %s, errno=%d", srcFile, errno);
773 struct stat stat_buf;
774 if ( fstat(src, &stat_buf) == -1)
775 throwf("can't stat open file %s, errno=%d", srcFile, errno);
777 // create new file with all same permissions to hold copy of dylib
778 ::unlink(dstFile);
779 int dst = open(dstFile, O_CREAT | O_RDWR | O_TRUNC, stat_buf.st_mode);
780 if ( dst == -1 )
781 throwf("can't create temp file %s, errnor=%d", dstFile, errno);
783 // mark source as "don't cache"
784 (void)fcntl(src, F_NOCACHE, 1);
785 // we want to cache the dst because we are about to map it in and modify it
787 // copy permission bits
788 if ( chmod(dstFile, stat_buf.st_mode & 07777) == -1 )
789 throwf("can't chmod temp file %s, errno=%d", dstFile, errno);
790 if ( chown(dstFile, stat_buf.st_uid, stat_buf.st_gid) == -1)
791 throwf("can't chown temp file %s, errno=%d", dstFile, errno);
793 // copy contents
794 ssize_t len;
795 const uint32_t kBufferSize = 128*1024;
796 static uint8_t* buffer = NULL;
797 if ( buffer == NULL ) {
798 vm_address_t addr = 0;
799 if ( vm_allocate(mach_task_self(), &addr, kBufferSize, true /*find range*/) == KERN_SUCCESS )
800 buffer = (uint8_t*)addr;
801 else
802 throw "can't allcoate copy buffer";
804 while ( (len = read(src, buffer, kBufferSize)) > 0 ) {
805 if ( write(dst, buffer, len) == -1 )
806 throwf("write failure copying feil %s, errno=%d", dstFile, errno);
809 // close files
810 int result1 = close(dst);
811 int result2 = close(src);
812 if ( (result1 != 0) || (result2 != 0) )
813 throw "can't close file";
817 // scan dylibs and collect size info
818 // calculate new base address for each dylib
819 // rebase each file
820 // copy to temp and mmap
821 // update content
822 // unmap/flush
823 // rename
825 struct archInfo {
826 cpu_type_t arch;
827 uint64_t vmSize;
828 uint64_t orgBase;
829 uint64_t newBase;
832 struct fileInfo
834 fileInfo(const char* p) : path(p) {}
836 const char* path;
837 std::vector<archInfo> archs;
841 // add archInfos to fileInfo for every slice of a fat file
842 // for ppc, there may be duplicate architectures (with different sub-types)
844 static void setSizes(fileInfo& info, const std::set<cpu_type_t>& onlyArchs)
846 const MultiArchRebaser mar(info.path);
847 const std::vector<AbstractRebaser*>& rebasers = mar.getArchs();
848 for(std::set<cpu_type_t>::iterator ait=onlyArchs.begin(); ait != onlyArchs.end(); ++ait) {
849 for(std::vector<AbstractRebaser*>::const_iterator rit=rebasers.begin(); rit != rebasers.end(); ++rit) {
850 AbstractRebaser* rebaser = *rit;
851 if ( rebaser->getArchitecture() == *ait ) {
852 archInfo ai;
853 ai.arch = *ait;
854 ai.vmSize = rebaser->getVMSize();
855 ai.orgBase = rebaser->getBaseAddress();
856 ai.newBase = 0;
857 //fprintf(stderr, "base=0x%llX, size=0x%llX\n", ai.orgBase, ai.vmSize);
858 info.archs.push_back(ai);
864 static const char* nameForArch(cpu_type_t arch)
866 switch( arch ) {
867 case CPU_TYPE_POWERPC:
868 return "ppc";
869 case CPU_TYPE_POWERPC64:
870 return "ppca64";
871 case CPU_TYPE_I386:
872 return "i386";
873 case CPU_TYPE_X86_64:
874 return "x86_64";
875 case CPU_TYPE_ARM:
876 return "arm";
878 return "unknown";
881 static void rebase(const fileInfo& info)
883 // generate temp file name
884 char realFilePath[PATH_MAX];
885 if ( realpath(info.path, realFilePath) == NULL ) {
886 throwf("realpath() failed on %s, errno=%d", info.path, errno);
888 const char* tempPath;
889 asprintf((char**)&tempPath, "%s_rebase", realFilePath);
891 // copy whole file to temp file
892 copyFile(info.path, tempPath);
894 try {
895 // rebase temp file
896 MultiArchRebaser mar(tempPath, true);
897 const std::vector<AbstractRebaser*>& rebasers = mar.getArchs();
898 for(std::vector<archInfo>::const_iterator fait=info.archs.begin(); fait != info.archs.end(); ++fait) {
899 for(std::vector<AbstractRebaser*>::const_iterator rit=rebasers.begin(); rit != rebasers.end(); ++rit) {
900 if ( (*rit)->getArchitecture() == fait->arch ) {
901 (*rit)->setBaseAddress(fait->newBase);
902 if ( verbose )
903 printf("%8s 0x%0llX -> 0x%0llX %s\n", nameForArch(fait->arch), fait->orgBase, fait->newBase, info.path);
908 // flush temp file out to disk
909 mar.commit();
911 // rename
912 int result = rename(tempPath, info.path);
913 if ( result != 0 ) {
914 throwf("can't swap temporary rebased file: rename(%s,%s) returned errno=%d", tempPath, info.path, errno);
917 // make sure every really gets out to disk
918 ::sync();
920 catch (const char* msg) {
921 // delete temp file
922 ::unlink(tempPath);
924 // throw exception with file name added
925 const char* newMsg;
926 asprintf((char**)&newMsg, "%s for file %s", msg, info.path);
927 throw newMsg;
931 static uint64_t totalVMSize(cpu_type_t arch, std::vector<fileInfo>& files)
933 uint64_t totalSize = 0;
934 for(std::vector<fileInfo>::iterator fit=files.begin(); fit != files.end(); ++fit) {
935 fileInfo& fi = *fit;
936 for(std::vector<archInfo>::iterator fait=fi.archs.begin(); fait != fi.archs.end(); ++fait) {
937 if ( fait->arch == arch )
938 totalSize += fait->vmSize;
941 return totalSize;
944 static uint64_t startAddress(cpu_type_t arch, std::vector<fileInfo>& files, uint64_t lowAddress, uint64_t highAddress)
946 if ( lowAddress != 0 )
947 return lowAddress;
948 else if ( highAddress != 0 ) {
949 uint64_t totalSize = totalVMSize(arch, files);
950 if ( highAddress < totalSize )
951 throwf("cannot use -high_address 0x%X because total size of images is greater: 0x%X", highAddress, totalSize);
952 return highAddress - totalSize;
954 else {
955 if ( (arch == CPU_TYPE_I386) || (arch == CPU_TYPE_POWERPC) ) {
956 // place dylibs below dyld
957 uint64_t topAddr = 0x8FE00000;
958 uint64_t totalSize = totalVMSize(arch, files);
959 if ( totalSize > topAddr )
960 throwf("total size of images (0x%X) does not fit below 0x8FE00000", totalSize);
961 return topAddr - totalSize;
963 else if ( arch == CPU_TYPE_POWERPC64 ) {
964 return 0x200000000ULL;
966 else if ( arch == CPU_TYPE_X86_64 ) {
967 return 0x200000000ULL;
969 else if ( arch == CPU_TYPE_ARM ) {
970 // place dylibs below dyld
971 uint64_t topAddr = 0x2FE00000;
972 uint64_t totalSize = totalVMSize(arch, files);
973 if ( totalSize > topAddr )
974 throwf("total size of images (0x%X) does not fit below 0x2FE00000", totalSize);
975 return topAddr - totalSize;
977 else
978 throw "unknown architecture";
// Prints the one-line command summary to stderr.
static void usage()
{
	fputs("rebase [-low_address] [-high_address] [-v] [-arch <arch>] files...\n", stderr);
}
988 int main(int argc, const char* argv[])
990 std::vector<fileInfo> files;
991 std::set<cpu_type_t> onlyArchs;
992 uint64_t lowAddress = 0;
993 uint64_t highAddress = 0;
995 try {
996 // parse command line options
997 char* endptr;
998 for(int i=1; i < argc; ++i) {
999 const char* arg = argv[i];
1000 if ( arg[0] == '-' ) {
1001 if ( strcmp(arg, "-v") == 0 ) {
1002 verbose = true;
1004 else if ( strcmp(arg, "-low_address") == 0 ) {
1005 lowAddress = strtoull(argv[++i], &endptr, 16);
1007 else if ( strcmp(arg, "-high_address") == 0 ) {
1008 highAddress = strtoull(argv[++i], &endptr, 16);
1010 else if ( strcmp(arg, "-arch") == 0 ) {
1011 const char* arch = argv[++i];
1012 if ( strcmp(arch, "ppc") == 0 )
1013 onlyArchs.insert(CPU_TYPE_POWERPC);
1014 else if ( strcmp(arch, "ppc64") == 0 )
1015 onlyArchs.insert(CPU_TYPE_POWERPC64);
1016 else if ( strcmp(arch, "i386") == 0 )
1017 onlyArchs.insert(CPU_TYPE_I386);
1018 else if ( strcmp(arch, "x86_64") == 0 )
1019 onlyArchs.insert(CPU_TYPE_X86_64);
1020 else {
1021 bool found = false;
1022 for (const ARMSubType* t=ARMSubTypes; t->subTypeName != NULL; ++t) {
1023 if ( strcmp(t->subTypeName,arch) == 0 ) {
1024 onlyArchs.insert(CPU_TYPE_ARM);
1025 found = true;
1026 break;
1029 if ( !found )
1030 throwf("unknown architecture %s", arch);
1033 else {
1034 usage();
1035 throwf("unknown option: %s\n", arg);
1038 else {
1039 files.push_back(fileInfo(arg));
1043 if ( files.size() == 0 )
1044 throw "no files specified";
1046 // use all architectures if no restrictions specified
1047 if ( onlyArchs.size() == 0 ) {
1048 onlyArchs.insert(CPU_TYPE_POWERPC);
1049 onlyArchs.insert(CPU_TYPE_POWERPC64);
1050 onlyArchs.insert(CPU_TYPE_I386);
1051 onlyArchs.insert(CPU_TYPE_X86_64);
1052 onlyArchs.insert(CPU_TYPE_ARM);
1055 // scan files and collect sizes
1056 for(std::vector<fileInfo>::iterator it=files.begin(); it != files.end(); ++it) {
1057 setSizes(*it, onlyArchs);
1060 // assign new base address for each arch
1061 for(std::set<cpu_type_t>::iterator ait=onlyArchs.begin(); ait != onlyArchs.end(); ++ait) {
1062 cpu_type_t arch = *ait;
1063 uint64_t baseAddress = startAddress(arch, files, lowAddress, highAddress);
1064 for(std::vector<fileInfo>::iterator fit=files.begin(); fit != files.end(); ++fit) {
1065 fileInfo& fi = *fit;
1066 for(std::vector<archInfo>::iterator fait=fi.archs.begin(); fait != fi.archs.end(); ++fait) {
1067 if ( fait->arch == arch ) {
1068 fait->newBase = baseAddress;
1069 baseAddress += fait->vmSize;
1070 baseAddress = (baseAddress + 4095) & (-4096); // page align
1076 // rebase each file if it contains something rebaseable
1077 for(std::vector<fileInfo>::iterator it=files.begin(); it != files.end(); ++it) {
1078 fileInfo& fi = *it;
1079 if ( fi.archs.size() > 0 )
1080 rebase(fi);
1084 catch (const char* msg) {
1085 fprintf(stderr, "rebase failed: %s\n", msg);
1086 return 1;
1089 return 0;