195-6
[darwin-xtools.git] / dyld / launch-cache / MachORebaser.hpp
blob8907aad90dda84b0b8a8c4a8f13f3b69a898c385
1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
25 #ifndef __MACHO_REBASER__
26 #define __MACHO_REBASER__
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/mman.h>
31 #include <mach/mach.h>
32 #include <limits.h>
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <fcntl.h>
36 #include <errno.h>
37 #include <unistd.h>
38 #include <mach-o/loader.h>
39 #include <mach-o/fat.h>
40 #include <mach-o/reloc.h>
41 #include <mach-o/ppc/reloc.h>
42 #include <mach-o/x86_64/reloc.h>
43 #include <mach-o/arm/reloc.h>
44 #include <vector>
45 #include <set>
47 #include "MachOFileAbstraction.hpp"
48 #include "Architectures.hpp"
49 #include "MachOLayout.hpp"
50 #include "MachOTrie.hpp"
54 class AbstractRebaser
56 public:
57 virtual cpu_type_t getArchitecture() const = 0;
58 virtual uint64_t getBaseAddress() const = 0;
59 virtual uint64_t getVMSize() const = 0;
60 virtual void rebase(std::vector<void*>&) = 0;
64 template <typename A>
65 class Rebaser : public AbstractRebaser
67 public:
68 Rebaser(const MachOLayoutAbstraction&);
69 virtual ~Rebaser() {}
71 virtual cpu_type_t getArchitecture() const;
72 virtual uint64_t getBaseAddress() const;
73 virtual uint64_t getVMSize() const;
74 virtual void rebase(std::vector<void*>&);
76 protected:
77 typedef typename A::P P;
78 typedef typename A::P::E E;
79 typedef typename A::P::uint_t pint_t;
81 pint_t* mappedAddressForNewAddress(pint_t vmaddress);
82 pint_t getSlideForNewAddress(pint_t newAddress);
84 private:
85 void calculateRelocBase();
86 void adjustLoadCommands();
87 void adjustSymbolTable();
88 void optimzeStubs();
89 void makeNoPicStub(uint8_t* stub, pint_t logicalAddress);
90 void adjustDATA();
91 void adjustCode();
92 void applyRebaseInfo(std::vector<void*>& pointersInData);
93 void adjustExportInfo();
94 void doRebase(int segIndex, uint64_t segOffset, uint8_t type, std::vector<void*>& pointersInData);
95 void adjustSegmentLoadCommand(macho_segment_command<P>* seg);
96 pint_t getSlideForVMAddress(pint_t vmaddress);
97 pint_t* mappedAddressForVMAddress(pint_t vmaddress);
98 pint_t* mappedAddressForRelocAddress(pint_t r_address);
99 void adjustRelocBaseAddresses();
100 const uint8_t* doCodeUpdateForEachULEB128Address(const uint8_t* p, uint8_t kind, uint64_t orgBaseAddress, int64_t codeToDataDelta, int64_t codeToImportDelta);
101 void doCodeUpdate(uint8_t kind, uint64_t address, int64_t codeToDataDelta, int64_t codeToImportDelta);
102 void doLocalRelocation(const macho_relocation_info<P>* reloc);
103 bool unequalSlides() const;
105 protected:
106 const macho_header<P>* fHeader;
107 uint8_t* fLinkEditBase; // add file offset to this to get linkedit content
108 const MachOLayoutAbstraction& fLayout;
109 private:
110 pint_t fOrignalVMRelocBaseAddress; // add reloc address to this to get original address reloc referred to
111 const macho_symtab_command<P>* fSymbolTable;
112 const macho_dysymtab_command<P>* fDynamicSymbolTable;
113 const macho_dyld_info_command<P>* fDyldInfo;
114 bool fSplittingSegments;
115 bool fOrignalVMRelocBaseAddressValid;
116 pint_t fSkipSplitSegInfoStart;
117 pint_t fSkipSplitSegInfoEnd;
122 template <typename A>
123 Rebaser<A>::Rebaser(const MachOLayoutAbstraction& layout)
124 : fLayout(layout), fOrignalVMRelocBaseAddress(NULL), fLinkEditBase(NULL),
125 fSymbolTable(NULL), fDynamicSymbolTable(NULL), fDyldInfo(NULL), fSplittingSegments(false),
126 fOrignalVMRelocBaseAddressValid(false), fSkipSplitSegInfoStart(0), fSkipSplitSegInfoEnd(0)
128 fHeader = (const macho_header<P>*)fLayout.getSegments()[0].mappedAddress();
129 switch ( fHeader->filetype() ) {
130 case MH_DYLIB:
131 case MH_BUNDLE:
132 break;
133 default:
134 throw "file is not a dylib or bundle";
137 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
138 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
139 const MachOLayoutAbstraction::Segment& seg = *it;
140 if ( strcmp(seg.name(), "__LINKEDIT") == 0 ) {
141 fLinkEditBase = (uint8_t*)seg.mappedAddress() - seg.fileOffset();
142 break;
145 if ( fLinkEditBase == NULL )
146 throw "no __LINKEDIT segment";
148 // get symbol table info
149 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
150 const uint32_t cmd_count = fHeader->ncmds();
151 const macho_load_command<P>* cmd = cmds;
152 for (uint32_t i = 0; i < cmd_count; ++i) {
153 switch (cmd->cmd()) {
154 case LC_SYMTAB:
155 fSymbolTable = (macho_symtab_command<P>*)cmd;
156 break;
157 case LC_DYSYMTAB:
158 fDynamicSymbolTable = (macho_dysymtab_command<P>*)cmd;
159 break;
160 case LC_DYLD_INFO:
161 case LC_DYLD_INFO_ONLY:
162 fDyldInfo = (macho_dyld_info_command<P>*)cmd;
163 break;
165 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
168 calculateRelocBase();
170 fSplittingSegments = layout.hasSplitSegInfo() && this->unequalSlides();
173 template <> cpu_type_t Rebaser<ppc>::getArchitecture() const { return CPU_TYPE_POWERPC; }
174 template <> cpu_type_t Rebaser<x86>::getArchitecture() const { return CPU_TYPE_I386; }
175 template <> cpu_type_t Rebaser<x86_64>::getArchitecture() const { return CPU_TYPE_X86_64; }
176 template <> cpu_type_t Rebaser<arm>::getArchitecture() const { return CPU_TYPE_ARM; }
178 template <typename A>
179 bool Rebaser<A>::unequalSlides() const
181 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
182 uint64_t slide = segments[0].newAddress() - segments[0].address();
183 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
184 const MachOLayoutAbstraction::Segment& seg = *it;
185 if ( (seg.newAddress() - seg.address()) != slide )
186 return true;
188 return false;
191 template <typename A>
192 uint64_t Rebaser<A>::getBaseAddress() const
194 return fLayout.getSegments()[0].address();
197 template <typename A>
198 uint64_t Rebaser<A>::getVMSize() const
200 uint64_t highestVMAddress = 0;
201 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
202 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
203 const MachOLayoutAbstraction::Segment& seg = *it;
204 if ( seg.address() > highestVMAddress )
205 highestVMAddress = seg.address();
207 return (((highestVMAddress - getBaseAddress()) + 4095) & (-4096));
212 template <typename A>
213 void Rebaser<A>::rebase(std::vector<void*>& pointersInData)
215 // update writable segments that have internal pointers
216 if ( fDyldInfo != NULL )
217 this->applyRebaseInfo(pointersInData);
218 else
219 this->adjustDATA();
221 // if splitting segments, update code-to-data references
222 this->adjustCode();
224 // change address on relocs now that segments are split
225 this->adjustRelocBaseAddresses();
227 // update load commands
228 this->adjustLoadCommands();
230 // update symbol table
231 this->adjustSymbolTable();
233 // optimize stubs
234 this->optimzeStubs();
236 // update export info
237 if ( fDyldInfo != NULL )
238 this->adjustExportInfo();
241 template <>
242 void Rebaser<x86>::adjustSegmentLoadCommand(macho_segment_command<P>* seg)
244 // __IMPORT segments are not-writable in shared cache
245 if ( strcmp(seg->segname(), "__IMPORT") == 0 )
246 seg->set_initprot(VM_PROT_READ|VM_PROT_EXECUTE);
249 template <typename A>
250 void Rebaser<A>::adjustSegmentLoadCommand(macho_segment_command<P>* seg)
255 template <typename A>
256 void Rebaser<A>::adjustLoadCommands()
258 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
259 const uint32_t cmd_count = fHeader->ncmds();
260 const macho_load_command<P>* cmd = cmds;
261 for (uint32_t i = 0; i < cmd_count; ++i) {
262 switch ( cmd->cmd() ) {
263 case LC_ID_DYLIB:
264 if ( (fHeader->flags() & MH_PREBOUND) != 0 ) {
265 // clear timestamp so that any prebound clients are invalidated
266 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
267 dylib->set_timestamp(1);
269 break;
270 case LC_LOAD_DYLIB:
271 case LC_LOAD_WEAK_DYLIB:
272 case LC_REEXPORT_DYLIB:
273 case LC_LOAD_UPWARD_DYLIB:
274 if ( (fHeader->flags() & MH_PREBOUND) != 0 ) {
275 // clear expected timestamps so that this image will load with invalid prebinding
276 macho_dylib_command<P>* dylib = (macho_dylib_command<P>*)cmd;
277 dylib->set_timestamp(2);
279 break;
280 case macho_routines_command<P>::CMD:
281 // update -init command
283 struct macho_routines_command<P>* routines = (struct macho_routines_command<P>*)cmd;
284 routines->set_init_address(routines->init_address() + this->getSlideForVMAddress(routines->init_address()));
286 break;
287 case macho_segment_command<P>::CMD:
288 // update segment commands
290 macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
291 this->adjustSegmentLoadCommand(seg);
292 pint_t slide = this->getSlideForVMAddress(seg->vmaddr());
293 seg->set_vmaddr(seg->vmaddr() + slide);
294 macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
295 macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
296 for(macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
297 sect->set_addr(sect->addr() + slide);
300 break;
302 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
308 template <typename A>
309 typename A::P::uint_t Rebaser<A>::getSlideForVMAddress(pint_t vmaddress)
311 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
312 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
313 const MachOLayoutAbstraction::Segment& seg = *it;
314 if ( (seg.address() <= vmaddress) && (seg.size() != 0) && ((vmaddress < (seg.address()+seg.size())) || (seg.address() == vmaddress)) ) {
315 return seg.newAddress() - seg.address();
318 throwf("vm address 0x%08llX not found", (uint64_t)vmaddress);
322 template <typename A>
323 typename A::P::uint_t* Rebaser<A>::mappedAddressForVMAddress(pint_t vmaddress)
325 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
326 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
327 const MachOLayoutAbstraction::Segment& seg = *it;
328 if ( (seg.address() <= vmaddress) && (vmaddress < (seg.address()+seg.size())) ) {
329 return (pint_t*)((vmaddress - seg.address()) + (uint8_t*)seg.mappedAddress());
332 throwf("mappedAddressForVMAddress(0x%08llX) not found", (uint64_t)vmaddress);
335 template <typename A>
336 typename A::P::uint_t* Rebaser<A>::mappedAddressForNewAddress(pint_t vmaddress)
338 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
339 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
340 const MachOLayoutAbstraction::Segment& seg = *it;
341 if ( (seg.newAddress() <= vmaddress) && (vmaddress < (seg.newAddress()+seg.size())) ) {
342 return (pint_t*)((vmaddress - seg.newAddress()) + (uint8_t*)seg.mappedAddress());
345 throwf("mappedAddressForNewAddress(0x%08llX) not found", (uint64_t)vmaddress);
348 template <typename A>
349 typename A::P::uint_t Rebaser<A>::getSlideForNewAddress(pint_t newAddress)
351 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
352 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
353 const MachOLayoutAbstraction::Segment& seg = *it;
354 if ( (seg.newAddress() <= newAddress) && (newAddress < (seg.newAddress()+seg.size())) ) {
355 return seg.newAddress() - seg.address();
358 throwf("new address 0x%08llX not found", (uint64_t)newAddress);
361 template <typename A>
362 typename A::P::uint_t* Rebaser<A>::mappedAddressForRelocAddress(pint_t r_address)
364 if ( fOrignalVMRelocBaseAddressValid )
365 return this->mappedAddressForVMAddress(r_address + fOrignalVMRelocBaseAddress);
366 else
367 throw "can't apply relocation. Relocation base not known";
371 template <>
372 void Rebaser<arm>::makeNoPicStub(uint8_t* stub, pint_t logicalAddress)
374 uint32_t* instructions = (uint32_t*)stub;
375 if ( (LittleEndian::get32(instructions[0]) == 0xE59FC004) &&
376 (LittleEndian::get32(instructions[1]) == 0xE08FC00C) &&
377 (LittleEndian::get32(instructions[2]) == 0xE59CF000) ) {
378 uint32_t lazyPtrAddress = instructions[3] + logicalAddress + 12;
379 LittleEndian::set32(instructions[0], 0xE59FC000); // ldr ip, [pc, #0]
380 LittleEndian::set32(instructions[1], 0xE59CF000); // ldr pc, [ip]
381 LittleEndian::set32(instructions[2], lazyPtrAddress); // .long L_foo$lazy_ptr
382 LittleEndian::set32(instructions[3], 0xE1A00000); // nop
384 else
385 fprintf(stderr, "unoptimized stub in %s at 0x%08X\n", fLayout.getFilePath(), logicalAddress);
#if 0
// This optimization is disabled to allow the cache to slide.
//
// Walks every S_SYMBOL_STUBS section whose stub size is 16 bytes and rewrites
// each stub with makeNoPicStub().
template <>
void Rebaser<arm>::optimzeStubs()
{
	// convert pic stubs to no-pic stubs in dyld shared cache
	const uint32_t commandCount = fHeader->ncmds();
	const macho_load_command<P>* cmd = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
	for (uint32_t index = 0; index < commandCount; ++index) {
		if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
			macho_segment_command<P>* segCmd = (macho_segment_command<P>*)cmd;
			macho_section<P>* const firstSection = (macho_section<P>*)((char*)segCmd + sizeof(macho_segment_command<P>));
			macho_section<P>* const pastLastSection = &firstSection[segCmd->nsects()];
			for (macho_section<P>* section = firstSection; section < pastLastSection; ++section) {
				if ( (section->flags() & SECTION_TYPE) != S_SYMBOL_STUBS )
					continue;
				const uint32_t stubSize = section->reserved2();
				// ARM PIC stubs are 4 32-bit instructions long
				if ( stubSize != 16 )
					continue;
				uint32_t stubCount = section->size() / 16;
				pint_t stubLogicalAddress = section->addr();
				uint8_t* stubMappedAddress = (uint8_t*)mappedAddressForNewAddress(stubLogicalAddress);
				for (uint32_t s=0; s < stubCount; ++s) {
					makeNoPicStub(stubMappedAddress, stubLogicalAddress);
					stubLogicalAddress += 16;
					stubMappedAddress += 16;
				}
			}
		}
		cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
	}
}
#endif
425 template <typename A>
426 void Rebaser<A>::optimzeStubs()
428 // other architectures don't need stubs changed in shared cache
431 template <typename A>
432 void Rebaser<A>::adjustSymbolTable()
434 macho_nlist<P>* symbolTable = (macho_nlist<P>*)(&fLinkEditBase[fSymbolTable->symoff()]);
436 // walk all exports and slide their n_value
437 macho_nlist<P>* lastExport = &symbolTable[fDynamicSymbolTable->iextdefsym()+fDynamicSymbolTable->nextdefsym()];
438 for (macho_nlist<P>* entry = &symbolTable[fDynamicSymbolTable->iextdefsym()]; entry < lastExport; ++entry) {
439 if ( (entry->n_type() & N_TYPE) == N_SECT )
440 entry->set_n_value(entry->n_value() + this->getSlideForVMAddress(entry->n_value()));
443 // walk all local symbols and slide their n_value (don't adjust any stabs)
444 macho_nlist<P>* lastLocal = &symbolTable[fDynamicSymbolTable->ilocalsym()+fDynamicSymbolTable->nlocalsym()];
445 for (macho_nlist<P>* entry = &symbolTable[fDynamicSymbolTable->ilocalsym()]; entry < lastLocal; ++entry) {
446 if ( (entry->n_sect() != NO_SECT) && ((entry->n_type() & N_STAB) == 0) )
447 entry->set_n_value(entry->n_value() + this->getSlideForVMAddress(entry->n_value()));
451 template <typename A>
452 void Rebaser<A>::adjustExportInfo()
454 // if no export info, nothing to adjust
455 if ( fDyldInfo->export_size() == 0 )
456 return;
458 // since export info addresses are offsets from mach_header, everything in __TEXT is fine
459 // only __DATA addresses need to be updated
460 const uint8_t* start = fLayout.getDyldInfoExports();
461 const uint8_t* end = &start[fDyldInfo->export_size()];
462 std::vector<mach_o::trie::Entry> originalExports;
463 try {
464 parseTrie(start, end, originalExports);
466 catch (const char* msg) {
467 throwf("%s in %s", msg, fLayout.getFilePath());
470 std::vector<mach_o::trie::Entry> newExports;
471 newExports.reserve(originalExports.size());
472 pint_t baseAddress = this->getBaseAddress();
473 pint_t baseAddressSlide = this->getSlideForVMAddress(baseAddress);
474 for (std::vector<mach_o::trie::Entry>::iterator it=originalExports.begin(); it != originalExports.end(); ++it) {
475 // remove symbols used by the static linker only
476 if ( (strncmp(it->name, "$ld$", 4) == 0)
477 || (strncmp(it->name, ".objc_class_name",16) == 0)
478 || (strncmp(it->name, ".objc_category_name",19) == 0) ) {
479 //fprintf(stderr, "ignoring symbol %s\n", it->name);
480 continue;
482 // adjust symbols in slid segments
483 //uint32_t oldOffset = it->address;
484 it->address += (this->getSlideForVMAddress(it->address + baseAddress) - baseAddressSlide);
485 //fprintf(stderr, "orig=0x%08X, new=0x%08llX, sym=%s\n", oldOffset, it->address, it->name);
486 newExports.push_back(*it);
489 // rebuild export trie
490 std::vector<uint8_t> newExportTrieBytes;
491 newExportTrieBytes.reserve(fDyldInfo->export_size());
492 mach_o::trie::makeTrie(newExports, newExportTrieBytes);
493 // align
494 while ( (newExportTrieBytes.size() % sizeof(pint_t)) != 0 )
495 newExportTrieBytes.push_back(0);
497 // allocate new buffer and set export_off to use new buffer instead
498 uint32_t newExportsSize = newExportTrieBytes.size();
499 uint8_t* sideTrie = new uint8_t[newExportsSize];
500 memcpy(sideTrie, &newExportTrieBytes[0], newExportsSize);
501 fLayout.setDyldInfoExports(sideTrie);
502 ((macho_dyld_info_command<P>*)fDyldInfo)->set_export_off(0); // invalidate old trie
503 ((macho_dyld_info_command<P>*)fDyldInfo)->set_export_size(newExportsSize);
508 template <typename A>
509 void Rebaser<A>::doCodeUpdate(uint8_t kind, uint64_t address, int64_t codeToDataDelta, int64_t codeToImportDelta)
511 // begin hack for <rdar://problem/8253549> split seg info wrong for x86_64 stub helpers
512 if ( (fSkipSplitSegInfoStart <= address) && (address < fSkipSplitSegInfoEnd) ) {
513 uint8_t* p = (uint8_t*)mappedAddressForVMAddress(address);
514 // only ignore split seg info for "push" instructions
515 if ( p[-1] == 0x68 )
516 return;
518 // end hack for <rdar://problem/8253549>
520 //fprintf(stderr, "doCodeUpdate(kind=%d, address=0x%0llX, dataDelta=0x%08llX, importDelta=0x%08llX, path=%s)\n",
521 // kind, address, codeToDataDelta, codeToImportDelta, fLayout.getFilePath());
522 uint32_t* p;
523 uint32_t instruction;
524 uint32_t value;
525 uint64_t value64;
526 switch (kind) {
527 case 1: // 32-bit pointer
528 p = (uint32_t*)mappedAddressForVMAddress(address);
529 value = A::P::E::get32(*p);
530 value += codeToDataDelta;
531 A::P::E::set32(*p, value);
532 break;
533 case 2: // 64-bit pointer
534 p = (uint32_t*)mappedAddressForVMAddress(address);
535 value64 = A::P::E::get64(*(uint64_t*)p);
536 value64 += codeToDataDelta;
537 A::P::E::set64(*(uint64_t*)p, value64);
538 break;
539 case 3: // used only for ppc, an instruction that sets the hi16 of a register
540 // adjust low 16 bits of instruction which contain hi16 of distance to something in DATA
541 if ( (codeToDataDelta & 0xFFFF) != 0 )
542 throwf("codeToDataDelta=0x%0llX is not a multiple of 64K", codeToDataDelta);
543 p = (uint32_t*)mappedAddressForVMAddress(address);
544 instruction = BigEndian::get32(*p);
546 uint16_t originalLo16 = instruction & 0x0000FFFF;
547 uint16_t delta64Ks = codeToDataDelta >> 16;
548 instruction = (instruction & 0xFFFF0000) | ((originalLo16+delta64Ks) & 0x0000FFFF);
550 BigEndian::set32(*p, instruction);
551 break;
552 case 4: // only used for i386, a reference to something in the IMPORT segment
553 p = (uint32_t*)mappedAddressForVMAddress(address);
554 value = A::P::E::get32(*p);
555 value += codeToImportDelta;
556 A::P::E::set32(*p, value);
557 break;
558 default:
559 throwf("invalid kind=%d in split seg info", kind);
563 template <typename A>
564 const uint8_t* Rebaser<A>::doCodeUpdateForEachULEB128Address(const uint8_t* p, uint8_t kind, uint64_t orgBaseAddress, int64_t codeToDataDelta, int64_t codeToImportDelta)
566 uint64_t address = 0;
567 uint64_t delta = 0;
568 uint32_t shift = 0;
569 bool more = true;
570 do {
571 uint8_t byte = *p++;
572 delta |= ((byte & 0x7F) << shift);
573 shift += 7;
574 if ( byte < 0x80 ) {
575 if ( delta != 0 ) {
576 address += delta;
577 doCodeUpdate(kind, address+orgBaseAddress, codeToDataDelta, codeToImportDelta);
578 delta = 0;
579 shift = 0;
581 else {
582 more = false;
585 } while (more);
586 return p;
589 template <typename A>
590 void Rebaser<A>::adjustCode()
592 if ( fSplittingSegments ) {
593 // get uleb128 compressed runs of code addresses to update
594 const uint8_t* infoStart = NULL;
595 const uint8_t* infoEnd = NULL;
596 const macho_segment_command<P>* seg;
597 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
598 const uint32_t cmd_count = fHeader->ncmds();
599 const macho_load_command<P>* cmd = cmds;
600 for (uint32_t i = 0; i < cmd_count; ++i) {
601 switch (cmd->cmd()) {
602 case LC_SEGMENT_SPLIT_INFO:
604 const macho_linkedit_data_command<P>* segInfo = (macho_linkedit_data_command<P>*)cmd;
605 infoStart = &fLinkEditBase[segInfo->dataoff()];
606 infoEnd = &infoStart[segInfo->datasize()];
608 break;
609 // begin hack for <rdar://problem/8253549> split seg info wrong for x86_64 stub helpers
610 case macho_segment_command<P>::CMD:
611 seg = (macho_segment_command<P>*)cmd;
612 if ( (getArchitecture() == CPU_TYPE_X86_64) && (strcmp(seg->segname(), "__TEXT") == 0) ) {
613 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
614 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
615 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
616 if ( strcmp(sect->sectname(), "__stub_helper") == 0 ) {
617 fSkipSplitSegInfoStart = sect->addr();
618 fSkipSplitSegInfoEnd = sect->addr() + sect->size() - 16;
622 break;
623 // end hack for <rdar://problem/8253549> split seg info wrong for x86_64 stub helpers
625 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
627 // calculate how much we need to slide writable segments
628 const uint64_t orgBaseAddress = this->getBaseAddress();
629 int64_t codeToDataDelta = 0;
630 int64_t codeToImportDelta = 0;
631 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
632 const MachOLayoutAbstraction::Segment& codeSeg = segments[0];
633 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
634 const MachOLayoutAbstraction::Segment& dataSeg = *it;
635 if ( strcmp(dataSeg.name(), "__IMPORT") == 0 )
636 codeToImportDelta = (dataSeg.newAddress() - codeSeg.newAddress()) - (dataSeg.address() - codeSeg.address());
637 else if ( dataSeg.writable() )
638 codeToDataDelta = (dataSeg.newAddress() - codeSeg.newAddress()) - (dataSeg.address() - codeSeg.address());
640 // decompress and call doCodeUpdate() on each address
641 for(const uint8_t* p = infoStart; (*p != 0) && (p < infoEnd);) {
642 uint8_t kind = *p++;
643 p = this->doCodeUpdateForEachULEB128Address(p, kind, orgBaseAddress, codeToDataDelta, codeToImportDelta);
648 template <typename A>
649 void Rebaser<A>::doRebase(int segIndex, uint64_t segOffset, uint8_t type, std::vector<void*>& pointersInData)
651 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
652 if ( segIndex > segments.size() )
653 throw "bad segment index in rebase info";
654 const MachOLayoutAbstraction::Segment& seg = segments[segIndex];
655 uint8_t* mappedAddr = (uint8_t*)seg.mappedAddress() + segOffset;
656 pint_t* mappedAddrP = (pint_t*)mappedAddr;
657 uint32_t* mappedAddr32 = (uint32_t*)mappedAddr;
658 pint_t valueP;
659 pint_t valuePnew;
660 uint32_t value32;
661 int32_t svalue32;
662 int32_t svalue32new;
663 switch ( type ) {
664 case REBASE_TYPE_POINTER:
665 valueP= P::getP(*mappedAddrP);
666 P::setP(*mappedAddrP, valueP + this->getSlideForVMAddress(valueP));
667 break;
669 case REBASE_TYPE_TEXT_ABSOLUTE32:
670 value32 = E::get32(*mappedAddr32);
671 E::set32(*mappedAddr32, value32 + this->getSlideForVMAddress(value32));
672 break;
674 case REBASE_TYPE_TEXT_PCREL32:
675 svalue32 = E::get32(*mappedAddr32);
676 valueP = seg.address() + segOffset + 4 + svalue32;
677 valuePnew = valueP + this->getSlideForVMAddress(valueP);
678 svalue32new = seg.address() + segOffset + 4 - valuePnew;
679 E::set32(*mappedAddr32, svalue32new);
680 break;
682 default:
683 throw "bad rebase type";
685 pointersInData.push_back(mappedAddr);
689 template <typename A>
690 void Rebaser<A>::applyRebaseInfo(std::vector<void*>& pointersInData)
692 const uint8_t* p = &fLinkEditBase[fDyldInfo->rebase_off()];
693 const uint8_t* end = &p[fDyldInfo->rebase_size()];
695 uint8_t type = 0;
696 int segIndex;
697 uint64_t segOffset = 0;
698 uint32_t count;
699 uint32_t skip;
700 bool done = false;
701 while ( !done && (p < end) ) {
702 uint8_t immediate = *p & REBASE_IMMEDIATE_MASK;
703 uint8_t opcode = *p & REBASE_OPCODE_MASK;
704 ++p;
705 switch (opcode) {
706 case REBASE_OPCODE_DONE:
707 done = true;
708 break;
709 case REBASE_OPCODE_SET_TYPE_IMM:
710 type = immediate;
711 break;
712 case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
713 segIndex = immediate;
714 segOffset = read_uleb128(p, end);
715 break;
716 case REBASE_OPCODE_ADD_ADDR_ULEB:
717 segOffset += read_uleb128(p, end);
718 break;
719 case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
720 segOffset += immediate*sizeof(pint_t);
721 break;
722 case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
723 for (int i=0; i < immediate; ++i) {
724 doRebase(segIndex, segOffset, type, pointersInData);
725 segOffset += sizeof(pint_t);
727 break;
728 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
729 count = read_uleb128(p, end);
730 for (uint32_t i=0; i < count; ++i) {
731 doRebase(segIndex, segOffset, type, pointersInData);
732 segOffset += sizeof(pint_t);
734 break;
735 case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
736 doRebase(segIndex, segOffset, type, pointersInData);
737 segOffset += read_uleb128(p, end) + sizeof(pint_t);
738 break;
739 case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
740 count = read_uleb128(p, end);
741 skip = read_uleb128(p, end);
742 for (uint32_t i=0; i < count; ++i) {
743 doRebase(segIndex, segOffset, type, pointersInData);
744 segOffset += skip + sizeof(pint_t);
746 break;
747 default:
748 throwf("bad rebase opcode %d", *p);
753 template <typename A>
754 void Rebaser<A>::adjustDATA()
756 // walk all local relocations and slide every pointer
757 const macho_relocation_info<P>* const relocsStart = (macho_relocation_info<P>*)(&fLinkEditBase[fDynamicSymbolTable->locreloff()]);
758 const macho_relocation_info<P>* const relocsEnd = &relocsStart[fDynamicSymbolTable->nlocrel()];
759 for (const macho_relocation_info<P>* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
760 this->doLocalRelocation(reloc);
763 // walk non-lazy-pointers and slide the ones that are LOCAL
764 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((uint8_t*)fHeader + sizeof(macho_header<P>));
765 const uint32_t cmd_count = fHeader->ncmds();
766 const macho_load_command<P>* cmd = cmds;
767 for (uint32_t i = 0; i < cmd_count; ++i) {
768 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
769 const macho_segment_command<P>* seg = (macho_segment_command<P>*)cmd;
770 const macho_section<P>* const sectionsStart = (macho_section<P>*)((char*)seg + sizeof(macho_segment_command<P>));
771 const macho_section<P>* const sectionsEnd = &sectionsStart[seg->nsects()];
772 const uint32_t* const indirectTable = (uint32_t*)(&fLinkEditBase[fDynamicSymbolTable->indirectsymoff()]);
773 for(const macho_section<P>* sect = sectionsStart; sect < sectionsEnd; ++sect) {
774 if ( (sect->flags() & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS ) {
775 const uint32_t indirectTableOffset = sect->reserved1();
776 uint32_t pointerCount = sect->size() / sizeof(pint_t);
777 pint_t* nonLazyPointerAddr = this->mappedAddressForVMAddress(sect->addr());
778 for (uint32_t j=0; j < pointerCount; ++j, ++nonLazyPointerAddr) {
779 if ( E::get32(indirectTable[indirectTableOffset + j]) == INDIRECT_SYMBOL_LOCAL ) {
780 pint_t value = A::P::getP(*nonLazyPointerAddr);
781 P::setP(*nonLazyPointerAddr, value + this->getSlideForVMAddress(value));
787 cmd = (const macho_load_command<P>*)(((uint8_t*)cmd)+cmd->cmdsize());
792 template <typename A>
793 void Rebaser<A>::adjustRelocBaseAddresses()
795 // split seg file need reloc base to be first writable segment
796 if ( fSplittingSegments && ((fHeader->flags() & MH_SPLIT_SEGS) == 0) ) {
798 // get amount to adjust reloc address
799 int32_t relocAddressAdjust = 0;
800 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
801 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
802 const MachOLayoutAbstraction::Segment& seg = *it;
803 if ( seg.writable() ) {
804 relocAddressAdjust = seg.address() - segments[0].address();
805 break;
809 // walk all local relocations and adjust every address
810 macho_relocation_info<P>* const relocsStart = (macho_relocation_info<P>*)(&fLinkEditBase[fDynamicSymbolTable->locreloff()]);
811 macho_relocation_info<P>* const relocsEnd = &relocsStart[fDynamicSymbolTable->nlocrel()];
812 for (macho_relocation_info<P>* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
813 reloc->set_r_address(reloc->r_address()-relocAddressAdjust);
816 // walk all external relocations and adjust every address
817 macho_relocation_info<P>* const externRelocsStart = (macho_relocation_info<P>*)(&fLinkEditBase[fDynamicSymbolTable->extreloff()]);
818 macho_relocation_info<P>* const externRelocsEnd = &externRelocsStart[fDynamicSymbolTable->nextrel()];
819 for (macho_relocation_info<P>* reloc=externRelocsStart; reloc < externRelocsEnd; ++reloc) {
820 reloc->set_r_address(reloc->r_address()-relocAddressAdjust);
825 template <>
826 void Rebaser<x86_64>::adjustRelocBaseAddresses()
828 // x86_64 already have reloc base of first writable segment
832 template <>
833 void Rebaser<x86_64>::doLocalRelocation(const macho_relocation_info<x86_64::P>* reloc)
835 if ( reloc->r_type() == X86_64_RELOC_UNSIGNED ) {
836 pint_t* addr = this->mappedAddressForRelocAddress(reloc->r_address());
837 pint_t value = P::getP(*addr);
838 P::setP(*addr, value + this->getSlideForVMAddress(value));
840 else {
841 throw "invalid relocation type";
845 template <>
846 void Rebaser<ppc>::doLocalRelocation(const macho_relocation_info<P>* reloc)
848 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
849 if ( reloc->r_type() == GENERIC_RELOC_VANILLA ) {
850 pint_t* addr = this->mappedAddressForRelocAddress(reloc->r_address());
851 pint_t value = P::getP(*addr);
852 P::setP(*addr, value + this->getSlideForVMAddress(value));
855 else {
856 macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
857 if ( sreloc->r_type() == PPC_RELOC_PB_LA_PTR ) {
858 sreloc->set_r_value( sreloc->r_value() + this->getSlideForVMAddress(sreloc->r_value()) );
860 else {
861 throw "cannot rebase final linked image with scattered relocations";
866 template <>
867 void Rebaser<x86>::doLocalRelocation(const macho_relocation_info<P>* reloc)
869 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
870 if ( reloc->r_type() == GENERIC_RELOC_VANILLA ) {
871 pint_t* addr = this->mappedAddressForRelocAddress(reloc->r_address());
872 pint_t value = P::getP(*addr);
873 P::setP(*addr, value + this->getSlideForVMAddress(value));
876 else {
877 macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
878 if ( sreloc->r_type() == GENERIC_RELOC_PB_LA_PTR ) {
879 sreloc->set_r_value( sreloc->r_value() + this->getSlideForVMAddress(sreloc->r_value()) );
881 else {
882 throw "cannot rebase final linked image with scattered relocations";
887 template <typename A>
888 void Rebaser<A>::doLocalRelocation(const macho_relocation_info<P>* reloc)
890 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
891 if ( reloc->r_type() == GENERIC_RELOC_VANILLA ) {
892 pint_t* addr = this->mappedAddressForRelocAddress(reloc->r_address());
893 pint_t value = P::getP(*addr);
894 P::setP(*addr, value + this->getSlideForVMAddress(value));
897 else {
898 throw "cannot rebase final linked image with scattered relocations";
903 template <typename A>
904 void Rebaser<A>::calculateRelocBase()
906 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
907 if ( fHeader->flags() & MH_SPLIT_SEGS ) {
908 // reloc addresses are from the start of the first writable segment
909 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
910 const MachOLayoutAbstraction::Segment& seg = *it;
911 if ( seg.writable() ) {
912 // found first writable segment
913 fOrignalVMRelocBaseAddress = seg.address();
914 fOrignalVMRelocBaseAddressValid = true;
918 else {
919 // reloc addresses are from the start of the mapped file (base address)
920 fOrignalVMRelocBaseAddress = segments[0].address();
921 fOrignalVMRelocBaseAddressValid = true;
926 template <>
927 void Rebaser<x86_64>::calculateRelocBase()
929 // reloc addresses are always based from the start of the first writable segment
930 const std::vector<MachOLayoutAbstraction::Segment>& segments = fLayout.getSegments();
931 for(std::vector<MachOLayoutAbstraction::Segment>::const_iterator it = segments.begin(); it != segments.end(); ++it) {
932 const MachOLayoutAbstraction::Segment& seg = *it;
933 if ( seg.writable() ) {
934 // found first writable segment
935 fOrignalVMRelocBaseAddress = seg.address();
936 fOrignalVMRelocBaseAddressValid = true;
#if 0
// Disabled (compiled out) helper that maps a Mach-O file, thin or fat, into
// memory and creates one Rebaser per architecture slice it recognizes.
// NOTE(review): the Rebaser objects pushed onto fRebasers are never deleted by
// this class; confirm ownership before re-enabling.
class MultiArchRebaser
{
public:
	// Maps 'path' (read-write and shared when 'writable', otherwise read-only and
	// private) and builds a Rebaser for each supported slice.
	// Throws via throwf() if the file cannot be opened, stat'ed or mapped, or is
	// smaller than 20 bytes. Slices with an unrecognized format only produce a
	// warning on stderr.
	MultiArchRebaser(const char* path, bool writable=false)
	 : fMappingAddress(0), fFileSize(0)
	{
		// map in whole file
		int fd = ::open(path, (writable ? O_RDWR : O_RDONLY), 0);
		if ( fd == -1 )
			throwf("can't open file, errno=%d", errno);
		struct stat stat_buf;
		if ( fstat(fd, &stat_buf) == -1) {
			const int statErrno = errno;	// close() may overwrite errno
			::close(fd);					// don't leak the descriptor on failure
			throwf("can't stat open file %s, errno=%d", path, statErrno);
		}
		if ( stat_buf.st_size < 20 ) {
			::close(fd);					// don't leak the descriptor on failure
			throwf("file too small %s", path);
		}
		const int prot = writable ? (PROT_READ | PROT_WRITE) : PROT_READ;
		const int flags = writable ? (MAP_FILE | MAP_SHARED) : (MAP_FILE | MAP_PRIVATE);
		uint8_t* p = (uint8_t*)::mmap(NULL, stat_buf.st_size, prot, flags, fd, 0);
		const int mapErrno = errno;			// capture before close() can change it
		// the descriptor is no longer needed whether or not the mapping succeeded
		::close(fd);
		if ( p == (uint8_t*)MAP_FAILED )
			throwf("can't map file %s, errno=%d", path, mapErrno);

		// if fat file, process each architecture
		const fat_header* fh = (fat_header*)p;
		const mach_header* mh = (mach_header*)p;
		if ( fh->magic == OSSwapBigToHostInt32(FAT_MAGIC) ) {
			// Fat header is always big-endian
			const struct fat_arch* archs = (struct fat_arch*)(p + sizeof(struct fat_header));
			for (unsigned long i=0; i < OSSwapBigToHostInt32(fh->nfat_arch); ++i) {
				uint32_t fileOffset = OSSwapBigToHostInt32(archs[i].offset);
				try {
					switch ( OSSwapBigToHostInt32(archs[i].cputype) ) {
						case CPU_TYPE_POWERPC:
							fRebasers.push_back(new Rebaser<ppc>(&p[fileOffset]));
							break;
						case CPU_TYPE_I386:
							fRebasers.push_back(new Rebaser<x86>(&p[fileOffset]));
							break;
						case CPU_TYPE_X86_64:
							fRebasers.push_back(new Rebaser<x86_64>(&p[fileOffset]));
							break;
						case CPU_TYPE_ARM:
							fRebasers.push_back(new Rebaser<arm>(&p[fileOffset]));
							break;
						default:
							throw "unknown file format";
					}
				}
				catch (const char* msg) {
					// a bad slice is reported but does not stop the other slices
					fprintf(stderr, "rebase warning: %s for %s\n", msg, path);
				}
			}
		}
		else {
			// thin file: pick the Rebaser from the header's magic and cputype
			try {
				if ( (OSSwapBigToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapBigToHostInt32(mh->cputype) == CPU_TYPE_POWERPC)) {
					fRebasers.push_back(new Rebaser<ppc>(mh));
				}
				else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_I386)) {
					fRebasers.push_back(new Rebaser<x86>(mh));
				}
				else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC_64) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_X86_64)) {
					fRebasers.push_back(new Rebaser<x86_64>(mh));
				}
				else if ( (OSSwapLittleToHostInt32(mh->magic) == MH_MAGIC) && (OSSwapLittleToHostInt32(mh->cputype) == CPU_TYPE_ARM)) {
					fRebasers.push_back(new Rebaser<arm>(mh));
				}
				else {
					throw "unknown file format";
				}
			}
			catch (const char* msg) {
				fprintf(stderr, "rebase warning: %s for %s\n", msg, path);
			}
		}

		fMappingAddress = p;
		fFileSize = stat_buf.st_size;
	}

	// Unmaps the file mapping created by the constructor.
	~MultiArchRebaser() {::munmap(fMappingAddress, fFileSize); }

	// Returns the rebasers created for the slices of the mapped file.
	const std::vector<AbstractRebaser*>&		getArchs() const { return fRebasers; }
	// Schedules an asynchronous write-back (MS_ASYNC) of the mapping.
	void										commit()		{ ::msync(fMappingAddress, fFileSize, MS_ASYNC); }

private:
	std::vector<AbstractRebaser*>				fRebasers;			// one entry per recognized slice
	void*										fMappingAddress;	// start of the mmap'ed file
	uint64_t									fFileSize;			// length of the mapping in bytes
};
#endif
1037 #endif // __MACHO_REBASER__