128-2
[darwin-xtools.git] / ld64 / src / ld / parsers / macho_relocatable_file.cpp
blobe79d2618c01215181f594fdcc15b39e98bc499d9
/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
 *
 * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <math.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <sys/param.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
35 #include "MachOFileAbstraction.hpp"
37 #include "libunwind/DwarfInstructions.hpp"
38 #include "libunwind/AddressSpace.hpp"
39 #include "libunwind/Registers.hpp"
41 #include <vector>
42 #include <set>
43 #include <map>
44 #include <algorithm>
46 #include "dwarf2.h"
47 #include "debugline.h"
49 #include "Architectures.hpp"
50 #include "ld.hpp"
51 #include "macho_relocatable_file.h"
// Reporting helpers defined outside this file.  Both take a printf-style
// format string (checked by the compiler via the format attribute), and
// throwf() is additionally declared as never returning.
extern void throwf(const char* format, ...) __attribute__ ((noreturn,format(printf, 1, 2)));
extern void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
58 namespace mach_o {
59 namespace relocatable {
// forward references: these templates refer to one another, so they are
// declared up front before any of them is defined
template <typename A> class Parser;
template <typename A> class Atom;
template <typename A> class Section;
template <typename A> class CFISection;
template <typename A> class CUSection;
69 template <typename A>
70 class File : public ld::relocatable::File
72 public:
73 File(const char* p, time_t mTime, const uint8_t* content, uint32_t ord) :
74 ld::relocatable::File(p,mTime,ord), _fileContent(content),
75 _sectionsArray(NULL), _atomsArray(NULL),
76 _sectionsArrayCount(0), _atomsArrayCount(0),
77 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
78 _dwarfTranslationUnitDir(NULL), _dwarfTranslationUnitFile(NULL),
79 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
80 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
81 _objConstraint(ld::File::objcConstraintNone),
82 _cpuSubType(0),
83 _ojcReplacmentClass(false), _canScatterAtoms(false) {}
84 virtual ~File();
86 // overrides of ld::File
87 virtual bool forEachAtom(ld::File::AtomHandler&) const;
88 virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
89 { return false; }
91 // overrides of ld::relocatable::File
92 virtual bool objcReplacementClasses() const { return _ojcReplacmentClass; }
93 virtual ObjcConstraint objCConstraint() const { return _objConstraint; }
94 virtual uint32_t cpuSubType() const { return _cpuSubType; }
95 virtual DebugInfoKind debugInfo() const { return _debugInfoKind; }
96 virtual const std::vector<ld::relocatable::File::Stab>* stabs() const { return &_stabs; }
97 virtual bool canScatterAtoms() const { return _canScatterAtoms; }
98 bool translationUnitSource(const char** dir, const char** name) const;
100 const uint8_t* fileContent() { return _fileContent; }
101 private:
102 friend class Atom<A>;
103 friend class Section<A>;
104 friend class Parser<A>;
105 friend class CFISection<A>::OAS;
107 typedef typename A::P P;
109 const uint8_t* _fileContent;
110 Section<A>** _sectionsArray;
111 uint8_t* _atomsArray;
112 uint32_t _sectionsArrayCount;
113 uint32_t _atomsArrayCount;
114 std::vector<ld::Fixup> _fixups;
115 std::vector<ld::Atom::UnwindInfo> _unwindInfos;
116 std::vector<ld::Atom::LineInfo> _lineInfos;
117 std::vector<ld::relocatable::File::Stab>_stabs;
118 ld::relocatable::File::DebugInfoKind _debugInfoKind;
119 const char* _dwarfTranslationUnitDir;
120 const char* _dwarfTranslationUnitFile;
121 const macho_section<P>* _dwarfDebugInfoSect;
122 const macho_section<P>* _dwarfDebugAbbrevSect;
123 const macho_section<P>* _dwarfDebugLineSect;
124 const macho_section<P>* _dwarfDebugStringSect;
125 ld::File::ObjcConstraint _objConstraint;
126 uint32_t _cpuSubType;
127 bool _ojcReplacmentClass;
128 bool _canScatterAtoms;
132 template <typename A>
133 class Section : public ld::Section
135 public:
136 typedef typename A::P::uint_t pint_t;
137 typedef typename A::P P;
138 typedef typename A::P::E E;
140 virtual ~Section() { }
141 class File<A>& file() const { return _file; }
142 const macho_section<P>* machoSection() const { return _machOSection; }
143 uint32_t sectionNum(class Parser<A>&) const;
144 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr);
145 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeUnclassified; }
146 virtual bool dontDeadStrip() { return (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
147 virtual Atom<A>* findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
148 virtual bool addFollowOnFixups() const { return ! _file.canScatterAtoms(); }
149 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
150 struct Parser<A>::LabelAndCFIBreakIterator& it,
151 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
152 virtual uint32_t computeAtomCount(class Parser<A>& parser,
153 struct Parser<A>::LabelAndCFIBreakIterator& it,
154 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
155 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
156 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
157 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
158 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
159 const ld::IndirectBindingTable& ind) const { return false; }
160 static const char* makeSectionName(const macho_section<typename A::P>* s);
162 protected:
163 Section(File<A>& f, const macho_section<typename A::P>* s)
164 : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
165 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
166 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
167 : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
168 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
171 Atom<A>* findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
172 uint32_t x86_64PcRelOffset(uint8_t r_type);
173 static const char* makeSegmentName(const macho_section<typename A::P>* s);
174 static bool readable(const macho_section<typename A::P>* s);
175 static bool writable(const macho_section<typename A::P>* s);
176 static bool exectuable(const macho_section<typename A::P>* s);
177 static ld::Section::Type sectionType(const macho_section<typename A::P>* s);
179 File<A>& _file;
180 const macho_section<P>* _machOSection;
181 class Atom<A>* _beginAtoms;
182 class Atom<A>* _endAtoms;
183 bool _hasAliases;
187 template <typename A>
188 class CFISection : public Section<A>
190 public:
191 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
192 : Section<A>(f, s) { }
193 uint32_t cfiCount();
195 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeCFI; }
196 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
197 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
198 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
199 virtual bool addFollowOnFixups() const { return false; }
203 /// ObjectFileAddressSpace is used as a template parameter to UnwindCursor for parsing
204 /// dwarf CFI information in an object file.
206 class OAS
208 public:
209 typedef typename A::P::uint_t pint_t;
210 typedef typename A::P P;
211 typedef typename A::P::E E;
212 typedef typename A::P::uint_t sint_t;
214 OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
215 _ehFrameSection(ehFrameSection),
216 _ehFrameContent(ehFrameBuffer),
217 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
218 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
220 uint8_t get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
221 uint16_t get16(pint_t addr) { return E::get16(*((uint16_t*)mappedAddress(addr))); }
222 uint32_t get32(pint_t addr) { return E::get32(*((uint32_t*)mappedAddress(addr))); }
223 uint64_t get64(pint_t addr) { return E::get64(*((uint64_t*)mappedAddress(addr))); }
224 pint_t getP(pint_t addr) { return P::getP(*((pint_t*)mappedAddress(addr))); }
225 uint64_t getULEB128(pint_t& addr, pint_t end);
226 int64_t getSLEB128(pint_t& addr, pint_t end);
227 pint_t getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
228 private:
229 const void* mappedAddress(pint_t addr);
231 CFISection<A>& _ehFrameSection;
232 const uint8_t* _ehFrameContent;
233 pint_t _ehFrameStartAddr;
234 pint_t _ehFrameEndAddr;
238 typedef typename A::P::uint_t pint_t;
239 typedef libunwind::CFI_Atom_Info<OAS> CFI_Atom_Info;
241 void cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t cfiCount);
242 bool needsRelocating();
244 static bool bigEndian();
245 private:
246 void addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
247 static void warnFunc(void* ref, uint64_t funcAddr, const char* msg);
251 template <typename A>
252 class CUSection : public Section<A>
254 public:
255 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
256 : Section<A>(f, s) { }
258 typedef typename A::P::uint_t pint_t;
259 typedef typename A::P P;
260 typedef typename A::P::E E;
262 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
263 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
264 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
265 virtual bool addFollowOnFixups() const { return false; }
267 struct Info {
268 pint_t functionStartAddress;
269 uint32_t functionSymbolIndex;
270 uint32_t rangeLength;
271 uint32_t compactUnwindInfo;
272 const char* personality;
273 pint_t lsdaAddress;
274 Atom<A>* function;
275 Atom<A>* lsda;
278 uint32_t count();
279 void parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
282 private:
284 const char* personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
286 static int infoSorter(const void* l, const void* r);
291 template <typename A>
292 class TentativeDefinitionSection : public Section<A>
294 public:
295 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
296 : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs) {}
298 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeZeroFill; }
299 virtual bool addFollowOnFixups() const { return false; }
300 virtual Atom<A>* findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
301 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
302 const struct Parser<A>::CFI_CU_InfoArrays&);
303 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
304 struct Parser<A>::LabelAndCFIBreakIterator& it,
305 const struct Parser<A>::CFI_CU_InfoArrays&);
306 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
307 private:
308 typedef typename A::P::uint_t pint_t;
309 typedef typename A::P P;
313 template <typename A>
314 class AbsoluteSymbolSection : public Section<A>
316 public:
317 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
318 : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true) {}
320 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeUnclassified; }
321 virtual bool dontDeadStrip() { return false; }
322 virtual ld::Atom::Alignment alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
323 virtual bool addFollowOnFixups() const { return false; }
324 virtual Atom<A>* findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
325 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
326 const struct Parser<A>::CFI_CU_InfoArrays&);
327 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
328 struct Parser<A>::LabelAndCFIBreakIterator& it,
329 const struct Parser<A>::CFI_CU_InfoArrays&);
330 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
331 virtual Atom<A>* findAbsAtomForValue(typename A::P::uint_t);
333 private:
334 typedef typename A::P::uint_t pint_t;
335 typedef typename A::P P;
339 template <typename A>
340 class SymboledSection : public Section<A>
342 public:
343 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
344 virtual ld::Atom::ContentType contentType() { return _type; }
345 virtual bool dontDeadStrip();
346 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
347 const struct Parser<A>::CFI_CU_InfoArrays&);
348 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer,
349 struct Parser<A>::LabelAndCFIBreakIterator& it,
350 const struct Parser<A>::CFI_CU_InfoArrays&);
351 protected:
352 typedef typename A::P::uint_t pint_t;
353 typedef typename A::P P;
355 ld::Atom::ContentType _type;
359 template <typename A>
360 class TLVDefsSection : public SymboledSection<A>
362 public:
363 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
364 SymboledSection<A>(parser, f, s) { }
366 private:
371 template <typename A>
372 class ImplicitSizeSection : public Section<A>
374 public:
375 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
376 : Section<A>(f, s) { }
377 virtual uint32_t computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
378 virtual uint32_t appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
379 protected:
380 typedef typename A::P::uint_t pint_t;
381 typedef typename A::P P;
383 virtual bool addFollowOnFixups() const { return false; }
384 virtual const char* unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
385 virtual ld::Atom::SymbolTableInclusion symbolTableInclusion() { return ld::Atom::symbolTableNotIn; }
386 virtual pint_t elementSizeAtAddress(pint_t addr) = 0;
387 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
388 virtual bool useElementAt(Parser<A>& parser,
389 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
390 virtual ld::Atom::Definition definition() { return ld::Atom::definitionRegular; }
391 virtual ld::Atom::Combine combine(Parser<A>& parser, pint_t addr) = 0;
392 virtual bool ignoreLabel(const char* label) { return (label[0] == 'L'); }
395 template <typename A>
396 class FixedSizeSection : public ImplicitSizeSection<A>
398 public:
399 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
400 : ImplicitSizeSection<A>(parser, f, s) { }
401 protected:
402 typedef typename A::P::uint_t pint_t;
403 typedef typename A::P P;
404 typedef typename A::P::E E;
406 virtual bool useElementAt(Parser<A>& parser,
407 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
408 { return true; }
412 template <typename A>
413 class Literal4Section : public FixedSizeSection<A>
415 public:
416 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
417 : FixedSizeSection<A>(parser, f, s) {}
418 protected:
419 typedef typename A::P::uint_t pint_t;
420 typedef typename A::P P;
422 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(2); }
423 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "4-byte-literal"; }
424 virtual pint_t elementSizeAtAddress(pint_t addr) { return 4; }
425 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
426 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
427 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
428 const ld::IndirectBindingTable& ind) const;
431 template <typename A>
432 class Literal8Section : public FixedSizeSection<A>
434 public:
435 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
436 : FixedSizeSection<A>(parser, f, s) {}
437 protected:
438 typedef typename A::P::uint_t pint_t;
439 typedef typename A::P P;
441 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(3); }
442 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "8-byte-literal"; }
443 virtual pint_t elementSizeAtAddress(pint_t addr) { return 8; }
444 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
445 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
446 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
447 const ld::IndirectBindingTable& ind) const;
450 template <typename A>
451 class Literal16Section : public FixedSizeSection<A>
453 public:
454 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
455 : FixedSizeSection<A>(parser, f, s) {}
456 protected:
457 typedef typename A::P::uint_t pint_t;
458 typedef typename A::P P;
460 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(4); }
461 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "16-byte-literal"; }
462 virtual pint_t elementSizeAtAddress(pint_t addr) { return 16; }
463 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
464 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
465 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
466 const ld::IndirectBindingTable& ind) const;
470 template <typename A>
471 class NonLazyPointerSection : public FixedSizeSection<A>
473 public:
474 NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
475 : FixedSizeSection<A>(parser, f, s) {}
476 protected:
477 typedef typename A::P::uint_t pint_t;
478 typedef typename A::P P;
480 virtual void makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
481 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeNonLazyPointer; }
482 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
483 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "non_lazy_ptr"; }
484 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
485 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr);
486 virtual ld::Atom::Combine combine(Parser<A>&, pint_t);
487 virtual bool ignoreLabel(const char* label) { return true; }
488 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
489 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
490 const ld::IndirectBindingTable& ind) const;
492 private:
493 static const char* targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
494 static ld::Fixup::Kind fixupKind();
498 template <typename A>
499 class CFStringSection : public FixedSizeSection<A>
501 public:
502 CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
503 : FixedSizeSection<A>(parser, f, s) {}
504 protected:
505 typedef typename A::P::uint_t pint_t;
507 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
508 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "CFString"; }
509 virtual pint_t elementSizeAtAddress(pint_t addr) { return 4*sizeof(pint_t); }
510 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
511 virtual bool ignoreLabel(const char* label) { return true; }
512 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
513 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
514 const ld::IndirectBindingTable& ind) const;
515 private:
516 enum ContentType { contentUTF8, contentUTF16, contentUnknown };
517 static const uint8_t* targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
518 ContentType* ct, unsigned int* count);
522 template <typename A>
523 class ObjC1ClassSection : public FixedSizeSection<A>
525 public:
526 ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
527 : FixedSizeSection<A>(parser, f, s) {}
528 protected:
529 typedef typename A::P::uint_t pint_t;
530 typedef typename A::P P;
531 typedef typename A::P::E E;
533 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& , pint_t ) { return ld::Atom::scopeGlobal; }
534 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(2); }
535 virtual const char* unlabeledAtomName(Parser<A>&, pint_t);
536 virtual ld::Atom::SymbolTableInclusion symbolTableInclusion() { return ld::Atom::symbolTableIn; }
537 virtual pint_t elementSizeAtAddress(pint_t addr);
538 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineNever; }
539 virtual bool ignoreLabel(const char* label) { return true; }
540 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
541 { return 0; }
542 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
543 const ld::IndirectBindingTable& ind) const { return false; }
544 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
548 template <typename A>
549 class ObjC2ClassRefsSection : public FixedSizeSection<A>
551 public:
552 ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
553 : FixedSizeSection<A>(parser, f, s) {}
554 protected:
555 typedef typename A::P::uint_t pint_t;
557 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
558 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "objc-class-ref"; }
559 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
560 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
561 virtual bool ignoreLabel(const char* label) { return true; }
562 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
563 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
564 const ld::IndirectBindingTable& ind) const;
565 private:
566 const char* targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
570 template <typename A>
571 class ObjC2CategoryListSection : public FixedSizeSection<A>
573 public:
574 ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
575 : FixedSizeSection<A>(parser, f, s) {}
576 protected:
577 typedef typename A::P::uint_t pint_t;
579 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
580 virtual ld::Atom::Scope scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
581 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "objc-cat-list"; }
582 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
583 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineNever; }
584 virtual bool ignoreLabel(const char* label) { return true; }
585 private:
586 const char* targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
590 template <typename A>
591 class PointerToCStringSection : public FixedSizeSection<A>
593 public:
594 PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
595 : FixedSizeSection<A>(parser, f, s) {}
596 protected:
597 typedef typename A::P::uint_t pint_t;
599 virtual ld::Atom::Alignment alignmentForAddress(pint_t addr) { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
600 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "pointer-to-literal-cstring"; }
601 virtual pint_t elementSizeAtAddress(pint_t addr) { return sizeof(pint_t); }
602 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndReferences; }
603 virtual bool ignoreLabel(const char* label) { return true; }
604 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
605 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
606 const ld::IndirectBindingTable& ind) const;
607 virtual const char* targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
611 template <typename A>
612 class Objc1ClassReferences : public PointerToCStringSection<A>
614 public:
615 Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
616 : PointerToCStringSection<A>(parser, f, s) {}
618 typedef typename A::P::uint_t pint_t;
619 typedef typename A::P P;
621 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "pointer-to-literal-objc-class-name"; }
622 virtual bool addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
623 virtual const char* targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
627 template <typename A>
628 class CStringSection : public ImplicitSizeSection<A>
630 public:
631 CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
632 : ImplicitSizeSection<A>(parser, f, s) {}
633 protected:
634 typedef typename A::P::uint_t pint_t;
635 typedef typename A::P P;
637 virtual ld::Atom::ContentType contentType() { return ld::Atom::typeCString; }
638 virtual Atom<A>* findAtomByAddress(pint_t addr);
639 virtual const char* unlabeledAtomName(Parser<A>&, pint_t) { return "cstring"; }
640 virtual pint_t elementSizeAtAddress(pint_t addr);
641 virtual bool ignoreLabel(const char* label);
642 virtual bool useElementAt(Parser<A>& parser,
643 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
644 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
645 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
646 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
647 const ld::IndirectBindingTable& ind) const;
652 template <typename A>
653 class UTF16StringSection : public SymboledSection<A>
655 public:
656 UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
657 : SymboledSection<A>(parser, f, s) {}
658 protected:
659 typedef typename A::P::uint_t pint_t;
660 typedef typename A::P P;
662 virtual ld::Atom::Combine combine(Parser<A>&, pint_t) { return ld::Atom::combineByNameAndContent; }
663 virtual unsigned long contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
664 virtual bool canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
665 const ld::IndirectBindingTable& ind) const;
670 // Atoms in mach-o files
672 template <typename A>
673 class Atom : public ld::Atom
675 public:
676 // overrides of ld::Atom
677 virtual ld::File* file() const { return &sect().file(); }
678 virtual bool translationUnitSource(const char** dir, const char** nm) const
679 { return sect().file().translationUnitSource(dir, nm); }
680 virtual const char* name() const { return _name; }
681 virtual uint64_t size() const { return _size; }
682 virtual uint64_t objectAddress() const { return _objAddress; }
683 virtual void copyRawContent(uint8_t buffer[]) const;
684 virtual const uint8_t* rawContentPointer() const { return contentPointer(); }
685 virtual unsigned long contentHash(const ld::IndirectBindingTable& ind) const
686 { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
687 virtual bool canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
688 { return sect().canCoalesceWith(this, rhs, ind); }
689 virtual ld::Fixup::iterator fixupsBegin() const { return &machofile()._fixups[_fixupsStartIndex]; }
690 virtual ld::Fixup::iterator fixupsEnd() const { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
691 virtual ld::Atom::UnwindInfo::iterator beginUnwind() const { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
692 virtual ld::Atom::UnwindInfo::iterator endUnwind() const { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount]; }
693 virtual ld::Atom::LineInfo::iterator beginLineInfo() const{ return &machofile()._lineInfos[_lineInfoStartIndex]; }
694 virtual ld::Atom::LineInfo::iterator endLineInfo() const { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount]; }
696 private:
698 enum { kFixupStartIndexBits = 32,
699 kLineInfoStartIndexBits = 32,
700 kUnwindInfoStartIndexBits = 24,
701 kFixupCountBits = 24,
702 kLineInfoCountBits = 12,
703 kUnwindInfoCountBits = 4
704 }; // must sum to 128
706 public:
707 // methods for all atoms from mach-o object file
708 Section<A>& sect() const { return (Section<A>&)section(); }
709 File<A>& machofile() const { return ((Section<A>*)(this->_section))->file(); }
710 void setFixupsRange(uint32_t s, uint32_t c);
711 void setUnwindInfoRange(uint32_t s, uint32_t c);
712 void extendUnwindInfoRange();
713 void setLineInfoRange(uint32_t s, uint32_t c);
714 bool roomForMoreLineInfoCount() { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
715 void incrementLineInfoCount() { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
716 void incrementFixupCount() { if (_fixupsCount == ((1 << kFixupCountBits)-1))
717 throwf("too may fixups in %s", name()); ++_fixupsCount; }
718 const uint8_t* contentPointer() const;
719 uint32_t fixupCount() const { return _fixupsCount; }
720 void verifyAlignment() const;
722 typedef typename A::P P;
723 typedef typename A::P::E E;
724 typedef typename A::P::uint_t pint_t;
725 // constuct via all attributes
726 Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
727 ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
728 ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
729 bool dds, bool thumb, bool al, ld::Atom::Alignment a)
730 : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
731 _size(sz), _objAddress(addr), _name(nm), _hash(0),
732 _fixupsStartIndex(0), _lineInfoStartIndex(0),
733 _unwindInfoStartIndex(0), _fixupsCount(0),
734 _lineInfoCount(0), _unwindInfoCount(0) { }
735 // construct via symbol table entry
736 Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
737 uint64_t sz, bool alias=false)
738 : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
739 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
740 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
741 parser.inclusionFromSymbol(sym),
742 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
743 parser.isThumbFromSymbol(sym), alias,
744 sct.alignmentForAddress(sym.n_value())),
745 _size(sz), _objAddress(sym.n_value()),
746 _name(parser.nameFromSymbol(sym)), _hash(0),
747 _fixupsStartIndex(0), _lineInfoStartIndex(0),
748 _unwindInfoStartIndex(0), _fixupsCount(0),
749 _lineInfoCount(0), _unwindInfoCount(0) {
750 // <rdar://problem/6783167> support auto-hidden weak symbols
751 if ( _scope == ld::Atom::scopeGlobal &&
752 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
753 this->setAutoHide();
754 this->verifyAlignment();
757 private:
758 friend class Parser<A>;
759 friend class Section<A>;
760 friend class CStringSection<A>;
761 friend class AbsoluteSymbolSection<A>;
763 pint_t _size;
764 pint_t _objAddress;
765 const char* _name;
766 mutable unsigned long _hash;
768 uint64_t _fixupsStartIndex : kFixupStartIndexBits,
769 _lineInfoStartIndex : kLineInfoStartIndexBits,
770 _unwindInfoStartIndex : kUnwindInfoStartIndexBits,
771 _fixupsCount : kFixupCountBits,
772 _lineInfoCount : kLineInfoCountBits,
773 _unwindInfoCount : kUnwindInfoCountBits;
779 template <typename A>
780 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
782 if ( count >= (1 << kFixupCountBits) )
783 throwf("too many fixups in function %s", this->name());
784 if ( startIndex >= (1 << kFixupStartIndexBits) )
785 throwf("too many fixups in file");
786 assert(((startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
787 _fixupsStartIndex = startIndex;
788 _fixupsCount = count;
791 template <typename A>
792 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
794 if ( count >= (1 << kUnwindInfoCountBits) )
795 throwf("too many compact unwind infos in function %s", this->name());
796 if ( startIndex >= (1 << kUnwindInfoStartIndexBits) )
797 throwf("too many compact unwind infos (%d) in file", startIndex);
798 assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
799 _unwindInfoStartIndex = startIndex;
800 _unwindInfoCount = count;
803 template <typename A>
804 void Atom<A>::extendUnwindInfoRange()
806 if ( _unwindInfoCount+1 >= (1 << kUnwindInfoCountBits) )
807 throwf("too many compact unwind infos in function %s", this->name());
808 _unwindInfoCount += 1;
811 template <typename A>
812 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
814 assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
815 assert((startIndex+count) < sect().file()._lineInfos.size() && "line info index out of range");
816 _lineInfoStartIndex = startIndex;
817 _lineInfoCount = count;
820 template <typename A>
821 const uint8_t* Atom<A>::contentPointer() const
823 const macho_section<P>* sct = this->sect().machoSection();
824 uint32_t fileOffset = sct->offset() - sct->addr() + this->_objAddress;
825 return this->sect().file().fileContent()+fileOffset;
829 template <typename A>
830 void Atom<A>::copyRawContent(uint8_t buffer[]) const
832 // copy base bytes
833 if ( this->contentType() == ld::Atom::typeZeroFill ) {
834 bzero(buffer, _size);
836 else if ( _size != 0 ) {
837 memcpy(buffer, this->contentPointer(), _size);
841 template <>
842 void Atom<arm>::verifyAlignment() const
844 if ( (this->section().type() == ld::Section::typeCode) && ! isThumb() ) {
845 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
846 warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
850 template <typename A>
851 void Atom<A>::verifyAlignment() const
856 template <typename A>
857 class Parser
859 public:
860 static bool validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
861 cpu_subtype_t subtype=0);
862 static const char* fileKind(const uint8_t* fileContent);
863 static bool hasObjC2Categories(const uint8_t* fileContent);
864 static ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
865 const char* path, time_t modTime, uint32_t ordinal,
866 const ParserOptions& opts) {
867 Parser p(fileContent, fileLength, path, modTime,
868 ordinal, opts.convertUnwindInfo);
869 return p.parse(opts);
872 typedef typename A::P P;
873 typedef typename A::P::E E;
874 typedef typename A::P::uint_t pint_t;
876 struct SourceLocation {
877 SourceLocation() {}
878 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
879 Atom<A>* atom;
880 uint32_t offsetInAtom;
883 struct TargetDesc {
884 Atom<A>* atom;
885 const char* name; // only used if targetAtom is NULL
886 int64_t addend;
887 bool weakImport; // only used if targetAtom is NULL
890 struct FixupInAtom {
891 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
892 fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
894 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
895 fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
897 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
898 fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
900 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
901 fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
903 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
904 fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
906 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
907 fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
909 ld::Fixup fixup;
910 Atom<A>* atom;
913 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
914 _allFixups.push_back(FixupInAtom(src, c, k, target));
917 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
918 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
921 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
922 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
925 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
926 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
929 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
930 _allFixups.push_back(FixupInAtom(src, c, k, addend));
933 void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
934 _allFixups.push_back(FixupInAtom(src, c, k));
938 uint32_t symbolCount() { return _symbolCount; }
939 uint32_t indirectSymbol(uint32_t indirectIndex);
940 const macho_nlist<P>& symbolFromIndex(uint32_t index);
941 const char* nameFromSymbol(const macho_nlist<P>& sym);
942 ld::Atom::Scope scopeFromSymbol(const macho_nlist<P>& sym);
943 static ld::Atom::Definition definitionFromSymbol(const macho_nlist<P>& sym);
944 static ld::Atom::Combine combineFromSymbol(const macho_nlist<P>& sym);
945 ld::Atom::SymbolTableInclusion inclusionFromSymbol(const macho_nlist<P>& sym);
946 static bool dontDeadStripFromSymbol(const macho_nlist<P>& sym);
947 static bool isThumbFromSymbol(const macho_nlist<P>& sym);
948 static bool weakImportFromSymbol(const macho_nlist<P>& sym);
949 static bool resolverFromSymbol(const macho_nlist<P>& sym);
950 uint32_t symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
951 const macho_section<P>* firstMachOSection() { return _sectionsStart; }
952 const macho_section<P>* machOSectionFromSectionIndex(uint32_t index);
953 uint32_t machOSectionCount() { return _machOSectionsCount; }
954 uint32_t undefinedStartIndex() { return _undefinedStartIndex; }
955 uint32_t undefinedEndIndex() { return _undefinedEndIndex; }
956 void addFixup(FixupInAtom f) { _allFixups.push_back(f); }
957 Section<A>* sectionForNum(unsigned int sectNum);
958 Section<A>* sectionForAddress(pint_t addr);
959 Atom<A>* findAtomByAddress(pint_t addr);
960 Atom<A>* findAtomByAddressOrNullIfStub(pint_t addr);
961 Atom<A>* findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
962 Atom<A>* findAtomByName(const char* name); // slow!
963 void findTargetFromAddress(pint_t addr, TargetDesc& target);
964 void findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
965 void findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
966 TargetDesc& target);
967 uint32_t tentativeDefinitionCount() { return _tentativeDefinitionCount; }
968 uint32_t absoluteSymbolCount() { return _absoluteSymbolCount; }
970 bool hasStubsSection() { return (_stubsSectionNum != 0); }
971 unsigned int stubsSectionNum() { return _stubsSectionNum; }
972 void addDtraceExtraInfos(const SourceLocation& src, const char* provider);
973 const char* scanSymbolTableForAddress(uint64_t addr);
974 bool convertUnwindInfo() { return _convertUnwindInfo; }
977 void addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
978 void addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
982 struct LabelAndCFIBreakIterator {
983 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
984 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
985 uint32_t cfisc, bool ols)
986 : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
987 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
988 newSection(false), cfiIndex(0), symIndex(0) {}
989 bool next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
990 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
991 pint_t peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
992 void beginSection() { newSection = true; symIndex = 0; }
994 const uint32_t* const sortedSymbolIndexes;
995 const uint32_t sortedSymbolCount;
996 const pint_t* cfiStartsArray;
997 const uint32_t cfiStartsCount;
998 const bool fileHasOverlappingSymbols;
999 bool newSection;
1000 uint32_t cfiIndex;
1001 uint32_t symIndex;
1004 struct CFI_CU_InfoArrays {
1005 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1006 typedef typename CUSection<A>::Info CU_Info;
1007 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1008 : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1009 const CFI_Atom_Info* const cfiArray;
1010 CU_Info* const cuArray;
1011 const uint32_t cfiCount;
1012 const uint32_t cuCount;
1017 private:
1018 friend class Section<A>;
1020 enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1021 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1022 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1023 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1024 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1025 sectionTypeCompactUnwind };
1027 template <typename P>
1028 struct MachOSectionAndSectionClass
1030 const macho_section<P>* sect;
1031 SectionType type;
1033 static int sorter(const void* l, const void* r) {
1034 const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1035 const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1036 int64_t diff = left->sect->addr() - right->sect->addr();
1037 if ( diff == 0 )
1038 return 0;
1039 if ( diff < 0 )
1040 return -1;
1041 else
1042 return 1;
1046 struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1049 Parser(const uint8_t* fileContent, uint64_t fileLength,
1050 const char* path, time_t modTime,
1051 uint32_t ordinal, bool convertUnwindInfo);
1052 ld::relocatable::File* parse(const ParserOptions& opts);
1053 uint8_t loadCommandSizeMask();
1054 bool parseLoadCommands();
1055 void makeSections();
1056 void prescanSymbolTable();
1057 void makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1058 void makeSortedSectionsArray(uint32_t array[]);
1059 static int pointerSorter(const void* l, const void* r);
1060 static int symbolIndexSorter(void* extra, const void* l, const void* r);
1061 static int sectionIndexSorter(void* extra, const void* l, const void* r);
1063 void parseDebugInfo();
1064 void parseStabs();
1065 static bool isConstFunStabs(const char *stabStr);
1066 bool read_comp_unit(const char ** name, const char ** comp_dir,
1067 uint64_t *stmt_list);
1068 const char* getDwarfString(uint64_t form, const uint8_t* p);
1069 bool skip_form(const uint8_t ** offset, const uint8_t * end,
1070 uint64_t form, uint8_t addr_size, bool dwarf64);
1073 // filled in by constructor
1074 const uint8_t* _fileContent;
1075 uint32_t _fileLength;
1076 const char* _path;
1077 time_t _modTime;
1078 uint32_t _ordinal;
1080 // filled in by parseLoadCommands()
1081 File<A>* _file;
1082 const macho_nlist<P>* _symbols;
1083 uint32_t _symbolCount;
1084 const char* _strings;
1085 uint32_t _stringsSize;
1086 const uint32_t* _indirectTable;
1087 uint32_t _indirectTableCount;
1088 uint32_t _undefinedStartIndex;
1089 uint32_t _undefinedEndIndex;
1090 const macho_section<P>* _sectionsStart;
1091 uint32_t _machOSectionsCount;
1092 bool _hasUUID;
1094 // filled in by parse()
1095 CFISection<A>* _EHFrameSection;
1096 CUSection<A>* _compactUnwindSection;
1097 AbsoluteSymbolSection<A>* _absoluteSection;
1098 uint32_t _tentativeDefinitionCount;
1099 uint32_t _absoluteSymbolCount;
1100 uint32_t _symbolsInSections;
1101 bool _hasLongBranchStubs;
1102 bool _AppleObjc; // FSF has objc that uses different data layout
1103 bool _overlappingSymbols;
1104 bool _convertUnwindInfo;
1105 unsigned int _stubsSectionNum;
1106 const macho_section<P>* _stubsMachOSection;
1107 std::vector<const char*> _dtraceProviderInfo;
1108 std::vector<FixupInAtom> _allFixups;
1113 template <typename A>
1114 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1115 uint32_t ordinal, bool convertDUI)
1116 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1117 _ordinal(ordinal), _file(NULL),
1118 _symbols(NULL), _symbolCount(0), _strings(NULL), _stringsSize(0),
1119 _indirectTable(NULL), _indirectTableCount(0),
1120 _undefinedStartIndex(0), _undefinedEndIndex(0),
1121 _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1122 _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1123 _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1124 _symbolsInSections(0), _hasLongBranchStubs(false), _AppleObjc(false),
1125 _overlappingSymbols(false), _convertUnwindInfo(convertDUI),
1126 _stubsSectionNum(0), _stubsMachOSection(NULL)
1131 template <>
1132 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1134 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1135 if ( header->magic() != MH_MAGIC )
1136 return false;
1137 if ( header->cputype() != CPU_TYPE_I386 )
1138 return false;
1139 if ( header->filetype() != MH_OBJECT )
1140 return false;
1141 return true;
1144 template <>
1145 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1147 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1148 if ( header->magic() != MH_MAGIC_64 )
1149 return false;
1150 if ( header->cputype() != CPU_TYPE_X86_64 )
1151 return false;
1152 if ( header->filetype() != MH_OBJECT )
1153 return false;
1154 return true;
1157 template <>
1158 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1160 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1161 if ( header->magic() != MH_MAGIC )
1162 return false;
1163 if ( header->cputype() != CPU_TYPE_ARM )
1164 return false;
1165 if ( header->filetype() != MH_OBJECT )
1166 return false;
1167 if ( subtypeMustMatch ) {
1168 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1169 return true;
1170 // hack until libcc_kext.a is made fat
1171 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1172 return true;
1173 return false;
1175 return true;
1180 template <>
1181 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1183 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1184 if ( header->magic() != MH_MAGIC )
1185 return NULL;
1186 if ( header->cputype() != CPU_TYPE_I386 )
1187 return NULL;
1188 return "i386";
1191 template <>
1192 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1194 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1195 if ( header->magic() != MH_MAGIC )
1196 return NULL;
1197 if ( header->cputype() != CPU_TYPE_X86_64 )
1198 return NULL;
1199 return "x86_64";
1202 template <>
1203 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1205 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1206 if ( header->magic() != MH_MAGIC )
1207 return NULL;
1208 if ( header->cputype() != CPU_TYPE_ARM )
1209 return NULL;
1210 for (const ARMSubType* t=ARMSubTypes; t->subTypeName != NULL; ++t) {
1211 if ( t->subType == (cpu_subtype_t)header->cpusubtype() ) {
1212 return t->subTypeName;
1215 return "arm???";
1219 template <typename A>
1220 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1222 const macho_header<P>* header = (const macho_header<P>*)fileContent;
1223 const uint32_t cmd_count = header->ncmds();
1224 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1225 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1226 const macho_load_command<P>* cmd = cmds;
1227 for (uint32_t i = 0; i < cmd_count; ++i) {
1228 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1229 const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1230 const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1231 for (uint32_t si=0; si < segment->nsects(); ++si) {
1232 const macho_section<P>* sect = &sectionsStart[si];
1233 if ( (sect->size() > 0)
1234 && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1235 && (strcmp(sect->segname(), "__DATA") == 0) ) {
1236 return true;
1240 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1241 if ( cmd > cmdsEnd )
1242 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1244 return false;
1247 template <typename A>
1248 int Parser<A>::pointerSorter(const void* l, const void* r)
1250 // sort references by address
1251 const pint_t* left = (pint_t*)l;
1252 const pint_t* right = (pint_t*)r;
1253 return (*left - *right);
1256 template <typename A>
1257 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1259 pint_t symbolAddr;
1260 if ( symIndex < sortedSymbolCount )
1261 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1262 else
1263 symbolAddr = endAddr;
1264 pint_t cfiAddr;
1265 if ( cfiIndex < cfiStartsCount )
1266 cfiAddr = cfiStartsArray[cfiIndex];
1267 else
1268 cfiAddr = endAddr;
1269 if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1270 if ( cfiAddr < endAddr )
1271 return cfiAddr;
1272 else
1273 return endAddr;
1275 else {
1276 if ( symbolAddr < endAddr )
1277 return symbolAddr;
1278 else
1279 return endAddr;
1284 // Parses up a section into chunks based on labels and CFI information.
1285 // Each call returns the next chunk address and size, and (if the break
1286 // was because of a label) the symbol. Returns false when no more chunks.
1288 template <typename A>
1289 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1290 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1292 // may not be a label on start of section, but need atom demarcation there
1293 if ( newSection ) {
1294 newSection = false;
1295 // advance symIndex until we get to the first label at or past the start of this section
1296 while ( symIndex < sortedSymbolCount ) {
1297 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1298 pint_t nextSymbolAddr = sym.n_value();
1299 //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1300 if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1301 break;
1302 ++symIndex;
1304 if ( symIndex < sortedSymbolCount ) {
1305 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1306 pint_t nextSymbolAddr = sym.n_value();
1307 // if next symbol found is not in this section
1308 if ( sym.n_sect() != sectNum ) {
1309 // check for CFI break instead of symbol break
1310 if ( cfiIndex < cfiStartsCount ) {
1311 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1312 if ( nextCfiAddr < endAddr ) {
1313 // use cfi
1314 ++cfiIndex;
1315 *addr = nextCfiAddr;
1316 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1317 *symbol = NULL;
1318 return true;
1321 *addr = startAddr;
1322 *size = endAddr - startAddr;
1323 *symbol = NULL;
1324 if ( startAddr == endAddr )
1325 return false; // zero size section
1326 else
1327 return true; // whole section is one atom with no label
1329 // if also CFI break here, eat it
1330 if ( cfiIndex < cfiStartsCount ) {
1331 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1332 ++cfiIndex;
1334 if ( nextSymbolAddr == startAddr ) {
1335 // label at start of section, return it as chunk
1336 ++symIndex;
1337 *addr = startAddr;
1338 *size = peek(parser, startAddr, endAddr) - startAddr;
1339 *symbol = &sym;
1340 return true;
1342 // return chunk before first symbol
1343 *addr = startAddr;
1344 *size = nextSymbolAddr - startAddr;
1345 *symbol = NULL;
1346 return true;
1348 // no symbols left in whole file, so entire section is one chunk
1349 *addr = startAddr;
1350 *size = endAddr - startAddr;
1351 *symbol = NULL;
1352 if ( startAddr == endAddr )
1353 return false; // zero size section
1354 else
1355 return true; // whole section is one atom with no label
1358 while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1359 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1360 pint_t nextSymbolAddr = sym.n_value();
1361 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1362 if ( nextSymbolAddr < nextCfiAddr ) {
1363 if ( nextSymbolAddr >= endAddr )
1364 return false;
1365 ++symIndex;
1366 if ( nextSymbolAddr < startAddr )
1367 continue;
1368 *addr = nextSymbolAddr;
1369 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1370 *symbol = &sym;
1371 return true;
1373 else if ( nextCfiAddr < nextSymbolAddr ) {
1374 if ( nextCfiAddr >= endAddr )
1375 return false;
1376 ++cfiIndex;
1377 if ( nextCfiAddr < startAddr )
1378 continue;
1379 *addr = nextCfiAddr;
1380 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1381 *symbol = NULL;
1382 return true;
1384 else {
1385 if ( nextCfiAddr >= endAddr )
1386 return false;
1387 ++symIndex;
1388 ++cfiIndex;
1389 if ( nextCfiAddr < startAddr )
1390 continue;
1391 *addr = nextCfiAddr;
1392 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1393 *symbol = &sym;
1394 return true;
1397 while ( symIndex < sortedSymbolCount ) {
1398 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1399 pint_t nextSymbolAddr = sym.n_value();
1400 // if next symbol found is not in this section, then done with iteration
1401 if ( sym.n_sect() != sectNum )
1402 return false;
1403 ++symIndex;
1404 if ( nextSymbolAddr < startAddr )
1405 continue;
1406 *addr = nextSymbolAddr;
1407 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1408 *symbol = &sym;
1409 return true;
1411 while ( cfiIndex < cfiStartsCount ) {
1412 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1413 if ( nextCfiAddr >= endAddr )
1414 return false;
1415 ++cfiIndex;
1416 if ( nextCfiAddr < startAddr )
1417 continue;
1418 *addr = nextCfiAddr;
1419 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1420 *symbol = NULL;
1421 return true;
1423 return false;
1428 template <typename A>
1429 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1431 // create file object
1432 _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1434 // respond to -t option
1435 if ( opts.logAllFiles )
1436 printf("%s\n", _path);
1438 // parse start of mach-o file
1439 if ( ! parseLoadCommands() )
1440 return _file;
1442 // make array of
1443 uint32_t sortedSectionIndexes[_machOSectionsCount];
1444 this->makeSortedSectionsArray(sortedSectionIndexes);
1446 // make symbol table sorted by address
1447 this->prescanSymbolTable();
1448 uint32_t sortedSymbolIndexes[_symbolsInSections];
1449 this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1451 // allocate Section<A> object for each mach-o section
1452 makeSections();
1454 // if it exists, do special early parsing of __compact_unwind section
1455 uint32_t countOfCUs = 0;
1456 if ( _compactUnwindSection != NULL )
1457 countOfCUs = _compactUnwindSection->count();
1458 uint8_t cuInfoBuffer[sizeof(typename CUSection<A>::Info) * countOfCUs];
1459 typename CUSection<A>::Info* cuInfoArray = (typename CUSection<A>::Info*)cuInfoBuffer;
1460 if ( countOfCUs != 0 )
1461 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1463 // if it exists, do special early parsing of __eh_frame section
1464 // stack allocate array of CFI_Atom_Info
1465 uint32_t countOfCFIs = 0;
1466 if ( _EHFrameSection != NULL )
1467 countOfCFIs = _EHFrameSection->cfiCount();
1468 typename CFISection<A>::CFI_Atom_Info cfiArray[countOfCFIs];
1469 // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1470 uint8_t* ehBuffer = NULL;
1471 uint32_t stackAllocSize = 0;
1472 if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() ) {
1473 uint32_t sectSize = _EHFrameSection->machoSection()->size();
1474 if ( sectSize > 50*1024 )
1475 ehBuffer = (uint8_t*)malloc(sectSize);
1476 else
1477 stackAllocSize = sectSize;
1479 uint32_t ehStackBuffer[1+stackAllocSize/4]; // make 4-byte aligned stack bufffer
1480 if ( ehBuffer == NULL )
1481 ehBuffer = (uint8_t*)&ehStackBuffer;
1482 uint32_t cfiStartsCount = 0;
1483 if ( countOfCFIs != 0 ) {
1484 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs);
1485 // count functions and lsdas
1486 for(uint32_t i=0; i < countOfCFIs; ++i) {
1487 if ( cfiArray[i].isCIE )
1488 continue;
1489 //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1490 // (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1491 // (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1492 // cfiArray[i].u.fdeInfo.compactUnwindInfo);
1493 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1494 ++cfiStartsCount;
1495 if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1496 ++cfiStartsCount;
1499 CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1501 // create sorted array of function starts and lsda starts
1502 pint_t cfiStartsArray[cfiStartsCount];
1503 uint32_t countOfFDEs = 0;
1504 if ( countOfCFIs != 0 ) {
1505 int index = 0;
1506 for(uint32_t i=0; i < countOfCFIs; ++i) {
1507 if ( cfiArray[i].isCIE )
1508 continue;
1509 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1510 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.function.targetAddress;
1511 if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1512 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1513 ++countOfFDEs;
1515 ::qsort(cfiStartsArray, cfiStartsCount, sizeof(pint_t), pointerSorter);
1516 #ifndef NDEBUG
1517 // scan for FDEs claming the same function
1518 for(int i=1; i < index; ++i) {
1519 assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1521 #endif
1524 Section<A>** sections = _file->_sectionsArray;
1525 uint32_t sectionsCount = _file->_sectionsArrayCount;
1527 // figure out how many atoms will be allocated and allocate
1528 LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1529 cfiStartsCount, _overlappingSymbols);
1530 uint32_t computedAtomCount = 0;
1531 for (uint32_t i=0; i < sectionsCount; ++i ) {
1532 breakIterator.beginSection();
1533 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1534 //const macho_section<P>* sect = sections[i]->machoSection();
1535 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1536 computedAtomCount += count;
1538 //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
1539 _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1540 _file->_atomsArrayCount = 0;
1542 // have each section append atoms to _atomsArray
1543 LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1544 cfiStartsCount, _overlappingSymbols);
1545 for (uint32_t i=0; i < sectionsCount; ++i ) {
1546 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1547 breakIterator2.beginSection();
1548 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1549 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1550 _file->_atomsArrayCount += count;
1552 assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1555 // have each section add all fix-ups for its atoms
1556 _allFixups.reserve(computedAtomCount*5);
1557 for (uint32_t i=0; i < sectionsCount; ++i )
1558 sections[i]->makeFixups(*this, cfis);
1560 // assign fixups start offset for each atom
1561 uint8_t* p = _file->_atomsArray;
1562 uint32_t fixupOffset = 0;
1563 for(int i=_file->_atomsArrayCount; i > 0; --i) {
1564 Atom<A>* atom = (Atom<A>*)p;
1565 atom->_fixupsStartIndex = fixupOffset;
1566 fixupOffset += atom->_fixupsCount;
1567 atom->_fixupsCount = 0;
1568 p += sizeof(Atom<A>);
1570 assert(fixupOffset == _allFixups.size());
1571 _file->_fixups.reserve(fixupOffset);
1573 // copy each fixup for each atom
1574 for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1575 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1576 _file->_fixups[slot] = it->fixup;
1577 it->atom->_fixupsCount++;
1580 // done with temp vector
1581 _allFixups.clear();
1583 // add unwind info
1584 _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1585 for(uint32_t i=0; i < countOfCFIs; ++i) {
1586 if ( cfiArray[i].isCIE )
1587 continue;
1588 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1589 ld::Atom::UnwindInfo info;
1590 info.startOffset = 0;
1591 info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1592 _file->_unwindInfos.push_back(info);
1593 Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1594 func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1597 // apply compact infos in __LD,__compact_unwind section to each function
1598 // if function also has dwarf unwind, CU will override it
1599 Atom<A>* lastFunc = NULL;
1600 uint32_t lastEnd = 0;
1601 for(uint32_t i=0; i < countOfCUs; ++i) {
1602 typename CUSection<A>::Info* info = &cuInfoArray[i];
1603 assert(info->function != NULL);
1604 ld::Atom::UnwindInfo ui;
1605 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1606 ui.unwindInfo = info->compactUnwindInfo;
1607 _file->_unwindInfos.push_back(ui);
1608 // if previous is for same function, extend range
1609 if ( info->function == lastFunc ) {
1610 if ( lastEnd != ui.startOffset ) {
1611 if ( lastEnd < ui.startOffset )
1612 warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1613 else
1614 warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1616 lastFunc->extendUnwindInfoRange();
1618 else
1619 info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1620 lastFunc = info->function;
1621 lastEnd = ui.startOffset + info->rangeLength;
1624 // parse dwarf debug info to get line info
1625 this->parseDebugInfo();
1627 return _file;
1632 template <> uint8_t Parser<x86>::loadCommandSizeMask() { return 0x03; }
1633 template <> uint8_t Parser<x86_64>::loadCommandSizeMask() { return 0x07; }
1634 template <> uint8_t Parser<arm>::loadCommandSizeMask() { return 0x03; }
1636 template <typename A>
1637 bool Parser<A>::parseLoadCommands()
1639 const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1641 // set File attributes
1642 _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1643 _file->_cpuSubType = header->cpusubtype();
1645 const macho_segment_command<P>* segment = NULL;
1646 const uint8_t* const endOfFile = _fileContent + _fileLength;
1647 const uint32_t cmd_count = header->ncmds();
1648 // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1649 if ( cmd_count == 0 )
1650 return false;
1651 const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1652 const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1653 const macho_load_command<P>* cmd = cmds;
1654 for (uint32_t i = 0; i < cmd_count; ++i) {
1655 uint32_t size = cmd->cmdsize();
1656 if ( (size & this->loadCommandSizeMask()) != 0 )
1657 throwf("load command #%d has a unaligned size", i);
1658 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1659 if ( endOfCmd > (uint8_t*)cmdsEnd )
1660 throwf("load command #%d extends beyond the end of the load commands", i);
1661 if ( endOfCmd > endOfFile )
1662 throwf("load command #%d extends beyond the end of the file", i);
1663 switch (cmd->cmd()) {
1664 case LC_SYMTAB:
1666 const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1667 _symbolCount = symtab->nsyms();
1668 _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1669 _strings = (char*)_fileContent + symtab->stroff();
1670 _stringsSize = symtab->strsize();
1671 if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1672 throw "mach-o symbol table extends beyond end of file";
1673 if ( (_strings + _stringsSize) > (char*)endOfFile )
1674 throw "mach-o string pool extends beyond end of file";
1675 if ( _indirectTable == NULL ) {
1676 if ( _undefinedEndIndex == 0 ) {
1677 _undefinedStartIndex = 0;
1678 _undefinedEndIndex = symtab->nsyms();
1682 break;
1683 case LC_DYSYMTAB:
1685 const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1686 _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1687 _indirectTableCount = dsymtab->nindirectsyms();
1688 if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1689 throw "indirect symbol table extends beyond end of file";
1690 _undefinedStartIndex = dsymtab->iundefsym();
1691 _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1693 break;
1694 case LC_UUID:
1695 _hasUUID = true;
1696 break;
1698 default:
1699 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1700 if ( segment != NULL )
1701 throw "more than one LC_SEGMENT found in object file";
1702 segment = (macho_segment_command<P>*)cmd;
1704 break;
1706 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1707 if ( cmd > cmdsEnd )
1708 throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1711 // record range of sections
1712 if ( segment == NULL )
1713 throw "missing LC_SEGMENT";
1714 _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1715 _machOSectionsCount = segment->nsects();
1717 return true;
1721 template <typename A>
1722 void Parser<A>::prescanSymbolTable()
1724 _tentativeDefinitionCount = 0;
1725 _absoluteSymbolCount = 0;
1726 _symbolsInSections = 0;
1727 for (uint32_t i=0; i < this->_symbolCount; ++i) {
1728 const macho_nlist<P>& sym = symbolFromIndex(i);
1729 // ignore stabs
1730 if ( (sym.n_type() & N_STAB) != 0 )
1731 continue;
1733 // look at undefines
1734 const char* symbolName = this->nameFromSymbol(sym);
1735 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
1736 if ( sym.n_value() != 0 ) {
1737 // count tentative definitions
1738 ++_tentativeDefinitionCount;
1740 else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
1741 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
1742 // is extra provider info
1743 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
1744 _dtraceProviderInfo.push_back(symbolName);
1747 continue;
1750 // count absolute symbols
1751 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
1752 const char* absName = this->nameFromSymbol(sym);
1753 // ignore .objc_class_name_* symbols
1754 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
1755 _AppleObjc = true;
1756 continue;
1758 // ignore .objc_class_name_* symbols
1759 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
1760 continue;
1761 // ignore empty *.eh symbols
1762 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
1763 continue;
1764 ++_absoluteSymbolCount;
1767 // only look at definitions
1768 if ( (sym.n_type() & N_TYPE) != N_SECT )
1769 continue;
1771 // 'L' labels do not denote atom breaks
1772 if ( symbolName[0] == 'L' )
1773 continue;
1775 // how many def syms in each section
1776 if ( sym.n_sect() > _machOSectionsCount )
1777 throw "bad n_sect in symbol table";
1779 _symbolsInSections++;
1783 template <typename A>
1784 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
1786 Parser<A>* parser = (Parser<A>*)extra;
1787 const uint32_t* left = (uint32_t*)l;
1788 const uint32_t* right = (uint32_t*)r;
1789 const macho_section<P>* leftSect = parser->machOSectionFromSectionIndex(*left);
1790 const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
1792 // can't just return difference because 64-bit diff does not fit in 32-bit return type
1793 int64_t result = leftSect->addr() - rightSect->addr();
1794 if ( result == 0 ) {
1795 // two sections with same start address
1796 // one with zero size goes first
1797 bool leftEmpty = ( leftSect->size() == 0 );
1798 bool rightEmpty = ( rightSect->size() == 0 );
1799 if ( leftEmpty != rightEmpty ) {
1800 return ( rightEmpty ? 1 : -1 );
1802 if ( !leftEmpty && !rightEmpty )
1803 throwf("overlapping sections");
1804 // both empty, so chose file order
1805 return ( rightSect - leftSect );
1807 else if ( result < 0 )
1808 return -1;
1809 else
1810 return 1;
1813 template <typename A>
1814 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
1816 const bool log = false;
1818 if ( log ) {
1819 fprintf(stderr, "unsorted sections:\n");
1820 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1821 fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
1824 // sort by symbol table address
1825 for (uint32_t i=0; i < _machOSectionsCount; ++i)
1826 array[i] = i;
1827 ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
1829 if ( log ) {
1830 fprintf(stderr, "sorted sections:\n");
1831 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1832 fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
1838 template <typename A>
1839 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
1841 ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
1842 Parser<A>* parser = extraInfo->parser;
1843 const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
1844 const uint32_t* left = (uint32_t*)l;
1845 const uint32_t* right = (uint32_t*)r;
1846 const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
1847 const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
1848 // can't just return difference because 64-bit diff does not fit in 32-bit return type
1849 int64_t result = leftSym.n_value() - rightSym.n_value();
1850 if ( result == 0 ) {
1851 // two symbols with same address
1852 // if in different sections, sort earlier section first
1853 if ( leftSym.n_sect() != rightSym.n_sect() ) {
1854 for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
1855 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
1856 return -1;
1857 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
1858 return 1;
1861 // two symbols in same section, means one is an alias
1862 // if only one is global, make the other an alias (sort first)
1863 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
1864 if ( (rightSym.n_type() & N_EXT) != 0 )
1865 return -1;
1866 else
1867 return 1;
1869 // if both are global, make alphabetically last one be the alias
1870 return ( strcmp(parser->nameFromSymbol(rightSym), parser->nameFromSymbol(leftSym)) );
1872 else if ( result < 0 )
1873 return -1;
1874 else
1875 return 1;
1879 template <typename A>
1880 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
1882 const bool log = false;
1884 uint32_t* p = array;
1885 for (uint32_t i=0; i < this->_symbolCount; ++i) {
1886 const macho_nlist<P>& sym = symbolFromIndex(i);
1887 // ignore stabs
1888 if ( (sym.n_type() & N_STAB) != 0 )
1889 continue;
1891 // only look at definitions
1892 if ( (sym.n_type() & N_TYPE) != N_SECT )
1893 continue;
1895 // 'L' labels do not denote atom breaks
1896 const char* symbolName = this->nameFromSymbol(sym);
1897 if ( symbolName[0] == 'L' )
1898 continue;
1900 // how many def syms in each section
1901 if ( sym.n_sect() > _machOSectionsCount )
1902 throw "bad n_sect in symbol table";
1904 // append to array
1905 *p++ = i;
1907 assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
1909 // sort by symbol table address
1910 ParserAndSectionsArray extra = { this, sectionArray };
1911 ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
1913 // look for two symbols at same address
1914 _overlappingSymbols = false;
1915 for (unsigned int i=1; i < _symbolsInSections; ++i) {
1916 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
1917 //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
1918 _overlappingSymbols = true;
1922 if ( log ) {
1923 fprintf(stderr, "sorted symbols:\n");
1924 for(unsigned int i=0; i < _symbolsInSections; ++i )
1925 fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
1930 template <typename A>
1931 void Parser<A>::makeSections()
1933 // classify each section by type
1934 // compute how many Section objects will be needed and total size for all
1935 unsigned int totalSectionsSize = 0;
1936 uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
1937 // allocate raw storage for all section objects on stack
1938 MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
1939 unsigned int count = 0;
1940 for (uint32_t i=0; i < _machOSectionsCount; ++i) {
1941 const macho_section<P>* sect = &_sectionsStart[i];
1942 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
1943 if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
1944 // note that .o file has dwarf
1945 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
1946 // save off iteresting dwarf sections
1947 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
1948 _file->_dwarfDebugInfoSect = sect;
1949 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
1950 _file->_dwarfDebugAbbrevSect = sect;
1951 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
1952 _file->_dwarfDebugLineSect = sect;
1953 else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
1954 _file->_dwarfDebugStringSect = sect;
1955 // linker does not propagate dwarf sections to output file
1956 continue;
1958 else if ( strcmp(sect->segname(), "__LD") == 0 ) {
1959 if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
1960 machOSects[count].sect = sect;
1961 totalSectionsSize += sizeof(CUSection<A>);
1962 machOSects[count++].type = sectionTypeCompactUnwind;
1963 continue;
1967 // ignore empty __OBJC sections
1968 if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
1969 continue;
1970 // objc image info section is really attributes and not content
1971 if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
1972 || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
1973 // struct objc_image_info {
1974 // uint32_t version; // initially 0
1975 // uint32_t flags;
1976 // };
1977 // #define OBJC_IMAGE_SUPPORTS_GC 2
1978 // #define OBJC_IMAGE_GC_ONLY 4
1980 const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
1981 if ( (sect->size() >= 8) && (contents[0] == 0) ) {
1982 uint32_t flags = E::get32(contents[1]);
1983 if ( (flags & 4) == 4 )
1984 _file->_objConstraint = ld::File::objcConstraintGC;
1985 else if ( (flags & 2) == 2 )
1986 _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
1987 else
1988 _file->_objConstraint = ld::File::objcConstraintRetainRelease;
1989 if ( (flags & 1) == 1 )
1990 _file->_ojcReplacmentClass = true;
1991 if ( sect->size() > 8 ) {
1992 warning("section %s/%s has unexpectedly large size %llu in %s",
1993 sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
1996 else {
1997 warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
1999 continue;
2001 machOSects[count].sect = sect;
2002 switch ( sect->flags() & SECTION_TYPE ) {
2003 case S_SYMBOL_STUBS:
2004 if ( _stubsSectionNum == 0 ) {
2005 _stubsSectionNum = i+1;
2006 _stubsMachOSection = sect;
2008 else
2009 assert(1 && "multiple S_SYMBOL_STUBS sections");
2010 case S_LAZY_SYMBOL_POINTERS:
2011 break;
2012 case S_4BYTE_LITERALS:
2013 totalSectionsSize += sizeof(Literal4Section<A>);
2014 machOSects[count++].type = sectionTypeLiteral4;
2015 break;
2016 case S_8BYTE_LITERALS:
2017 totalSectionsSize += sizeof(Literal8Section<A>);
2018 machOSects[count++].type = sectionTypeLiteral8;
2019 break;
2020 case S_16BYTE_LITERALS:
2021 totalSectionsSize += sizeof(Literal16Section<A>);
2022 machOSects[count++].type = sectionTypeLiteral16;
2023 break;
2024 case S_NON_LAZY_SYMBOL_POINTERS:
2025 totalSectionsSize += sizeof(NonLazyPointerSection<A>);
2026 machOSects[count++].type = sectionTypeNonLazy;
2027 break;
2028 case S_LITERAL_POINTERS:
2029 if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
2030 totalSectionsSize += sizeof(Objc1ClassReferences<A>);
2031 machOSects[count++].type = sectionTypeObjC1ClassRefs;
2033 else {
2034 totalSectionsSize += sizeof(PointerToCStringSection<A>);
2035 machOSects[count++].type = sectionTypeCStringPointer;
2037 break;
2038 case S_CSTRING_LITERALS:
2039 totalSectionsSize += sizeof(CStringSection<A>);
2040 machOSects[count++].type = sectionTypeCString;
2041 break;
2042 case S_MOD_INIT_FUNC_POINTERS:
2043 case S_MOD_TERM_FUNC_POINTERS:
2044 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
2045 case S_INTERPOSING:
2046 case S_ZEROFILL:
2047 case S_REGULAR:
2048 case S_COALESCED:
2049 case S_THREAD_LOCAL_REGULAR:
2050 case S_THREAD_LOCAL_ZEROFILL:
2051 if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
2052 totalSectionsSize += sizeof(CFISection<A>);
2053 machOSects[count++].type = sectionTypeCFI;
2055 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
2056 totalSectionsSize += sizeof(CFStringSection<A>);
2057 machOSects[count++].type = sectionTypeCFString;
2059 else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
2060 totalSectionsSize += sizeof(UTF16StringSection<A>);
2061 machOSects[count++].type = sectionTypeUTF16Strings;
2063 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
2064 totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
2065 machOSects[count++].type = sectionTypeObjC2ClassRefs;
2067 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
2068 totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
2069 machOSects[count++].type = typeObjC2CategoryList;
2071 else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
2072 totalSectionsSize += sizeof(ObjC1ClassSection<A>);
2073 machOSects[count++].type = sectionTypeObjC1Classes;
2075 else {
2076 totalSectionsSize += sizeof(SymboledSection<A>);
2077 machOSects[count++].type = sectionTypeSymboled;
2079 break;
2080 case S_THREAD_LOCAL_VARIABLES:
2081 totalSectionsSize += sizeof(TLVDefsSection<A>);
2082 machOSects[count++].type = sectionTypeTLVDefs;
2083 break;
2084 case S_THREAD_LOCAL_VARIABLE_POINTERS:
2085 default:
2086 throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
2090 // sort by address (mach-o object files don't aways have sections sorted)
2091 ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);
2093 // we will synthesize a dummy Section<A> object for tentative definitions
2094 if ( _tentativeDefinitionCount > 0 ) {
2095 totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
2096 machOSects[count++].type = sectionTypeTentativeDefinitions;
2099 // we will synthesize a dummy Section<A> object for Absolute symbols
2100 if ( _absoluteSymbolCount > 0 ) {
2101 totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
2102 machOSects[count++].type = sectionTypeAbsoluteSymbols;
2105 // allocate one block for all Section objects as well as pointers to each
2106 uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
2107 _file->_sectionsArray = (Section<A>**)space;
2108 _file->_sectionsArrayCount = count;
2109 Section<A>** objects = _file->_sectionsArray;
2110 space += count*sizeof(Section<A>*);
2111 for (uint32_t i=0; i < count; ++i) {
2112 switch ( machOSects[i].type ) {
2113 case sectionTypeIgnore:
2114 break;
2115 case sectionTypeLiteral4:
2116 *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
2117 space += sizeof(Literal4Section<A>);
2118 break;
2119 case sectionTypeLiteral8:
2120 *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
2121 space += sizeof(Literal8Section<A>);
2122 break;
2123 case sectionTypeLiteral16:
2124 *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
2125 space += sizeof(Literal16Section<A>);
2126 break;
2127 case sectionTypeNonLazy:
2128 *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
2129 space += sizeof(NonLazyPointerSection<A>);
2130 break;
2131 case sectionTypeCFI:
2132 _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
2133 *objects++ = _EHFrameSection;
2134 space += sizeof(CFISection<A>);
2135 break;
2136 case sectionTypeCString:
2137 *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
2138 space += sizeof(CStringSection<A>);
2139 break;
2140 case sectionTypeCStringPointer:
2141 *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
2142 space += sizeof(PointerToCStringSection<A>);
2143 break;
2144 case sectionTypeObjC1ClassRefs:
2145 *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
2146 space += sizeof(Objc1ClassReferences<A>);
2147 break;
2148 case sectionTypeUTF16Strings:
2149 *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
2150 space += sizeof(UTF16StringSection<A>);
2151 break;
2152 case sectionTypeCFString:
2153 *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
2154 space += sizeof(CFStringSection<A>);
2155 break;
2156 case sectionTypeObjC2ClassRefs:
2157 *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
2158 space += sizeof(ObjC2ClassRefsSection<A>);
2159 break;
2160 case typeObjC2CategoryList:
2161 *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
2162 space += sizeof(ObjC2CategoryListSection<A>);
2163 break;
2164 case sectionTypeObjC1Classes:
2165 *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
2166 space += sizeof(ObjC1ClassSection<A>);
2167 break;
2168 case sectionTypeSymboled:
2169 *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
2170 space += sizeof(SymboledSection<A>);
2171 break;
2172 case sectionTypeTLVDefs:
2173 *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
2174 space += sizeof(TLVDefsSection<A>);
2175 break;
2176 case sectionTypeCompactUnwind:
2177 _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
2178 *objects++ = _compactUnwindSection;
2179 space += sizeof(CUSection<A>);
2180 break;
2181 case sectionTypeTentativeDefinitions:
2182 *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
2183 space += sizeof(TentativeDefinitionSection<A>);
2184 break;
2185 case sectionTypeAbsoluteSymbols:
2186 _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
2187 *objects++ = _absoluteSection;
2188 space += sizeof(AbsoluteSymbolSection<A>);
2189 break;
2190 default:
2191 throw "internal error uknown SectionType";
2197 template <typename A>
2198 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2200 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2201 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2202 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2203 if ( sect != NULL ) {
2204 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2205 return _file->_sectionsArray[i];
2209 // not strictly in any section
2210 // may be in a zero length section
2211 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2212 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2213 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2214 if ( sect != NULL ) {
2215 if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2216 return _file->_sectionsArray[i];
2221 throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2224 template <typename A>
2225 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2227 for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2228 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2229 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2230 if ( sect != NULL ) {
2231 if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2232 return _file->_sectionsArray[i];
2235 throwf("sectionForNum(%u) section number not for any section", num);
2238 template <typename A>
2239 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2241 Section<A>* section = this->sectionForAddress(addr);
2242 return section->findAtomByAddress(addr);
2245 template <typename A>
2246 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2248 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2249 return NULL;
2250 return findAtomByAddress(addr);
2253 template <typename A>
2254 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2256 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2257 // target is a stub, remove indirection
2258 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2259 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2260 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2261 // can't be to external weak symbol
2262 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2263 *offsetInAtom = 0;
2264 return this->findAtomByName(this->nameFromSymbol(sym));
2266 Atom<A>* target = this->findAtomByAddress(addr);
2267 *offsetInAtom = addr - target->_objAddress;
2268 return target;
2271 template <typename A>
2272 Atom<A>* Parser<A>::findAtomByName(const char* name)
2274 uint8_t* p = _file->_atomsArray;
2275 for(int i=_file->_atomsArrayCount; i > 0; --i) {
2276 Atom<A>* atom = (Atom<A>*)p;
2277 if ( strcmp(name, atom->name()) == 0 )
2278 return atom;
2279 p += sizeof(Atom<A>);
2281 return NULL;
2284 template <typename A>
2285 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2287 if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2288 // target is a stub, remove indirection
2289 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2290 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2291 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2292 target.atom = NULL;
2293 target.name = this->nameFromSymbol(sym);
2294 target.weakImport = this->weakImportFromSymbol(sym);
2295 target.addend = 0;
2296 return;
2298 Section<A>* section = this->sectionForAddress(addr);
2299 target.atom = section->findAtomByAddress(addr);
2300 target.addend = addr - target.atom->_objAddress;
2301 target.weakImport = false;
2302 target.name = NULL;
2305 template <typename A>
2306 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2308 findTargetFromAddress(baseAddr, target);
2309 target.addend = addr - target.atom->_objAddress;
2312 template <typename A>
2313 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2315 if ( sectNum == R_ABS ) {
2316 // target is absolute symbol that corresponds to addr
2317 if ( _absoluteSection != NULL ) {
2318 target.atom = _absoluteSection->findAbsAtomForValue(addr);
2319 if ( target.atom != NULL ) {
2320 target.name = NULL;
2321 target.weakImport = false;
2322 target.addend = 0;
2323 return;
2326 throwf("R_ABS reloc but no absolute symbol at target address");
2329 if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2330 // target is a stub, remove indirection
2331 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2332 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2333 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2334 // use direct reference when stub is to a static function
2335 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2336 this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2338 else {
2339 target.atom = NULL;
2340 target.name = this->nameFromSymbol(sym);
2341 target.weakImport = this->weakImportFromSymbol(sym);
2342 target.addend = 0;
2344 return;
2346 Section<A>* section = this->sectionForNum(sectNum);
2347 target.atom = section->findAtomByAddress(addr);
2348 if ( target.atom == NULL ) {
2349 typedef typename A::P::sint_t sint_t;
2350 sint_t a = (sint_t)addr;
2351 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2352 sint_t sectEnd = sectStart + section->machoSection()->size();
2353 if ( a < sectStart ) {
2354 // target address is before start of section, so must be negative addend
2355 target.atom = section->findAtomByAddress(sectStart);
2356 target.addend = a - sectStart;
2357 target.weakImport = false;
2358 target.name = NULL;
2359 return;
2361 else if ( a >= sectEnd ) {
2362 target.atom = section->findAtomByAddress(sectEnd-1);
2363 target.addend = a - sectEnd;
2364 target.weakImport = false;
2365 target.name = NULL;
2366 return;
2369 assert(target.atom != NULL);
2370 target.addend = addr - target.atom->_objAddress;
2371 target.weakImport = false;
2372 target.name = NULL;
2375 template <typename A>
2376 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2378 // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2379 // a matching provider name, add a by-name kDtraceTypeReference at probe site
2380 const char* dollar = strchr(providerName, '$');
2381 if ( dollar != NULL ) {
2382 int providerNameLen = dollar-providerName+1;
2383 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2384 const char* typeDollar = strchr(*it, '$');
2385 if ( typeDollar != NULL ) {
2386 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2387 addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2394 template <typename A>
2395 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2397 uint64_t closestSymAddr = 0;
2398 const char* closestSymName = NULL;
2399 for (uint32_t i=0; i < this->_symbolCount; ++i) {
2400 const macho_nlist<P>& sym = symbolFromIndex(i);
2401 // ignore stabs
2402 if ( (sym.n_type() & N_STAB) != 0 )
2403 continue;
2405 // only look at definitions
2406 if ( (sym.n_type() & N_TYPE) != N_SECT )
2407 continue;
2409 // return with exact match
2410 if ( sym.n_value() == addr )
2411 return nameFromSymbol(sym);
2413 // record closest seen so far
2414 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2415 closestSymName = nameFromSymbol(sym);
2418 return (closestSymName != NULL) ? closestSymName : "unknown";
2422 template <typename A>
2423 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2425 // some fixup pairs can be combined
2426 ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2427 ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2428 bool combined = false;
2429 if ( target.addend == 0 ) {
2430 cl = ld::Fixup::k1of1;
2431 combined = true;
2432 switch ( setKind ) {
2433 case ld::Fixup::kindStoreLittleEndian32:
2434 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2435 break;
2436 case ld::Fixup::kindStoreLittleEndian64:
2437 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2438 break;
2439 case ld::Fixup::kindStoreBigEndian32:
2440 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2441 break;
2442 case ld::Fixup::kindStoreBigEndian64:
2443 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2444 break;
2445 case ld::Fixup::kindStoreX86BranchPCRel32:
2446 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2447 break;
2448 case ld::Fixup::kindStoreX86PCRel32:
2449 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2450 break;
2451 case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2452 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2453 break;
2454 case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2455 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2456 break;
2457 case ld::Fixup::kindStoreX86Abs32TLVLoad:
2458 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2459 break;
2460 case ld::Fixup::kindStoreARMBranch24:
2461 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2462 break;
2463 case ld::Fixup::kindStoreThumbBranch22:
2464 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2465 break;
2466 default:
2467 combined = false;
2468 cl = ld::Fixup::k1of2;
2469 break;
2473 if ( target.atom != NULL ) {
2474 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2475 addFixup(src, cl, firstKind, target.atom);
2477 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2478 addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2480 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2481 // backing string in CFStrings should always be direct
2482 addFixup(src, cl, firstKind, target.atom);
2484 else {
2485 // change direct fixup to by-name fixup
2486 addFixup(src, cl, firstKind, false, target.atom->name());
2489 else {
2490 addFixup(src, cl, firstKind, target.weakImport, target.name);
2492 if ( target.addend == 0 ) {
2493 if ( ! combined )
2494 addFixup(src, ld::Fixup::k2of2, setKind);
2496 else {
2497 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2498 addFixup(src, ld::Fixup::k3of3, setKind);
2502 template <typename A>
2503 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2505 ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2506 if ( target.atom != NULL ) {
2507 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2508 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2510 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2511 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2513 else {
2514 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2517 else {
2518 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2520 if ( target.addend == 0 ) {
2521 assert(picBase.atom != NULL);
2522 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2523 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2524 addFixup(src, ld::Fixup::k4of4, kind);
2526 else {
2527 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2528 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2529 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2530 addFixup(src, ld::Fixup::k5of5, kind);
2536 template <typename A>
2537 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2538 struct Parser<A>::LabelAndCFIBreakIterator& it,
2539 const struct Parser<A>::CFI_CU_InfoArrays&)
2541 return parser.tentativeDefinitionCount();
2544 template <typename A>
2545 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2546 struct Parser<A>::LabelAndCFIBreakIterator& it,
2547 const struct Parser<A>::CFI_CU_InfoArrays&)
2549 this->_beginAtoms = (Atom<A>*)p;
2550 uint32_t count = 0;
2551 for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2552 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
2553 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2554 uint64_t size = sym.n_value();
2555 uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2556 if ( alignP2 == 0 ) {
2557 // common symbols align to their size
2558 // that is, a 4-byte common aligns to 4-bytes
2559 // if this size is not a power of two,
2560 // then round up to the next power of two
2561 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2562 if ( size != (1ULL << alignP2) )
2563 ++alignP2;
2565 // limit alignment of extremely large commons to 2^15 bytes (8-page)
2566 if ( alignP2 > 15 )
2567 alignP2 = 15;
2568 Atom<A>* allocatedSpace = (Atom<A>*)p;
2569 new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2570 ld::Atom::definitionTentative, ld::Atom::combineByName,
2571 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2572 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2573 p += sizeof(Atom<A>);
2574 ++count;
2577 this->_endAtoms = (Atom<A>*)p;
2578 return count;
2582 template <typename A>
2583 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2584 struct Parser<A>::LabelAndCFIBreakIterator& it,
2585 const struct Parser<A>::CFI_CU_InfoArrays&)
2587 return parser.absoluteSymbolCount();
2590 template <typename A>
2591 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2592 struct Parser<A>::LabelAndCFIBreakIterator& it,
2593 const struct Parser<A>::CFI_CU_InfoArrays&)
2595 this->_beginAtoms = (Atom<A>*)p;
2596 uint32_t count = 0;
2597 for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2598 const macho_nlist<P>& sym = parser.symbolFromIndex(i);
2599 if ( (sym.n_type() & N_TYPE) != N_ABS )
2600 continue;
2601 const char* absName = parser.nameFromSymbol(sym);
2602 // ignore .objc_class_name_* symbols
2603 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2604 continue;
2605 // ignore .objc_class_name_* symbols
2606 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2607 continue;
2608 // ignore empty *.eh symbols
2609 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2610 continue;
2612 Atom<A>* allocatedSpace = (Atom<A>*)p;
2613 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2614 p += sizeof(Atom<A>);
2615 ++count;
2617 this->_endAtoms = (Atom<A>*)p;
2618 return count;
2621 template <typename A>
2622 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2624 Atom<A>* end = this->_endAtoms;
2625 for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2626 if ( p->_objAddress == value )
2627 return p;
2629 return NULL;
2633 template <typename A>
2634 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2636 if ( indirectIndex >= _indirectTableCount )
2637 throw "indirect symbol index out of range";
2638 return E::get32(_indirectTable[indirectIndex]);
2641 template <typename A>
2642 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2644 if ( index > _symbolCount )
2645 throw "symbol index out of range";
2646 return _symbols[index];
2649 template <typename A>
2650 const macho_section<typename A::P>* Parser<A>::machOSectionFromSectionIndex(uint32_t index)
2652 if ( index >= _machOSectionsCount )
2653 throw "section index out of range";
2654 return &_sectionsStart[index];
2657 template <typename A>
2658 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
2660 uint32_t elementSize = 0;
2661 switch ( sect->flags() & SECTION_TYPE ) {
2662 case S_SYMBOL_STUBS:
2663 elementSize = sect->reserved2();
2664 break;
2665 case S_LAZY_SYMBOL_POINTERS:
2666 case S_NON_LAZY_SYMBOL_POINTERS:
2667 elementSize = sizeof(pint_t);
2668 break;
2669 default:
2670 throw "section does not use inirect symbol table";
2672 uint32_t indexInSection = (addr - sect->addr()) / elementSize;
2673 uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
2674 return this->indirectSymbol(indexIntoIndirectTable);
2679 template <typename A>
2680 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
2682 return &_strings[sym.n_strx()];
2685 template <typename A>
2686 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
2688 if ( (sym.n_type() & N_EXT) == 0 )
2689 return ld::Atom::scopeTranslationUnit;
2690 else if ( (sym.n_type() & N_PEXT) != 0 )
2691 return ld::Atom::scopeLinkageUnit;
2692 else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
2693 return ld::Atom::scopeLinkageUnit;
2694 else
2695 return ld::Atom::scopeGlobal;
2698 template <typename A>
2699 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
2701 switch ( sym.n_type() & N_TYPE ) {
2702 case N_ABS:
2703 return ld::Atom::definitionAbsolute;
2704 case N_SECT:
2705 return ld::Atom::definitionRegular;
2706 case N_UNDF:
2707 if ( sym.n_value() != 0 )
2708 return ld::Atom::definitionTentative;
2710 throw "definitionFromSymbol() bad symbol";
2713 template <typename A>
2714 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
2716 if ( sym.n_desc() & N_WEAK_DEF )
2717 return ld::Atom::combineByName;
2718 else
2719 return ld::Atom::combineNever;
2723 template <typename A>
2724 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
2726 const char* symbolName = nameFromSymbol(sym);
2727 // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
2728 // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
2729 if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
2730 return ld::Atom::symbolTableInAndNeverStrip;
2731 else if ( symbolName[0] == 'l' )
2732 return ld::Atom::symbolTableNotInFinalLinkedImages;
2733 else if ( symbolName[0] == 'L' )
2734 return ld::Atom::symbolTableNotIn;
2735 else
2736 return ld::Atom::symbolTableIn;
2739 template <typename A>
2740 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
2742 return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
2745 template <typename A>
2746 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
2748 return ( sym.n_desc() & N_ARM_THUMB_DEF );
2751 template <typename A>
2752 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
2754 return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
2757 template <typename A>
2758 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
2760 return ( sym.n_desc() & N_SYMBOL_RESOLVER );
/* Advance *OFFSET past one LEB128 value (signed or unsigned), never
   moving beyond END.  A value cut short by END leaves *OFFSET == END.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
	const uint8_t* cursor = *offset;
	for ( ; cursor != end; ++cursor ) {
		if ( *cursor < 0x80 ) {
			/* no continuation bit: this is the last byte, consume it too */
			++cursor;
			break;
		}
	}
	*offset = cursor;
}
/* Decode one ULEB128 value at *OFFSET into a 64-bit word and advance
   *OFFSET past it.  Returns (uint64_t)-1 if END is reached before the
   value is complete, or if the value does not fit in 64 bits (in which
   case the remaining bytes of the value are still consumed).  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
	uint64_t value = 0;
	int shift = 0;

	for (;;) {
		if (*offset == end)
			return (uint64_t) -1;

		const uint8_t byte = **offset;
		++(*offset);
		const uint64_t payload = byte & 0x7f;

		/* the payload fits only if shifting it up and back loses no bits */
		const bool fits = (shift < 64) && (((payload << shift) >> shift) == payload);
		if (fits) {
			value |= payload << shift;
			shift += 7;
		}
		else {
			value = (uint64_t) -1;
		}

		if (byte < 0x80)
			return value;
	}
}
2799 /* Skip over a DWARF attribute of form FORM. */
2800 template <typename A>
2801 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
2802 uint8_t addr_size, bool dwarf64)
2804 int64_t sz=0;
2806 switch (form)
2808 case DW_FORM_addr:
2809 sz = addr_size;
2810 break;
2812 case DW_FORM_block2:
2813 if (end - *offset < 2)
2814 return false;
2815 sz = 2 + A::P::E::get16(*(uint16_t*)offset);
2816 break;
2818 case DW_FORM_block4:
2819 if (end - *offset < 4)
2820 return false;
2821 sz = 2 + A::P::E::get32(*(uint32_t*)offset);
2822 break;
2824 case DW_FORM_data2:
2825 case DW_FORM_ref2:
2826 sz = 2;
2827 break;
2829 case DW_FORM_data4:
2830 case DW_FORM_ref4:
2831 sz = 4;
2832 break;
2834 case DW_FORM_data8:
2835 case DW_FORM_ref8:
2836 sz = 8;
2837 break;
2839 case DW_FORM_string:
2840 while (*offset != end && **offset)
2841 ++*offset;
2842 case DW_FORM_data1:
2843 case DW_FORM_flag:
2844 case DW_FORM_ref1:
2845 sz = 1;
2846 break;
2848 case DW_FORM_block:
2849 sz = read_uleb128 (offset, end);
2850 break;
2852 case DW_FORM_block1:
2853 if (*offset == end)
2854 return false;
2855 sz = 1 + **offset;
2856 break;
2858 case DW_FORM_sdata:
2859 case DW_FORM_udata:
2860 case DW_FORM_ref_udata:
2861 skip_leb128 (offset, end);
2862 return true;
2864 case DW_FORM_strp:
2865 case DW_FORM_ref_addr:
2866 sz = 4;
2867 break;
2869 default:
2870 return false;
2872 if (end - *offset < sz)
2873 return false;
2874 *offset += sz;
2875 return true;
2879 template <typename A>
2880 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t* p)
2882 if ( form == DW_FORM_string )
2883 return (const char*)p;
2884 else if ( form == DW_FORM_strp ) {
2885 uint32_t offset = E::get32(*((uint32_t*)p));
2886 const char* dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
2887 if ( offset > _file->_dwarfDebugStringSect->size() ) {
2888 warning("unknown dwarf DW_FORM_strp (offset=0x%08X) is too big in %s\n", offset, this->_path);
2889 return NULL;
2891 return &dwarfStrings[offset];
2893 warning("unknown dwarf string encoding (form=%lld) in %s\n", form, this->_path);
2894 return NULL;
2898 template <typename A>
2899 struct AtomAndLineInfo {
2900 Atom<A>* atom;
2901 ld::Atom::LineInfo info;
2905 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
2906 // Returns whether a stabStr belonging to an N_FUN stab represents a
2907 // symbolic constant rather than a function
2908 template <typename A>
2909 bool Parser<A>::isConstFunStabs(const char *stabStr)
2911 const char* colon;
2912 // N_FUN can be used for both constants and for functions. In case it's a constant,
2913 // the format of the stabs string is "symname:c=<value>;"
2914 // ':' cannot appear in the symbol name, except if it's an Objective-C method
2915 // (in which case the symbol name starts with + or -, and then it's definitely
2916 // not a constant)
2917 return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
2918 && ((colon = strchr(stabStr, ':')) != NULL)
2919 && (colon[1] == 'c') && (colon[2] == '=');
2923 template <typename A>
2924 void Parser<A>::parseDebugInfo()
2926 // check for dwarf __debug_info section
2927 if ( _file->_dwarfDebugInfoSect == NULL ) {
2928 // if no DWARF debug info, look for stabs
2929 this->parseStabs();
2930 return;
2932 if ( _file->_dwarfDebugInfoSect->size() == 0 )
2933 return;
2935 uint64_t stmtList;
2936 if ( !read_comp_unit(&_file->_dwarfTranslationUnitFile, &_file->_dwarfTranslationUnitDir, &stmtList) ) {
2937 // if can't parse dwarf, warn and give up
2938 _file->_dwarfTranslationUnitFile = NULL;
2939 _file->_dwarfTranslationUnitDir = NULL;
2940 warning("can't parse dwarf compilation unit info in %s", _path);
2941 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
2942 return;
2945 // add line number info to atoms from dwarf
2946 std::vector<AtomAndLineInfo<A> > entries;
2947 entries.reserve(64);
2948 if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
2949 // file with just data will have no __debug_line info
2950 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
2951 // validate stmt_list
2952 if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
2953 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
2954 struct line_reader_data* lines = line_open(&debug_line[stmtList],
2955 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
2956 struct line_info result;
2957 Atom<A>* curAtom = NULL;
2958 uint32_t curAtomOffset = 0;
2959 uint32_t curAtomAddress = 0;
2960 uint32_t curAtomSize = 0;
2961 std::map<uint32_t,const char*> dwarfIndexToFile;
2962 if ( lines != NULL ) {
2963 while ( line_next(lines, &result, line_stop_pc) ) {
2964 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
2965 // " curAtomAddress=0x%X, curAtomSize=0x%X\n",
2966 // curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
2967 // work around weird debug line table compiler generates if no functions in __text section
2968 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
2969 continue;
2970 // for performance, see if in next pc is in current atom
2971 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
2972 curAtomOffset = result.pc - curAtomAddress;
2974 // or pc at end of current atom
2975 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
2976 curAtomOffset = result.pc - curAtomAddress;
2978 // or only one function that is a one line function
2979 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
2980 curAtom = this->findAtomByAddress(0);
2981 curAtomOffset = result.pc - curAtom->objectAddress();
2982 curAtomAddress = curAtom->objectAddress();
2983 curAtomSize = curAtom->size();
2985 else {
2986 // do slow look up of atom by address
2987 try {
2988 curAtom = this->findAtomByAddress(result.pc);
2990 catch (...) {
2991 // in case of bug in debug info, don't abort link, just limp on
2992 curAtom = NULL;
2994 if ( curAtom == NULL )
2995 break; // file has line info but no functions
2996 if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
2997 // a one line function can be returned by line_next() as one entry with pc at end of blob
2998 // look for alt atom starting at end of previous atom
2999 uint32_t previousEnd = curAtomAddress+curAtomSize;
3000 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3001 if ( alt == NULL )
3002 continue; // ignore spurious debug info for stubs
3003 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3004 curAtom = alt;
3005 curAtomOffset = result.pc - alt->objectAddress();
3006 curAtomAddress = alt->objectAddress();
3007 curAtomSize = alt->size();
3009 else {
3010 curAtomOffset = result.pc - curAtom->objectAddress();
3011 curAtomAddress = curAtom->objectAddress();
3012 curAtomSize = curAtom->size();
3015 else {
3016 curAtomOffset = result.pc - curAtom->objectAddress();
3017 curAtomAddress = curAtom->objectAddress();
3018 curAtomSize = curAtom->size();
3021 const char* filename;
3022 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3023 if ( pos == dwarfIndexToFile.end() ) {
3024 filename = line_file(lines, result.file);
3025 dwarfIndexToFile[result.file] = filename;
3027 else {
3028 filename = pos->second;
3030 // only record for ~8000 line info records per function
3031 if ( curAtom->roomForMoreLineInfoCount() ) {
3032 AtomAndLineInfo<A> entry;
3033 entry.atom = curAtom;
3034 entry.info.atomOffset = curAtomOffset;
3035 entry.info.fileName = filename;
3036 entry.info.lineNumber = result.line;
3037 //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3038 // result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3039 entries.push_back(entry);
3040 curAtom->incrementLineInfoCount();
3042 if ( result.end_of_sequence ) {
3043 curAtom = NULL;
3046 line_free(lines);
3052 // assign line info start offset for each atom
3053 uint8_t* p = _file->_atomsArray;
3054 uint32_t liOffset = 0;
3055 for(int i=_file->_atomsArrayCount; i > 0; --i) {
3056 Atom<A>* atom = (Atom<A>*)p;
3057 atom->_lineInfoStartIndex = liOffset;
3058 liOffset += atom->_lineInfoCount;
3059 atom->_lineInfoCount = 0;
3060 p += sizeof(Atom<A>);
3062 assert(liOffset == entries.size());
3063 _file->_lineInfos.reserve(liOffset);
3065 // copy each line info for each atom
3066 for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3067 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3068 _file->_lineInfos[slot] = it->info;
3069 it->atom->_lineInfoCount++;
3072 // done with temp vector
3073 entries.clear();
3076 template <typename A>
3077 void Parser<A>::parseStabs()
3079 // scan symbol table for stabs entries
3080 Atom<A>* currentAtom = NULL;
3081 pint_t currentAtomAddress = 0;
3082 enum { start, inBeginEnd, inFun } state = start;
3083 for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3084 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3085 bool useStab = true;
3086 uint8_t type = sym.n_type();
3087 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3088 if ( (type & N_STAB) != 0 ) {
3089 _file->_debugInfoKind = (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3090 ld::relocatable::File::Stab stab;
3091 stab.atom = NULL;
3092 stab.type = type;
3093 stab.other = sym.n_sect();
3094 stab.desc = sym.n_desc();
3095 stab.value = sym.n_value();
3096 stab.string = NULL;
3097 switch (state) {
3098 case start:
3099 switch (type) {
3100 case N_BNSYM:
3101 // beginning of function block
3102 state = inBeginEnd;
3103 // fall into case to lookup atom by addresss
3104 case N_LCSYM:
3105 case N_STSYM:
3106 currentAtomAddress = sym.n_value();
3107 currentAtom = this->findAtomByAddress(currentAtomAddress);
3108 if ( currentAtom != NULL ) {
3109 stab.atom = currentAtom;
3110 stab.string = symString;
3112 else {
3113 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3114 (uint64_t)sym.n_value(), _path);
3116 break;
3117 case N_SO:
3118 case N_OSO:
3119 case N_OPT:
3120 case N_LSYM:
3121 case N_RSYM:
3122 case N_PSYM:
3123 // not associated with an atom, just copy
3124 stab.string = symString;
3125 break;
3126 case N_GSYM:
3128 // n_value field is NOT atom address ;-(
3129 // need to find atom by name match
3130 const char* colon = strchr(symString, ':');
3131 if ( colon != NULL ) {
3132 // build underscore leading name
3133 int nameLen = colon - symString;
3134 char symName[nameLen+2];
3135 strlcpy(&symName[1], symString, nameLen+1);
3136 symName[0] = '_';
3137 symName[nameLen+1] = '\0';
3138 currentAtom = this->findAtomByName(symName);
3139 if ( currentAtom != NULL ) {
3140 stab.atom = currentAtom;
3141 stab.string = symString;
3144 else {
3145 // might be a debug-note without trailing :G()
3146 currentAtom = this->findAtomByName(symString);
3147 if ( currentAtom != NULL ) {
3148 stab.atom = currentAtom;
3149 stab.string = symString;
3152 if ( stab.atom == NULL ) {
3153 // ld_classic added bogus GSYM stabs for old style dtrace probes
3154 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3155 warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3156 useStab = false;
3158 break;
3160 case N_FUN:
3161 if ( isConstFunStabs(symString) ) {
3162 // constant not associated with a function
3163 stab.string = symString;
3165 else {
3166 // old style stabs without BNSYM
3167 state = inFun;
3168 currentAtomAddress = sym.n_value();
3169 currentAtom = this->findAtomByAddress(currentAtomAddress);
3170 if ( currentAtom != NULL ) {
3171 stab.atom = currentAtom;
3172 stab.string = symString;
3174 else {
3175 warning("can't find atom for stabs FUN at %08llX in %s",
3176 (uint64_t)currentAtomAddress, _path);
3179 break;
3180 case N_SOL:
3181 case N_SLINE:
3182 stab.string = symString;
3183 // old stabs
3184 break;
3185 case N_BINCL:
3186 case N_EINCL:
3187 case N_EXCL:
3188 stab.string = symString;
3189 // -gfull built .o file
3190 break;
3191 default:
3192 warning("unknown stabs type 0x%X in %s", type, _path);
3194 break;
3195 case inBeginEnd:
3196 stab.atom = currentAtom;
3197 switch (type) {
3198 case N_ENSYM:
3199 state = start;
3200 currentAtom = NULL;
3201 break;
3202 case N_LCSYM:
3203 case N_STSYM:
3205 Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3206 if ( nestedAtom != NULL ) {
3207 stab.atom = nestedAtom;
3208 stab.string = symString;
3210 else {
3211 warning("can't find atom for stabs 0x%X at %08llX in %s",
3212 type, (uint64_t)sym.n_value(), _path);
3214 break;
3216 case N_LBRAC:
3217 case N_RBRAC:
3218 case N_SLINE:
3219 // adjust value to be offset in atom
3220 stab.value -= currentAtomAddress;
3221 default:
3222 stab.string = symString;
3223 break;
3225 break;
3226 case inFun:
3227 switch (type) {
3228 case N_FUN:
3229 if ( isConstFunStabs(symString) ) {
3230 stab.atom = currentAtom;
3231 stab.string = symString;
3233 else {
3234 if ( sym.n_sect() != 0 ) {
3235 // found another start stab, must be really old stabs...
3236 currentAtomAddress = sym.n_value();
3237 currentAtom = this->findAtomByAddress(currentAtomAddress);
3238 if ( currentAtom != NULL ) {
3239 stab.atom = currentAtom;
3240 stab.string = symString;
3242 else {
3243 warning("can't find atom for stabs FUN at %08llX in %s",
3244 (uint64_t)currentAtomAddress, _path);
3247 else {
3248 // found ending stab, switch back to start state
3249 stab.string = symString;
3250 stab.atom = currentAtom;
3251 state = start;
3252 currentAtom = NULL;
3255 break;
3256 case N_LBRAC:
3257 case N_RBRAC:
3258 case N_SLINE:
3259 // adjust value to be offset in atom
3260 stab.value -= currentAtomAddress;
3261 stab.atom = currentAtom;
3262 break;
3263 case N_SO:
3264 stab.string = symString;
3265 state = start;
3266 break;
3267 default:
3268 stab.atom = currentAtom;
3269 stab.string = symString;
3270 break;
3272 break;
3274 // add to list of stabs for this .o file
3275 if ( useStab )
3276 _file->_stabs.push_back(stab);
3283 // Look at the compilation unit DIE and determine
3284 // its NAME, compilation directory (in COMP_DIR) and its
3285 // line number information offset (in STMT_LIST). NAME and COMP_DIR
3286 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3287 // STMT_LIST will be (uint64_t) -1.
3289 // At present this assumes that there's only one compilation unit DIE.
3291 template <typename A>
3292 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3293 uint64_t *stmt_list)
3295 const uint8_t * debug_info;
3296 const uint8_t * debug_abbrev;
3297 const uint8_t * di;
3298 const uint8_t * da;
3299 const uint8_t * end;
3300 const uint8_t * enda;
3301 uint64_t sz;
3302 uint16_t vers;
3303 uint64_t abbrev_base;
3304 uint64_t abbrev;
3305 uint8_t address_size;
3306 bool dwarf64;
3308 *name = NULL;
3309 *comp_dir = NULL;
3310 *stmt_list = (uint64_t) -1;
3312 if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3313 return false;
3315 debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3316 debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3317 di = debug_info;
3319 if (_file->_dwarfDebugInfoSect->size() < 12)
3320 /* Too small to be a real debug_info section. */
3321 return false;
3322 sz = A::P::E::get32(*(uint32_t*)di);
3323 di += 4;
3324 dwarf64 = sz == 0xffffffff;
3325 if (dwarf64)
3326 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3327 else if (sz > 0xffffff00)
3328 /* Unknown dwarf format. */
3329 return false;
3331 /* Verify claimed size. */
3332 if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3333 return false;
3335 vers = A::P::E::get16(*(uint16_t*)di);
3336 if (vers < 2 || vers > 3)
3337 /* DWARF version wrong for this code.
3338 Chances are we could continue anyway, but we don't know for sure. */
3339 return false;
3340 di += 2;
3342 /* Find the debug_abbrev section. */
3343 abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3344 di += dwarf64 ? 8 : 4;
3346 if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3347 return false;
3348 da = debug_abbrev + abbrev_base;
3349 enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3351 address_size = *di++;
3353 /* Find the abbrev number we're looking for. */
3354 end = di + sz;
3355 abbrev = read_uleb128 (&di, end);
3356 if (abbrev == (uint64_t) -1)
3357 return false;
3359 /* Skip through the debug_abbrev section looking for that abbrev. */
3360 for (;;)
3362 uint64_t this_abbrev = read_uleb128 (&da, enda);
3363 uint64_t attr;
3365 if (this_abbrev == abbrev)
3366 /* This is almost always taken. */
3367 break;
3368 skip_leb128 (&da, enda); /* Skip the tag. */
3369 if (da == enda)
3370 return false;
3371 da++; /* Skip the DW_CHILDREN_* value. */
3373 do {
3374 attr = read_uleb128 (&da, enda);
3375 skip_leb128 (&da, enda);
3376 } while (attr != 0 && attr != (uint64_t) -1);
3377 if (attr != 0)
3378 return false;
3381 /* Check that the abbrev is one for a DW_TAG_compile_unit. */
3382 if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3383 return false;
3384 if (da == enda)
3385 return false;
3386 da++; /* Skip the DW_CHILDREN_* value. */
3388 /* Now, go through the DIE looking for DW_AT_name,
3389 DW_AT_comp_dir, and DW_AT_stmt_list. */
3390 for (;;)
3392 uint64_t attr = read_uleb128 (&da, enda);
3393 uint64_t form = read_uleb128 (&da, enda);
3395 if (attr == (uint64_t) -1)
3396 return false;
3397 else if (attr == 0)
3398 return true;
3400 if (form == DW_FORM_indirect)
3401 form = read_uleb128 (&di, end);
3403 if (attr == DW_AT_name)
3404 *name = getDwarfString(form, di);
3405 else if (attr == DW_AT_comp_dir)
3406 *comp_dir = getDwarfString(form, di);
3407 else if (attr == DW_AT_stmt_list && form == DW_FORM_data4)
3408 *stmt_list = A::P::E::get32(*(uint32_t*)di);
3409 else if (attr == DW_AT_stmt_list && form == DW_FORM_data8)
3410 *stmt_list = A::P::E::get64(*(uint64_t*)di);
3411 if (! skip_form (&di, end, form, address_size, dwarf64))
3412 return false;
3418 template <typename A>
3419 File<A>::~File()
3421 free(_sectionsArray);
3422 free(_atomsArray);
3425 template <typename A>
3426 bool File<A>::translationUnitSource(const char** dir, const char** name) const
3428 if ( _debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3429 *dir = _dwarfTranslationUnitDir;
3430 *name = _dwarfTranslationUnitFile;
3431 return (_dwarfTranslationUnitFile != NULL);
3433 return false;
3438 template <typename A>
3439 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3441 handler.doFile(*this);
3442 uint8_t* p = _atomsArray;
3443 for(int i=_atomsArrayCount; i > 0; --i) {
3444 handler.doAtom(*((Atom<A>*)p));
3445 p += sizeof(Atom<A>);
3447 return (_atomsArrayCount != 0);
3450 template <typename A>
3451 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3453 // mach-o section record only has room for 16-byte seg/sect names
3454 // so a 16-byte name has no trailing zero
3455 const char* name = sect->segname();
3456 if ( strlen(name) < 16 )
3457 return name;
3458 char* tmp = new char[17];
3459 strlcpy(tmp, name, 17);
3460 return tmp;
3463 template <typename A>
3464 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3466 const char* name = sect->sectname();
3467 if ( strlen(name) < 16 )
3468 return name;
3470 // special case common long section names so we don't have to malloc
3471 if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3472 return "__objc_classrefs";
3473 if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3474 return "__objc_classlist";
3475 if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3476 return "__objc_nlclslist";
3477 if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3478 return "__objc_nlcatlist";
3479 if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3480 return "__objc_protolist";
3481 if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3482 return "__objc_protorefs";
3483 if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3484 return "__objc_superrefs";
3485 if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3486 return "__objc_imageinfo";
3487 if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3488 return "__objc_stringobj";
3489 if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3490 return "__gcc_except_tab";
3492 char* tmp = new char[17];
3493 strlcpy(tmp, name, 17);
3494 return tmp;
3497 template <typename A>
3498 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3500 return true;
3503 template <typename A>
3504 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3506 // mach-o .o files do not contain segment permissions
3507 // we just know TEXT is special
3508 return ( strcmp(sect->segname(), "__TEXT") != 0 );
3511 template <typename A>
3512 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3514 // mach-o .o files do not contain segment permissions
3515 // we just know TEXT is special
3516 return ( strcmp(sect->segname(), "__TEXT") == 0 );
3520 template <typename A>
3521 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3523 switch ( sect->flags() & SECTION_TYPE ) {
3524 case S_ZEROFILL:
3525 return ld::Section::typeZeroFill;
3526 case S_CSTRING_LITERALS:
3527 if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3528 return ld::Section::typeCString;
3529 else
3530 return ld::Section::typeNonStdCString;
3531 case S_4BYTE_LITERALS:
3532 return ld::Section::typeLiteral4;
3533 case S_8BYTE_LITERALS:
3534 return ld::Section::typeLiteral8;
3535 case S_LITERAL_POINTERS:
3536 return ld::Section::typeCStringPointer;
3537 case S_NON_LAZY_SYMBOL_POINTERS:
3538 return ld::Section::typeNonLazyPointer;
3539 case S_LAZY_SYMBOL_POINTERS:
3540 return ld::Section::typeLazyPointer;
3541 case S_SYMBOL_STUBS:
3542 return ld::Section::typeStub;
3543 case S_MOD_INIT_FUNC_POINTERS:
3544 return ld::Section::typeInitializerPointers;
3545 case S_MOD_TERM_FUNC_POINTERS:
3546 return ld::Section::typeTerminatorPointers;
3547 case S_INTERPOSING:
3548 return ld::Section::typeUnclassified;
3549 case S_16BYTE_LITERALS:
3550 return ld::Section::typeLiteral16;
3551 case S_REGULAR:
3552 case S_COALESCED:
3553 if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3554 return ld::Section::typeCode;
3556 else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3557 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3558 return ld::Section::typeCFI;
3559 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3560 return ld::Section::typeUTF16Strings;
3561 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3562 return ld::Section::typeCode;
3563 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3564 return ld::Section::typeCode;
3565 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3566 return ld::Section::typeInitializerPointers;
3568 else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3569 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3570 return ld::Section::typeCFString;
3571 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3572 return ld::Section::typeDyldInfo;
3573 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
3574 return ld::Section::typeDyldInfo;
3575 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3576 return ld::Section::typeObjCClassRefs;
3577 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
3578 return ld::Section::typeObjC2CategoryList;
3580 else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
3581 if ( strcmp(sect->sectname(), "__class") == 0 )
3582 return ld::Section::typeObjC1Classes;
3584 break;
3585 case S_THREAD_LOCAL_REGULAR:
3586 return ld::Section::typeTLVInitialValues;
3587 case S_THREAD_LOCAL_ZEROFILL:
3588 return ld::Section::typeTLVZeroFill;
3589 case S_THREAD_LOCAL_VARIABLES:
3590 return ld::Section::typeTLVDefs;
3591 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
3592 return ld::Section::typeTLVInitializerPointers;
3594 return ld::Section::typeUnclassified;
3598 template <typename A>
3599 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
3601 // do a binary search of atom array
3602 uint32_t atomCount = end - start;
3603 Atom<A>* base = start;
3604 for (uint32_t n = atomCount; n > 0; n /= 2) {
3605 Atom<A>* pivot = &base[n/2];
3606 pint_t atomStartAddr = pivot->_objAddress;
3607 pint_t atomEndAddr = atomStartAddr + pivot->_size;
3608 if ( atomStartAddr <= addr ) {
3609 // address in normal atom
3610 if (addr < atomEndAddr)
3611 return pivot;
3612 // address in "end" label (but not in alias)
3613 if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
3614 return pivot;
3616 if ( addr >= atomEndAddr ) {
3617 // key > pivot
3618 // move base to atom after pivot
3619 base = &pivot[1];
3620 --n;
3622 else {
3623 // key < pivot
3624 // keep same base
3627 return NULL;
3630 template <typename A>
3631 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
3633 const uint32_t sectionAlignment = this->_machOSection->align();
3634 return ld::Atom::Alignment(sectionAlignment, (addr % (1 << sectionAlignment)));
3637 template <typename A>
3638 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
3640 if ( _machOSection == NULL )
3641 return 0;
3642 else
3643 return 1 + (this->_machOSection - parser.firstMachOSection());
3646 // arm does not have zero cost exceptions
3647 template <> uint32_t CFISection<arm>::cfiCount() { return 0; }
3649 template <typename A>
3650 uint32_t CFISection<A>::cfiCount()
3652 // create ObjectAddressSpace object for use by libunwind
3653 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3654 return libunwind::CFI_Parser<OAS>::getCFICount(oas,
3655 this->_machOSection->addr(), this->_machOSection->size());
3658 template <typename A>
3659 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
3661 Parser<A>* parser = (Parser<A>*)ref;
3662 if ( ! parser->convertUnwindInfo() )
3663 return;
3664 if ( funcAddr != CFI_INVALID_ADDRESS ) {
3665 // atoms are not constructed yet, so scan symbol table for labels
3666 const char* name = parser->scanSymbolTableForAddress(funcAddr);
3667 warning("could not create compact unwind for %s: %s", name, msg);
3669 else {
3670 warning("could not create compact unwind: %s", msg);
3674 template <>
3675 bool CFISection<x86_64>::needsRelocating()
3677 return true;
3680 template <typename A>
3681 bool CFISection<A>::needsRelocating()
3683 return false;
3686 template <>
3687 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
3688 libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
3689 uint32_t count)
3691 // copy __eh_frame data to buffer
3692 memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
3694 // and apply relocations
3695 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
3696 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
3697 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
3698 uint64_t value = 0;
3699 switch ( reloc->r_type() ) {
3700 case X86_64_RELOC_SUBTRACTOR:
3701 value = 0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3702 ++reloc;
3703 if ( reloc->r_extern() )
3704 value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3705 break;
3706 case X86_64_RELOC_UNSIGNED:
3707 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3708 break;
3709 case X86_64_RELOC_GOT:
3710 // this is used for the reference to the personality function in CIEs
3711 // store the symbol number of the personality function for later use as a Fixup
3712 value = reloc->r_symbolnum();
3713 break;
3714 default:
3715 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
3716 break;
3718 uint64_t* p64;
3719 uint32_t* p32;
3720 switch ( reloc->r_length() ) {
3721 case 3:
3722 p64 = (uint64_t*)&buffer[reloc->r_address()];
3723 E::set64(*p64, value + E::get64(*p64));
3724 break;
3725 case 2:
3726 p32 = (uint32_t*)&buffer[reloc->r_address()];
3727 E::set32(*p32, value + E::get32(*p32));
3728 break;
3729 default:
3730 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
3731 break;
3736 // create ObjectAddressSpace object for use by libunwind
3737 OAS oas(*this, buffer);
3739 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3740 const char* msg;
3741 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
3742 oas, this->_machOSection->addr(), this->_machOSection->size(),
3743 cfiArray, count, (void*)&parser, warnFunc);
3744 if ( msg != NULL )
3745 throwf("malformed __eh_frame section: %s", msg);
3748 template <>
3749 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
3750 libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
3751 uint32_t count)
3753 // create ObjectAddressSpace object for use by libunwind
3754 OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3756 // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3757 const char* msg;
3758 msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
3759 oas, this->_machOSection->addr(), this->_machOSection->size(),
3760 cfiArray, count, (void*)&parser, warnFunc);
3761 if ( msg != NULL )
3762 throwf("malformed __eh_frame section: %s", msg);
3768 template <>
3769 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
3770 libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
3771 uint32_t count)
3773 // arm does not use zero cost exceptions
3774 assert(count == 0);
3779 template <typename A>
3780 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
3781 struct Parser<A>::LabelAndCFIBreakIterator& it,
3782 const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3784 return cfis.cfiCount;
3789 template <typename A>
3790 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
3791 struct Parser<A>::LabelAndCFIBreakIterator& it,
3792 const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3794 this->_beginAtoms = (Atom<A>*)p;
3795 // walk CFI_Atom_Info array and create atom for each entry
3796 const CFI_Atom_Info* start = &cfis.cfiArray[0];
3797 const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
3798 for(const CFI_Atom_Info* a=start; a < end; ++a) {
3799 Atom<A>* space = (Atom<A>*)p;
3800 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
3801 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
3802 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
3803 false, false, false, ld::Atom::Alignment(0));
3804 p += sizeof(Atom<A>);
3806 this->_endAtoms = (Atom<A>*)p;
3807 return cfis.cfiCount;
3811 template <> bool CFISection<x86_64>::bigEndian() { return false; }
3812 template <> bool CFISection<x86>::bigEndian() { return false; }
3813 template <> bool CFISection<arm>::bigEndian() { return false; }
3816 template <>
3817 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
3819 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3820 if ( personalityEncoding == 0x9B ) {
3821 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
3822 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
3823 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
3824 - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
3825 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
3826 const char* personalityName = parser.nameFromSymbol(sym);
3828 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
3829 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3830 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
3831 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
3832 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
3834 else if ( personalityEncoding != 0 ) {
3835 throwf("unsupported address encoding (%02X) of personality function in CIE",
3836 personalityEncoding);
3840 template <>
3841 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
3843 uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3844 if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
3845 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
3846 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
3847 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
3848 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
3849 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
3850 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3852 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
3853 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3854 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
3855 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
3857 else if ( personalityEncoding != 0 ) {
3858 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
3863 template <typename A>
3864 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
3866 // FIX ME
3867 assert(0);
3870 template <typename A>
3871 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3873 ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
3874 ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
3876 // add all references for FDEs, including implicit group references
3877 const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
3878 for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
3879 if ( p->isCIE ) {
3880 // add reference to personality function if used
3881 if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
3882 this->addCiePersonalityFixups(parser, p);
3885 else {
3886 // find FDE Atom
3887 Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
3888 // find function Atom
3889 Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
3890 // find CIE Atom
3891 Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
3892 // find LSDA Atom
3893 Atom<A>* lsdaAtom = NULL;
3894 if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
3895 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
3897 // add reference from FDE to CIE (always 32-bit pc-rel)
3898 typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
3899 parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
3900 parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
3901 parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3902 parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
3904 // add reference from FDE to function
3905 typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
3906 switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
3907 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3908 if ( sizeof(typename A::P::uint_t) == 8 ) {
3909 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3910 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3911 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3912 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
3913 break;
3915 // else fall into 32-bit case
3916 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3917 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3918 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3919 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3920 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
3921 break;
3922 default:
3923 throw "unsupported encoding in FDE of pointer to function";
3926 // add reference from FDE to LSDA
3927 typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom, p->u.fdeInfo.lsda.offsetInCFI);
3928 if ( lsdaAtom != NULL ) {
3929 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
3930 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3931 if ( sizeof(typename A::P::uint_t) == 8 ) {
3932 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3933 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3934 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3935 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
3936 break;
3938 // else fall into 32-bit case
3939 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3940 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3941 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3942 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3943 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
3944 break;
3945 default:
3946 throw "unsupported encoding in FDE of pointer to LSDA";
3950 // FDE is in group lead by function atom
3951 typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
3952 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
3954 // LSDA is in group lead by function atom
3955 if ( lsdaAtom != NULL ) {
3956 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
3965 template <typename A>
3966 const void* CFISection<A>::OAS::mappedAddress(pint_t addr)
3968 if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
3969 return &_ehFrameContent[addr-_ehFrameStartAddr];
3970 else {
3971 // requested bytes are not in __eh_frame section
3972 // this can occur when examining the instruction bytes in the __text
3973 File<A>& file = _ehFrameSection.file();
3974 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
3975 const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
3976 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
3977 if ( sect != NULL ) {
3978 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
3979 return file.fileContent() + sect->offset() + addr - sect->addr();
3983 throwf("__eh_frame parsing problem. Can't find target of reference to address 0x%08llX", (uint64_t)addr);
3988 template <typename A>
3989 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
3991 uintptr_t size = (end - logicalAddr);
3992 libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
3993 libunwind::LocalAddressSpace::pint_t sladdr = laddr;
3994 uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
3995 logicalAddr += (laddr-sladdr);
3996 return result;
3999 template <typename A>
4000 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4002 uintptr_t size = (end - logicalAddr);
4003 libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4004 libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4005 int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4006 logicalAddr += (laddr-sladdr);
4007 return result;
4010 template <typename A>
4011 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4013 pint_t startAddr = addr;
4014 pint_t p = addr;
4015 pint_t result;
4017 // first get value
4018 switch (encoding & 0x0F) {
4019 case DW_EH_PE_ptr:
4020 result = getP(addr);
4021 p += sizeof(pint_t);
4022 addr = (pint_t)p;
4023 break;
4024 case DW_EH_PE_uleb128:
4025 result = getULEB128(addr, end);
4026 break;
4027 case DW_EH_PE_udata2:
4028 result = get16(addr);
4029 p += 2;
4030 addr = (pint_t)p;
4031 break;
4032 case DW_EH_PE_udata4:
4033 result = get32(addr);
4034 p += 4;
4035 addr = (pint_t)p;
4036 break;
4037 case DW_EH_PE_udata8:
4038 result = get64(addr);
4039 p += 8;
4040 addr = (pint_t)p;
4041 break;
4042 case DW_EH_PE_sleb128:
4043 result = getSLEB128(addr, end);
4044 break;
4045 case DW_EH_PE_sdata2:
4046 result = (int16_t)get16(addr);
4047 p += 2;
4048 addr = (pint_t)p;
4049 break;
4050 case DW_EH_PE_sdata4:
4051 result = (int32_t)get32(addr);
4052 p += 4;
4053 addr = (pint_t)p;
4054 break;
4055 case DW_EH_PE_sdata8:
4056 result = get64(addr);
4057 p += 8;
4058 addr = (pint_t)p;
4059 break;
4060 default:
4061 throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4064 // then add relative offset
4065 switch ( encoding & 0x70 ) {
4066 case DW_EH_PE_absptr:
4067 // do nothing
4068 break;
4069 case DW_EH_PE_pcrel:
4070 result += startAddr;
4071 break;
4072 case DW_EH_PE_textrel:
4073 throw "DW_EH_PE_textrel pointer encoding not supported";
4074 break;
4075 case DW_EH_PE_datarel:
4076 throw "DW_EH_PE_datarel pointer encoding not supported";
4077 break;
4078 case DW_EH_PE_funcrel:
4079 throw "DW_EH_PE_funcrel pointer encoding not supported";
4080 break;
4081 case DW_EH_PE_aligned:
4082 throw "DW_EH_PE_aligned pointer encoding not supported";
4083 break;
4084 default:
4085 throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4086 break;
4089 // Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4090 // When parsing .o files that pointer contains zero, so we don't to return that.
4091 // Instead we skip the dereference and return the address of the pointer.
4092 // if ( encoding & DW_EH_PE_indirect )
4093 // result = getP(result);
4095 return result;
4098 template <>
4099 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4101 assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4102 assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4103 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4104 return parser.nameFromSymbol(sym);
4107 template <>
4108 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4110 assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4111 assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4112 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4113 return parser.nameFromSymbol(sym);
4116 template <typename A>
4117 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4119 return NULL;
4123 template <typename A>
4124 int CUSection<A>::infoSorter(const void* l, const void* r)
4126 // sort references by symbol index, then address
4127 const Info* left = (Info*)l;
4128 const Info* right = (Info*)r;
4129 if ( left->functionSymbolIndex == right->functionSymbolIndex )
4130 return (left->functionStartAddress - right->functionStartAddress);
4131 else
4132 return (left->functionSymbolIndex - right->functionSymbolIndex);
4135 template <typename A>
4136 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4138 // walk section content and copy to Info array
4139 const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4140 for (uint32_t i=0; i < cnt; ++i) {
4141 Info* info = &array[i];
4142 const macho_compact_unwind_entry<P>* entry = &entries[i];
4143 info->functionStartAddress = entry->codeStart();
4144 info->functionSymbolIndex = 0xFFFFFFFF;
4145 info->rangeLength = entry->codeLen();
4146 info->compactUnwindInfo = entry->compactUnwindInfo();
4147 info->personality = NULL;
4148 info->lsdaAddress = entry->lsda();
4149 info->function = NULL;
4150 info->lsda = NULL;
4151 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4152 warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4153 if ( info->lsdaAddress != 0 ) {
4154 info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4158 // scan relocs, local relocs are useless - ignore them
4159 // extern relocs are needed for personality references (possibly for function/lsda refs??)
4160 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4161 const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4162 for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4163 if ( reloc->r_extern() ) {
4164 // only expect external relocs on some colummns
4165 if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4166 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4167 array[entryIndex].personality = this->personalityName(parser, reloc);
4169 else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4170 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4171 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4172 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4173 array[entryIndex].lsdaAddress = lsdaSym.n_value();
4174 else
4175 warning("unexpected extern relocation to lsda in __compact_unwind section");
4177 else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4178 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4179 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4181 else {
4182 warning("unexpected extern relocation in __compact_unwind section");
4187 // sort array by function start address so unwind infos will be contiguous for a given function
4188 ::qsort(array, cnt, sizeof(Info), infoSorter);
4191 template <typename A>
4192 uint32_t CUSection<A>::count()
4194 const macho_section<P>* machoSect = this->machoSection();
4195 if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4196 throw "malformed __LD,__compact_unwind section, bad length";
4198 return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4201 template <typename A>
4202 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4204 Info* const arrayStart = cus.cuArray;
4205 Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4206 for (Info* info=arrayStart; info < arrayEnd; ++info) {
4207 // if external reloc was used, real address is symbol n_value + addend
4208 if ( info->functionSymbolIndex != 0xFFFFFFFF )
4209 info->functionStartAddress += parser.symbolFromIndex(info->functionSymbolIndex).n_value();
4210 // find function atom from address
4211 info->function = parser.findAtomByAddress(info->functionStartAddress);
4212 // find lsda atom from address
4213 if ( info->lsdaAddress != 0 ) {
4214 info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4215 // add lsda subordinate
4216 typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4217 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4219 if ( info->personality != NULL ) {
4220 // add personality subordinate
4221 typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4222 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4228 template <typename A>
4229 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4230 : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4232 switch ( s->flags() & SECTION_TYPE ) {
4233 case S_ZEROFILL:
4234 _type = ld::Atom::typeZeroFill;
4235 break;
4236 case S_MOD_INIT_FUNC_POINTERS:
4237 _type = ld::Atom::typeInitializerPointers;
4238 break;
4239 case S_MOD_TERM_FUNC_POINTERS:
4240 _type = ld::Atom::typeTerminatorPointers;
4241 break;
4242 case S_THREAD_LOCAL_VARIABLES:
4243 _type = ld::Atom::typeTLV;
4244 break;
4245 case S_THREAD_LOCAL_ZEROFILL:
4246 _type = ld::Atom::typeTLVZeroFill;
4247 break;
4248 case S_THREAD_LOCAL_REGULAR:
4249 _type = ld::Atom::typeTLVInitialValue;
4250 break;
4251 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4252 _type = ld::Atom::typeTLVInitializerPointers;
4253 break;
4254 case S_REGULAR:
4255 if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4256 _type = ld::Atom::typeLSDA;
4257 else if ( this->type() == ld::Section::typeInitializerPointers )
4258 _type = ld::Atom::typeInitializerPointers;
4259 break;
4264 template <typename A>
4265 bool SymboledSection<A>::dontDeadStrip()
4267 switch ( _type ) {
4268 case ld::Atom::typeInitializerPointers:
4269 case ld::Atom::typeTerminatorPointers:
4270 return true;
4271 default:
4272 // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4273 if ( ! this->_file.canScatterAtoms() )
4274 return true;
4275 // call inherited
4276 return Section<A>::dontDeadStrip();
4278 return false;
4282 template <typename A>
4283 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4284 struct Parser<A>::LabelAndCFIBreakIterator& it,
4285 const struct Parser<A>::CFI_CU_InfoArrays&)
4287 const pint_t startAddr = this->_machOSection->addr();
4288 const pint_t endAddr = startAddr + this->_machOSection->size();
4289 const uint32_t sectNum = this->sectionNum(parser);
4291 uint32_t count = 0;
4292 pint_t addr;
4293 pint_t size;
4294 const macho_nlist<P>* sym;
4295 while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4296 ++count;
4298 //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4299 return count;
// Constructs this section's atoms in place in the caller-supplied buffer `p`.
// One Atom<A> is placement-new'ed per break reported by the iterator `it`,
// and `p` is advanced by sizeof(Atom<A>) after each one.
// Records the first/one-past-last atom in _beginAtoms/_endAtoms and returns
// the number of atoms constructed.
4302 template <typename A>
4303 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4304 struct Parser<A>::LabelAndCFIBreakIterator& it,
4305 const struct Parser<A>::CFI_CU_InfoArrays&)
4307 this->_beginAtoms = (Atom<A>*)p;
4309 //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4310 const pint_t startAddr = this->_machOSection->addr();
4311 const pint_t endAddr = startAddr + this->_machOSection->size();
4312 const uint32_t sectNum = this->sectionNum(parser);
4314 uint32_t count = 0;
4315 pint_t addr;
4316 pint_t size;
4317 const macho_nlist<P>* label;
4318 while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
4319 Atom<A>* allocatedSpace = (Atom<A>*)p;
4320 // is break because of label or CFI?
4321 if ( label != NULL ) {
4322 // The size is computed based on the address of the next label (or the end of the section for the last label)
4323 // If there are two labels at the same address, we want one of them to be an alias of the other.
4324 // If the label is at the end of a section, it has zero size, but is not an alias
4325 const bool isAlias = ( (size == 0) && (addr < endAddr) );
4326 new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
4327 if ( isAlias )
4328 this->_hasAliases = true;
4330 else {
// no label for this break: build an anonymous atom named "anon" with translation-unit scope
4331 ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
4332 ld::Atom::ContentType ctype = this->contentType();
// LSDA content is the one case given a different symbol table inclusion
4333 if ( ctype == ld::Atom::typeLSDA )
4334 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
4335 new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
4336 ld::Atom::scopeTranslationUnit, ctype, inclusion,
4337 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
// step to the next atom slot in the buffer
4339 p += sizeof(Atom<A>);
4340 ++count;
4343 this->_endAtoms = (Atom<A>*)p;
4344 return count;
// Counts the atoms this section will produce.
// First pass: walk the section element by element (element size comes from
// elementSizeAtAddress) and count every element for which useElementAt()
// returns true.
// Second pass (only when the iterator reports overlapping symbols, the section
// is non-empty, and atoms here combine by name and content): count one extra
// atom for each sorted symbol that repeats the previous symbol's address and
// section number within this section.
4348 template <typename A>
4349 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
4350 struct Parser<A>::LabelAndCFIBreakIterator& it,
4351 const struct Parser<A>::CFI_CU_InfoArrays&)
4353 uint32_t count = 0;
4354 const macho_section<P>* sect = this->machoSection();
4355 const pint_t startAddr = sect->addr();
4356 const pint_t endAddr = startAddr + sect->size();
4357 for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
4358 if ( useElementAt(parser, it, addr) )
4359 ++count;
4361 if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
4362 // if there are multiple labels in this section for the same address, then clone them into multi atoms
// (pint_t)(-1) serves as the "no previous symbol yet" address
4363 pint_t prevSymbolAddr = (pint_t)(-1);
4364 uint8_t prevSymbolSectNum = 0;
4365 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
4366 const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
4367 const pint_t symbolAddr = sym.n_value();
// NOTE(review): declared pint_t although it holds a section number and is compared with the uint8_t prevSymbolSectNum
4368 const pint_t symbolSectNum = sym.n_sect();
4369 if ( (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
4370 ++count;
4372 prevSymbolAddr = symbolAddr;
4373 prevSymbolSectNum = symbolSectNum;
4376 return count;
// Constructs this section's atoms in place in the caller-supplied buffer `p`.
// For every break the iterator yields:
//   - if the break carries a label, one atom is built at the label's address
//     (anonymous if ignoreLabel() rejects the label's name, named otherwise),
//     and the covered range is shrunk by that element's size;
//   - the remainder of the range is then walked element by element, building
//     an anonymous atom for each element accepted by useElementAt().
// `p` advances by sizeof(Atom<A>) per atom. Records _beginAtoms/_endAtoms and
// returns the number of atoms constructed.
4379 template <typename A>
4380 uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4381 struct Parser<A>::LabelAndCFIBreakIterator& it,
4382 const struct Parser<A>::CFI_CU_InfoArrays&)
4384 this->_beginAtoms = (Atom<A>*)p;
4386 const macho_section<P>* sect = this->machoSection();
4387 const pint_t startAddr = sect->addr();
4388 const pint_t endAddr = startAddr + sect->size();
4389 const uint32_t sectNum = this->sectionNum(parser);
4390 //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
4391 uint32_t count = 0;
4392 pint_t foundAddr;
4393 pint_t size;
4394 const macho_nlist<P>* foundLabel;
4395 Atom<A>* allocatedSpace;
4396 while ( it.next(parser, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
4397 if ( foundLabel != NULL ) {
// the labeled atom's size comes from the section's element size, not from the iterator's size
4398 pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
4399 allocatedSpace = (Atom<A>*)p;
4400 if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
4401 //fprintf(stderr, " 0x%08llX make annon\n", (uint64_t)foundAddr);
4402 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
4403 this->elementSizeAtAddress(foundAddr), this->definition(),
4404 this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
4405 this->contentType(), this->symbolTableInclusion(),
4406 this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
4408 else {
4409 // make named atom for label
4410 //fprintf(stderr, " 0x%08llX make labeled\n", (uint64_t)foundAddr);
4411 new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
4413 ++count;
4414 p += sizeof(Atom<A>);
// skip past the labeled element so the loop below only covers what is left of the range
4415 foundAddr += labeledAtomSize;
4416 size -= labeledAtomSize;
4418 // some number of anonymous atoms
4419 for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
4420 // make anon atoms for area before label
4421 if ( this->useElementAt(parser, it, addr) ) {
4422 //fprintf(stderr, " 0x%08llX make annon\n", (uint64_t)addr);
4423 allocatedSpace = (Atom<A>*)p;
4424 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
4425 this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
4426 this->contentType(), this->symbolTableInclusion(),
4427 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4428 ++count;
4429 p += sizeof(Atom<A>);
4434 this->_endAtoms = (Atom<A>*)p;
4436 return count;
4440 template <typename A>
4441 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4443 const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4444 return *literalContent;
4447 template <typename A>
4448 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4449 const ld::IndirectBindingTable& ind) const
4451 assert(this->type() == rhs.section().type());
4452 const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4454 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4455 assert(rhsAtom != NULL);
4456 if ( rhsAtom != NULL ) {
4457 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
4458 return (*literalContent == *rhsLiteralContent);
4460 return false;
4464 template <typename A>
4465 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4467 #if __LP64__
4468 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4469 return *literalContent;
4470 #else
4471 unsigned long hash = 5381;
4472 const uint8_t* byteContent = atom->contentPointer();
4473 for (int i=0; i < 8; ++i) {
4474 hash = hash * 33 + byteContent[i];
4476 return hash;
4477 #endif
4480 template <typename A>
4481 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4482 const ld::IndirectBindingTable& ind) const
4484 if ( rhs.section().type() != ld::Section::typeLiteral8 )
4485 return false;
4486 assert(this->type() == rhs.section().type());
4487 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4489 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4490 assert(rhsAtom != NULL);
4491 if ( rhsAtom != NULL ) {
4492 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4493 return (*literalContent == *rhsLiteralContent);
4495 return false;
4499 template <typename A>
4500 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4502 unsigned long hash = 5381;
4503 const uint8_t* byteContent = atom->contentPointer();
4504 for (int i=0; i < 16; ++i) {
4505 hash = hash * 33 + byteContent[i];
4507 return hash;
4510 template <typename A>
4511 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4512 const ld::IndirectBindingTable& ind) const
4514 if ( rhs.section().type() != ld::Section::typeLiteral16 )
4515 return false;
4516 assert(this->type() == rhs.section().type());
4517 const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4519 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4520 assert(rhsAtom != NULL);
4521 if ( rhsAtom != NULL ) {
4522 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4523 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
4525 return false;
4530 template <typename A>
4531 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
4533 const macho_section<P>* sect = this->machoSection();
4534 const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4535 return strlen(stringContent) + 1;
4538 template <typename A>
4539 bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
4541 return true;
4544 template <typename A>
4545 bool CStringSection<A>::ignoreLabel(const char* label)
4547 return (label[0] == 'L') || (label[0] == 'l');
4550 template <typename A>
4551 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
4553 Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
4554 return result;
4557 template <typename A>
4558 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4560 unsigned long hash = 5381;
4561 const char* stringContent = (char*)atom->contentPointer();
4562 for (const char* s = stringContent; *s != '\0'; ++s) {
4563 hash = hash * 33 + *s;
4565 return hash;
4569 template <typename A>
4570 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4571 const ld::IndirectBindingTable& ind) const
4573 if ( rhs.section().type() != ld::Section::typeCString )
4574 return false;
4575 assert(this->type() == rhs.section().type());
4576 assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
4577 assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
4578 const char* stringContent = (char*)atom->contentPointer();
4580 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4581 assert(rhsAtom != NULL);
4582 if ( rhsAtom != NULL ) {
4583 if ( atom->_size != rhsAtom->_size )
4584 return false;
4585 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
4586 return (strcmp(stringContent, rhsStringContent) == 0);
4588 return false;
4592 template <>
4593 ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
4595 return ld::Fixup::kindStoreLittleEndian32;
4598 template <>
4599 ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
4601 return ld::Fixup::kindStoreLittleEndian32;
4605 template <>
4606 void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
4608 assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
// Adds one fixup per pointer-sized slot in this non-lazy-pointer section.
// For each slot the target is chosen from the indirect symbol table entry:
//   - INDIRECT_SYMBOL_LOCAL: the slot's stored value is read from the file and
//     resolved to an atom plus addend;
//   - a non-external symbol defined in a section: resolved by address and
//     section number;
//   - anything else: referenced by name, carrying the symbol's weak-import flag.
// The asserts check that the source atom's combine mode agrees with the same
// classification.
4611 template <typename A>
4612 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
4614 // add references for each NLP atom based on indirect symbol table
4615 const macho_section<P>* sect = this->machoSection();
4616 const pint_t endAddr = sect->addr() + sect->size();
4617 for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
4618 typename Parser<A>::SourceLocation src;
4619 typename Parser<A>::TargetDesc target;
4620 src.atom = this->findAtomByAddress(addr);
4621 src.offsetInAtom = 0;
4622 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
// start from an empty target description; exactly one branch below fills it in
4623 target.atom = NULL;
4624 target.name = NULL;
4625 target.weakImport = false;
4626 target.addend = 0;
4627 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
4628 // use direct reference for local symbols
4629 const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4630 pint_t targetAddr = P::getP(*nlpContent);
4631 target.atom = parser.findAtomByAddress(targetAddr);
4632 target.weakImport = false;
4633 target.addend = (targetAddr - target.atom->objectAddress());
4634 // <rdar://problem/8385011> if pointer to thumb function, mask off thumb bit (not an addend of +1)
4635 if ( target.atom->isThumb() )
4636 target.addend &= (-2);
4637 assert(src.atom->combine() == ld::Atom::combineNever);
4639 else {
4640 const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4641 // use direct reference for local symbols
4642 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
4643 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
4644 assert(src.atom->combine() == ld::Atom::combineNever);
4646 else {
// every other symbol is bound by name
4647 target.name = parser.nameFromSymbol(sym);
4648 target.weakImport = parser.weakImportFromSymbol(sym);
4649 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
4652 parser.addFixups(src, this->fixupKind(), target);
4656 template <typename A>
4657 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
4659 const macho_section<P>* sect = this->machoSection();
4660 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4661 if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4662 return ld::Atom::combineNever;
4664 // don't coalesce non-lazy-pointers to local symbols
4665 const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4666 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
4667 return ld::Atom::combineNever;
4669 return ld::Atom::combineByNameAndReferences;
4672 template <typename A>
4673 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
4675 assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4676 assert(atom->fixupCount() == 1);
4677 ld::Fixup::iterator fit = atom->fixupsBegin();
4678 const char* name = NULL;
4679 switch ( fit->binding ) {
4680 case ld::Fixup::bindingByNameUnbound:
4681 name = fit->u.name;
4682 break;
4683 case ld::Fixup::bindingByContentBound:
4684 name = fit->u.target->name();
4685 break;
4686 case ld::Fixup::bindingsIndirectlyBound:
4687 name = ind.indirectName(fit->u.bindingIndex);
4688 break;
4689 default:
4690 assert(0);
4692 assert(name != NULL);
4693 return name;
4696 template <typename A>
4697 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4699 assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4700 unsigned long hash = 9508;
4701 for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
4702 hash = hash * 33 + *s;
4704 return hash;
4707 template <typename A>
4708 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4709 const ld::IndirectBindingTable& indirectBindingTable) const
4711 if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
4712 return false;
4713 assert(this->type() == rhs.section().type());
4714 // there can be many non-lazy pointer in different section names
4715 // we only want to coalesce in same section name
4716 if ( *this != rhs.section() )
4717 return false;
4718 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4719 assert(rhsAtom != NULL);
4720 const char* thisName = this->targetName(atom, indirectBindingTable);
4721 const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
4722 return (strcmp(thisName, rhsName) == 0);
4725 template <typename A>
4726 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
4728 const macho_section<P>* sect = this->machoSection();
4729 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4730 if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4731 return ld::Atom::scopeTranslationUnit;
4732 else
4733 return ld::Atom::scopeLinkageUnit;
4737 template <typename A>
4738 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
4739 ContentType* ct, unsigned int* count)
4741 *ct = contentUnknown;
4742 for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
4743 const ld::Atom* targetAtom = NULL;
4744 switch ( fit->binding ) {
4745 case ld::Fixup::bindingByNameUnbound:
4746 // ignore reference to ___CFConstantStringClassReference
4747 // we are just looking for reference to backing string data
4748 assert(fit->offsetInAtom == 0);
4749 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
4750 break;
4751 case ld::Fixup::bindingDirectlyBound:
4752 case ld::Fixup::bindingByContentBound:
4753 targetAtom = fit->u.target;
4754 break;
4755 case ld::Fixup::bindingsIndirectlyBound:
4756 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4757 break;
4758 default:
4759 assert(0 && "bad binding type");
4761 assert(targetAtom != NULL);
4762 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
4763 if ( targetAtom->section().type() == ld::Section::typeCString ) {
4764 *ct = contentUTF8;
4765 *count = targetAtom->size();
4767 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
4768 *ct = contentUTF16;
4769 *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
4771 assert(target != NULL);
4772 return target->contentPointer();
4774 assert(0);
4775 return NULL;
4778 template <typename A>
4779 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4781 // base hash of CFString on hash of cstring it wraps
4782 ContentType cType;
4783 unsigned long hash;
4784 unsigned int charCount;
4785 const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
4786 switch ( cType ) {
4787 case contentUTF8:
4788 hash = 9408;
4789 for (const char* s = (char*)content; *s != '\0'; ++s) {
4790 hash = hash * 33 + *s;
4792 return hash;
4793 case contentUTF16:
4794 hash = 407955;
4795 --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
4796 for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
4797 hash = hash * 1025 + *s;
4799 return hash;
4800 case contentUnknown:
4801 return 0;
4803 return 0;
4807 template <typename A>
4808 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4809 const ld::IndirectBindingTable& indirectBindingTable) const
4811 if ( atom == &rhs )
4812 return true;
4813 if ( rhs.section().type() != ld::Section::typeCFString)
4814 return false;
4815 assert(this->type() == rhs.section().type());
4816 assert(strcmp(this->sectionName(), "__cfstring") == 0);
4818 ContentType thisType;
4819 unsigned int charCount;
4820 const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
4821 ContentType rhsType;
4822 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4823 assert(rhsAtom != NULL);
4824 unsigned int rhsCharCount;
4825 const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
4827 if ( thisType != rhsType )
4828 return false;
4830 // no need to compare content of pointers are already the same
4831 if ( cstringContent == rhsStringContent )
4832 return true;
4834 // no need to compare content if size is different
4835 if ( charCount != rhsCharCount )
4836 return false;
4838 switch ( thisType ) {
4839 case contentUTF8:
4840 return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
4841 case contentUTF16:
4843 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
4844 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
4845 for (unsigned int i = 0; i < charCount; ++i) {
4846 if ( cstringContent16[i] != rhsStringContent16[i] )
4847 return false;
4849 return true;
4851 case contentUnknown:
4852 return false;
4854 return false;
4858 template <typename A>
4859 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
4861 // nominal size for each class is 48 bytes, but sometimes the compiler
4862 // over aligns and there is padding after class data
4863 const macho_section<P>* sct = this->machoSection();
4864 uint32_t align = 1 << sct->align();
4865 uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
4866 return size;
// Derives a name for the unlabeled class object at `addr`.
// Reads the class-name pointer stored in the class object, locates the
// section containing that address to get the name string, then searches the
// symbol table for an absolute symbol named ".objc_class_name_<name>" and
// returns that symbol's name. Asserts if no section or symbol is found;
// returns "unknown objc class" if the asserts are compiled out.
4869 template <typename A>
4870 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
// NOTE(review): the comment below says 8 bytes, but the code reads at 2*sizeof(pint_t)
4872 // 8-bytes into class object is pointer to class name
4873 const macho_section<P>* sct = this->machoSection();
4874 uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
4875 const uint8_t* mappedFileContent = this->file().fileContent();
4876 pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
4878 // find section containing string address to get string bytes
4879 const macho_section<P>* const sections = parser.firstMachOSection();
4880 const uint32_t sectionCount = parser.machOSectionCount();
4881 for (uint32_t i=0; i < sectionCount; ++i) {
4882 const macho_section<P>* aSect = &sections[i];
4883 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
4884 assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
4885 uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
4886 const char* name = (char*)mappedFileContent + nameFileOffset;
4887 // spin through symbol table to find absolute symbol corresponding to this class
4888 for (uint32_t s=0; s < parser.symbolCount(); ++s) {
4889 const macho_nlist<P>& sym = parser.symbolFromIndex(s);
4890 if ( (sym.n_type() & N_TYPE) != N_ABS )
4891 continue;
4892 const char* absName = parser.nameFromSymbol(sym);
// 17 is the length of the ".objc_class_name_" prefix; the class name follows it
4893 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
4894 if ( strcmp(&absName[17], name) == 0 )
4895 return absName;
4898 assert(0 && "obj class name not found in symbol table");
4901 assert(0 && "obj class name not found");
4902 return "unknown objc class";
4906 template <typename A>
4907 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4909 assert(atom->fixupCount() == 1);
4910 ld::Fixup::iterator fit = atom->fixupsBegin();
4911 const char* className = NULL;
4912 switch ( fit->binding ) {
4913 case ld::Fixup::bindingByNameUnbound:
4914 className = fit->u.name;
4915 break;
4916 case ld::Fixup::bindingDirectlyBound:
4917 case ld::Fixup::bindingByContentBound:
4918 className = fit->u.target->name();
4919 break;
4920 case ld::Fixup::bindingsIndirectlyBound:
4921 className = ind.indirectName(fit->u.bindingIndex);
4922 break;
4923 default:
4924 assert(0 && "unsupported binding in objc2 class ref section");
4926 assert(className != NULL);
4927 return className;
4931 template <typename A>
4932 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4934 unsigned long hash = 978;
4935 for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
4936 hash = hash * 33 + *s;
4938 return hash;
4941 template <typename A>
4942 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4943 const ld::IndirectBindingTable& indirectBindingTable) const
4945 assert(this->type() == rhs.section().type());
4946 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4947 assert(rhsAtom != NULL);
4948 const char* thisClassName = targetClassName(atom, indirectBindingTable);
4949 const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
4950 return (strcmp(thisClassName, rhsClassName) == 0);
4954 template <typename A>
4955 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4957 assert(atom->fixupCount() == 2);
4958 ld::Fixup::iterator fit = atom->fixupsBegin();
4959 if ( fit->kind == ld::Fixup::kindSetTargetAddress )
4960 ++fit;
4961 const ld::Atom* targetAtom = NULL;
4962 switch ( fit->binding ) {
4963 case ld::Fixup::bindingByContentBound:
4964 targetAtom = fit->u.target;
4965 break;
4966 case ld::Fixup::bindingsIndirectlyBound:
4967 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4968 if ( targetAtom == NULL ) {
4969 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
4971 break;
4972 default:
4973 assert(0);
4975 assert(targetAtom != NULL);
4976 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
4977 assert(target != NULL);
4978 return (char*)target->contentPointer();
4982 template <typename A>
4983 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4985 assert(atom->fixupCount() == 1);
4986 ld::Fixup::iterator fit = atom->fixupsBegin();
4987 const ld::Atom* targetAtom = NULL;
4988 switch ( fit->binding ) {
4989 case ld::Fixup::bindingByContentBound:
4990 targetAtom = fit->u.target;
4991 break;
4992 case ld::Fixup::bindingsIndirectlyBound:
4993 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4994 break;
4995 default:
4996 assert(0);
4998 assert(targetAtom != NULL);
4999 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5000 assert(target != NULL);
5001 return (char*)target->contentPointer();
5004 template <typename A>
5005 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5006 const ld::IndirectBindingTable& indirectBindingTable) const
5008 // make hash from section name and target cstring name
5009 unsigned long hash = 123;
5010 for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5011 hash = hash * 33 + *s;
5013 for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5014 hash = hash * 33 + *s;
5016 return hash;
5019 template <typename A>
5020 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5021 const ld::IndirectBindingTable& indirectBindingTable) const
5023 assert(this->type() == rhs.section().type());
5024 // there can be pointers-to-cstrings in different section names
5025 // we only want to coalesce in same section name
5026 if ( *this != rhs.section() )
5027 return false;
5029 // get string content for this
5030 const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5031 const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5032 assert(rhsAtom != NULL);
5033 const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5035 assert(cstringContent != NULL);
5036 assert(rhsCstringContent != NULL);
5037 return (strcmp(cstringContent, rhsCstringContent) == 0);
5042 template <typename A>
5043 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5045 unsigned long hash = 5381;
5046 const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5047 // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5048 unsigned int count = (atom->size()/2) - 1;
5049 for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5050 hash = hash * 33 + *s;
5052 return hash;
5055 template <typename A>
5056 bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5057 const ld::IndirectBindingTable& ind) const
5059 if ( rhs.section().type() != ld::Section::typeUTF16Strings )
5060 return false;
5061 assert(0);
5062 return false;
5071 template <>
5072 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5074 switch ( r_type ) {
5075 case X86_64_RELOC_SIGNED:
5076 return 4;
5077 case X86_64_RELOC_SIGNED_1:
5078 return 5;
5079 case X86_64_RELOC_SIGNED_2:
5080 return 6;
5081 case X86_64_RELOC_SIGNED_4:
5082 return 8;
5084 return 0;
5088 template <>
5089 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5091 const macho_section<P>* sect = this->machoSection();
5092 uint64_t srcAddr = sect->addr() + reloc->r_address();
5093 Parser<x86_64>::SourceLocation src;
5094 Parser<x86_64>::TargetDesc target;
5095 Parser<x86_64>::TargetDesc toTarget;
5096 src.atom = this->findAtomByAddress(srcAddr);
5097 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5098 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5099 uint64_t contentValue = 0;
5100 const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5101 bool result = false;
5102 bool useDirectBinding;
5103 switch ( reloc->r_length() ) {
5104 case 0:
5105 contentValue = *fixUpPtr;
5106 break;
5107 case 1:
5108 contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5109 break;
5110 case 2:
5111 contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5112 break;
5113 case 3:
5114 contentValue = E::get64(*((uint64_t*)fixUpPtr));
5115 break;
5117 target.atom = NULL;
5118 target.name = NULL;
5119 target.weakImport = false;
5120 target.addend = 0;
5121 if ( reloc->r_extern() ) {
5122 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5123 // use direct reference for local symbols
5124 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5125 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5126 target.addend += contentValue;
5128 else {
5129 target.name = parser.nameFromSymbol(sym);
5130 target.weakImport = parser.weakImportFromSymbol(sym);
5131 target.addend = contentValue;
5133 // cfstrings should always use direct reference to backing store
5134 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5135 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5136 target.addend = contentValue;
5139 else {
5140 if ( reloc->r_pcrel() )
5141 contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5142 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5144 switch ( reloc->r_type() ) {
5145 case X86_64_RELOC_UNSIGNED:
5146 if ( reloc->r_pcrel() )
5147 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5148 switch ( reloc->r_length() ) {
5149 case 0:
5150 case 1:
5151 throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5152 case 2:
5153 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5154 break;
5155 case 3:
5156 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5157 break;
5159 break;
5160 case X86_64_RELOC_SIGNED:
5161 case X86_64_RELOC_SIGNED_1:
5162 case X86_64_RELOC_SIGNED_2:
5163 case X86_64_RELOC_SIGNED_4:
5164 if ( ! reloc->r_pcrel() )
5165 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5166 if ( reloc->r_length() != 2 )
5167 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
5168 switch ( reloc->r_type() ) {
5169 case X86_64_RELOC_SIGNED:
5170 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5171 break;
5172 case X86_64_RELOC_SIGNED_1:
5173 if ( reloc->r_extern() )
5174 target.addend += 1;
5175 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5176 break;
5177 case X86_64_RELOC_SIGNED_2:
5178 if ( reloc->r_extern() )
5179 target.addend += 2;
5180 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5181 break;
5182 case X86_64_RELOC_SIGNED_4:
5183 if ( reloc->r_extern() )
5184 target.addend += 4;
5185 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5186 break;
5188 break;
5189 case X86_64_RELOC_BRANCH:
5190 if ( ! reloc->r_pcrel() )
5191 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5192 switch ( reloc->r_length() ) {
5193 case 2:
5194 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5195 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5196 parser.addDtraceExtraInfos(src, &target.name[16]);
5198 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5199 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5200 parser.addDtraceExtraInfos(src, &target.name[20]);
5202 else {
5203 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5205 break;
5206 case 0:
5207 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5208 break;
5209 default:
5210 throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5212 break;
5213 case X86_64_RELOC_GOT:
5214 if ( ! reloc->r_extern() )
5215 throw "not extern and X86_64_RELOC_GOT not supported";
5216 if ( ! reloc->r_pcrel() )
5217 throw "not pcrel and X86_64_RELOC_GOT not supported";
5218 if ( reloc->r_length() != 2 )
5219 throw "length != 2 and X86_64_RELOC_GOT not supported";
5220 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5221 break;
5222 case X86_64_RELOC_GOT_LOAD:
5223 if ( ! reloc->r_extern() )
5224 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5225 if ( ! reloc->r_pcrel() )
5226 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5227 if ( reloc->r_length() != 2 )
5228 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5229 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5230 break;
5231 case X86_64_RELOC_SUBTRACTOR:
5232 if ( reloc->r_pcrel() )
5233 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5234 if ( reloc->r_length() < 2 )
5235 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5236 if ( !reloc->r_extern() )
5237 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5238 if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5239 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
5240 result = true;
5241 if ( nextReloc->r_pcrel() )
5242 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5243 if ( nextReloc->r_length() != reloc->r_length() )
5244 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5245 if ( nextReloc->r_extern() ) {
5246 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5247 // use direct reference for local symbols
5248 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5249 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5250 toTarget.addend = contentValue;
5251 useDirectBinding = true;
5253 else {
5254 toTarget.name = parser.nameFromSymbol(sym);
5255 toTarget.weakImport = parser.weakImportFromSymbol(sym);
5256 toTarget.addend = contentValue;
5257 useDirectBinding = false;
5260 else {
5261 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
5262 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
5264 if ( useDirectBinding )
5265 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
5266 else
5267 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
5268 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
5269 if ( target.atom == NULL )
5270 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
5271 else
5272 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
5273 if ( reloc->r_length() == 2 )
5274 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
5275 else
5276 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
5277 break;
5278 case X86_64_RELOC_TLV:
5279 if ( ! reloc->r_extern() )
5280 throw "not extern and X86_64_RELOC_TLV not supported";
5281 if ( ! reloc->r_pcrel() )
5282 throw "not pcrel and X86_64_RELOC_TLV not supported";
5283 if ( reloc->r_length() != 2 )
5284 throw "length != 2 and X86_64_RELOC_TLV not supported";
5285 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5286 break;
5287 default:
5288 throwf("unknown relocation type %d", reloc->r_type());
5290 return result;
5295 template <>
5296 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
5298 const macho_section<P>* sect = this->machoSection();
5299 uint32_t srcAddr;
5300 const uint8_t* fixUpPtr;
5301 uint32_t contentValue = 0;
5302 ld::Fixup::Kind kind = ld::Fixup::kindNone;
5303 Parser<x86>::SourceLocation src;
5304 Parser<x86>::TargetDesc target;
5306 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5307 srcAddr = sect->addr() + reloc->r_address();
5308 src.atom = this->findAtomByAddress(srcAddr);
5309 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5310 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5311 switch ( reloc->r_type() ) {
5312 case GENERIC_RELOC_VANILLA:
5313 switch ( reloc->r_length() ) {
5314 case 0:
5315 contentValue = (int32_t)(int8_t)*fixUpPtr;
5316 if ( reloc->r_pcrel() ) {
5317 kind = ld::Fixup::kindStoreX86BranchPCRel8;
5318 contentValue += srcAddr + sizeof(uint8_t);
5320 else
5321 throw "r_length=0 and r_pcrel=0 not supported";
5322 break;
5323 case 1:
5324 contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5325 if ( reloc->r_pcrel() ) {
5326 kind = ld::Fixup::kindStoreX86PCRel16;
5327 contentValue += srcAddr + sizeof(uint16_t);
5329 else
5330 kind = ld::Fixup::kindStoreLittleEndian16;
5331 break;
5332 case 2:
5333 contentValue = E::get32(*((uint32_t*)fixUpPtr));
5334 if ( reloc->r_pcrel() ) {
5335 kind = ld::Fixup::kindStoreX86BranchPCRel32;
5336 contentValue += srcAddr + sizeof(uint32_t);
5338 else
5339 kind = ld::Fixup::kindStoreLittleEndian32;
5340 break;
5341 case 3:
5342 throw "r_length=3 not supported";
5344 if ( reloc->r_extern() ) {
5345 target.atom = NULL;
5346 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5347 target.name = parser.nameFromSymbol(targetSymbol);
5348 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5349 target.addend = (int32_t)contentValue;
5351 else {
5352 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5354 if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
5355 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
5356 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5357 parser.addDtraceExtraInfos(src, &target.name[16]);
5358 return false;
5360 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
5361 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5362 parser.addDtraceExtraInfos(src, &target.name[20]);
5363 return false;
5366 parser.addFixups(src, kind, target);
5367 return false;
5368 break;
5369 case GENERIC_RLEOC_TLV:
5371 if ( !reloc->r_extern() )
5372 throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
5373 if ( reloc->r_length() != 2 )
5374 throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
5375 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5376 // use direct reference for local symbols
5377 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5378 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5380 else {
5381 target.atom = NULL;
5382 target.name = parser.nameFromSymbol(sym);
5383 target.weakImport = parser.weakImportFromSymbol(sym);
5385 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5386 if ( reloc->r_pcrel() ) {
5387 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5389 else {
5390 parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
5392 return false;
5394 break;
5395 default:
5396 throwf("unsupported i386 relocation type (%d)", reloc->r_type());
5399 else {
5400 // scattered relocation
5401 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5402 srcAddr = sect->addr() + sreloc->r_address();
5403 src.atom = this->findAtomByAddress(srcAddr);
5404 assert(src.atom != NULL);
5405 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5406 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
5407 uint32_t relocValue = sreloc->r_value();
5408 bool result = false;
5409 // file format allows pair to be scattered or not
5410 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5411 const macho_relocation_info<P>* nextReloc = &reloc[1];
5412 bool nextRelocIsPair = false;
5413 uint32_t nextRelocAddress = 0;
5414 uint32_t nextRelocValue = 0;
5415 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5416 if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
5417 nextRelocIsPair = true;
5418 nextRelocAddress = nextReloc->r_address();
5419 result = true; // iterator should skip next reloc, since we've consumed it here
5422 else {
5423 if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
5424 nextRelocIsPair = true;
5425 nextRelocAddress = nextSReloc->r_address();
5426 nextRelocValue = nextSReloc->r_value();
5429 switch (sreloc->r_type()) {
5430 case GENERIC_RELOC_VANILLA:
5431 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5432 target.atom = parser.findAtomByAddress(relocValue);
5433 if ( sreloc->r_pcrel() ) {
5434 switch ( sreloc->r_length() ) {
5435 case 0:
5436 contentValue = srcAddr + 1 + *fixUpPtr;
5437 target.addend = (int32_t)contentValue - (int32_t)relocValue;
5438 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
5439 break;
5440 case 1:
5441 contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
5442 target.addend = (int32_t)contentValue - (int32_t)relocValue;
5443 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
5444 break;
5445 case 2:
5446 contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
5447 target.addend = (int32_t)contentValue - (int32_t)relocValue;
5448 parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5449 break;
5450 case 3:
5451 throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
5452 break;
5455 else {
5456 if ( sreloc->r_length() != 2 )
5457 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
5458 contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5459 target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
5460 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5462 break;
5463 case GENERIC_RELOC_SECTDIFF:
5464 case GENERIC_RELOC_LOCAL_SECTDIFF:
5466 if ( !nextRelocIsPair )
5467 throw "GENERIC_RELOC_SECTDIFF missing following pair";
5468 switch ( sreloc->r_length() ) {
5469 case 0:
5470 case 3:
5471 throw "bad length for GENERIC_RELOC_SECTDIFF";
5472 case 1:
5473 contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
5474 kind = ld::Fixup::kindStoreLittleEndian16;
5475 break;
5476 case 2:
5477 contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5478 kind = ld::Fixup::kindStoreLittleEndian32;
5479 break;
5481 Atom<x86>* fromAtom = parser.findAtomByAddress(nextRelocValue);
5482 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5483 parser.findTargetFromAddress(sreloc->r_value(), target);
5484 // check for addend encoded in the section content
5485 int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5486 if ( addend < 0 ) {
5487 // switch binding base on coalescing
5488 if ( target.atom == NULL ) {
5489 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5491 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5492 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5494 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5495 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5497 else {
5498 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5500 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
5501 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5502 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5503 parser.addFixup(src, ld::Fixup::k5of5, kind);
5505 else {
5506 // switch binding base on coalescing
5507 if ( target.atom == NULL ) {
5508 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5510 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5511 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5513 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5514 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5516 else {
5517 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5519 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
5520 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5521 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5522 parser.addFixup(src, ld::Fixup::k5of5, kind);
5525 break;
5527 return result;
5535 template <>
5536 bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
5538 const macho_section<P>* sect = this->machoSection();
5539 bool result = false;
5540 uint32_t srcAddr;
5541 uint32_t dstAddr;
5542 uint32_t* fixUpPtr;
5543 int32_t displacement = 0;
5544 uint32_t instruction = 0;
5545 pint_t contentValue = 0;
5546 Parser<arm>::SourceLocation src;
5547 Parser<arm>::TargetDesc target;
5548 const macho_relocation_info<P>* nextReloc;
5550 if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5551 bool externSymbolIsThumbDef = false;
5552 srcAddr = sect->addr() + reloc->r_address();
5553 src.atom = this->findAtomByAddress(srcAddr);
5554 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5555 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
5556 if ( reloc->r_type() != ARM_RELOC_PAIR )
5557 instruction = LittleEndian::get32(*fixUpPtr);
5558 if ( reloc->r_extern() ) {
5559 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5560 // use direct reference for local symbols
5561 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
5562 parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
5564 else {
5565 target.atom = NULL;
5566 target.name = parser.nameFromSymbol(targetSymbol);
5567 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5568 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
5569 externSymbolIsThumbDef = true;
5572 switch ( reloc->r_type() ) {
5573 case ARM_RELOC_BR24:
5574 // Sign-extend displacement
5575 displacement = (instruction & 0x00FFFFFF) << 2;
5576 if ( (displacement & 0x02000000) != 0 )
5577 displacement |= 0xFC000000;
5578 // The pc added will be +8 from the pc
5579 displacement += 8;
5580 // If this is BLX add H << 1
5581 if ((instruction & 0xFE000000) == 0xFA000000)
5582 displacement += ((instruction & 0x01000000) >> 23);
5583 if ( reloc->r_extern() ) {
5584 target.addend = srcAddr + displacement;
5585 if ( externSymbolIsThumbDef )
5586 target.addend &= -2; // remove thumb bit
5588 else {
5589 dstAddr = srcAddr + displacement;
5590 parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5592 // special case "calls" for dtrace
5593 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5594 parser.addFixup(src, ld::Fixup::k1of1,
5595 ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
5596 parser.addDtraceExtraInfos(src, &target.name[16]);
5598 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5599 parser.addFixup(src, ld::Fixup::k1of1,
5600 ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
5601 parser.addDtraceExtraInfos(src, &target.name[20]);
5603 else {
5604 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5606 break;
5607 case ARM_THUMB_RELOC_BR22:
5608 // thumb2 added two more bits to displacement, complicating the displacement decoding
5610 uint32_t s = (instruction >> 10) & 0x1;
5611 uint32_t j1 = (instruction >> 29) & 0x1;
5612 uint32_t j2 = (instruction >> 27) & 0x1;
5613 uint32_t imm10 = instruction & 0x3FF;
5614 uint32_t imm11 = (instruction >> 16) & 0x7FF;
5615 uint32_t i1 = (j1 == s);
5616 uint32_t i2 = (j2 == s);
5617 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5618 int32_t sdis = dis;
5619 if ( s )
5620 sdis |= 0xFE000000;
5621 displacement = sdis;
5623 // The pc added will be +4 from the pc
5624 displacement += 4;
5625 // If the instruction was blx, force the low 2 bits to be clear
5626 dstAddr = srcAddr + displacement;
5627 if ((instruction & 0xF8000000) == 0xE8000000)
5628 dstAddr &= 0xFFFFFFFC;
5630 if ( reloc->r_extern() ) {
5631 target.addend = dstAddr;
5633 else {
5634 parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5636 // special case "calls" for dtrace
5637 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5638 parser.addFixup(src, ld::Fixup::k1of1,
5639 ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
5640 parser.addDtraceExtraInfos(src, &target.name[16]);
5642 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5643 parser.addFixup(src, ld::Fixup::k1of1,
5644 ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
5645 parser.addDtraceExtraInfos(src, &target.name[20]);
5647 else {
5648 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5650 break;
5651 case ARM_RELOC_VANILLA:
5652 if ( reloc->r_length() != 2 )
5653 throw "bad length for ARM_RELOC_VANILLA";
5654 contentValue = LittleEndian::get32(*fixUpPtr);
5655 if ( reloc->r_extern() ) {
5656 target.addend = (int32_t)contentValue;
5657 if ( externSymbolIsThumbDef )
5658 target.addend &= -2; // remove thumb bit
5660 else {
5661 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5662 // possible non-extern relocation turned into by-name ref because target is a weak-def
5663 if ( target.atom != NULL ) {
5664 if ( target.atom->isThumb() )
5665 target.addend &= -2; // remove thumb bit
5666 // if reference to LSDA, add group subordinate fixup
5667 if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
5668 Parser<arm>::SourceLocation src2;
5669 src2.atom = src.atom;
5670 src2.offsetInAtom = 0;
5671 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
5675 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5676 break;
5677 case ARM_THUMB_32BIT_BRANCH:
5678 // silently ignore old unnecessary reloc
5679 break;
5680 case ARM_RELOC_HALF:
5681 nextReloc = &reloc[1];
5682 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5683 uint32_t instruction16;
5684 uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
5685 bool isThumb;
5686 if ( reloc->r_length() & 2 ) {
5687 isThumb = true;
5688 uint32_t i = ((instruction & 0x00000400) >> 10);
5689 uint32_t imm4 = (instruction & 0x0000000F);
5690 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5691 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5692 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5694 else {
5695 isThumb = false;
5696 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5697 uint32_t imm12 = (instruction & 0x00000FFF);
5698 instruction16 = (imm4 << 12) | imm12;
5700 if ( reloc->r_length() & 1 ) {
5701 // high 16
5702 dstAddr = ((instruction16 << 16) | other16);
5703 if ( reloc->r_extern() ) {
5704 target.addend = dstAddr;
5705 if ( externSymbolIsThumbDef )
5706 target.addend &= -2; // remove thumb bit
5708 else {
5709 parser.findTargetFromAddress(dstAddr, target);
5710 if ( target.atom->isThumb() )
5711 target.addend &= (-2); // remove thumb bit
5713 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16), target);
5715 else {
5716 // low 16
5717 dstAddr = (other16 << 16) | instruction16;
5718 if ( reloc->r_extern() ) {
5719 target.addend = dstAddr;
5720 if ( externSymbolIsThumbDef )
5721 target.addend &= -2; // remove thumb bit
5723 else {
5724 parser.findTargetFromAddress(dstAddr, target);
5725 if ( target.atom->isThumb() )
5726 target.addend &= (-2); // remove thumb bit
5728 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16), target);
5730 result = true;
5732 else
5733 throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
5734 break;
5735 default:
5736 throwf("unknown relocation type %d", reloc->r_type());
5737 break;
5740 else {
5741 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5742 // file format allows pair to be scattered or not
5743 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5744 nextReloc = &reloc[1];
5745 srcAddr = sect->addr() + sreloc->r_address();
5746 dstAddr = sreloc->r_value();
5747 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
5748 instruction = LittleEndian::get32(*fixUpPtr);
5749 src.atom = this->findAtomByAddress(srcAddr);
5750 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5751 bool nextRelocIsPair = false;
5752 uint32_t nextRelocAddress = 0;
5753 uint32_t nextRelocValue = 0;
5754 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5755 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5756 nextRelocIsPair = true;
5757 nextRelocAddress = nextReloc->r_address();
5758 result = true;
5761 else {
5762 if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
5763 nextRelocIsPair = true;
5764 nextRelocAddress = nextSReloc->r_address();
5765 nextRelocValue = nextSReloc->r_value();
5766 result = true;
5769 switch ( sreloc->r_type() ) {
5770 case ARM_RELOC_VANILLA:
5771 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5772 if ( sreloc->r_length() != 2 )
5773 throw "bad length for ARM_RELOC_VANILLA";
5774 target.atom = parser.findAtomByAddress(sreloc->r_value());
5775 contentValue = LittleEndian::get32(*fixUpPtr);
5776 target.addend = contentValue - target.atom->_objAddress;
5777 if ( target.atom->isThumb() )
5778 target.addend &= -2; // remove thumb bit
5779 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5780 break;
5781 case ARM_RELOC_BR24:
5782 // Sign-extend displacement
5783 displacement = (instruction & 0x00FFFFFF) << 2;
5784 if ( (displacement & 0x02000000) != 0 )
5785 displacement |= 0xFC000000;
5786 // The pc added will be +8 from the pc
5787 displacement += 8;
5788 // If this is BLX add H << 1
5789 if ((instruction & 0xFE000000) == 0xFA000000)
5790 displacement += ((instruction & 0x01000000) >> 23);
5791 target.atom = parser.findAtomByAddress(sreloc->r_value());
5792 target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
5793 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5794 break;
5795 case ARM_THUMB_RELOC_BR22:
5796 // thumb2 added two more bits to displacement, complicating the displacement decoding
5798 uint32_t s = (instruction >> 10) & 0x1;
5799 uint32_t j1 = (instruction >> 29) & 0x1;
5800 uint32_t j2 = (instruction >> 27) & 0x1;
5801 uint32_t imm10 = instruction & 0x3FF;
5802 uint32_t imm11 = (instruction >> 16) & 0x7FF;
5803 uint32_t i1 = (j1 == s);
5804 uint32_t i2 = (j2 == s);
5805 uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5806 int32_t sdis = dis;
5807 if ( s )
5808 sdis |= 0xFE000000;
5809 displacement = sdis;
5811 // The pc added will be +4 from the pc
5812 displacement += 4;
5813 dstAddr = srcAddr+displacement;
5814 // If the instruction was blx, force the low 2 bits to be clear
5815 if ((instruction & 0xF8000000) == 0xE8000000)
5816 dstAddr &= 0xFFFFFFFC;
5817 target.atom = parser.findAtomByAddress(sreloc->r_value());
5818 target.addend = dstAddr - target.atom->_objAddress;
5819 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5820 break;
5821 case ARM_RELOC_SECTDIFF:
5822 case ARM_RELOC_LOCAL_SECTDIFF:
5824 if ( ! nextRelocIsPair )
5825 throw "ARM_RELOC_SECTDIFF missing following pair";
5826 if ( sreloc->r_length() != 2 )
5827 throw "bad length for ARM_RELOC_SECTDIFF";
5828 contentValue = LittleEndian::get32(*fixUpPtr);
5829 Atom<arm>* fromAtom = parser.findAtomByAddress(nextRelocValue);
5830 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5831 uint32_t offsetInTarget;
5832 Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
5833 // check for addend encoded in the section content
5834 int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5835 if ( targetAtom->isThumb() )
5836 addend &= -2; // remove thumb bit
5837 // if reference to LSDA, add group subordinate fixup
5838 if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
5839 Parser<arm>::SourceLocation src2;
5840 src2.atom = src.atom;
5841 src2.offsetInAtom = 0;
5842 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
5844 if ( addend < 0 ) {
5845 // switch binding base on coalescing
5846 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5847 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5849 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5850 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5852 else {
5853 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5855 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
5856 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5857 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5858 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5860 else {
5861 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5862 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5864 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5865 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5867 else {
5868 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5870 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
5871 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5872 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5873 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5876 break;
5877 case ARM_RELOC_HALF_SECTDIFF:
5878 if ( nextRelocIsPair ) {
5879 instruction = LittleEndian::get32(*fixUpPtr);
5880 Atom<arm>* fromAtom = parser.findAtomByAddress(nextRelocValue);
5881 uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5882 Atom<arm>* targetAtom = parser.findAtomByAddress(sreloc->r_value());
5883 uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
5884 uint32_t instruction16;
5885 uint32_t other16 = (nextRelocAddress & 0xFFFF);
5886 bool isThumb;
5887 if ( sreloc->r_length() & 2 ) {
5888 isThumb = true;
5889 uint32_t i = ((instruction & 0x00000400) >> 10);
5890 uint32_t imm4 = (instruction & 0x0000000F);
5891 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5892 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5893 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5895 else {
5896 isThumb = false;
5897 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5898 uint32_t imm12 = (instruction & 0x00000FFF);
5899 instruction16 = (imm4 << 12) | imm12;
5901 if ( sreloc->r_length() & 1 )
5902 dstAddr = ((instruction16 << 16) | other16);
5903 else
5904 dstAddr = (other16 << 16) | instruction16;
5905 if ( targetAtom->isThumb() )
5906 dstAddr &= (-2); // remove thumb bit
5907 int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
5908 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5909 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5911 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5912 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5914 else {
5915 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5917 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
5918 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5919 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5920 if ( sreloc->r_length() & 1 ) {
5921 // high 16
5922 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
5924 else {
5925 // low 16
5926 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
5928 result = true;
5930 else
5931 throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
5932 break;
5933 case ARM_RELOC_HALF:
5934 if ( nextRelocIsPair ) {
5935 instruction = LittleEndian::get32(*fixUpPtr);
5936 Atom<arm>* targetAtom = parser.findAtomByAddress(sreloc->r_value());
5937 uint32_t instruction16;
5938 uint32_t other16 = (nextRelocAddress & 0xFFFF);
5939 bool isThumb;
5940 if ( sreloc->r_length() & 2 ) {
5941 isThumb = true;
5942 uint32_t i = ((instruction & 0x00000400) >> 10);
5943 uint32_t imm4 = (instruction & 0x0000000F);
5944 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5945 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5946 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5948 else {
5949 isThumb = false;
5950 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5951 uint32_t imm12 = (instruction & 0x00000FFF);
5952 instruction16 = (imm4 << 12) | imm12;
5954 if ( sreloc->r_length() & 1 )
5955 dstAddr = ((instruction16 << 16) | other16);
5956 else
5957 dstAddr = (other16 << 16) | instruction16;
5958 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5959 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
5961 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5962 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5964 else {
5965 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5967 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
5968 if ( sreloc->r_length() & 1 ) {
5969 // high 16
5970 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
5972 else {
5973 // low 16
5974 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
5976 result = true;
5978 else
5979 throw "scattered ARM_RELOC_HALF reloc missing following pair";
5980 break;
5981 default:
5982 throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
5985 return result;
5992 template <typename A>
5993 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
5995 // inherited
5996 FixedSizeSection<A>::addRelocFixup(parser, reloc);
5998 assert(0 && "needs template specialization");
5999 return false;
6002 template <>
6003 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6005 // if this is the reloc for the super class name string, add implicit reference to super class
6006 if ( ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA) ) {
6007 assert( reloc->r_length() == 2 );
6008 assert( ! reloc->r_pcrel() );
6010 const macho_section<P>* sect = this->machoSection();
6011 Parser<x86>::SourceLocation src;
6012 uint32_t srcAddr = sect->addr() + reloc->r_address();
6013 src.atom = this->findAtomByAddress(srcAddr);
6014 src.offsetInAtom = srcAddr - src.atom->objectAddress();
6015 if ( src.offsetInAtom == 4 ) {
6016 Parser<x86>::TargetDesc stringTarget;
6017 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6018 uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6019 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6021 assert(stringTarget.atom != NULL);
6022 assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6023 const char* superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
6024 char* superClassName = new char[strlen(superClassBaseName) + 20];
6025 strcpy(superClassName, ".objc_class_name_");
6026 strcat(superClassName, superClassBaseName);
6028 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
6031 // inherited
6032 return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
6037 template <typename A>
6038 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6040 // inherited
6041 PointerToCStringSection<A>::addRelocFixup(parser, reloc);
6043 assert(0 && "needs template specialization");
6044 return false;
6049 template <>
6050 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6052 // add implict class refs, fixups not usable yet, so look at relocations
6053 assert( (reloc->r_address() & R_SCATTERED) == 0 );
6054 assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
6055 assert( reloc->r_length() == 2 );
6056 assert( ! reloc->r_pcrel() );
6058 const macho_section<P>* sect = this->machoSection();
6059 Parser<x86>::SourceLocation src;
6060 uint32_t srcAddr = sect->addr() + reloc->r_address();
6061 src.atom = this->findAtomByAddress(srcAddr);
6062 src.offsetInAtom = srcAddr - src.atom->objectAddress();
6063 Parser<x86>::TargetDesc stringTarget;
6064 const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6065 uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6066 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6068 assert(stringTarget.atom != NULL);
6069 assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6070 const char* baseClassName = (char*)stringTarget.atom->rawContentPointer();
6071 char* objcClassName = new char[strlen(baseClassName) + 20];
6072 strcpy(objcClassName, ".objc_class_name_");
6073 strcat(objcClassName, baseClassName);
6075 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
6077 // inherited
6078 return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
6082 template <typename A>
6083 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
6085 const macho_section<P>* sect = this->machoSection();
6086 const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
6087 const uint32_t relocCount = sect->nreloc();
6088 for (uint32_t r = 0; r < relocCount; ++r) {
6089 try {
6090 if ( this->addRelocFixup(parser, &relocs[r]) )
6091 ++r; // skip next
6093 catch (const char* msg) {
6094 throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
6098 // add follow-on fixups if .o file is missing .subsections_via_symbols
6099 if ( this->addFollowOnFixups() ) {
6100 Atom<A>* end = &_endAtoms[-1];
6101 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6102 typename Parser<A>::SourceLocation src(p, 0);
6103 Atom<A>* nextAtom = &p[1];
6104 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6107 else if ( this->type() == ld::Section::typeCode ) {
6108 // if FDE broke text not at a symbol, use followOn to keep code together
6109 Atom<A>* end = &_endAtoms[-1];
6110 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6111 typename Parser<A>::SourceLocation src(p, 0);
6112 Atom<A>* nextAtom = &p[1];
6113 if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
6114 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6119 // add follow-on fixups for aliases
6120 if ( _hasAliases ) {
6121 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
6122 if ( p->isAlias() && ! this->addFollowOnFixups() ) {
6123 Atom<A>* targetOfAlias = &p[1];
6124 assert(p < &_endAtoms[-1]);
6125 assert(p->_objAddress == targetOfAlias->_objAddress);
6126 typename Parser<A>::SourceLocation src(p, 0);
6127 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
6136 // main function used by linker to instantiate ld::Files
6138 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
6139 const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
6141 switch ( opts.architecture ) {
6142 case CPU_TYPE_X86_64:
6143 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
6144 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6145 break;
6146 case CPU_TYPE_I386:
6147 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) )
6148 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6149 break;
6150 case CPU_TYPE_ARM:
6151 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
6152 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6153 break;
6155 return NULL;
6159 // used by archive reader to validate member object file
6161 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
6163 switch ( opts.architecture ) {
6164 case CPU_TYPE_X86_64:
6165 return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
6166 case CPU_TYPE_I386:
6167 return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
6168 case CPU_TYPE_ARM:
6169 return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
6171 return false;
6175 // used by linker to infer architecture when no -arch is on command line
6177 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
6179 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6180 *result = CPU_TYPE_X86_64;
6181 *subResult = CPU_SUBTYPE_X86_64_ALL;
6182 return true;
6184 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6185 *result = CPU_TYPE_I386;
6186 *subResult = CPU_SUBTYPE_X86_ALL;
6187 return true;
6189 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6190 *result = CPU_TYPE_ARM;
6191 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
6192 *subResult = header->cpusubtype();
6193 return true;
6195 return false;
6199 // used by linker is error messages to describe bad .o file
6201 const char* archName(const uint8_t* fileContent)
6203 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6204 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
6206 if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6207 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
6209 if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6210 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
6212 return NULL;
6216 // Used by archive reader when -ObjC option is specified
6218 bool hasObjC2Categories(const uint8_t* fileContent)
6220 if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6221 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
6223 else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6224 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
6226 else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
6227 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
6229 return false;
6234 } // namespace relocatable
6235 } // namespace mach_o