ld64/src/ld/parsers/macho_relocatable_file.cpp

   1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
   2  *
   3  * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
   4  *
   5  * @APPLE_LICENSE_HEADER_START@
   6  *
   7  * This file contains Original Code and/or Modifications of Original Code
   8  * as defined in and that are subject to the Apple Public Source License
   9  * Version 2.0 (the 'License'). You may not use this file except in
  10  * compliance with the License. Please obtain a copy of the License at
  11  * http://www.opensource.apple.com/apsl/ and read it before using this
  12  * file.
  13  *
  14  * The Original Code and all software distributed under the License are
  15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  19  * Please see the License for the specific language governing rights and
  20  * limitations under the License.
  21  *
  22  * @APPLE_LICENSE_HEADER_END@
  23  */
  24
  25
  26 #include <stdint.h>
  27 #include <stdlib.h>
  28 #include <math.h>
  29 #include <unistd.h>
  30 #include <fcntl.h>
  31 #include <sys/param.h>
  32 #include <sys/stat.h>
  33 #include <sys/mman.h>
  34
  35 #include "MachOFileAbstraction.hpp"
  36
  37 #include "libunwind/DwarfInstructions.hpp"
  38 #include "libunwind/AddressSpace.hpp"
  39 #include "libunwind/Registers.hpp"
  40
  41 #include <vector>
  42 #include <set>
  43 #include <map>
  44 #include <algorithm>
  45
  46 #include "dwarf2.h"
  47 #include "debugline.h"
  48
  49 #include "Architectures.hpp"
  50 #include "ld.hpp"
  51 #include "macho_relocatable_file.h"
  52
  53
  54
  55 extern void throwf(const char* format, ...) __attribute__ ((noreturn,format(printf, 1, 2)));
     // throwf(): defined in another translation unit. Takes a printf-style format string
     // (argument 1, varargs from argument 2, checked by the compiler through the format
     // attribute) and is declared noreturn, so control never comes back to the caller.
  56 extern void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
     // warning(): defined in another translation unit. Same compiler-checked printf-style
     // signature as throwf(), but without noreturn, i.e. the caller continues afterwards.
  57
  58 namespace mach_o {
  59 namespace relocatable {
  60
  61
  62 // forward references: these class templates are declared up front so that the class
     // definitions that follow can name them (in friend declarations, pointer/reference
     // members and parameter types) before their full definitions have been seen.
  63 template <typename A> class Parser;
  64 template <typename A> class Atom;
  65 template <typename A> class Section;
  66 template <typename A> class CFISection;
  67 template <typename A> class CUSection;
  68
  69 template <typename A>
     //
     // File<A> specializes ld::relocatable::File for architecture type A.
     // It keeps a pointer to the raw file content together with the per-file tables declared
     // as members below: section and atom arrays, fixups, unwind infos, line infos, stabs,
     // and pointers to the four dwarf debug sections.
     // Atom<A>, Section<A>, Parser<A> and CFISection<A>::OAS are friends and can reach the
     // private members directly.
     //
  70 class File : public ld::relocatable::File
  71 {
  72 public:
             // Constructor: forwards p, mTime and ord to ld::relocatable::File and stores the
             // content pointer.  Everything else starts out empty: array and section pointers
             // are NULL, counts are 0, debug info kind is kDebugInfoNone, the objc constraint is
             // objcConstraintNone, cpuSubType is 0 and both boolean flags are false.
  73                                                                                         File(const char* p, time_t mTime, const uint8_t* content, uint32_t ord) :
  74                                                                                                 ld::relocatable::File(p,mTime,ord), _fileContent(content),
  75                                                                                                 _sectionsArray(NULL), _atomsArray(NULL),
  76                                                                                                 _sectionsArrayCount(0), _atomsArrayCount(0),
  77                                                                                                 _debugInfoKind(ld::relocatable::File::kDebugInfoNone),
  78                                                                                                 _dwarfTranslationUnitDir(NULL), _dwarfTranslationUnitFile(NULL),
  79                                                                                                 _dwarfDebugInfoSect(NULL), _dwarfDebugAbbrevSect(NULL),
  80                                                                                                 _dwarfDebugLineSect(NULL), _dwarfDebugStringSect(NULL),
  81                                                                                                 _objConstraint(ld::File::objcConstraintNone),
  82                                                                                                 _cpuSubType(0),
  83                                                                                                 _ojcReplacmentClass(false),  _canScatterAtoms(false) {}
             // Destructor is only declared here; its definition (and therefore what it
             // releases) lives outside this class body.
  84         virtual                                                                 ~File();
  85
  86         // overrides of ld::File
  87         virtual bool                                                                            forEachAtom(ld::File::AtomHandler&) const;
             // This implementation ignores both arguments and always returns false.
  88         virtual bool                                                                            justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const
  89                                                                                                                                                                         { return false; }
  90
  91         // overrides of ld::relocatable::File
             // The accessors below simply return the corresponding private member.
  92         virtual bool                                                                            objcReplacementClasses() const  { return _ojcReplacmentClass; }
  93         virtual ObjcConstraint                                                          objCConstraint() const                  { return _objConstraint; }
  94         virtual uint32_t                                                                        cpuSubType() const                              { return _cpuSubType; }
  95         virtual DebugInfoKind                                                           debugInfo() const                               { return _debugInfoKind; }
             // Returns the address of the member vector; the pointer is never NULL.
  96         virtual const std::vector<ld::relocatable::File::Stab>* stabs() const                                   { return &_stabs; }
  97         virtual bool                                                                            canScatterAtoms() const                 { return _canScatterAtoms; }
             // Not virtual; defined outside this class body.
  98         bool                                                                                            translationUnitSource(const char** dir, const char** name) const;
  99
             // Returns the content pointer handed to the constructor (note: not a const method).
 100         const uint8_t*                                                                          fileContent()                                   { return _fileContent; }
 101 private:
 102         friend class Atom<A>;
 103         friend class Section<A>;
 104         friend class Parser<A>;
 105         friend class CFISection<A>::OAS;
 106
 107         typedef typename A::P                                   P;
 108
             // Members are listed in the same order as the constructor's initializer list;
             // keep the two in sync when adding fields.
 109         const uint8_t*                                                  _fileContent;
 110         Section<A>**                                                    _sectionsArray;
             // Typed as raw bytes rather than Atom<A>*; presumably a buffer the atoms are
             // constructed into (see the uint8_t* buffer parameter of appendAtoms) -- confirm.
 111         uint8_t*                                                                _atomsArray;
 112         uint32_t                                                                _sectionsArrayCount;
 113         uint32_t                                                                _atomsArrayCount;
 114         std::vector<ld::Fixup>                                  _fixups;
 115         std::vector<ld::Atom::UnwindInfo>               _unwindInfos;
 116         std::vector<ld::Atom::LineInfo>                 _lineInfos;
 117         std::vector<ld::relocatable::File::Stab>_stabs;
 118         ld::relocatable::File::DebugInfoKind    _debugInfoKind;
 119         const char*                                                             _dwarfTranslationUnitDir;
 120         const char*                                                             _dwarfTranslationUnitFile;
 121         const macho_section<P>*                                 _dwarfDebugInfoSect;
 122         const macho_section<P>*                                 _dwarfDebugAbbrevSect;
 123         const macho_section<P>*                                 _dwarfDebugLineSect;
 124         const macho_section<P>*                                 _dwarfDebugStringSect;
 125         ld::File::ObjcConstraint                                _objConstraint;
 126         uint32_t                                                                _cpuSubType;
 127         bool                                                                    _ojcReplacmentClass;
 128         bool                                                                    _canScatterAtoms;
 129 };
 130
 131
 132 template <typename A>
     //
     // Section<A> is the abstract base (computeAtomCount() and appendAtoms() are pure
     // virtual) for the section classes in this file.  It derives from ld::Section and
     // ties a section to its owning File<A> and, when constructed from one, to the
     // macho_section record it was built from.
     //
 133 class Section : public ld::Section
 134 {
 135 public:
 136         typedef typename A::P::uint_t   pint_t;
 137         typedef typename A::P                   P;
 138         typedef typename A::P::E                E;
 139
 140         virtual                                                 ~Section()                                      { }
             // Owning file, as passed to the constructor.
 141         class File<A>&                                  file() const                            { return _file; }
             // NULL when the section was built with the name-based constructor below.
 142         const macho_section<P>*                 machoSection() const            { return _machOSection; }
 143         uint32_t                                                sectionNum(class Parser<A>&) const;
 144         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr);
             // Default content type; subclasses override to report something more specific.
 145         virtual ld::Atom::ContentType   contentType()                           { return ld::Atom::typeUnclassified; }
             // True when the mach-o section flags contain S_ATTR_NO_DEAD_STRIP.
             // NOTE(review): this dereferences _machOSection unconditionally, while the
             // name-based constructor leaves it NULL -- subclasses built that way must
             // override this or make sure it is never called; confirm.
 146         virtual bool                                    dontDeadStrip()                         { return (this->_machOSection->flags() & S_ATTR_NO_DEAD_STRIP); }
             // Delegates to findContentAtomByAddress() using this section's _beginAtoms/_endAtoms.
 147         virtual Atom<A>*                                findAtomByAddress(pint_t addr) { return this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms); }
             // Follow-on fixups are requested exactly when the file reports it cannot scatter atoms.
 148         virtual bool                                    addFollowOnFixups() const       { return ! _file.canScatterAtoms(); }
             // Pure virtual: each concrete section supplies its own atom construction ...
 149         virtual uint32_t                                appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 150                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 151                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
             // ... and its own atom counting.
 152         virtual uint32_t                                computeAtomCount(class Parser<A>& parser,
 153                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 154                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&) = 0;
 155         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 156         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
             // Defaults for content-based coalescing: hash is always 0 and nothing coalesces.
             // Sections that coalesce by content override both.
 157         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const { return 0; }
 158         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 159                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
 160         static const char*                              makeSectionName(const macho_section<typename A::P>* s);
 161
 162 protected:
             // Build from a mach-o section record: segment name, section name and type are
             // derived from s via makeSegmentName(), makeSectionName() and sectionType().
 163                                                 Section(File<A>& f, const macho_section<typename A::P>* s)
 164                                                         : ld::Section(makeSegmentName(s), makeSectionName(s), sectionType(s)),
 165                                                                 _file(f), _machOSection(s), _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
             // Build from explicit names and type with no backing mach-o section record
             // (_machOSection is set to NULL); hidden is forwarded to ld::Section.
 166                                                 Section(File<A>& f, const char* segName, const char* sectName, ld::Section::Type t, bool hidden=false)
 167                                                         : ld::Section(segName, sectName, t, hidden), _file(f), _machOSection(NULL),
 168                                                                 _beginAtoms(NULL), _endAtoms(NULL), _hasAliases(false) { }
 169
 170
 171         Atom<A>*                                                findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end);
 172         uint32_t                                                x86_64PcRelOffset(uint8_t r_type);
 173         static const char*                              makeSegmentName(const macho_section<typename A::P>* s);
 174         static bool                                             readable(const macho_section<typename A::P>* s);
 175         static bool                                             writable(const macho_section<typename A::P>* s);
             // (sic) the spelling "exectuable" is the declared name; the definition is not in this class body.
 176         static bool                                             exectuable(const macho_section<typename A::P>* s);
 177         static ld::Section::Type                sectionType(const macho_section<typename A::P>* s);
 178
 179         File<A>&                                                _file;
 180         const macho_section<P>*                 _machOSection;
             // Both atom pointers start as NULL in either constructor; they are the range
             // handed to findContentAtomByAddress() by findAtomByAddress().
 181         class Atom<A>*                                  _beginAtoms;
 182         class Atom<A>*                                  _endAtoms;
 183         bool                                                    _hasAliases;
 184 };
 185
 186
 187 template <typename A>
     //
     // CFISection<A> is the Section<A> subclass whose atoms have content type typeCFI.
     // It never adds follow-on fixups, and it declares the nested OAS address-space class
     // used to read the section's bytes.
     //
 188 class CFISection : public Section<A>
 189 {
 190 public:
             // The parser argument is accepted but not used by this constructor.
 191                                                 CFISection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 192                                                         : Section<A>(f, s) { }
 193         uint32_t                        cfiCount();
 194
 195         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeCFI; }
 196         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 197         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 198         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 199         virtual bool            addFollowOnFixups() const       { return false; }
 200
 201
 202         ///
 203         /// ObjectFileAddressSpace is used as a template parameter to UnwindCursor for parsing
 204         /// dwarf CFI information in an object file.
 205         ///
 206         class OAS
 207         {
 208         public:
 209                         typedef typename A::P::uint_t   pint_t;
 210                         typedef typename A::P                   P;
 211                         typedef typename A::P::E                E;
                             // NOTE(review): despite its name, sint_t is the same unsigned type as
                             // pint_t here -- confirm this is intended before relying on signedness.
 212                         typedef typename A::P::uint_t   sint_t;
 213
                             // Records the section, the buffer holding its bytes, and the section's
                             // address range [addr, addr+size) taken from its mach-o section record.
 214                                                         OAS(CFISection<A>& ehFrameSection, const uint8_t* ehFrameBuffer) :
 215                                                                 _ehFrameSection(ehFrameSection),
 216                                                                 _ehFrameContent(ehFrameBuffer),
 217                                                                 _ehFrameStartAddr(ehFrameSection.machoSection()->addr()),
 218                                                                 _ehFrameEndAddr(ehFrameSection.machoSection()->addr()+ehFrameSection.machoSection()->size()) {}
 219
                             // Fixed-width reads: each translates addr with mappedAddress() and, for
                             // widths above one byte, passes the loaded value through E::getNN() /
                             // P::getP() (presumably the byte-order conversion for A -- confirm).
 220                         uint8_t                 get8(pint_t addr) { return *((uint8_t*)mappedAddress(addr)); }
 221                         uint16_t                get16(pint_t addr)      { return E::get16(*((uint16_t*)mappedAddress(addr))); }
 222                         uint32_t                get32(pint_t addr)      { return E::get32(*((uint32_t*)mappedAddress(addr))); }
 223                         uint64_t                get64(pint_t addr)      { return E::get64(*((uint64_t*)mappedAddress(addr))); }
 224                         pint_t                  getP(pint_t addr)       { return P::getP(*((pint_t*)mappedAddress(addr))); }
                             // Variable-length reads take addr by reference (so they can advance it)
                             // plus an end bound; their definitions are outside this class body.
 225                         uint64_t                getULEB128(pint_t& addr, pint_t end);
 226                         int64_t                 getSLEB128(pint_t& addr, pint_t end);
 227                         pint_t                  getEncodedP(pint_t& addr, pint_t end, uint8_t encoding);
 228         private:
 229                 const void*                     mappedAddress(pint_t addr);
 230
 231                 CFISection<A>&                          _ehFrameSection;
 232                 const uint8_t*                          _ehFrameContent;
 233                 pint_t                                          _ehFrameStartAddr;
 234                 pint_t                                          _ehFrameEndAddr;
 235         };
 236
 237
 238         typedef typename A::P::uint_t                   pint_t;
             // Per-entry record type produced by libunwind when instantiated with OAS.
 239         typedef libunwind::CFI_Atom_Info<OAS>   CFI_Atom_Info;
 240
 241         void                            cfiParse(class Parser<A>& parser, uint8_t* buffer, CFI_Atom_Info cfiArray[], uint32_t cfiCount);
 242         bool                            needsRelocating();
 243
 244         static bool                     bigEndian();
 245 private:
 246         void                            addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo);
             // Static callback taking an opaque ref pointer, a function address and a message;
             // presumably handed to the libunwind parser for diagnostics -- confirm at the call site.
 247         static void                     warnFunc(void* ref, uint64_t funcAddr, const char* msg);
 248 };
 249
 250
 251 template <typename A>
     //
     // CUSection<A> is a Section<A> subclass that contributes no atoms of its own:
     // computeAtomCount() and appendAtoms() both return 0.  Its content is instead exposed
     // through count() and parse(), which fill an array of Info records.
     //
 252 class CUSection : public Section<A>
 253 {
 254 public:
             // The parser argument is accepted but not used by this constructor.
 255                                                 CUSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 256                                                         : Section<A>(f, s) { }
 257
 258         typedef typename A::P::uint_t   pint_t;
 259         typedef typename A::P                   P;
 260         typedef typename A::P::E                E;
 261
             // This section never produces atoms, so both hooks report zero.
 262         virtual uint32_t                computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 263         virtual uint32_t                appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&) { return 0; }
 264         virtual void                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 265         virtual bool                    addFollowOnFixups() const       { return false; }
 266
             // One record per entry handed to parse().  Field meanings follow their names
             // (function start/length, encoded unwind info, personality name, LSDA address,
             // plus the resolved function and LSDA atoms); presumably the atom pointers are
             // filled in after the addresses -- confirm against parse() and makeFixups().
 267         struct Info {
 268                 pint_t          functionStartAddress;
 269                 uint32_t        functionSymbolIndex;
 270                 uint32_t        rangeLength;
 271                 uint32_t        compactUnwindInfo;
 272                 const char*     personality;
 273                 pint_t          lsdaAddress;
 274                 Atom<A>*        function;
 275                 Atom<A>*        lsda;
 276         };
 277
 278         uint32_t                                count();
             // Fills the caller-supplied array, which has room for cnt Info records.
 279         void                                    parse(class Parser<A>& parser, uint32_t cnt, Info array[]);
 280
 281
 282 private:
 283
 284         const char*                             personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc);
 285
             // Comparator with the qsort()-style signature (two const void*, int result).
 286         static int                              infoSorter(const void* l, const void* r);
 287
 288 };
 289
 290
 291 template <typename A>
     //
     // TentativeDefinitionSection<A> is a Section<A> created from fixed names rather than
     // from a mach-o section record: segment "__DATA", section "__comm/tent", type
     // ld::Section::typeTentativeDefs.  Its atoms are zero-fill and it never makes fixups.
     //
 292 class TentativeDefinitionSection : public Section<A>
 293 {
 294 public:
             // The parser argument is accepted but not used by this constructor.
 295                                                 TentativeDefinitionSection(Parser<A>& parser, File<A>& f)
 296                                                         : Section<A>(f, "__DATA", "__comm/tent", ld::Section::typeTentativeDefs)  {}
 297
 298         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeZeroFill; }
 299         virtual bool            addFollowOnFixups() const       { return false; }
             // Address lookup is not supported for this section: calling it throws a C string.
 300         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "TentativeDefinitionSection::findAtomByAddress() should never be called"; }
 301         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 302                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 303         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 304                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 305                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
             // Intentionally empty: no fixups are created for this section.
 306         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 307 private:
 308         typedef typename A::P::uint_t   pint_t;
 309         typedef typename A::P                   P;
 310 };
 311
 312
 313 template <typename A>
     //
     // AbsoluteSymbolSection<A> is a Section<A> created from fixed names rather than from
     // a mach-o section record: segment "__DATA", section "__abs", type
     // ld::Section::typeAbsoluteSymbols, with the hidden flag set to true.
     // Atoms are looked up by value through findAbsAtomForValue() instead of by address.
     //
 314 class AbsoluteSymbolSection : public Section<A>
 315 {
 316 public:
             // The parser argument is accepted but not used by this constructor.
 317                                                 AbsoluteSymbolSection(Parser<A>& parser, File<A>& f)
 318                                                         : Section<A>(f, "__DATA", "__abs", ld::Section::typeAbsoluteSymbols, true)  {}
 319
 320         virtual ld::Atom::ContentType   contentType()           { return ld::Atom::typeUnclassified; }
             // Overridden to a constant, so no mach-o section record is consulted.
 321         virtual bool                                    dontDeadStrip()         { return false; }
             // Always Alignment(0), independent of addr.
 322         virtual ld::Atom::Alignment             alignmentForAddress(typename A::P::uint_t addr) { return ld::Atom::Alignment(0); }
 323         virtual bool            addFollowOnFixups() const       { return false; }
             // Address lookup is not supported for this section: calling it throws a C string.
 324         virtual Atom<A>*        findAtomByAddress(typename A::P::uint_t addr) { throw "AbsoluteSymbolSection::findAtomByAddress() should never be called"; }
 325         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 326                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 327         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 328                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
 329                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&);
             // Intentionally empty: no fixups are created for this section.
 330         virtual void            makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&) {}
 331         virtual Atom<A>*        findAbsAtomForValue(typename A::P::uint_t);
 332
 333 private:
 334         typedef typename A::P::uint_t   pint_t;
 335         typedef typename A::P                   P;
 336 };
 337
 338
 339 template <typename A>
     //
     // SymboledSection<A> is a Section<A> whose content type is held in the _type member
     // rather than being a per-class constant.  Its constructor, dontDeadStrip(),
     // computeAtomCount() and appendAtoms() are defined outside this class body.
     //
 340 class SymboledSection : public Section<A>
 341 {
 342 public:
 343                                                 SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s);
             // Reports whatever content type is stored in _type.
 344         virtual ld::Atom::ContentType   contentType() { return _type; }
 345         virtual bool                                    dontDeadStrip();
 346         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it,
 347                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 348         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer,
 349                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
 350                                                                         const struct Parser<A>::CFI_CU_InfoArrays&);
 351 protected:
 352         typedef typename A::P::uint_t   pint_t;
 353         typedef typename A::P                   P;
 354
             // Not initialized in this class body; presumably set by the out-of-line
             // constructor -- confirm.
 355         ld::Atom::ContentType                   _type;
 356 };
 357
 358
 359 template <typename A>
     //
     // TLVDefsSection<A> is a distinct type layered on SymboledSection<A>.  It adds no
     // members or overrides of its own; the constructor only forwards its arguments.
     //
 360 class TLVDefsSection : public SymboledSection<A>
 361 {
 362 public:
 363                                                 TLVDefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s) :
 364                                                         SymboledSection<A>(parser, f, s) { }
 365
 366 private:
 367
 368 };
 369
 370
 371 template <typename A>
     //
     // ImplicitSizeSection<A> is an abstract Section<A> whose subclasses describe their
     // elements through the protected hooks below: a name for unlabeled atoms, the element
     // size at an address, whether an element is used, and how atoms combine.  Those four
     // hooks are pure virtual; the remaining hooks have defaults defined inline.
     //
 372 class ImplicitSizeSection : public Section<A>
 373 {
 374 public:
             // The parser argument is accepted but not used by this constructor.
 375                                                 ImplicitSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 376                                                         : Section<A>(f, s) { }
 377         virtual uint32_t        computeAtomCount(class Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 378         virtual uint32_t        appendAtoms(class Parser<A>& parser, uint8_t* buffer, struct Parser<A>::LabelAndCFIBreakIterator& it, const struct Parser<A>::CFI_CU_InfoArrays&);
 379 protected:
 380         typedef typename A::P::uint_t   pint_t;
 381         typedef typename A::P                   P;
 382
             // Follow-on fixups are never added for these sections.
 383         virtual bool                                            addFollowOnFixups() const               { return false; }
             // Pure virtual: name given to an atom that has no label at addr.
 384         virtual const char*                                     unlabeledAtomName(Parser<A>& parser, pint_t addr) = 0;
             // Default: atoms are not put in the symbol table.
 385         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion()          { return ld::Atom::symbolTableNotIn; }
             // Pure virtual: size of the element that starts at addr.
 386         virtual pint_t                                          elementSizeAtAddress(pint_t addr) = 0;
             // Default: linkage-unit scope, regardless of parser or addr.
 387         virtual ld::Atom::Scope                         scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeLinkageUnit; }
             // Pure virtual: whether the element at addr should be used.
 388         virtual bool                                            useElementAt(Parser<A>& parser,
 389                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr) = 0;
             // Default: regular definition.
 390         virtual ld::Atom::Definition            definition()                                    { return ld::Atom::definitionRegular; }
             // Pure virtual: combine rule for the atom at addr.
 391         virtual ld::Atom::Combine                       combine(Parser<A>& parser, pint_t addr) = 0;
             // Default: a label is ignored when its first character is 'L'.
             // The label pointer is dereferenced without a NULL check.
 392         virtual bool                                            ignoreLabel(const char* label)  { return (label[0] == 'L'); }
 393 };
 394
 395 template <typename A>
     //
     // FixedSizeSection<A> refines ImplicitSizeSection<A> by answering useElementAt()
     // with true for every address.  The other pure virtual hooks are left to subclasses.
     //
 396 class FixedSizeSection : public ImplicitSizeSection<A>
 397 {
 398 public:
 399                                                 FixedSizeSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 400                                                         : ImplicitSizeSection<A>(parser, f, s) { }
 401 protected:
 402         typedef typename A::P::uint_t   pint_t;
 403         typedef typename A::P                   P;
 404         typedef typename A::P::E                E;
 405
             // Every element is used; none of the three arguments is consulted.
 406         virtual bool                                    useElementAt(Parser<A>& parser,
 407                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
 408                                                                                                                 { return true; }
 409 };
 410
 411
 412 template <typename A>
     //
     // Literal4Section<A> is a FixedSizeSection<A> whose elements are all 4 bytes, named
     // "4-byte-literal" when unlabeled, and combined by name and content.
     // contentHash() and canCoalesceWith() are overridden; their definitions are outside
     // this class body.
     //
 413 class Literal4Section : public FixedSizeSection<A>
 414 {
 415 public:
 416                                                 Literal4Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 417                                                         : FixedSizeSection<A>(parser, f, s) {}
 418 protected:
 419         typedef typename A::P::uint_t   pint_t;
 420         typedef typename A::P                   P;
 421
             // Alignment(2) for every address -- presumably a power-of-two exponent,
             // i.e. 4-byte alignment; confirm against ld::Atom::Alignment.
 422         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 423         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "4-byte-literal"; }
 424         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4; }
 425         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 426         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 427         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 428                                                                                                         const ld::IndirectBindingTable& ind) const;
 429 };
 430
 431 template <typename A>
     //
     // Literal8Section<A> is a FixedSizeSection<A> whose elements are all 8 bytes, named
     // "8-byte-literal" when unlabeled, and combined by name and content.
     // contentHash() and canCoalesceWith() are overridden; their definitions are outside
     // this class body.
     //
 432 class Literal8Section : public FixedSizeSection<A>
 433 {
 434 public:
 435                                                 Literal8Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 436                                                         : FixedSizeSection<A>(parser, f, s) {}
 437 protected:
 438         typedef typename A::P::uint_t   pint_t;
 439         typedef typename A::P                   P;
 440
             // Alignment(3) for every address -- presumably a power-of-two exponent,
             // i.e. 8-byte alignment; confirm against ld::Atom::Alignment.
 441         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(3); }
 442         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "8-byte-literal"; }
 443         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 8; }
 444         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 445         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 446         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 447                                                                                                         const ld::IndirectBindingTable& ind) const;
 448 };
 449
 450 template <typename A>
     //
     // Literal16Section<A> is a FixedSizeSection<A> whose elements are all 16 bytes, named
     // "16-byte-literal" when unlabeled, and combined by name and content.
     // contentHash() and canCoalesceWith() are overridden; their definitions are outside
     // this class body.
     //
 451 class Literal16Section : public FixedSizeSection<A>
 452 {
 453 public:
 454                                                 Literal16Section(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 455                                                         : FixedSizeSection<A>(parser, f, s) {}
 456 protected:
 457         typedef typename A::P::uint_t   pint_t;
 458         typedef typename A::P                   P;
 459
             // Alignment(4) for every address -- presumably a power-of-two exponent,
             // i.e. 16-byte alignment; confirm against ld::Atom::Alignment.
 460         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(4); }
 461         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "16-byte-literal"; }
 462         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 16; }
 463         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 464         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 465         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 466                                                                                                         const ld::IndirectBindingTable& ind) const;
 467 };
 468
 469
 470 template <typename A>
 471 class NonLazyPointerSection : public FixedSizeSection<A>
 472 {
 473 public:
 474                                                 NonLazyPointerSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 475                                                         : FixedSizeSection<A>(parser, f, s) {}
 476 protected:
 477         typedef typename A::P::uint_t   pint_t;
 478         typedef typename A::P                   P;
 479
 480         virtual void                                    makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&);
 481         virtual ld::Atom::ContentType   contentType()                                                   { return ld::Atom::typeNonLazyPointer; }
 482         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 483         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "non_lazy_ptr"; }
 484         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 485         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& parser, pint_t addr);
 486         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t);
 487         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 488         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 489         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 490                                                                                                         const ld::IndirectBindingTable& ind) const;
 491
 492 private:
 493         static const char*                              targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind);
 494         static ld::Fixup::Kind                  fixupKind();
 495 };
 496
 497
 498 template <typename A>
 499 class CFStringSection : public FixedSizeSection<A>
 500 {
 501 public:
 502                                                 CFStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 503                                                         : FixedSizeSection<A>(parser, f, s) {}
 504 protected:
 505         typedef typename A::P::uint_t   pint_t;
 506
 507         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 508         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "CFString"; }
 509         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return 4*sizeof(pint_t); }
 510         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 511         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 512         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 513         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 514                                                                                                         const ld::IndirectBindingTable& ind) const;
 515 private:
 516         enum ContentType { contentUTF8, contentUTF16, contentUnknown };
 517         static const uint8_t*                   targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
 518                                                                                                 ContentType* ct, unsigned int* count);
 519 };
 520
 521
 522 template <typename A>
 523 class ObjC1ClassSection : public FixedSizeSection<A>
 524 {
 525 public:
 526                                                 ObjC1ClassSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 527                                                         : FixedSizeSection<A>(parser, f, s) {}
 528 protected:
 529         typedef typename A::P::uint_t   pint_t;
 530         typedef typename A::P                   P;
 531         typedef typename A::P::E                E;
 532
 533         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& , pint_t )    { return ld::Atom::scopeGlobal; }
 534         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(2); }
 535         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t);
 536         virtual ld::Atom::SymbolTableInclusion  symbolTableInclusion()                  { return ld::Atom::symbolTableIn; }
 537         virtual pint_t                                  elementSizeAtAddress(pint_t addr);
 538         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineNever; }
 539         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 540         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
 541                                                                                                                                                         { return 0; }
 542         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 543                                                                                                         const ld::IndirectBindingTable& ind) const { return false; }
 544         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 545 };
 546
 547
 548 template <typename A>
 549 class ObjC2ClassRefsSection : public FixedSizeSection<A>
 550 {
 551 public:
 552                                                 ObjC2ClassRefsSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 553                                                         : FixedSizeSection<A>(parser, f, s) {}
 554 protected:
 555         typedef typename A::P::uint_t   pint_t;
 556
 557         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 558         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-class-ref"; }
 559         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 560         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 561         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 562         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 563         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 564                                                                                                         const ld::IndirectBindingTable& ind) const;
 565 private:
 566         const char*                                             targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 567 };
 568
 569
 570 template <typename A>
 571 class ObjC2CategoryListSection : public FixedSizeSection<A>
 572 {
 573 public:
 574                                                 ObjC2CategoryListSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 575                                                         : FixedSizeSection<A>(parser, f, s) {}
 576 protected:
 577         typedef typename A::P::uint_t   pint_t;
 578
 579         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 580         virtual ld::Atom::Scope                 scopeAtAddress(Parser<A>& parser, pint_t addr) { return ld::Atom::scopeTranslationUnit; }
 581         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "objc-cat-list"; }
 582         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 583         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineNever; }
 584         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 585 private:
 586         const char*                                             targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 587 };
 588
 589
 590 template <typename A>
 591 class PointerToCStringSection : public FixedSizeSection<A>
 592 {
 593 public:
 594                                                 PointerToCStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 595                                                         : FixedSizeSection<A>(parser, f, s) {}
 596 protected:
 597         typedef typename A::P::uint_t   pint_t;
 598
 599         virtual ld::Atom::Alignment             alignmentForAddress(pint_t addr)                { return ld::Atom::Alignment(log2(sizeof(pint_t))); }
 600         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-cstring"; }
 601         virtual pint_t                                  elementSizeAtAddress(pint_t addr)               { return sizeof(pint_t); }
 602         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndReferences; }
 603         virtual bool                                    ignoreLabel(const char* label)                  { return true; }
 604         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 605         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 606                                                                                                         const ld::IndirectBindingTable& ind) const;
 607         virtual const char*                             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 608 };
 609
 610
 611 template <typename A>
 612 class Objc1ClassReferences : public PointerToCStringSection<A>
 613 {
 614 public:
 615                                                 Objc1ClassReferences(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 616                                                         : PointerToCStringSection<A>(parser, f, s) {}
 617
 618         typedef typename A::P::uint_t   pint_t;
 619         typedef typename A::P                   P;
 620
 621         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "pointer-to-literal-objc-class-name"; }
 622         virtual bool                                    addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>*);
 623         virtual const char*                             targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 624 };
 625
 626
 627 template <typename A>
 628 class CStringSection : public ImplicitSizeSection<A>
 629 {
 630 public:
 631                                                 CStringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 632                                                         : ImplicitSizeSection<A>(parser, f, s) {}
 633 protected:
 634         typedef typename A::P::uint_t   pint_t;
 635         typedef typename A::P                   P;
 636
 637         virtual ld::Atom::ContentType   contentType()                                                   { return ld::Atom::typeCString; }
 638         virtual Atom<A>*                                findAtomByAddress(pint_t addr);
 639         virtual const char*                             unlabeledAtomName(Parser<A>&, pint_t)   { return "cstring"; }
 640         virtual pint_t                                  elementSizeAtAddress(pint_t addr);
 641         virtual bool                                    ignoreLabel(const char* label);
 642         virtual bool                                    useElementAt(Parser<A>& parser,
 643                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr);
 644         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 645         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 646         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 647                                                                                                         const ld::IndirectBindingTable& ind) const;
 648
 649 };
 650
 651
 652 template <typename A>
 653 class UTF16StringSection : public SymboledSection<A>
 654 {
 655 public:
 656                                                 UTF16StringSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
 657                                                         : SymboledSection<A>(parser, f, s) {}
 658 protected:
 659         typedef typename A::P::uint_t   pint_t;
 660         typedef typename A::P                   P;
 661
 662         virtual ld::Atom::Combine               combine(Parser<A>&, pint_t)                             { return ld::Atom::combineByNameAndContent; }
 663         virtual unsigned long                   contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const;
 664         virtual bool                                    canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
 665                                                                                                         const ld::IndirectBindingTable& ind) const;
 666 };
 667
 668
 669 //
 670 // Atoms in mach-o files
 671 //
 672 template <typename A>
 673 class Atom : public ld::Atom
 674 {
 675 public:
 676         // overrides of ld::Atom
 677         virtual ld::File*                                                       file() const            { return &sect().file(); }
 678         virtual bool                                                            translationUnitSource(const char** dir, const char** nm) const
 679                                                                                                                                         { return sect().file().translationUnitSource(dir, nm); }
 680         virtual const char*                                                     name() const            { return _name; }
 681         virtual uint64_t                                                        size() const            { return _size; }
 682         virtual uint64_t                                                        objectAddress() const { return _objAddress; }
 683         virtual void                                                            copyRawContent(uint8_t buffer[]) const;
 684         virtual const uint8_t*                                          rawContentPointer() const { return contentPointer(); }
 685         virtual unsigned long                                           contentHash(const ld::IndirectBindingTable& ind) const
 686                                                                                                                         { if ( _hash == 0 ) _hash = sect().contentHash(this, ind); return _hash; }
 687         virtual bool                                                            canCoalesceWith(const ld::Atom& rhs, const ld::IndirectBindingTable& ind) const
 688                                                                                                                         { return sect().canCoalesceWith(this, rhs, ind); }
 689         virtual ld::Fixup::iterator                                     fixupsBegin() const     { return &machofile()._fixups[_fixupsStartIndex]; }
 690         virtual ld::Fixup::iterator                                     fixupsEnd()     const   { return &machofile()._fixups[_fixupsStartIndex+_fixupsCount]; }
 691         virtual ld::Atom::UnwindInfo::iterator          beginUnwind() const     { return &machofile()._unwindInfos[_unwindInfoStartIndex]; }
 692         virtual ld::Atom::UnwindInfo::iterator          endUnwind()     const   { return &machofile()._unwindInfos[_unwindInfoStartIndex+_unwindInfoCount];  }
 693         virtual ld::Atom::LineInfo::iterator            beginLineInfo() const{ return &machofile()._lineInfos[_lineInfoStartIndex]; }
 694         virtual ld::Atom::LineInfo::iterator            endLineInfo() const { return &machofile()._lineInfos[_lineInfoStartIndex+_lineInfoCount];  }
 695
 696 private:
 697
 698         enum {  kFixupStartIndexBits = 32,
 699                         kLineInfoStartIndexBits = 32,
 700                         kUnwindInfoStartIndexBits = 24,
 701                         kFixupCountBits = 24,
 702                         kLineInfoCountBits = 12,
 703                         kUnwindInfoCountBits = 4
 704                 }; // must sum to 128
 705
 706 public:
 707         // methods for all atoms from mach-o object file
 708                         Section<A>&                                                     sect() const                    { return (Section<A>&)section(); }
 709                         File<A>&                                                        machofile() const                       { return ((Section<A>*)(this->_section))->file(); }
 710                         void                                                            setFixupsRange(uint32_t s, uint32_t c);
 711                         void                                                            setUnwindInfoRange(uint32_t s, uint32_t c);
 712                         void                                                            extendUnwindInfoRange();
 713                         void                                                            setLineInfoRange(uint32_t s, uint32_t c);
 714                         bool                                                            roomForMoreLineInfoCount() { return (_lineInfoCount < ((1<<kLineInfoCountBits)-1)); }
 715                         void                                                            incrementLineInfoCount() { assert(roomForMoreLineInfoCount()); ++_lineInfoCount; }
 716                         void                                                            incrementFixupCount() { if (_fixupsCount == ((1 << kFixupCountBits)-1))
 717                                                                                                                                                         throwf("too may fixups in %s", name()); ++_fixupsCount; }
 718                         const uint8_t*                                          contentPointer() const;
 719                         uint32_t                                                        fixupCount() const { return _fixupsCount; }
 720                         void                                                            verifyAlignment() const;
 721
 722         typedef typename A::P                                           P;
 723         typedef typename A::P::E                                        E;
 724         typedef typename A::P::uint_t                           pint_t;
 725                                                                                                 // constuct via all attributes
 726                                                                                                 Atom(Section<A>& sct, const char* nm, pint_t addr, uint64_t sz,
 727                                                                                                         ld::Atom::Definition d, ld::Atom::Combine c, ld::Atom::Scope s,
 728                                                                                                         ld::Atom::ContentType ct, ld::Atom::SymbolTableInclusion i,
 729                                                                                                         bool dds, bool thumb, bool al, ld::Atom::Alignment a)
 730                                                                                                                 : ld::Atom((ld::Section&)sct, d, c, s, ct, i, dds, thumb, al, a),
 731                                                                                                                         _size(sz), _objAddress(addr), _name(nm), _hash(0),
 732                                                                                                                         _fixupsStartIndex(0), _lineInfoStartIndex(0),
 733                                                                                                                         _unwindInfoStartIndex(0), _fixupsCount(0),
 734                                                                                                                         _lineInfoCount(0), _unwindInfoCount(0) { }
 735                                                                                                 // construct via symbol table entry
 736                                                                                                 Atom(Section<A>& sct, Parser<A>& parser, const macho_nlist<P>& sym,
 737                                                                                                                                 uint64_t sz, bool alias=false)
 738                                                                                                                 : ld::Atom((ld::Section&)sct, parser.definitionFromSymbol(sym),
 739                                                                                                                                 parser.combineFromSymbol(sym), parser.scopeFromSymbol(sym),
 740                                                                                                                                 parser.resolverFromSymbol(sym) ? ld::Atom::typeResolver : sct.contentType(),
 741                                                                                                                                 parser.inclusionFromSymbol(sym),
 742                                                                                                                                 parser.dontDeadStripFromSymbol(sym) || sct.dontDeadStrip(),
 743                                                                                                                                 parser.isThumbFromSymbol(sym), alias,
 744                                                                                                                                 sct.alignmentForAddress(sym.n_value())),
 745                                                                                                                         _size(sz), _objAddress(sym.n_value()),
 746                                                                                                                         _name(parser.nameFromSymbol(sym)), _hash(0),
 747                                                                                                                         _fixupsStartIndex(0), _lineInfoStartIndex(0),
 748                                                                                                                         _unwindInfoStartIndex(0), _fixupsCount(0),
 749                                                                                                                         _lineInfoCount(0), _unwindInfoCount(0) {
 750                                                                                                                                 // <rdar://problem/6783167> support auto-hidden weak symbols
 751                                                                                                                                 if ( _scope == ld::Atom::scopeGlobal &&
 752                                                                                                                                                 (sym.n_desc() & (N_WEAK_DEF|N_WEAK_REF)) == (N_WEAK_DEF|N_WEAK_REF) )
 753                                                                                                                                         this->setAutoHide();
 754                                                                                                                                         this->verifyAlignment();
 755                                                                                                                         }
 756
 757 private:
 758         friend class Parser<A>;
 759         friend class Section<A>;
 760         friend class CStringSection<A>;
 761         friend class AbsoluteSymbolSection<A>;
 762
 763         pint_t                                                                          _size;
 764         pint_t                                                                          _objAddress;
 765         const char*                                                                     _name;
 766         mutable unsigned long                                           _hash;
 767
 768         uint64_t                                                                        _fixupsStartIndex               : kFixupStartIndexBits,
 769                                                                                                 _lineInfoStartIndex             : kLineInfoStartIndexBits,
 770                                                                                                 _unwindInfoStartIndex   : kUnwindInfoStartIndexBits,
 771                                                                                                 _fixupsCount                    : kFixupCountBits,
 772                                                                                                 _lineInfoCount                  : kLineInfoCountBits,
 773                                                                                                 _unwindInfoCount                : kUnwindInfoCountBits;
 774
 775 };
 776
 777
 778
 779 template <typename A>
 780 void Atom<A>::setFixupsRange(uint32_t startIndex, uint32_t count)
 781 {
 782         if ( count >= (1 << kFixupCountBits) )
 783                 throwf("too many fixups in function %s", this->name());
 784         if ( startIndex >= (1 << kFixupStartIndexBits) )
 785                 throwf("too many fixups in file");
 786         assert(((startIndex+count) <= sect().file()._fixups.size()) && "fixup index out of range");
 787         _fixupsStartIndex = startIndex;
 788         _fixupsCount = count;
 789 }
 790
 791 template <typename A>
 792 void Atom<A>::setUnwindInfoRange(uint32_t startIndex, uint32_t count)
 793 {
 794         if ( count >= (1 << kUnwindInfoCountBits) )
 795                 throwf("too many compact unwind infos in function %s", this->name());
 796         if ( startIndex >= (1 << kUnwindInfoStartIndexBits) )
 797                 throwf("too many compact unwind infos (%d) in file", startIndex);
 798         assert((startIndex+count) <= sect().file()._unwindInfos.size() && "unwindinfo index out of range");
 799         _unwindInfoStartIndex = startIndex;
 800         _unwindInfoCount = count;
 801 }
 802
 803 template <typename A>
 804 void Atom<A>::extendUnwindInfoRange()
 805 {
 806         if ( _unwindInfoCount+1 >= (1 << kUnwindInfoCountBits) )
 807                 throwf("too many compact unwind infos in function %s", this->name());
 808         _unwindInfoCount += 1;
 809 }
 810
 811 template <typename A>
 812 void Atom<A>::setLineInfoRange(uint32_t startIndex, uint32_t count)
 813 {
 814         assert((count < (1 << kLineInfoCountBits)) && "too many line infos");
 815         assert((startIndex+count) < sect().file()._lineInfos.size() && "line info index out of range");
 816         _lineInfoStartIndex = startIndex;
 817         _lineInfoCount = count;
 818 }
 819
 820 template <typename A>
 821 const uint8_t* Atom<A>::contentPointer() const
 822 {
 823         const macho_section<P>* sct = this->sect().machoSection();
 824         uint32_t fileOffset = sct->offset() - sct->addr() + this->_objAddress;
 825         return this->sect().file().fileContent()+fileOffset;
 826 }
 827
 828
 829 template <typename A>
 830 void Atom<A>::copyRawContent(uint8_t buffer[]) const
 831 {
 832         // copy base bytes
 833         if ( this->contentType() == ld::Atom::typeZeroFill ) {
 834                 bzero(buffer, _size);
 835         }
 836         else if ( _size != 0 ) {
 837                 memcpy(buffer, this->contentPointer(), _size);
 838         }
 839 }
 840
 841 template <>
 842 void Atom<arm>::verifyAlignment() const
 843 {
 844         if ( (this->section().type() == ld::Section::typeCode) && ! isThumb() ) {
 845                 if ( ((_objAddress % 4) != 0) || (this->alignment().powerOf2 < 2) )
 846                         warning("ARM function not 4-byte aligned: %s from %s", this->name(), this->file()->path());
 847         }
 848 }
 849
 850 template <typename A>
 851 void Atom<A>::verifyAlignment() const
 852 {
 853 }
 854
 855
 856 template <typename A>
 857 class Parser
 858 {
 859 public:
 860         static bool                                                                             validFile(const uint8_t* fileContent, bool subtypeMustMatch=false,
 861                                                                                                                                 cpu_subtype_t subtype=0);
 862         static const char*                                                              fileKind(const uint8_t* fileContent);
 863         static bool                                                                             hasObjC2Categories(const uint8_t* fileContent);
 864         static ld::relocatable::File*                                   parse(const uint8_t* fileContent, uint64_t fileLength,
 865                                                                                                                         const char* path, time_t modTime, uint32_t ordinal,
 866                                                                                                                          const ParserOptions& opts) {
 867                                                                                                                                 Parser p(fileContent, fileLength, path, modTime,
 868                                                                                                                                                 ordinal, opts.convertUnwindInfo);
 869                                                                                                                                 return p.parse(opts);
 870                                                                                                                 }
 871
 872         typedef typename A::P                                           P;
 873         typedef typename A::P::E                                        E;
 874         typedef typename A::P::uint_t                           pint_t;
 875
 876         struct SourceLocation {
 877                                                                 SourceLocation() {}
 878                                                                 SourceLocation(Atom<A>* a, uint32_t o) : atom(a), offsetInAtom(o) {}
 879                 Atom<A>*        atom;
 880                 uint32_t        offsetInAtom;
 881         };
 882
 883         struct TargetDesc {
 884                 Atom<A>*        atom;
 885                 const char*     name;           // only used if targetAtom is NULL
 886                 int64_t         addend;
 887                 bool            weakImport;     // only used if targetAtom is NULL
 888         };
 889
 890         struct FixupInAtom {
 891                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) :
 892                         fixup(src.offsetInAtom, c, k, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 893
 894                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) :
 895                         fixup(src.offsetInAtom, c, k, b, target), atom(src.atom) { src.atom->incrementFixupCount(); }
 896
 897                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) :
 898                         fixup(src.offsetInAtom, c, k, wi, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 899
 900                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) :
 901                         fixup(src.offsetInAtom, c, k, b, name), atom(src.atom) { src.atom->incrementFixupCount(); }
 902
 903                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) :
 904                         fixup(src.offsetInAtom, c, k, addend), atom(src.atom) { src.atom->incrementFixupCount(); }
 905
 906                 FixupInAtom(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) :
 907                         fixup(src.offsetInAtom, c, k, (uint64_t)0), atom(src.atom) { src.atom->incrementFixupCount(); }
 908
 909                 ld::Fixup               fixup;
 910                 Atom<A>*                atom;
 911         };
 912
 913         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, Atom<A>* target) {
 914                 _allFixups.push_back(FixupInAtom(src, c, k, target));
 915         }
 916
 917         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, Atom<A>* target) {
 918                 _allFixups.push_back(FixupInAtom(src, c, k, b, target));
 919         }
 920
 921         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, bool wi, const char* name) {
 922                 _allFixups.push_back(FixupInAtom(src, c, k, wi, name));
 923         }
 924
 925         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, ld::Fixup::TargetBinding b, const char* name) {
 926                 _allFixups.push_back(FixupInAtom(src, c, k, b, name));
 927         }
 928
 929         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k, uint64_t addend) {
 930                 _allFixups.push_back(FixupInAtom(src, c, k, addend));
 931         }
 932
 933         void addFixup(const SourceLocation& src, ld::Fixup::Cluster c, ld::Fixup::Kind k) {
 934                 _allFixups.push_back(FixupInAtom(src, c, k));
 935         }
 936
 937
 938         uint32_t                                                                                symbolCount() { return _symbolCount; }
 939         uint32_t                                                                                indirectSymbol(uint32_t indirectIndex);
 940         const macho_nlist<P>&                                                   symbolFromIndex(uint32_t index);
 941         const char*                                                                             nameFromSymbol(const macho_nlist<P>& sym);
 942         ld::Atom::Scope                                                                 scopeFromSymbol(const macho_nlist<P>& sym);
 943         static ld::Atom::Definition                                             definitionFromSymbol(const macho_nlist<P>& sym);
 944         static ld::Atom::Combine                                                combineFromSymbol(const macho_nlist<P>& sym);
 945                         ld::Atom::SymbolTableInclusion                  inclusionFromSymbol(const macho_nlist<P>& sym);
 946         static bool                                                                             dontDeadStripFromSymbol(const macho_nlist<P>& sym);
 947         static bool                                                                             isThumbFromSymbol(const macho_nlist<P>& sym);
 948         static bool                                                                             weakImportFromSymbol(const macho_nlist<P>& sym);
 949         static bool                                                                             resolverFromSymbol(const macho_nlist<P>& sym);
 950         uint32_t                                                                                symbolIndexFromIndirectSectionAddress(pint_t,const macho_section<P>*);
 951         const macho_section<P>*                                                 firstMachOSection() { return _sectionsStart; }
 952         const macho_section<P>*                                                 machOSectionFromSectionIndex(uint32_t index);
 953         uint32_t                                                                                machOSectionCount() { return _machOSectionsCount; }
 954         uint32_t                                                                                undefinedStartIndex() { return _undefinedStartIndex; }
 955         uint32_t                                                                                undefinedEndIndex() { return _undefinedEndIndex; }
 956         void                                                                                    addFixup(FixupInAtom f) { _allFixups.push_back(f); }
 957         Section<A>*                                                                             sectionForNum(unsigned int sectNum);
 958         Section<A>*                                                                             sectionForAddress(pint_t addr);
 959         Atom<A>*                                                                                findAtomByAddress(pint_t addr);
 960         Atom<A>*                                                                                findAtomByAddressOrNullIfStub(pint_t addr);
 961         Atom<A>*                                                                                findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom);
 962         Atom<A>*                                                                                findAtomByName(const char* name);       // slow!
 963         void                                                                                    findTargetFromAddress(pint_t addr, TargetDesc& target);
 964         void                                                                                    findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target);
 965         void                                                                                    findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum,
 966                                                                                                                                                                                 TargetDesc& target);
 967         uint32_t                                                                                tentativeDefinitionCount() { return _tentativeDefinitionCount; }
 968         uint32_t                                                                                absoluteSymbolCount() { return _absoluteSymbolCount; }
 969
 970         bool                                                                                    hasStubsSection() { return (_stubsSectionNum != 0); }
 971         unsigned int                                                                    stubsSectionNum() { return _stubsSectionNum; }
 972         void                                                                                    addDtraceExtraInfos(const SourceLocation& src, const char* provider);
 973         const char*                                                                             scanSymbolTableForAddress(uint64_t addr);
 974         bool                                                                                    convertUnwindInfo() { return _convertUnwindInfo; }
 975
 976
 977         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target);
 978         void                                                    addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase);
 979
 980
 981
 982         struct LabelAndCFIBreakIterator {
 983                 typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
 984                                                                 LabelAndCFIBreakIterator(const uint32_t* ssa, uint32_t ssc, const pint_t* cfisa,
 985                                                                                                                 uint32_t cfisc, bool ols)
 986                                                                         : sortedSymbolIndexes(ssa), sortedSymbolCount(ssc), cfiStartsArray(cfisa),
 987                                                                                 cfiStartsCount(cfisc), fileHasOverlappingSymbols(ols),
 988                                                                                 newSection(false), cfiIndex(0), symIndex(0) {}
 989                 bool                                    next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
 990                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** sym);
 991                 pint_t                                  peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr);
 992                 void                                    beginSection() { newSection = true; symIndex = 0; }
 993
 994                 const uint32_t* const           sortedSymbolIndexes;
 995                 const uint32_t                          sortedSymbolCount;
 996                 const pint_t*                           cfiStartsArray;
 997                 const uint32_t                          cfiStartsCount;
 998                 const bool                                      fileHasOverlappingSymbols;
 999                 bool                                            newSection;
1000                 uint32_t                                        cfiIndex;
1001                 uint32_t                                        symIndex;
1002         };
1003
1004         struct CFI_CU_InfoArrays {
1005                         typedef typename CFISection<A>::CFI_Atom_Info CFI_Atom_Info;
1006                         typedef typename CUSection<A>::Info CU_Info;
1007                                                 CFI_CU_InfoArrays(const CFI_Atom_Info* cfiAr, uint32_t cfiC, CU_Info* cuAr, uint32_t cuC)
1008                                                         : cfiArray(cfiAr), cuArray(cuAr), cfiCount(cfiC), cuCount(cuC) {}
1009                 const CFI_Atom_Info* const      cfiArray;
1010                         CU_Info* const                  cuArray;
1011                 const uint32_t                          cfiCount;
1012                 const uint32_t                          cuCount;
1013         };
1014
1015
1016
1017 private:
1018         friend class Section<A>;
1019
1020         enum SectionType { sectionTypeIgnore, sectionTypeLiteral4, sectionTypeLiteral8, sectionTypeLiteral16,
1021                                                 sectionTypeNonLazy, sectionTypeCFI, sectionTypeCString, sectionTypeCStringPointer,
1022                                                 sectionTypeUTF16Strings, sectionTypeCFString, sectionTypeObjC2ClassRefs, typeObjC2CategoryList,
1023                                                 sectionTypeObjC1Classes, sectionTypeSymboled, sectionTypeObjC1ClassRefs,
1024                                                 sectionTypeTentativeDefinitions, sectionTypeAbsoluteSymbols, sectionTypeTLVDefs,
1025                                                 sectionTypeCompactUnwind };
1026
1027         template <typename P>
1028         struct MachOSectionAndSectionClass
1029         {
1030                 const macho_section<P>* sect;
1031                 SectionType                             type;
1032
1033                 static int sorter(const void* l, const void* r) {
1034                         const MachOSectionAndSectionClass<P>* left = (MachOSectionAndSectionClass<P>*)l;
1035                         const MachOSectionAndSectionClass<P>* right = (MachOSectionAndSectionClass<P>*)r;
1036                         int64_t diff = left->sect->addr() - right->sect->addr();
1037                         if ( diff == 0 )
1038                                 return 0;
1039                         if ( diff < 0 )
1040                                 return -1;
1041                         else
1042                                 return 1;
1043                 }
1044         };
1045
1046         struct ParserAndSectionsArray { Parser* parser; const uint32_t* sortedSectionsArray; };
1047
1048
1049                                                                                                         Parser(const uint8_t* fileContent, uint64_t fileLength,
1050                                                                                                                         const char* path, time_t modTime,
1051                                                                                                                         uint32_t ordinal, bool convertUnwindInfo);
1052         ld::relocatable::File*                                                  parse(const ParserOptions& opts);
1053         uint8_t                                                                                 loadCommandSizeMask();
1054         bool                                                                                    parseLoadCommands();
1055         void                                                                                    makeSections();
1056         void                                                                                    prescanSymbolTable();
1057         void                                                                                    makeSortedSymbolsArray(uint32_t symArray[], const uint32_t sectionArray[]);
1058         void                                                                                    makeSortedSectionsArray(uint32_t array[]);
1059         static int                                                                              pointerSorter(const void* l, const void* r);
1060         static int                                                                              symbolIndexSorter(void* extra, const void* l, const void* r);
1061         static int                                                                              sectionIndexSorter(void* extra, const void* l, const void* r);
1062
1063         void                                                                                    parseDebugInfo();
1064         void                                                                                    parseStabs();
1065         static bool                                                                             isConstFunStabs(const char *stabStr);
1066         bool                                                                                    read_comp_unit(const char ** name, const char ** comp_dir,
1067                                                                                                                                                                                                 uint64_t *stmt_list);
1068         const char*                                                                             getDwarfString(uint64_t form, const uint8_t* p);
1069         bool                                                                                    skip_form(const uint8_t ** offset, const uint8_t * end,
1070                                                                                                                                 uint64_t form, uint8_t addr_size, bool dwarf64);
1071
1072
1073         // filled in by constructor
1074         const uint8_t*                                                          _fileContent;
1075         uint32_t                                                                        _fileLength;
1076         const char*                                                                     _path;
1077         time_t                                                                          _modTime;
1078         uint32_t                                                                        _ordinal;
1079
1080         // filled in by parseLoadCommands()
1081         File<A>*                                                                        _file;
1082         const macho_nlist<P>*                                           _symbols;
1083         uint32_t                                                                        _symbolCount;
1084         const char*                                                                     _strings;
1085         uint32_t                                                                        _stringsSize;
1086         const uint32_t*                                                         _indirectTable;
1087         uint32_t                                                                        _indirectTableCount;
1088         uint32_t                                                                        _undefinedStartIndex;
1089         uint32_t                                                                        _undefinedEndIndex;
1090         const macho_section<P>*                                         _sectionsStart;
1091         uint32_t                                                                        _machOSectionsCount;
1092         bool                                                                            _hasUUID;
1093
1094         // filled in by parse()
1095         CFISection<A>*                                                          _EHFrameSection;
1096         CUSection<A>*                                                           _compactUnwindSection;
1097         AbsoluteSymbolSection<A>*                                       _absoluteSection;
1098         uint32_t                                                                        _tentativeDefinitionCount;
1099         uint32_t                                                                        _absoluteSymbolCount;
1100         uint32_t                                                                        _symbolsInSections;
1101         bool                                                                            _hasLongBranchStubs;
1102         bool                                                                            _AppleObjc; // FSF has objc that uses different data layout
1103         bool                                                                            _overlappingSymbols;
1104         bool                                                                            _convertUnwindInfo;
1105         unsigned int                                                            _stubsSectionNum;
1106         const macho_section<P>*                                         _stubsMachOSection;
1107         std::vector<const char*>                                        _dtraceProviderInfo;
1108         std::vector<FixupInAtom>                                        _allFixups;
1109 };
1110
1111
1112
1113 template <typename A>
1114 Parser<A>::Parser(const uint8_t* fileContent, uint64_t fileLength, const char* path, time_t modTime,
1115                                         uint32_t ordinal, bool convertDUI)
1116                 : _fileContent(fileContent), _fileLength(fileLength), _path(path), _modTime(modTime),
1117                         _ordinal(ordinal), _file(NULL),
1118                         _symbols(NULL), _symbolCount(0), _strings(NULL), _stringsSize(0),
1119                         _indirectTable(NULL), _indirectTableCount(0),
1120                         _undefinedStartIndex(0), _undefinedEndIndex(0),
1121                         _sectionsStart(NULL), _machOSectionsCount(0), _hasUUID(false),
1122                         _EHFrameSection(NULL), _compactUnwindSection(NULL), _absoluteSection(NULL),
1123                         _tentativeDefinitionCount(0), _absoluteSymbolCount(0),
1124                         _symbolsInSections(0), _hasLongBranchStubs(false),  _AppleObjc(false),
1125                         _overlappingSymbols(false), _convertUnwindInfo(convertDUI),
1126                         _stubsSectionNum(0), _stubsMachOSection(NULL)
1127 {
1128 }
1129
1130
1131 template <>
1132 bool Parser<x86>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1133 {
1134         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1135         if ( header->magic() != MH_MAGIC )
1136                 return false;
1137         if ( header->cputype() != CPU_TYPE_I386 )
1138                 return false;
1139         if ( header->filetype() != MH_OBJECT )
1140                 return false;
1141         return true;
1142 }
1143
1144 template <>
1145 bool Parser<x86_64>::validFile(const uint8_t* fileContent, bool, cpu_subtype_t)
1146 {
1147         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1148         if ( header->magic() != MH_MAGIC_64 )
1149                 return false;
1150         if ( header->cputype() != CPU_TYPE_X86_64 )
1151                 return false;
1152         if ( header->filetype() != MH_OBJECT )
1153                 return false;
1154         return true;
1155 }
1156
1157 template <>
1158 bool Parser<arm>::validFile(const uint8_t* fileContent, bool subtypeMustMatch, cpu_subtype_t subtype)
1159 {
1160         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1161         if ( header->magic() != MH_MAGIC )
1162                 return false;
1163         if ( header->cputype() != CPU_TYPE_ARM )
1164                 return false;
1165         if ( header->filetype() != MH_OBJECT )
1166                 return false;
1167         if ( subtypeMustMatch ) {
1168                 if ( (cpu_subtype_t)header->cpusubtype() == subtype )
1169                         return true;
1170                 // hack until libcc_kext.a is made fat
1171                 if ( header->cpusubtype() == CPU_SUBTYPE_ARM_ALL )
1172                         return true;
1173                 return false;
1174         }
1175         return true;
1176 }
1177
1178
1179
1180 template <>
1181 const char* Parser<x86>::fileKind(const uint8_t* fileContent)
1182 {
1183         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1184         if ( header->magic() != MH_MAGIC )
1185                 return NULL;
1186         if ( header->cputype() != CPU_TYPE_I386 )
1187                 return NULL;
1188         return "i386";
1189 }
1190
1191 template <>
1192 const char* Parser<x86_64>::fileKind(const uint8_t* fileContent)
1193 {
1194         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1195         if ( header->magic() != MH_MAGIC )
1196                 return NULL;
1197         if ( header->cputype() != CPU_TYPE_X86_64 )
1198                 return NULL;
1199         return "x86_64";
1200 }
1201
1202 template <>
1203 const char* Parser<arm>::fileKind(const uint8_t* fileContent)
1204 {
1205         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1206         if ( header->magic() != MH_MAGIC )
1207                 return NULL;
1208         if ( header->cputype() != CPU_TYPE_ARM )
1209                 return NULL;
1210         for (const ARMSubType* t=ARMSubTypes; t->subTypeName != NULL; ++t) {
1211                 if ( t->subType == (cpu_subtype_t)header->cpusubtype() ) {
1212                         return t->subTypeName;
1213                 }
1214         }
1215         return "arm???";
1216 }
1217
1218
1219 template <typename A>
1220 bool Parser<A>::hasObjC2Categories(const uint8_t* fileContent)
1221 {
1222         const macho_header<P>* header = (const macho_header<P>*)fileContent;
1223         const uint32_t cmd_count = header->ncmds();
1224         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1225         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1226         const macho_load_command<P>* cmd = cmds;
1227         for (uint32_t i = 0; i < cmd_count; ++i) {
1228                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1229                         const macho_segment_command<P>* segment = (macho_segment_command<P>*)cmd;
1230                         const macho_section<P>* sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1231                         for (uint32_t si=0; si < segment->nsects(); ++si) {
1232                                 const macho_section<P>* sect = &sectionsStart[si];
1233                                 if ( (sect->size() > 0)
1234                                         && (strcmp(sect->sectname(), "__objc_catlist") == 0)
1235                                         && (strcmp(sect->segname(), "__DATA") == 0) ) {
1236                                                 return true;
1237                                 }
1238                         }
1239                 }
1240                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1241                 if ( cmd > cmdsEnd )
1242                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1243         }
1244         return false;
1245 }
1246
1247 template <typename A>
1248 int Parser<A>::pointerSorter(const void* l, const void* r)
1249 {
1250         // sort references by address
1251         const pint_t* left = (pint_t*)l;
1252         const pint_t* right = (pint_t*)r;
1253         return (*left - *right);
1254 }
1255
1256 template <typename A>
1257 typename A::P::uint_t Parser<A>::LabelAndCFIBreakIterator::peek(Parser<A>& parser, pint_t startAddr, pint_t endAddr)
1258 {
1259         pint_t symbolAddr;
1260         if ( symIndex < sortedSymbolCount )
1261                 symbolAddr = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]).n_value();
1262         else
1263                 symbolAddr = endAddr;
1264         pint_t cfiAddr;
1265         if ( cfiIndex < cfiStartsCount )
1266                 cfiAddr = cfiStartsArray[cfiIndex];
1267         else
1268                 cfiAddr = endAddr;
1269         if ( (cfiAddr < symbolAddr) && (cfiAddr >= startAddr) ) {
1270                 if ( cfiAddr <  endAddr )
1271                         return cfiAddr;
1272                 else
1273                         return endAddr;
1274         }
1275         else  {
1276                 if ( symbolAddr <  endAddr )
1277                         return symbolAddr;
1278                 else
1279                         return endAddr;
1280         }
1281 }
1282
1283 //
1284 // Parses up a section into chunks based on labels and CFI information.
 1285 // Each call returns the next chunk address and size, and (if the break
 1286 // was because of a label) the symbol. Returns false when no more chunks.
1287 //
1288 template <typename A>
1289 bool Parser<A>::LabelAndCFIBreakIterator::next(Parser<A>& parser, uint32_t sectNum, pint_t startAddr, pint_t endAddr,
1290                                                                                                 pint_t* addr, pint_t* size, const macho_nlist<P>** symbol)
1291 {
1292         // may not be a label on start of section, but need atom demarcation there
1293         if ( newSection ) {
1294                 newSection = false;
1295                 // advance symIndex until we get to the first label at or past the start of this section
1296                 while ( symIndex < sortedSymbolCount ) {
1297                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1298                         pint_t nextSymbolAddr = sym.n_value();
1299                         //fprintf(stderr, "sectNum=%d, nextSymbolAddr=0x%08llX, name=%s\n", sectNum, (uint64_t)nextSymbolAddr, parser.nameFromSymbol(sym));
1300                         if ( (nextSymbolAddr > startAddr) || ((nextSymbolAddr == startAddr) && (sym.n_sect() == sectNum)) )
1301                                 break;
1302                         ++symIndex;
1303                 }
1304                 if ( symIndex < sortedSymbolCount ) {
1305                         const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1306                         pint_t nextSymbolAddr = sym.n_value();
1307                         // if next symbol found is not in this section
1308                         if ( sym.n_sect() != sectNum ) {
1309                                 // check for CFI break instead of symbol break
1310                                 if ( cfiIndex < cfiStartsCount ) {
1311                                         pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1312                                         if ( nextCfiAddr < endAddr ) {
1313                                                 // use cfi
1314                                                 ++cfiIndex;
1315                                                 *addr = nextCfiAddr;
1316                                                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1317                                                 *symbol = NULL;
1318                                                 return true;
1319                                         }
1320                                 }
1321                                 *addr = startAddr;
1322                                 *size = endAddr - startAddr;
1323                                 *symbol = NULL;
1324                                 if ( startAddr == endAddr )
1325                                         return false;  // zero size section
1326                                 else
1327                                         return true;  // whole section is one atom with no label
1328                         }
1329                         // if also CFI break here, eat it
1330                         if ( cfiIndex < cfiStartsCount ) {
1331                                 if ( cfiStartsArray[cfiIndex] == nextSymbolAddr )
1332                                         ++cfiIndex;
1333                         }
1334                         if ( nextSymbolAddr == startAddr ) {
1335                                 // label at start of section, return it as chunk
1336                                 ++symIndex;
1337                                 *addr = startAddr;
1338                                 *size = peek(parser, startAddr, endAddr) - startAddr;
1339                                 *symbol = &sym;
1340                                 return true;
1341                         }
1342                         // return chunk before first symbol
1343                         *addr = startAddr;
1344                         *size = nextSymbolAddr - startAddr;
1345                         *symbol = NULL;
1346                         return true;
1347                 }
1348                 // no symbols left in whole file, so entire section is one chunk
1349                 *addr = startAddr;
1350                 *size = endAddr - startAddr;
1351                 *symbol = NULL;
1352                 if ( startAddr == endAddr )
1353                         return false;  // zero size section
1354                 else
1355                         return true;  // whole section is one atom with no label
1356         }
1357
1358         while ( (symIndex < sortedSymbolCount) && (cfiIndex < cfiStartsCount) ) {
1359                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1360                 pint_t nextSymbolAddr = sym.n_value();
1361                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1362                 if ( nextSymbolAddr <  nextCfiAddr ) {
1363                         if ( nextSymbolAddr >= endAddr )
1364                                 return false;
1365                         ++symIndex;
1366                         if ( nextSymbolAddr < startAddr )
1367                                 continue;
1368                         *addr = nextSymbolAddr;
1369                         *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1370                         *symbol = &sym;
1371                         return true;
1372                 }
1373                 else if ( nextCfiAddr < nextSymbolAddr ) {
1374                         if ( nextCfiAddr >= endAddr )
1375                                 return false;
1376                         ++cfiIndex;
1377                         if ( nextCfiAddr < startAddr )
1378                                 continue;
1379                         *addr = nextCfiAddr;
1380                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1381                         *symbol = NULL;
1382                         return true;
1383                 }
1384                 else {
1385                         if ( nextCfiAddr >= endAddr )
1386                                 return false;
1387                         ++symIndex;
1388                         ++cfiIndex;
1389                         if ( nextCfiAddr < startAddr )
1390                                 continue;
1391                         *addr = nextCfiAddr;
1392                         *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1393                         *symbol = &sym;
1394                         return true;
1395                 }
1396         }
1397         while ( symIndex < sortedSymbolCount ) {
1398                 const macho_nlist<P>& sym = parser.symbolFromIndex(sortedSymbolIndexes[symIndex]);
1399                 pint_t nextSymbolAddr = sym.n_value();
1400                 // if next symbol found is not in this section, then done with iteration
1401                 if ( sym.n_sect() != sectNum )
1402                         return false;
1403                 ++symIndex;
1404                 if ( nextSymbolAddr < startAddr )
1405                         continue;
1406                 *addr = nextSymbolAddr;
1407                 *size = peek(parser, startAddr, endAddr) - nextSymbolAddr;
1408                 *symbol = &sym;
1409                 return true;
1410         }
1411         while ( cfiIndex < cfiStartsCount ) {
1412                 pint_t nextCfiAddr = cfiStartsArray[cfiIndex];
1413                 if ( nextCfiAddr >= endAddr )
1414                         return false;
1415                 ++cfiIndex;
1416                 if ( nextCfiAddr < startAddr )
1417                         continue;
1418                 *addr = nextCfiAddr;
1419                 *size = peek(parser, startAddr, endAddr) - nextCfiAddr;
1420                 *symbol = NULL;
1421                 return true;
1422         }
1423         return false;
1424 }
1425
1426
1427
1428 template <typename A>
1429 ld::relocatable::File* Parser<A>::parse(const ParserOptions& opts)
1430 {
1431         // create file object
1432         _file = new File<A>(_path, _modTime, _fileContent, _ordinal);
1433
1434         // respond to -t option
1435         if ( opts.logAllFiles )
1436                 printf("%s\n", _path);
1437
1438         // parse start of mach-o file
1439         if ( ! parseLoadCommands() )
1440                 return _file;
1441
1442         // make array of
1443         uint32_t sortedSectionIndexes[_machOSectionsCount];
1444         this->makeSortedSectionsArray(sortedSectionIndexes);
1445
1446         // make symbol table sorted by address
1447         this->prescanSymbolTable();
1448         uint32_t sortedSymbolIndexes[_symbolsInSections];
1449         this->makeSortedSymbolsArray(sortedSymbolIndexes, sortedSectionIndexes);
1450
1451         // allocate Section<A> object for each mach-o section
1452         makeSections();
1453
1454         // if it exists, do special early parsing of __compact_unwind section
1455         uint32_t countOfCUs = 0;
1456         if ( _compactUnwindSection != NULL )
1457                 countOfCUs = _compactUnwindSection->count();
1458         uint8_t cuInfoBuffer[sizeof(typename CUSection<A>::Info) * countOfCUs];
1459         typename CUSection<A>::Info*  cuInfoArray = (typename CUSection<A>::Info*)cuInfoBuffer;
1460         if ( countOfCUs != 0 )
1461                 _compactUnwindSection->parse(*this, countOfCUs, cuInfoArray);
1462
1463         // if it exists, do special early parsing of __eh_frame section
1464         // stack allocate array of CFI_Atom_Info
1465         uint32_t countOfCFIs = 0;
1466         if ( _EHFrameSection != NULL )
1467                 countOfCFIs = _EHFrameSection->cfiCount();
1468         typename CFISection<A>::CFI_Atom_Info  cfiArray[countOfCFIs];
1469         // stack allocate (if not too large) a copy of __eh_frame to apply relocations to
1470         uint8_t* ehBuffer = NULL;
1471         uint32_t stackAllocSize = 0;
1472         if ( (countOfCFIs != 0) && _EHFrameSection->needsRelocating() ) {
1473                 uint32_t sectSize = _EHFrameSection->machoSection()->size();
1474                 if ( sectSize > 50*1024 )
1475                         ehBuffer = (uint8_t*)malloc(sectSize);
1476                 else
1477                         stackAllocSize = sectSize;
1478         }
1479         uint32_t ehStackBuffer[1+stackAllocSize/4]; // make 4-byte aligned stack bufffer
1480         if ( ehBuffer == NULL )
1481                 ehBuffer = (uint8_t*)&ehStackBuffer;
1482         uint32_t cfiStartsCount = 0;
1483         if ( countOfCFIs != 0 ) {
1484                 _EHFrameSection->cfiParse(*this, ehBuffer, cfiArray, countOfCFIs);
1485                 // count functions and lsdas
1486                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1487                         if ( cfiArray[i].isCIE )
1488                                 continue;
1489                         //fprintf(stderr, "cfiArray[i].func = 0x%08llX, cfiArray[i].lsda = 0x%08llX, encoding=0x%08X\n",
1490                         //                      (uint64_t)cfiArray[i].u.fdeInfo.function.targetAddress,
1491                         //                      (uint64_t)cfiArray[i].u.fdeInfo.lsda.targetAddress,
1492                         //                      cfiArray[i].u.fdeInfo.compactUnwindInfo);
1493                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1494                                 ++cfiStartsCount;
1495                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1496                                 ++cfiStartsCount;
1497                 }
1498         }
1499         CFI_CU_InfoArrays cfis(cfiArray, countOfCFIs, cuInfoArray, countOfCUs);
1500
1501         // create sorted array of function starts and lsda starts
1502         pint_t cfiStartsArray[cfiStartsCount];
1503         uint32_t countOfFDEs = 0;
1504         if ( countOfCFIs != 0 ) {
1505                 int index = 0;
1506                 for(uint32_t i=0; i < countOfCFIs; ++i) {
1507                         if ( cfiArray[i].isCIE )
1508                                 continue;
1509                         if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS )
1510                                 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.function.targetAddress;
1511                         if ( cfiArray[i].u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS )
1512                                 cfiStartsArray[index++] = cfiArray[i].u.fdeInfo.lsda.targetAddress;
1513                         ++countOfFDEs;
1514                 }
1515                 ::qsort(cfiStartsArray, cfiStartsCount, sizeof(pint_t), pointerSorter);
1516         #ifndef NDEBUG
1517                 // scan for FDEs claming the same function
1518                 for(int i=1; i < index; ++i) {
1519                         assert( cfiStartsArray[i] != cfiStartsArray[i-1] );
1520                 }
1521         #endif
1522         }
1523
1524         Section<A>** sections = _file->_sectionsArray;
1525         uint32_t        sectionsCount = _file->_sectionsArrayCount;
1526
1527         // figure out how many atoms will be allocated and allocate
1528         LabelAndCFIBreakIterator breakIterator(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1529                                                                                         cfiStartsCount, _overlappingSymbols);
1530         uint32_t computedAtomCount = 0;
1531         for (uint32_t i=0; i < sectionsCount; ++i ) {
1532                 breakIterator.beginSection();
1533                 uint32_t count = sections[i]->computeAtomCount(*this, breakIterator, cfis);
1534                 //const macho_section<P>* sect = sections[i]->machoSection();
1535                 //fprintf(stderr, "computed count=%u for section %s size=%llu\n", count, sect->sectname(), (sect != NULL) ? sect->size() : 0);
1536                 computedAtomCount += count;
1537         }
1538         //fprintf(stderr, "allocating %d atoms * sizeof(Atom<A>)=%ld, sizeof(ld::Atom)=%ld\n", computedAtomCount, sizeof(Atom<A>), sizeof(ld::Atom));
1539         _file->_atomsArray = new uint8_t[computedAtomCount*sizeof(Atom<A>)];
1540         _file->_atomsArrayCount = 0;
1541
1542         // have each section append atoms to _atomsArray
1543         LabelAndCFIBreakIterator breakIterator2(sortedSymbolIndexes, _symbolsInSections, cfiStartsArray,
1544                                                                                                 cfiStartsCount, _overlappingSymbols);
1545         for (uint32_t i=0; i < sectionsCount; ++i ) {
1546                 uint8_t* atoms = _file->_atomsArray + _file->_atomsArrayCount*sizeof(Atom<A>);
1547                 breakIterator2.beginSection();
1548                 uint32_t count = sections[i]->appendAtoms(*this, atoms, breakIterator2, cfis);
1549                 //fprintf(stderr, "append count=%u for section %s/%s\n", count, sections[i]->machoSection()->segname(), sections[i]->machoSection()->sectname());
1550                 _file->_atomsArrayCount += count;
1551         }
1552         assert( _file->_atomsArrayCount == computedAtomCount && "more atoms allocated than expected");
1553
1554
1555         // have each section add all fix-ups for its atoms
1556         _allFixups.reserve(computedAtomCount*5);
1557         for (uint32_t i=0; i < sectionsCount; ++i )
1558                 sections[i]->makeFixups(*this, cfis);
1559
1560         // assign fixups start offset for each atom
1561         uint8_t* p = _file->_atomsArray;
1562         uint32_t fixupOffset = 0;
1563         for(int i=_file->_atomsArrayCount; i > 0; --i) {
1564                 Atom<A>* atom = (Atom<A>*)p;
1565                 atom->_fixupsStartIndex = fixupOffset;
1566                 fixupOffset += atom->_fixupsCount;
1567                 atom->_fixupsCount = 0;
1568                 p += sizeof(Atom<A>);
1569         }
1570         assert(fixupOffset == _allFixups.size());
1571         _file->_fixups.reserve(fixupOffset);
1572
1573         // copy each fixup for each atom
1574         for(typename std::vector<FixupInAtom>::iterator it=_allFixups.begin(); it != _allFixups.end(); ++it) {
1575                 uint32_t slot = it->atom->_fixupsStartIndex + it->atom->_fixupsCount;
1576                 _file->_fixups[slot] = it->fixup;
1577                 it->atom->_fixupsCount++;
1578         }
1579
1580         // done with temp vector
1581         _allFixups.clear();
1582
1583         // add unwind info
1584         _file->_unwindInfos.reserve(countOfFDEs+countOfCUs);
1585         for(uint32_t i=0; i < countOfCFIs; ++i) {
1586                 if ( cfiArray[i].isCIE )
1587                         continue;
1588                 if ( cfiArray[i].u.fdeInfo.function.targetAddress != CFI_INVALID_ADDRESS ) {
1589                         ld::Atom::UnwindInfo info;
1590                         info.startOffset = 0;
1591                         info.unwindInfo = cfiArray[i].u.fdeInfo.compactUnwindInfo;
1592                         _file->_unwindInfos.push_back(info);
1593                         Atom<A>* func = findAtomByAddress(cfiArray[i].u.fdeInfo.function.targetAddress);
1594                         func->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1595                 }
1596         }
1597         // apply compact infos in __LD,__compact_unwind section to each function
1598         // if function also has dwarf unwind, CU will override it
1599         Atom<A>* lastFunc = NULL;
1600         uint32_t lastEnd = 0;
1601         for(uint32_t i=0; i < countOfCUs; ++i) {
1602                 typename CUSection<A>::Info* info = &cuInfoArray[i];
1603                 assert(info->function != NULL);
1604                 ld::Atom::UnwindInfo ui;
1605                 ui.startOffset = info->functionStartAddress - info->function->objectAddress();
1606                 ui.unwindInfo = info->compactUnwindInfo;
1607                 _file->_unwindInfos.push_back(ui);
1608                 // if previous is for same function, extend range
1609                 if ( info->function == lastFunc ) {
1610                         if ( lastEnd != ui.startOffset ) {
1611                                 if ( lastEnd < ui.startOffset )
1612                                         warning("__LD,__compact_unwind entries for %s have a gap at offset 0x%0X", info->function->name(), lastEnd);
1613                                 else
1614                                         warning("__LD,__compact_unwind entries for %s overlap at offset 0x%0X", info->function->name(), lastEnd);
1615                         }
1616                         lastFunc->extendUnwindInfoRange();
1617                 }
1618                 else
1619                         info->function->setUnwindInfoRange(_file->_unwindInfos.size()-1, 1);
1620                 lastFunc = info->function;
1621                 lastEnd = ui.startOffset + info->rangeLength;
1622         }
1623
1624         // parse dwarf debug info to get line info
1625         this->parseDebugInfo();
1626
1627         return _file;
1628 }
1629
1630
1631
1632 template <> uint8_t Parser<x86>::loadCommandSizeMask()          { return 0x03; }
1633 template <> uint8_t Parser<x86_64>::loadCommandSizeMask()       { return 0x07; }
1634 template <> uint8_t Parser<arm>::loadCommandSizeMask()          { return 0x03; }
1635
1636 template <typename A>
1637 bool Parser<A>::parseLoadCommands()
1638 {
1639         const macho_header<P>* header = (const macho_header<P>*)_fileContent;
1640
1641         // set File attributes
1642         _file->_canScatterAtoms = (header->flags() & MH_SUBSECTIONS_VIA_SYMBOLS);
1643         _file->_cpuSubType = header->cpusubtype();
1644
1645         const macho_segment_command<P>* segment = NULL;
1646         const uint8_t* const endOfFile = _fileContent + _fileLength;
1647         const uint32_t cmd_count = header->ncmds();
1648         // <rdar://problem/5394172> an empty .o file with zero load commands will crash linker
1649         if ( cmd_count == 0 )
1650                 return false;
1651         const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));
1652         const macho_load_command<P>* const cmdsEnd = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>) + header->sizeofcmds());
1653         const macho_load_command<P>* cmd = cmds;
1654         for (uint32_t i = 0; i < cmd_count; ++i) {
1655                 uint32_t size = cmd->cmdsize();
1656                 if ( (size & this->loadCommandSizeMask()) != 0 )
1657                         throwf("load command #%d has a unaligned size", i);
1658                 const uint8_t* endOfCmd = ((uint8_t*)cmd)+cmd->cmdsize();
1659                 if ( endOfCmd > (uint8_t*)cmdsEnd )
1660                         throwf("load command #%d extends beyond the end of the load commands", i);
1661                 if ( endOfCmd > endOfFile )
1662                         throwf("load command #%d extends beyond the end of the file", i);
1663                 switch (cmd->cmd()) {
1664                     case LC_SYMTAB:
1665                                 {
1666                                         const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
1667                                         _symbolCount = symtab->nsyms();
1668                                         _symbols = (const macho_nlist<P>*)(_fileContent + symtab->symoff());
1669                                         _strings = (char*)_fileContent + symtab->stroff();
1670                                         _stringsSize = symtab->strsize();
1671                                         if ( (symtab->symoff() + _symbolCount*sizeof(macho_nlist<P>)) > _fileLength )
1672                                                 throw "mach-o symbol table extends beyond end of file";
1673                                         if ( (_strings + _stringsSize) > (char*)endOfFile )
1674                                                 throw "mach-o string pool extends beyond end of file";
1675                                         if ( _indirectTable == NULL ) {
1676                                                 if ( _undefinedEndIndex == 0 ) {
1677                                                         _undefinedStartIndex = 0;
1678                                                         _undefinedEndIndex = symtab->nsyms();
1679                                                 }
1680                                         }
1681                                 }
1682                                 break;
1683                         case LC_DYSYMTAB:
1684                                 {
1685                                         const macho_dysymtab_command<P>* dsymtab = (macho_dysymtab_command<P>*)cmd;
1686                                         _indirectTable = (uint32_t*)(_fileContent + dsymtab->indirectsymoff());
1687                                         _indirectTableCount = dsymtab->nindirectsyms();
1688                                         if ( &_indirectTable[_indirectTableCount] > (uint32_t*)endOfFile )
1689                                                 throw "indirect symbol table extends beyond end of file";
1690                                         _undefinedStartIndex = dsymtab->iundefsym();
1691                                         _undefinedEndIndex = _undefinedStartIndex + dsymtab->nundefsym();
1692                                 }
1693                                 break;
1694                     case LC_UUID:
1695                                 _hasUUID = true;
1696                                 break;
1697
1698                         default:
1699                                 if ( cmd->cmd() == macho_segment_command<P>::CMD ) {
1700                                         if ( segment != NULL )
1701                                                 throw "more than one LC_SEGMENT found in object file";
1702                                         segment = (macho_segment_command<P>*)cmd;
1703                                 }
1704                                 break;
1705                 }
1706                 cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
1707                 if ( cmd > cmdsEnd )
1708                         throwf("malformed mach-o file, load command #%d is outside size of load commands", i);
1709         }
1710
1711         // record range of sections
1712         if ( segment == NULL )
1713                 throw "missing LC_SEGMENT";
1714         _sectionsStart = (macho_section<P>*)((char*)segment + sizeof(macho_segment_command<P>));
1715         _machOSectionsCount = segment->nsects();
1716
1717         return true;
1718 }
1719
1720
1721 template <typename A>
1722 void Parser<A>::prescanSymbolTable()
1723 {
1724         _tentativeDefinitionCount = 0;
1725         _absoluteSymbolCount = 0;
1726         _symbolsInSections = 0;
1727         for (uint32_t i=0; i < this->_symbolCount; ++i) {
1728                 const macho_nlist<P>& sym =     symbolFromIndex(i);
1729                 // ignore stabs
1730                 if ( (sym.n_type() & N_STAB) != 0 )
1731                         continue;
1732
1733                 // look at undefines
1734                 const char* symbolName = this->nameFromSymbol(sym);
1735                 if ( (sym.n_type() & N_TYPE) == N_UNDF ) {
1736                         if ( sym.n_value() != 0 ) {
1737                                 // count tentative definitions
1738                                 ++_tentativeDefinitionCount;
1739                         }
1740                         else if ( strncmp(symbolName, "___dtrace_", 10) == 0 ) {
1741                                 // any undefined starting with __dtrace_*$ that is not ___dtrace_probe$* or ___dtrace_isenabled$*
1742                                 // is extra provider info
1743                                 if ( (strncmp(&symbolName[10], "probe$", 6) != 0) && (strncmp(&symbolName[10], "isenabled$", 10) != 0) ) {
1744                                         _dtraceProviderInfo.push_back(symbolName);
1745                                 }
1746                         }
1747                         continue;
1748                 }
1749
1750                 // count absolute symbols
1751                 if ( (sym.n_type() & N_TYPE) == N_ABS ) {
1752                         const char* absName = this->nameFromSymbol(sym);
1753                         // ignore .objc_class_name_* symbols
1754                         if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
1755                                 _AppleObjc = true;
1756                                 continue;
1757                         }
1758                         // ignore .objc_class_name_* symbols
1759                         if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
1760                                 continue;
1761                         // ignore empty *.eh symbols
1762                         if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
1763                                 continue;
1764                         ++_absoluteSymbolCount;
1765                 }
1766
1767                 // only look at definitions
1768                 if ( (sym.n_type() & N_TYPE) != N_SECT )
1769                         continue;
1770
1771                 // 'L' labels do not denote atom breaks
1772                 if ( symbolName[0] == 'L' )
1773                         continue;
1774
1775                 // how many def syms in each section
1776                 if ( sym.n_sect() > _machOSectionsCount )
1777                         throw "bad n_sect in symbol table";
1778
1779                 _symbolsInSections++;
1780         }
1781 }
1782
1783 template <typename A>
1784 int Parser<A>::sectionIndexSorter(void* extra, const void* l, const void* r)
1785 {
1786         Parser<A>* parser = (Parser<A>*)extra;
1787         const uint32_t* left = (uint32_t*)l;
1788         const uint32_t* right = (uint32_t*)r;
1789         const macho_section<P>* leftSect =      parser->machOSectionFromSectionIndex(*left);
1790         const macho_section<P>* rightSect = parser->machOSectionFromSectionIndex(*right);
1791
1792         // can't just return difference because 64-bit diff does not fit in 32-bit return type
1793         int64_t result = leftSect->addr() - rightSect->addr();
1794         if ( result == 0 ) {
1795                 // two sections with same start address
1796                 // one with zero size goes first
1797                 bool leftEmpty = ( leftSect->size() == 0 );
1798                 bool rightEmpty = ( rightSect->size() == 0 );
1799                 if ( leftEmpty != rightEmpty ) {
1800                         return ( rightEmpty ? 1 : -1 );
1801                 }
1802                 if ( !leftEmpty && !rightEmpty )
1803                         throwf("overlapping sections");
1804                 // both empty, so chose file order
1805                 return ( rightSect - leftSect );
1806         }
1807         else if ( result < 0 )
1808                 return -1;
1809         else
1810                 return 1;
1811 }
1812
1813 template <typename A>
1814 void Parser<A>::makeSortedSectionsArray(uint32_t array[])
1815 {
1816         const bool log = false;
1817
1818         if ( log ) {
1819                 fprintf(stderr, "unsorted sections:\n");
1820                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1821                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[i].addr(), _sectionsStart[i].segname(), _sectionsStart[i].sectname());
1822         }
1823
1824         // sort by symbol table address
1825         for (uint32_t i=0; i < _machOSectionsCount; ++i)
1826                 array[i] = i;
1827         ::qsort_r(array, _machOSectionsCount, sizeof(uint32_t), this, &sectionIndexSorter);
1828
1829         if ( log ) {
1830                 fprintf(stderr, "sorted sections:\n");
1831                 for(unsigned int i=0; i < _machOSectionsCount; ++i )
1832                         fprintf(stderr, "0x%08llX %s %s\n", _sectionsStart[array[i]].addr(), _sectionsStart[array[i]].segname(), _sectionsStart[array[i]].sectname());
1833         }
1834 }
1835
1836
1837
1838 template <typename A>
1839 int Parser<A>::symbolIndexSorter(void* extra, const void* l, const void* r)
1840 {
1841         ParserAndSectionsArray* extraInfo = (ParserAndSectionsArray*)extra;
1842         Parser<A>* parser = extraInfo->parser;
1843         const uint32_t* sortedSectionsArray = extraInfo->sortedSectionsArray;
1844         const uint32_t* left = (uint32_t*)l;
1845         const uint32_t* right = (uint32_t*)r;
1846         const macho_nlist<P>& leftSym = parser->symbolFromIndex(*left);
1847         const macho_nlist<P>& rightSym = parser->symbolFromIndex(*right);
1848         // can't just return difference because 64-bit diff does not fit in 32-bit return type
1849         int64_t result = leftSym.n_value() - rightSym.n_value();
1850         if ( result == 0 ) {
1851                 // two symbols with same address
1852                 // if in different sections, sort earlier section first
1853                 if ( leftSym.n_sect() != rightSym.n_sect() ) {
1854                         for (uint32_t i=0; i < parser->machOSectionCount(); ++i) {
1855                                 if ( sortedSectionsArray[i]+1 == leftSym.n_sect() )
1856                                         return -1;
1857                                 if ( sortedSectionsArray[i]+1 == rightSym.n_sect() )
1858                                         return 1;
1859                         }
1860                 }
1861                 // two symbols in same section, means one is an alias
1862                 // if only one is global, make the other an alias (sort first)
1863                 if ( (leftSym.n_type() & N_EXT) != (rightSym.n_type() & N_EXT) ) {
1864                         if ( (rightSym.n_type() & N_EXT) != 0 )
1865                                 return -1;
1866                         else
1867                                 return 1;
1868                 }
1869                 // if both are global, make alphabetically last one be the alias
1870                 return ( strcmp(parser->nameFromSymbol(rightSym), parser->nameFromSymbol(leftSym)) );
1871         }
1872         else if ( result < 0 )
1873                 return -1;
1874         else
1875                 return 1;
1876 }
1877
1878
1879 template <typename A>
1880 void Parser<A>::makeSortedSymbolsArray(uint32_t array[], const uint32_t sectionArray[])
1881 {
1882         const bool log = false;
1883
1884         uint32_t* p = array;
1885         for (uint32_t i=0; i < this->_symbolCount; ++i) {
1886                 const macho_nlist<P>& sym =     symbolFromIndex(i);
1887                 // ignore stabs
1888                 if ( (sym.n_type() & N_STAB) != 0 )
1889                         continue;
1890
1891                 // only look at definitions
1892                 if ( (sym.n_type() & N_TYPE) != N_SECT )
1893                         continue;
1894
1895                 // 'L' labels do not denote atom breaks
1896                 const char* symbolName = this->nameFromSymbol(sym);
1897                 if ( symbolName[0] == 'L' )
1898                         continue;
1899
1900                 // how many def syms in each section
1901                 if ( sym.n_sect() > _machOSectionsCount )
1902                         throw "bad n_sect in symbol table";
1903
1904                 // append to array
1905                 *p++ = i;
1906         }
1907         assert(p == &array[_symbolsInSections] && "second pass over symbol table yield a different number of symbols");
1908
1909         // sort by symbol table address
1910         ParserAndSectionsArray extra = { this, sectionArray };
1911         ::qsort_r(array, _symbolsInSections, sizeof(uint32_t), &extra, &symbolIndexSorter);
1912
1913         // look for two symbols at same address
1914         _overlappingSymbols = false;
1915         for (unsigned int i=1; i < _symbolsInSections; ++i) {
1916                 if ( symbolFromIndex(array[i-1]).n_value() == symbolFromIndex(array[i]).n_value() ) {
1917                         //fprintf(stderr, "overlapping symbols at 0x%08llX\n", symbolFromIndex(array[i-1]).n_value());
1918                         _overlappingSymbols = true;
1919                 }
1920         }
1921
1922         if ( log ) {
1923                 fprintf(stderr, "sorted symbols:\n");
1924                 for(unsigned int i=0; i < _symbolsInSections; ++i )
1925                         fprintf(stderr, "0x%09llX symIndex=%d sectNum=%2d, %s\n", symbolFromIndex(array[i]).n_value(), array[i], symbolFromIndex(array[i]).n_sect(), nameFromSymbol(symbolFromIndex(array[i])) );
1926         }
1927 }
1928
1929
1930 template <typename A>
1931 void Parser<A>::makeSections()
1932 {
1933         // classify each section by type
1934         // compute how many Section objects will be needed and total size for all
1935         unsigned int totalSectionsSize = 0;
1936         uint8_t machOSectsStorage[sizeof(MachOSectionAndSectionClass<P>)*(_machOSectionsCount+2)]; // also room for tentative-defs and absolute symbols
1937         // allocate raw storage for all section objects on stack
1938         MachOSectionAndSectionClass<P>* machOSects = (MachOSectionAndSectionClass<P>*)machOSectsStorage;
1939         unsigned int count = 0;
1940         for (uint32_t i=0; i < _machOSectionsCount; ++i) {
1941                 const macho_section<P>* sect = &_sectionsStart[i];
1942                 if ( (sect->flags() & S_ATTR_DEBUG) != 0 ) {
1943                         if ( strcmp(sect->segname(), "__DWARF") == 0 ) {
1944                                 // note that .o file has dwarf
1945                                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoDwarf;
1946                                 // save off iteresting dwarf sections
1947                                 if ( strcmp(sect->sectname(), "__debug_info") == 0 )
1948                                         _file->_dwarfDebugInfoSect = sect;
1949                                 else if ( strcmp(sect->sectname(), "__debug_abbrev") == 0 )
1950                                         _file->_dwarfDebugAbbrevSect = sect;
1951                                 else if ( strcmp(sect->sectname(), "__debug_line") == 0 )
1952                                         _file->_dwarfDebugLineSect = sect;
1953                                 else if ( strcmp(sect->sectname(), "__debug_str") == 0 )
1954                                         _file->_dwarfDebugStringSect = sect;
1955                                 // linker does not propagate dwarf sections to output file
1956                                 continue;
1957                         }
1958                         else if ( strcmp(sect->segname(), "__LD") == 0 ) {
1959                                 if ( strncmp(sect->sectname(), "__compact_unwind", 16) == 0 ) {
1960                                         machOSects[count].sect = sect;
1961                                         totalSectionsSize += sizeof(CUSection<A>);
1962                                         machOSects[count++].type = sectionTypeCompactUnwind;
1963                                         continue;
1964                                 }
1965                         }
1966                 }
1967                 // ignore empty __OBJC sections
1968                 if ( (sect->size() == 0) && (strcmp(sect->segname(), "__OBJC") == 0) )
1969                         continue;
1970                 // objc image info section is really attributes and not content
1971                 if ( ((strcmp(sect->sectname(), "__image_info") == 0) && (strcmp(sect->segname(), "__OBJC") == 0))
1972                         || ((strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0) && (strcmp(sect->segname(), "__DATA") == 0)) ) {
1973                         //      struct objc_image_info  {
1974                         //              uint32_t        version;        // initially 0
1975                         //              uint32_t        flags;
1976                         //      };
1977                         // #define OBJC_IMAGE_SUPPORTS_GC   2
1978                         // #define OBJC_IMAGE_GC_ONLY       4
1979                         //
1980                         const uint32_t* contents = (uint32_t*)(_file->fileContent()+sect->offset());
1981                         if ( (sect->size() >= 8) && (contents[0] == 0) ) {
1982                                 uint32_t flags = E::get32(contents[1]);
1983                                 if ( (flags & 4) == 4 )
1984                                         _file->_objConstraint = ld::File::objcConstraintGC;
1985                                 else if ( (flags & 2) == 2 )
1986                                         _file->_objConstraint = ld::File::objcConstraintRetainReleaseOrGC;
1987                                 else
1988                                         _file->_objConstraint = ld::File::objcConstraintRetainRelease;
1989                                 if ( (flags & 1) == 1 )
1990                                         _file->_ojcReplacmentClass = true;
1991                                 if ( sect->size() > 8 ) {
1992                                         warning("section %s/%s has unexpectedly large size %llu in %s",
1993                                                         sect->segname(), Section<A>::makeSectionName(sect), sect->size(), _file->path());
1994                                 }
1995                         }
1996                         else {
1997                                 warning("can't parse %s/%s section in %s", sect->segname(), Section<A>::makeSectionName(sect), _file->path());
1998                         }
1999                         continue;
2000                 }
2001                 machOSects[count].sect = sect;
2002                 switch ( sect->flags() & SECTION_TYPE ) {
2003                         case S_SYMBOL_STUBS:
2004                                 if ( _stubsSectionNum == 0 ) {
2005                                         _stubsSectionNum = i+1;
2006                                         _stubsMachOSection = sect;
2007                                 }
2008                                 else
2009                                         assert(1 && "multiple S_SYMBOL_STUBS sections");
2010                         case S_LAZY_SYMBOL_POINTERS:
2011                                 break;
2012                         case S_4BYTE_LITERALS:
2013                                 totalSectionsSize += sizeof(Literal4Section<A>);
2014                                 machOSects[count++].type = sectionTypeLiteral4;
2015                                 break;
2016                         case S_8BYTE_LITERALS:
2017                                 totalSectionsSize += sizeof(Literal8Section<A>);
2018                                 machOSects[count++].type = sectionTypeLiteral8;
2019                                 break;
2020                         case S_16BYTE_LITERALS:
2021                                 totalSectionsSize += sizeof(Literal16Section<A>);
2022                                 machOSects[count++].type = sectionTypeLiteral16;
2023                                 break;
2024                         case S_NON_LAZY_SYMBOL_POINTERS:
2025                                 totalSectionsSize += sizeof(NonLazyPointerSection<A>);
2026                                 machOSects[count++].type = sectionTypeNonLazy;
2027                                 break;
2028                         case S_LITERAL_POINTERS:
2029                                 if ( (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__cls_refs") == 0) ) {
2030                                         totalSectionsSize += sizeof(Objc1ClassReferences<A>);
2031                                         machOSects[count++].type = sectionTypeObjC1ClassRefs;
2032                                 }
2033                                 else {
2034                                         totalSectionsSize += sizeof(PointerToCStringSection<A>);
2035                                         machOSects[count++].type = sectionTypeCStringPointer;
2036                                 }
2037                                 break;
2038                         case S_CSTRING_LITERALS:
2039                                 totalSectionsSize += sizeof(CStringSection<A>);
2040                                 machOSects[count++].type = sectionTypeCString;
2041                                 break;
2042                         case S_MOD_INIT_FUNC_POINTERS:
2043                         case S_MOD_TERM_FUNC_POINTERS:
2044                         case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
2045                         case S_INTERPOSING:
2046                         case S_ZEROFILL:
2047                         case S_REGULAR:
2048                         case S_COALESCED:
2049                         case S_THREAD_LOCAL_REGULAR:
2050                         case S_THREAD_LOCAL_ZEROFILL:
2051                                 if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__eh_frame") == 0) ) {
2052                                         totalSectionsSize += sizeof(CFISection<A>);
2053                                         machOSects[count++].type = sectionTypeCFI;
2054                                 }
2055                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__cfstring") == 0) ) {
2056                                         totalSectionsSize += sizeof(CFStringSection<A>);
2057                                         machOSects[count++].type = sectionTypeCFString;
2058                                 }
2059                                 else if ( (strcmp(sect->segname(), "__TEXT") == 0) && (strcmp(sect->sectname(), "__ustring") == 0) ) {
2060                                         totalSectionsSize += sizeof(UTF16StringSection<A>);
2061                                         machOSects[count++].type = sectionTypeUTF16Strings;
2062                                 }
2063                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strncmp(sect->sectname(), "__objc_classrefs", 16) == 0) ) {
2064                                         totalSectionsSize += sizeof(ObjC2ClassRefsSection<A>);
2065                                         machOSects[count++].type = sectionTypeObjC2ClassRefs;
2066                                 }
2067                                 else if ( (strcmp(sect->segname(), "__DATA") == 0) && (strcmp(sect->sectname(), "__objc_catlist") == 0) ) {
2068                                         totalSectionsSize += sizeof(ObjC2CategoryListSection<A>);
2069                                         machOSects[count++].type = typeObjC2CategoryList;
2070                                 }
2071                                 else if ( _AppleObjc && (strcmp(sect->segname(), "__OBJC") == 0) && (strcmp(sect->sectname(), "__class") == 0) ) {
2072                                         totalSectionsSize += sizeof(ObjC1ClassSection<A>);
2073                                         machOSects[count++].type = sectionTypeObjC1Classes;
2074                                 }
2075                                 else {
2076                                         totalSectionsSize += sizeof(SymboledSection<A>);
2077                                         machOSects[count++].type = sectionTypeSymboled;
2078                                 }
2079                                 break;
2080                         case S_THREAD_LOCAL_VARIABLES:
2081                                 totalSectionsSize += sizeof(TLVDefsSection<A>);
2082                                 machOSects[count++].type = sectionTypeTLVDefs;
2083                                 break;
2084                         case S_THREAD_LOCAL_VARIABLE_POINTERS:
2085                         default:
2086                                 throwf("unknown section type %d", sect->flags() & SECTION_TYPE);
2087                 }
2088         }
2089
2090         // sort by address (mach-o object files don't aways have sections sorted)
2091         ::qsort(machOSects, count, sizeof(MachOSectionAndSectionClass<P>), MachOSectionAndSectionClass<P>::sorter);
2092
2093         // we will synthesize a dummy Section<A> object for tentative definitions
2094         if ( _tentativeDefinitionCount > 0 ) {
2095                 totalSectionsSize += sizeof(TentativeDefinitionSection<A>);
2096                 machOSects[count++].type = sectionTypeTentativeDefinitions;
2097         }
2098
2099         // we will synthesize a dummy Section<A> object for Absolute symbols
2100         if ( _absoluteSymbolCount > 0 ) {
2101                 totalSectionsSize += sizeof(AbsoluteSymbolSection<A>);
2102                 machOSects[count++].type = sectionTypeAbsoluteSymbols;
2103         }
2104
2105         // allocate one block for all Section objects as well as pointers to each
2106         uint8_t* space = new uint8_t[totalSectionsSize+count*sizeof(Section<A>*)];
2107         _file->_sectionsArray = (Section<A>**)space;
2108         _file->_sectionsArrayCount = count;
2109         Section<A>** objects = _file->_sectionsArray;
2110         space += count*sizeof(Section<A>*);
2111         for (uint32_t i=0; i < count; ++i) {
2112                 switch ( machOSects[i].type ) {
2113                         case sectionTypeIgnore:
2114                                 break;
2115                         case sectionTypeLiteral4:
2116                                 *objects++ = new (space) Literal4Section<A>(*this, *_file, machOSects[i].sect);
2117                                 space += sizeof(Literal4Section<A>);
2118                                 break;
2119                         case sectionTypeLiteral8:
2120                                 *objects++ = new (space) Literal8Section<A>(*this, *_file, machOSects[i].sect);
2121                                 space += sizeof(Literal8Section<A>);
2122                                 break;
2123                         case sectionTypeLiteral16:
2124                                 *objects++ = new (space) Literal16Section<A>(*this, *_file, machOSects[i].sect);
2125                                 space += sizeof(Literal16Section<A>);
2126                                 break;
2127                         case sectionTypeNonLazy:
2128                                 *objects++ = new (space) NonLazyPointerSection<A>(*this, *_file, machOSects[i].sect);
2129                                 space += sizeof(NonLazyPointerSection<A>);
2130                                 break;
2131                         case sectionTypeCFI:
2132                                 _EHFrameSection = new (space) CFISection<A>(*this, *_file, machOSects[i].sect);
2133                                 *objects++ = _EHFrameSection;
2134                                 space += sizeof(CFISection<A>);
2135                                 break;
2136                         case sectionTypeCString:
2137                                 *objects++ = new (space) CStringSection<A>(*this, *_file, machOSects[i].sect);
2138                                 space += sizeof(CStringSection<A>);
2139                                 break;
2140                         case sectionTypeCStringPointer:
2141                                 *objects++ = new (space) PointerToCStringSection<A>(*this, *_file, machOSects[i].sect);
2142                                 space += sizeof(PointerToCStringSection<A>);
2143                                 break;
2144                         case sectionTypeObjC1ClassRefs:
2145                                 *objects++ = new (space) Objc1ClassReferences<A>(*this, *_file, machOSects[i].sect);
2146                                 space += sizeof(Objc1ClassReferences<A>);
2147                                 break;
2148                         case sectionTypeUTF16Strings:
2149                                 *objects++ = new (space) UTF16StringSection<A>(*this, *_file, machOSects[i].sect);
2150                                 space += sizeof(UTF16StringSection<A>);
2151                                 break;
2152                         case sectionTypeCFString:
2153                                 *objects++ = new (space) CFStringSection<A>(*this, *_file, machOSects[i].sect);
2154                                 space += sizeof(CFStringSection<A>);
2155                                 break;
2156                         case sectionTypeObjC2ClassRefs:
2157                                 *objects++ = new (space) ObjC2ClassRefsSection<A>(*this, *_file, machOSects[i].sect);
2158                                 space += sizeof(ObjC2ClassRefsSection<A>);
2159                                 break;
2160                         case typeObjC2CategoryList:
2161                                 *objects++ = new (space) ObjC2CategoryListSection<A>(*this, *_file, machOSects[i].sect);
2162                                 space += sizeof(ObjC2CategoryListSection<A>);
2163                                 break;
2164                         case sectionTypeObjC1Classes:
2165                                 *objects++ = new (space) ObjC1ClassSection<A>(*this, *_file, machOSects[i].sect);
2166                                 space += sizeof(ObjC1ClassSection<A>);
2167                                 break;
2168                         case sectionTypeSymboled:
2169                                 *objects++ = new (space) SymboledSection<A>(*this, *_file, machOSects[i].sect);
2170                                 space += sizeof(SymboledSection<A>);
2171                                 break;
2172                         case sectionTypeTLVDefs:
2173                                 *objects++ = new (space) TLVDefsSection<A>(*this, *_file, machOSects[i].sect);
2174                                 space += sizeof(TLVDefsSection<A>);
2175                                 break;
2176                         case sectionTypeCompactUnwind:
2177                                 _compactUnwindSection = new (space) CUSection<A>(*this, *_file, machOSects[i].sect);
2178                                 *objects++ = _compactUnwindSection;
2179                                 space += sizeof(CUSection<A>);
2180                                 break;
2181                         case sectionTypeTentativeDefinitions:
2182                                 *objects++ = new (space) TentativeDefinitionSection<A>(*this, *_file);
2183                                 space += sizeof(TentativeDefinitionSection<A>);
2184                                 break;
2185                         case sectionTypeAbsoluteSymbols:
2186                                 _absoluteSection = new (space) AbsoluteSymbolSection<A>(*this, *_file);
2187                                 *objects++ = _absoluteSection;
2188                                 space += sizeof(AbsoluteSymbolSection<A>);
2189                                 break;
2190                         default:
2191                                 throw "internal error uknown SectionType";
2192                 }
2193         }
2194 }
2195
2196
2197 template <typename A>
2198 Section<A>* Parser<A>::sectionForAddress(typename A::P::uint_t addr)
2199 {
2200         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2201                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2202                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2203                 if ( sect != NULL ) {
2204                         if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
2205                                 return _file->_sectionsArray[i];
2206                         }
2207                 }
2208         }
2209         // not strictly in any section
2210         // may be in a zero length section
2211         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2212                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2213                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2214                 if ( sect != NULL ) {
2215                         if ( (sect->addr() == addr) && (sect->size() == 0) ) {
2216                                 return _file->_sectionsArray[i];
2217                         }
2218                 }
2219         }
2220
2221         throwf("sectionForAddress(0x%llX) address not in any section", (uint64_t)addr);
2222 }
2223
2224 template <typename A>
2225 Section<A>* Parser<A>::sectionForNum(unsigned int num)
2226 {
2227         for (uint32_t i=0; i < _file->_sectionsArrayCount; ++i ) {
2228                 const macho_section<typename A::P>* sect = _file->_sectionsArray[i]->machoSection();
2229                 // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
2230                 if ( sect != NULL ) {
2231                         if ( num == (unsigned int)((sect - _sectionsStart)+1) )
2232                                 return _file->_sectionsArray[i];
2233                 }
2234         }
2235         throwf("sectionForNum(%u) section number not for any section", num);
2236 }
2237
2238 template <typename A>
2239 Atom<A>* Parser<A>::findAtomByAddress(pint_t addr)
2240 {
2241         Section<A>* section = this->sectionForAddress(addr);
2242         return section->findAtomByAddress(addr);
2243 }
2244
2245 template <typename A>
2246 Atom<A>* Parser<A>::findAtomByAddressOrNullIfStub(pint_t addr)
2247 {
2248         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) )
2249                 return NULL;
2250         return findAtomByAddress(addr);
2251 }
2252
2253 template <typename A>
2254 Atom<A>* Parser<A>::findAtomByAddressOrLocalTargetOfStub(pint_t addr, uint32_t* offsetInAtom)
2255 {
2256         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2257                 // target is a stub, remove indirection
2258                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2259                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2260                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2261                 // can't be to external weak symbol
2262                 assert( (this->combineFromSymbol(sym) != ld::Atom::combineByName) || (this->scopeFromSymbol(sym) != ld::Atom::scopeGlobal) );
2263                 *offsetInAtom = 0;
2264                 return this->findAtomByName(this->nameFromSymbol(sym));
2265         }
2266         Atom<A>* target = this->findAtomByAddress(addr);
2267         *offsetInAtom = addr - target->_objAddress;
2268         return target;
2269 }
2270
2271 template <typename A>
2272 Atom<A>* Parser<A>::findAtomByName(const char* name)
2273 {
2274         uint8_t* p = _file->_atomsArray;
2275         for(int i=_file->_atomsArrayCount; i > 0; --i) {
2276                 Atom<A>* atom = (Atom<A>*)p;
2277                 if ( strcmp(name, atom->name()) == 0 )
2278                         return atom;
2279                 p += sizeof(Atom<A>);
2280         }
2281         return NULL;
2282 }
2283
2284 template <typename A>
2285 void Parser<A>::findTargetFromAddress(pint_t addr, TargetDesc& target)
2286 {
2287         if ( hasStubsSection() && (_stubsMachOSection->addr() <= addr) && (addr < (_stubsMachOSection->addr()+_stubsMachOSection->size())) ) {
2288                 // target is a stub, remove indirection
2289                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2290                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2291                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2292                 target.atom = NULL;
2293                 target.name = this->nameFromSymbol(sym);
2294                 target.weakImport = this->weakImportFromSymbol(sym);
2295                 target.addend = 0;
2296                 return;
2297         }
2298         Section<A>* section = this->sectionForAddress(addr);
2299         target.atom = section->findAtomByAddress(addr);
2300         target.addend = addr - target.atom->_objAddress;
2301         target.weakImport = false;
2302         target.name = NULL;
2303 }
2304
2305 template <typename A>
2306 void Parser<A>::findTargetFromAddress(pint_t baseAddr, pint_t addr, TargetDesc& target)
2307 {
2308         findTargetFromAddress(baseAddr, target);
2309         target.addend = addr - target.atom->_objAddress;
2310 }
2311
2312 template <typename A>
2313 void Parser<A>::findTargetFromAddressAndSectionNum(pint_t addr, unsigned int sectNum, TargetDesc& target)
2314 {
2315         if ( sectNum == R_ABS ) {
2316                 // target is absolute symbol that corresponds to addr
2317                 if ( _absoluteSection != NULL ) {
2318                         target.atom = _absoluteSection->findAbsAtomForValue(addr);
2319                         if ( target.atom != NULL ) {
2320                                 target.name = NULL;
2321                                 target.weakImport = false;
2322                                 target.addend = 0;
2323                                 return;
2324                         }
2325                 }
2326                 throwf("R_ABS reloc but no absolute symbol at target address");
2327         }
2328
2329         if ( hasStubsSection() && (stubsSectionNum() == sectNum) ) {
2330                 // target is a stub, remove indirection
2331                 uint32_t symbolIndex = this->symbolIndexFromIndirectSectionAddress(addr, _stubsMachOSection);
2332                 assert(symbolIndex != INDIRECT_SYMBOL_LOCAL);
2333                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
2334                 // use direct reference when stub is to a static function
2335                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (this->nameFromSymbol(sym)[0] == 'L')) ) {
2336                         this->findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
2337                 }
2338                 else {
2339                         target.atom = NULL;
2340                         target.name = this->nameFromSymbol(sym);
2341                         target.weakImport = this->weakImportFromSymbol(sym);
2342                         target.addend = 0;
2343                 }
2344                 return;
2345         }
2346         Section<A>* section = this->sectionForNum(sectNum);
2347         target.atom = section->findAtomByAddress(addr);
2348         if ( target.atom == NULL ) {
2349                 typedef typename A::P::sint_t sint_t;
2350                 sint_t a = (sint_t)addr;
2351                 sint_t sectStart = (sint_t)(section->machoSection()->addr());
2352                 sint_t sectEnd  = sectStart + section->machoSection()->size();
2353                 if ( a < sectStart ) {
2354                         // target address is before start of section, so must be negative addend
2355                         target.atom = section->findAtomByAddress(sectStart);
2356                         target.addend = a - sectStart;
2357                         target.weakImport = false;
2358                         target.name = NULL;
2359                         return;
2360                 }
2361                 else if ( a >= sectEnd ) {
2362                         target.atom = section->findAtomByAddress(sectEnd-1);
2363                         target.addend = a - sectEnd;
2364                         target.weakImport = false;
2365                         target.name = NULL;
2366                         return;
2367                 }
2368         }
2369         assert(target.atom != NULL);
2370         target.addend = addr - target.atom->_objAddress;
2371         target.weakImport = false;
2372         target.name = NULL;
2373 }
2374
2375 template <typename A>
2376 void Parser<A>::addDtraceExtraInfos(const SourceLocation& src, const char* providerName)
2377 {
2378         // for every ___dtrace_stability$* and ___dtrace_typedefs$* undefine with
2379         // a matching provider name, add a by-name kDtraceTypeReference at probe site
2380         const char* dollar = strchr(providerName, '$');
2381         if ( dollar != NULL ) {
2382                 int providerNameLen = dollar-providerName+1;
2383                 for ( std::vector<const char*>::iterator it = _dtraceProviderInfo.begin(); it != _dtraceProviderInfo.end(); ++it) {
2384                         const char* typeDollar = strchr(*it, '$');
2385                         if ( typeDollar != NULL ) {
2386                                 if ( strncmp(typeDollar+1, providerName, providerNameLen) == 0 ) {
2387                                         addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindDtraceExtra,false, *it);
2388                                 }
2389                         }
2390                 }
2391         }
2392 }
2393
2394 template <typename A>
2395 const char* Parser<A>::scanSymbolTableForAddress(uint64_t addr)
2396 {
2397         uint64_t closestSymAddr = 0;
2398         const char* closestSymName = NULL;
2399         for (uint32_t i=0; i < this->_symbolCount; ++i) {
2400                 const macho_nlist<P>& sym =     symbolFromIndex(i);
2401                 // ignore stabs
2402                 if ( (sym.n_type() & N_STAB) != 0 )
2403                         continue;
2404
2405                 // only look at definitions
2406                 if ( (sym.n_type() & N_TYPE) != N_SECT )
2407                         continue;
2408
2409                 // return with exact match
2410                 if ( sym.n_value() == addr )
2411                         return nameFromSymbol(sym);
2412
2413                 // record closest seen so far
2414                 if ( (sym.n_value() < addr) && ((sym.n_value() > closestSymAddr) || (closestSymName == NULL)) )
2415                         closestSymName = nameFromSymbol(sym);
2416         }
2417
2418         return (closestSymName != NULL) ? closestSymName : "unknown";
2419 }
2420
2421
2422 template <typename A>
2423 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind setKind, const TargetDesc& target)
2424 {
2425         // some fixup pairs can be combined
2426         ld::Fixup::Cluster cl = ld::Fixup::k1of3;
2427         ld::Fixup::Kind firstKind = ld::Fixup::kindSetTargetAddress;
2428         bool combined = false;
2429         if ( target.addend == 0 ) {
2430                 cl = ld::Fixup::k1of1;
2431                 combined = true;
2432                 switch ( setKind ) {
2433                         case ld::Fixup::kindStoreLittleEndian32:
2434                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian32;
2435                                 break;
2436                         case ld::Fixup::kindStoreLittleEndian64:
2437                                 firstKind = ld::Fixup::kindStoreTargetAddressLittleEndian64;
2438                                 break;
2439                         case ld::Fixup::kindStoreBigEndian32:
2440                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian32;
2441                                 break;
2442                         case ld::Fixup::kindStoreBigEndian64:
2443                                 firstKind = ld::Fixup::kindStoreTargetAddressBigEndian64;
2444                                 break;
2445                         case ld::Fixup::kindStoreX86BranchPCRel32:
2446                                 firstKind = ld::Fixup::kindStoreTargetAddressX86BranchPCRel32;
2447                                 break;
2448                         case ld::Fixup::kindStoreX86PCRel32:
2449                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32;
2450                                 break;
2451                         case ld::Fixup::kindStoreX86PCRel32GOTLoad:
2452                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32GOTLoad;
2453                                 break;
2454                         case ld::Fixup::kindStoreX86PCRel32TLVLoad:
2455                                 firstKind = ld::Fixup::kindStoreTargetAddressX86PCRel32TLVLoad;
2456                                 break;
2457                         case ld::Fixup::kindStoreX86Abs32TLVLoad:
2458                                 firstKind = ld::Fixup::kindStoreTargetAddressX86Abs32TLVLoad;
2459                                 break;
2460                         case ld::Fixup::kindStoreARMBranch24:
2461                                 firstKind = ld::Fixup::kindStoreTargetAddressARMBranch24;
2462                                 break;
2463                         case ld::Fixup::kindStoreThumbBranch22:
2464                                 firstKind = ld::Fixup::kindStoreTargetAddressThumbBranch22;
2465                                 break;
2466                         default:
2467                                 combined = false;
2468                                 cl = ld::Fixup::k1of2;
2469                                 break;
2470                 }
2471         }
2472
2473         if ( target.atom != NULL ) {
2474                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2475                         addFixup(src, cl, firstKind, target.atom);
2476                 }
2477                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2478                         addFixup(src, cl, firstKind, ld::Fixup::bindingByContentBound, target.atom);
2479                 }
2480                 else if ( (src.atom->section().type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
2481                         // backing string in CFStrings should always be direct
2482                         addFixup(src, cl, firstKind, target.atom);
2483                 }
2484                 else {
2485                         // change direct fixup to by-name fixup
2486                         addFixup(src, cl, firstKind, false, target.atom->name());
2487                 }
2488         }
2489         else {
2490                 addFixup(src, cl, firstKind, target.weakImport, target.name);
2491         }
2492         if ( target.addend == 0 ) {
2493                 if ( ! combined )
2494                         addFixup(src, ld::Fixup::k2of2, setKind);
2495         }
2496         else {
2497                 addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, target.addend);
2498                 addFixup(src, ld::Fixup::k3of3, setKind);
2499         }
2500 }
2501
2502 template <typename A>
2503 void Parser<A>::addFixups(const SourceLocation& src, ld::Fixup::Kind kind, const TargetDesc& target, const TargetDesc& picBase)
2504 {
2505         ld::Fixup::Cluster cl = (target.addend == 0) ? ld::Fixup::k1of4 : ld::Fixup::k1of5;
2506         if ( target.atom != NULL ) {
2507                 if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
2508                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.atom);
2509                 }
2510                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
2511                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
2512                 }
2513                 else {
2514                         addFixup(src, cl, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
2515                 }
2516         }
2517         else {
2518                 addFixup(src, cl, ld::Fixup::kindSetTargetAddress, target.weakImport, target.name);
2519         }
2520         if ( target.addend == 0 ) {
2521                 assert(picBase.atom != NULL);
2522                 addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2523                 addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, picBase.addend);
2524                 addFixup(src, ld::Fixup::k4of4, kind);
2525         }
2526         else {
2527                 addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
2528                 addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, picBase.atom);
2529                 addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, picBase.addend);
2530                 addFixup(src, ld::Fixup::k5of5, kind);
2531         }
2532 }
2533
2534
2535
2536 template <typename A>
2537 uint32_t TentativeDefinitionSection<A>::computeAtomCount(class Parser<A>& parser,
2538                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2539                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2540 {
2541         return parser.tentativeDefinitionCount();
2542 }
2543
2544 template <typename A>
2545 uint32_t TentativeDefinitionSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2546                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2547                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2548 {
2549         this->_beginAtoms = (Atom<A>*)p;
2550         uint32_t count = 0;
2551         for (uint32_t i=parser.undefinedStartIndex(); i < parser.undefinedEndIndex(); ++i) {
2552                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2553                 if ( ((sym.n_type() & N_TYPE) == N_UNDF) && (sym.n_value() != 0) ) {
2554                         uint64_t size = sym.n_value();
2555                         uint8_t alignP2 = GET_COMM_ALIGN(sym.n_desc());
2556                         if ( alignP2 == 0 ) {
2557                                 // common symbols align to their size
2558                                 // that is, a 4-byte common aligns to 4-bytes
2559                                 // if this size is not a power of two,
2560                                 // then round up to the next power of two
2561                                 alignP2 = 63 - (uint8_t)__builtin_clzll(size);
2562                                 if ( size != (1ULL << alignP2) )
2563                                         ++alignP2;
2564                         }
2565                         // limit alignment of extremely large commons to 2^15 bytes (8-page)
2566                         if ( alignP2 > 15 )
2567                                 alignP2 = 15;
2568                         Atom<A>* allocatedSpace = (Atom<A>*)p;
2569                         new (allocatedSpace) Atom<A>(*this, parser.nameFromSymbol(sym), (pint_t)ULLONG_MAX, size,
2570                                                                                 ld::Atom::definitionTentative,  ld::Atom::combineByName,
2571                                                                                 parser.scopeFromSymbol(sym), ld::Atom::typeZeroFill, ld::Atom::symbolTableIn,
2572                                                                                 parser.dontDeadStripFromSymbol(sym), false, false, ld::Atom::Alignment(alignP2) );
2573                         p += sizeof(Atom<A>);
2574                         ++count;
2575                 }
2576         }
2577         this->_endAtoms = (Atom<A>*)p;
2578         return count;
2579 }
2580
2581
2582 template <typename A>
2583 uint32_t AbsoluteSymbolSection<A>::computeAtomCount(class Parser<A>& parser,
2584                                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
2585                                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
2586 {
2587         return parser.absoluteSymbolCount();
2588 }
2589
2590 template <typename A>
2591 uint32_t AbsoluteSymbolSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
2592                                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
2593                                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
2594 {
2595         this->_beginAtoms = (Atom<A>*)p;
2596         uint32_t count = 0;
2597         for (uint32_t i=0; i < parser.symbolCount(); ++i) {
2598                 const macho_nlist<P>& sym =     parser.symbolFromIndex(i);
2599                 if ( (sym.n_type() & N_TYPE) != N_ABS )
2600                         continue;
2601                 const char* absName = parser.nameFromSymbol(sym);
2602                 // ignore .objc_class_name_* symbols
2603                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 )
2604                         continue;
2605                 // ignore .objc_class_name_* symbols
2606                 if ( strncmp(absName, ".objc_category_name_", 20) == 0 )
2607                         continue;
2608                 // ignore empty *.eh symbols
2609                 if ( strcmp(&absName[strlen(absName)-3], ".eh") == 0 )
2610                         continue;
2611
2612                 Atom<A>* allocatedSpace = (Atom<A>*)p;
2613                 new (allocatedSpace) Atom<A>(*this, parser, sym, 0);
2614                 p += sizeof(Atom<A>);
2615                 ++count;
2616         }
2617         this->_endAtoms = (Atom<A>*)p;
2618         return count;
2619 }
2620
2621 template <typename A>
2622 Atom<A>* AbsoluteSymbolSection<A>::findAbsAtomForValue(typename A::P::uint_t value)
2623 {
2624         Atom<A>* end = this->_endAtoms;
2625         for(Atom<A>* p = this->_beginAtoms; p < end; ++p) {
2626                 if ( p->_objAddress == value )
2627                         return p;
2628         }
2629         return NULL;
2630 }
2631
2632
2633 template <typename A>
2634 uint32_t Parser<A>::indirectSymbol(uint32_t indirectIndex)
2635 {
2636         if ( indirectIndex >= _indirectTableCount )
2637                 throw "indirect symbol index out of range";
2638         return E::get32(_indirectTable[indirectIndex]);
2639 }
2640
2641 template <typename A>
2642 const macho_nlist<typename A::P>& Parser<A>::symbolFromIndex(uint32_t index)
2643 {
2644         if ( index > _symbolCount )
2645                 throw "symbol index out of range";
2646         return _symbols[index];
2647 }
2648
2649 template <typename A>
2650 const macho_section<typename A::P>*     Parser<A>::machOSectionFromSectionIndex(uint32_t index)
2651 {
2652         if ( index >= _machOSectionsCount )
2653                 throw "section index out of range";
2654         return &_sectionsStart[index];
2655 }
2656
2657 template <typename A>
2658 uint32_t Parser<A>::symbolIndexFromIndirectSectionAddress(pint_t addr, const macho_section<P>* sect)
2659 {
2660         uint32_t elementSize = 0;
2661         switch ( sect->flags() & SECTION_TYPE ) {
2662                 case S_SYMBOL_STUBS:
2663                         elementSize = sect->reserved2();
2664                         break;
2665                 case S_LAZY_SYMBOL_POINTERS:
2666                 case S_NON_LAZY_SYMBOL_POINTERS:
2667                         elementSize = sizeof(pint_t);
2668                         break;
2669                 default:
2670                         throw "section does not use inirect symbol table";
2671         }
2672         uint32_t indexInSection = (addr - sect->addr()) / elementSize;
2673         uint32_t indexIntoIndirectTable = sect->reserved1() + indexInSection;
2674         return this->indirectSymbol(indexIntoIndirectTable);
2675 }
2676
2677
2678
2679 template <typename A>
2680 const char* Parser<A>::nameFromSymbol(const macho_nlist<P>& sym)
2681 {
2682         return &_strings[sym.n_strx()];
2683 }
2684
2685 template <typename A>
2686 ld::Atom::Scope Parser<A>::scopeFromSymbol(const macho_nlist<P>& sym)
2687 {
2688         if ( (sym.n_type() & N_EXT) == 0 )
2689                 return ld::Atom::scopeTranslationUnit;
2690         else if ( (sym.n_type() & N_PEXT) != 0 )
2691                 return ld::Atom::scopeLinkageUnit;
2692         else if ( this->nameFromSymbol(sym)[0] == 'l' ) // since all 'l' symbols will be remove, don't make them global
2693                 return ld::Atom::scopeLinkageUnit;
2694         else
2695                 return ld::Atom::scopeGlobal;
2696 }
2697
2698 template <typename A>
2699 ld::Atom::Definition Parser<A>::definitionFromSymbol(const macho_nlist<P>& sym)
2700 {
2701         switch ( sym.n_type() & N_TYPE ) {
2702                 case N_ABS:
2703                         return ld::Atom::definitionAbsolute;
2704                 case N_SECT:
2705                         return ld::Atom::definitionRegular;
2706                 case N_UNDF:
2707                         if ( sym.n_value() != 0 )
2708                                 return ld::Atom::definitionTentative;
2709         }
2710         throw "definitionFromSymbol() bad symbol";
2711 }
2712
2713 template <typename A>
2714 ld::Atom::Combine Parser<A>::combineFromSymbol(const macho_nlist<P>& sym)
2715 {
2716         if ( sym.n_desc() & N_WEAK_DEF )
2717                 return ld::Atom::combineByName;
2718         else
2719                 return ld::Atom::combineNever;
2720 }
2721
2722
2723 template <typename A>
2724 ld::Atom::SymbolTableInclusion Parser<A>::inclusionFromSymbol(const macho_nlist<P>& sym)
2725 {
2726         const char* symbolName = nameFromSymbol(sym);
2727         // labels beginning with 'l' (lowercase ell) are automatically removed in final linked images <rdar://problem/4571042>
2728         // labels beginning with 'L' should have been stripped by the assembler, so are stripped now
2729         if ( sym.n_desc() & REFERENCED_DYNAMICALLY )
2730                 return ld::Atom::symbolTableInAndNeverStrip;
2731         else if ( symbolName[0] == 'l' )
2732                 return ld::Atom::symbolTableNotInFinalLinkedImages;
2733         else if ( symbolName[0] == 'L' )
2734                 return ld::Atom::symbolTableNotIn;
2735         else
2736                 return ld::Atom::symbolTableIn;
2737 }
2738
2739 template <typename A>
2740 bool Parser<A>::dontDeadStripFromSymbol(const macho_nlist<P>& sym)
2741 {
2742         return ( (sym.n_desc() & (N_NO_DEAD_STRIP|REFERENCED_DYNAMICALLY)) != 0 );
2743 }
2744
2745 template <typename A>
2746 bool Parser<A>::isThumbFromSymbol(const macho_nlist<P>& sym)
2747 {
2748         return ( sym.n_desc() & N_ARM_THUMB_DEF );
2749 }
2750
2751 template <typename A>
2752 bool Parser<A>::weakImportFromSymbol(const macho_nlist<P>& sym)
2753 {
2754         return ( ((sym.n_type() & N_TYPE) == N_UNDF) && ((sym.n_desc() & N_WEAK_REF) != 0) );
2755 }
2756
2757 template <typename A>
2758 bool Parser<A>::resolverFromSymbol(const macho_nlist<P>& sym)
2759 {
2760         return ( sym.n_desc() & N_SYMBOL_RESOLVER );
2761 }
2762
2763
/* Advance *OFFSET past one LEB128 value (signed or unsigned): every
   byte with the high bit set, plus the first byte without it.  Never
   advances beyond END.  */
static void
skip_leb128 (const uint8_t ** offset, const uint8_t * end)
{
  const uint8_t * cursor = *offset;

  while (cursor != end)
    {
      const bool last_byte = (*cursor < 0x80);
      ++cursor;
      if (last_byte)
	break;
    }
  *offset = cursor;
}
2773
/* Decode a ULEB128 starting at *OFFSET into a 64-bit word, advancing
   *OFFSET past every byte consumed.  Returns (uint64_t)-1 if END is
   reached before the final byte, or if the value does not fit in 64
   bits; in the overflow case the remaining bytes of the ULEB128 are
   still consumed.  */
static uint64_t
read_uleb128 (const uint8_t ** offset, const uint8_t * end)
{
  uint64_t value = 0;
  int shift = 0;

  for (;;)
    {
      if (*offset == end)
	return (uint64_t) -1;

      const uint8_t byte = *(*offset)++;
      const uint64_t payload = byte & 0x7f;

      /* The payload fits only if shifting it into place loses no bits.  */
      const bool fits = (shift < 64) && (((payload << shift) >> shift) == payload);
      if (fits)
	{
	  value |= payload << shift;
	  shift += 7;
	}
      else
	value = (uint64_t) -1;

      if (byte < 0x80)
	return value;
    }
}
2797
2798
2799 /* Skip over a DWARF attribute of form FORM.  */
2800 template <typename A>
2801 bool Parser<A>::skip_form(const uint8_t ** offset, const uint8_t * end, uint64_t form,
2802                                                         uint8_t addr_size, bool dwarf64)
2803 {
2804   int64_t sz=0;
2805
2806   switch (form)
2807     {
2808     case DW_FORM_addr:
2809       sz = addr_size;
2810       break;
2811
2812     case DW_FORM_block2:
2813       if (end - *offset < 2)
2814         return false;
2815       sz = 2 + A::P::E::get16(*(uint16_t*)offset);
2816       break;
2817
2818     case DW_FORM_block4:
2819       if (end - *offset < 4)
2820         return false;
2821       sz = 2 + A::P::E::get32(*(uint32_t*)offset);
2822       break;
2823
2824     case DW_FORM_data2:
2825     case DW_FORM_ref2:
2826       sz = 2;
2827       break;
2828
2829     case DW_FORM_data4:
2830     case DW_FORM_ref4:
2831       sz = 4;
2832       break;
2833
2834     case DW_FORM_data8:
2835     case DW_FORM_ref8:
2836       sz = 8;
2837       break;
2838
2839     case DW_FORM_string:
2840       while (*offset != end && **offset)
2841         ++*offset;
2842     case DW_FORM_data1:
2843     case DW_FORM_flag:
2844     case DW_FORM_ref1:
2845       sz = 1;
2846       break;
2847
2848     case DW_FORM_block:
2849       sz = read_uleb128 (offset, end);
2850       break;
2851
2852     case DW_FORM_block1:
2853       if (*offset == end)
2854         return false;
2855       sz = 1 + **offset;
2856       break;
2857
2858     case DW_FORM_sdata:
2859     case DW_FORM_udata:
2860     case DW_FORM_ref_udata:
2861       skip_leb128 (offset, end);
2862       return true;
2863
2864     case DW_FORM_strp:
2865     case DW_FORM_ref_addr:
2866       sz = 4;
2867       break;
2868
2869     default:
2870       return false;
2871     }
2872   if (end - *offset < sz)
2873     return false;
2874   *offset += sz;
2875   return true;
2876 }
2877
2878
2879 template <typename A>
2880 const char* Parser<A>::getDwarfString(uint64_t form, const uint8_t* p)
2881 {
2882         if ( form == DW_FORM_string )
2883                 return (const char*)p;
2884         else if ( form == DW_FORM_strp ) {
2885                 uint32_t offset = E::get32(*((uint32_t*)p));
2886                 const char* dwarfStrings = (char*)_file->fileContent() + _file->_dwarfDebugStringSect->offset();
2887                 if ( offset > _file->_dwarfDebugStringSect->size() ) {
2888                         warning("unknown dwarf DW_FORM_strp (offset=0x%08X) is too big in %s\n", offset, this->_path);
2889                         return NULL;
2890                 }
2891                 return &dwarfStrings[offset];
2892         }
2893         warning("unknown dwarf string encoding (form=%lld) in %s\n", form, this->_path);
2894         return NULL;
2895 }
2896
2897
2898 template <typename A>
2899 struct AtomAndLineInfo {
2900         Atom<A>*                        atom;
2901         ld::Atom::LineInfo      info;
2902 };
2903
2904
2905 // <rdar://problem/5591394> Add support to ld64 for N_FUN stabs when used for symbolic constants
2906 // Returns whether a stabStr belonging to an N_FUN stab represents a
2907 // symbolic constant rather than a function
2908 template <typename A>
2909 bool Parser<A>::isConstFunStabs(const char *stabStr)
2910 {
2911         const char* colon;
2912         // N_FUN can be used for both constants and for functions. In case it's a constant,
2913         // the format of the stabs string is "symname:c=<value>;"
2914         // ':' cannot appear in the symbol name, except if it's an Objective-C method
2915         // (in which case the symbol name starts with + or -, and then it's definitely
2916         //  not a constant)
2917         return (stabStr != NULL) && (stabStr[0] != '+') && (stabStr[0] != '-')
2918                         && ((colon = strchr(stabStr, ':')) != NULL)
2919                         && (colon[1] == 'c') && (colon[2] == '=');
2920 }
2921
2922
2923 template <typename A>
2924 void Parser<A>::parseDebugInfo()
2925 {
2926         // check for dwarf __debug_info section
2927         if ( _file->_dwarfDebugInfoSect == NULL ) {
2928                 // if no DWARF debug info, look for stabs
2929                 this->parseStabs();
2930                 return;
2931         }
2932         if ( _file->_dwarfDebugInfoSect->size() == 0 )
2933                 return;
2934
2935         uint64_t stmtList;
2936         if ( !read_comp_unit(&_file->_dwarfTranslationUnitFile, &_file->_dwarfTranslationUnitDir, &stmtList) ) {
2937                 // if can't parse dwarf, warn and give up
2938                 _file->_dwarfTranslationUnitFile = NULL;
2939                 _file->_dwarfTranslationUnitDir = NULL;
2940                 warning("can't parse dwarf compilation unit info in %s", _path);
2941                 _file->_debugInfoKind = ld::relocatable::File::kDebugInfoNone;
2942                 return;
2943         }
2944
2945         // add line number info to atoms from dwarf
2946         std::vector<AtomAndLineInfo<A> > entries;
2947         entries.reserve(64);
2948         if ( _file->_debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
2949                 // file with just data will have no __debug_line info
2950                 if ( (_file->_dwarfDebugLineSect != NULL) && (_file->_dwarfDebugLineSect->size() != 0) ) {
2951                         // validate stmt_list
2952                         if ( (stmtList != (uint64_t)-1) && (stmtList < _file->_dwarfDebugLineSect->size()) ) {
2953                                 const uint8_t* debug_line = (uint8_t*)_file->fileContent() + _file->_dwarfDebugLineSect->offset();
2954                                 struct line_reader_data* lines = line_open(&debug_line[stmtList],
2955                                                                                                                 _file->_dwarfDebugLineSect->size() - stmtList, E::little_endian);
2956                                 struct line_info result;
2957                                 Atom<A>* curAtom = NULL;
2958                                 uint32_t curAtomOffset = 0;
2959                                 uint32_t curAtomAddress = 0;
2960                                 uint32_t curAtomSize = 0;
2961                                 std::map<uint32_t,const char*>  dwarfIndexToFile;
2962                                 if ( lines != NULL ) {
2963                                         while ( line_next(lines, &result, line_stop_pc) ) {
2964                                                 //fprintf(stderr, "curAtom=%p, result.pc=0x%llX, result.line=%llu, result.end_of_sequence=%d,"
2965                                                 //                                " curAtomAddress=0x%X, curAtomSize=0x%X\n",
2966                                                 //              curAtom, result.pc, result.line, result.end_of_sequence, curAtomAddress, curAtomSize);
2967                                                 // work around weird debug line table compiler generates if no functions in __text section
2968                                                 if ( (curAtom == NULL) && (result.pc == 0) && result.end_of_sequence && (result.file == 1))
2969                                                         continue;
2970                                                 // for performance, see if in next pc is in current atom
2971                                                 if ( (curAtom != NULL) && (curAtomAddress <= result.pc) && (result.pc < (curAtomAddress+curAtomSize)) ) {
2972                                                         curAtomOffset = result.pc - curAtomAddress;
2973                                                 }
2974                                                 // or pc at end of current atom
2975                                                 else if ( result.end_of_sequence && (curAtom != NULL) && (result.pc == (curAtomAddress+curAtomSize)) ) {
2976                                                         curAtomOffset = result.pc - curAtomAddress;
2977                                                 }
2978                                                 // or only one function that is a one line function
2979                                                 else if ( result.end_of_sequence && (curAtom == NULL) && (this->findAtomByAddress(0) != NULL) && (result.pc == this->findAtomByAddress(0)->size()) ) {
2980                                                         curAtom                 = this->findAtomByAddress(0);
2981                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
2982                                                         curAtomAddress  = curAtom->objectAddress();
2983                                                         curAtomSize             = curAtom->size();
2984                                                 }
2985                                                 else {
2986                                                         // do slow look up of atom by address
2987                                                         try {
2988                                                                 curAtom = this->findAtomByAddress(result.pc);
2989                                                         }
2990                                                         catch (...) {
2991                                                                 // in case of bug in debug info, don't abort link, just limp on
2992                                                                 curAtom = NULL;
2993                                                         }
2994                                                         if ( curAtom == NULL )
2995                                                                 break; // file has line info but no functions
2996                                                         if ( result.end_of_sequence && (curAtomAddress+curAtomSize < result.pc) ) {
2997                                                                 // a one line function can be returned by line_next() as one entry with pc at end of blob
2998                                                                 // look for alt atom starting at end of previous atom
2999                                                                 uint32_t previousEnd = curAtomAddress+curAtomSize;
3000                                                                 Atom<A>* alt = this->findAtomByAddressOrNullIfStub(previousEnd);
3001                                                                 if ( alt == NULL )
3002                                                                         continue; // ignore spurious debug info for stubs
3003                                                                 if ( result.pc <= alt->objectAddress() + alt->size() ) {
3004                                                                         curAtom                 = alt;
3005                                                                         curAtomOffset   = result.pc - alt->objectAddress();
3006                                                                         curAtomAddress  = alt->objectAddress();
3007                                                                         curAtomSize             = alt->size();
3008                                                                 }
3009                                                                 else {
3010                                                                         curAtomOffset   = result.pc - curAtom->objectAddress();
3011                                                                         curAtomAddress  = curAtom->objectAddress();
3012                                                                         curAtomSize             = curAtom->size();
3013                                                                 }
3014                                                         }
3015                                                         else {
3016                                                                 curAtomOffset   = result.pc - curAtom->objectAddress();
3017                                                                 curAtomAddress  = curAtom->objectAddress();
3018                                                                 curAtomSize             = curAtom->size();
3019                                                         }
3020                                                 }
3021                                                 const char* filename;
3022                                                 std::map<uint32_t,const char*>::iterator pos = dwarfIndexToFile.find(result.file);
3023                                                 if ( pos == dwarfIndexToFile.end() ) {
3024                                                         filename = line_file(lines, result.file);
3025                                                         dwarfIndexToFile[result.file] = filename;
3026                                                 }
3027                                                 else {
3028                                                         filename = pos->second;
3029                                                 }
3030                                                 // only record for ~8000 line info records per function
3031                                                 if ( curAtom->roomForMoreLineInfoCount() ) {
3032                                                         AtomAndLineInfo<A> entry;
3033                                                         entry.atom = curAtom;
3034                                                         entry.info.atomOffset = curAtomOffset;
3035                                                         entry.info.fileName = filename;
3036                                                         entry.info.lineNumber = result.line;
3037                                                         //fprintf(stderr, "addr=0x%08llX, line=%lld, file=%s, atom=%s, atom.size=0x%X, end=%d\n",
3038                                                         //              result.pc, result.line, filename, curAtom->name(), curAtomSize, result.end_of_sequence);
3039                                                         entries.push_back(entry);
3040                                                         curAtom->incrementLineInfoCount();
3041                                                 }
3042                                                 if ( result.end_of_sequence ) {
3043                                                         curAtom = NULL;
3044                                                 }
3045                                         }
3046                                         line_free(lines);
3047                                 }
3048                         }
3049                 }
3050         }
3051
3052         // assign line info start offset for each atom
3053         uint8_t* p = _file->_atomsArray;
3054         uint32_t liOffset = 0;
3055         for(int i=_file->_atomsArrayCount; i > 0; --i) {
3056                 Atom<A>* atom = (Atom<A>*)p;
3057                 atom->_lineInfoStartIndex = liOffset;
3058                 liOffset += atom->_lineInfoCount;
3059                 atom->_lineInfoCount = 0;
3060                 p += sizeof(Atom<A>);
3061         }
3062         assert(liOffset == entries.size());
3063         _file->_lineInfos.reserve(liOffset);
3064
3065         // copy each line info for each atom
3066         for (typename std::vector<AtomAndLineInfo<A> >::iterator it = entries.begin(); it != entries.end(); ++it) {
3067                 uint32_t slot = it->atom->_lineInfoStartIndex + it->atom->_lineInfoCount;
3068                 _file->_lineInfos[slot] = it->info;
3069                 it->atom->_lineInfoCount++;
3070         }
3071
3072         // done with temp vector
3073         entries.clear();
3074 }
3075
3076 template <typename A>
3077 void Parser<A>::parseStabs()
3078 {
3079         // scan symbol table for stabs entries
3080         Atom<A>* currentAtom = NULL;
3081         pint_t currentAtomAddress = 0;
3082         enum { start, inBeginEnd, inFun } state = start;
3083         for (uint32_t symbolIndex = 0; symbolIndex < _symbolCount; ++symbolIndex ) {
3084                 const macho_nlist<P>& sym = this->symbolFromIndex(symbolIndex);
3085                 bool useStab = true;
3086                 uint8_t type = sym.n_type();
3087                 const char* symString = (sym.n_strx() != 0) ? this->nameFromSymbol(sym) : NULL;
3088                 if ( (type & N_STAB) != 0 ) {
3089                         _file->_debugInfoKind =  (_hasUUID ? ld::relocatable::File::kDebugInfoStabsUUID : ld::relocatable::File::kDebugInfoStabs);
3090                         ld::relocatable::File::Stab stab;
3091                         stab.atom       = NULL;
3092                         stab.type       = type;
3093                         stab.other      = sym.n_sect();
3094                         stab.desc       = sym.n_desc();
3095                         stab.value      = sym.n_value();
3096                         stab.string = NULL;
3097                         switch (state) {
3098                                 case start:
3099                                         switch (type) {
3100                                                 case N_BNSYM:
3101                                                         // beginning of function block
3102                                                         state = inBeginEnd;
3103                                                         // fall into case to lookup atom by addresss
3104                                                 case N_LCSYM:
3105                                                 case N_STSYM:
3106                                                         currentAtomAddress = sym.n_value();
3107                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3108                                                         if ( currentAtom != NULL ) {
3109                                                                 stab.atom = currentAtom;
3110                                                                 stab.string = symString;
3111                                                         }
3112                                                         else {
3113                                                                 fprintf(stderr, "can't find atom for stabs BNSYM at %08llX in %s",
3114                                                                         (uint64_t)sym.n_value(), _path);
3115                                                         }
3116                                                         break;
3117                                                 case N_SO:
3118                                                 case N_OSO:
3119                                                 case N_OPT:
3120                                                 case N_LSYM:
3121                                                 case N_RSYM:
3122                                                 case N_PSYM:
3123                                                         // not associated with an atom, just copy
3124                                                         stab.string = symString;
3125                                                         break;
3126                                                 case N_GSYM:
3127                                                 {
3128                                                         // n_value field is NOT atom address ;-(
3129                                                         // need to find atom by name match
3130                                                         const char* colon = strchr(symString, ':');
3131                                                         if ( colon != NULL ) {
3132                                                                 // build underscore leading name
3133                                                                 int nameLen = colon - symString;
3134                                                                 char symName[nameLen+2];
3135                                                                 strlcpy(&symName[1], symString, nameLen+1);
3136                                                                 symName[0] = '_';
3137                                                                 symName[nameLen+1] = '\0';
3138                                                                 currentAtom = this->findAtomByName(symName);
3139                                                                 if ( currentAtom != NULL ) {
3140                                                                         stab.atom = currentAtom;
3141                                                                         stab.string = symString;
3142                                                                 }
3143                                                         }
3144                                                         else {
3145                                                                 // might be a debug-note without trailing :G()
3146                                                                 currentAtom = this->findAtomByName(symString);
3147                                                                 if ( currentAtom != NULL ) {
3148                                                                         stab.atom = currentAtom;
3149                                                                         stab.string = symString;
3150                                                                 }
3151                                                         }
3152                                                         if ( stab.atom == NULL ) {
3153                                                                 // ld_classic added bogus GSYM stabs for old style dtrace probes
3154                                                                 if ( (strncmp(symString, "__dtrace_probe$", 15) != 0) )
3155                                                                         warning("can't find atom for N_GSYM stabs %s in %s", symString, _path);
3156                                                                 useStab = false;
3157                                                         }
3158                                                         break;
3159                                                 }
3160                                                 case N_FUN:
3161                                                         if ( isConstFunStabs(symString) ) {
3162                                                                 // constant not associated with a function
3163                                                                 stab.string = symString;
3164                                                         }
3165                                                         else {
3166                                                                 // old style stabs without BNSYM
3167                                                                 state = inFun;
3168                                                                 currentAtomAddress = sym.n_value();
3169                                                                 currentAtom = this->findAtomByAddress(currentAtomAddress);
3170                                                                 if ( currentAtom != NULL ) {
3171                                                                         stab.atom = currentAtom;
3172                                                                         stab.string = symString;
3173                                                                 }
3174                                                                 else {
3175                                                                         warning("can't find atom for stabs FUN at %08llX in %s",
3176                                                                                 (uint64_t)currentAtomAddress, _path);
3177                                                                 }
3178                                                         }
3179                                                         break;
3180                                                 case N_SOL:
3181                                                 case N_SLINE:
3182                                                         stab.string = symString;
3183                                                         // old stabs
3184                                                         break;
3185                                                 case N_BINCL:
3186                                                 case N_EINCL:
3187                                                 case N_EXCL:
3188                                                         stab.string = symString;
3189                                                         // -gfull built .o file
3190                                                         break;
3191                                                 default:
3192                                                         warning("unknown stabs type 0x%X in %s", type, _path);
3193                                         }
3194                                         break;
3195                                 case inBeginEnd:
3196                                         stab.atom = currentAtom;
3197                                         switch (type) {
3198                                                 case N_ENSYM:
3199                                                         state = start;
3200                                                         currentAtom = NULL;
3201                                                         break;
3202                                                 case N_LCSYM:
3203                                                 case N_STSYM:
3204                                                 {
3205                                                         Atom<A>* nestedAtom = this->findAtomByAddress(sym.n_value());
3206                                                         if ( nestedAtom != NULL ) {
3207                                                                 stab.atom = nestedAtom;
3208                                                                 stab.string = symString;
3209                                                         }
3210                                                         else {
3211                                                                 warning("can't find atom for stabs 0x%X at %08llX in %s",
3212                                                                         type, (uint64_t)sym.n_value(), _path);
3213                                                         }
3214                                                         break;
3215                                                 }
3216                                                 case N_LBRAC:
3217                                                 case N_RBRAC:
3218                                                 case N_SLINE:
3219                                                         // adjust value to be offset in atom
3220                                                         stab.value -= currentAtomAddress;
3221                                                 default:
3222                                                         stab.string = symString;
3223                                                         break;
3224                                         }
3225                                         break;
3226                                 case inFun:
3227                                         switch (type) {
3228                                                 case N_FUN:
3229                                                         if ( isConstFunStabs(symString) ) {
3230                                                                 stab.atom = currentAtom;
3231                                                                 stab.string = symString;
3232                                                         }
3233                                                         else {
3234                                                                 if ( sym.n_sect() != 0 ) {
3235                                                                         // found another start stab, must be really old stabs...
3236                                                                         currentAtomAddress = sym.n_value();
3237                                                                         currentAtom = this->findAtomByAddress(currentAtomAddress);
3238                                                                         if ( currentAtom != NULL ) {
3239                                                                                 stab.atom = currentAtom;
3240                                                                                 stab.string = symString;
3241                                                                         }
3242                                                                         else {
3243                                                                                 warning("can't find atom for stabs FUN at %08llX in %s",
3244                                                                                         (uint64_t)currentAtomAddress, _path);
3245                                                                         }
3246                                                                 }
3247                                                                 else {
3248                                                                         // found ending stab, switch back to start state
3249                                                                         stab.string = symString;
3250                                                                         stab.atom = currentAtom;
3251                                                                         state = start;
3252                                                                         currentAtom = NULL;
3253                                                                 }
3254                                                         }
3255                                                         break;
3256                                                 case N_LBRAC:
3257                                                 case N_RBRAC:
3258                                                 case N_SLINE:
3259                                                         // adjust value to be offset in atom
3260                                                         stab.value -= currentAtomAddress;
3261                                                         stab.atom = currentAtom;
3262                                                         break;
3263                                                 case N_SO:
3264                                                         stab.string = symString;
3265                                                         state = start;
3266                                                         break;
3267                                                 default:
3268                                                         stab.atom = currentAtom;
3269                                                         stab.string = symString;
3270                                                         break;
3271                                         }
3272                                         break;
3273                         }
3274                         // add to list of stabs for this .o file
3275                         if ( useStab )
3276                                 _file->_stabs.push_back(stab);
3277                 }
3278         }
3279 }
3280
3281
3282
3283 // Look at the compilation unit DIE and determine
3284 // its NAME, compilation directory (in COMP_DIR) and its
3285 // line number information offset (in STMT_LIST).  NAME and COMP_DIR
3286 // may be NULL (especially COMP_DIR) if they are not in the .o file;
3287 // STMT_LIST will be (uint64_t) -1.
3288 //
3289 // At present this assumes that there's only one compilation unit DIE.
3290 //
3291 template <typename A>
3292 bool Parser<A>::read_comp_unit(const char ** name, const char ** comp_dir,
3293                                                         uint64_t *stmt_list)
3294 {
3295         const uint8_t * debug_info;
3296         const uint8_t * debug_abbrev;
3297         const uint8_t * di;
3298         const uint8_t * da;
3299         const uint8_t * end;
3300         const uint8_t * enda;
3301         uint64_t sz;
3302         uint16_t vers;
3303         uint64_t abbrev_base;
3304         uint64_t abbrev;
3305         uint8_t address_size;
3306         bool dwarf64;
3307
3308         *name = NULL;
3309         *comp_dir = NULL;
3310         *stmt_list = (uint64_t) -1;
3311
3312         if ( (_file->_dwarfDebugInfoSect == NULL) || (_file->_dwarfDebugAbbrevSect == NULL) )
3313                 return false;
3314
3315         debug_info = (uint8_t*)_file->fileContent() + _file->_dwarfDebugInfoSect->offset();
3316         debug_abbrev = (uint8_t*)_file->fileContent() + _file->_dwarfDebugAbbrevSect->offset();
3317         di = debug_info;
3318
3319         if (_file->_dwarfDebugInfoSect->size() < 12)
3320                 /* Too small to be a real debug_info section.  */
3321                 return false;
3322         sz = A::P::E::get32(*(uint32_t*)di);
3323         di += 4;
3324         dwarf64 = sz == 0xffffffff;
3325         if (dwarf64)
3326                 sz = A::P::E::get64(*(uint64_t*)di), di += 8;
3327         else if (sz > 0xffffff00)
3328                 /* Unknown dwarf format.  */
3329                 return false;
3330
3331         /* Verify claimed size.  */
3332         if (sz + (di - debug_info) > _file->_dwarfDebugInfoSect->size() || sz <= (dwarf64 ? 23 : 11))
3333                 return false;
3334
3335         vers = A::P::E::get16(*(uint16_t*)di);
3336         if (vers < 2 || vers > 3)
3337         /* DWARF version wrong for this code.
3338            Chances are we could continue anyway, but we don't know for sure.  */
3339                 return false;
3340         di += 2;
3341
3342         /* Find the debug_abbrev section.  */
3343         abbrev_base = dwarf64 ? A::P::E::get64(*(uint64_t*)di) : A::P::E::get32(*(uint32_t*)di);
3344         di += dwarf64 ? 8 : 4;
3345
3346         if (abbrev_base > _file->_dwarfDebugAbbrevSect->size())
3347                 return false;
3348         da = debug_abbrev + abbrev_base;
3349         enda = debug_abbrev + _file->_dwarfDebugAbbrevSect->size();
3350
3351         address_size = *di++;
3352
3353         /* Find the abbrev number we're looking for.  */
3354         end = di + sz;
3355         abbrev = read_uleb128 (&di, end);
3356         if (abbrev == (uint64_t) -1)
3357                 return false;
3358
3359         /* Skip through the debug_abbrev section looking for that abbrev.  */
3360         for (;;)
3361         {
3362                 uint64_t this_abbrev = read_uleb128 (&da, enda);
3363                 uint64_t attr;
3364
3365                 if (this_abbrev == abbrev)
3366                         /* This is almost always taken.  */
3367                         break;
3368                 skip_leb128 (&da, enda); /* Skip the tag.  */
3369                 if (da == enda)
3370                         return false;
3371                 da++;  /* Skip the DW_CHILDREN_* value.  */
3372
3373                 do {
3374                         attr = read_uleb128 (&da, enda);
3375                         skip_leb128 (&da, enda);
3376                 } while (attr != 0 && attr != (uint64_t) -1);
3377                 if (attr != 0)
3378                         return false;
3379         }
3380
3381         /* Check that the abbrev is one for a DW_TAG_compile_unit.  */
3382         if (read_uleb128 (&da, enda) != DW_TAG_compile_unit)
3383         return false;
3384         if (da == enda)
3385         return false;
3386         da++;  /* Skip the DW_CHILDREN_* value.  */
3387
3388         /* Now, go through the DIE looking for DW_AT_name,
3389          DW_AT_comp_dir, and DW_AT_stmt_list.  */
3390         for (;;)
3391         {
3392                 uint64_t attr = read_uleb128 (&da, enda);
3393                 uint64_t form = read_uleb128 (&da, enda);
3394
3395                 if (attr == (uint64_t) -1)
3396                         return false;
3397                 else if (attr == 0)
3398                         return true;
3399
3400                 if (form == DW_FORM_indirect)
3401                         form = read_uleb128 (&di, end);
3402
3403                 if (attr == DW_AT_name)
3404                         *name = getDwarfString(form, di);
3405                 else if (attr == DW_AT_comp_dir)
3406                         *comp_dir = getDwarfString(form, di);
3407                 else if (attr == DW_AT_stmt_list && form == DW_FORM_data4)
3408                         *stmt_list = A::P::E::get32(*(uint32_t*)di);
3409                 else if (attr == DW_AT_stmt_list && form == DW_FORM_data8)
3410                         *stmt_list = A::P::E::get64(*(uint64_t*)di);
3411                 if (! skip_form (&di, end, form, address_size, dwarf64))
3412                         return false;
3413         }
3414 }
3415
3416
3417
3418 template <typename A>
3419 File<A>::~File()
3420 {
3421         free(_sectionsArray);
3422         free(_atomsArray);
3423 }
3424
3425 template <typename A>
3426 bool File<A>::translationUnitSource(const char** dir, const char** name) const
3427 {
3428         if ( _debugInfoKind == ld::relocatable::File::kDebugInfoDwarf ) {
3429                 *dir = _dwarfTranslationUnitDir;
3430                 *name = _dwarfTranslationUnitFile;
3431                 return (_dwarfTranslationUnitFile != NULL);
3432         }
3433         return false;
3434 }
3435
3436
3437
3438 template <typename A>
3439 bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
3440 {
3441         handler.doFile(*this);
3442         uint8_t* p = _atomsArray;
3443         for(int i=_atomsArrayCount; i > 0; --i) {
3444                 handler.doAtom(*((Atom<A>*)p));
3445                 p += sizeof(Atom<A>);
3446         }
3447         return (_atomsArrayCount != 0);
3448 }
3449
3450 template <typename A>
3451 const char* Section<A>::makeSegmentName(const macho_section<typename A::P>* sect)
3452 {
3453         // mach-o section record only has room for 16-byte seg/sect names
3454         // so a 16-byte name has no trailing zero
3455         const char* name = sect->segname();
3456         if ( strlen(name) < 16 )
3457                 return name;
3458         char* tmp = new char[17];
3459         strlcpy(tmp, name, 17);
3460         return tmp;
3461 }
3462
3463 template <typename A>
3464 const char* Section<A>::makeSectionName(const macho_section<typename A::P>* sect)
3465 {
3466         const char* name = sect->sectname();
3467         if ( strlen(name) < 16 )
3468                 return name;
3469
3470         // special case common long section names so we don't have to malloc
3471         if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3472                 return "__objc_classrefs";
3473         if ( strncmp(sect->sectname(), "__objc_classlist", 16) == 0 )
3474                 return "__objc_classlist";
3475         if ( strncmp(sect->sectname(), "__objc_nlclslist", 16) == 0 )
3476                 return "__objc_nlclslist";
3477         if ( strncmp(sect->sectname(), "__objc_nlcatlist", 16) == 0 )
3478                 return "__objc_nlcatlist";
3479         if ( strncmp(sect->sectname(), "__objc_protolist", 16) == 0 )
3480                 return "__objc_protolist";
3481         if ( strncmp(sect->sectname(), "__objc_protorefs", 16) == 0 )
3482                 return "__objc_protorefs";
3483         if ( strncmp(sect->sectname(), "__objc_superrefs", 16) == 0 )
3484                 return "__objc_superrefs";
3485         if ( strncmp(sect->sectname(), "__objc_imageinfo", 16) == 0 )
3486                 return "__objc_imageinfo";
3487         if ( strncmp(sect->sectname(), "__objc_stringobj", 16) == 0 )
3488                 return "__objc_stringobj";
3489         if ( strncmp(sect->sectname(), "__gcc_except_tab", 16) == 0 )
3490                 return "__gcc_except_tab";
3491
3492         char* tmp = new char[17];
3493         strlcpy(tmp, name, 17);
3494         return tmp;
3495 }
3496
3497 template <typename A>
3498 bool Section<A>::readable(const macho_section<typename A::P>* sect)
3499 {
3500         return true;
3501 }
3502
3503 template <typename A>
3504 bool Section<A>::writable(const macho_section<typename A::P>* sect)
3505 {
3506         // mach-o .o files do not contain segment permissions
3507         // we just know TEXT is special
3508         return ( strcmp(sect->segname(), "__TEXT") != 0 );
3509 }
3510
3511 template <typename A>
3512 bool Section<A>::exectuable(const macho_section<typename A::P>* sect)
3513 {
3514         // mach-o .o files do not contain segment permissions
3515         // we just know TEXT is special
3516         return ( strcmp(sect->segname(), "__TEXT") == 0 );
3517 }
3518
3519
3520 template <typename A>
3521 ld::Section::Type Section<A>::sectionType(const macho_section<typename A::P>* sect)
3522 {
3523         switch ( sect->flags() & SECTION_TYPE ) {
3524                 case S_ZEROFILL:
3525                         return ld::Section::typeZeroFill;
3526                 case S_CSTRING_LITERALS:
3527                         if ( (strcmp(sect->sectname(), "__cstring") == 0) && (strcmp(sect->segname(), "__TEXT") == 0) )
3528                                 return ld::Section::typeCString;
3529                         else
3530                                 return ld::Section::typeNonStdCString;
3531                 case S_4BYTE_LITERALS:
3532                         return ld::Section::typeLiteral4;
3533                 case S_8BYTE_LITERALS:
3534                         return ld::Section::typeLiteral8;
3535                 case S_LITERAL_POINTERS:
3536                         return ld::Section::typeCStringPointer;
3537                 case S_NON_LAZY_SYMBOL_POINTERS:
3538                         return ld::Section::typeNonLazyPointer;
3539                 case S_LAZY_SYMBOL_POINTERS:
3540                         return ld::Section::typeLazyPointer;
3541                 case S_SYMBOL_STUBS:
3542                         return ld::Section::typeStub;
3543                 case S_MOD_INIT_FUNC_POINTERS:
3544                         return ld::Section::typeInitializerPointers;
3545                 case S_MOD_TERM_FUNC_POINTERS:
3546                         return ld::Section::typeTerminatorPointers;
3547                 case S_INTERPOSING:
3548                         return ld::Section::typeUnclassified;
3549                 case S_16BYTE_LITERALS:
3550                         return ld::Section::typeLiteral16;
3551                 case S_REGULAR:
3552                 case S_COALESCED:
3553                         if ( sect->flags() & S_ATTR_PURE_INSTRUCTIONS ) {
3554                                 return ld::Section::typeCode;
3555                         }
3556                         else if ( strcmp(sect->segname(), "__TEXT") == 0 ) {
3557                                 if ( strcmp(sect->sectname(), "__eh_frame") == 0 )
3558                                         return ld::Section::typeCFI;
3559                                 else if ( strcmp(sect->sectname(), "__ustring") == 0 )
3560                                         return ld::Section::typeUTF16Strings;
3561                                 else if ( strcmp(sect->sectname(), "__textcoal_nt") == 0 )
3562                                         return ld::Section::typeCode;
3563                                 else if ( strcmp(sect->sectname(), "__StaticInit") == 0 )
3564                                         return ld::Section::typeCode;
3565                                 else if ( strcmp(sect->sectname(), "__constructor") == 0 )
3566                                         return ld::Section::typeInitializerPointers;
3567                         }
3568                         else if ( strcmp(sect->segname(), "__DATA") == 0 ) {
3569                                 if ( strcmp(sect->sectname(), "__cfstring") == 0 )
3570                                         return ld::Section::typeCFString;
3571                                 else if ( strcmp(sect->sectname(), "__dyld") == 0 )
3572                                         return ld::Section::typeDyldInfo;
3573                                 else if ( strcmp(sect->sectname(), "__program_vars") == 0 )
3574                                         return ld::Section::typeDyldInfo;
3575                                 else if ( strncmp(sect->sectname(), "__objc_classrefs", 16) == 0 )
3576                                         return ld::Section::typeObjCClassRefs;
3577                                 else if ( strcmp(sect->sectname(), "__objc_catlist") == 0 )
3578                                         return ld::Section::typeObjC2CategoryList;
3579                         }
3580                         else if ( strcmp(sect->segname(), "__OBJC") == 0 ) {
3581                                 if ( strcmp(sect->sectname(), "__class") == 0 )
3582                                         return ld::Section::typeObjC1Classes;
3583                         }
3584                         break;
3585                 case S_THREAD_LOCAL_REGULAR:
3586                         return ld::Section::typeTLVInitialValues;
3587                 case S_THREAD_LOCAL_ZEROFILL:
3588                         return ld::Section::typeTLVZeroFill;
3589                 case S_THREAD_LOCAL_VARIABLES:
3590                         return ld::Section::typeTLVDefs;
3591                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
3592                         return ld::Section::typeTLVInitializerPointers;
3593         }
3594         return ld::Section::typeUnclassified;
3595 }
3596
3597
3598 template <typename A>
3599 Atom<A>* Section<A>::findContentAtomByAddress(pint_t addr, class Atom<A>* start, class Atom<A>* end)
3600 {
3601         // do a binary search of atom array
3602         uint32_t atomCount = end - start;
3603         Atom<A>* base = start;
3604         for (uint32_t n = atomCount; n > 0; n /= 2) {
3605                 Atom<A>* pivot = &base[n/2];
3606                 pint_t atomStartAddr = pivot->_objAddress;
3607                 pint_t atomEndAddr = atomStartAddr + pivot->_size;
3608                 if ( atomStartAddr <= addr ) {
3609                         // address in normal atom
3610                         if (addr < atomEndAddr)
3611                                 return pivot;
3612                         // address in "end" label (but not in alias)
3613                         if ( (pivot->_size == 0) && (addr == atomEndAddr) && !pivot->isAlias() )
3614                                 return pivot;
3615                 }
3616                 if ( addr >= atomEndAddr ) {
3617                         // key > pivot
3618                         // move base to atom after pivot
3619                         base = &pivot[1];
3620                         --n;
3621                 }
3622                 else {
3623                         // key < pivot
3624                         // keep same base
3625                 }
3626         }
3627         return NULL;
3628 }
3629
3630 template <typename A>
3631 ld::Atom::Alignment Section<A>::alignmentForAddress(pint_t addr)
3632 {
3633         const uint32_t sectionAlignment = this->_machOSection->align();
3634         return ld::Atom::Alignment(sectionAlignment, (addr % (1 << sectionAlignment)));
3635 }
3636
3637 template <typename A>
3638 uint32_t Section<A>::sectionNum(class Parser<A>& parser) const
3639 {
3640         if ( _machOSection == NULL )
3641                 return 0;
3642         else
3643                 return 1 + (this->_machOSection - parser.firstMachOSection());
3644 }
3645
3646 // arm does not have zero cost exceptions
3647 template <> uint32_t CFISection<arm>::cfiCount() { return 0; }
3648
3649 template <typename A>
3650 uint32_t CFISection<A>::cfiCount()
3651 {
3652         // create ObjectAddressSpace object for use by libunwind
3653         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3654         return libunwind::CFI_Parser<OAS>::getCFICount(oas,
3655                                                                                 this->_machOSection->addr(), this->_machOSection->size());
3656 }
3657
3658 template <typename A>
3659 void CFISection<A>::warnFunc(void* ref, uint64_t funcAddr, const char* msg)
3660 {
3661         Parser<A>* parser = (Parser<A>*)ref;
3662         if ( ! parser->convertUnwindInfo() )
3663                 return;
3664         if ( funcAddr != CFI_INVALID_ADDRESS ) {
3665                 // atoms are not constructed yet, so scan symbol table for labels
3666                 const char* name = parser->scanSymbolTableForAddress(funcAddr);
3667                 warning("could not create compact unwind for %s: %s", name, msg);
3668         }
3669         else {
3670                 warning("could not create compact unwind: %s", msg);
3671         }
3672 }
3673
3674 template <>
3675 bool CFISection<x86_64>::needsRelocating()
3676 {
3677         return true;
3678 }
3679
3680 template <typename A>
3681 bool CFISection<A>::needsRelocating()
3682 {
3683         return false;
3684 }
3685
3686 template <>
3687 void CFISection<x86_64>::cfiParse(class Parser<x86_64>& parser, uint8_t* buffer,
3688                                                                         libunwind::CFI_Atom_Info<CFISection<x86_64>::OAS>::CFI_Atom_Info cfiArray[],
3689                                                                         uint32_t count)
3690 {
3691         // copy __eh_frame data to buffer
3692         memcpy(buffer, file().fileContent() + this->_machOSection->offset(), this->_machOSection->size());
3693
3694         // and apply relocations
3695         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + this->_machOSection->reloff());
3696         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
3697         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
3698                 uint64_t value = 0;
3699                 switch ( reloc->r_type() ) {
3700                         case X86_64_RELOC_SUBTRACTOR:
3701                                 value =  0 - parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3702                                 ++reloc;
3703                                 if ( reloc->r_extern() )
3704                                         value += parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3705                                 break;
3706                         case X86_64_RELOC_UNSIGNED:
3707                                 value = parser.symbolFromIndex(reloc->r_symbolnum()).n_value();
3708                                 break;
3709                         case X86_64_RELOC_GOT:
3710                                 // this is used for the reference to the personality function in CIEs
3711                                 // store the symbol number of the personality function for later use as a Fixup
3712                                 value = reloc->r_symbolnum();
3713                                 break;
3714                         default:
3715                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation type at r_address=0x%08X\n", reloc->r_address());
3716                                 break;
3717                 }
3718                 uint64_t*       p64;
3719                 uint32_t*       p32;
3720                 switch ( reloc->r_length() ) {
3721                         case 3:
3722                                 p64 = (uint64_t*)&buffer[reloc->r_address()];
3723                                 E::set64(*p64, value + E::get64(*p64));
3724                                 break;
3725                         case 2:
3726                                 p32 = (uint32_t*)&buffer[reloc->r_address()];
3727                                 E::set32(*p32, value + E::get32(*p32));
3728                                 break;
3729                         default:
3730                                 fprintf(stderr, "CFISection::cfiParse() unexpected relocation size at r_address=0x%08X\n", reloc->r_address());
3731                                 break;
3732                 }
3733         }
3734
3735
3736         // create ObjectAddressSpace object for use by libunwind
3737         OAS oas(*this, buffer);
3738
3739         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3740         const char* msg;
3741         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86_64>::parseCFIs(
3742                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
3743                                                         cfiArray, count, (void*)&parser, warnFunc);
3744         if ( msg != NULL )
3745                 throwf("malformed __eh_frame section: %s", msg);
3746 }
3747
3748 template <>
3749 void CFISection<x86>::cfiParse(class Parser<x86>& parser, uint8_t* buffer,
3750                                                                         libunwind::CFI_Atom_Info<CFISection<x86>::OAS>::CFI_Atom_Info cfiArray[],
3751                                                                         uint32_t count)
3752 {
3753         // create ObjectAddressSpace object for use by libunwind
3754         OAS oas(*this, (uint8_t*)this->file().fileContent()+this->_machOSection->offset());
3755
3756         // use libuwind to parse __eh_frame data into array of CFI_Atom_Info
3757         const char* msg;
3758         msg = libunwind::DwarfInstructions<OAS, libunwind::Registers_x86>::parseCFIs(
3759                                                         oas, this->_machOSection->addr(), this->_machOSection->size(),
3760                                                         cfiArray, count, (void*)&parser, warnFunc);
3761         if ( msg != NULL )
3762                 throwf("malformed __eh_frame section: %s", msg);
3763 }
3764
3765
3766
3767
3768 template <>
3769 void CFISection<arm>::cfiParse(class Parser<arm>& parser, uint8_t* buffer,
3770                                                                         libunwind::CFI_Atom_Info<CFISection<arm>::OAS>::CFI_Atom_Info cfiArray[],
3771                                                                         uint32_t count)
3772 {
3773         // arm does not use zero cost exceptions
3774         assert(count == 0);
3775 }
3776
3777
3778
3779 template <typename A>
3780 uint32_t CFISection<A>::computeAtomCount(class Parser<A>& parser,
3781                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
3782                                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3783 {
3784         return cfis.cfiCount;
3785 }
3786
3787
3788
3789 template <typename A>
3790 uint32_t CFISection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
3791                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
3792                                                                         const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3793 {
3794         this->_beginAtoms = (Atom<A>*)p;
3795         // walk CFI_Atom_Info array and create atom for each entry
3796         const CFI_Atom_Info* start = &cfis.cfiArray[0];
3797         const CFI_Atom_Info* end   = &cfis.cfiArray[cfis.cfiCount];
3798         for(const CFI_Atom_Info* a=start; a < end; ++a) {
3799                 Atom<A>* space = (Atom<A>*)p;
3800                 new (space) Atom<A>(*this, (a->isCIE ? "CIE" : "FDE"), a->address, a->size,
3801                                                                                 ld::Atom::definitionRegular, ld::Atom::combineNever, ld::Atom::scopeTranslationUnit,
3802                                                                                 ld::Atom::typeCFI, ld::Atom::symbolTableNotInFinalLinkedImages,
3803                                                                                 false, false, false, ld::Atom::Alignment(0));
3804                 p += sizeof(Atom<A>);
3805         }
3806         this->_endAtoms = (Atom<A>*)p;
3807         return cfis.cfiCount;
3808 }
3809
3810
3811 template <> bool CFISection<x86_64>::bigEndian() { return false; }
3812 template <> bool CFISection<x86>::bigEndian() { return false; }
3813 template <> bool CFISection<arm>::bigEndian() { return false; }
3814
3815
3816 template <>
3817 void CFISection<x86_64>::addCiePersonalityFixups(class Parser<x86_64>& parser, const CFI_Atom_Info* cieInfo)
3818 {
3819         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3820         if ( personalityEncoding == 0x9B ) {
3821                 // compiler always produces X86_64_RELOC_GOT with addend of 4 to personality function
3822                 // CFISection<x86_64>::cfiParse() set targetAddress to be symbolIndex + 4 + addressInCIE
3823                 uint32_t symbolIndex = cieInfo->u.cieInfo.personality.targetAddress - 4
3824                                                                         - cieInfo->address - cieInfo->u.cieInfo.personality.offsetInCFI;
3825                 const macho_nlist<P>& sym = parser.symbolFromIndex(symbolIndex);
3826                 const char* personalityName = parser.nameFromSymbol(sym);
3827
3828                 Atom<x86_64>* cieAtom = this->findAtomByAddress(cieInfo->address);
3829                 Parser<x86_64>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3830                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, personalityName);
3831                 parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, 4);
3832                 parser.addFixup(src, ld::Fixup::k3of3, ld::Fixup::kindStoreX86PCRel32GOT);
3833         }
3834         else if ( personalityEncoding != 0 ) {
3835                 throwf("unsupported address encoding (%02X) of personality function in CIE",
3836                                 personalityEncoding);
3837         }
3838 }
3839
3840 template <>
3841 void CFISection<x86>::addCiePersonalityFixups(class Parser<x86>& parser, const CFI_Atom_Info* cieInfo)
3842 {
3843         uint8_t personalityEncoding = cieInfo->u.cieInfo.personality.encodingOfTargetAddress;
3844         if ( (personalityEncoding == 0x9B) || (personalityEncoding == 0x90) ) {
3845                 uint32_t offsetInCFI = cieInfo->u.cieInfo.personality.offsetInCFI;
3846                 uint32_t nlpAddr = cieInfo->u.cieInfo.personality.targetAddress;
3847                 Atom<x86>* cieAtom = this->findAtomByAddress(cieInfo->address);
3848                 Atom<x86>* nlpAtom = parser.findAtomByAddress(nlpAddr);
3849                 assert(nlpAtom->contentType() == ld::Atom::typeNonLazyPointer);
3850                 Parser<x86>::SourceLocation src(cieAtom, cieInfo->u.cieInfo.personality.offsetInCFI);
3851
3852                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, nlpAtom);
3853                 parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3854                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, offsetInCFI);
3855                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
3856         }
3857         else if ( personalityEncoding != 0 ) {
3858                 throwf("unsupported address encoding (%02X) of personality function in CIE", personalityEncoding);
3859         }
3860 }
3861
3862
3863 template <typename A>
3864 void CFISection<A>::addCiePersonalityFixups(class Parser<A>& parser, const CFI_Atom_Info* cieInfo)
3865 {
3866         // FIX ME
3867         assert(0);
3868 }
3869
3870 template <typename A>
3871 void CFISection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cfis)
3872 {
3873         ld::Fixup::Kind store32 = bigEndian() ? ld::Fixup::kindStoreBigEndian32 : ld::Fixup::kindStoreLittleEndian32;
3874         ld::Fixup::Kind store64 = bigEndian() ? ld::Fixup::kindStoreBigEndian64 : ld::Fixup::kindStoreLittleEndian64;
3875
3876         // add all references for FDEs, including implicit group references
3877         const CFI_Atom_Info* end = &cfis.cfiArray[cfis.cfiCount];
3878         for(const CFI_Atom_Info* p = &cfis.cfiArray[0]; p < end; ++p) {
3879                 if ( p->isCIE ) {
3880                         // add reference to personality function if used
3881                         if ( p->u.cieInfo.personality.targetAddress != CFI_INVALID_ADDRESS ) {
3882                                 this->addCiePersonalityFixups(parser, p);
3883                         }
3884                 }
3885                 else {
3886                         // find FDE Atom
3887                         Atom<A>* fdeAtom = this->findAtomByAddress(p->address);
3888                         // find function Atom
3889                         Atom<A>* functionAtom = parser.findAtomByAddress(p->u.fdeInfo.function.targetAddress);
3890                         // find CIE Atom
3891                         Atom<A>* cieAtom = this->findAtomByAddress(p->u.fdeInfo.cie.targetAddress);
3892                         // find LSDA Atom
3893                         Atom<A>* lsdaAtom = NULL;
3894                         if ( p->u.fdeInfo.lsda.targetAddress != CFI_INVALID_ADDRESS ) {
3895                                 lsdaAtom = parser.findAtomByAddress(p->u.fdeInfo.lsda.targetAddress);
3896                         }
3897                         // add reference from FDE to CIE (always 32-bit pc-rel)
3898                         typename Parser<A>::SourceLocation fdeToCieSrc(fdeAtom, p->u.fdeInfo.cie.offsetInCFI);
3899                         parser.addFixup(fdeToCieSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, fdeAtom);
3900                         parser.addFixup(fdeToCieSrc, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, p->u.fdeInfo.cie.offsetInCFI);
3901                         parser.addFixup(fdeToCieSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, cieAtom);
3902                         parser.addFixup(fdeToCieSrc, ld::Fixup::k4of4, store32, cieAtom);
3903
3904                         // add reference from FDE to function
3905                         typename Parser<A>::SourceLocation fdeToFuncSrc(fdeAtom, p->u.fdeInfo.function.offsetInCFI);
3906                         switch (p->u.fdeInfo.function.encodingOfTargetAddress) {
3907                                 case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3908                                         if ( sizeof(typename A::P::uint_t) == 8 ) {
3909                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3910                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3911                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3912                                                 parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store64);
3913                                                 break;
3914                                         }
3915                                         // else fall into 32-bit case
3916                                 case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3917                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, functionAtom);
3918                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3919                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.function.offsetInCFI);
3920                                         parser.addFixup(fdeToFuncSrc, ld::Fixup::k4of4, store32);
3921                                         break;
3922                                 default:
3923                                         throw "unsupported encoding in FDE of pointer to function";
3924                         }
3925
3926                         // add reference from FDE to LSDA
3927                         typename Parser<A>::SourceLocation fdeToLsdaSrc(fdeAtom,  p->u.fdeInfo.lsda.offsetInCFI);
3928                         if ( lsdaAtom != NULL ) {
3929                                 switch (p->u.fdeInfo.lsda.encodingOfTargetAddress) {
3930                                         case DW_EH_PE_pcrel|DW_EH_PE_ptr:
3931                                                 if ( sizeof(typename A::P::uint_t) == 8 ) {
3932                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3933                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3934                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3935                                                         parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store64);
3936                                                         break;
3937                                                 }
3938                                                 // else fall into 32-bit case
3939                                         case DW_EH_PE_pcrel|DW_EH_PE_sdata4:
3940                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, lsdaAtom);
3941                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k2of4, ld::Fixup::kindSubtractTargetAddress, fdeAtom);
3942                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k3of4, ld::Fixup::kindSubtractAddend, p->u.fdeInfo.lsda.offsetInCFI);
3943                                                 parser.addFixup(fdeToLsdaSrc, ld::Fixup::k4of4, store32);
3944                                         break;
3945                                         default:
3946                                                 throw "unsupported encoding in FDE of pointer to LSDA";
3947                                 }
3948                         }
3949
3950                         // FDE is in group lead by function atom
3951                         typename Parser<A>::SourceLocation fdeSrc(functionAtom,0);
3952                         parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateFDE, fdeAtom);
3953
3954                         // LSDA is in group lead by function atom
3955                         if ( lsdaAtom != NULL ) {
3956                                 parser.addFixup(fdeSrc, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, lsdaAtom);
3957                         }
3958                 }
3959         }
3960 }
3961
3962
3963
3964
3965 template <typename A>
3966 const void*      CFISection<A>::OAS::mappedAddress(pint_t addr)
3967 {
3968         if ( (_ehFrameStartAddr <= addr) && (addr < _ehFrameEndAddr) )
3969                 return &_ehFrameContent[addr-_ehFrameStartAddr];
3970         else {
3971                 // requested bytes are not in __eh_frame section
3972                 // this can occur when examining the instruction bytes in the __text
3973                 File<A>& file = _ehFrameSection.file();
3974                 for (uint32_t i=0; i < file._sectionsArrayCount; ++i ) {
3975                         const macho_section<typename A::P>* sect = file._sectionsArray[i]->machoSection();
3976                         // TentativeDefinitionSection and AbsoluteSymbolSection have no mach-o section
3977                         if ( sect != NULL ) {
3978                                 if ( (sect->addr() <= addr) && (addr < (sect->addr()+sect->size())) ) {
3979                                         return file.fileContent() + sect->offset() + addr - sect->addr();
3980                                 }
3981                         }
3982                 }
3983                 throwf("__eh_frame parsing problem.  Can't find target of reference to address 0x%08llX", (uint64_t)addr);
3984         }
3985 }
3986
3987
3988 template <typename A>
3989 uint64_t CFISection<A>::OAS::getULEB128(pint_t& logicalAddr, pint_t end)
3990 {
3991         uintptr_t size = (end - logicalAddr);
3992         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
3993         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
3994         uint64_t result = libunwind::LocalAddressSpace::getULEB128(laddr, laddr+size);
3995         logicalAddr += (laddr-sladdr);
3996         return result;
3997 }
3998
3999 template <typename A>
4000 int64_t CFISection<A>::OAS::getSLEB128(pint_t& logicalAddr, pint_t end)
4001 {
4002         uintptr_t size = (end - logicalAddr);
4003         libunwind::LocalAddressSpace::pint_t laddr = (libunwind::LocalAddressSpace::pint_t)mappedAddress(logicalAddr);
4004         libunwind::LocalAddressSpace::pint_t sladdr = laddr;
4005         int64_t result = libunwind::LocalAddressSpace::getSLEB128(laddr, laddr+size);
4006         logicalAddr += (laddr-sladdr);
4007         return result;
4008 }
4009
4010 template <typename A>
4011 typename A::P::uint_t CFISection<A>::OAS::getEncodedP(pint_t& addr, pint_t end, uint8_t encoding)
4012 {
4013         pint_t startAddr = addr;
4014         pint_t p = addr;
4015         pint_t result;
4016
4017         // first get value
4018         switch (encoding & 0x0F) {
4019                 case DW_EH_PE_ptr:
4020                         result = getP(addr);
4021                         p += sizeof(pint_t);
4022                         addr = (pint_t)p;
4023                         break;
4024                 case DW_EH_PE_uleb128:
4025                         result = getULEB128(addr, end);
4026                         break;
4027                 case DW_EH_PE_udata2:
4028                         result = get16(addr);
4029                         p += 2;
4030                         addr = (pint_t)p;
4031                         break;
4032                 case DW_EH_PE_udata4:
4033                         result = get32(addr);
4034                         p += 4;
4035                         addr = (pint_t)p;
4036                         break;
4037                 case DW_EH_PE_udata8:
4038                         result = get64(addr);
4039                         p += 8;
4040                         addr = (pint_t)p;
4041                         break;
4042                 case DW_EH_PE_sleb128:
4043                         result = getSLEB128(addr, end);
4044                         break;
4045                 case DW_EH_PE_sdata2:
4046                         result = (int16_t)get16(addr);
4047                         p += 2;
4048                         addr = (pint_t)p;
4049                         break;
4050                 case DW_EH_PE_sdata4:
4051                         result = (int32_t)get32(addr);
4052                         p += 4;
4053                         addr = (pint_t)p;
4054                         break;
4055                 case DW_EH_PE_sdata8:
4056                         result = get64(addr);
4057                         p += 8;
4058                         addr = (pint_t)p;
4059                         break;
4060                 default:
4061                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4062         }
4063
4064         // then add relative offset
4065         switch ( encoding & 0x70 ) {
4066                 case DW_EH_PE_absptr:
4067                         // do nothing
4068                         break;
4069                 case DW_EH_PE_pcrel:
4070                         result += startAddr;
4071                         break;
4072                 case DW_EH_PE_textrel:
4073                         throw "DW_EH_PE_textrel pointer encoding not supported";
4074                         break;
4075                 case DW_EH_PE_datarel:
4076                         throw "DW_EH_PE_datarel pointer encoding not supported";
4077                         break;
4078                 case DW_EH_PE_funcrel:
4079                         throw "DW_EH_PE_funcrel pointer encoding not supported";
4080                         break;
4081                 case DW_EH_PE_aligned:
4082                         throw "DW_EH_PE_aligned pointer encoding not supported";
4083                         break;
4084                 default:
4085                         throwf("ObjectFileAddressSpace<A>::getEncodedP() encoding 0x%08X not supported", encoding);
4086                         break;
4087         }
4088
4089 //  Note: DW_EH_PE_indirect is only used in CIEs to refernce the personality pointer
4090 //  When parsing .o files that pointer contains zero, so we don't to return that.
4091 //  Instead we skip the dereference and return the address of the pointer.
4092 //      if ( encoding & DW_EH_PE_indirect )
4093 //              result = getP(result);
4094
4095         return result;
4096 }
4097
4098 template <>
4099 const char* CUSection<x86_64>::personalityName(class Parser<x86_64>& parser, const macho_relocation_info<x86_64::P>* reloc)
4100 {
4101         assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4102         assert((reloc->r_type() == X86_64_RELOC_UNSIGNED) && "wrong reloc type on personality column in __compact_unwind section");
4103         const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4104         return parser.nameFromSymbol(sym);
4105 }
4106
4107 template <>
4108 const char* CUSection<x86>::personalityName(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
4109 {
4110         assert(reloc->r_extern() && "reloc not extern on personality column in __compact_unwind section");
4111         assert((reloc->r_type() == GENERIC_RELOC_VANILLA) && "wrong reloc type on personality column in __compact_unwind section");
4112         const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
4113         return parser.nameFromSymbol(sym);
4114 }
4115
4116 template <typename A>
4117 const char* CUSection<A>::personalityName(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
4118 {
4119         return NULL;
4120 }
4121
4122
4123 template <typename A>
4124 int CUSection<A>::infoSorter(const void* l, const void* r)
4125 {
4126         // sort references by symbol index, then address
4127         const Info* left = (Info*)l;
4128         const Info* right = (Info*)r;
4129         if ( left->functionSymbolIndex == right->functionSymbolIndex )
4130                 return (left->functionStartAddress - right->functionStartAddress);
4131         else
4132                 return (left->functionSymbolIndex - right->functionSymbolIndex);
4133 }
4134
4135 template <typename A>
4136 void CUSection<A>::parse(class Parser<A>& parser, uint32_t cnt, Info array[])
4137 {
4138         // walk section content and copy to Info array
4139         const macho_compact_unwind_entry<P>* const entries = (macho_compact_unwind_entry<P>*)(this->file().fileContent() + this->_machOSection->offset());
4140         for (uint32_t i=0; i < cnt; ++i) {
4141                 Info* info = &array[i];
4142                 const macho_compact_unwind_entry<P>* entry = &entries[i];
4143                 info->functionStartAddress      = entry->codeStart();
4144                 info->functionSymbolIndex   = 0xFFFFFFFF;
4145                 info->rangeLength                       = entry->codeLen();
4146                 info->compactUnwindInfo         = entry->compactUnwindInfo();
4147                 info->personality                       = NULL;
4148                 info->lsdaAddress                       = entry->lsda();
4149                 info->function                          = NULL;
4150                 info->lsda                                      = NULL;
4151                 if ( (info->compactUnwindInfo & UNWIND_PERSONALITY_MASK) != 0 )
4152                         warning("no bits should be set in UNWIND_PERSONALITY_MASK of compact unwind encoding in __LD,__compact_unwind section");
4153                 if ( info->lsdaAddress != 0 ) {
4154                         info->compactUnwindInfo |= UNWIND_HAS_LSDA;
4155                 }
4156         }
4157
4158         // scan relocs, local relocs are useless - ignore them
4159         // extern relocs are needed for personality references (possibly for function/lsda refs??)
4160         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(this->file().fileContent() + this->_machOSection->reloff());
4161         const macho_relocation_info<P>* relocsEnd = &relocs[this->_machOSection->nreloc()];
4162         for (const macho_relocation_info<P>* reloc = relocs; reloc < relocsEnd; ++reloc) {
4163                 if ( reloc->r_extern() ) {
4164                         // only expect external relocs on some colummns
4165                         if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::personalityFieldOffset() ) {
4166                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4167                                 array[entryIndex].personality = this->personalityName(parser, reloc);
4168                         }
4169                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::lsdaFieldOffset() ) {
4170                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4171                                 const macho_nlist<P>& lsdaSym = parser.symbolFromIndex(reloc->r_symbolnum());
4172                                 if ( (lsdaSym.n_type() & N_TYPE) == N_SECT )
4173                                         array[entryIndex].lsdaAddress = lsdaSym.n_value();
4174                                 else
4175                                         warning("unexpected extern relocation to lsda in __compact_unwind section");
4176                         }
4177                         else if ( (reloc->r_address() % sizeof(macho_compact_unwind_entry<P>)) == macho_compact_unwind_entry<P>::codeStartFieldOffset() ) {
4178                                 uint32_t entryIndex = reloc->r_address() / sizeof(macho_compact_unwind_entry<P>);
4179                                 array[entryIndex].functionSymbolIndex = reloc->r_symbolnum();
4180                         }
4181                         else {
4182                                 warning("unexpected extern relocation in __compact_unwind section");
4183                         }
4184                 }
4185         }
4186
4187         // sort array by function start address so unwind infos will be contiguous for a given function
4188         ::qsort(array, cnt, sizeof(Info), infoSorter);
4189 }
4190
4191 template <typename A>
4192 uint32_t CUSection<A>::count()
4193 {
4194         const macho_section<P>* machoSect =     this->machoSection();
4195         if ( (machoSect->size() % sizeof(macho_compact_unwind_entry<P>)) != 0 )
4196                 throw "malformed __LD,__compact_unwind section, bad length";
4197
4198         return machoSect->size() / sizeof(macho_compact_unwind_entry<P>);
4199 }
4200
4201 template <typename A>
4202 void CUSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays& cus)
4203 {
4204         Info* const arrayStart = cus.cuArray;
4205         Info* const arrayEnd = &cus.cuArray[cus.cuCount];
4206         for (Info* info=arrayStart; info < arrayEnd; ++info) {
4207                 // if external reloc was used, real address is symbol n_value + addend
4208                 if ( info->functionSymbolIndex != 0xFFFFFFFF )
4209                         info->functionStartAddress += parser.symbolFromIndex(info->functionSymbolIndex).n_value();
4210                 // find function atom from address
4211                 info->function = parser.findAtomByAddress(info->functionStartAddress);
4212                 // find lsda atom from address
4213                 if ( info->lsdaAddress != 0 ) {
4214                         info->lsda = parser.findAtomByAddress(info->lsdaAddress);
4215                         // add lsda subordinate
4216                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4217                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, info->lsda);
4218                 }
4219                 if ( info->personality != NULL ) {
4220                         // add personality subordinate
4221                         typename Parser<A>::SourceLocation src(info->function, info->functionStartAddress - info->function->objectAddress());
4222                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinatePersonality, false, info->personality);
4223                 }
4224         }
4225
4226 }
4227
4228 template <typename A>
4229 SymboledSection<A>::SymboledSection(Parser<A>& parser, File<A>& f, const macho_section<typename A::P>* s)
4230         : Section<A>(f, s), _type(ld::Atom::typeUnclassified)
4231 {
4232         switch ( s->flags() & SECTION_TYPE ) {
4233                 case S_ZEROFILL:
4234                         _type = ld::Atom::typeZeroFill;
4235                         break;
4236                 case S_MOD_INIT_FUNC_POINTERS:
4237                         _type = ld::Atom::typeInitializerPointers;
4238                         break;
4239                 case S_MOD_TERM_FUNC_POINTERS:
4240                         _type = ld::Atom::typeTerminatorPointers;
4241                         break;
4242                 case S_THREAD_LOCAL_VARIABLES:
4243                         _type = ld::Atom::typeTLV;
4244                         break;
4245                 case S_THREAD_LOCAL_ZEROFILL:
4246                         _type = ld::Atom::typeTLVZeroFill;
4247                         break;
4248                 case S_THREAD_LOCAL_REGULAR:
4249                         _type = ld::Atom::typeTLVInitialValue;
4250                         break;
4251                 case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
4252                         _type = ld::Atom::typeTLVInitializerPointers;
4253                         break;
4254                 case S_REGULAR:
4255                         if ( strncmp(s->sectname(), "__gcc_except_tab", 16) == 0 )
4256                                 _type = ld::Atom::typeLSDA;
4257                         else if ( this->type() == ld::Section::typeInitializerPointers )
4258                                 _type = ld::Atom::typeInitializerPointers;
4259                         break;
4260         }
4261 }
4262
4263
4264 template <typename A>
4265 bool SymboledSection<A>::dontDeadStrip()
4266 {
4267         switch ( _type ) {
4268                 case ld::Atom::typeInitializerPointers:
4269                 case ld::Atom::typeTerminatorPointers:
4270                         return true;
4271                 default:
4272                         // model an object file without MH_SUBSECTIONS_VIA_SYMBOLS as one in which nothing can be dead stripped
4273                         if ( ! this->_file.canScatterAtoms() )
4274                                 return true;
4275                         // call inherited
4276                         return Section<A>::dontDeadStrip();
4277         }
4278         return false;
4279 }
4280
4281
4282 template <typename A>
4283 uint32_t SymboledSection<A>::computeAtomCount(class Parser<A>& parser,
4284                                                                                                 struct Parser<A>::LabelAndCFIBreakIterator& it,
4285                                                                                                 const struct Parser<A>::CFI_CU_InfoArrays&)
4286 {
4287         const pint_t startAddr = this->_machOSection->addr();
4288         const pint_t endAddr = startAddr + this->_machOSection->size();
4289         const uint32_t sectNum = this->sectionNum(parser);
4290
4291         uint32_t count = 0;
4292         pint_t  addr;
4293         pint_t  size;
4294         const macho_nlist<P>* sym;
4295         while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &sym) ) {
4296                 ++count;
4297         }
4298         //fprintf(stderr, "computeAtomCount(%s,%s) => %d\n", this->segmentName(), this->sectionName(), count);
4299         return count;
4300 }
4301
4302 template <typename A>
4303 uint32_t SymboledSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4304                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4305                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4306 {
4307         this->_beginAtoms = (Atom<A>*)p;
4308
4309         //fprintf(stderr, "SymboledSection::appendAtoms() in section %s\n", this->_machOSection->sectname());
4310         const pint_t startAddr = this->_machOSection->addr();
4311         const pint_t endAddr = startAddr + this->_machOSection->size();
4312         const uint32_t sectNum = this->sectionNum(parser);
4313
4314         uint32_t count = 0;
4315         pint_t  addr;
4316         pint_t  size;
4317         const macho_nlist<P>* label;
4318         while ( it.next(parser, sectNum, startAddr, endAddr, &addr, &size, &label) ) {
4319                 Atom<A>* allocatedSpace = (Atom<A>*)p;
4320                 // is break because of label or CFI?
4321                 if ( label != NULL ) {
4322                         // The size is computed based on the address of the next label (or the end of the section for the last label)
4323                         // If there are two labels at the same address, we want them one to be an alias of the other.
4324                         // If the label is at the end of a section, it is has zero size, but is not an alias
4325                         const bool isAlias = ( (size == 0) && (addr <  endAddr) );
4326                         new (allocatedSpace) Atom<A>(*this, parser, *label, size, isAlias);
4327                         if ( isAlias )
4328                                 this->_hasAliases = true;
4329                 }
4330                 else {
4331                         ld::Atom::SymbolTableInclusion inclusion = ld::Atom::symbolTableNotIn;
4332                         ld::Atom::ContentType ctype = this->contentType();
4333                         if ( ctype == ld::Atom::typeLSDA )
4334                                 inclusion = ld::Atom::symbolTableInWithRandomAutoStripLabel;
4335                         new (allocatedSpace) Atom<A>(*this, "anon", addr, size, ld::Atom::definitionRegular, ld::Atom::combineNever,
4336                                                                                 ld::Atom::scopeTranslationUnit, ctype, inclusion,
4337                                                                                 this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4338                 }
4339                 p += sizeof(Atom<A>);
4340                 ++count;
4341         }
4342
4343         this->_endAtoms = (Atom<A>*)p;
4344         return count;
4345 }
4346
4347
4348 template <typename A>
4349 uint32_t ImplicitSizeSection<A>::computeAtomCount(class Parser<A>& parser,
4350                                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4351                                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4352 {
4353         uint32_t count = 0;
4354         const macho_section<P>* sect = this->machoSection();
4355         const pint_t startAddr = sect->addr();
4356         const pint_t endAddr = startAddr + sect->size();
4357         for (pint_t addr = startAddr; addr < endAddr; addr += elementSizeAtAddress(addr) ) {
4358                 if ( useElementAt(parser, it, addr) )
4359                         ++count;
4360         }
4361         if ( it.fileHasOverlappingSymbols && (sect->size() != 0) && (this->combine(parser, startAddr) == ld::Atom::combineByNameAndContent) ) {
4362                 // if there are multiple labels in this section for the same address, then clone them into multi atoms
4363                 pint_t  prevSymbolAddr = (pint_t)(-1);
4364                 uint8_t prevSymbolSectNum = 0;
4365                 for(uint32_t i=0; i < it.sortedSymbolCount; ++i) {
4366                         const macho_nlist<P>& sym = parser.symbolFromIndex(it.sortedSymbolIndexes[i]);
4367                         const pint_t symbolAddr = sym.n_value();
4368                         const pint_t symbolSectNum = sym.n_sect();
4369                         if ( (symbolAddr == prevSymbolAddr) && (prevSymbolSectNum == symbolSectNum) && (symbolSectNum == this->sectionNum(parser)) ) {
4370                                 ++count;
4371                         }
4372                         prevSymbolAddr = symbolAddr;
4373                         prevSymbolSectNum = symbolSectNum;
4374                 }
4375         }
4376         return count;
4377 }
4378
4379 template <typename A>
4380 uint32_t ImplicitSizeSection<A>::appendAtoms(class Parser<A>& parser, uint8_t* p,
4381                                                                                         struct Parser<A>::LabelAndCFIBreakIterator& it,
4382                                                                                         const struct Parser<A>::CFI_CU_InfoArrays&)
4383 {
4384         this->_beginAtoms = (Atom<A>*)p;
4385
4386         const macho_section<P>* sect = this->machoSection();
4387         const pint_t startAddr = sect->addr();
4388         const pint_t endAddr = startAddr + sect->size();
4389         const uint32_t sectNum = this->sectionNum(parser);
4390         //fprintf(stderr, "ImplicitSizeSection::appendAtoms() in section %s\n", sect->sectname());
4391         uint32_t count = 0;
4392         pint_t  foundAddr;
4393         pint_t  size;
4394         const macho_nlist<P>* foundLabel;
4395         Atom<A>* allocatedSpace;
4396         while ( it.next(parser, sectNum, startAddr, endAddr, &foundAddr, &size, &foundLabel) ) {
4397                 if ( foundLabel != NULL ) {
4398                         pint_t labeledAtomSize = this->elementSizeAtAddress(foundAddr);
4399                         allocatedSpace = (Atom<A>*)p;
4400                         if ( this->ignoreLabel(parser.nameFromSymbol(*foundLabel)) ) {
4401                                 //fprintf(stderr, "  0x%08llX make annon\n", (uint64_t)foundAddr);
4402                                 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, foundAddr), foundAddr,
4403                                                                                         this->elementSizeAtAddress(foundAddr), this->definition(),
4404                                                                                         this->combine(parser, foundAddr), this->scopeAtAddress(parser, foundAddr),
4405                                                                                         this->contentType(), this->symbolTableInclusion(),
4406                                                                                         this->dontDeadStrip(), false, false, this->alignmentForAddress(foundAddr));
4407                         }
4408                         else {
4409                                 // make named atom for label
4410                                 //fprintf(stderr, "  0x%08llX make labeled\n", (uint64_t)foundAddr);
4411                                 new (allocatedSpace) Atom<A>(*this, parser, *foundLabel, labeledAtomSize);
4412                         }
4413                         ++count;
4414                         p += sizeof(Atom<A>);
4415                         foundAddr += labeledAtomSize;
4416                         size -= labeledAtomSize;
4417                 }
4418                 // some number of anonymous atoms
4419                 for (pint_t addr = foundAddr; addr < (foundAddr+size); addr += elementSizeAtAddress(addr) ) {
4420                         // make anon atoms for area before label
4421                         if ( this->useElementAt(parser, it, addr) ) {
4422                                 //fprintf(stderr, "  0x%08llX make annon\n", (uint64_t)addr);
4423                                 allocatedSpace = (Atom<A>*)p;
4424                                 new (allocatedSpace) Atom<A>(*this, this->unlabeledAtomName(parser, addr), addr, this->elementSizeAtAddress(addr),
4425                                                                                         this->definition(), this->combine(parser, addr), this->scopeAtAddress(parser, addr),
4426                                                                                         this->contentType(), this->symbolTableInclusion(),
4427                                                                                         this->dontDeadStrip(), false, false, this->alignmentForAddress(addr));
4428                                 ++count;
4429                                 p += sizeof(Atom<A>);
4430                         }
4431                 }
4432         }
4433
4434         this->_endAtoms = (Atom<A>*)p;
4435
4436         return count;
4437 }
4438
4439
4440 template <typename A>
4441 unsigned long Literal4Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4442 {
4443         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4444         return *literalContent;
4445 }
4446
4447 template <typename A>
4448 bool Literal4Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4449                                                                                                         const ld::IndirectBindingTable& ind) const
4450 {
4451         assert(this->type() == rhs.section().type());
4452         const uint32_t* literalContent = (uint32_t*)atom->contentPointer();
4453
4454         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4455         assert(rhsAtom != NULL);
4456         if ( rhsAtom != NULL ) {
4457                 const uint32_t* rhsLiteralContent = (uint32_t*)rhsAtom->contentPointer();
4458                 return (*literalContent == *rhsLiteralContent);
4459         }
4460         return false;
4461 }
4462
4463
4464 template <typename A>
4465 unsigned long Literal8Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4466 {
4467 #if __LP64__
4468         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4469         return *literalContent;
4470 #else
4471         unsigned long hash = 5381;
4472         const uint8_t* byteContent = atom->contentPointer();
4473         for (int i=0; i < 8; ++i) {
4474                 hash = hash * 33 + byteContent[i];
4475         }
4476         return hash;
4477 #endif
4478 }
4479
4480 template <typename A>
4481 bool Literal8Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4482                                                                                                         const ld::IndirectBindingTable& ind) const
4483 {
4484         if ( rhs.section().type() != ld::Section::typeLiteral8 )
4485                 return false;
4486         assert(this->type() == rhs.section().type());
4487         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4488
4489         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4490         assert(rhsAtom != NULL);
4491         if ( rhsAtom != NULL ) {
4492                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4493                 return (*literalContent == *rhsLiteralContent);
4494         }
4495         return false;
4496 }
4497
4498
4499 template <typename A>
4500 unsigned long Literal16Section<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4501 {
4502         unsigned long hash = 5381;
4503         const uint8_t* byteContent = atom->contentPointer();
4504         for (int i=0; i < 16; ++i) {
4505                 hash = hash * 33 + byteContent[i];
4506         }
4507         return hash;
4508 }
4509
4510 template <typename A>
4511 bool Literal16Section<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4512                                                                                                         const ld::IndirectBindingTable& ind) const
4513 {
4514         if ( rhs.section().type() != ld::Section::typeLiteral16 )
4515                 return false;
4516         assert(this->type() == rhs.section().type());
4517         const uint64_t* literalContent = (uint64_t*)atom->contentPointer();
4518
4519         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4520         assert(rhsAtom != NULL);
4521         if ( rhsAtom != NULL ) {
4522                 const uint64_t* rhsLiteralContent = (uint64_t*)rhsAtom->contentPointer();
4523                 return ((literalContent[0] == rhsLiteralContent[0]) && (literalContent[1] == rhsLiteralContent[1]));
4524         }
4525         return false;
4526 }
4527
4528
4529
4530 template <typename A>
4531 typename A::P::uint_t CStringSection<A>::elementSizeAtAddress(pint_t addr)
4532 {
4533         const macho_section<P>* sect = this->machoSection();
4534         const char* stringContent = (char*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4535         return strlen(stringContent) + 1;
4536 }
4537
// Always returns true; none of the arguments are consulted.
// ImplicitSizeSection<A>::computeAtomCount and appendAtoms call useElementAt()
// to decide whether the element at 'addr' gets an atom.
template <typename A>
bool CStringSection<A>::useElementAt(Parser<A>& parser, struct Parser<A>::LabelAndCFIBreakIterator& it, pint_t addr)
{
        return true;
}
4543
4544 template <typename A>
4545 bool CStringSection<A>::ignoreLabel(const char* label)
4546 {
4547         return (label[0] == 'L') || (label[0] == 'l');
4548 }
4549
4550 template <typename A>
4551 Atom<A>* CStringSection<A>::findAtomByAddress(pint_t addr)
4552 {
4553         Atom<A>* result = this->findContentAtomByAddress(addr, this->_beginAtoms, this->_endAtoms);
4554         return result;
4555 }
4556
4557 template <typename A>
4558 unsigned long CStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4559 {
4560         unsigned long hash = 5381;
4561         const char* stringContent = (char*)atom->contentPointer();
4562         for (const char* s = stringContent; *s != '\0'; ++s) {
4563                 hash = hash * 33 + *s;
4564         }
4565         return hash;
4566 }
4567
4568
4569 template <typename A>
4570 bool CStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4571                                                                                                         const ld::IndirectBindingTable& ind) const
4572 {
4573         if ( rhs.section().type() != ld::Section::typeCString )
4574                 return false;
4575         assert(this->type() == rhs.section().type());
4576         assert(strcmp(this->sectionName(), rhs.section().sectionName())== 0);
4577         assert(strcmp(this->segmentName(), rhs.section().segmentName())== 0);
4578         const char* stringContent = (char*)atom->contentPointer();
4579
4580         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4581         assert(rhsAtom != NULL);
4582         if ( rhsAtom != NULL ) {
4583                 if ( atom->_size != rhsAtom->_size )
4584                         return false;
4585                 const char* rhsStringContent = (char*)rhsAtom->contentPointer();
4586                 return (strcmp(stringContent, rhsStringContent) == 0);
4587         }
4588         return false;
4589 }
4590
4591
// x86 specialization: the fixup kind for this section's pointers is
// kindStoreLittleEndian32.  NonLazyPointerSection<A>::makeFixups() passes
// this value to parser.addFixups().
template <>
ld::Fixup::Kind NonLazyPointerSection<x86>::fixupKind()
{
        return ld::Fixup::kindStoreLittleEndian32;
}
4597
// arm specialization: the fixup kind for this section's pointers is
// kindStoreLittleEndian32.  NonLazyPointerSection<A>::makeFixups() passes
// this value to parser.addFixups().
template <>
ld::Fixup::Kind NonLazyPointerSection<arm>::fixupKind()
{
        return ld::Fixup::kindStoreLittleEndian32;
}
4603
4604
// x86_64 specialization: asserts unconditionally because, per the assert
// message, x86_64 object files are not expected to contain non-lazy-pointer
// sections.  When asserts are compiled out this does nothing.
template <>
void NonLazyPointerSection<x86_64>::makeFixups(class Parser<x86_64>& parser, const struct Parser<x86_64>::CFI_CU_InfoArrays&)
{
        assert(0 && "x86_64 should not have non-lazy-pointer sections in .o files");
}
4610
4611 template <typename A>
4612 void NonLazyPointerSection<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
4613 {
4614         // add references for each NLP atom based on indirect symbol table
4615         const macho_section<P>* sect = this->machoSection();
4616         const pint_t endAddr = sect->addr() + sect->size();
4617         for( pint_t addr = sect->addr(); addr < endAddr; addr += sizeof(pint_t)) {
4618                 typename Parser<A>::SourceLocation      src;
4619                 typename Parser<A>::TargetDesc          target;
4620                 src.atom = this->findAtomByAddress(addr);
4621                 src.offsetInAtom = 0;
4622                 uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4623                 target.atom = NULL;
4624                 target.name = NULL;
4625                 target.weakImport = false;
4626                 target.addend = 0;
4627                 if ( symIndex == INDIRECT_SYMBOL_LOCAL ) {
4628                         // use direct reference for local symbols
4629                         const pint_t* nlpContent = (pint_t*)(this->file().fileContent() + sect->offset() + addr - sect->addr());
4630                         pint_t targetAddr = P::getP(*nlpContent);
4631                         target.atom = parser.findAtomByAddress(targetAddr);
4632                         target.weakImport = false;
4633                         target.addend = (targetAddr - target.atom->objectAddress());
4634                         // <rdar://problem/8385011> if pointer to thumb function, mask of thumb bit (not an addend of +1)
4635                         if ( target.atom->isThumb() )
4636                                 target.addend &= (-2);
4637                         assert(src.atom->combine() == ld::Atom::combineNever);
4638                 }
4639                 else {
4640                         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4641                         // use direct reference for local symbols
4642                         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
4643                                 parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
4644                                 assert(src.atom->combine() == ld::Atom::combineNever);
4645                         }
4646                         else {
4647                                 target.name = parser.nameFromSymbol(sym);
4648                                 target.weakImport = parser.weakImportFromSymbol(sym);
4649                                 assert(src.atom->combine() == ld::Atom::combineByNameAndReferences);
4650                         }
4651                 }
4652                 parser.addFixups(src, this->fixupKind(), target);
4653         }
4654 }
4655
4656 template <typename A>
4657 ld::Atom::Combine NonLazyPointerSection<A>::combine(Parser<A>& parser, pint_t addr)
4658 {
4659         const macho_section<P>* sect = this->machoSection();
4660         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4661         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4662                 return ld::Atom::combineNever;
4663
4664         // don't coalesce non-lazy-pointers to local symbols
4665         const macho_nlist<P>& sym = parser.symbolFromIndex(symIndex);
4666         if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) )
4667                 return ld::Atom::combineNever;
4668
4669         return ld::Atom::combineByNameAndReferences;
4670 }
4671
4672 template <typename A>
4673 const char* NonLazyPointerSection<A>::targetName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind)
4674 {
4675         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4676         assert(atom->fixupCount() == 1);
4677         ld::Fixup::iterator fit = atom->fixupsBegin();
4678         const char* name = NULL;
4679         switch ( fit->binding ) {
4680                 case ld::Fixup::bindingByNameUnbound:
4681                         name = fit->u.name;
4682                         break;
4683                 case ld::Fixup::bindingByContentBound:
4684                         name = fit->u.target->name();
4685                         break;
4686                 case ld::Fixup::bindingsIndirectlyBound:
4687                         name = ind.indirectName(fit->u.bindingIndex);
4688                         break;
4689                 default:
4690                         assert(0);
4691         }
4692         assert(name != NULL);
4693         return name;
4694 }
4695
4696 template <typename A>
4697 unsigned long NonLazyPointerSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4698 {
4699         assert(atom->combine() == ld::Atom::combineByNameAndReferences);
4700         unsigned long hash = 9508;
4701         for (const char* s = this->targetName(atom, ind); *s != '\0'; ++s) {
4702                 hash = hash * 33 + *s;
4703         }
4704         return hash;
4705 }
4706
4707 template <typename A>
4708 bool NonLazyPointerSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4709                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4710 {
4711         if ( rhs.section().type() != ld::Section::typeNonLazyPointer )
4712                 return false;
4713         assert(this->type() == rhs.section().type());
4714         // there can be many non-lazy pointer in different section names
4715         // we only want to coalesce in same section name
4716         if ( *this != rhs.section() )
4717                 return false;
4718         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4719         assert(rhsAtom !=  NULL);
4720         const char* thisName = this->targetName(atom, indirectBindingTable);
4721         const char* rhsName = this->targetName(rhsAtom, indirectBindingTable);
4722         return (strcmp(thisName, rhsName) == 0);
4723 }
4724
4725 template <typename A>
4726 ld::Atom::Scope NonLazyPointerSection<A>::scopeAtAddress(Parser<A>& parser, pint_t addr)
4727 {
4728         const macho_section<P>* sect = this->machoSection();
4729         uint32_t symIndex = parser.symbolIndexFromIndirectSectionAddress(addr, sect);
4730         if ( symIndex == INDIRECT_SYMBOL_LOCAL)
4731                 return ld::Atom::scopeTranslationUnit;
4732         else
4733                 return ld::Atom::scopeLinkageUnit;
4734 }
4735
4736
4737 template <typename A>
4738 const uint8_t* CFStringSection<A>::targetContent(const class Atom<A>* atom, const ld::IndirectBindingTable& ind,
4739                                                                                                         ContentType* ct, unsigned int* count)
4740 {
4741         *ct = contentUnknown;
4742         for (ld::Fixup::iterator fit=atom->fixupsBegin(), end=atom->fixupsEnd(); fit != end; ++fit) {
4743                 const ld::Atom* targetAtom = NULL;
4744                 switch ( fit->binding ) {
4745                         case ld::Fixup::bindingByNameUnbound:
4746                                 // ignore reference to ___CFConstantStringClassReference
4747                                 // we are just looking for reference to backing string data
4748                                 assert(fit->offsetInAtom == 0);
4749                                 assert(strcmp(fit->u.name, "___CFConstantStringClassReference") == 0);
4750                                 break;
4751                         case ld::Fixup::bindingDirectlyBound:
4752                         case ld::Fixup::bindingByContentBound:
4753                                 targetAtom = fit->u.target;
4754                                 break;
4755                         case ld::Fixup::bindingsIndirectlyBound:
4756                                 targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4757                                 break;
4758                         default:
4759                                 assert(0 && "bad binding type");
4760                 }
4761                 assert(targetAtom != NULL);
4762                 const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
4763                 if ( targetAtom->section().type() == ld::Section::typeCString ) {
4764                         *ct = contentUTF8;
4765                         *count = targetAtom->size();
4766                 }
4767                 else if ( targetAtom->section().type() == ld::Section::typeUTF16Strings ) {
4768                         *ct = contentUTF16;
4769                         *count = (targetAtom->size()+1)/2; // round up incase of buggy compiler that has only one trailing zero byte
4770                 }
4771                 assert(target !=  NULL);
4772                 return target->contentPointer();
4773         }
4774         assert(0);
4775         return NULL;
4776 }
4777
4778 template <typename A>
4779 unsigned long CFStringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4780 {
4781         // base hash of CFString on hash of cstring it wraps
4782         ContentType cType;
4783         unsigned long hash;
4784         unsigned int charCount;
4785         const uint8_t* content = this->targetContent(atom, ind, &cType, &charCount);
4786         switch ( cType ) {
4787                 case contentUTF8:
4788                         hash = 9408;
4789                         for (const char* s = (char*)content; *s != '\0'; ++s) {
4790                                 hash = hash * 33 + *s;
4791                         }
4792                         return hash;
4793                 case contentUTF16:
4794                         hash = 407955;
4795                         --charCount; // don't add last 0x0000 to hash because some buggy compilers only have trailing single byte
4796                         for (const uint16_t* s = (uint16_t*)content; charCount > 0; ++s, --charCount) {
4797                                 hash = hash * 1025 + *s;
4798                         }
4799                         return hash;
4800                 case contentUnknown:
4801                         return 0;
4802         }
4803         return 0;
4804 }
4805
4806
4807 template <typename A>
4808 bool CFStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4809                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4810 {
4811         if ( atom == &rhs )
4812                 return true;
4813         if ( rhs.section().type() != ld::Section::typeCFString)
4814                 return false;
4815         assert(this->type() == rhs.section().type());
4816         assert(strcmp(this->sectionName(), "__cfstring") == 0);
4817
4818         ContentType thisType;
4819         unsigned int charCount;
4820         const uint8_t* cstringContent = this->targetContent(atom, indirectBindingTable, &thisType, &charCount);
4821         ContentType rhsType;
4822         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4823         assert(rhsAtom !=  NULL);
4824         unsigned int rhsCharCount;
4825         const uint8_t* rhsStringContent = this->targetContent(rhsAtom, indirectBindingTable, &rhsType, &rhsCharCount);
4826
4827         if ( thisType != rhsType )
4828                 return false;
4829
4830         // no need to compare content of pointers are already the same
4831         if ( cstringContent == rhsStringContent )
4832                 return true;
4833
4834         // no need to compare content if size is different
4835         if ( charCount != rhsCharCount )
4836                 return false;
4837
4838         switch ( thisType ) {
4839                 case contentUTF8:
4840                         return (strcmp((char*)cstringContent, (char*)rhsStringContent) == 0);
4841                 case contentUTF16:
4842                         {
4843                                 const uint16_t* cstringContent16 = (uint16_t*)cstringContent;
4844                                 const uint16_t* rhsStringContent16 = (uint16_t*)rhsStringContent;
4845                                 for (unsigned int i = 0; i < charCount; ++i) {
4846                                         if ( cstringContent16[i] != rhsStringContent16[i] )
4847                                                 return false;
4848                                 }
4849                                 return true;
4850                         }
4851                 case contentUnknown:
4852                         return false;
4853         }
4854         return false;
4855 }
4856
4857
4858 template <typename A>
4859 typename A::P::uint_t ObjC1ClassSection<A>::elementSizeAtAddress(pint_t addr)
4860 {
4861         // nominal size for each class is 48 bytes, but sometimes the compiler
4862         // over aligns and there is padding after class data
4863         const macho_section<P>* sct = this->machoSection();
4864         uint32_t align = 1 << sct->align();
4865         uint32_t size = ((12 * sizeof(pint_t)) + align-1) & (-align);
4866         return size;
4867 }
4868
4869 template <typename A>
4870 const char* ObjC1ClassSection<A>::unlabeledAtomName(Parser<A>& parser, pint_t addr)
4871 {
4872         // 8-bytes into class object is pointer to class name
4873         const macho_section<P>* sct = this->machoSection();
4874         uint32_t classObjcFileOffset = sct->offset() - sct->addr() + addr;
4875         const uint8_t* mappedFileContent = this->file().fileContent();
4876         pint_t nameAddr = P::getP(*((pint_t*)(mappedFileContent+classObjcFileOffset+2*sizeof(pint_t))));
4877
4878         // find section containing string address to get string bytes
4879         const macho_section<P>* const sections = parser.firstMachOSection();
4880         const uint32_t sectionCount = parser.machOSectionCount();
4881         for (uint32_t i=0; i < sectionCount; ++i) {
4882                 const macho_section<P>* aSect = &sections[i];
4883                 if ( (aSect->addr() <= nameAddr) && (nameAddr < (aSect->addr()+aSect->size())) ) {
4884                         assert((aSect->flags() & SECTION_TYPE) == S_CSTRING_LITERALS);
4885                         uint32_t nameFileOffset = aSect->offset() - aSect->addr() + nameAddr;
4886                         const char* name = (char*)mappedFileContent + nameFileOffset;
4887                         // spin through symbol table to find absolute symbol corresponding to this class
4888                         for (uint32_t s=0; s < parser.symbolCount(); ++s) {
4889                                 const macho_nlist<P>& sym =     parser.symbolFromIndex(s);
4890                                 if ( (sym.n_type() & N_TYPE) != N_ABS )
4891                                         continue;
4892                                 const char* absName = parser.nameFromSymbol(sym);
4893                                 if ( strncmp(absName, ".objc_class_name_", 17) == 0 ) {
4894                                         if ( strcmp(&absName[17], name) == 0 )
4895                                                 return absName;
4896                                 }
4897                         }
4898                         assert(0 && "obj class name not found in symbol table");
4899                 }
4900         }
4901         assert(0 && "obj class name not found");
4902         return "unknown objc class";
4903 }
4904
4905
4906 template <typename A>
4907 const char* ObjC2ClassRefsSection<A>::targetClassName(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4908 {
4909         assert(atom->fixupCount() == 1);
4910         ld::Fixup::iterator fit = atom->fixupsBegin();
4911         const char* className = NULL;
4912         switch ( fit->binding ) {
4913                 case ld::Fixup::bindingByNameUnbound:
4914                         className = fit->u.name;
4915                         break;
4916                 case ld::Fixup::bindingDirectlyBound:
4917                 case ld::Fixup::bindingByContentBound:
4918                         className = fit->u.target->name();
4919                         break;
4920                 case ld::Fixup::bindingsIndirectlyBound:
4921                         className = ind.indirectName(fit->u.bindingIndex);
4922                         break;
4923                 default:
4924                         assert(0 && "unsupported binding in objc2 class ref section");
4925         }
4926         assert(className != NULL);
4927         return className;
4928 }
4929
4930
4931 template <typename A>
4932 unsigned long ObjC2ClassRefsSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4933 {
4934         unsigned long hash = 978;
4935         for (const char* s = targetClassName(atom, ind); *s != '\0'; ++s) {
4936                 hash = hash * 33 + *s;
4937         }
4938         return hash;
4939 }
4940
4941 template <typename A>
4942 bool ObjC2ClassRefsSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
4943                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
4944 {
4945         assert(this->type() == rhs.section().type());
4946         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
4947         assert(rhsAtom !=  NULL);
4948         const char* thisClassName = targetClassName(atom, indirectBindingTable);
4949         const char* rhsClassName = targetClassName(rhsAtom, indirectBindingTable);
4950         return (strcmp(thisClassName, rhsClassName) == 0);
4951 }
4952
4953
4954 template <typename A>
4955 const char* Objc1ClassReferences<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4956 {
4957         assert(atom->fixupCount() == 2);
4958         ld::Fixup::iterator fit = atom->fixupsBegin();
4959         if ( fit->kind == ld::Fixup::kindSetTargetAddress )
4960                 ++fit;
4961         const ld::Atom* targetAtom = NULL;
4962         switch ( fit->binding ) {
4963                 case ld::Fixup::bindingByContentBound:
4964                         targetAtom = fit->u.target;
4965                         break;
4966                 case ld::Fixup::bindingsIndirectlyBound:
4967                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4968                         if ( targetAtom == NULL ) {
4969                                 fprintf(stderr, "missing target named %s\n", ind.indirectName(fit->u.bindingIndex));
4970                         }
4971                         break;
4972                 default:
4973                         assert(0);
4974         }
4975         assert(targetAtom != NULL);
4976         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
4977         assert(target !=  NULL);
4978         return (char*)target->contentPointer();
4979 }
4980
4981
4982 template <typename A>
4983 const char* PointerToCStringSection<A>::targetCString(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
4984 {
4985         assert(atom->fixupCount() == 1);
4986         ld::Fixup::iterator fit = atom->fixupsBegin();
4987         const ld::Atom* targetAtom = NULL;
4988         switch ( fit->binding ) {
4989                 case ld::Fixup::bindingByContentBound:
4990                         targetAtom = fit->u.target;
4991                         break;
4992                 case ld::Fixup::bindingsIndirectlyBound:
4993                         targetAtom = ind.indirectAtom(fit->u.bindingIndex);
4994                         break;
4995                 default:
4996                         assert(0);
4997         }
4998         assert(targetAtom != NULL);
4999         const Atom<A>* target = dynamic_cast<const Atom<A>*>(targetAtom);
5000         assert(target !=  NULL);
5001         return (char*)target->contentPointer();
5002 }
5003
5004 template <typename A>
5005 unsigned long PointerToCStringSection<A>::contentHash(const class Atom<A>* atom,
5006                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5007 {
5008         // make hash from section name and target cstring name
5009         unsigned long hash = 123;
5010         for (const char* s = this->sectionName(); *s != '\0'; ++s) {
5011                 hash = hash * 33 + *s;
5012         }
5013         for (const char* s = this->targetCString(atom, indirectBindingTable); *s != '\0'; ++s) {
5014                 hash = hash * 33 + *s;
5015         }
5016         return hash;
5017 }
5018
5019 template <typename A>
5020 bool PointerToCStringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5021                                                                                                         const ld::IndirectBindingTable& indirectBindingTable) const
5022 {
5023         assert(this->type() == rhs.section().type());
5024         // there can be pointers-to-cstrings in different section names
5025         // we only want to coalesce in same section name
5026         if ( *this != rhs.section() )
5027                 return false;
5028
5029         // get string content for this
5030         const char* cstringContent = this->targetCString(atom, indirectBindingTable);
5031         const Atom<A>* rhsAtom = dynamic_cast<const Atom<A>*>(&rhs);
5032         assert(rhsAtom !=  NULL);
5033         const char* rhsCstringContent = this->targetCString(rhsAtom, indirectBindingTable);
5034
5035         assert(cstringContent != NULL);
5036         assert(rhsCstringContent != NULL);
5037         return (strcmp(cstringContent, rhsCstringContent) == 0);
5038 }
5039
5040
5041
5042 template <typename A>
5043 unsigned long UTF16StringSection<A>::contentHash(const class Atom<A>* atom, const ld::IndirectBindingTable& ind) const
5044 {
5045         unsigned long hash = 5381;
5046         const uint16_t* stringContent = (uint16_t*)atom->contentPointer();
5047         // some buggy compilers end utf16 data with single byte, so don't use last word in hash computation
5048         unsigned int count = (atom->size()/2) - 1;
5049         for (const uint16_t* s = stringContent; count > 0; ++s, --count) {
5050                 hash = hash * 33 + *s;
5051         }
5052         return hash;
5053 }
5054
5055 template <typename A>
5056 bool UTF16StringSection<A>::canCoalesceWith(const class Atom<A>* atom, const ld::Atom& rhs,
5057                                                                                                         const ld::IndirectBindingTable& ind) const
5058 {
5059         if ( rhs.section().type() != ld::Section::typeUTF16Strings )
5060                 return false;
5061         assert(0);
5062         return false;
5063 }
5064
5065
5066
5067
5068
5069
5070
5071 template <>
5072 uint32_t Section<x86_64>::x86_64PcRelOffset(uint8_t r_type)
5073 {
5074         switch ( r_type ) {
5075                 case X86_64_RELOC_SIGNED:
5076                         return 4;
5077                 case X86_64_RELOC_SIGNED_1:
5078                         return 5;
5079                 case X86_64_RELOC_SIGNED_2:
5080                         return 6;
5081                 case X86_64_RELOC_SIGNED_4:
5082                         return 8;
5083         }
5084         return 0;
5085 }
5086
5087
5088 template <>
5089 bool Section<x86_64>::addRelocFixup(class Parser<x86_64>& parser, const macho_relocation_info<P>* reloc)
5090 {
5091         const macho_section<P>* sect = this->machoSection();
5092         uint64_t srcAddr = sect->addr() + reloc->r_address();
5093         Parser<x86_64>::SourceLocation  src;
5094         Parser<x86_64>::TargetDesc              target;
5095         Parser<x86_64>::TargetDesc              toTarget;
5096         src.atom = this->findAtomByAddress(srcAddr);
5097         src.offsetInAtom = srcAddr - src.atom->_objAddress;
5098         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5099         uint64_t contentValue = 0;
5100         const macho_relocation_info<x86_64::P>* nextReloc = &reloc[1];
5101         bool result = false;
5102         bool useDirectBinding;
5103         switch ( reloc->r_length() ) {
5104                 case 0:
5105                         contentValue = *fixUpPtr;
5106                         break;
5107                 case 1:
5108                         contentValue = (int64_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5109                         break;
5110                 case 2:
5111                         contentValue = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5112                         break;
5113                 case 3:
5114                         contentValue = E::get64(*((uint64_t*)fixUpPtr));
5115                         break;
5116         }
5117         target.atom = NULL;
5118         target.name = NULL;
5119         target.weakImport = false;
5120         target.addend = 0;
5121         if ( reloc->r_extern() ) {
5122                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5123                 // use direct reference for local symbols
5124                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5125                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5126                         target.addend += contentValue;
5127                 }
5128                 else {
5129                         target.name = parser.nameFromSymbol(sym);
5130                         target.weakImport = parser.weakImportFromSymbol(sym);
5131                         target.addend = contentValue;
5132                 }
5133                 // cfstrings should always use direct reference to backing store
5134                 if ( (this->type() == ld::Section::typeCFString) && (src.offsetInAtom != 0) ) {
5135                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5136                         target.addend = contentValue;
5137                 }
5138         }
5139         else {
5140                 if ( reloc->r_pcrel()  )
5141                         contentValue += srcAddr + x86_64PcRelOffset(reloc->r_type());
5142                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5143         }
5144         switch ( reloc->r_type() ) {
5145                 case X86_64_RELOC_UNSIGNED:
5146                         if ( reloc->r_pcrel() )
5147                                 throw "pcrel and X86_64_RELOC_UNSIGNED not supported";
5148                         switch ( reloc->r_length() ) {
5149                                 case 0:
5150                                 case 1:
5151                                         throw "length < 2 and X86_64_RELOC_UNSIGNED not supported";
5152                                 case 2:
5153                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5154                                         break;
5155                                 case 3:
5156                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian64, target);
5157                                         break;
5158                         }
5159                         break;
5160                 case X86_64_RELOC_SIGNED:
5161                 case X86_64_RELOC_SIGNED_1:
5162                 case X86_64_RELOC_SIGNED_2:
5163                 case X86_64_RELOC_SIGNED_4:
5164                         if ( ! reloc->r_pcrel() )
5165                                 throw "not pcrel and X86_64_RELOC_SIGNED* not supported";
5166                         if ( reloc->r_length() != 2 )
5167                                 throw "length != 2 and X86_64_RELOC_SIGNED* not supported";
5168                         switch ( reloc->r_type() ) {
5169                                 case X86_64_RELOC_SIGNED:
5170                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5171                                         break;
5172                                 case X86_64_RELOC_SIGNED_1:
5173                                         if ( reloc->r_extern() )
5174                                                 target.addend += 1;
5175                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_1, target);
5176                                         break;
5177                                 case X86_64_RELOC_SIGNED_2:
5178                                         if ( reloc->r_extern() )
5179                                                 target.addend += 2;
5180                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_2, target);
5181                                         break;
5182                                 case X86_64_RELOC_SIGNED_4:
5183                                         if ( reloc->r_extern() )
5184                                                 target.addend += 4;
5185                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32_4, target);
5186                                         break;
5187                         }
5188                         break;
5189                 case X86_64_RELOC_BRANCH:
5190                         if ( ! reloc->r_pcrel() )
5191                                 throw "not pcrel and X86_64_RELOC_BRANCH not supported";
5192                         switch ( reloc->r_length() ) {
5193                                 case 2:
5194                                         if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5195                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5196                                                 parser.addDtraceExtraInfos(src, &target.name[16]);
5197                                         }
5198                                         else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5199                                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5200                                                 parser.addDtraceExtraInfos(src, &target.name[20]);
5201                                         }
5202                                         else {
5203                                                 parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel32, target);
5204                                         }
5205                                         break;
5206                                 case 0:
5207                                         parser.addFixups(src, ld::Fixup::kindStoreX86BranchPCRel8, target);
5208                                         break;
5209                                 default:
5210                                         throwf("length=%d and X86_64_RELOC_BRANCH not supported", reloc->r_length());
5211                         }
5212                         break;
5213                 case X86_64_RELOC_GOT:
5214                         if ( ! reloc->r_extern() )
5215                                 throw "not extern and X86_64_RELOC_GOT not supported";
5216                         if ( ! reloc->r_pcrel() )
5217                                 throw "not pcrel and X86_64_RELOC_GOT not supported";
5218                         if ( reloc->r_length() != 2 )
5219                                 throw "length != 2 and X86_64_RELOC_GOT not supported";
5220                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOT, target);
5221                         break;
5222                 case X86_64_RELOC_GOT_LOAD:
5223                         if ( ! reloc->r_extern() )
5224                                 throw "not extern and X86_64_RELOC_GOT_LOAD not supported";
5225                         if ( ! reloc->r_pcrel() )
5226                                 throw "not pcrel and X86_64_RELOC_GOT_LOAD not supported";
5227                         if ( reloc->r_length() != 2 )
5228                                 throw "length != 2 and X86_64_RELOC_GOT_LOAD not supported";
5229                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32GOTLoad, target);
5230                         break;
5231                 case X86_64_RELOC_SUBTRACTOR:
5232                         if ( reloc->r_pcrel() )
5233                                 throw "X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5234                         if ( reloc->r_length() < 2 )
5235                                 throw "X86_64_RELOC_SUBTRACTOR must have r_length of 2 or 3";
5236                         if ( !reloc->r_extern() )
5237                                 throw "X86_64_RELOC_SUBTRACTOR must have r_extern=1";
5238                         if ( nextReloc->r_type() != X86_64_RELOC_UNSIGNED )
5239                                 throw "X86_64_RELOC_SUBTRACTOR must be followed by X86_64_RELOC_UNSIGNED";
5240                         result = true;
5241                         if ( nextReloc->r_pcrel() )
5242                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR cannot be pc-relative";
5243                         if ( nextReloc->r_length() != reloc->r_length() )
5244                                 throw "X86_64_RELOC_UNSIGNED following a X86_64_RELOC_SUBTRACTOR must have same r_length";
5245                         if ( nextReloc->r_extern() ) {
5246                                 const macho_nlist<P>& sym = parser.symbolFromIndex(nextReloc->r_symbolnum());
5247                                 // use direct reference for local symbols
5248                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && (((sym.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(sym)[0] == 'L')) ) {
5249                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), toTarget);
5250                                         toTarget.addend = contentValue;
5251                                         useDirectBinding = true;
5252                                 }
5253                                 else {
5254                                         toTarget.name = parser.nameFromSymbol(sym);
5255                                         toTarget.weakImport = parser.weakImportFromSymbol(sym);
5256                                         toTarget.addend = contentValue;
5257                                         useDirectBinding = false;
5258                                 }
5259                         }
5260                         else {
5261                                 parser.findTargetFromAddressAndSectionNum(contentValue, nextReloc->r_symbolnum(), toTarget);
5262                                 useDirectBinding = (toTarget.atom->scope() == ld::Atom::scopeTranslationUnit);
5263                         }
5264                         if ( useDirectBinding )
5265                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.atom);
5266                         else
5267                                 parser.addFixup(src, ld::Fixup::k1of4, ld::Fixup::kindSetTargetAddress, toTarget.weakImport, toTarget.name);
5268                         parser.addFixup(src, ld::Fixup::k2of4, ld::Fixup::kindAddAddend, toTarget.addend);
5269                         if ( target.atom == NULL )
5270                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, false, target.name);
5271                         else
5272                                 parser.addFixup(src, ld::Fixup::k3of4, ld::Fixup::kindSubtractTargetAddress, target.atom);
5273                         if ( reloc->r_length() == 2 )
5274                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian32);
5275                         else
5276                                 parser.addFixup(src, ld::Fixup::k4of4, ld::Fixup::kindStoreLittleEndian64);
5277                         break;
5278                 case X86_64_RELOC_TLV:
5279                         if ( ! reloc->r_extern() )
5280                                 throw "not extern and X86_64_RELOC_TLV not supported";
5281                         if ( ! reloc->r_pcrel() )
5282                                 throw "not pcrel and X86_64_RELOC_TLV not supported";
5283                         if ( reloc->r_length() != 2 )
5284                                 throw "length != 2 and X86_64_RELOC_TLV not supported";
5285                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5286                         break;
5287                 default:
5288                         throwf("unknown relocation type %d", reloc->r_type());
5289         }
5290         return result;
5291 }
5292
5293
5294
5295 template <>
5296 bool Section<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<P>* reloc)
5297 {
5298         const macho_section<P>* sect = this->machoSection();
5299         uint32_t srcAddr;
5300         const uint8_t* fixUpPtr;
5301         uint32_t contentValue = 0;
5302         ld::Fixup::Kind kind = ld::Fixup::kindNone;
5303         Parser<x86>::SourceLocation     src;
5304         Parser<x86>::TargetDesc         target;
5305
5306         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5307                 srcAddr = sect->addr() + reloc->r_address();
5308                 src.atom = this->findAtomByAddress(srcAddr);
5309                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5310                 fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
5311                 switch ( reloc->r_type() ) {
5312                 case GENERIC_RELOC_VANILLA:
5313                         switch ( reloc->r_length() ) {
5314                                 case 0:
5315                                         contentValue = (int32_t)(int8_t)*fixUpPtr;
5316                                         if ( reloc->r_pcrel() ) {
5317                                                 kind = ld::Fixup::kindStoreX86BranchPCRel8;
5318                                                 contentValue += srcAddr + sizeof(uint8_t);
5319                                         }
5320                                         else
5321                                                 throw "r_length=0 and r_pcrel=0 not supported";
5322                                         break;
5323                                 case 1:
5324                                         contentValue = (int32_t)(int16_t)E::get16(*((uint16_t*)fixUpPtr));
5325                                         if ( reloc->r_pcrel() ) {
5326                                                 kind = ld::Fixup::kindStoreX86PCRel16;
5327                                                 contentValue += srcAddr + sizeof(uint16_t);
5328                                         }
5329                                         else
5330                                                 kind = ld::Fixup::kindStoreLittleEndian16;
5331                                         break;
5332                                 case 2:
5333                                         contentValue = E::get32(*((uint32_t*)fixUpPtr));
5334                                         if ( reloc->r_pcrel() ) {
5335                                                 kind = ld::Fixup::kindStoreX86BranchPCRel32;
5336                                                 contentValue += srcAddr + sizeof(uint32_t);
5337                                         }
5338                                         else
5339                                                 kind = ld::Fixup::kindStoreLittleEndian32;
5340                                         break;
5341                                 case 3:
5342                                         throw "r_length=3 not supported";
5343                         }
5344                         if ( reloc->r_extern() ) {
5345                                 target.atom = NULL;
5346                                 const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5347                                 target.name = parser.nameFromSymbol(targetSymbol);
5348                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5349                                 target.addend = (int32_t)contentValue;
5350                         }
5351                         else {
5352                                 parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5353                         }
5354                         if ( (kind == ld::Fixup::kindStoreX86BranchPCRel32) && (target.name != NULL) ) {
5355                                 if ( strncmp(target.name, "___dtrace_probe$", 16) == 0 ) {
5356                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceCallSiteNop, false, target.name);
5357                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5358                                         return false;
5359                                 }
5360                                 else if ( strncmp(target.name, "___dtrace_isenabled$", 20) == 0 ) {
5361                                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindStoreX86DtraceIsEnableSiteClear, false, target.name);
5362                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5363                                         return false;
5364                                 }
5365                         }
5366                         parser.addFixups(src, kind, target);
5367                         return false;
5368                         break;
5369                 case GENERIC_RLEOC_TLV:
5370                         {
5371                                 if ( !reloc->r_extern() )
5372                                         throw "r_extern=0 and r_type=GENERIC_RLEOC_TLV not supported";
5373                                 if ( reloc->r_length() != 2 )
5374                                         throw "r_length!=2 and r_type=GENERIC_RLEOC_TLV not supported";
5375                                 const macho_nlist<P>& sym = parser.symbolFromIndex(reloc->r_symbolnum());
5376                                 // use direct reference for local symbols
5377                                 if ( ((sym.n_type() & N_TYPE) == N_SECT) && ((sym.n_type() & N_EXT) == 0) ) {
5378                                         parser.findTargetFromAddressAndSectionNum(sym.n_value(), sym.n_sect(), target);
5379                                 }
5380                                 else {
5381                                         target.atom = NULL;
5382                                         target.name = parser.nameFromSymbol(sym);
5383                                         target.weakImport = parser.weakImportFromSymbol(sym);
5384                                 }
5385                                 target.addend = (int64_t)(int32_t)E::get32(*((uint32_t*)fixUpPtr));
5386                                 if ( reloc->r_pcrel() ) {
5387                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32TLVLoad, target);
5388                                 }
5389                                 else {
5390                                         parser.addFixups(src, ld::Fixup::kindStoreX86Abs32TLVLoad, target);
5391                                 }
5392                                 return false;
5393                         }
5394                         break;
5395                 default:
5396                         throwf("unsupported i386 relocation type (%d)", reloc->r_type());
5397                 }
5398         }
5399         else {
5400                 // scattered relocation
5401                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5402                 srcAddr = sect->addr() + sreloc->r_address();
5403                 src.atom = this->findAtomByAddress(srcAddr);
5404                 assert(src.atom != NULL);
5405                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5406                 fixUpPtr = file().fileContent() + sect->offset() + sreloc->r_address();
5407                 uint32_t relocValue = sreloc->r_value();
5408                 bool result = false;
5409                 // file format allows pair to be scattered or not
5410                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5411                 const macho_relocation_info<P>* nextReloc = &reloc[1];
5412                 bool nextRelocIsPair = false;
5413                 uint32_t nextRelocAddress = 0;
5414                 uint32_t nextRelocValue = 0;
5415                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5416                         if ( nextReloc->r_type() == GENERIC_RELOC_PAIR ) {
5417                                 nextRelocIsPair = true;
5418                                 nextRelocAddress = nextReloc->r_address();
5419                                 result = true;  // iterator should skip next reloc, since we've consumed it here
5420                         }
5421                 }
5422                 else {
5423                         if ( nextSReloc->r_type() == GENERIC_RELOC_PAIR ) {
5424                                 nextRelocIsPair = true;
5425                                 nextRelocAddress = nextSReloc->r_address();
5426                                 nextRelocValue = nextSReloc->r_value();
5427                         }
5428                 }
5429                 switch (sreloc->r_type()) {
5430                         case GENERIC_RELOC_VANILLA:
5431                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5432                                 target.atom = parser.findAtomByAddress(relocValue);
5433                                 if ( sreloc->r_pcrel() ) {
5434                                         switch ( sreloc->r_length() ) {
5435                                                 case 0:
5436                                                         contentValue = srcAddr + 1 + *fixUpPtr;
5437                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5438                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel8, target);
5439                                                         break;
5440                                                 case 1:
5441                                                         contentValue = srcAddr + 2 + LittleEndian::get16(*((uint16_t*)fixUpPtr));
5442                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5443                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel16, target);
5444                                                         break;
5445                                                 case 2:
5446                                                         contentValue = srcAddr + 4 + LittleEndian::get32(*((uint32_t*)fixUpPtr));
5447                                                         target.addend = (int32_t)contentValue - (int32_t)relocValue;
5448                                                         parser.addFixups(src, ld::Fixup::kindStoreX86PCRel32, target);
5449                                                         break;
5450                                                 case 3:
5451                                                         throw "unsupported r_length=3 for scattered pc-rel vanilla reloc";
5452                                                         break;
5453                                         }
5454                                 }
5455                                 else {
5456                                         if ( sreloc->r_length() != 2 )
5457                                                 throwf("unsupported r_length=%d for scattered vanilla reloc", sreloc->r_length());
5458                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5459                                         target.addend = (int32_t)contentValue - (int32_t)(target.atom->objectAddress());
5460                                         parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5461                                 }
5462                                 break;
5463                         case GENERIC_RELOC_SECTDIFF:
5464                         case GENERIC_RELOC_LOCAL_SECTDIFF:
5465                                 {
5466                                         if ( !nextRelocIsPair )
5467                                                 throw "GENERIC_RELOC_SECTDIFF missing following pair";
5468                                         switch ( sreloc->r_length() ) {
5469                                                 case 0:
5470                                                 case 3:
5471                                                         throw "bad length for GENERIC_RELOC_SECTDIFF";
5472                                                 case 1:
5473                                                         contentValue = (int32_t)(int16_t)LittleEndian::get16(*((uint16_t*)fixUpPtr));
5474                                                         kind = ld::Fixup::kindStoreLittleEndian16;
5475                                                         break;
5476                                                 case 2:
5477                                                         contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
5478                                                         kind = ld::Fixup::kindStoreLittleEndian32;
5479                                                         break;
5480                                         }
5481                                         Atom<x86>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5482                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5483                                         parser.findTargetFromAddress(sreloc->r_value(), target);
5484                                         // check for addend encoded in the section content
5485                                         int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5486                                         if ( addend < 0 ) {
5487                                                 // switch binding base on coalescing
5488                                                 if ( target.atom == NULL ) {
5489                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5490                                                 }
5491                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5492                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5493                                                 }
5494                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5495                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5496                                                 }
5497                                                 else {
5498                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5499                                                 }
5500                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend);
5501                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5502                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5503                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
5504                                         }
5505                                         else {
5506                                                 // switch binding base on coalescing
5507                                                 if ( target.atom == NULL ) {
5508                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.name);
5509                                                 }
5510                                                 else if ( target.atom->scope() == ld::Atom::scopeTranslationUnit ) {
5511                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, target.atom);
5512                                                 }
5513                                                 else if ( (target.atom->combine() == ld::Atom::combineByNameAndContent) || (target.atom->combine() == ld::Atom::combineByNameAndReferences) ) {
5514                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, target.atom);
5515                                                 }
5516                                                 else {
5517                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, target.atom->name());
5518                                                 }
5519                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, target.addend+addend);
5520                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5521                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5522                                                 parser.addFixup(src, ld::Fixup::k5of5, kind);
5523                                         }
5524                                 }
5525                                 break;
5526                 }
5527                 return result;
5528         }
5529 }
5530
5531
5532
5533
5534
5535 template <>
5536 bool Section<arm>::addRelocFixup(class Parser<arm>& parser, const macho_relocation_info<P>* reloc)
5537 {
5538         const macho_section<P>* sect = this->machoSection();
5539         bool result = false;
5540         uint32_t srcAddr;
5541         uint32_t dstAddr;
5542         uint32_t* fixUpPtr;
5543         int32_t displacement = 0;
5544         uint32_t instruction = 0;
5545         pint_t contentValue = 0;
5546         Parser<arm>::SourceLocation     src;
5547         Parser<arm>::TargetDesc         target;
5548         const macho_relocation_info<P>* nextReloc;
5549
5550         if ( (reloc->r_address() & R_SCATTERED) == 0 ) {
5551                 bool externSymbolIsThumbDef = false;
5552                 srcAddr = sect->addr() + reloc->r_address();
5553                 src.atom = this->findAtomByAddress(srcAddr);
5554                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5555                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + reloc->r_address());
5556                 if ( reloc->r_type() != ARM_RELOC_PAIR )
5557                         instruction = LittleEndian::get32(*fixUpPtr);
5558                 if ( reloc->r_extern() ) {
5559                         const macho_nlist<P>& targetSymbol = parser.symbolFromIndex(reloc->r_symbolnum());
5560                         // use direct reference for local symbols
5561                         if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) && (((targetSymbol.n_type() & N_EXT) == 0) || (parser.nameFromSymbol(targetSymbol)[0] == 'L')) ) {
5562                                 parser.findTargetFromAddressAndSectionNum(targetSymbol.n_value(), targetSymbol.n_sect(), target);
5563                         }
5564                         else {
5565                                 target.atom = NULL;
5566                                 target.name = parser.nameFromSymbol(targetSymbol);
5567                                 target.weakImport = parser.weakImportFromSymbol(targetSymbol);
5568                                 if ( ((targetSymbol.n_type() & N_TYPE) == N_SECT) &&  (targetSymbol.n_desc() & N_ARM_THUMB_DEF) )
5569                                         externSymbolIsThumbDef = true;
5570                         }
5571                 }
5572                 switch ( reloc->r_type() ) {
5573                         case ARM_RELOC_BR24:
5574                                 // Sign-extend displacement
5575                                 displacement = (instruction & 0x00FFFFFF) << 2;
5576                                 if ( (displacement & 0x02000000) != 0 )
5577                                         displacement |= 0xFC000000;
5578                                 // The pc added will be +8 from the pc
5579                                 displacement += 8;
5580                                 // If this is BLX add H << 1
5581                                 if ((instruction & 0xFE000000) == 0xFA000000)
5582                                         displacement += ((instruction & 0x01000000) >> 23);
5583                                 if ( reloc->r_extern() ) {
5584                                         target.addend = srcAddr + displacement;
5585                                         if ( externSymbolIsThumbDef )
5586                                                 target.addend &= -2; // remove thumb bit
5587                                 }
5588                                 else {
5589                                         dstAddr = srcAddr + displacement;
5590                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5591                                 }
5592                                 // special case "calls" for dtrace
5593                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5594                                         parser.addFixup(src, ld::Fixup::k1of1,
5595                                                                                                                         ld::Fixup::kindStoreARMDtraceCallSiteNop, false, target.name);
5596                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5597                                 }
5598                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5599                                         parser.addFixup(src, ld::Fixup::k1of1,
5600                                                                                                                         ld::Fixup::kindStoreARMDtraceIsEnableSiteClear, false, target.name);
5601                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5602                                 }
5603                                 else {
5604                                         parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5605                                 }
5606                                 break;
5607                         case ARM_THUMB_RELOC_BR22:
5608                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
5609                                 {
5610                                         uint32_t s = (instruction >> 10) & 0x1;
5611                                         uint32_t j1 = (instruction >> 29) & 0x1;
5612                                         uint32_t j2 = (instruction >> 27) & 0x1;
5613                                         uint32_t imm10 = instruction & 0x3FF;
5614                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
5615                                         uint32_t i1 = (j1 == s);
5616                                         uint32_t i2 = (j2 == s);
5617                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5618                                         int32_t sdis = dis;
5619                                         if ( s )
5620                                                 sdis |= 0xFE000000;
5621                                         displacement = sdis;
5622                                 }
5623                                 // The pc added will be +4 from the pc
5624                                 displacement += 4;
5625                                 // If the instruction was blx, force the low 2 bits to be clear
5626                                 dstAddr = srcAddr + displacement;
5627                                 if ((instruction & 0xF8000000) == 0xE8000000)
5628                                         dstAddr &= 0xFFFFFFFC;
5629
5630                                 if ( reloc->r_extern() ) {
5631                                         target.addend = dstAddr;
5632                                 }
5633                                 else {
5634                                         parser.findTargetFromAddressAndSectionNum(dstAddr, reloc->r_symbolnum(), target);
5635                                 }
5636                                 // special case "calls" for dtrace
5637                                 if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_probe$", 16) == 0) ) {
5638                                         parser.addFixup(src, ld::Fixup::k1of1,
5639                                                                                                                         ld::Fixup::kindStoreThumbDtraceCallSiteNop, false, target.name);
5640                                         parser.addDtraceExtraInfos(src, &target.name[16]);
5641                                 }
5642                                 else if ( (target.name != NULL) && (strncmp(target.name, "___dtrace_isenabled$", 20) == 0) ) {
5643                                         parser.addFixup(src, ld::Fixup::k1of1,
5644                                                                                                                         ld::Fixup::kindStoreThumbDtraceIsEnableSiteClear, false, target.name);
5645                                         parser.addDtraceExtraInfos(src, &target.name[20]);
5646                                 }
5647                                 else {
5648                                         parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5649                                 }
5650                                 break;
5651                         case ARM_RELOC_VANILLA:
5652                                 if ( reloc->r_length() != 2 )
5653                                         throw "bad length for ARM_RELOC_VANILLA";
5654                                 contentValue = LittleEndian::get32(*fixUpPtr);
5655                                 if ( reloc->r_extern() ) {
5656                                         target.addend = (int32_t)contentValue;
5657                                         if ( externSymbolIsThumbDef )
5658                                                 target.addend &= -2; // remove thumb bit
5659                                 }
5660                                 else {
5661                                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), target);
5662                                         // possible non-extern relocation turned into by-name ref because target is a weak-def
5663                                         if ( target.atom != NULL ) {
5664                                                 if ( target.atom->isThumb() )
5665                                                         target.addend &= -2; // remove thumb bit
5666                                                 // if reference to LSDA, add group subordinate fixup
5667                                                 if ( target.atom->contentType() == ld::Atom::typeLSDA ) {
5668                                                         Parser<arm>::SourceLocation     src2;
5669                                                         src2.atom = src.atom;
5670                                                         src2.offsetInAtom = 0;
5671                                                         parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, target.atom);
5672                                                 }
5673                                         }
5674                                 }
5675                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5676                                 break;
5677                         case ARM_THUMB_32BIT_BRANCH:
5678                                 // silently ignore old unnecessary reloc
5679                                 break;
5680                         case ARM_RELOC_HALF:
5681                                 nextReloc = &reloc[1];
5682                                 if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5683                                         uint32_t instruction16;
5684                                         uint32_t other16 = (nextReloc->r_address() & 0xFFFF);
5685                                         bool isThumb;
5686                                         if ( reloc->r_length() & 2 ) {
5687                                                 isThumb = true;
5688                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5689                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5690                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5691                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5692                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5693                                         }
5694                                         else {
5695                                                 isThumb = false;
5696                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5697                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5698                                                 instruction16 = (imm4 << 12) | imm12;
5699                                         }
5700                                         if ( reloc->r_length() & 1 ) {
5701                                                 // high 16
5702                                                 dstAddr = ((instruction16 << 16) | other16);
5703                         if ( reloc->r_extern() ) {
5704                             target.addend = dstAddr;
5705                                                         if ( externSymbolIsThumbDef )
5706                                                                 target.addend &= -2; // remove thumb bit
5707                                                 }
5708                         else {
5709                             parser.findTargetFromAddress(dstAddr, target);
5710                             if ( target.atom->isThumb() )
5711                                 target.addend &= (-2); // remove thumb bit
5712                         }
5713                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16), target);
5714                                         }
5715                                         else {
5716                                                 // low 16
5717                                                 dstAddr = (other16 << 16) | instruction16;
5718                         if ( reloc->r_extern() ) {
5719                             target.addend = dstAddr;
5720                                                         if ( externSymbolIsThumbDef )
5721                                                                 target.addend &= -2; // remove thumb bit
5722                         }
5723                         else {
5724                             parser.findTargetFromAddress(dstAddr, target);
5725                             if ( target.atom->isThumb() )
5726                                 target.addend &= (-2); // remove thumb bit
5727                         }
5728                                                 parser.addFixups(src, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16), target);
5729                                         }
5730                                         result = true;
5731                                 }
5732                                 else
5733                                         throw "for ARM_RELOC_HALF, next reloc is not ARM_RELOC_PAIR";
5734                                 break;
5735                         default:
5736                                 throwf("unknown relocation type %d", reloc->r_type());
5737                                 break;
5738                 }
5739         }
5740         else {
5741                 const macho_scattered_relocation_info<P>* sreloc = (macho_scattered_relocation_info<P>*)reloc;
5742                 // file format allows pair to be scattered or not
5743                 const macho_scattered_relocation_info<P>* nextSReloc = &sreloc[1];
5744                 nextReloc = &reloc[1];
5745                 srcAddr = sect->addr() + sreloc->r_address();
5746                 dstAddr = sreloc->r_value();
5747                 fixUpPtr = (uint32_t*)(file().fileContent() + sect->offset() + sreloc->r_address());
5748                 instruction = LittleEndian::get32(*fixUpPtr);
5749                 src.atom = this->findAtomByAddress(srcAddr);
5750                 src.offsetInAtom = srcAddr - src.atom->_objAddress;
5751                 bool nextRelocIsPair = false;
5752                 uint32_t nextRelocAddress = 0;
5753                 uint32_t nextRelocValue = 0;
5754                 if ( (nextReloc->r_address() & R_SCATTERED) == 0 ) {
5755                         if ( nextReloc->r_type() == ARM_RELOC_PAIR ) {
5756                                 nextRelocIsPair = true;
5757                                 nextRelocAddress = nextReloc->r_address();
5758                                 result = true;
5759                         }
5760                 }
5761                 else {
5762                         if ( nextSReloc->r_type() == ARM_RELOC_PAIR ) {
5763                                 nextRelocIsPair = true;
5764                                 nextRelocAddress = nextSReloc->r_address();
5765                                 nextRelocValue = nextSReloc->r_value();
5766                                 result = true;
5767                         }
5768                 }
5769                 switch ( sreloc->r_type() ) {
5770                         case ARM_RELOC_VANILLA:
5771                                 // with a scattered relocation we get both the target (sreloc->r_value()) and the target+offset (*fixUpPtr)
5772                                 if ( sreloc->r_length() != 2 )
5773                                         throw "bad length for ARM_RELOC_VANILLA";
5774                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5775                                 contentValue = LittleEndian::get32(*fixUpPtr);
5776                                 target.addend = contentValue - target.atom->_objAddress;
5777                                 if ( target.atom->isThumb() )
5778                                         target.addend &= -2; // remove thumb bit
5779                                 parser.addFixups(src, ld::Fixup::kindStoreLittleEndian32, target);
5780                                 break;
5781                         case ARM_RELOC_BR24:
5782                                 // Sign-extend displacement
5783                                 displacement = (instruction & 0x00FFFFFF) << 2;
5784                                 if ( (displacement & 0x02000000) != 0 )
5785                                         displacement |= 0xFC000000;
5786                                 // The pc added will be +8 from the pc
5787                                 displacement += 8;
5788                                 // If this is BLX add H << 1
5789                                 if ((instruction & 0xFE000000) == 0xFA000000)
5790                                         displacement += ((instruction & 0x01000000) >> 23);
5791                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5792                                 target.addend = (int64_t)(srcAddr + displacement) - (int64_t)(target.atom->_objAddress);
5793                                 parser.addFixups(src, ld::Fixup::kindStoreARMBranch24, target);
5794                                 break;
5795                         case ARM_THUMB_RELOC_BR22:
5796                                 // thumb2 added two more bits to displacement, complicating the displacement decoding
5797                                 {
5798                                         uint32_t s = (instruction >> 10) & 0x1;
5799                                         uint32_t j1 = (instruction >> 29) & 0x1;
5800                                         uint32_t j2 = (instruction >> 27) & 0x1;
5801                                         uint32_t imm10 = instruction & 0x3FF;
5802                                         uint32_t imm11 = (instruction >> 16) & 0x7FF;
5803                                         uint32_t i1 = (j1 == s);
5804                                         uint32_t i2 = (j2 == s);
5805                                         uint32_t dis = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
5806                                         int32_t sdis = dis;
5807                                         if ( s )
5808                                                 sdis |= 0xFE000000;
5809                                         displacement = sdis;
5810                                 }
5811                                 // The pc added will be +4 from the pc
5812                                 displacement += 4;
5813                                 dstAddr = srcAddr+displacement;
5814                                 // If the instruction was blx, force the low 2 bits to be clear
5815                                 if ((instruction & 0xF8000000) == 0xE8000000)
5816                                         dstAddr &= 0xFFFFFFFC;
5817                                 target.atom = parser.findAtomByAddress(sreloc->r_value());
5818                                 target.addend = dstAddr - target.atom->_objAddress;
5819                                 parser.addFixups(src, ld::Fixup::kindStoreThumbBranch22, target);
5820                                 break;
5821                         case ARM_RELOC_SECTDIFF:
5822                         case ARM_RELOC_LOCAL_SECTDIFF:
5823                                 {
5824                                         if ( ! nextRelocIsPair )
5825                                                 throw "ARM_RELOC_SECTDIFF missing following pair";
5826                                         if ( sreloc->r_length() != 2 )
5827                                                 throw "bad length for ARM_RELOC_SECTDIFF";
5828                                         contentValue = LittleEndian::get32(*fixUpPtr);
5829                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5830                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5831                                         uint32_t offsetInTarget;
5832                                         Atom<arm>* targetAtom = parser.findAtomByAddressOrLocalTargetOfStub(sreloc->r_value(), &offsetInTarget);
5833                                         // check for addend encoded in the section content
5834                     int64_t addend = (int32_t)contentValue - (int32_t)(sreloc->r_value() - nextRelocValue);
5835                                         if ( targetAtom->isThumb() )
5836                                                 addend &= -2; // remove thumb bit
5837                                         // if reference to LSDA, add group subordinate fixup
5838                                         if ( targetAtom->contentType() == ld::Atom::typeLSDA ) {
5839                                                 Parser<arm>::SourceLocation     src2;
5840                                                 src2.atom = src.atom;
5841                                                 src2.offsetInAtom = 0;
5842                                                 parser.addFixup(src2, ld::Fixup::k1of1, ld::Fixup::kindNoneGroupSubordinateLSDA, targetAtom);
5843                                         }
5844                                         if ( addend < 0 ) {
5845                                                 // switch binding base on coalescing
5846                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5847                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5848                                                 }
5849                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5850                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5851                                                 }
5852                                                 else {
5853                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5854                                                 }
5855                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, offsetInTarget);
5856                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5857                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom-addend);
5858                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5859                                         }
5860                                         else {
5861                                                 if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5862                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5863                                                 }
5864                                                 else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5865                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5866                                                 }
5867                                                 else {
5868                                                         parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5869                                                 }
5870                                                 parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)(offsetInTarget+addend));
5871                                                 parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5872                                                 parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5873                                                 parser.addFixup(src, ld::Fixup::k5of5, ld::Fixup::kindStoreLittleEndian32);
5874                                         }
5875                                 }
5876                                 break;
5877                         case ARM_RELOC_HALF_SECTDIFF:
5878                                 if ( nextRelocIsPair ) {
5879                                         instruction = LittleEndian::get32(*fixUpPtr);
5880                                         Atom<arm>* fromAtom  = parser.findAtomByAddress(nextRelocValue);
5881                                         uint32_t offsetInFrom = nextRelocValue - fromAtom->_objAddress;
5882                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
5883                                         uint32_t offsetInTarget = sreloc->r_value() - targetAtom->_objAddress;
5884                                         uint32_t instruction16;
5885                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
5886                                         bool isThumb;
5887                                         if ( sreloc->r_length() & 2 ) {
5888                                                 isThumb = true;
5889                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5890                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5891                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5892                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5893                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5894                                         }
5895                                         else {
5896                                                 isThumb = false;
5897                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5898                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5899                                                 instruction16 = (imm4 << 12) | imm12;
5900                                         }
5901                                         if ( sreloc->r_length() & 1 )
5902                                                 dstAddr = ((instruction16 << 16) | other16);
5903                                         else
5904                                                 dstAddr = (other16 << 16) | instruction16;
5905                                         if ( targetAtom->isThumb() )
5906                                                 dstAddr &= (-2); // remove thumb bit
5907                     int32_t addend = dstAddr - (sreloc->r_value() - nextRelocValue);
5908                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5909                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, targetAtom);
5910                                         }
5911                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5912                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5913                                         }
5914                                         else {
5915                                                 parser.addFixup(src, ld::Fixup::k1of5, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5916                                         }
5917                                         parser.addFixup(src, ld::Fixup::k2of5, ld::Fixup::kindAddAddend, (uint32_t)offsetInTarget+addend);
5918                                         parser.addFixup(src, ld::Fixup::k3of5, ld::Fixup::kindSubtractTargetAddress, fromAtom);
5919                                         parser.addFixup(src, ld::Fixup::k4of5, ld::Fixup::kindSubtractAddend, offsetInFrom);
5920                                         if ( sreloc->r_length() & 1 ) {
5921                                                 // high 16
5922                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
5923                                         }
5924                                         else {
5925                                                 // low 16
5926                                                 parser.addFixup(src, ld::Fixup::k5of5, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
5927                                         }
5928                                         result = true;
5929                                 }
5930                                 else
5931                                         throw "ARM_RELOC_HALF_SECTDIFF reloc missing following pair";
5932                                 break;
5933                         case ARM_RELOC_HALF:
5934                                 if ( nextRelocIsPair ) {
5935                                         instruction = LittleEndian::get32(*fixUpPtr);
5936                                         Atom<arm>* targetAtom  = parser.findAtomByAddress(sreloc->r_value());
5937                                         uint32_t instruction16;
5938                                         uint32_t other16 = (nextRelocAddress & 0xFFFF);
5939                                         bool isThumb;
5940                                         if ( sreloc->r_length() & 2 ) {
5941                                                 isThumb = true;
5942                                                 uint32_t i =    ((instruction & 0x00000400) >> 10);
5943                                                 uint32_t imm4 =  (instruction & 0x0000000F);
5944                                                 uint32_t imm3 = ((instruction & 0x70000000) >> 28);
5945                                                 uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
5946                                                 instruction16 = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
5947                                         }
5948                                         else {
5949                                                 isThumb = false;
5950                                                 uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
5951                                                 uint32_t imm12 = (instruction & 0x00000FFF);
5952                                                 instruction16 = (imm4 << 12) | imm12;
5953                                         }
5954                                         if ( sreloc->r_length() & 1 )
5955                                                 dstAddr = ((instruction16 << 16) | other16);
5956                                         else
5957                                                 dstAddr = (other16 << 16) | instruction16;
5958                                         if ( targetAtom->scope() == ld::Atom::scopeTranslationUnit ) {
5959                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, targetAtom);
5960                                         }
5961                                         else if ( (targetAtom->combine() == ld::Atom::combineByNameAndContent) || (targetAtom->combine() == ld::Atom::combineByNameAndReferences) ) {
5962                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, ld::Fixup::bindingByContentBound, targetAtom);
5963                                         }
5964                                         else {
5965                                                 parser.addFixup(src, ld::Fixup::k1of3, ld::Fixup::kindSetTargetAddress, false, targetAtom->name());
5966                                         }
5967                                         parser.addFixup(src, ld::Fixup::k2of3, ld::Fixup::kindAddAddend, dstAddr - targetAtom->_objAddress);
5968                                         if ( sreloc->r_length() & 1 ) {
5969                                                 // high 16
5970                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbHigh16 : ld::Fixup::kindStoreARMHigh16));
5971                                         }
5972                                         else {
5973                                                 // low 16
5974                                                 parser.addFixup(src, ld::Fixup::k3of3, (isThumb ? ld::Fixup::kindStoreThumbLow16 : ld::Fixup::kindStoreARMLow16));
5975                                         }
5976                                         result = true;
5977                                 }
5978                                 else
5979                                         throw "scattered ARM_RELOC_HALF reloc missing following pair";
5980                                 break;
5981                         default:
5982                                 throwf("unknown ARM scattered relocation type %d", sreloc->r_type());
5983                 }
5984         }
5985         return result;
5986 }
5987
5988
5989
5990
5991
5992 template <typename A>
5993 bool ObjC1ClassSection<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
5994 {
5995         // inherited
5996         FixedSizeSection<A>::addRelocFixup(parser, reloc);
5997
5998         assert(0 && "needs template specialization");
5999         return false;
6000 }
6001
6002 template <>
6003 bool ObjC1ClassSection<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6004 {
6005         // if this is the reloc for the super class name string, add implicit reference to super class
6006         if ( ((reloc->r_address() & R_SCATTERED) == 0) && (reloc->r_type() == GENERIC_RELOC_VANILLA) ) {
6007                 assert( reloc->r_length() == 2 );
6008                 assert( ! reloc->r_pcrel() );
6009
6010                 const macho_section<P>* sect = this->machoSection();
6011                 Parser<x86>::SourceLocation     src;
6012                 uint32_t srcAddr = sect->addr() + reloc->r_address();
6013                 src.atom = this->findAtomByAddress(srcAddr);
6014                 src.offsetInAtom = srcAddr - src.atom->objectAddress();
6015                 if ( src.offsetInAtom == 4 ) {
6016                         Parser<x86>::TargetDesc         stringTarget;
6017                         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6018                         uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6019                         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6020
6021                         assert(stringTarget.atom != NULL);
6022                         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6023                         const char* superClassBaseName = (char*)stringTarget.atom->rawContentPointer();
6024                         char* superClassName = new char[strlen(superClassBaseName) + 20];
6025                         strcpy(superClassName, ".objc_class_name_");
6026                         strcat(superClassName, superClassBaseName);
6027
6028                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, superClassName);
6029                 }
6030         }
6031         // inherited
6032         return FixedSizeSection<x86>::addRelocFixup(parser, reloc);
6033 }
6034
6035
6036
6037 template <typename A>
6038 bool Objc1ClassReferences<A>::addRelocFixup(class Parser<A>& parser, const macho_relocation_info<P>* reloc)
6039 {
6040         // inherited
6041         PointerToCStringSection<A>::addRelocFixup(parser, reloc);
6042
6043         assert(0 && "needs template specialization");
6044         return false;
6045 }
6046
6047
6048
6049 template <>
6050 bool Objc1ClassReferences<x86>::addRelocFixup(class Parser<x86>& parser, const macho_relocation_info<x86::P>* reloc)
6051 {
6052         // add implict class refs, fixups not usable yet, so look at relocations
6053         assert( (reloc->r_address() & R_SCATTERED) == 0 );
6054         assert( reloc->r_type() == GENERIC_RELOC_VANILLA );
6055         assert( reloc->r_length() == 2 );
6056         assert( ! reloc->r_pcrel() );
6057
6058         const macho_section<P>* sect = this->machoSection();
6059         Parser<x86>::SourceLocation     src;
6060         uint32_t srcAddr = sect->addr() + reloc->r_address();
6061         src.atom = this->findAtomByAddress(srcAddr);
6062         src.offsetInAtom = srcAddr - src.atom->objectAddress();
6063         Parser<x86>::TargetDesc         stringTarget;
6064         const uint8_t* fixUpPtr = file().fileContent() + sect->offset() + reloc->r_address();
6065         uint32_t contentValue = LittleEndian::get32(*((uint32_t*)fixUpPtr));
6066         parser.findTargetFromAddressAndSectionNum(contentValue, reloc->r_symbolnum(), stringTarget);
6067
6068         assert(stringTarget.atom != NULL);
6069         assert(stringTarget.atom->contentType() == ld::Atom::typeCString);
6070         const char* baseClassName = (char*)stringTarget.atom->rawContentPointer();
6071         char* objcClassName = new char[strlen(baseClassName) + 20];
6072         strcpy(objcClassName, ".objc_class_name_");
6073         strcat(objcClassName, baseClassName);
6074
6075         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindSetTargetAddress, false, objcClassName);
6076
6077         // inherited
6078         return PointerToCStringSection<x86>::addRelocFixup(parser, reloc);
6079 }
6080
6081
6082 template <typename A>
6083 void Section<A>::makeFixups(class Parser<A>& parser, const struct Parser<A>::CFI_CU_InfoArrays&)
6084 {
6085         const macho_section<P>* sect = this->machoSection();
6086         const macho_relocation_info<P>* relocs = (macho_relocation_info<P>*)(file().fileContent() + sect->reloff());
6087         const uint32_t relocCount = sect->nreloc();
6088         for (uint32_t r = 0; r < relocCount; ++r) {
6089                 try {
6090                         if ( this->addRelocFixup(parser, &relocs[r]) )
6091                                 ++r; // skip next
6092                 }
6093                 catch (const char* msg) {
6094                         throwf("in section %s,%s reloc %u: %s", sect->segname(), Section<A>::makeSectionName(sect), r, msg);
6095                 }
6096         }
6097
6098         // add follow-on fixups if .o file is missing .subsections_via_symbols
6099         if ( this->addFollowOnFixups() ) {
6100                 Atom<A>* end = &_endAtoms[-1];
6101                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6102                         typename Parser<A>::SourceLocation src(p, 0);
6103                         Atom<A>* nextAtom = &p[1];
6104                         parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6105                 }
6106         }
6107         else if ( this->type() == ld::Section::typeCode ) {
6108                 // if FDE broke text not at a symbol, use followOn to keep code together
6109                 Atom<A>* end = &_endAtoms[-1];
6110                 for(Atom<A>* p = _beginAtoms; p < end; ++p) {
6111                         typename Parser<A>::SourceLocation src(p, 0);
6112                         Atom<A>* nextAtom = &p[1];
6113                         if ( (p->symbolTableInclusion() == ld::Atom::symbolTableIn) && (nextAtom->symbolTableInclusion() == ld::Atom::symbolTableNotIn) ) {
6114                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, nextAtom);
6115                         }
6116                 }
6117         }
6118
6119         // add follow-on fixups for aliases
6120         if ( _hasAliases ) {
6121                 for(Atom<A>* p = _beginAtoms; p < _endAtoms; ++p) {
6122                         if ( p->isAlias() && ! this->addFollowOnFixups() ) {
6123                                 Atom<A>* targetOfAlias = &p[1];
6124                                 assert(p < &_endAtoms[-1]);
6125                                 assert(p->_objAddress == targetOfAlias->_objAddress);
6126                                 typename Parser<A>::SourceLocation src(p, 0);
6127                                 parser.addFixup(src, ld::Fixup::k1of1, ld::Fixup::kindNoneFollowOn, targetOfAlias);
6128                         }
6129                 }
6130         }
6131 }
6132
6133
6134
6135 //
6136 // main function used by linker to instantiate ld::Files
6137 //
6138 ld::relocatable::File* parse(const uint8_t* fileContent, uint64_t fileLength,
6139                                                                 const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
6140 {
6141         switch ( opts.architecture ) {
6142                 case CPU_TYPE_X86_64:
6143                         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) )
6144                                 return mach_o::relocatable::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6145                         break;
6146                 case CPU_TYPE_I386:
6147                         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) )
6148                                 return mach_o::relocatable::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6149                         break;
6150                 case CPU_TYPE_ARM:
6151                         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) )
6152                                 return mach_o::relocatable::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
6153                         break;
6154         }
6155         return NULL;
6156 }
6157
6158 //
6159 // used by archive reader to validate member object file
6160 //
6161 bool isObjectFile(const uint8_t* fileContent, uint64_t fileLength, const ParserOptions& opts)
6162 {
6163         switch ( opts.architecture ) {
6164                 case CPU_TYPE_X86_64:
6165                         return ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) );
6166                 case CPU_TYPE_I386:
6167                         return ( mach_o::relocatable::Parser<x86>::validFile(fileContent) );
6168                 case CPU_TYPE_ARM:
6169                         return ( mach_o::relocatable::Parser<arm>::validFile(fileContent, opts.objSubtypeMustMatch, opts.subType) );
6170         }
6171         return false;
6172 }
6173
6174 //
6175 // used by linker to infer architecture when no -arch is on command line
6176 //
6177 bool isObjectFile(const uint8_t* fileContent, cpu_type_t* result, cpu_subtype_t* subResult)
6178 {
6179         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6180                 *result = CPU_TYPE_X86_64;
6181                 *subResult = CPU_SUBTYPE_X86_64_ALL;
6182                 return true;
6183         }
6184         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6185                 *result = CPU_TYPE_I386;
6186                 *subResult = CPU_SUBTYPE_X86_ALL;
6187                 return true;
6188         }
6189         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6190                 *result = CPU_TYPE_ARM;
6191                 const macho_header<Pointer32<LittleEndian> >* header = (const macho_header<Pointer32<LittleEndian> >*)fileContent;
6192                 *subResult = header->cpusubtype();
6193                 return true;
6194         }
6195         return false;
6196 }
6197
6198 //
6199 // used by linker is error messages to describe bad .o file
6200 //
6201 const char* archName(const uint8_t* fileContent)
6202 {
6203         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6204                 return mach_o::relocatable::Parser<x86_64>::fileKind(fileContent);
6205         }
6206         if ( mach_o::relocatable::Parser<x86>::validFile(fileContent) ) {
6207                 return mach_o::relocatable::Parser<x86>::fileKind(fileContent);
6208         }
6209         if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6210                 return mach_o::relocatable::Parser<arm>::fileKind(fileContent);
6211         }
6212         return NULL;
6213 }
6214
6215 //
6216 // Used by archive reader when -ObjC option is specified
6217 //
6218 bool hasObjC2Categories(const uint8_t* fileContent)
6219 {
6220         if ( mach_o::relocatable::Parser<x86_64>::validFile(fileContent) ) {
6221                 return mach_o::relocatable::Parser<x86_64>::hasObjC2Categories(fileContent);
6222         }
6223         else if ( mach_o::relocatable::Parser<arm>::validFile(fileContent, false, 0) ) {
6224                 return mach_o::relocatable::Parser<arm>::hasObjC2Categories(fileContent);
6225         }
6226         else if ( mach_o::relocatable::Parser<x86>::validFile(fileContent, false, 0) ) {
6227                 return mach_o::relocatable::Parser<x86>::hasObjC2Categories(fileContent);
6228         }
6229         return false;
6230 }
6231
6232
6233
6234 } // namespace relocatable
6235 } // namespace mach_o
6236
6237