1 /* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
5 * @APPLE_LICENSE_HEADER_START@
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
22 * @APPLE_LICENSE_HEADER_END@
24 #ifndef __MACH_O_TRIE__
25 #define __MACH_O_TRIE__
30 #include "MachOFileAbstraction.hpp"
38 Edge(const char* s
, struct Node
* n
) : fSubString(s
), fChild(n
) { }
40 const char* fSubString
;
47 Node(const char* s
) : fCummulativeString(s
), fAddress(0), fFlags(0),
48 fOther(0), fImportedName(NULL
), fOrdered(false),
49 fHaveExportInfo(false), fTrieOffset(0) {}
51 const char* fCummulativeString
;
52 std::vector
<Edge
> fChildren
;
56 const char* fImportedName
;
61 void addSymbol(const char* fullStr
, uint64_t address
, uint64_t flags
, uint64_t other
, const char* importName
) {
62 const char* partialStr
= &fullStr
[strlen(fCummulativeString
)];
63 for (std::vector
<Edge
>::iterator it
= fChildren
.begin(); it
!= fChildren
.end(); ++it
) {
65 int subStringLen
= strlen(e
.fSubString
);
66 if ( strncmp(e
.fSubString
, partialStr
, subStringLen
) == 0 ) {
67 // already have matching edge, go down that path
68 e
.fChild
->addSymbol(fullStr
, address
, flags
, other
, importName
);
72 for (int i
=subStringLen
-1; i
> 0; --i
) {
73 if ( strncmp(e
.fSubString
, partialStr
, i
) == 0 ) {
74 // found a common substring, splice in new node
75 // was A -> C, now A -> B -> C
76 char* bNodeCummStr
= strdup(e
.fChild
->fCummulativeString
);
77 bNodeCummStr
[strlen(bNodeCummStr
)+i
-subStringLen
] = '\0';
79 Node
* bNode
= new Node(bNodeCummStr
);
80 Node
* cNode
= e
.fChild
;
81 char* abEdgeStr
= strdup(e
.fSubString
);
83 char* bcEdgeStr
= strdup(&e
.fSubString
[i
]);
85 abEdge
.fSubString
= abEdgeStr
;
86 abEdge
.fChild
= bNode
;
87 Edge
bcEdge(bcEdgeStr
, cNode
);
88 bNode
->fChildren
.push_back(bcEdge
);
89 bNode
->addSymbol(fullStr
, address
, flags
, other
, importName
);
95 if ( flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) {
96 assert(importName
!= NULL
);
99 if ( flags
& EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
) {
102 // no commonality with any existing child, make a new edge that is this whole string
103 Node
* newNode
= new Node(strdup(fullStr
));
104 Edge
newEdge(strdup(partialStr
), newNode
);
105 fChildren
.push_back(newEdge
);
106 newNode
->fAddress
= address
;
107 newNode
->fFlags
= flags
;
108 newNode
->fOther
= other
;
109 if ( (flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) && (importName
!= NULL
) && (strcmp(fullStr
,importName
) != 0) )
110 newNode
->fImportedName
= importName
;
112 newNode
->fImportedName
= NULL
;
113 newNode
->fHaveExportInfo
= true;
116 void addOrderedNodes(const char* name
, std::vector
<Node
*>& orderedNodes
) {
118 orderedNodes
.push_back(this);
119 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
122 const char* partialStr
= &name
[strlen(fCummulativeString
)];
123 for (std::vector
<Edge
>::iterator it
= fChildren
.begin(); it
!= fChildren
.end(); ++it
) {
125 int subStringLen
= strlen(e
.fSubString
);
126 if ( strncmp(e
.fSubString
, partialStr
, subStringLen
) == 0 ) {
127 // already have matching edge, go down that path
128 e
.fChild
->addOrderedNodes(name
, orderedNodes
);
134 // byte for terminal node size in bytes, or 0x00 if not terminal node
135 // teminal node (uleb128 flags, uleb128 addr [uleb128 other])
136 // byte for child node count
137 // each child: zero terminated substring, uleb128 node offset
138 bool updateOffset(uint32_t& offset
) {
139 uint32_t nodeSize
= 1; // length of export info when no export info
140 if ( fHaveExportInfo
) {
141 if ( fFlags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) {
142 nodeSize
= uleb128_size(fFlags
) + uleb128_size(fOther
); // ordinal
143 if ( fImportedName
!= NULL
)
144 nodeSize
+= strlen(fImportedName
);
145 ++nodeSize
; // trailing zero in imported name
148 nodeSize
= uleb128_size(fFlags
) + uleb128_size(fAddress
);
149 if ( fFlags
& EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
)
150 nodeSize
+= uleb128_size(fOther
);
152 // do have export info, overall node size so far is uleb128 of export info + export info
153 nodeSize
+= uleb128_size(nodeSize
);
156 ++nodeSize
; // byte for count of chidren
157 for (std::vector
<Edge
>::iterator it
= fChildren
.begin(); it
!= fChildren
.end(); ++it
) {
159 nodeSize
+= strlen(e
.fSubString
) + 1 + uleb128_size(e
.fChild
->fTrieOffset
);
161 bool result
= (fTrieOffset
!= offset
);
162 fTrieOffset
= offset
;
163 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
165 // return true if fTrieOffset was changed
169 void appendToStream(std::vector
<uint8_t>& out
) {
170 if ( fHaveExportInfo
) {
171 if ( fFlags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) {
172 if ( fImportedName
!= NULL
) {
173 // nodes with re-export info: size, flags, ordinal, string
174 uint32_t nodeSize
= uleb128_size(fFlags
) + uleb128_size(fOther
) + strlen(fImportedName
) + 1;
175 out
.push_back(nodeSize
);
176 append_uleb128(fFlags
, out
);
177 append_uleb128(fOther
, out
);
178 append_string(fImportedName
, out
);
181 // nodes with re-export info: size, flags, ordinal, empty-string
182 uint32_t nodeSize
= uleb128_size(fFlags
) + uleb128_size(fOther
) + 1;
183 out
.push_back(nodeSize
);
184 append_uleb128(fFlags
, out
);
185 append_uleb128(fOther
, out
);
189 else if ( fFlags
& EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
) {
190 // nodes with export info: size, flags, address, other
191 uint32_t nodeSize
= uleb128_size(fFlags
) + uleb128_size(fAddress
) + uleb128_size(fOther
);
192 out
.push_back(nodeSize
);
193 append_uleb128(fFlags
, out
);
194 append_uleb128(fAddress
, out
);
195 append_uleb128(fOther
, out
);
198 // nodes with export info: size, flags, address
199 uint32_t nodeSize
= uleb128_size(fFlags
) + uleb128_size(fAddress
);
200 out
.push_back(nodeSize
);
201 append_uleb128(fFlags
, out
);
202 append_uleb128(fAddress
, out
);
206 // no export info uleb128 of zero is one byte of zero
209 // write number of children
210 out
.push_back(fChildren
.size());
212 for (std::vector
<Edge
>::iterator it
= fChildren
.begin(); it
!= fChildren
.end(); ++it
) {
214 append_string(e
.fSubString
, out
);
215 append_uleb128(e
.fChild
->fTrieOffset
, out
);
220 static void append_uleb128(uint64_t value
, std::vector
<uint8_t>& out
) {
229 } while( byte
>= 0x80 );
232 static void append_string(const char* str
, std::vector
<uint8_t>& out
) {
233 for (const char* s
= str
; *s
!= '\0'; ++s
)
238 static unsigned int uleb128_size(uint64_t value
) {
243 } while ( value
!= 0 );
250 inline uint64_t read_uleb128(const uint8_t*& p
, const uint8_t* end
) {
255 throw "malformed uleb128 extends beyond trie";
257 uint64_t slice
= *p
& 0x7f;
259 if (bit
>= 64 || slice
<< bit
>> bit
!= slice
)
260 throw "uleb128 too big for 64-bits";
262 result
|= (slice
<< bit
);
278 const char* importName
;
283 inline void makeTrie(const std::vector
<Entry
>& entries
, std::vector
<uint8_t>& output
)
285 Node
start(strdup(""));
287 // make nodes for all exported symbols
288 for (std::vector
<Entry
>::const_iterator it
= entries
.begin(); it
!= entries
.end(); ++it
) {
289 start
.addSymbol(it
->name
, it
->address
, it
->flags
, it
->other
, it
->importName
);
292 // create vector of nodes
293 std::vector
<Node
*> orderedNodes
;
294 orderedNodes
.reserve(entries
.size()*2);
295 for (std::vector
<Entry
>::const_iterator it
= entries
.begin(); it
!= entries
.end(); ++it
) {
296 start
.addOrderedNodes(it
->name
, orderedNodes
);
299 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
304 for (std::vector
<Node
*>::iterator it
= orderedNodes
.begin(); it
!= orderedNodes
.end(); ++it
) {
305 if ( (*it
)->updateOffset(offset
) )
310 // create trie stream
311 for (std::vector
<Node
*>::iterator it
= orderedNodes
.begin(); it
!= orderedNodes
.end(); ++it
) {
312 (*it
)->appendToStream(output
);
316 struct EntryWithOffset
318 uintptr_t nodeOffset
;
321 bool operator<(const EntryWithOffset
& other
) const { return ( nodeOffset
< other
.nodeOffset
); }
326 static inline void processExportNode(const uint8_t* const start
, const uint8_t* p
, const uint8_t* const end
,
327 char* cummulativeString
, int curStrOffset
,
328 std::vector
<EntryWithOffset
>& output
)
331 throw "malformed trie, node past end";
332 const uint64_t terminalSize
= read_uleb128(p
, end
);
333 const uint8_t* children
= p
+ terminalSize
;
334 if ( terminalSize
!= 0 ) {
336 e
.nodeOffset
= p
-start
;
337 e
.entry
.name
= strdup(cummulativeString
);
338 e
.entry
.flags
= read_uleb128(p
, end
);
339 if ( e
.entry
.flags
& EXPORT_SYMBOL_FLAGS_REEXPORT
) {
341 e
.entry
.other
= read_uleb128(p
, end
); // dylib ordinal
342 e
.entry
.importName
= (char*)p
;
345 e
.entry
.address
= read_uleb128(p
, end
);
346 if ( e
.entry
.flags
& EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER
)
347 e
.entry
.other
= read_uleb128(p
, end
);
350 e
.entry
.importName
= NULL
;
354 if ( children
> end
)
355 throw "malformed trie, terminalSize extends beyond trie data";
356 const uint8_t childrenCount
= *children
++;
357 const uint8_t* s
= children
;
358 for (uint8_t i
=0; i
< childrenCount
; ++i
) {
361 cummulativeString
[curStrOffset
+edgeStrLen
] = *s
++;
364 cummulativeString
[curStrOffset
+edgeStrLen
] = *s
++;
365 uint32_t childNodeOffset
= read_uleb128(s
, end
);
366 if (childNodeOffset
== 0)
367 throw "malformed trie, childNodeOffset==0";
368 processExportNode(start
, start
+childNodeOffset
, end
, cummulativeString
, curStrOffset
+edgeStrLen
, output
);
373 inline void parseTrie(const uint8_t* start
, const uint8_t* end
, std::vector
<Entry
>& output
)
375 // empty trie has no entries
378 // worst case largest exported symbol names is length of whole trie
379 char* cummulativeString
= new char[end
-start
];
380 std::vector
<EntryWithOffset
> entries
;
381 processExportNode(start
, start
, end
, cummulativeString
, 0, entries
);
382 // to preserve tie layout order, sort by node offset
383 std::sort(entries
.begin(), entries
.end());
385 output
.reserve(entries
.size());
386 for (std::vector
<EntryWithOffset
>::iterator it
=entries
.begin(); it
!= entries
.end(); ++it
)
387 output
.push_back(it
->entry
);
388 delete [] cummulativeString
;
395 }; // namespace mach_o
398 #endif // __MACH_O_TRIE__