2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #if defined(__linux__) || defined(__FreeBSD__)
19 #include <folly/Demangle.h>
20 #include <folly/Format.h>
21 #include <folly/Memory.h>
22 #include <folly/ScopeGuard.h>
23 #include <folly/String.h>
24 #include <folly/container/F14Map.h>
25 #include <folly/container/F14Set.h>
26 #include <folly/portability/Unistd.h>
30 #include <sys/types.h>
34 #include "hphp/util/assertions.h"
35 #include "hphp/util/functional.h"
36 #include "hphp/util/job-queue.h"
37 #include "hphp/util/timer.h"
38 #include "hphp/util/trace.h"
40 #include "hphp/tools/debug-parser/debug-parser.h"
41 #include "hphp/tools/debug-parser/dwarfstate.h"
44 * Debug parser for DWARF (using dwarfstate)
46 * DWARF is structured as a forest of DIEs (Debug Information Entry). Each DIE
47 * has a tag, which describes what kind of DIE it is, and a list of
48 * attributes. Each attribute has a type, which identifies what it is, and a
49 * value (the type of the value is implied by the attribute type). Furthermore,
50 * a DIE can have other DIEs as children. The top-level DIEs correspond to
51 * compilation-units, and all the children of these top-level DIEs correspond to
52 * the information in that compilation-unit.
54 * The meaning and interpretation of the DIEs is deliberately left vague by the
55 * standard, so different compilers can encode things in different ways (and no
56 * implementation is bug free).
59 namespace debug_parser
{ namespace {
63 ////////////////////////////////////////////////////////////////////////////////
65 // Allow foreach on a range (as returned by equal_range)
// Range-for adapter: treats an iterator pair (such as the result of
// equal_range) as a range whose start is the pair's first iterator.
template <typename Iter>
Iter begin(std::pair<Iter, Iter> range) {
  Iter head = range.first;
  return head;
}
// Range-for adapter: treats an iterator pair (such as the result of
// equal_range) as a range whose end is the pair's second iterator.
template <typename Iter>
Iter end(std::pair<Iter, Iter> range) {
  Iter tail = range.second;
  return tail;
}
70 * Fully qualified names aren't represented explicitly in DWARF. Instead the
71 * structure of the DIEs mimics the nesting structure in the source (IE, a
72 * nested class within a class nested within a namespace). So, in order to
73 * infer the fully qualified name for any given class, the current scope is
74 * tracked as the DIEs are walked.
76 * Likewise, DWARF has no concept of linkage, but the linkage is needed to know
77 * which types are actually equivalent. Luckily, a type's linkage is closely
78 * related to its scope (except for templates, see below), so it can be inferred
81 * The scope is tracked as a stack of contexts, pushing and popping off contexts
82 * when a namespace or type is entered or exited.
86 explicit Scope(GlobalOff cu_offset
)
87 : m_cu_offset
{cu_offset
}
90 ObjectTypeName
{std::string
{}, ObjectTypeName::Linkage::external
},
95 GlobalOff
cuOffset() const { return m_cu_offset
; }
97 ObjectTypeName
name() const;
99 // Fix the name of a type to match where it is in the namespace/type
101 void fixName(ObjectTypeName newName
);
103 ObjectTypeName::Linkage
linkage() const {
104 return m_scope
.back().name
.linkage
;
107 std::size_t unnamedTypeCount() const {
108 return m_scope
.back().unnamed_count
;
111 bool isInNamespaceScope() const {
112 return m_scope
.back().in_namespace_scope
;
115 void incUnnamedTypeCount() { ++m_scope
.back().unnamed_count
; }
117 HPHP::Optional
<GlobalOff
> typeOffset() const {
118 return m_scope
.back().offset
;
121 void pushType(std::string name
, GlobalOff offset
) {
122 m_scope
.emplace_back(
123 ObjectTypeName
{std::move(name
), linkage()},
126 m_scope
.back().offset
= offset
;
129 void pushUnnamedType(std::string name
, GlobalOff offset
) {
130 m_scope
.emplace_back(
133 ObjectTypeName::Linkage::none
137 m_scope
.back().offset
= offset
;
140 void pushNamespace(std::string ns
) {
141 m_scope
.emplace_back(
142 ObjectTypeName
{std::move(ns
), linkage()},
147 void pushUnnamedNamespace() {
148 m_scope
.emplace_back(
150 "(unnamed namespace)",
151 ObjectTypeName::Linkage::internal
157 void pop() { m_scope
.pop_back(); }
161 Context(ObjectTypeName name
, bool in_namespace_scope
)
162 : name(std::move(name
))
163 , in_namespace_scope
{in_namespace_scope
} {}
165 bool in_namespace_scope
;
166 std::size_t unnamed_count
= 0;
167 HPHP::Optional
<GlobalOff
> offset
;
169 std::vector
<Context
> m_scope
;
170 GlobalOff m_cu_offset
;
173 static const std::string s_pseudo_type_name
;
177 * Actual implementation of TypeParser for DWARF.
180 struct TypeParserImpl
: TypeParser
{
181 explicit TypeParserImpl(const std::string
& filename
, int num_threads
);
183 Object
getObject(ObjectTypeKey key
) override
;
185 size_t getObjectBlockCount() const override
;
188 const std::vector
<ObjectType
>& getObjectBlock(size_t index
) const override
;
193 struct LinkageDependents
{
194 folly::F14FastSet
<GlobalOff
> template_uses
;
195 folly::F14FastSet
<GlobalOff
> children
;
199 static auto constexpr kNoAddress
= std::numeric_limits
<uint64_t>::max();
200 std::string linkage_name
;
201 uint64_t address
{kNoAddress
};
202 bool is_member
{false};
206 const DwarfState
* dwarf
;
207 std::unique_ptr
<StateBlock
> state
;
208 folly::F14FastMap
<GlobalOff
, GlobalOff
> local_mappings
;
209 folly::F14FastMap
<GlobalOff
, LinkageDependents
> linkage_dependents
;
210 std::vector
<std::pair
<GlobalOff
, StaticSpec
>> raw_static_definitions
;
213 // Functions used while concurrently building state. Since these functions are
214 // invoked from multiple threads, they are static and take all their state
215 // explicitly as parameters.
216 static void genNames(Env
& env
,
219 std::vector
<GlobalOff
>* template_params
= nullptr);
221 static HPHP::Optional
<uintptr_t> interpretLocAddress(const DwarfState
& dwarf
,
222 Dwarf_Attribute attr
);
223 static HPHP::Optional
<GlobalOff
> parseSpecification(const DwarfState
& dwarf
,
227 void fixTemplateLinkage();
229 // Functions used after state is built. These are not thread-safe.
230 Object
genObject(Dwarf_Die die
,
233 Type
genType(Dwarf_Die die
);
234 Object::Member
genMember(Dwarf_Die die
,
235 const ObjectTypeName
& parent_name
);
236 Object::Function
genFunction(Dwarf_Die die
);
237 Object::Base
genBase(Dwarf_Die die
, const ObjectTypeName
& parent_name
);
238 Object::TemplateParam
genTemplateParam(Dwarf_Die die
);
239 HPHP::Optional
<size_t> determineArrayBound(Dwarf_Die die
);
241 void fillFuncArgs(Dwarf_Die die
, FuncType
& func
);
243 // Map a given offset to the state block which contains state for that offset
// Looks up the state block responsible for `offset` via m_state_map, a list
// of (starting offset, state block) pairs. m_state_map must be non-empty
// (asserted below).
245 const StateBlock
& stateForOffset(GlobalOff offset
) const {
246 assertx(!m_state_map
.empty());
// Find the first entry whose starting offset is greater than `offset`.
// NOTE(review): the range arguments of upper_bound are not visible in this
// view; presumably the whole m_state_map (sorted by offset) is searched --
// confirm.
247 auto it
= std::upper_bound(
251 [](GlobalOff offset
, const std::pair
<GlobalOff
, StateBlock
*>& p
) {
252 return offset
< p
.first
;
// Step back to the last entry that starts at or before `offset`. If the
// search already landed on begin(), `it` is left pointing at the first entry.
255 if (it
!= m_state_map
.begin()) --it
;
259 // All of the parser's persistent state is stored in some number of
260 // blocks. All of the blocks are computed concurrently, one block per
261 // thread. To avoid the overhead of merging the blocks together, they are kept
262 // separated. Instead m_state_map is used to map a given offset into the block
263 // which contains the state for that offset. It is a list of offset/state
264 // pairs. Any offset between the offset given in the pair and the one in the
265 // next pair is mapped to the state block in the pair.
267 // Note: this scheme only works because each compilation unit is
268 // self-contained and does not reference data in another compilation
269 // unit. However, nothing in DWARF prevents this and it's not guaranteed to
272 std::vector
<ObjectType
> all_objs
;
273 folly::F14FastMap
<GlobalOff
, size_t> obj_offsets
;
274 std::multimap
<GlobalOff
, StaticSpec
> static_definitions
;
276 std::vector
<std::unique_ptr
<StateBlock
>> m_states
;
277 std::vector
<std::pair
<GlobalOff
, StateBlock
*>> m_state_map
;
278 tbb::concurrent_hash_map
<GlobalOff
,
280 GlobalOff::Hash
> m_linkage_dependents
;
285 // Purposefully fake name to avoid confusion with an actual type.
286 const std::string
Scope::s_pseudo_type_name
= "@_PSEUDO_TY";
// Builds the qualified name of the current scope by joining the names of the
// entries on the scope stack (in stack order, oldest entry first) with "::".
// The returned name carries the linkage reported by linkage(), i.e. that of
// the innermost (last) entry. m_scope.begin() is dereferenced unconditionally,
// so the stack must not be empty.
288 ObjectTypeName
Scope::name() const {
289 auto iter
= m_scope
.begin();
290 std::string str
= iter
->name
.name
;
292 for (; iter
!= m_scope
.end(); ++iter
) {
// While nothing has been accumulated yet, take the component as-is so the
// result never starts with "::"; otherwise append it after a "::" separator.
293 if (str
.empty()) str
= iter
->name
.name
;
294 else str
= folly::sformat("{}::{}", str
, iter
->name
.name
);
296 return ObjectTypeName
{std::move(str
), linkage()};
// Replaces the name of the innermost scope entry with `newName`. With a
// single entry on the stack the name is stored verbatim. Otherwise `newName`
// is expected to be longer than the enclosing scope's qualified name
// (outerName); that prefix plus the two-character "::" separator is removed
// so only the innermost component is stored.
// NOTE(review): some statements between the visible lines are not shown in
// this view (presumably an early return after the single-entry case and the
// removal of the innermost entry before name() is called) -- confirm against
// the full file.
299 void Scope::fixName(ObjectTypeName newName
) {
300 if (m_scope
.size() == 1) {
301 m_scope
.back().name
= std::move(newName
);
// Take the innermost entry out so its other fields survive the rename.
305 auto context
= std::move(m_scope
.back());
307 auto outerName
= name();
308 assertx(newName
.name
.size() > outerName
.name
.size());
309 if (outerName
.name
.size()) {
// NOTE(review): this compares all of outerName against all of newName. Since
// newName is strictly longer (asserted above), compare() cannot return 0
// here, so the assertion looks like it can never hold when reached. The
// intent appears to be a prefix check, e.g.
// newName.name.compare(0, outerName.name.size(), outerName.name) -- confirm.
310 assertx(!outerName
.name
.compare(0, outerName
.name
.size(), newName
.name
));
// Skip the outer name and the "::" that follows it.
311 newName
.name
= newName
.name
.substr(outerName
.name
.size() + 2);
313 context
.name
= std::move(newName
);
314 m_scope
.push_back(std::move(context
));
317 TypeParserImpl::TypeParserImpl(const std::string
& filename
, int num_threads
)
320 // Processing each compilation unit is very expensive, as it involves walking
321 // a large part of the debug information. To speed things up (a lot), we build
322 // up the state concurrently. Create a job corresponding to each compilation
323 // unit in the file and enqueue the jobs with a thread pool. We'll find the
324 // offsets of the compilation units in the main thread, enqueuing them as we
325 // find them. This lets us not only exploit concurrency between processing
326 // compilation units, but between finding them and processing them.
328 // Each worker maintains its own private state which it populates for all the
329 // compilation units it's assigned (each worker can process multiple
330 // compilation units). Once done, all the different states are kept separate
331 // (merging them would be too expensive), but a mapping is constructed to map
332 // offsets to the appropriate state block.
334 // This whole scheme is only viable because (right now), debug information in
335 // a given compilation unit doesn't reference anything outside of that unit,
336 // so the state for any given compilation unit can be processed
339 // The context serves as the link between a worker and the TypeParserImpl
340 // state (this is forced by the JobQueueWorker interface).
342 const decltype(m_dwarf
)& dwarf
;
343 decltype(m_states
)& states
;
344 decltype(m_state_map
)& state_map
;
345 decltype(m_linkage_dependents
)& linkage_dependents
;
346 // The lock protects states, state_map, and the exception field (but only
347 // when the workers are running).
349 // Set to the exception if any of the workers threw (first one wins).
350 std::exception_ptr exception
;
353 // Thread worker. We'll end up with a state block for each one of these.
354 struct Worker
: HPHP::JobQueueWorker
<GlobalOff
, Context
*> {
357 // Remember each offset we processed so we can record it in the global state
358 // map when we finish.
359 std::vector
<GlobalOff
> offsets
;
361 void doJob(GlobalOff offset
) override
{
362 // Process a compilation unit at the given offset.
364 // We're going to use it so let's mark this worker active.
366 env
.dwarf
= &m_context
->dwarf
;
367 env
.state
= std::make_unique
<StateBlock
>();
370 offsets
.emplace_back(offset
);
372 // Do the actual processing, adding to the state block:
374 env
.dwarf
->onDIEAtOffset(
376 [&](Dwarf_Die cu
) { genNames(env
, cu
, scope
); }
379 auto const remap
= [&] (GlobalOff o
) {
380 auto const it
= env
.local_mappings
.find(o
);
381 if (it
!= env
.local_mappings
.end()) {
387 // Generate static_definitions by updating their keys collected during
388 // genNames. Some keys refer back to a DW_AT_member that belongs to a
389 // struct whose definition was in another type-unit. We want to add an
390 // entry for the member in the definition.
392 env
.raw_static_definitions
.begin(),
393 env
.raw_static_definitions
.end(),
395 env
.state
->static_definitions
,
396 env
.state
->static_definitions
.end()),
397 [&](const auto& elem
) {
398 return std::make_pair(remap(elem
.first
), std::move(elem
.second
));
400 env
.raw_static_definitions
.clear();
402 for (auto& linkage
: env
.linkage_dependents
) {
403 if (!linkage
.second
.template_uses
.size()) continue;
405 std::decay_t
<decltype(m_context
->linkage_dependents
)>::accessor acc
;
407 auto const inserted
=
408 m_context
->linkage_dependents
.insert(acc
, remap(linkage
.first
));
409 if (inserted
&& !env
.local_mappings
.size()) {
410 acc
->second
= std::move(linkage
.second
);
412 auto const process
= [&] (auto const& from
, auto& to
) {
413 for (auto& elm
: from
) {
414 to
.insert(remap(elm
));
417 process(linkage
.second
.template_uses
, acc
->second
.template_uses
);
418 process(linkage
.second
.children
, acc
->second
.children
);
421 env
.linkage_dependents
.clear();
422 env
.local_mappings
.clear();
424 // Store any exception thrown so it can be rethrown in the main
425 // thread. We only bother to store the first one.
427 std::lock_guard
<std::mutex
> guard
{m_context
->lock
};
428 if (!m_context
->exception
) {
429 m_context
->exception
= std::current_exception();
434 void onThreadExit() override
{
435 // The worker is done (we've been told to stop). Now that we know we won't
436 // be processing any more offsets, do the needed post-processing on the
437 // rest of the state.
438 if (!env
.dwarf
) return;
440 // Compute a mapping of an object type's offset to its location in the
442 env
.state
->obj_offsets
.reserve(env
.state
->all_objs
.size());
443 for (auto i
= size_t{0}; i
< env
.state
->all_objs
.size(); ++i
) {
444 env
.state
->obj_offsets
.emplace(
445 GlobalOff::fromRaw(env
.state
->all_objs
[i
].key
.object_id
), i
449 // Record all the offsets this worker processed (along with the state
450 // block) in the global state map. This is done using a lock because it's
451 // quick and only done when the thread is finishing.
452 std::lock_guard
<std::mutex
> guard
{m_context
->lock
};
453 auto const state
= env
.state
.get();
454 m_context
->states
.emplace_back(std::move(env
.state
));
455 for (auto offset
: offsets
) {
456 m_context
->state_map
.emplace_back(offset
, state
);
459 // Store any exception thrown so it can be rethrown in the main
460 // thread. We only bother to store the first one.
462 std::lock_guard
<std::mutex
> guard
{m_context
->lock
};
463 if (!m_context
->exception
) {
464 m_context
->exception
= std::current_exception();
470 // Create the thread pool
471 Context context
{m_dwarf
, m_states
, m_state_map
, m_linkage_dependents
};
472 HPHP::JobQueueDispatcher
<Worker
> dispatcher
{
473 num_threads
, num_threads
, 0, false, &context
478 FTRACE(1, "Adding type-units to dispatcher...\n");
479 // Iterate over every type-unit, enqueuing jobs which will
480 // concurrently scan that unit.
481 m_dwarf
.forEachTopLevelUnit(
483 dispatcher
.enqueue(m_dwarf
.getDIEOffset(tu
));
489 FTRACE(1, "... {} type-units added.\n", num_tu
);
492 FTRACE(1, "Adding compilation-units to dispatcher...\n");
493 // Iterate over every compilation-unit, enqueuing jobs which will
494 // concurrently scan that unit.
495 m_dwarf
.forEachCompilationUnit(
496 [&](Dwarf_Die cu
) { dispatcher
.enqueue(m_dwarf
.getDIEOffset(cu
)); ++num_cu
;}
499 FTRACE(1, "... {} compilation-units added.\n", num_cu
);
501 // Wait for all the workers to finish.
504 FTRACE(1, "Finished with genNames\n");
506 // If any of the workers caught an exception, rethrow here in the main
507 // thread. We don't need to bother taking the lock because all the workers are
509 if (context
.exception
) std::rethrow_exception(context
.exception
);
511 // Since the state map was appended to by the workers in a non-deterministic
512 // order, we need to sort it by offset so we can do efficient lookups later.
514 m_state_map
.begin(), m_state_map
.end(),
515 [&](const std::pair
<GlobalOff
, StateBlock
*>& p1
,
516 const std::pair
<GlobalOff
, StateBlock
*>& p2
) {
517 return p1
.first
< p2
.first
;
521 // Some of the static_definitions entries need to be moved to the
522 // correct block; eg they were seen when processing the cu
523 // containing the definition of the static member, but need to be
524 // moved to the state for the tu which contains the definition of
525 // the struct (which may or may not be the same state block).
526 folly::F14FastSet
<void*> seen
;
527 for (auto const& p
: m_state_map
) {
528 if (!seen
.insert(p
.second
).second
) continue;
529 auto curOff
= p
.first
;
530 auto curState
= p
.second
;
531 for (auto it
= p
.second
->static_definitions
.begin();
532 it
!= p
.second
->static_definitions
.end(); ) {
533 if (it
->first
!= curOff
) {
535 curState
= const_cast<decltype(p
.second
)>(&stateForOffset(curOff
));
537 if (curState
== p
.second
) {
541 curState
->static_definitions
.insert(*it
);
542 it
= p
.second
->static_definitions
.erase(it
);
546 fixTemplateLinkage();
547 m_linkage_dependents
.clear();
// Returns the number of state blocks held in m_states; each block's objects
// can be fetched with getObjectBlock().
550 size_t TypeParserImpl::getObjectBlockCount() const {
551 return m_states
.size();
// Returns the object types recorded in the state block at `index`. The index
// is used with operator[] and is not range-checked here, so it must be less
// than getObjectBlockCount().
554 const std::vector
<ObjectType
>&
555 TypeParserImpl::getObjectBlock(size_t index
) const {
556 return m_states
[index
]->all_objs
;
560 * As stated above, the linkage of templates is tricky. The linkage of a
561 * template is the most restrictive linkage of its original linkage and the
562 * linkage of its template parameters. Since some of the template parameters may
563 * not yet be parsed when we parse the template, the inference of the correct
564 * template linkage is deferred until all the types' linkages are computed.
566 * However, since templates can be parameters to other templates, this process
567 * must be repeated until no type's linkage changes.
569 * As an additional complication, the linkage of any nested class is inherited
570 * from its parent, so when a template's linkage changes, it must be bubbled
571 * down to any of its nested classes.
573 * When the names and initial linkages of all the types were generated, the
574 * relationship between templates, their parameters, and nested classes is
575 * recorded in linkage_dependents, which is used here.
577 void TypeParserImpl::fixTemplateLinkage() {
578 using ChangedSet
= folly::F14FastSet
<GlobalOff
>;
581 for (const auto& pair
: m_linkage_dependents
) {
582 if (pair
.second
.template_uses
.empty()) continue;
583 changed
.emplace(pair
.first
);
586 ChangedSet old_changed
;
587 while (!changed
.empty()) {
588 std::swap(changed
, old_changed
);
590 // For every type which has its linkage changed, update its dependents
591 // (templates where the type is used as a parameter, or nested classes) with
592 // the new linkage, and mark as being changed as well.
593 for (auto changed_offset
: old_changed
) {
594 decltype(m_linkage_dependents
)::const_accessor acc
;
595 if (!m_linkage_dependents
.find(acc
, changed_offset
)) continue;
597 auto const& children
= acc
->second
.children
;
598 auto const& template_uses
= acc
->second
.template_uses
;
600 auto const& changed_state
= stateForOffset(changed_offset
);
602 auto const it
= changed_state
.obj_offsets
.find(changed_offset
);
603 if (it
== changed_state
.obj_offsets
.end()) {
604 // This isn't right - if (e.g.) it's a pointer to an object type
605 // with internal linkage, we need to mark the dependents
606 // internal; but we don't track pointer types at all - so just
607 // assume this type doesn't matter. The same goes for other
608 // things like const struct types etc.
612 auto const& changed_obj
= changed_state
.all_objs
[it
->second
];
614 // Only update and mark if we actually make the linkage more restrictive.
615 if (changed_obj
.name
.linkage
!= ObjectTypeName::Linkage::external
) {
616 const auto process
= [&](GlobalOff dependent_offset
) {
617 auto& dep_state
= const_cast<StateBlock
&>(
618 stateForOffset(dependent_offset
)
620 auto const it
= dep_state
.obj_offsets
.find(dependent_offset
);
621 if (it
== dep_state
.obj_offsets
.end()) return;
622 auto& dependent_obj
= dep_state
.all_objs
[it
->second
];
623 if (dependent_obj
.name
.linkage
< changed_obj
.name
.linkage
) {
625 "Reducing linkage for {}({}) from {} to {} due to {}({})\n",
626 dependent_obj
.name
.name
,
627 GlobalOff::fromRaw(dependent_obj
.key
.object_id
),
628 show(dependent_obj
.name
.linkage
),
629 show(changed_obj
.name
.linkage
),
630 changed_obj
.name
.name
,
631 GlobalOff::fromRaw(changed_obj
.key
.object_id
));
632 dependent_obj
.name
.linkage
= changed_obj
.name
.linkage
;
633 changed
.emplace(dependent_offset
);
636 for (auto template_offset
: template_uses
) process(template_offset
);
637 for (auto child_offset
: children
) process(child_offset
);
645 Object
TypeParserImpl::getObject(ObjectTypeKey key
) {
646 auto const& state
= stateForOffset(GlobalOff::fromRaw(key
.object_id
));
647 auto iter
= state
.obj_offsets
.find(GlobalOff::fromRaw(key
.object_id
));
648 // If we don't know of an object type at the given location, assume it's
649 // referring to something we never parsed in the first place, so return the
651 if (iter
== state
.obj_offsets
.end()) {
654 Scope::s_pseudo_type_name
,
655 ObjectTypeName::Linkage::pseudo
,
659 Object::Kind::k_other
,
664 return m_dwarf
.onDIEAtOffset(
665 GlobalOff::fromRaw(key
.object_id
),
669 state
.all_objs
[iter
->second
].name
,
676 // For static members, determine how that member's address can be
677 // determined. In theory, this can be any arbitrary expression, but we only
678 // support constant addresses right now.
// Extracts a constant address from a location attribute. Returns the operand
// of the expression only when the attribute has form DW_FORM_exprloc and its
// expression consists of exactly one DW_OP_addr operation; in every other
// case returns std::nullopt.
679 HPHP::Optional
<uintptr_t>
680 TypeParserImpl::interpretLocAddress(const DwarfState
& dwarf
,
681 Dwarf_Attribute attr
) {
682 auto form
= dwarf
.getAttributeForm(attr
);
// Only expression-location forms are understood.
683 if (form
!= DW_FORM_exprloc
) return std::nullopt
;
684 auto exprs
= dwarf
.getAttributeValueExprLoc(attr
);
// Anything other than a lone DW_OP_addr is not a plain constant address.
685 if (exprs
.size() != 1) return std::nullopt
;
686 if (exprs
[0].lr_atom
!= DW_OP_addr
) return std::nullopt
;
687 return HPHP::Optional
<uintptr_t>{exprs
[0].lr_number
};
690 HPHP::Optional
<GlobalOff
>
691 TypeParserImpl::parseSpecification(const DwarfState
& dwarf
,
695 HPHP::Optional
<GlobalOff
> offset
;
696 bool is_inline
= false;
697 dwarf
.forEachAttribute(
699 [&](Dwarf_Attribute attr
) {
700 switch (dwarf
.getAttributeType(attr
)) {
701 case DW_AT_abstract_origin
:
702 offset
= dwarf
.onDIEAtOffset(
703 dwarf
.getAttributeValueRef(attr
),
704 [&](Dwarf_Die die2
) {
705 return parseSpecification(dwarf
, die2
, false, spec
);
709 case DW_AT_specification
:
710 offset
= dwarf
.getAttributeValueRef(attr
);
712 case DW_AT_linkage_name
:
713 if (spec
.linkage_name
.empty()) {
714 spec
.linkage_name
= dwarf
.getAttributeValueString(attr
);
718 if (spec
.address
== StaticSpec::kNoAddress
) {
719 if (auto const address
= interpretLocAddress(dwarf
, attr
)) {
720 spec
.address
= *address
;
725 if (spec
.address
== StaticSpec::kNoAddress
) {
726 spec
.address
= dwarf
.getAttributeValueAddr(attr
);
727 // Sometimes GCC and Clang will emit invalid function
728 // addresses. Usually zero, but sometimes a very low
729 // number. These numbers have the appearance of being
730 // un-relocated addresses, but it's in the final executable. As
731 // a safety net, if an address is provided, but it's abnormally
733 if (spec
.address
< 4096) spec
.address
= StaticSpec::kNoAddress
;
736 case DW_AT_object_pointer
:
737 // Just in case we actually have a definition, use it to infer
739 spec
.is_member
= true;
747 if (first
&& (is_inline
||
748 (spec
.linkage_name
.empty() &&
749 spec
.address
== StaticSpec::kNoAddress
&&
757 * Given a DIE, and the current scope, recursively generate the names/linkages
758 * for all the object types in this DIE and children. If template_params is
759 * provided, the parent DIE is an object type, so template_params should be
760 * filled with any template parameters in the child DIE.
762 void TypeParserImpl::genNames(Env
& env
,
765 std::vector
<GlobalOff
>* template_params
) {
766 auto& dwarf
= *env
.dwarf
;
767 auto& state
= *env
.state
;
769 const auto recurse
= [&](std::vector
<GlobalOff
>* params
= nullptr){
772 [&](Dwarf_Die child
) {
773 genNames(env
, child
, scope
, params
);
779 auto tag
= dwarf
.getTag(die
);
781 case DW_TAG_base_type
:
782 case DW_TAG_union_type
:
783 case DW_TAG_enumeration_type
:
784 case DW_TAG_structure_type
:
785 case DW_TAG_class_type
:
786 case DW_TAG_unspecified_type
: {
787 // Object-types. These have names and linkages, so we must record them.
789 // If this is a type-unit definition with a separate declaration
790 // in the same tu, declarationOffset will point to the
792 HPHP::Optional
<GlobalOff
> declarationOffset
;
794 // If this is a declaration in a cu, referring back to a
795 // tu-definition, definitionOffset will point to that
796 // definition. Such declarations are emitted for the
797 // *definitions* of static members (which always happen in cus,
799 HPHP::Optional
<GlobalOff
> definitionOffset
;
801 // Determine the base name, whether this type was unnamed, and whether
802 // this is an incomplete type or not from the DIE's attributes.
803 auto get_info
= [&](Dwarf_Die cur
,
804 bool updateOffsets
) ->
805 std::tuple
<std::string
, bool, bool> {
807 std::string linkage_name
;
808 auto incomplete
= false;
810 dwarf
.forEachAttribute(
812 [&](Dwarf_Attribute attr
) {
813 switch (dwarf
.getAttributeType(attr
)) {
815 name
= dwarf
.getAttributeValueString(attr
);
817 case DW_AT_linkage_name
:
818 linkage_name
= dwarf
.getAttributeValueString(attr
);
820 case DW_AT_declaration
:
821 incomplete
= dwarf
.getAttributeValueFlag(attr
);
823 case DW_AT_specification
:
824 // The compiler can spit out a declaration for a
825 // struct, followed later by the full definition. The
826 // full definition has a DW_AT_specification pointing
827 // back to the declaration - but note that the full
828 // definition may not be defined in the correct
829 // namespace - so we're going to keep the declaration,
830 // and update it based on the definition ignoring the
831 // definition's name (this feels a little backwards,
832 // but it's how DWARF works).
834 declarationOffset
= dwarf
.getAttributeValueRef(attr
);
837 case DW_AT_signature
:
839 dwarf
.getAttributeForm(attr
) == DW_FORM_ref_sig8
) {
840 // The actual definition is in another type-unit, we
841 // can ignore this declaration.
842 definitionOffset
= dwarf
.getAttributeValueRef(attr
);
852 // If there's an explicit name, just use that.
853 if (!name
.empty()) return std::make_tuple(name
, false, incomplete
);
855 // Otherwise, if there's a linkage name, demangle it, and strip off
856 // everything except the last section, and use that as the base
857 // name. For types which have external linkage, this lets us use
858 // whatever naming scheme the compiler has chosen for unnamed types.
859 if (!linkage_name
.empty()) {
860 auto demangled
= folly::demangle(linkage_name
.c_str()).toStdString();
861 auto index
= demangled
.rfind("::");
862 if (index
!= decltype(demangled
)::npos
) demangled
.erase(0, index
+2);
863 return std::make_tuple(demangled
, false, incomplete
);
866 // No explicit name and no linkage name to use, so we have to try to
867 // infer one ourselves (making it a synthetic name).
869 // Try the first named member
870 auto const first_member
= [&](const char* type
,
872 std::string first_member
;
875 [&](Dwarf_Die child
) {
876 if (dwarf
.getTag(child
) == member_type
) {
877 first_member
= dwarf
.getDIEName(child
);
879 return first_member
.empty();
882 if (!first_member
.empty()) {
883 return folly::sformat(
884 "(unnamed {} containing '{}')", type
, first_member
887 return std::string
{};
890 auto const type_name
= [&]{
891 if (tag
== DW_TAG_enumeration_type
) return "enumeration";
892 if (tag
== DW_TAG_union_type
) return "union";
893 if (tag
== DW_TAG_structure_type
) return "struct";
894 if (tag
== DW_TAG_class_type
) return "class";
898 auto const member_type
= [&]() {
899 if (tag
== DW_TAG_enumeration_type
) return DW_TAG_enumerator
;
900 return DW_TAG_member
;
903 auto first_member_name
= first_member(type_name(), member_type());
904 if (!first_member_name
.empty()) {
905 return std::make_tuple(
906 std::move(first_member_name
), true, incomplete
910 // If this is within a namespace, don't infer any name at all, keep it
911 // nameless. If it's not within a namespace (i.e., within a class), give it
912 // a unique name based on how many unnamed types we've seen so far. We
913 // can't do this for types within a namespace because namespaces are
914 // open and thus we can't force a global numbering of all types within
916 if (!scope
.isInNamespaceScope()) {
917 scope
.incUnnamedTypeCount();
918 return std::make_tuple(
922 scope
.unnamedTypeCount()
929 return std::make_tuple(
930 folly::sformat("(unnamed {})", type_name()),
935 const auto info
= get_info(die
, /*updateOffsets=*/true);
937 auto offset
= dwarf
.getDIEOffset(die
);
938 if (definitionOffset
) {
939 // This is a declaration which refers to the definition via
940 // DW_AT_signature. We'll see one of these for a class in the
941 // cu where its static members are defined. Later
942 // DW_TAG_variable nodes will refer back to the ones here,
943 // rather than the ones in the definition, so we need to
944 // record a map from any members defined here back to the
945 // original definition. We could also see them for parent
946 // classes, or for template param (a template param can refer
947 // to an out-of-unit type either by using a ref_sig8 directly,
948 // in which case we will have resolved the offset correctly,
949 // or it could have an offset to a type with a
950 // DW_AT_signature, in which case we'll need to fix it up
951 // later). In any case, add an entry to map our offset to the
952 // true definition, and entries to map any members to their
954 env
.local_mappings
.emplace(offset
, *definitionOffset
);
956 folly::F14FastMap
<std::string
, GlobalOff
> map
;
959 [&] (Dwarf_Die child
) {
960 if (dwarf
.getTag(child
) == DW_TAG_member
) {
961 map
.emplace(dwarf
.getDIEName(child
), dwarf
.getDIEOffset(child
));
969 [&] (Dwarf_Die orig
) {
972 [&] (Dwarf_Die child
) {
973 auto it
= map
.find(dwarf
.getDIEName(child
));
974 if (it
!= map
.end()) {
975 env
.local_mappings
.emplace(it
->second
,
976 dwarf
.getDIEOffset(child
));
986 auto parent_offset
= scope
.typeOffset();
988 // If we inferred a base name, use that to form the fully qualified name,
989 // otherwise treat it as an unnamed type.
990 if (!definitionOffset
) {
992 scope
.pushUnnamedType(std::get
<0>(info
), offset
) :
993 scope
.pushType(std::get
<0>(info
), offset
);
995 // Push the name of the definition, not of the declaration
998 [&] (Dwarf_Die def
) {
999 const auto info_def
= get_info(def
, /*updateOffsets=*/false);
1000 std::get
<1>(info_def
) ?
1001 scope
.pushUnnamedType(std::get
<0>(info_def
), offset
) :
1002 scope
.pushType(std::get
<0>(info_def
), offset
);
1005 SCOPE_EXIT
{ scope
.pop(); };
1007 if (declarationOffset
) {
1008 // This completes a previous declaration. Search backwards for
1009 // it, which should be fine because it's normally right after
1010 // the declaration (and it's always in the same cu/tu).
1011 auto i
= state
.all_objs
.size();
1014 auto& obj
= state
.all_objs
[--i
];
1015 if (obj
.key
.object_id
== declarationOffset
->raw()) {
1016 assert(obj
.incomplete
);
1018 "Completing previous definition of {}.\n"
1019 " Was {}, Now {}, Linkage: {}\n",
1021 GlobalOff::fromRaw(obj
.key
.object_id
), offset
,
1022 show(obj
.name
.linkage
)
1024 obj
.incomplete
= false;
1025 obj
.key
.object_id
= offset
.raw();
1026 // map declarationOffset to offset, because any ref_sig8s
1027 // will point to the definition, not the declaration.
1028 env
.local_mappings
.emplace(*declarationOffset
, offset
);
1030 // Fixup the name in the scope stack
1031 scope
.fixName(obj
.name
);
1032 assertx(scope
.name().name
== obj
.name
.name
);
1037 // Record this object type, with fully qualified name, key, and linkage.
1038 auto obj
= ObjectType
{
1040 ObjectTypeKey
{offset
.raw(), scope
.cuOffset().raw()},
1044 "{} {} at {} Linkage: {}\n",
1045 obj
.incomplete
? "Declaring" : "Defining",
1048 show(obj
.name
.linkage
)
1050 state
.all_objs
.emplace_back(std::move(obj
));
1053 // This object type is done, so recurse into any nested classes. Provide a
1054 // list of template parameters to be filled in case this is a template. If
1055 // it is, we'll record the linkage dependence for the later template
1057 std::vector
<GlobalOff
> recurse_template_params
;
1058 recurse(&recurse_template_params
);
1060 for (auto param_offset
: recurse_template_params
) {
1061 FTRACE(9, "linkage: {} depends on template param {}\n",
1062 offset
, param_offset
);
1063 env
.linkage_dependents
[param_offset
].template_uses
.emplace(offset
);
1065 if (parent_offset
) {
1066 FTRACE(9, "linkage: {} depends on child {}\n",
1067 *parent_offset
, offset
);
1068 env
.linkage_dependents
[*parent_offset
].children
.emplace(offset
);
1072 case DW_TAG_namespace
: {
1073 // Record the namespace in the scope and recurse. If this is an unnamed
1074 // namespace, that means any type found in child DIEs will have internal
1076 auto name
= dwarf
.getDIEName(die
);
1078 scope
.pushUnnamedNamespace() :
1079 scope
.pushNamespace(std::move(name
));
1080 SCOPE_EXIT
{ scope
.pop(); };
1084 case DW_TAG_variable
: {
1085 // Normally we don't care about variables since we're only looking for
1086 // types. However, certain aspects of object types can't be completely
1087 // inferred at the declaration site (mainly static variable linkage
1088 // related things like linkage name and address). We need a definition for
1089 // that, so record all the variable definitions along with their
1090 // specification, which we can consult later.
1092 // Neither GCC nor Clang record a name for a variable which is a static
1093 // definition, so ignore any that do have a name. This speeds things up.
1094 if (!dwarf
.getDIEName(die
).empty()) break;
1097 if (auto off
= parseSpecification(dwarf
, die
, true, spec
)) {
1098 env
.raw_static_definitions
.emplace_back(*off
, spec
);
1100 // Note that we don't recurse into any child DIEs here. There shouldn't be
1101 // anything interesting in them.
1104 case DW_TAG_subprogram
: {
1105 // For the same reason we care about DW_TAG_variables, we examine
1106 // DW_TAG_subprogram as well. Certain interesting aspects of a static
1107 // function are only present in its definition.
1109 if (!dwarf
.getDIEName(die
).empty()) break;
1112 if (auto off
= parseSpecification(dwarf
, die
, true, spec
)) {
1113 env
.raw_static_definitions
.emplace_back(*off
, spec
);
1116 // Don't recurse. There might be valid types within a subprogram
1117 // definition, but we deliberately ignore those. A large portion of the
1118 // debug information lies within subprogram definitions, and scanning all
1119 // of that consumes a large amount of time. Moreover, these types usually
1120 // aren't very interesting, so we deliberately ignore them for
1121 // efficiency. If there's actually any reference to these types, they'll
1122 // be reported as the pseudo-type.
1125 case DW_TAG_template_type_param
: {
1126 // Template type parameters are represented using child DIEs, not
1127 // attributes. If the parent DIE was an object type, fill the supplied
1128 // vector with the template parameters. Don't recurse because there
1129 // shouldn't be anything interesting in the children.
1130 if (template_params
) {
1131 dwarf
.forEachAttribute(
1133 [&](Dwarf_Attribute attr
) {
1134 switch (dwarf
.getAttributeType(attr
)) {
1136 auto offset
= dwarf
.getAttributeValueRef(attr
);
1137 // Check this type to see if it is a declaration and use the
1138 // real type instead
1139 dwarf
.onDIEAtOffset(
1141 [&] (Dwarf_Die type_die
) {
1142 dwarf
.forEachAttribute(
1144 [&](Dwarf_Attribute attr
) {
1145 if (dwarf
.getAttributeType(attr
) == DW_AT_signature
&&
1146 dwarf
.getAttributeForm(attr
) == DW_FORM_ref_sig8
) {
1147 offset
= dwarf
.getAttributeValueRef(attr
);
1154 template_params
->emplace_back(offset
);
1172 * Given the DIE representing an object type, its name, and its key, return the
1173 * detailed specification of the object.
// Builds the detailed Object for the object-type DIE `die`, labelled with the
// caller-supplied `name` and `key`. When the DIE carries a DW_AT_signature
// attribute, the work is delegated to genObject() on the DIE at the referenced
// offset, using the same name and key.
1175 Object
TypeParserImpl::genObject(Dwarf_Die die
,
1176 ObjectTypeName name
,
1177 ObjectTypeKey key
) {
// Translate the DIE's tag into an Object::Kind. Structure and class tags both
// map to k_class.
1178 const auto kind
= [&]{
1179 switch (m_dwarf
.getTag(die
)) {
1180 case DW_TAG_structure_type
: return Object::Kind::k_class
;
1181 case DW_TAG_class_type
: return Object::Kind::k_class
;
1182 case DW_TAG_union_type
: return Object::Kind::k_union
;
1183 case DW_TAG_base_type
: return Object::Kind::k_primitive
;
1184 case DW_TAG_enumeration_type
: return Object::Kind::k_enum
;
1185 // Strange things like "decltype(nullptr_t)"
1186 case DW_TAG_unspecified_type
: return Object::Kind::k_other
;
1187 // Shouldn't happen because we only call genObject() on offsets already
1188 // visited and verified to be an object type.
1189 default: always_assert(0);
1193 HPHP::Optional
<std::size_t> size
;
1194 bool incomplete
= false;
1195 HPHP::Optional
<GlobalOff
> definition_offset
;
// Pull the byte size, the declaration flag (stored as `incomplete`), and any
// DW_AT_signature reference out of the DIE's attributes.
1197 m_dwarf
.forEachAttribute(
1199 [&](Dwarf_Attribute attr
) {
1200 switch (m_dwarf
.getAttributeType(attr
)) {
1201 case DW_AT_byte_size
:
1202 size
= m_dwarf
.getAttributeValueUData(attr
);
1204 case DW_AT_declaration
:
1205 incomplete
= m_dwarf
.getAttributeValueFlag(attr
);
1207 case DW_AT_signature
:
1208 definition_offset
= m_dwarf
.getAttributeValueRef(attr
);
// A signature reference was found: generate the object from the referenced DIE
// instead of this one.
1217 if (definition_offset
) {
1218 return m_dwarf
.onDIEAtOffset(
1220 [&](Dwarf_Die die2
) { return genObject(die2
, name
, key
); }
1224 // No size was provided. This is expected for incomplete types or the strange
1225 // "other" types sometimes seen, but an error otherwise.
1227 if (incomplete
|| kind
== Object::Kind::k_other
) {
1232 "Object type '{}' at offset {} is a complete definition, "
1241 Object obj
{std::move(name
), *size
, key
, kind
, incomplete
};
// Walk the children, dispatching on tag to collect bases, members, template
// parameters and member functions into `obj`.
1243 m_dwarf
.forEachChild(
1245 [&](Dwarf_Die child
) {
1246 switch (m_dwarf
.getTag(child
)) {
1247 case DW_TAG_inheritance
:
1248 obj
.bases
.emplace_back(genBase(child
, obj
.name
));
1251 obj
.members
.emplace_back(genMember(child
, obj
.name
));
1252 if (obj
.name
.linkage
!= ObjectTypeName::Linkage::external
) {
1253 // Clang gives linkage names to things that don't actually have
1254 // linkage. Don't let any members have linkage names if the object
1255 // type doesn't have external linkage.
1256 obj
.members
.back().linkage_name
.clear();
1259 case DW_TAG_template_type_parameter
:
1260 obj
.template_params
.emplace_back(genTemplateParam(child
));
1262 case DW_TAG_GNU_template_parameter_pack
:
1263 // Flatten parameter packs as if they were just a normally provided
1264 // parameter list. This is enough for our purposes.
1265 m_dwarf
.forEachChild(
1267 [&](Dwarf_Die template_die
) {
1268 if (m_dwarf
.getTag(template_die
) ==
1269 DW_TAG_template_type_parameter
) {
1270 obj
.template_params
.emplace_back(
1271 genTemplateParam(template_die
)
1278 case DW_TAG_subprogram
:
1279 obj
.functions
.emplace_back(genFunction(child
));
1280 if (obj
.name
.linkage
!= ObjectTypeName::Linkage::external
) {
1281 // Clang gives linkage names to things that don't actually have
1282 // linkage. Don't let any functions have linkage names if the object
1283 // type doesn't have external linkage.
1284 obj
.functions
.back().linkage_name
.clear();
1294 // The base classes and members aren't always reported in DWARF in offset
1295 // order, but make the output deterministic here to simplify consumers of the
// Bases are ordered by (offset, type name).
1300 [&](const Object::Base
& b1
, const Object::Base
& b2
) {
1301 return std::tie(b1
.offset
, b1
.type
.name
.name
) <
1302 std::tie(b2
.offset
, b2
.type
.name
.name
);
// Members are ordered by (offset, member name).
1307 obj
.members
.begin(),
1309 [&](const Object::Member
& m1
, const Object::Member
& m2
) {
1310 return std::tie(m1
.offset
, m1
.name
) <
1311 std::tie(m2
.offset
, m2
.name
);
1319 * Given a DIE representing an arbitrary type, return its equivalent Type. This
1320 * can involve chasing a chain of such type DIEs.
// Converts the type DIE `die` into a Type. References found in the DIE's
// attributes (the referred-to type, DW_AT_containing_type, DW_AT_signature)
// are resolved by calling genType() again on the DIE at the referenced offset.
// For pointer, const, volatile, restrict, typedef and subroutine types a
// missing referred-to type yields VoidType.
1322 Type
TypeParserImpl::genType(Dwarf_Die die
) {
1323 // Offset of a different type this type refers to. If not present, that type
1324 // is implicitly "void".
1325 HPHP::Optional
<GlobalOff
> type_offset
;
1326 // For pointers to members, the type referring to the object the member
1328 HPHP::Optional
<GlobalOff
> containing_type_offset
;
1330 // A struct can have a declaration which refers to the definition
1331 // via a DW_AT_signature.
1332 HPHP::Optional
<GlobalOff
> definition_offset
;
1334 m_dwarf
.forEachAttribute(
1336 [&](Dwarf_Attribute attr
) {
1337 switch (m_dwarf
.getAttributeType(attr
)) {
1339 type_offset
= m_dwarf
.getAttributeValueRef(attr
);
1341 case DW_AT_containing_type
:
1342 containing_type_offset
= m_dwarf
.getAttributeValueRef(attr
);
1344 case DW_AT_signature
:
1345 definition_offset
= m_dwarf
.getAttributeValueRef(attr
);
// Helper: generate the Type for the DIE located at `offset`.
1354 const auto recurse
= [&](GlobalOff offset
) {
1355 return m_dwarf
.onDIEAtOffset(
1357 [&](Dwarf_Die die2
) { return genType(die2
); }
1361 // Pointers to member functions aren't represented in DWARF. Instead the
1362 // compiler creates a struct internally which stores all the information.
1364 switch (m_dwarf
.getTag(die
)) {
// Object-like tags: follow a DW_AT_signature reference when present; otherwise
// look this DIE's offset up in the owning state's obj_offsets and return the
// matching all_objs entry.
1365 case DW_TAG_base_type
:
1366 case DW_TAG_structure_type
:
1367 case DW_TAG_class_type
:
1368 case DW_TAG_union_type
:
1369 case DW_TAG_enumeration_type
:
1370 case DW_TAG_unspecified_type
: {
1371 if (definition_offset
) return recurse(*definition_offset
);
1372 auto offset
= m_dwarf
.getDIEOffset(die
);
1373 auto const& state
= stateForOffset(offset
);
1374 auto iter
= state
.obj_offsets
.find(offset
);
1375 if (iter
== state
.obj_offsets
.end()) {
1376 // Must be the pseudo-type.
1379 Scope::s_pseudo_type_name
,
1380 ObjectTypeName::Linkage::pseudo
1382 ObjectTypeKey
{offset
.raw(), 0},
1386 return state
.all_objs
[iter
->second
];
1389 case DW_TAG_pointer_type
:
1390 return PtrType
{type_offset
? recurse(*type_offset
) : VoidType
{}};
1391 case DW_TAG_reference_type
: {
1395 "Encountered reference to void at offset {}",
1396 m_dwarf
.getDIEOffset(die
)
1400 return RefType
{recurse(*type_offset
)};
1402 case DW_TAG_rvalue_reference_type
: {
1406 "Encountered rvalue reference to void at offset {}",
1407 m_dwarf
.getDIEOffset(die
)
1411 return RValueRefType
{recurse(*type_offset
)};
1413 case DW_TAG_array_type
: {
1417 "Encountered array of voids at offset {}",
1418 m_dwarf
.getDIEOffset(die
)
1422 return ArrType
{recurse(*type_offset
), determineArrayBound(die
)};
1424 case DW_TAG_const_type
:
1425 return ConstType
{type_offset
? recurse(*type_offset
) : VoidType
{}};
1426 case DW_TAG_volatile_type
:
1427 return VolatileType
{type_offset
? recurse(*type_offset
) : VoidType
{}};
1428 case DW_TAG_restrict_type
:
1429 return RestrictType
{type_offset
? recurse(*type_offset
) : VoidType
{}};
// A typedef is not represented separately: the result is the aliased type.
1430 case DW_TAG_typedef
:
1431 return type_offset
? recurse(*type_offset
) : VoidType
{};
// Function type: the referred-to type is the return type; the parameters are
// appended by fillFuncArgs().
1432 case DW_TAG_subroutine_type
: {
1433 FuncType func
{type_offset
? recurse(*type_offset
) : VoidType
{}};
1434 fillFuncArgs(die
, func
);
1435 return std::move(func
);
1437 case DW_TAG_ptr_to_member_type
: {
1438 if (!containing_type_offset
) {
1441 "Encountered ptr-to-member at offset {} without a "
1442 "containing object",
1443 m_dwarf
.getDIEOffset(die
)
// The containing type must resolve to an object; anything else is reported
// with the message below.
1448 auto containing
= recurse(*containing_type_offset
);
1449 if (auto obj
= containing
.asObject()) {
1451 MemberType
{std::move(*obj
), recurse(*type_offset
)}
1456 "Encountered ptr-to-member at offset {} with a "
1457 "containing object of type '{}'",
1458 m_dwarf
.getDIEOffset(die
),
1459 containing
.toString()
1467 "Encountered non-type tag '{}' at offset {} while "
1468 "traversing type description",
1469 m_dwarf
.tagToString(m_dwarf
.getTag(die
)),
1470 m_dwarf
.getDIEOffset(die
)
// Builds an Object::Member from the member DIE `die`. The DIE's attributes
// supply the name, linkage name, address, data-member offset, type reference
// and the DW_AT_declaration flag (recorded as `is_static`). A linkage name or
// address that is still missing is then taken from the static definitions
// recorded for this DIE's offset.
1476 Object::Member
TypeParserImpl::genMember(Dwarf_Die die
,
1477 const ObjectTypeName
& parent_name
) {
1479 std::string linkage_name
;
1480 std::size_t offset
= 0;
1481 HPHP::Optional
<GlobalOff
> die_offset
;
1482 HPHP::Optional
<uintptr_t> address
;
1483 bool is_static
= false;
1485 m_dwarf
.forEachAttribute(
1487 [&](Dwarf_Attribute attr
) {
1488 switch (m_dwarf
.getAttributeType(attr
)) {
1490 name
= m_dwarf
.getAttributeValueString(attr
);
1492 case DW_AT_linkage_name
:
1493 linkage_name
= m_dwarf
.getAttributeValueString(attr
);
1495 case DW_AT_location
:
1496 address
= interpretLocAddress(m_dwarf
, attr
);
1498 case DW_AT_data_member_location
:
1499 offset
= m_dwarf
.getAttributeValueUData(attr
);
1502 die_offset
= m_dwarf
.getAttributeValueRef(attr
);
// A member carrying DW_AT_declaration is treated as a static member.
1504 case DW_AT_declaration
:
1505 is_static
= m_dwarf
.getAttributeValueFlag(attr
);
1515 // No DW_AT_type means "void", but you can't have void members!
1518 "Encountered member (name: '{}') of type void "
1519 "in object type '{}' at offset {}",
1522 m_dwarf
.getDIEOffset(die
)
1528 // If this is a static member, look up any definitions which refer to this
1529 // member, and pull any additional information out of it.
1530 auto const static_offset
= m_dwarf
.getDIEOffset(die
);
1531 auto const& state
= stateForOffset(static_offset
);
1532 auto const range
= state
.static_definitions
.equal_range(static_offset
);
// Values already obtained from the member DIE itself win; a definition only
// fills in what is still missing.
1534 for (auto const& elm
: range
) {
1535 if (linkage_name
.empty() && !elm
.second
.linkage_name
.empty()) {
1536 linkage_name
= elm
.second
.linkage_name
;
1538 if (!address
&& elm
.second
.address
!= StaticSpec::kNoAddress
) {
1539 address
= elm
.second
.address
;
1544 auto type
= m_dwarf
.onDIEAtOffset(
1546 [&](Dwarf_Die die2
){ return genType(die2
); }
// Placeholder names that embed the member's type.
1551 ? folly::sformat("(unnamed static member of type '{}')", type
.toString())
1552 : folly::sformat("(unnamed member of type '{}')", type
.toString());
1555 return Object::Member
{
// Static members are reported without an offset.
1557 is_static
? std::nullopt
: HPHP::Optional
<std::size_t>{offset
},
// Builds an Object::Function from the subprogram DIE `die`. The return type
// defaults to void; the linkage name and virtuality come from the DIE's
// attributes, and argument types from its DW_TAG_formal_parameter children.
// Static definitions recorded for this DIE's offset can supply a missing
// linkage name or address and can mark the function as a member.
1564 Object::Function
TypeParserImpl::genFunction(Dwarf_Die die
) {
1566 Type ret_type
{VoidType
{}};
1567 std::string linkage_name
;
1568 bool is_virtual
= false;
1569 bool is_member
= false;
1571 m_dwarf
.forEachAttribute(
1573 [&](Dwarf_Attribute attr
) {
1574 switch (m_dwarf
.getAttributeType(attr
)) {
1576 name
= m_dwarf
.getAttributeValueString(attr
);
1579 ret_type
= m_dwarf
.onDIEAtOffset(
1580 m_dwarf
.getAttributeValueRef(attr
),
1581 [&](Dwarf_Die ty_die
) { return genType(ty_die
); }
1584 case DW_AT_linkage_name
:
1585 linkage_name
= m_dwarf
.getAttributeValueString(attr
);
1587 case DW_AT_virtuality
:
1589 (m_dwarf
.getAttributeValueUData(attr
) != DW_VIRTUALITY_none
);
1591 case DW_AT_object_pointer
:
1602 * We need to determine if this function is a static function or a member
1603 * function. The straight-forward way is to look for the DW_AT_object_pointer
1604 * attribute (which is only present for member functions). This works fine for
1605 * GCC, but not Clang.
1607 * On Clang, the DW_AT_object_pointer is only present in a function's
1608 * definition, not its declaration. Moreover, it doesn't reliably emit
1609 * function declarations if it thinks the function isn't used. As a result, we
1610 * can't reliably distinguish member functions from static functions on clang.
1612 * As an alternative, if the first formal parameter of a function is marked as
1613 * being "artificial" (which means its not present in the actual source),
1614 * assume its actually the this pointer, and that the function is a member
1617 std::vector
<Type
> arg_types
;
1618 m_dwarf
.forEachChild(
1620 [&](Dwarf_Die child
) {
1621 if (m_dwarf
.getTag(child
) != DW_TAG_formal_parameter
) {
1625 bool is_artificial
= false;
1626 Type arg_type
{VoidType()};
1628 m_dwarf
.forEachAttribute(
1630 [&](Dwarf_Attribute attr
) {
1631 switch (m_dwarf
.getAttributeType(attr
)) {
1633 arg_type
= m_dwarf
.onDIEAtOffset(
1634 m_dwarf
.getAttributeValueRef(attr
),
1635 [&](Dwarf_Die ty_die
) { return genType(ty_die
); }
1638 case DW_AT_artificial
:
1639 is_artificial
= m_dwarf
.getAttributeValueFlag(attr
);
1648 // Only consider this a member function if this arg is the first and it's
1650 if (is_artificial
&& arg_types
.empty()) {
1653 arg_types
.emplace_back(std::move(arg_type
));
1659 HPHP::Optional
<std::uintptr_t> address
;
1661 // Similar to static variables, find any definitions which refer to this
1662 // function in order to extract linkage information.
1663 auto const offset
= m_dwarf
.getDIEOffset(die
);
1664 auto const& state
= stateForOffset(offset
);
1665 auto range
= state
.static_definitions
.equal_range(offset
);
// Values already obtained from the declaration win; a definition only fills in
// what is still missing, and can additionally mark the function as a member.
1666 for (auto const& elm
: range
) {
1667 if (linkage_name
.empty() && !elm
.second
.linkage_name
.empty()) {
1668 linkage_name
= elm
.second
.linkage_name
;
1670 if (!address
&& elm
.second
.address
!= StaticSpec::kNoAddress
) {
1671 address
= elm
.second
.address
;
1673 if (elm
.second
.is_member
) is_member
= true;
1676 return Object::Function
{
1678 std::move(ret_type
),
1679 std::move(arg_types
),
// Kind selection: k_virtual, else k_member when is_member is set, else
// k_static.
1681 Object::Function::Kind::k_virtual
:
1682 (is_member
? Object::Function::Kind::k_member
:
1683 Object::Function::Kind::k_static
),
// Builds an Object::Base from the inheritance DIE `die`. Collects the
// referenced type, the virtuality and the DW_AT_data_member_location offset,
// resolves the type with genType(), and returns a Base when that type is an
// object. The other outcomes are reported with the messages below.
1689 Object::Base
TypeParserImpl::genBase(Dwarf_Die die
,
1690 const ObjectTypeName
& parent_name
) {
1692 HPHP::Optional
<std::size_t> offset
;
1693 HPHP::Optional
<GlobalOff
> die_offset
;
1694 bool is_virtual
= false;
1696 m_dwarf
.forEachAttribute(
1698 [&](Dwarf_Attribute attr
) {
1699 switch (m_dwarf
.getAttributeType(attr
)) {
1701 name
= m_dwarf
.getAttributeValueString(attr
);
1704 die_offset
= m_dwarf
.getAttributeValueRef(attr
);
1706 case DW_AT_virtuality
:
1708 (m_dwarf
.getAttributeValueUData(attr
) != DW_VIRTUALITY_none
);
// Second attribute pass: read the base's offset from
// DW_AT_data_member_location.
1720 m_dwarf
.forEachAttribute(
1722 [&](Dwarf_Attribute attr
) {
1723 switch (m_dwarf
.getAttributeType(attr
)) {
1724 case DW_AT_data_member_location
:
1725 offset
= m_dwarf
.getAttributeValueUData(attr
);
1738 "Encountered base '{}' of object type '{}' without "
1739 "type information at offset {}",
1742 m_dwarf
.getDIEOffset(die
)
1748 m_dwarf
.onDIEAtOffset(
1750 [&](Dwarf_Die die2
) { return genType(die2
); }
1753 if (auto obj
= type
.asObject()) {
1754 // Base class better be an actual class!
1755 return Object::Base
{*obj
, offset
};
1759 "Encountered base '{}' of object type '{}' of "
1760 "non-object type '{}' at offset {}",
1764 m_dwarf
.getDIEOffset(die
)
// Builds an Object::TemplateParam for the template-parameter DIE `die`: reads
// a type reference from the DIE's attributes and converts the referenced DIE
// with genType().
1770 Object::TemplateParam
TypeParserImpl::genTemplateParam(Dwarf_Die die
) {
1771 HPHP::Optional
<GlobalOff
> die_offset
;
1773 m_dwarf
.forEachAttribute(
1775 [&](Dwarf_Attribute attr
) {
1776 switch (m_dwarf
.getAttributeType(attr
)) {
1778 die_offset
= m_dwarf
.getAttributeValueRef(attr
);
1787 return Object::TemplateParam
{
1789 m_dwarf
.onDIEAtOffset(
1791 [&](Dwarf_Die die2
){ return genType(die2
); }
// Determines the element count of the array-type DIE `die` from the
// attributes of its DW_TAG_subrange_type children. A bound of zero is reported
// as "no bound".
1797 HPHP::Optional
<std::size_t>
1798 TypeParserImpl::determineArrayBound(Dwarf_Die die
) {
1799 HPHP::Optional
<std::size_t> bound
;
1801 m_dwarf
.forEachChild(
1803 [&](Dwarf_Die child
) {
1804 switch (m_dwarf
.getTag(child
)) {
1805 case DW_TAG_subrange_type
:
1806 m_dwarf
.forEachAttribute(
1808 [&](Dwarf_Attribute attr
) {
1809 switch (m_dwarf
.getAttributeType(attr
)) {
1811 bound
= m_dwarf
.getAttributeValueUData(attr
);
// DW_AT_upper_bound is the highest valid index, so the element count is one
// more than its value.
1813 case DW_AT_upper_bound
:
1814 bound
= m_dwarf
.getAttributeValueUData(attr
)+1;
// Normalise a zero bound to an empty optional.
1830 if (bound
&& !*bound
) bound
.reset();
// Appends to `func.args` the type of each DW_TAG_formal_parameter child of the
// function-type DIE `die`. Each parameter's type reference is read from its
// attributes and converted with genType().
1834 void TypeParserImpl::fillFuncArgs(Dwarf_Die die
, FuncType
& func
) {
1835 m_dwarf
.forEachChild(
1837 [&](Dwarf_Die child
) {
1838 switch (m_dwarf
.getTag(child
)) {
1839 case DW_TAG_formal_parameter
: {
1840 HPHP::Optional
<GlobalOff
> type_offset
;
1842 m_dwarf
.forEachAttribute(
1844 [&](Dwarf_Attribute attr
) {
1845 switch (m_dwarf
.getAttributeType(attr
)) {
1847 type_offset
= m_dwarf
.getAttributeValueRef(attr
);
1859 "Encountered function at offset {} taking a void parameter",
1860 m_dwarf
.getDIEOffset(die
)
1865 func
.args
.push_back(
1866 m_dwarf
.onDIEAtOffset(
// Note: this lambda's `die` parameter shadows the function's `die`.
1868 [&](Dwarf_Die die
) { return genType(die
); }
1882 * Print out the given DIE (including children) in textual format to the given
1883 * ostream. Only actually print out DIEs which begin in the range between the
1884 * begin and end parameters.
// Writes a textual description of `die` to `os`, limited to the begin/end
// offset range. Children are printed recursively with the indent increased by
// one and with no signature pair.
1887 void printDIE(std::ostream
& os
,
1888 const DwarfState
& dwarf
,
1890 std::pair
<uint64_t,GlobalOff
>* sig
,
1894 auto tag
= dwarf
.getTag(die
);
1895 auto tag_name
= dwarf
.tagToString(tag
);
1896 auto name
= dwarf
.getDIEName(die
);
1897 auto offset
= dwarf
.getDIEOffset(die
).offset();
// Helper that prints the children of this DIE which fall inside the range.
1899 const auto recurse
= [&]{
1900 // Find the last child DIE which does not start with the begin/end
1901 // range. This DIE is the first one which contains some data within the
1902 // begin/end range, so that must be the first one to begin recursion at.
1903 HPHP::Optional
<uint64_t> first
;
1907 [&](Dwarf_Die child
) {
1908 const auto offset
= dwarf
.getDIEOffset(child
).offset();
1909 if (offset
<= begin
) {
1919 // Only actually recurse if this child DIE is the above computed first DIE,
1920 // or one following it, and begins before the end parameter.
1923 [&](Dwarf_Die child
) {
1924 const auto offset
= dwarf
.getDIEOffset(child
).offset();
1925 if ((!first
|| offset
>= *first
) && offset
< end
) {
1926 printDIE(os
, dwarf
, child
, nullptr, begin
, end
, indent
+1);
// Returning false once a child starts at or past `end` stops the iteration.
1928 return offset
< end
;
1933 if (offset
< begin
) {
1936 } else if (offset
>= end
) {
// Formats a 64-bit type signature as 16 hex digits.
1940 auto const printSig
= [&] (uint64_t sig
) {
1941 return folly::sformat("ref_sig8:{:016x}", sig
);
1944 for (int i
= 0; i
< indent
; ++i
) {
1947 os
<< "#" << offset
<< ": " << tag_name
<< " (" << tag
<< ") \""
// A non-zero signature is shown together with the offset it maps to.
1949 if (sig
&& sig
->first
) {
1950 os
<< folly::sformat(" {{{} -> #{}}}", printSig(sig
->first
), sig
->second
);
// One output line per attribute: name, type code, rendered value, form.
1954 dwarf
.forEachAttribute(
1956 [&](Dwarf_Attribute attr
) {
1957 auto const type
= dwarf
.getAttributeType(attr
);
1958 auto const attr_name
= dwarf
.attributeTypeToString(type
);
1959 auto const form
= dwarf
.getAttributeForm(attr
);
1960 auto const attr_form
= dwarf
.attributeFormToString(form
);
// Render the attribute's value as text. DW_AT_ranges is expanded into its
// address ranges; everything else is rendered according to the attribute form.
1962 auto attr_value
= [&]() -> std::string
{
1963 if (type
== DW_AT_ranges
) {
1964 auto const ranges
= dwarf
.getRanges(attr
);
1966 for (auto range
: ranges
) {
1967 if (range
.dwr_addr1
== DwarfState::Dwarf_Ranges::kSelection
) {
1968 folly::format(&res
, "0x{:x} ", range
.dwr_addr2
);
1970 folly::format(&res
, "0x{:x}-0x{:x} ",
1971 range
.dwr_addr1
, range
.dwr_addr2
);
1976 switch (dwarf
.getAttributeForm(attr
)) {
1982 return folly::sformat("{}", dwarf
.getAttributeValueUData(attr
));
1985 return folly::sformat("{}", dwarf
.getAttributeValueSData(attr
));
1987 case DW_FORM_string
:
1989 return folly::sformat(
1991 dwarf
.getAttributeValueString(attr
)
1995 case DW_FORM_flag_present
:
1996 return dwarf
.getAttributeValueFlag(attr
) ? "true" : "false";
1999 return folly::sformat(
2001 dwarf
.getAttributeValueAddr(attr
)
2008 case DW_FORM_ref_udata
:
2009 case DW_FORM_ref_addr
:
2010 return folly::sformat("#{}", dwarf
.getAttributeValueRef(attr
));
2011 case DW_FORM_ref_sig8
: {
2012 return printSig(dwarf
.getAttributeValueSig8(attr
));
// Location expressions are rendered as a list of operations, with DW_OP_addr
// formatted separately from the other operations.
2015 case DW_FORM_exprloc
: {
2017 for (const auto& expr
: dwarf
.getAttributeValueExprLoc(attr
)) {
2018 if (expr
.lr_atom
== DW_OP_addr
) {
2019 output
+= folly::sformat(
2020 "<OP_addr: {:#x}>,",
2024 output
+= folly::sformat(
2026 dwarf
.opToString(expr
.lr_atom
),
2033 return folly::sformat("Location: [{}]", output
);
// These forms are not decoded; a fixed placeholder is printed instead.
2036 case DW_FORM_block1
:
2037 case DW_FORM_block2
:
2038 case DW_FORM_block4
:
2039 case DW_FORM_block
: return "{BLOCK}";
2041 case DW_FORM_indirect
: return "{INDIRECT}";
2042 case DW_FORM_sec_offset
: return "{SECTION OFFSET}";
2043 default: return "{UNKNOWN}";
2047 for (int i
= 0; i
< indent
; ++i
) {
2050 os
<< folly::sformat(" **** {} ({}) ==> {} [{}:{}]\n",
2051 attr_name
, type
, attr_value
,
// Printer implementation that dumps the DWARF information of the file named at
// construction.
2060 struct PrinterImpl
: Printer
{
2061 explicit PrinterImpl(const std::string
& filename
): m_filename
{filename
} {}
// Opens the file as a DwarfState and prints it in two passes, calling
// print_section() with the boolean argument false and then true.
// NOTE(review): the boolean presumably selects which debug section is walked;
// confirm against print_section's parameter list.
2062 void operator()(std::ostream
& os
,
2064 std::size_t end
) const override
{
2065 DwarfState dwarf
{m_filename
};
2067 print_section(os
, dwarf
, false, begin
, end
);
2068 print_section(os
, dwarf
, true, begin
, end
);
// Prints the top-level units of `dwarf` that overlap the begin/end range.
2073 void print_section(std::ostream
& os
,
2074 const DwarfState
& dwarf
,
2077 std::size_t end
) const {
2078 // If a non-default begin parameter was specified, first iterate over all
2079 // the compilation units. Find the first compilation unit which at least
2080 // partially lies within the range given by the begin parameter. This is the
2081 // first compilation unit to begin printing from.
2082 HPHP::Optional
<uint64_t> last
;
2084 dwarf
.forEachTopLevelUnit(
2086 const auto offset
= dwarf
.getDIEOffset(cu
).offset();
2087 if (offset
<= begin
) last
= offset
;
2093 // Now iterate over all the compilation units again. Only actually print out
2094 // compilation units if they lie within the begin/end parameter range.
2095 dwarf
.forEachTopLevelUnit(
2096 [&] (Dwarf_Die cu
) {
// Pair the unit's type signature with its type offset, taken from the unit's
// context.
2097 auto context
= cu
->context
;
2098 auto type_offset
= GlobalOff
{ context
->typeOffset
, context
->isInfo
};
2099 auto pair
= std::make_pair(context
->typeSignature
, type_offset
);
2100 const auto offset
= dwarf
.getDIEOffset(cu
).offset();
// Units starting at or after `end` stop the iteration.
2101 if (offset
>= end
) return false;
2102 if ((!last
|| offset
>= *last
)) {
2108 // If this compilation unit entirely lies within the begin/end
2109 // range, specify a begin parameter of "0", which will stop
2110 // printDIE() from doing range checks (which is more efficient).
2111 (!last
|| (offset
> *last
)) ? 0 : begin
,
// Path of the file to print.
2120 std::string m_filename
;
2124 struct GDBIndexerImpl
: GDBIndexer
{
// Stores the input file name and the thread count. A thread count below 1 is
// rejected by throwing Exception.
2125 explicit GDBIndexerImpl(const std::string
& filename
, int num_threads
)
2126 : m_filename
{filename
}
2127 , m_numThreads
{num_threads
}
2129 if (num_threads
< 1) {
2130 throw Exception
{folly::sformat("Invalid number of threads: {}",
// Generates the index for m_filename and writes it to `output_file`. The
// pieces are written in this order: header, CU list, types CU list, address
// area, symbol hash table, CU vector offsets, strings. The duration of each
// phase is reported through log_time().
2135 void operator()(const std::string
& output_file
) const override
{
2136 auto begin_time
= ::HPHP::Timer::GetCurrentTimeMicros();
2137 DwarfState dwarf
{m_filename
};
2138 log_time(begin_time
, "Parsing dwarf file");
2140 std::FILE* fd
= std::fopen(output_file
.c_str(), "wb");
2143 throw Exception
{folly::sformat("Cannot open file: {}", output_file
)};
// Six 32-bit header words: the version followed by five section offsets that
// are filled in once the section sizes are known.
2146 auto const gdb_index_version
= 8;
2147 std::vector
<uint32_t> header
{gdb_index_version
, 0, 0, 0, 0, 0};
2149 auto time_index_begin
= ::HPHP::Timer::GetCurrentTimeMicros();
// Build every section in memory first, timing each step.
2151 auto addresses_and_symbols
= collect_addresses_and_symbols(dwarf
);
2152 auto time
= log_time(time_index_begin
, "collect_addresses_and_symbols");
2153 auto const cu
= get_cu(dwarf
);
2154 time
= log_time(time
, "Get_cu");
2155 auto const tu
= get_tu(dwarf
);
2156 time
= log_time(time
, "Get_tu");
2157 auto const address
= get_address(addresses_and_symbols
.first
);
2158 time
= log_time(time
, "Get_address");
2159 auto const symbol_and_constants
=
2160 get_symbol_and_constants(addresses_and_symbols
.second
);
2161 log_time(time
, "Get_symbol_and_constants");
2163 time
= log_time(time_index_begin
, "Index generation");
// Each offset is the previous offset plus the byte size of the previous
// section.
2165 // The offset, from the start of the file, of the CU list.
2166 header
[1] = sizeof header
[0] * header
.size();
2167 // The offset, from the start of the file, of the types CU list.
2168 header
[2] = header
[1] + sizeof cu
[0] * cu
.size();
2169 // The offset, from the start of the file, of the address area.
2170 header
[3] = header
[2] + sizeof tu
[0] * tu
.size();
2171 // The offset, from the start of the file, of the symbol table.
2172 header
[4] = header
[3] + sizeof address
[0] * address
.size();
2173 // The offset, from the start of the file, of the constant pool.
2174 header
[5] = header
[4] +
2175 sizeof symbol_and_constants
.symbol_pool
.m_hashtable
[0] *
2176 symbol_and_constants
.symbol_pool
.m_hashtable
.size();
// Emit the sections in the same order the offsets above were computed.
2178 print_section(fd
, header
);
2179 print_section(fd
, cu
);
2180 print_section(fd
, tu
);
2181 print_section(fd
, address
);
2182 print_section(fd
, symbol_and_constants
.symbol_pool
.m_hashtable
);
2183 print_section(fd
, symbol_and_constants
.cu_vector_offsets
);
2184 print_section(fd
, symbol_and_constants
.strings
);
2186 log_time(time
, "Print");
2188 log_time(begin_time
, "Full index creation");
// Prints to std::cout how many milliseconds have elapsed since `time` (a
// microsecond timestamp), prefixed with `msg`.
// NOTE(review): `time` and `now` are int32_t, while callers pass the `auto`
// result of Timer::GetCurrentTimeMicros(); if that function returns a wider
// integer the timestamps are narrowed here - confirm this is intended.
2194 int32_t log_time(int32_t time
, const char* msg
) const {
2195 int32_t now
= ::HPHP::Timer::GetCurrentTimeMicros();
2196 std::cout
<< msg
<< " took " << (now
- time
) / 1000 << " ms" << std::endl
;
2200 void print_section(std::FILE* fd
,
2201 const std::vector
<std::string
>& data
) const {
2202 if (!data
.size()) return;
2204 for (auto s
: data
) {
2205 std::fwrite(s
.c_str(), sizeof(char), s
.length() + 1, fd
);
2209 template <typename T
>
2210 void print_section(std::FILE* fd
, const std::vector
<T
>& data
) const {
2211 if (!data
.size()) return;
2213 std::fwrite(data
.data(), sizeof data
[0], data
.size(), fd
);
// Builds the CU list: for every compilation unit visited, two 64-bit values
// are appended - the unit's context offset followed by its context size.
2216 std::vector
<uint64_t> get_cu(const DwarfState
& dwarf
) const {
2217 std::vector
<uint64_t> result
= {};
2218 dwarf
.forEachCompilationUnit(
2220 result
.push_back(cu
->context
->offset
);
2221 result
.push_back(cu
->context
->size
);
// Builds the types CU list: for every unit visited, three 64-bit values are
// appended - the unit's context offset, the type offset relative to that unit
// offset, and the type signature.
2227 std::vector
<uint64_t> get_tu(const DwarfState
& dwarf
) const {
2228 std::vector
<uint64_t> result
= {};
2229 dwarf
.forEachTopLevelUnit(
2231 result
.push_back(cu
->context
->offset
);
// typeOffset is stored relative to the start of the unit.
2232 result
.push_back(cu
->context
->typeOffset
- cu
->context
->offset
);
2233 result
.push_back(cu
->context
->typeSignature
);
// One entry of the address area.
// NOTE(review): low_bottom / high_bottom are presumably the low 32 bits of the
// 64-bit low / high addresses - confirm against the full struct definition.
2239 struct AddressTableEntry
{
2243 uint32_t low_bottom
;
2250 uint32_t high_bottom
;
2257 static bool compareAddressTableEntry(AddressTableEntry a
,
2258 AddressTableEntry b
) {
2259 return a
.low
== b
.low
? a
.high
< b
.high
: a
.low
< b
.low
;
// Collects the address ranges described by `die` into `entries`, each tagged
// with `cu_index`, and then does the same for every child DIE.
2262 void visit_die_for_address(const DwarfState
& dwarf
, const Dwarf_Die die
,
2263 std::vector
<AddressTableEntry
>& entries
,
2264 uint32_t cu_index
) const {
2265 HPHP::Optional
<uint64_t> low
, high
;
2266 std::vector
<DwarfState::Dwarf_Ranges
> ranges
;
// Set when the high value was read with a non-address form; it is then treated
// as an offset from `low` (see the computation of `high` further down).
2267 bool is_high_udata
= false;
2268 dwarf
.forEachAttribute(
2270 [&](Dwarf_Attribute attr
) {
2271 switch (dwarf
.getAttributeType(attr
)) {
2273 ranges
= dwarf
.getRanges(attr
);
2276 // Sometimes GCC/Clang emits very low numbers for addresses in
2277 // the form of UData. Let's drop them.
2278 if (attr
->form
== DW_FORM_addr
) {
2279 low
= dwarf
.getAttributeValueAddr(attr
);
2283 if (attr
->form
!= DW_FORM_addr
) {
2284 is_high_udata
= true;
2285 high
= dwarf
.getAttributeValueUData(attr
);
2287 high
= dwarf
.getAttributeValueAddr(attr
);
// Range-list case: every range is relative to `base`, which starts at `low`
// (or 0) and is replaced whenever a base-selection entry is encountered.
2297 if (!ranges
.empty()) {
2298 uint64_t base
= low
? *low
: 0;
2300 for (auto range
: ranges
) {
2301 if (range
.dwr_addr1
== DwarfState::Dwarf_Ranges::kSelection
) {
2302 base
= range
.dwr_addr2
;
2305 if (base
+ range
.dwr_addr1
== 0) continue;
2306 // Drop all the addresses under 2M
2307 if (base
+ range
.dwr_addr2
< 2000000) continue;
2311 base
+ range
.dwr_addr1
,
2312 base
+ range
.dwr_addr2
,
// Single low/high pair: turn an offset-style high value into an absolute one.
2321 high
= is_high_udata
? *low
+ *high
: *high
;
2322 // Drop all the addresses under 2M
2323 if (*low
!= 0 && *high
>= 2000000) {
2324 entries
.push_back(AddressTableEntry
{*low
, *high
, cu_index
});
// Recurse into the children with the same cu_index.
2331 [&](Dwarf_Die child
) {
2332 visit_die_for_address(dwarf
, child
, entries
, cu_index
);
2338 std::vector
<uint32_t>
2339 get_address(std::vector
<AddressTableEntry
>& entries
) const {
2340 sort(entries
.begin(), entries
.end(), compareAddressTableEntry
);
2342 // Split into little-endian formatting
2343 std::vector
<uint32_t> result
= {};
2344 for (auto& e
: entries
) {
2345 result
.push_back(e
.low_bottom
);
2346 result
.push_back(e
.low_top
);
2347 result
.push_back(e
.high_bottom
);
2348 result
.push_back(e
.high_top
);
2349 result
.push_back(e
.index
);
// Both offsets are value-initialised to zero.
2355 uint32_t name_offset
{};
2356 uint32_t cu_vector_offset
{};
// A symbol counts as valid (occupied) when its name_offset is non-zero.
2358 bool valid() { return name_offset
; }
2361 struct GDBHashtable
{
2362 GDBHashtable() : m_size(0), m_capacity(0), m_hashtable({}) {}
2365 std::vector
<GDBSymbol
> m_hashtable
;
2368 void init(size_t size
) {
2369 assertx(m_size
== 0 && m_capacity
== 0);
2371 auto const nextPowerOfTwo
= [](size_t n
) -> size_t {
2372 if (n
== 0) return 1;
2383 auto initial_size
= nextPowerOfTwo(size
* 4 / 3);
2385 m_hashtable
= std::vector
<GDBSymbol
>(initial_size
, GDBSymbol
{});
2386 m_capacity
= initial_size
;
2389 GDBSymbol
* findSlot(uint32_t hash
) {
2390 uint32_t index
= hash
;
2391 uint32_t step
= ((hash
* 17) & (m_capacity
- 1)) | 1;
2394 index
&= m_capacity
- 1;
2395 if (!m_hashtable
[index
].valid()) {
2396 return &m_hashtable
[index
];
2402 bool add(uint32_t hash
, GDBSymbol s
) {
2403 auto const loc
= this->findSlot(hash
);
2404 assert(!loc
->valid());
2411 using SymbolMap
= tbb::concurrent_hash_map
<std::string
,
2412 std::vector
<uint32_t>,
2413 ::HPHP::stringHashCompare
>;
2414 using SpecMap
= folly::F14FastMap
<GlobalOff
, std::string
>;
2416 void visit_die_for_symbols(const DwarfState
& dwarf
,
2417 const Dwarf_Die die
,
2419 SpecMap
& spec_names
,
2420 std::string parent_name
,
2422 uint32_t cu_index
) const {
2424 bool is_declaration
= false;
2425 bool is_external
= false;
2427 bool full_name
= false;
2428 bool is_inlined
= false;
2429 bool has_location
= false;
2430 bool in_specification
= false;
2431 auto specification
= GlobalOff::fromRaw(0);
2432 auto collect_attributes
= [&] (Dwarf_Attribute attr
) {
2433 switch (dwarf
.getAttributeType(attr
)) {
2434 case DW_AT_declaration
:
2435 if (!in_specification
) {
2436 is_declaration
= dwarf
.getAttributeValueFlag(attr
);
2439 case DW_AT_external
:
2440 is_external
= dwarf
.getAttributeValueFlag(attr
);
2442 case DW_AT_linkage_name
:
2445 case DW_AT_location
:
2446 has_location
= true;
2450 name
= dwarf
.getAttributeValueString(attr
);
2453 case DW_AT_inline
: {
2454 auto const val
= dwarf
.getAttributeValueUData(attr
);
2456 (val
== DW_INL_inlined
) ||
2457 (val
== DW_INL_declared_inlined
);
2460 case DW_AT_language
:
2461 language
= dwarf
.getAttributeValueUData(attr
);
2463 case DW_AT_specification
: {
2464 specification
= dwarf
.getAttributeValueRef(attr
);
2465 auto const it
= spec_names
.find(specification
);
2466 if (it
!= spec_names
.end()) {
2468 auto const pos
= name
.rfind("::");
2469 if (pos
!= std::string::npos
) {
2470 parent_name
= name
.substr(0, pos
);
2481 dwarf
.forEachAttribute(die
, collect_attributes
);
2482 if (specification
.raw()) {
2483 dwarf
.onDIEAtOffset(
2486 in_specification
= true;
2487 dwarf
.forEachAttribute(d
, collect_attributes
);
2492 struct IndexAndFlags
{
2493 IndexAndFlags(uint32_t index
, uint32_t kind
, uint32_t is_static
) {
2494 assertx((index
>> 24) == 0);
// Bits 0-23 are the CU index.
// Bits 24-27 are reserved and must be 0.
// Bits 28-30 are the kind of the symbol in the CU.
// Bit 31 is zero if the value is global and one if it is static.
2499 m_data
= (is_static
<< 31) | (kind
<< 28) | index
;
2502 explicit IndexAndFlags(uint32_t data
) : m_data(data
) {}
2506 uint32_t get_kind() const { return (m_data
>> 28) & 7; }
2507 uint32_t get_is_static() const { return m_data
>> 31; }
2511 constexpr int TYPE
= 1;
2512 constexpr int VARIABLE
= 2;
2513 //constexpr int ENUM = 2;
2514 constexpr int FUNCTION
= 3;
2515 // constexpr int OTHER = 4;
2517 auto const index_and_flags
= [&] {
2519 auto is_static
= false;
2520 switch (dwarf
.getTag(die
)) {
2521 case DW_TAG_typedef
:
2522 case DW_TAG_base_type
:
2523 case DW_TAG_subrange_type
:
2527 case DW_TAG_enumerator
:
2529 is_static
= language
!= DW_LANG_C_plus_plus
;
2531 case DW_TAG_subprogram
:
2533 is_static
= !(is_external
|| language
== DW_LANG_Ada83
||
2534 language
== DW_LANG_Ada95
);
2536 case DW_TAG_constant
:
2538 is_static
= !is_external
;
2540 case DW_TAG_variable
:
2542 is_static
= !is_external
;
2544 case DW_TAG_namespace
:
2548 case DW_TAG_class_type
:
2549 case DW_TAG_interface_type
:
2550 case DW_TAG_structure_type
:
2551 case DW_TAG_union_type
:
2552 case DW_TAG_enumeration_type
:
2554 is_static
= language
!= DW_LANG_C_plus_plus
;
2557 throw Exception
{"Invalid tag"};
2559 return IndexAndFlags
{cu_index
, kind
, is_static
}.m_data
;
2562 auto const hasSameFlags
= [&](std::vector
<uint32_t> v
, uint32_t input
) {
2563 auto const flags
= IndexAndFlags
{input
};
2564 for (auto const e
: v
) {
2565 auto const f
= IndexAndFlags
{e
};
2566 if (f
.get_kind() == flags
.get_kind()) {
2567 if ((f
.get_kind() == TYPE
&&
2568 f
.get_is_static() == flags
.get_is_static()) ||
2569 (!f
.get_is_static() && !flags
.get_is_static())) {
2577 auto const addSymbol
= [&](std::string name
) {
2578 auto value
= index_and_flags();
2579 SymbolMap::accessor acc
;
2580 if (symbols
.insert(acc
, name
) || !hasSameFlags(acc
->second
, value
)) {
2581 acc
->second
.push_back(value
);
2585 auto const addParent
= [&] {
2586 if (full_name
) return;
2587 if (name
.empty()) return;
2588 if (!parent_name
.empty()) {
2589 name
= folly::sformat("{}::{}", parent_name
, name
);
2591 if (is_declaration
) {
2592 spec_names
.emplace(dwarf
.getDIEOffset(die
), name
);
2596 auto const visitChildren
= [&](std::string name
) {
2599 [&](Dwarf_Die child
) {
2600 visit_die_for_symbols(dwarf
, child
, symbols
, spec_names
, name
,
2601 language
, cu_index
);
2607 auto const tag
= dwarf
.getTag(die
);
2609 case DW_TAG_base_type
:
2610 // don't canonicalize!
2614 // static members appear first here as a declaration, then
2615 // later as a DW_TAG_variable whose specification points
2616 // here. We need to note the name just in case.
2617 if (is_declaration
) addParent();
2619 case DW_TAG_subprogram
:
2620 if (is_inlined
) break;
2621 case DW_TAG_constant
:
2622 case DW_TAG_enumerator
:
2623 if (name
.empty()) break;
2625 if (is_declaration
) break;
2628 case DW_TAG_variable
:
2629 if (name
.empty() || (!is_external
&& !has_location
)) break;
2631 if (is_declaration
) break;
2634 case DW_TAG_namespace
:
2635 if (name
.empty()) name
= "(anonymous namespace)";
2637 visitChildren(name
);
2639 case DW_TAG_typedef
:
2640 case DW_TAG_subrange_type
:
2642 if (is_declaration
|| name
.empty()) break;
2645 case DW_TAG_union_type
:
2646 case DW_TAG_class_type
:
2647 case DW_TAG_interface_type
:
2648 case DW_TAG_structure_type
:
2649 case DW_TAG_enumeration_type
:
2651 if (!is_declaration
&& !name
.empty()) {
2654 if (tag
== DW_TAG_enumeration_type
|| !name
.empty()) {
2655 visitChildren(tag
== DW_TAG_enumeration_type
? parent_name
: name
);
2658 case DW_TAG_compile_unit
:
2659 case DW_TAG_type_unit
:
2660 visitChildren(parent_name
);
2667 std::pair
<std::vector
<AddressTableEntry
>, SymbolMap
>
2668 collect_addresses_and_symbols(const DwarfState
& dwarf
) const {
2669 auto time
= ::HPHP::Timer::GetCurrentTimeMicros();
2671 folly::F14FastMap
<uint32_t, uint32_t> unit_indices_cu
;
2672 folly::F14FastMap
<uint32_t, uint32_t> unit_indices_tu
;
2675 dwarf
.forEachTopLevelUnit(
2676 [&](Dwarf_Die die
) {
2677 unit_indices_cu
.insert({die
->context
->offset
, count
});
2679 }, true /* Compilation Unit */
2681 size_t numCUs
= count
;
2682 dwarf
.forEachTopLevelUnit(
2683 [&](Dwarf_Die die
) {
2684 unit_indices_tu
[die
->context
->offset
] = count
;
2686 }, false /* Type Unit */
2690 std::vector
<std::vector
<AddressTableEntry
>>
2691 entryList(numCUs
, std::vector
<AddressTableEntry
>{});
2694 dwarf
.forEachTopLevelUnitParallel(
2695 [&](Dwarf_Die die
) {
2696 uint32_t index
= unit_indices_cu
[die
->context
->offset
];
2697 assertx(index
< entryList
.size());
2698 std::vector
<AddressTableEntry
> entry
;
2699 visit_die_for_address(dwarf
, die
, entry
, index
);
2701 sort(entry
.begin(), entry
.end(), compareAddressTableEntry
);
2703 std::vector
<AddressTableEntry
> merged
;
2704 for (auto& e
: entry
) {
2705 if (!merged
.empty()) {
2706 auto& prev
= merged
.back();
2707 if (e
.low
<= prev
.high
) {
2708 if (e
.high
<= prev
.high
) continue;
2709 assertx(prev
.index
== e
.index
);
2714 merged
.push_back(e
);
2717 entryList
[index
] = std::move(merged
);
2719 visit_die_for_symbols(dwarf
, die
, symbols
, spec_names
, "",
2721 }, true /* Compilation Unit */, m_numThreads
2724 std::vector
<AddressTableEntry
> entries
;
2725 for (auto& list
: entryList
) {
2726 for (auto &e
: list
) {
2727 entries
.push_back(e
);
2731 time
= log_time(time
, "collect_addresses_and_symbols: Visit CUs");
2733 dwarf
.forEachTopLevelUnitParallel(
2734 [&](Dwarf_Die die
) {
2735 uint32_t index
= unit_indices_tu
[die
->context
->offset
];
2737 visit_die_for_symbols(dwarf
, die
, symbols
, spec_names
, "",
2739 }, false /* Type Unit */, m_numThreads
2742 log_time(time
, "collect_addresses_and_symbols: Visit TUs");
2744 return {std::move(entries
), std::move(symbols
)};
2747 struct SymbolAndConstantPool
{
2748 GDBHashtable symbol_pool
;
2749 std::vector
<uint32_t> cu_vector_offsets
;
2750 std::vector
<std::string
> strings
;
2753 SymbolAndConstantPool
2754 get_symbol_and_constants(const SymbolMap
& symbols
) const {
2755 auto time
= ::HPHP::Timer::GetCurrentTimeMicros();
2757 GDBHashtable symbol_hash_table
;
2758 symbol_hash_table
.init(symbols
.size());
2760 auto const getHashVal
= [](std::string name
) {
2762 for (char& c
: name
) {
2764 r
= r
* 67 + c
- 113;
2769 // The first value is the number of CU indices in the vector
2770 std::vector
<uint32_t> cu_vector_values
;
2771 std::vector
<std::string
> strings
;
// Set name_off to 1 so that we can use non-zero as the validity test for a
// hash table entry.
2775 uint32_t name_off
= 1;
2776 for (auto& entry
: symbols
) {
2777 uint32_t cu_vector_offset
= cu_vector_values
.size() * 4;
2778 cu_vector_values
.push_back(entry
.second
.size());
2779 for (auto& elem
: entry
.second
) {
2780 cu_vector_values
.push_back(elem
);
2782 strings
.push_back(entry
.first
);
2783 symbol_hash_table
.add(getHashVal(entry
.first
),
2784 GDBSymbol
{name_off
, cu_vector_offset
});
2785 name_off
+= entry
.first
.length() + 1;
2788 time
= log_time(time
, "Get_symbol_and_constants: Populate hash table");
2790 auto const num_cu_vector_bytes
=
2791 cu_vector_values
.size() * sizeof(cu_vector_values
[0]);
2792 for (auto& sym
: symbol_hash_table
.m_hashtable
) {
2794 sym
.name_offset
+= num_cu_vector_bytes
- 1;
2798 log_time(time
, "Get_symbol_and_constants: Update symbol pool");
2800 std::cout
<< "Hash Table Size: " << symbol_hash_table
.m_size
<<
2801 " Capacity: " << symbol_hash_table
.m_capacity
<< std::endl
;
2802 std::cout
<< "Strings Size: " << strings
.size() << std::endl
;
2803 std::cout
<< "CU Vector Values Size: " <<
2804 cu_vector_values
.size() << std::endl
;
2807 std::move(symbol_hash_table
),
2808 std::move(cu_vector_values
),
2813 std::string m_filename
;
2817 ////////////////////////////////////////////////////////////////////////////////
2821 std::unique_ptr
<TypeParser
>
2822 make_dwarf_type_parser(const std::string
& filename
, int num_threads
) {
2823 return std::make_unique
<TypeParserImpl
>(filename
, num_threads
);
2826 std::unique_ptr
<Printer
> make_dwarf_printer(const std::string
& filename
) {
2827 return std::make_unique
<PrinterImpl
>(filename
);
2830 std::unique_ptr
<GDBIndexer
>
2831 make_dwarf_gdb_indexer(const std::string
& filename
, int num_threads
) {
2832 return std::make_unique
<GDBIndexerImpl
>(filename
, num_threads
);
2835 ////////////////////////////////////////////////////////////////////////////////