fix hot shallow decls
[hiphop-php.git] / hphp / tools / debug-parser / debug-parser-dwarf.cpp
blob20dd70f4faf2d5237aeda088fa494a674d1c343b
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #if defined(__linux__) || defined(__FreeBSD__)
19 #include <folly/Demangle.h>
20 #include <folly/Format.h>
21 #include <folly/Memory.h>
22 #include <folly/ScopeGuard.h>
23 #include <folly/String.h>
24 #include <folly/container/F14Map.h>
25 #include <folly/container/F14Set.h>
26 #include <folly/portability/Unistd.h>
28 #include <fcntl.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
32 #include <dwarf.h>
34 #include "hphp/util/assertions.h"
35 #include "hphp/util/functional.h"
36 #include "hphp/util/job-queue.h"
37 #include "hphp/util/timer.h"
38 #include "hphp/util/trace.h"
40 #include "hphp/tools/debug-parser/debug-parser.h"
41 #include "hphp/tools/debug-parser/dwarfstate.h"
44 * Debug parser for DWARF (using dwarfstate)
46 * DWARF is structured as a forest of DIEs (Debug Information Entry). Each DIE
47 * has a tag, which describes what kind of DIE it is, and a list of
48 * attributes. Each attribute has a type, which identifies what it is, and a
49 * value (the type of the value is implied by the attribute type). Furthermore,
50 * a DIE can have other DIEs as children. The top-level DIEs correspond to
51 * compilation-units, and all the children of these top-level DIEs correspond to
52 * the information in that compilation-unit.
54 * The meaning and interpretation of the DIEs is deliberately left vague by the
55 * standard, so different compilers can encode things in different ways (and no
56 * implementation is bug free).
59 namespace debug_parser { namespace {
61 TRACE_SET_MOD(trans);
63 ////////////////////////////////////////////////////////////////////////////////
// Let an iterator pair (such as the result of equal_range) be used directly
// as the range of a range-based for loop.
template <typename Iter>
Iter begin(std::pair<Iter, Iter> range) {
  return range.first;
}

template <typename Iter>
Iter end(std::pair<Iter, Iter> range) {
  return range.second;
}
70 * Fully qualified names aren't represented explicitly in DWARF. Instead the
71 * structure of the DIEs mimics the nesting structure in the source (IE, a
72 * nested class within a class nested within a namespace). So, in order to
73 * infer the fully qualified name for any given class, the current scope is
74 * tracked as the DIEs are walked.
76 * Likewise, DWARF has no concept of linkage, but the linkage is needed to know
77 * which types are actually equivalent. Luckily, a type's linkage is closely
78 * related to its scope (except for templates, see below), so it can be inferred
79 * the same way.
81 * The scope is tracked as a stack of contexts, pushing and popping off contexts
82 * when a namespace or type is entered or exited.
85 struct Scope {
86 explicit Scope(GlobalOff cu_offset)
87 : m_cu_offset{cu_offset}
89 m_scope.emplace_back(
90 ObjectTypeName{std::string{}, ObjectTypeName::Linkage::external},
91 true
95 GlobalOff cuOffset() const { return m_cu_offset; }
97 ObjectTypeName name() const;
99 // Fix the name of a type to match where it is in the namespace/type
100 // hierarchy.
101 void fixName(ObjectTypeName newName);
103 ObjectTypeName::Linkage linkage() const {
104 return m_scope.back().name.linkage;
107 std::size_t unnamedTypeCount() const {
108 return m_scope.back().unnamed_count;
111 bool isInNamespaceScope() const {
112 return m_scope.back().in_namespace_scope;
115 void incUnnamedTypeCount() { ++m_scope.back().unnamed_count; }
117 HPHP::Optional<GlobalOff> typeOffset() const {
118 return m_scope.back().offset;
121 void pushType(std::string name, GlobalOff offset) {
122 m_scope.emplace_back(
123 ObjectTypeName{std::move(name), linkage()},
124 false
126 m_scope.back().offset = offset;
129 void pushUnnamedType(std::string name, GlobalOff offset) {
130 m_scope.emplace_back(
131 ObjectTypeName{
132 std::move(name),
133 ObjectTypeName::Linkage::none
135 false
137 m_scope.back().offset = offset;
140 void pushNamespace(std::string ns) {
141 m_scope.emplace_back(
142 ObjectTypeName{std::move(ns), linkage()},
143 true
147 void pushUnnamedNamespace() {
148 m_scope.emplace_back(
149 ObjectTypeName{
150 "(unnamed namespace)",
151 ObjectTypeName::Linkage::internal
153 true
157 void pop() { m_scope.pop_back(); }
159 private:
160 struct Context {
161 Context(ObjectTypeName name, bool in_namespace_scope)
162 : name(std::move(name))
163 , in_namespace_scope{in_namespace_scope} {}
164 ObjectTypeName name;
165 bool in_namespace_scope;
166 std::size_t unnamed_count = 0;
167 HPHP::Optional<GlobalOff> offset;
169 std::vector<Context> m_scope;
170 GlobalOff m_cu_offset;
172 public:
173 static const std::string s_pseudo_type_name;
177 * Actual implementation of TypeParser for DWARF.
180 struct TypeParserImpl : TypeParser {
181 explicit TypeParserImpl(const std::string& filename, int num_threads);
183 Object getObject(ObjectTypeKey key) override;
185 size_t getObjectBlockCount() const override;
187 protected:
188 const std::vector<ObjectType>& getObjectBlock(size_t index) const override;
190 private:
191 struct StateBlock;
193 struct LinkageDependents {
194 folly::F14FastSet<GlobalOff> template_uses;
195 folly::F14FastSet<GlobalOff> children;
198 struct StaticSpec {
199 static auto constexpr kNoAddress = std::numeric_limits<uint64_t>::max();
200 std::string linkage_name;
201 uint64_t address{kNoAddress};
202 bool is_member{false};
205 struct Env {
206 const DwarfState* dwarf;
207 std::unique_ptr<StateBlock> state;
208 folly::F14FastMap<GlobalOff, GlobalOff> local_mappings;
209 folly::F14FastMap<GlobalOff, LinkageDependents> linkage_dependents;
210 std::vector<std::pair<GlobalOff, StaticSpec>> raw_static_definitions;
213 // Functions used while concurrently building state. Since these functions are
214 // invoked from multiple threads, they are static and take all their state
215 // explicitly as parameters.
216 static void genNames(Env& env,
217 Dwarf_Die die,
218 Scope& scope,
219 std::vector<GlobalOff>* template_params = nullptr);
221 static HPHP::Optional<uintptr_t> interpretLocAddress(const DwarfState& dwarf,
222 Dwarf_Attribute attr);
223 static HPHP::Optional<GlobalOff> parseSpecification(const DwarfState& dwarf,
224 Dwarf_Die die,
225 bool first,
226 StaticSpec& spec);
227 void fixTemplateLinkage();
229 // Functions used after state is built. These are not thread-safe.
230 Object genObject(Dwarf_Die die,
231 ObjectTypeName name,
232 ObjectTypeKey key);
233 Type genType(Dwarf_Die die);
234 Object::Member genMember(Dwarf_Die die,
235 const ObjectTypeName& parent_name);
236 Object::Function genFunction(Dwarf_Die die);
237 Object::Base genBase(Dwarf_Die die, const ObjectTypeName& parent_name);
238 Object::TemplateParam genTemplateParam(Dwarf_Die die);
239 HPHP::Optional<size_t> determineArrayBound(Dwarf_Die die);
241 void fillFuncArgs(Dwarf_Die die, FuncType& func);
243 // Map a given offset to the state block which contains state for that offset
244 // (see below).
245 const StateBlock& stateForOffset(GlobalOff offset) const {
246 assertx(!m_state_map.empty());
247 auto it = std::upper_bound(
248 m_state_map.begin(),
249 m_state_map.end(),
250 offset,
251 [](GlobalOff offset, const std::pair<GlobalOff, StateBlock*>& p) {
252 return offset < p.first;
255 if (it != m_state_map.begin()) --it;
256 return *it->second;
259 // All of the parser's persistent state is stored in some number of
260 // blocks. All of the blocks are computed concurrently, one block per
261 // thread. To avoid the overhead of merging the blocks together, they are kept
262 // separated. Instead m_state_map is used to map a given offset into the block
263 // which contains the state for that offset. It is a list of offset/state
264 // pairs. Any offset between the offset given in the pair and the one in the
265 // next pair is mapped to the state block in the pair.
267 // Note: this scheme only works because each compilation unit is
268 // self-contained and does not reference data in another compilation
269 // unit. However, nothing in DWARF prevents this and its not guaranteed to
270 // always be true.
271 struct StateBlock {
272 std::vector<ObjectType> all_objs;
273 folly::F14FastMap<GlobalOff, size_t> obj_offsets;
274 std::multimap<GlobalOff, StaticSpec> static_definitions;
276 std::vector<std::unique_ptr<StateBlock>> m_states;
277 std::vector<std::pair<GlobalOff, StateBlock*>> m_state_map;
278 tbb::concurrent_hash_map<GlobalOff,
279 LinkageDependents,
280 GlobalOff::Hash> m_linkage_dependents;
282 DwarfState m_dwarf;
285 // Purposefully fake name to avoid confusion with an actual type.
286 const std::string Scope::s_pseudo_type_name = "@_PSEUDO_TY";
288 ObjectTypeName Scope::name() const {
289 auto iter = m_scope.begin();
290 std::string str = iter->name.name;
291 ++iter;
292 for (; iter != m_scope.end(); ++iter) {
293 if (str.empty()) str = iter->name.name;
294 else str = folly::sformat("{}::{}", str, iter->name.name);
296 return ObjectTypeName{std::move(str), linkage()};
299 void Scope::fixName(ObjectTypeName newName) {
300 if (m_scope.size() == 1) {
301 m_scope.back().name = std::move(newName);
302 return;
305 auto context = std::move(m_scope.back());
306 m_scope.pop_back();
307 auto outerName = name();
308 assertx(newName.name.size() > outerName.name.size());
309 if (outerName.name.size()) {
310 assertx(!outerName.name.compare(0, outerName.name.size(), newName.name));
311 newName.name = newName.name.substr(outerName.name.size() + 2);
313 context.name = std::move(newName);
314 m_scope.push_back(std::move(context));
317 TypeParserImpl::TypeParserImpl(const std::string& filename, int num_threads)
318 : m_dwarf{filename}
320 // Processing each compiliation unit is very expensive, as it involves walking
321 // a large part of the debug information. To speed things up (a lot), we buid
322 // up the state concurrently. Create a job corresponding to each compiliation
323 // unit in the file and enqueue the jobs with a thread pool. We'll find the
324 // offsets of the compiliation unit in the main thread, enqueuing them as we
325 // find them. This lets us not only exploit concurrency between processing
326 // compiliation units, but between finding them and processing them.
328 // Each worker maintains its own private state which it populates for all the
329 // compiliation units its assigned (each worker can process multiple
330 // compiliation units). Once done, all the different states are kept separate
331 // (merging them would be too expensive), but a mapping is constructed to map
332 // offsets to the appropriate state block.
334 // This whole scheme is only viable because (right now), debug information in
335 // a given compilation unit doesn't reference anything outside of that unit,
336 // so the state for any given compiliation unit can be processed
337 // independently.
339 // The context serves as the link between a worker and the TypeParserImpl
340 // state (this is forced by the JobQueueWorker interface).
341 struct Context {
342 const decltype(m_dwarf)& dwarf;
343 decltype(m_states)& states;
344 decltype(m_state_map)& state_map;
345 decltype(m_linkage_dependents)& linkage_dependents;
346 // The lock protects states, state_map, and the exception field (but only
347 // when the workers are running).
348 std::mutex lock;
349 // Set to the exception if any of the workers threw (first one wins).
350 std::exception_ptr exception;
353 // Thread worker. We'll end up with a state block for each one of these.
354 struct Worker : HPHP::JobQueueWorker<GlobalOff, Context*> {
355 Env env;
357 // Remember each offset we processed so we can record it the global state
358 // map when we finish.
359 std::vector<GlobalOff> offsets;
361 void doJob(GlobalOff offset) override {
362 // Process a compiliation unit at the given offset.
363 try {
364 // We're going to use it so let's mark this worker active.
365 if (!env.dwarf) {
366 env.dwarf = &m_context->dwarf;
367 env.state = std::make_unique<StateBlock>();
370 offsets.emplace_back(offset);
372 // Do the actual processing, adding to the state block:
373 Scope scope{offset};
374 env.dwarf->onDIEAtOffset(
375 offset,
376 [&](Dwarf_Die cu) { genNames(env, cu, scope); }
379 auto const remap = [&] (GlobalOff o) {
380 auto const it = env.local_mappings.find(o);
381 if (it != env.local_mappings.end()) {
382 return it->second;
384 return o;
387 // Generate static_definitions by updating their keys collected during
388 // genNames. Some keys refer back to a DW_AT_member that belongs to a
389 // struct whose definition was in another type-unit. We want to add an
390 // entry for the member in the definition.
391 std::transform(
392 env.raw_static_definitions.begin(),
393 env.raw_static_definitions.end(),
394 std::inserter(
395 env.state->static_definitions,
396 env.state->static_definitions.end()),
397 [&](const auto& elem) {
398 return std::make_pair(remap(elem.first), std::move(elem.second));
400 env.raw_static_definitions.clear();
402 for (auto& linkage : env.linkage_dependents) {
403 if (!linkage.second.template_uses.size()) continue;
405 std::decay_t<decltype(m_context->linkage_dependents)>::accessor acc;
407 auto const inserted =
408 m_context->linkage_dependents.insert(acc, remap(linkage.first));
409 if (inserted && !env.local_mappings.size()) {
410 acc->second = std::move(linkage.second);
411 } else {
412 auto const process = [&] (auto const& from, auto& to) {
413 for (auto& elm : from) {
414 to.insert(remap(elm));
417 process(linkage.second.template_uses, acc->second.template_uses);
418 process(linkage.second.children, acc->second.children);
421 env.linkage_dependents.clear();
422 env.local_mappings.clear();
423 } catch (...) {
424 // Store any exception thrown so it can be rethrown in the main
425 // thread. We only bother to store the first one.
426 stop();
427 std::lock_guard<std::mutex> guard{m_context->lock};
428 if (!m_context->exception) {
429 m_context->exception = std::current_exception();
434 void onThreadExit() override {
435 // The worker is done (we've been told to stop). Now that we know we won't
436 // be processing anymore offsets, do the needed post-processing on the
437 // rest of the state.
438 if (!env.dwarf) return;
439 try {
440 // Compute a mapping of an object type's offset to its location in the
441 // all_objs vector.
442 env.state->obj_offsets.reserve(env.state->all_objs.size());
443 for (auto i = size_t{0}; i < env.state->all_objs.size(); ++i) {
444 env.state->obj_offsets.emplace(
445 GlobalOff::fromRaw(env.state->all_objs[i].key.object_id), i
449 // Record all the offsets this worker processed (along with the state
450 // block) in the global state map. This is done using a lock because its
451 // quick and only done when the thread is finishing.
452 std::lock_guard<std::mutex> guard{m_context->lock};
453 auto const state = env.state.get();
454 m_context->states.emplace_back(std::move(env.state));
455 for (auto offset : offsets) {
456 m_context->state_map.emplace_back(offset, state);
458 } catch (...) {
459 // Store any exception thrown so it can be rethrown in the main
460 // thread. We only bother to store the first one.
461 stop();
462 std::lock_guard<std::mutex> guard{m_context->lock};
463 if (!m_context->exception) {
464 m_context->exception = std::current_exception();
470 // Create the thread pool
471 Context context{m_dwarf, m_states, m_state_map, m_linkage_dependents};
472 HPHP::JobQueueDispatcher<Worker> dispatcher{
473 num_threads, num_threads, 0, false, &context
475 dispatcher.start();
477 size_t num_tu = 0;
478 FTRACE(1, "Adding type-units to dispatcher...\n");
479 // Iterate over every type-unit, enqueuing jobs which will
480 // concurrently scan that unit.
481 m_dwarf.forEachTopLevelUnit(
482 [&] (Dwarf_Die tu) {
483 dispatcher.enqueue(m_dwarf.getDIEOffset(tu));
484 ++num_tu;
485 return true;
487 false
489 FTRACE(1, "... {} type-units added.\n", num_tu);
491 size_t num_cu = 0;
492 FTRACE(1, "Adding compilation-units to dispatcher...\n");
493 // Iterate over every compilation-unit, enqueuing jobs which will
494 // concurrently scan that unit.
495 m_dwarf.forEachCompilationUnit(
496 [&](Dwarf_Die cu) { dispatcher.enqueue(m_dwarf.getDIEOffset(cu)); ++num_cu;}
499 FTRACE(1, "... {} compilation-units added.\n", num_cu);
501 // Wait for all the workers to finish.
502 dispatcher.stop();
504 FTRACE(1, "Finished with genNames\n");
506 // If any of the workers caught an exception, rethrow here in the main
507 // thread. We don't need to bother taking the lock because all the workers are
508 // gone.
509 if (context.exception) std::rethrow_exception(context.exception);
511 // Since the state map was appended to by the workers in a non-deterministic
512 // order, we need to sort it by offset so we can do efficient lookups later.
513 std::sort(
514 m_state_map.begin(), m_state_map.end(),
515 [&](const std::pair<GlobalOff, StateBlock*>& p1,
516 const std::pair<GlobalOff, StateBlock*>& p2) {
517 return p1.first < p2.first;
521 // Some of the static_definitions entries need to be moved to the
522 // correct block; eg they were seen when processing the cu
523 // containing the definition of the static member, but need to be
524 // moved to the state for the tu which contains the definition of
525 // the struct (which may or may not be the same state block).
526 folly::F14FastSet<void*> seen;
527 for (auto const& p : m_state_map) {
528 if (!seen.insert(p.second).second) continue;
529 auto curOff = p.first;
530 auto curState = p.second;
531 for (auto it = p.second->static_definitions.begin();
532 it != p.second->static_definitions.end(); ) {
533 if (it->first != curOff) {
534 curOff = it->first;
535 curState = const_cast<decltype(p.second)>(&stateForOffset(curOff));
537 if (curState == p.second) {
538 ++it;
539 continue;
541 curState->static_definitions.insert(*it);
542 it = p.second->static_definitions.erase(it);
546 fixTemplateLinkage();
547 m_linkage_dependents.clear();
550 size_t TypeParserImpl::getObjectBlockCount() const {
551 return m_states.size();
554 const std::vector<ObjectType>&
555 TypeParserImpl::getObjectBlock(size_t index) const {
556 return m_states[index]->all_objs;
560 * As stated above, the linkage of templates is tricky. The linkage of a
561 * template is the most restrictive linkage of its original linkage and the
562 * linkage of its template parameters. Since some of the template parameters may
563 * not yet be parsed when we parse the template, the inference of the correct
564 * template linkage is deferred until all the types' linkages are computed.
566 * However, since templates can be parameters to other templates, this process
567 * must be repeated until no type's linkage changes.
569 * As an additional complication, the linkage of any nested class is inherited
570 * from its parent, so when a template's linkage changes, it must be bubbled
571 * down to any of its nested classes.
573 * When the names and initial linkages of all the types were generated, the
574 * relationship between templates, their parameters, and nested classes is
575 * recorded in linkage_dependents, which is used here.
577 void TypeParserImpl::fixTemplateLinkage() {
578 using ChangedSet = folly::F14FastSet<GlobalOff>;
579 ChangedSet changed;
581 for (const auto& pair : m_linkage_dependents) {
582 if (pair.second.template_uses.empty()) continue;
583 changed.emplace(pair.first);
586 ChangedSet old_changed;
587 while (!changed.empty()) {
588 std::swap(changed, old_changed);
590 // For every type which has its linkage changed, update its dependents
591 // (templates where the type is used as a parameter, or nested classes) with
592 // the new linkage, and mark as being changed as well.
593 for (auto changed_offset : old_changed) {
594 decltype(m_linkage_dependents)::const_accessor acc;
595 if (!m_linkage_dependents.find(acc, changed_offset)) continue;
597 auto const& children = acc->second.children;
598 auto const& template_uses = acc->second.template_uses;
600 auto const& changed_state = stateForOffset(changed_offset);
602 auto const it = changed_state.obj_offsets.find(changed_offset);
603 if (it == changed_state.obj_offsets.end()) {
604 // This isn't right - if (eg) its a pointer to an object type
605 // with internal linkage, we need to mark the dependents
606 // internal; but we don't track pointer types at all - so just
607 // assume this type doesn't matter. The same goes for other
608 // things like const struct types etc.
609 continue;
612 auto const& changed_obj = changed_state.all_objs[it->second];
614 // Only update and mark if we actually make the linkage more restrictive.
615 if (changed_obj.name.linkage != ObjectTypeName::Linkage::external) {
616 const auto process = [&](GlobalOff dependent_offset) {
617 auto& dep_state = const_cast<StateBlock&>(
618 stateForOffset(dependent_offset)
620 auto const it = dep_state.obj_offsets.find(dependent_offset);
621 if (it == dep_state.obj_offsets.end()) return;
622 auto& dependent_obj = dep_state.all_objs[it->second];
623 if (dependent_obj.name.linkage < changed_obj.name.linkage) {
624 FTRACE(4,
625 "Reducing linkage for {}({}) from {} to {} due to {}({})\n",
626 dependent_obj.name.name,
627 GlobalOff::fromRaw(dependent_obj.key.object_id),
628 show(dependent_obj.name.linkage),
629 show(changed_obj.name.linkage),
630 changed_obj.name.name,
631 GlobalOff::fromRaw(changed_obj.key.object_id));
632 dependent_obj.name.linkage = changed_obj.name.linkage;
633 changed.emplace(dependent_offset);
636 for (auto template_offset : template_uses) process(template_offset);
637 for (auto child_offset : children) process(child_offset);
641 old_changed.clear();
// Produce the Object for the object type identified by key. key.object_id is
// treated as a raw GlobalOff: it selects the state block holding the type and
// is then looked up in that block's obj_offsets table.
645 Object TypeParserImpl::getObject(ObjectTypeKey key) {
646 auto const& state = stateForOffset(GlobalOff::fromRaw(key.object_id));
647 auto iter = state.obj_offsets.find(GlobalOff::fromRaw(key.object_id));
648 // If we don't know of an object type at the given location, assume it's
649 // referring to something we never parsed in the first place, so return the
650 // pseudo-type.
651 if (iter == state.obj_offsets.end()) {
// The placeholder uses the reserved pseudo-type name with pseudo linkage and
// keeps the caller's key.
652 return Object{
653 ObjectTypeName{
654 Scope::s_pseudo_type_name,
655 ObjectTypeName::Linkage::pseudo,
658 key,
659 Object::Kind::k_other,
660 true
// Known object type: visit the DIE at the key's offset and build the Object
// from it, using the name recorded in all_objs when the state was built.
664 return m_dwarf.onDIEAtOffset(
665 GlobalOff::fromRaw(key.object_id),
666 [&](Dwarf_Die die) {
667 return genObject(
668 die,
669 state.all_objs[iter->second].name,
676 // For static members, determine how that member's address can be
677 // determined. In theory, this can be any arbitrary expression, but we only
678 // support constant addresses right now.
679 HPHP::Optional<uintptr_t>
680 TypeParserImpl::interpretLocAddress(const DwarfState& dwarf,
681 Dwarf_Attribute attr) {
682 auto form = dwarf.getAttributeForm(attr);
683 if (form != DW_FORM_exprloc) return std::nullopt;
684 auto exprs = dwarf.getAttributeValueExprLoc(attr);
685 if (exprs.size() != 1) return std::nullopt;
686 if (exprs[0].lr_atom != DW_OP_addr) return std::nullopt;
687 return HPHP::Optional<uintptr_t>{exprs[0].lr_number};
690 HPHP::Optional<GlobalOff>
691 TypeParserImpl::parseSpecification(const DwarfState& dwarf,
692 Dwarf_Die die,
693 bool first,
694 StaticSpec &spec) {
695 HPHP::Optional<GlobalOff> offset;
696 bool is_inline = false;
697 dwarf.forEachAttribute(
698 die,
699 [&](Dwarf_Attribute attr) {
700 switch (dwarf.getAttributeType(attr)) {
701 case DW_AT_abstract_origin:
702 offset = dwarf.onDIEAtOffset(
703 dwarf.getAttributeValueRef(attr),
704 [&](Dwarf_Die die2) {
705 return parseSpecification(dwarf, die2, false, spec);
708 break;
709 case DW_AT_specification:
710 offset = dwarf.getAttributeValueRef(attr);
711 break;
712 case DW_AT_linkage_name:
713 if (spec.linkage_name.empty()) {
714 spec.linkage_name = dwarf.getAttributeValueString(attr);
716 break;
717 case DW_AT_location:
718 if (spec.address == StaticSpec::kNoAddress) {
719 if (auto const address = interpretLocAddress(dwarf, attr)) {
720 spec.address = *address;
723 break;
724 case DW_AT_low_pc:
725 if (spec.address == StaticSpec::kNoAddress) {
726 spec.address = dwarf.getAttributeValueAddr(attr);
727 // Sometimes GCC and Clang will emit invalid function
728 // addresses. Usually zero, but sometimes a very low
729 // number. These numbers have the appearance of being
730 // un-relocated addresses, but its in the final executable. As
731 // a safety net, if an address is provided, but its abnormally
732 // low, ignore it.
733 if (spec.address < 4096) spec.address = StaticSpec::kNoAddress;
735 break;
736 case DW_AT_object_pointer:
737 // Just in case we actually have a definition, use it to infer
738 // member-ness.
739 spec.is_member = true;
740 break;
741 default:
742 break;
744 return true;
747 if (first && (is_inline ||
748 (spec.linkage_name.empty() &&
749 spec.address == StaticSpec::kNoAddress &&
750 !spec.is_member))) {
751 return std::nullopt;
753 return offset;
757 * Given a DIE, and the current scope, recursively generate the names/linkages
758 * for all the object types in this DIE and children. If template_params is
759 * provided, the parent DIE is an object type, so template_params should be
760 * filled with any template parameters in the child DIE.
762 void TypeParserImpl::genNames(Env& env,
763 Dwarf_Die die,
764 Scope& scope,
765 std::vector<GlobalOff>* template_params) {
766 auto& dwarf = *env.dwarf;
767 auto& state = *env.state;
769 const auto recurse = [&](std::vector<GlobalOff>* params = nullptr){
770 dwarf.forEachChild(
771 die,
772 [&](Dwarf_Die child) {
773 genNames(env, child, scope, params);
774 return true;
779 auto tag = dwarf.getTag(die);
780 switch (tag) {
781 case DW_TAG_base_type:
782 case DW_TAG_union_type:
783 case DW_TAG_enumeration_type:
784 case DW_TAG_structure_type:
785 case DW_TAG_class_type:
786 case DW_TAG_unspecified_type: {
787 // Object-types. These have names and linkages, so we must record them.
789 // If this is a type-unit definition with a separate declaration
790 // in the same tu, declarationOffset will point to the
791 // declaration.
792 HPHP::Optional<GlobalOff> declarationOffset;
794 // If this is a declaration in a cu, referring back to a
795 // tu-definition, definitionOffset will point to that
796 // definition. Such declarations are emitted for the
797 // *definitions* of static members (which always happen in cus,
798 // not tus)
799 HPHP::Optional<GlobalOff> definitionOffset;
801 // Determine the base name, whether this type was unnamed, and whether
802 // this is an incomplete type or not from the DIE's attributes.
803 auto get_info = [&](Dwarf_Die cur,
804 bool updateOffsets) ->
805 std::tuple<std::string, bool, bool> {
806 std::string name;
807 std::string linkage_name;
808 auto incomplete = false;
810 dwarf.forEachAttribute(
811 cur,
812 [&](Dwarf_Attribute attr) {
813 switch (dwarf.getAttributeType(attr)) {
814 case DW_AT_name:
815 name = dwarf.getAttributeValueString(attr);
816 break;
817 case DW_AT_linkage_name:
818 linkage_name = dwarf.getAttributeValueString(attr);
819 break;
820 case DW_AT_declaration:
821 incomplete = dwarf.getAttributeValueFlag(attr);
822 break;
823 case DW_AT_specification:
824 // The compiler can spit out a declaration for a
825 // struct, followed later by the full definition. The
826 // full definition has a DW_AT_specification pointing
827 // back to the declaration - but note that the full
828 // definition may not be defined in the correct
829 // namespace - so we're going to keep the declaration,
830 // and update it based on the definition ignoring the
831 // definition's name (this feels a little backwards,
832 // but its how dwarf works).
833 if (updateOffsets) {
834 declarationOffset = dwarf.getAttributeValueRef(attr);
836 break;
837 case DW_AT_signature:
838 if (updateOffsets &&
839 dwarf.getAttributeForm(attr) == DW_FORM_ref_sig8) {
840 // The actual definition is in another type-unit, we
841 // can ignore this declaration.
842 definitionOffset = dwarf.getAttributeValueRef(attr);
843 break;
845 default:
846 break;
848 return true;
852 // If there's an explicit name, just use that.
853 if (!name.empty()) return std::make_tuple(name, false, incomplete);
855 // Otherwise, if there's a linkage name, demangle it, and strip off
856 // everything except the last section, and use that as the base
857 // name. For types which have external linkage, this lets us use
858 // whatever naming scheme the compiler has chosen for unnamed types.
859 if (!linkage_name.empty()) {
860 auto demangled = folly::demangle(linkage_name.c_str()).toStdString();
861 auto index = demangled.rfind("::");
862 if (index != decltype(demangled)::npos) demangled.erase(0, index+2);
863 return std::make_tuple(demangled, false, incomplete);
866 // No explicit name and no linkage name to use, so we have to try to
867 // infer one ourself (making it a synthetic name).
869 // Try the first named member
870 auto const first_member = [&](const char* type,
871 auto member_type) {
872 std::string first_member;
873 dwarf.forEachChild(
874 cur,
875 [&](Dwarf_Die child) {
876 if (dwarf.getTag(child) == member_type) {
877 first_member = dwarf.getDIEName(child);
879 return first_member.empty();
882 if (!first_member.empty()) {
883 return folly::sformat(
884 "(unnamed {} containing '{}')", type, first_member
887 return std::string{};
890 auto const type_name = [&]{
891 if (tag == DW_TAG_enumeration_type) return "enumeration";
892 if (tag == DW_TAG_union_type) return "union";
893 if (tag == DW_TAG_structure_type) return "struct";
894 if (tag == DW_TAG_class_type) return "class";
895 return "type";
898 auto const member_type = [&]() {
899 if (tag == DW_TAG_enumeration_type) return DW_TAG_enumerator;
900 return DW_TAG_member;
903 auto first_member_name = first_member(type_name(), member_type());
904 if (!first_member_name.empty()) {
905 return std::make_tuple(
906 std::move(first_member_name), true, incomplete
910 // If this is within a namespace, don't infer any name at all, keep it
911 // nameless. If its not within a namespace (IE, within a class), give it
912 // a unique name based on how many unnamed types we've seen so far. We
913 // can't do this for types within a namespace because namespaces are
914 // open and thus we can't force a global numbering of all types within
915 // it.
916 if (!scope.isInNamespaceScope()) {
917 scope.incUnnamedTypeCount();
918 return std::make_tuple(
919 folly::sformat(
920 "(unnamed {} #{})",
921 type_name(),
922 scope.unnamedTypeCount()
924 true,
925 incomplete
929 return std::make_tuple(
930 folly::sformat("(unnamed {})", type_name()),
931 true,
932 incomplete
935 const auto info = get_info(die, /*updateOffsets=*/true);
937 auto offset = dwarf.getDIEOffset(die);
938 if (definitionOffset) {
939 // This is a declaration which refers to the definition via
940 // DW_AT_signature. We'll see one of these for a class in the
941 // cu where its static members are defined. Later
942 // DW_TAG_variable nodes will refer back to the ones here,
943 // rather than the ones in the definition, so we need to
944 // record a map from any members defined here back to the
945 // original definition. We could also see them for parent
946 // classes, or for template param (a template param can refer
947 // to an out-of-unit type either by using a ref_sig8 directly,
948 // in which case we will have resolved the offset correctly,
949 // or it could have an offset to a type with a
950 // DW_AT_signature, in which case we'll need to fix it up
951 // later). In any case, add an entry to map our offset to the
952 // true definition, and entries to map any members to their
953 // true definitions.
954 env.local_mappings.emplace(offset, *definitionOffset);
956 folly::F14FastMap<std::string, GlobalOff> map;
957 dwarf.forEachChild(
958 die,
959 [&] (Dwarf_Die child) {
960 if (dwarf.getTag(child) == DW_TAG_member) {
961 map.emplace(dwarf.getDIEName(child), dwarf.getDIEOffset(child));
963 return true;
966 if (!map.empty()) {
967 dwarf.onDIEAtOffset(
968 *definitionOffset,
969 [&] (Dwarf_Die orig) {
970 dwarf.forEachChild(
971 orig,
972 [&] (Dwarf_Die child) {
973 auto it = map.find(dwarf.getDIEName(child));
974 if (it != map.end()) {
975 env.local_mappings.emplace(it->second,
976 dwarf.getDIEOffset(child));
978 return true;
986 auto parent_offset = scope.typeOffset();
988 // If we inferred a base name, use that to form the fully qualified name,
989 // otherwise treat it as an unnamed type.
990 if (!definitionOffset) {
991 std::get<1>(info) ?
992 scope.pushUnnamedType(std::get<0>(info), offset) :
993 scope.pushType(std::get<0>(info), offset);
994 } else {
995 // Push the name of the definition, not of the declaration
996 dwarf.onDIEAtOffset(
997 *definitionOffset,
998 [&] (Dwarf_Die def) {
999 const auto info_def = get_info(def, /*updateOffsets=*/false);
1000 std::get<1>(info_def) ?
1001 scope.pushUnnamedType(std::get<0>(info_def), offset) :
1002 scope.pushType(std::get<0>(info_def), offset);
1005 SCOPE_EXIT { scope.pop(); };
1007 if (declarationOffset) {
1008 // This completes a previous declaration. Search backwards for
1009 // it, which should be fine because it's normally right after
1010 // the declaration (and it's always in the same cu/tu).
1011 auto i = state.all_objs.size();
1012 while (true) {
1013 assert(i);
1014 auto& obj = state.all_objs[--i];
1015 if (obj.key.object_id == declarationOffset->raw()) {
1016 assert(obj.incomplete);
1017 FTRACE(5,
1018 "Completing previous definition of {}.\n"
1019 " Was {}, Now {}, Linkage: {}\n",
1020 obj.name.name,
1021 GlobalOff::fromRaw(obj.key.object_id), offset,
1022 show(obj.name.linkage)
1024 obj.incomplete = false;
1025 obj.key.object_id = offset.raw();
1026 // map declarationOffset to offset, because any ref_sig8s
1027 // will point to the definition, not the declaration.
1028 env.local_mappings.emplace(*declarationOffset, offset);
1030 // Fixup the name in the scope stack
1031 scope.fixName(obj.name);
1032 assertx(scope.name().name == obj.name.name);
1033 break;
1036 } else {
1037 // Record this object type, with fully qualified name, key, and linkage.
1038 auto obj = ObjectType{
1039 scope.name(),
1040 ObjectTypeKey{offset.raw(), scope.cuOffset().raw()},
1041 std::get<2>(info)
1043 FTRACE(5,
1044 "{} {} at {} Linkage: {}\n",
1045 obj.incomplete ? "Declaring" : "Defining",
1046 obj.name.name,
1047 offset,
1048 show(obj.name.linkage)
1050 state.all_objs.emplace_back(std::move(obj));
1053 // This object type is done, so recurse into any nested classes. Provide a
1054 // list of template parameters to be filled in case this is a template. If
1055 // it is, we'll record the linkage dependence for the later template
1056 // linkage fix-up.
1057 std::vector<GlobalOff> recurse_template_params;
1058 recurse(&recurse_template_params);
1060 for (auto param_offset : recurse_template_params) {
1061 FTRACE(9, "linkage: {} depends on template param {}\n",
1062 offset, param_offset);
1063 env.linkage_dependents[param_offset].template_uses.emplace(offset);
1065 if (parent_offset) {
1066 FTRACE(9, "linkage: {} depends on child {}\n",
1067 *parent_offset, offset);
1068 env.linkage_dependents[*parent_offset].children.emplace(offset);
1070 break;
1072 case DW_TAG_namespace: {
1073 // Record the namespace in the scope and recurse. If this is an unnamed
1074 // namespace, that means any type found in child DIEs will have internal
1075 // linkage.
1076 auto name = dwarf.getDIEName(die);
1077 name.empty() ?
1078 scope.pushUnnamedNamespace() :
1079 scope.pushNamespace(std::move(name));
1080 SCOPE_EXIT { scope.pop(); };
1081 recurse();
1082 break;
1084 case DW_TAG_variable: {
1085 // Normally we don't care about variables since we're only looking for
1086 // types. However, certain aspects of object types can't be completely
1087 // inferred at the declaration site (mainly static variable linkage
1088 // related things like linkage name and address). We need a definition for
1089 // that, so record all the variable definitions along with their
1090 // specification, which we can consult later.
1092 // Neither GCC nor Clang record a name for a variable which is a static
1093 // definition, so ignore any that do have a name. This speeds things up.
1094 if (!dwarf.getDIEName(die).empty()) break;
1096 StaticSpec spec;
1097 if (auto off = parseSpecification(dwarf, die, true, spec)) {
1098 env.raw_static_definitions.emplace_back(*off, spec);
1100 // Note that we don't recurse into any child DIEs here. There shouldn't be
1101 // anything interesting in them.
1102 break;
1104 case DW_TAG_subprogram: {
1105 // For the same reason we care about DW_TAG_variables, we examine
1106 // DW_TAG_subprogram as well. Certain interesting aspects of a static
1107 // function are only present in its definition.
1109 if (!dwarf.getDIEName(die).empty()) break;
1111 StaticSpec spec;
1112 if (auto off = parseSpecification(dwarf, die, true, spec)) {
1113 env.raw_static_definitions.emplace_back(*off, spec);
1116 // Don't recurse. There might be valid types within a subprogram
1117 // definition, but we deliberately ignore those. A large portion of the
1118 // debug information lies within subprogram definitions, and scanning all
1119 // of that consumes a large amount of time. Moreover, these types usually
1120 // aren't very interesting, so we deliberately ignore them for
1121 // efficiency. If there's actually any reference to these types, they'll
1122 // be reported as the pseudo-type.
1123 break;
1125 case DW_TAG_template_type_param: {
1126 // Template type parameters are represented using child DIEs, not
1127 // attributes. If the parent DIE was an object type, fill the supplied
1128 // vector with the template parameters. Don't recurse because there
1129 // shouldn't be anything interesting in the children.
1130 if (template_params) {
1131 dwarf.forEachAttribute(
1132 die,
1133 [&](Dwarf_Attribute attr) {
1134 switch (dwarf.getAttributeType(attr)) {
1135 case DW_AT_type: {
1136 auto offset = dwarf.getAttributeValueRef(attr);
1137 // Check this type to see if it is a declaration and use the
1138 // real type instead
1139 dwarf.onDIEAtOffset(
1140 offset,
1141 [&] (Dwarf_Die type_die) {
1142 dwarf.forEachAttribute(
1143 type_die,
1144 [&](Dwarf_Attribute attr) {
1145 if (dwarf.getAttributeType(attr) == DW_AT_signature &&
1146 dwarf.getAttributeForm(attr) == DW_FORM_ref_sig8) {
1147 offset = dwarf.getAttributeValueRef(attr);
1148 return false;
1150 return true;
1154 template_params->emplace_back(offset);
1155 return false;
1157 default:
1158 return true;
1163 break;
1165 default:
1166 recurse();
1167 break;
/*
 * Given the DIE representing an object type, its name, and its key, return the
 * detailed specification of the object.
 */
1175 Object TypeParserImpl::genObject(Dwarf_Die die,
1176 ObjectTypeName name,
1177 ObjectTypeKey key) {
1178 const auto kind = [&]{
1179 switch (m_dwarf.getTag(die)) {
1180 case DW_TAG_structure_type: return Object::Kind::k_class;
1181 case DW_TAG_class_type: return Object::Kind::k_class;
1182 case DW_TAG_union_type: return Object::Kind::k_union;
1183 case DW_TAG_base_type: return Object::Kind::k_primitive;
1184 case DW_TAG_enumeration_type: return Object::Kind::k_enum;
1185 // Strange things like "decltype(nullptr_t)"
1186 case DW_TAG_unspecified_type: return Object::Kind::k_other;
1187 // Shouldn't happen because we only call genObject() on offsets already
1188 // visited and verified to be an object type.
1189 default: always_assert(0);
1191 }();
1193 HPHP::Optional<std::size_t> size;
1194 bool incomplete = false;
1195 HPHP::Optional<GlobalOff> definition_offset;
1197 m_dwarf.forEachAttribute(
1198 die,
1199 [&](Dwarf_Attribute attr) {
1200 switch (m_dwarf.getAttributeType(attr)) {
1201 case DW_AT_byte_size:
1202 size = m_dwarf.getAttributeValueUData(attr);
1203 break;
1204 case DW_AT_declaration:
1205 incomplete = m_dwarf.getAttributeValueFlag(attr);
1206 break;
1207 case DW_AT_signature:
1208 definition_offset = m_dwarf.getAttributeValueRef(attr);
1209 break;
1210 default:
1211 break;
1213 return true;
1217 if (definition_offset) {
1218 return m_dwarf.onDIEAtOffset(
1219 *definition_offset,
1220 [&](Dwarf_Die die2) { return genObject(die2, name, key); }
1224 // No size was provided. This is expected for incomplete types or the strange
1225 // "other" types sometimes seen, but an error otherwise.
1226 if (!size) {
1227 if (incomplete || kind == Object::Kind::k_other) {
1228 size = 0;
1229 } else {
1230 throw Exception{
1231 folly::sformat(
1232 "Object type '{}' at offset {} is a complete definition, "
1233 "but has no size!",
1234 name.name,
1235 key.object_id
1241 Object obj{std::move(name), *size, key, kind, incomplete};
1243 m_dwarf.forEachChild(
1244 die,
1245 [&](Dwarf_Die child) {
1246 switch (m_dwarf.getTag(child)) {
1247 case DW_TAG_inheritance:
1248 obj.bases.emplace_back(genBase(child, obj.name));
1249 break;
1250 case DW_TAG_member:
1251 obj.members.emplace_back(genMember(child, obj.name));
1252 if (obj.name.linkage != ObjectTypeName::Linkage::external) {
1253 // Clang gives linkage names to things that don't actually have
1254 // linkage. Don't let any members have linkage names if the object
1255 // type doesn't have external linkage.
1256 obj.members.back().linkage_name.clear();
1258 break;
1259 case DW_TAG_template_type_parameter:
1260 obj.template_params.emplace_back(genTemplateParam(child));
1261 break;
1262 case DW_TAG_GNU_template_parameter_pack:
1263 // Flatten parameter packs as if they were just a normally provided
1264 // parameter list. This is enough for our purposes.
1265 m_dwarf.forEachChild(
1266 child,
1267 [&](Dwarf_Die template_die) {
1268 if (m_dwarf.getTag(template_die) ==
1269 DW_TAG_template_type_parameter) {
1270 obj.template_params.emplace_back(
1271 genTemplateParam(template_die)
1274 return true;
1277 break;
1278 case DW_TAG_subprogram:
1279 obj.functions.emplace_back(genFunction(child));
1280 if (obj.name.linkage != ObjectTypeName::Linkage::external) {
1281 // Clang gives linkage names to things that don't actually have
1282 // linkage. Don't let any functions have linkage names if the object
1283 // type doesn't have external linkage.
1284 obj.functions.back().linkage_name.clear();
1286 break;
1287 default:
1288 break;
1290 return true;
1294 // The base classes and members aren't always reported in DWARF in offset
1295 // order, but make the output deterministic here to simplify consumers of the
1296 // information.
1297 std::sort(
1298 obj.bases.begin(),
1299 obj.bases.end(),
1300 [&](const Object::Base& b1, const Object::Base& b2) {
1301 return std::tie(b1.offset, b1.type.name.name) <
1302 std::tie(b2.offset, b2.type.name.name);
1306 std::sort(
1307 obj.members.begin(),
1308 obj.members.end(),
1309 [&](const Object::Member& m1, const Object::Member& m2) {
1310 return std::tie(m1.offset, m1.name) <
1311 std::tie(m2.offset, m2.name);
1315 return obj;
/*
 * Given a DIE representing an arbitrary type, return its equivalent Type. This
 * can involve chasing a chain of such type DIEs.
 */
1322 Type TypeParserImpl::genType(Dwarf_Die die) {
1323 // Offset of a different type this type refers to. If not present, that type
1324 // is implicitly "void".
1325 HPHP::Optional<GlobalOff> type_offset;
1326 // For pointers to members, the type referring to the object the member
1327 // belongs to.
1328 HPHP::Optional<GlobalOff> containing_type_offset;
1330 // A struct can have a declaration which refers to the definition
1331 // via a DW_AT_signature.
1332 HPHP::Optional<GlobalOff> definition_offset;
1334 m_dwarf.forEachAttribute(
1335 die,
1336 [&](Dwarf_Attribute attr) {
1337 switch (m_dwarf.getAttributeType(attr)) {
1338 case DW_AT_type:
1339 type_offset = m_dwarf.getAttributeValueRef(attr);
1340 break;
1341 case DW_AT_containing_type:
1342 containing_type_offset = m_dwarf.getAttributeValueRef(attr);
1343 break;
1344 case DW_AT_signature:
1345 definition_offset = m_dwarf.getAttributeValueRef(attr);
1346 return false;
1347 default:
1348 break;
1350 return true;
1354 const auto recurse = [&](GlobalOff offset) {
1355 return m_dwarf.onDIEAtOffset(
1356 offset,
1357 [&](Dwarf_Die die2) { return genType(die2); }
1361 // Pointers to member functions aren't represented in DWARF. Instead the
1362 // compiler creates a struct internally which stores all the information.
1364 switch (m_dwarf.getTag(die)) {
1365 case DW_TAG_base_type:
1366 case DW_TAG_structure_type:
1367 case DW_TAG_class_type:
1368 case DW_TAG_union_type:
1369 case DW_TAG_enumeration_type:
1370 case DW_TAG_unspecified_type: {
1371 if (definition_offset) return recurse(*definition_offset);
1372 auto offset = m_dwarf.getDIEOffset(die);
1373 auto const& state = stateForOffset(offset);
1374 auto iter = state.obj_offsets.find(offset);
1375 if (iter == state.obj_offsets.end()) {
1376 // Must be the pseudo-type.
1377 return ObjectType{
1378 ObjectTypeName{
1379 Scope::s_pseudo_type_name,
1380 ObjectTypeName::Linkage::pseudo
1382 ObjectTypeKey{offset.raw(), 0},
1383 true
1385 } else {
1386 return state.all_objs[iter->second];
1389 case DW_TAG_pointer_type:
1390 return PtrType{type_offset ? recurse(*type_offset) : VoidType{}};
1391 case DW_TAG_reference_type: {
1392 if (!type_offset) {
1393 throw Exception{
1394 folly::sformat(
1395 "Encountered reference to void at offset {}",
1396 m_dwarf.getDIEOffset(die)
1400 return RefType{recurse(*type_offset)};
1402 case DW_TAG_rvalue_reference_type: {
1403 if (!type_offset) {
1404 throw Exception{
1405 folly::sformat(
1406 "Encountered rvalue reference to void at offset {}",
1407 m_dwarf.getDIEOffset(die)
1411 return RValueRefType{recurse(*type_offset)};
1413 case DW_TAG_array_type: {
1414 if (!type_offset) {
1415 throw Exception{
1416 folly::sformat(
1417 "Encountered array of voids at offset {}",
1418 m_dwarf.getDIEOffset(die)
1422 return ArrType{recurse(*type_offset), determineArrayBound(die)};
1424 case DW_TAG_const_type:
1425 return ConstType{type_offset ? recurse(*type_offset) : VoidType{}};
1426 case DW_TAG_volatile_type:
1427 return VolatileType{type_offset ? recurse(*type_offset) : VoidType{}};
1428 case DW_TAG_restrict_type:
1429 return RestrictType{type_offset ? recurse(*type_offset) : VoidType{}};
1430 case DW_TAG_typedef:
1431 return type_offset ? recurse(*type_offset) : VoidType{};
1432 case DW_TAG_subroutine_type: {
1433 FuncType func{type_offset ? recurse(*type_offset) : VoidType{}};
1434 fillFuncArgs(die, func);
1435 return std::move(func);
1437 case DW_TAG_ptr_to_member_type: {
1438 if (!containing_type_offset) {
1439 throw Exception{
1440 folly::sformat(
1441 "Encountered ptr-to-member at offset {} without a "
1442 "containing object",
1443 m_dwarf.getDIEOffset(die)
1448 auto containing = recurse(*containing_type_offset);
1449 if (auto obj = containing.asObject()) {
1450 return PtrType{
1451 MemberType{std::move(*obj), recurse(*type_offset)}
1453 } else {
1454 throw Exception{
1455 folly::sformat(
1456 "Encountered ptr-to-member at offset {} with a "
1457 "containing object of type '{}'",
1458 m_dwarf.getDIEOffset(die),
1459 containing.toString()
1464 default:
1465 throw Exception{
1466 folly::sformat(
1467 "Encountered non-type tag '{}' at offset {} while "
1468 "traversing type description",
1469 m_dwarf.tagToString(m_dwarf.getTag(die)),
1470 m_dwarf.getDIEOffset(die)
1476 Object::Member TypeParserImpl::genMember(Dwarf_Die die,
1477 const ObjectTypeName& parent_name) {
1478 std::string name;
1479 std::string linkage_name;
1480 std::size_t offset = 0;
1481 HPHP::Optional<GlobalOff> die_offset;
1482 HPHP::Optional<uintptr_t> address;
1483 bool is_static = false;
1485 m_dwarf.forEachAttribute(
1486 die,
1487 [&](Dwarf_Attribute attr) {
1488 switch (m_dwarf.getAttributeType(attr)) {
1489 case DW_AT_name:
1490 name = m_dwarf.getAttributeValueString(attr);
1491 break;
1492 case DW_AT_linkage_name:
1493 linkage_name = m_dwarf.getAttributeValueString(attr);
1494 break;
1495 case DW_AT_location:
1496 address = interpretLocAddress(m_dwarf, attr);
1497 break;
1498 case DW_AT_data_member_location:
1499 offset = m_dwarf.getAttributeValueUData(attr);
1500 break;
1501 case DW_AT_type:
1502 die_offset = m_dwarf.getAttributeValueRef(attr);
1503 break;
1504 case DW_AT_declaration:
1505 is_static = m_dwarf.getAttributeValueFlag(attr);
1506 break;
1507 default:
1508 break;
1510 return true;
1514 if (!die_offset) {
1515 // No DW_AT_type means "void", but you can't have void members!
1516 throw Exception{
1517 folly::sformat(
1518 "Encountered member (name: '{}') of type void "
1519 "in object type '{}' at offset {}",
1520 name,
1521 parent_name.name,
1522 m_dwarf.getDIEOffset(die)
1527 if (is_static) {
1528 // If this is a static member, look up any definitions which refer to this
1529 // member, and pull any additional information out of it.
1530 auto const static_offset = m_dwarf.getDIEOffset(die);
1531 auto const& state = stateForOffset(static_offset);
1532 auto const range = state.static_definitions.equal_range(static_offset);
1534 for (auto const& elm : range) {
1535 if (linkage_name.empty() && !elm.second.linkage_name.empty()) {
1536 linkage_name = elm.second.linkage_name;
1538 if (!address && elm.second.address != StaticSpec::kNoAddress) {
1539 address = elm.second.address;
1544 auto type = m_dwarf.onDIEAtOffset(
1545 *die_offset,
1546 [&](Dwarf_Die die2){ return genType(die2); }
1549 if (name.empty()) {
1550 name = is_static
1551 ? folly::sformat("(unnamed static member of type '{}')", type.toString())
1552 : folly::sformat("(unnamed member of type '{}')", type.toString());
1555 return Object::Member{
1556 name,
1557 is_static ? std::nullopt : HPHP::Optional<std::size_t>{offset},
1558 linkage_name,
1559 address,
1560 std::move(type)
// Given a DW_TAG_subprogram DIE found inside an object type, return the
// description of that function: its name, return type (void when the DIE has
// no DW_AT_type), parameter types, kind (virtual, member or static), linkage
// name, and address.
1564 Object::Function TypeParserImpl::genFunction(Dwarf_Die die) {
1565 std::string name;
1566 Type ret_type{VoidType{}};
1567 std::string linkage_name;
1568 bool is_virtual = false;
1569 bool is_member = false;
// First pass: pull the interesting attributes off the function DIE itself.
1571 m_dwarf.forEachAttribute(
1572 die,
1573 [&](Dwarf_Attribute attr) {
1574 switch (m_dwarf.getAttributeType(attr)) {
1575 case DW_AT_name:
1576 name = m_dwarf.getAttributeValueString(attr);
1577 break;
1578 case DW_AT_type:
1579 ret_type = m_dwarf.onDIEAtOffset(
1580 m_dwarf.getAttributeValueRef(attr),
1581 [&](Dwarf_Die ty_die) { return genType(ty_die); }
1583 break;
1584 case DW_AT_linkage_name:
1585 linkage_name = m_dwarf.getAttributeValueString(attr);
1586 break;
1587 case DW_AT_virtuality:
1588 is_virtual =
1589 (m_dwarf.getAttributeValueUData(attr) != DW_VIRTUALITY_none);
1590 break;
1591 case DW_AT_object_pointer:
1592 is_member = true;
1593 break;
1594 default:
1595 break;
1597 return true;
1602 * We need to determine if this function is a static function or a member
1603 * function. The straight-forward way is to look for the DW_AT_object_pointer
1604 * attribute (which is only present for member functions). This works fine for
1605 * GCC, but not Clang.
1607 * On Clang, the DW_AT_object_pointer is only present in a function's
1608 * definition, not its declaration. Moreover, it doesn't reliably emit
1609 * function declarations if it thinks the function isn't used. As a result, we
1610 * can't reliably distinguish member functions from static functions on clang.
1612 * As an alternative, if the first formal parameter of a function is marked as
1613 * being "artificial" (which means its not present in the actual source),
1614 * assume its actually the this pointer, and that the function is a member
1615 * function.
// Second pass: walk the child DIEs, collecting the type of every formal
// parameter in order. Children with any other tag are skipped.
1617 std::vector<Type> arg_types;
1618 m_dwarf.forEachChild(
1619 die,
1620 [&](Dwarf_Die child) {
1621 if (m_dwarf.getTag(child) != DW_TAG_formal_parameter) {
1622 return true;
1625 bool is_artificial = false;
1626 Type arg_type{VoidType()};
1628 m_dwarf.forEachAttribute(
1629 child,
1630 [&](Dwarf_Attribute attr) {
1631 switch (m_dwarf.getAttributeType(attr)) {
1632 case DW_AT_type:
1633 arg_type = m_dwarf.onDIEAtOffset(
1634 m_dwarf.getAttributeValueRef(attr),
1635 [&](Dwarf_Die ty_die) { return genType(ty_die); }
1637 break;
1638 case DW_AT_artificial:
1639 is_artificial = m_dwarf.getAttributeValueFlag(attr);
1640 break;
1641 default:
1642 break;
1644 return true;
1648 // Only consider this a member function if this arg is the first and it's
1649 // artificial.
1650 if (is_artificial && arg_types.empty()) {
1651 is_member = true;
1653 arg_types.emplace_back(std::move(arg_type));
1655 return true;
1659 HPHP::Optional<std::uintptr_t> address;
1661 // Similar to static variables, find any definitions which refer to this
1662 // function in order to extract linkage information.
// A linkage name already found on the DIE is kept; the first definition with
// a usable address supplies the address; any definition flagged is_member
// marks the function as a member function.
1663 auto const offset = m_dwarf.getDIEOffset(die);
1664 auto const& state = stateForOffset(offset);
1665 auto range = state.static_definitions.equal_range(offset);
1666 for (auto const& elm : range) {
1667 if (linkage_name.empty() && !elm.second.linkage_name.empty()) {
1668 linkage_name = elm.second.linkage_name;
1670 if (!address && elm.second.address != StaticSpec::kNoAddress) {
1671 address = elm.second.address;
1673 if (elm.second.is_member) is_member = true;
// Virtual takes precedence over member, which takes precedence over static.
1676 return Object::Function{
1677 name,
1678 std::move(ret_type),
1679 std::move(arg_types),
1680 is_virtual ?
1681 Object::Function::Kind::k_virtual :
1682 (is_member ? Object::Function::Kind::k_member :
1683 Object::Function::Kind::k_static),
1684 linkage_name,
1685 address,
1689 Object::Base TypeParserImpl::genBase(Dwarf_Die die,
1690 const ObjectTypeName& parent_name) {
1691 std::string name;
1692 HPHP::Optional<std::size_t> offset;
1693 HPHP::Optional<GlobalOff> die_offset;
1694 bool is_virtual = false;
1696 m_dwarf.forEachAttribute(
1697 die,
1698 [&](Dwarf_Attribute attr) {
1699 switch (m_dwarf.getAttributeType(attr)) {
1700 case DW_AT_name:
1701 name = m_dwarf.getAttributeValueString(attr);
1702 break;
1703 case DW_AT_type:
1704 die_offset = m_dwarf.getAttributeValueRef(attr);
1705 break;
1706 case DW_AT_virtuality:
1707 is_virtual =
1708 (m_dwarf.getAttributeValueUData(attr) != DW_VIRTUALITY_none);
1709 break;
1710 default:
1711 break;
1713 return true;
1717 if (!is_virtual) {
1718 offset = 0;
1720 m_dwarf.forEachAttribute(
1721 die,
1722 [&](Dwarf_Attribute attr) {
1723 switch (m_dwarf.getAttributeType(attr)) {
1724 case DW_AT_data_member_location:
1725 offset = m_dwarf.getAttributeValueUData(attr);
1726 break;
1727 default:
1728 break;
1730 return true;
1735 if (!die_offset) {
1736 throw Exception{
1737 folly::sformat(
1738 "Encountered base '{}' of object type '{}' without "
1739 "type information at offset {}",
1740 name,
1741 parent_name.name,
1742 m_dwarf.getDIEOffset(die)
1747 auto type =
1748 m_dwarf.onDIEAtOffset(
1749 *die_offset,
1750 [&](Dwarf_Die die2) { return genType(die2); }
1753 if (auto obj = type.asObject()) {
1754 // Base class better be an actual class!
1755 return Object::Base{*obj, offset};
1756 } else {
1757 throw Exception{
1758 folly::sformat(
1759 "Encountered base '{}' of object type '{}' of "
1760 "non-object type '{}' at offset {}",
1761 name,
1762 parent_name.name,
1763 type.toString(),
1764 m_dwarf.getDIEOffset(die)
1770 Object::TemplateParam TypeParserImpl::genTemplateParam(Dwarf_Die die) {
1771 HPHP::Optional<GlobalOff> die_offset;
1773 m_dwarf.forEachAttribute(
1774 die,
1775 [&](Dwarf_Attribute attr) {
1776 switch (m_dwarf.getAttributeType(attr)) {
1777 case DW_AT_type:
1778 die_offset = m_dwarf.getAttributeValueRef(attr);
1779 break;
1780 default:
1781 break;
1783 return true;
1787 return Object::TemplateParam{
1788 die_offset ?
1789 m_dwarf.onDIEAtOffset(
1790 *die_offset,
1791 [&](Dwarf_Die die2){ return genType(die2); }
1793 VoidType{}
1797 HPHP::Optional<std::size_t>
1798 TypeParserImpl::determineArrayBound(Dwarf_Die die) {
1799 HPHP::Optional<std::size_t> bound;
1801 m_dwarf.forEachChild(
1802 die,
1803 [&](Dwarf_Die child) {
1804 switch (m_dwarf.getTag(child)) {
1805 case DW_TAG_subrange_type:
1806 m_dwarf.forEachAttribute(
1807 child,
1808 [&](Dwarf_Attribute attr) {
1809 switch (m_dwarf.getAttributeType(attr)) {
1810 case DW_AT_count:
1811 bound = m_dwarf.getAttributeValueUData(attr);
1812 break;
1813 case DW_AT_upper_bound:
1814 bound = m_dwarf.getAttributeValueUData(attr)+1;
1815 break;
1816 default:
1817 break;
1819 return true;
1822 break;
1823 default:
1824 break;
1826 return true;
1830 if (bound && !*bound) bound.reset();
1831 return bound;
1834 void TypeParserImpl::fillFuncArgs(Dwarf_Die die, FuncType& func) {
1835 m_dwarf.forEachChild(
1836 die,
1837 [&](Dwarf_Die child) {
1838 switch (m_dwarf.getTag(child)) {
1839 case DW_TAG_formal_parameter: {
1840 HPHP::Optional<GlobalOff> type_offset;
1842 m_dwarf.forEachAttribute(
1843 child,
1844 [&](Dwarf_Attribute attr) {
1845 switch (m_dwarf.getAttributeType(attr)) {
1846 case DW_AT_type:
1847 type_offset = m_dwarf.getAttributeValueRef(attr);
1848 break;
1849 default:
1850 break;
1852 return true;
1856 if (!type_offset) {
1857 throw Exception{
1858 folly::sformat(
1859 "Encountered function at offset {} taking a void parameter",
1860 m_dwarf.getDIEOffset(die)
1865 func.args.push_back(
1866 m_dwarf.onDIEAtOffset(
1867 *type_offset,
1868 [&](Dwarf_Die die) { return genType(die); }
1871 break;
1873 default:
1874 break;
1876 return true;
/*
 * Print out the given DIE (including children) in textual format to the given
 * ostream. Only actually print out DIEs which begin in the range between the
 * begin and end parameters.
 */
// Parameters:
//   os      stream the textual dump is written to
//   dwarf   state used to query tags, names, offsets, attributes and children
//   die     the DIE to print
//   sig     optional (signature, offset) pair; when non-null and the signature
//           is non-zero it is appended to the DIE's header line
//   begin   DIEs starting before this offset are not printed themselves, but
//           their children are still visited
//   end     DIEs starting at or after this offset are neither printed nor
//           descended into
//   indent  nesting depth, used to indent the output
1887 void printDIE(std::ostream& os,
1888 const DwarfState& dwarf,
1889 Dwarf_Die die,
1890 std::pair<uint64_t,GlobalOff>* sig,
1891 std::size_t begin,
1892 std::size_t end,
1893 int indent = 0) {
1894 auto tag = dwarf.getTag(die);
1895 auto tag_name = dwarf.tagToString(tag);
1896 auto name = dwarf.getDIEName(die);
1897 auto offset = dwarf.getDIEOffset(die).offset();
// Visit the children of this DIE which may overlap the begin/end range.
1899 const auto recurse = [&]{
1900 // Find the last child DIE which starts at or before the begin offset.
1901 // This DIE is the first one which can contain some data within the
1902 // begin/end range, so that must be the first one to begin recursion at.
1903 HPHP::Optional<uint64_t> first;
1904 if (begin > 0) {
1905 dwarf.forEachChild(
1906 die,
1907 [&](Dwarf_Die child) {
1908 const auto offset = dwarf.getDIEOffset(child).offset();
1909 if (offset <= begin) {
1910 first = offset;
1911 return true;
1912 } else {
1913 return false;
1919 // Only actually recurse if this child DIE is the above computed first DIE,
1920 // or one following it, and begins before the end parameter.
1921 dwarf.forEachChild(
1922 die,
1923 [&](Dwarf_Die child) {
1924 const auto offset = dwarf.getDIEOffset(child).offset();
1925 if ((!first || offset >= *first) && offset < end) {
1926 printDIE(os, dwarf, child, nullptr, begin, end, indent+1);
// Stop iterating over the children once one starts at or past end.
1928 return offset < end;
// A DIE starting before begin is not printed, but its children might lie
// within the range. A DIE starting at or after end is skipped entirely.
1933 if (offset < begin) {
1934 recurse();
1935 return;
1936 } else if (offset >= end) {
1937 return;
// Formats a type signature as a zero-padded 16 digit hex number.
1940 auto const printSig = [&] (uint64_t sig) {
1941 return folly::sformat("ref_sig8:{:016x}", sig);
// Header line: indentation, then offset, tag name, tag value and DIE name.
1944 for (int i = 0; i < indent; ++i) {
1945 os << " ";
1947 os << "#" << offset << ": " << tag_name << " (" << tag << ") \""
1948 << name << "\"";
1949 if (sig && sig->first) {
1950 os << folly::sformat(" {{{} -> #{}}}", printSig(sig->first), sig->second);
1952 os << "\n";
// One line per attribute: its name, type, rendered value, and form.
1954 dwarf.forEachAttribute(
1955 die,
1956 [&](Dwarf_Attribute attr) {
1957 auto const type = dwarf.getAttributeType(attr);
1958 auto const attr_name = dwarf.attributeTypeToString(type);
1959 auto const form = dwarf.getAttributeForm(attr);
1960 auto const attr_form = dwarf.attributeFormToString(form);
// Render the attribute's value as text. DW_AT_ranges is handled up front,
// whatever its form; everything else is rendered according to its form.
1962 auto attr_value = [&]() -> std::string {
1963 if (type == DW_AT_ranges) {
1964 auto const ranges = dwarf.getRanges(attr);
1965 std::string res;
1966 for (auto range : ranges) {
1967 if (range.dwr_addr1 == DwarfState::Dwarf_Ranges::kSelection) {
1968 folly::format(&res, "0x{:x} ", range.dwr_addr2);
1969 } else {
1970 folly::format(&res, "0x{:x}-0x{:x} ",
1971 range.dwr_addr1, range.dwr_addr2);
1974 return res;
1976 switch (dwarf.getAttributeForm(attr)) {
1977 case DW_FORM_data1:
1978 case DW_FORM_data2:
1979 case DW_FORM_data4:
1980 case DW_FORM_data8:
1981 case DW_FORM_udata:
1982 return folly::sformat("{}", dwarf.getAttributeValueUData(attr));
1984 case DW_FORM_sdata:
1985 return folly::sformat("{}", dwarf.getAttributeValueSData(attr));
1987 case DW_FORM_string:
1988 case DW_FORM_strp:
1989 return folly::sformat(
1990 "\"{}\"",
1991 dwarf.getAttributeValueString(attr)
1994 case DW_FORM_flag:
1995 case DW_FORM_flag_present:
1996 return dwarf.getAttributeValueFlag(attr) ? "true" : "false";
1998 case DW_FORM_addr:
1999 return folly::sformat(
2000 "{:#010x}",
2001 dwarf.getAttributeValueAddr(attr)
2004 case DW_FORM_ref1:
2005 case DW_FORM_ref2:
2006 case DW_FORM_ref4:
2007 case DW_FORM_ref8:
2008 case DW_FORM_ref_udata:
2009 case DW_FORM_ref_addr:
2010 return folly::sformat("#{}", dwarf.getAttributeValueRef(attr));
2011 case DW_FORM_ref_sig8: {
2012 return printSig(dwarf.getAttributeValueSig8(attr));
// Location expressions: DW_OP_addr is shown as an address, any other
// operation as its name followed by its three operand fields.
2015 case DW_FORM_exprloc: {
2016 std::string output;
2017 for (const auto& expr : dwarf.getAttributeValueExprLoc(attr)) {
2018 if (expr.lr_atom == DW_OP_addr) {
2019 output += folly::sformat(
2020 "<OP_addr: {:#x}>,",
2021 expr.lr_number
2023 } else {
2024 output += folly::sformat(
2025 "<{}:{}:{}:{}>,",
2026 dwarf.opToString(expr.lr_atom),
2027 expr.lr_number,
2028 expr.lr_number2,
2029 expr.lr_offset
2033 return folly::sformat("Location: [{}]", output);
// The remaining forms are not decoded; a placeholder is printed instead.
2036 case DW_FORM_block1:
2037 case DW_FORM_block2:
2038 case DW_FORM_block4:
2039 case DW_FORM_block: return "{BLOCK}";
2041 case DW_FORM_indirect: return "{INDIRECT}";
2042 case DW_FORM_sec_offset: return "{SECTION OFFSET}";
2043 default: return "{UNKNOWN}";
2045 }();
2047 for (int i = 0; i < indent; ++i) {
2048 os << " ";
2050 os << folly::sformat(" **** {} ({}) ==> {} [{}:{}]\n",
2051 attr_name, type, attr_value,
2052 attr_form, form);
2053 return true;
// Finally print the children, one indentation level deeper.
2057 recurse();
// Printer implementation which dumps the DWARF information of the file named
// at construction as text.
2060 struct PrinterImpl : Printer {
2061 explicit PrinterImpl(const std::string& filename): m_filename{filename} {}
// Loads the DWARF state for m_filename, prints the top-level units selected
// with isInfo == false followed by those selected with isInfo == true
// (restricted to the begin/end range), then flushes the stream.
2062 void operator()(std::ostream& os,
2063 std::size_t begin,
2064 std::size_t end) const override {
2065 DwarfState dwarf{m_filename};
2067 print_section(os, dwarf, false, begin, end);
2068 print_section(os, dwarf, true, begin, end);
2070 os << std::flush;
2072 private:
// Prints every top-level unit visited for the given isInfo value which
// overlaps the begin/end range.
2073 void print_section(std::ostream& os,
2074 const DwarfState& dwarf,
2075 bool isInfo,
2076 std::size_t begin,
2077 std::size_t end) const {
2078 // If a non-default begin parameter was specified, first iterate over all
2079 // the compilation units. Find the first compilation unit which at least
2080 // partially lies within the range given by the begin parameter. This is the
2081 // first compilation unit to begin printing from.
2082 HPHP::Optional<uint64_t> last;
2083 if (begin > 0) {
2084 dwarf.forEachTopLevelUnit(
2085 [&](Dwarf_Die cu) {
2086 const auto offset = dwarf.getDIEOffset(cu).offset();
2087 if (offset <= begin) last = offset;
2089 isInfo
2093 // Now iterate over all the compilation units again. Only actually print out
2094 // compilation units if they lie within the begin/end parameter range.
2095 dwarf.forEachTopLevelUnit(
2096 [&] (Dwarf_Die cu) {
// The unit's type signature and type offset are handed to printDIE so it can
// annotate the unit's header line with them.
2097 auto context = cu->context;
2098 auto type_offset = GlobalOff { context->typeOffset, context->isInfo };
2099 auto pair = std::make_pair(context->typeSignature, type_offset);
2100 const auto offset = dwarf.getDIEOffset(cu).offset();
// Units starting at or past end terminate the iteration.
2101 if (offset >= end) return false;
2102 if ((!last || offset >= *last)) {
2103 printDIE(
2105 dwarf,
2107 &pair,
2108 // If this compilation unit entirely lies within the begin/end
2109 // range, specify a begin parameter of "0", which will stop
2110 // printDIE() from doing range checks (which is more efficient).
2111 (!last || (offset > *last)) ? 0 : begin,
2115 return true;
2117 isInfo
// Path of the file whose DWARF information gets printed.
2120 std::string m_filename;
2124 struct GDBIndexerImpl : GDBIndexer {
2125 explicit GDBIndexerImpl(const std::string& filename, int num_threads)
2126 : m_filename{filename}
2127 , m_numThreads{num_threads}
2129 if (num_threads < 1) {
2130 throw Exception{folly::sformat("Invalid number of threads: {}",
2131 num_threads)};
2135 void operator()(const std::string& output_file) const override {
2136 auto begin_time = ::HPHP::Timer::GetCurrentTimeMicros();
2137 DwarfState dwarf{m_filename};
2138 log_time(begin_time, "Parsing dwarf file");
2140 std::FILE* fd = std::fopen(output_file.c_str(), "wb");
2142 if (!fd) {
2143 throw Exception{folly::sformat("Cannot open file: {}", output_file)};
2146 auto const gdb_index_version = 8;
2147 std::vector<uint32_t> header{gdb_index_version, 0, 0, 0, 0, 0};
2149 auto time_index_begin = ::HPHP::Timer::GetCurrentTimeMicros();
2151 auto addresses_and_symbols = collect_addresses_and_symbols(dwarf);
2152 auto time = log_time(time_index_begin, "collect_addresses_and_symbols");
2153 auto const cu = get_cu(dwarf);
2154 time = log_time(time, "Get_cu");
2155 auto const tu = get_tu(dwarf);
2156 time = log_time(time, "Get_tu");
2157 auto const address = get_address(addresses_and_symbols.first);
2158 time = log_time(time, "Get_address");
2159 auto const symbol_and_constants =
2160 get_symbol_and_constants(addresses_and_symbols.second);
2161 log_time(time, "Get_symbol_and_constants");
2163 time = log_time(time_index_begin, "Index generation");
2165 // The offset, from the start of the file, of the CU list.
2166 header[1] = sizeof header[0] * header.size();
2167 // The offset, from the start of the file, of the types CU list.
2168 header[2] = header[1] + sizeof cu[0] * cu.size();
2169 // The offset, from the start of the file, of the address area.
2170 header[3] = header[2] + sizeof tu[0] * tu.size();
2171 // The offset, from the start of the file, of the symbol table.
2172 header[4] = header[3] + sizeof address[0] * address.size();
2173 // The offset, from the start of the file, of the constant pool.
2174 header[5] = header[4] +
2175 sizeof symbol_and_constants.symbol_pool.m_hashtable[0] *
2176 symbol_and_constants.symbol_pool.m_hashtable.size();
2178 print_section(fd, header);
2179 print_section(fd, cu);
2180 print_section(fd, tu);
2181 print_section(fd, address);
2182 print_section(fd, symbol_and_constants.symbol_pool.m_hashtable);
2183 print_section(fd, symbol_and_constants.cu_vector_offsets);
2184 print_section(fd, symbol_and_constants.strings);
2186 log_time(time, "Print");
2188 log_time(begin_time, "Full index creation");
2190 std::fclose(fd);
2193 private:
2194 int32_t log_time(int32_t time, const char* msg) const {
2195 int32_t now = ::HPHP::Timer::GetCurrentTimeMicros();
2196 std::cout << msg << " took " << (now - time) / 1000 << " ms" << std::endl;
2197 return now;
2200 void print_section(std::FILE* fd,
2201 const std::vector<std::string>& data) const {
2202 if (!data.size()) return;
2203 assertx(fd);
2204 for (auto s : data) {
2205 std::fwrite(s.c_str(), sizeof(char), s.length() + 1, fd);
2209 template <typename T>
2210 void print_section(std::FILE* fd, const std::vector<T>& data) const {
2211 if (!data.size()) return;
2212 assertx(fd);
2213 std::fwrite(data.data(), sizeof data[0], data.size(), fd);
2216 std::vector<uint64_t> get_cu(const DwarfState& dwarf) const {
2217 std::vector<uint64_t> result = {};
2218 dwarf.forEachCompilationUnit(
2219 [&](Dwarf_Die cu) {
2220 result.push_back(cu->context->offset);
2221 result.push_back(cu->context->size);
2224 return result;
2227 std::vector<uint64_t> get_tu(const DwarfState& dwarf) const {
2228 std::vector<uint64_t> result = {};
2229 dwarf.forEachTopLevelUnit(
2230 [&](Dwarf_Die cu) {
2231 result.push_back(cu->context->offset);
2232 result.push_back(cu->context->typeOffset - cu->context->offset);
2233 result.push_back(cu->context->typeSignature);
2234 }, false
2236 return result;
// One address range attributed to a CU. Each 64-bit bound is overlaid with a
// pair of 32-bit words so it can also be read as two halves; `*_bottom` is
// the word stored first in memory.
struct AddressTableEntry {
  // Start of the range.
  union {
    uint64_t low;
    struct { uint32_t low_bottom; uint32_t low_top; };
  };
  // End of the range.
  union {
    uint64_t high;
    struct { uint32_t high_bottom; uint32_t high_top; };
  };
  // Index of the CU the range belongs to.
  uint32_t index;
};
2257 static bool compareAddressTableEntry(AddressTableEntry a,
2258 AddressTableEntry b) {
2259 return a.low == b.low ? a.high < b.high : a.low < b.low;
2262 void visit_die_for_address(const DwarfState& dwarf, const Dwarf_Die die,
2263 std::vector<AddressTableEntry>& entries,
2264 uint32_t cu_index) const {
2265 HPHP::Optional<uint64_t> low, high;
2266 std::vector<DwarfState::Dwarf_Ranges> ranges;
2267 bool is_high_udata = false;
2268 dwarf.forEachAttribute(
2269 die,
2270 [&](Dwarf_Attribute attr) {
2271 switch (dwarf.getAttributeType(attr)) {
2272 case DW_AT_ranges:
2273 ranges = dwarf.getRanges(attr);
2274 break;
2275 case DW_AT_low_pc:
2276 // Some times GCC/Clang emits very low numbers for addresses in
2277 // the form of UData. Let's drop them.
2278 if (attr->form == DW_FORM_addr) {
2279 low = dwarf.getAttributeValueAddr(attr);
2281 break;
2282 case DW_AT_high_pc:
2283 if (attr->form != DW_FORM_addr) {
2284 is_high_udata = true;
2285 high = dwarf.getAttributeValueUData(attr);
2286 } else {
2287 high = dwarf.getAttributeValueAddr(attr);
2289 break;
2290 default:
2291 break;
2293 return true;
2297 if (!ranges.empty()) {
2298 uint64_t base = low ? *low : 0;
2299 bool added = false;
2300 for (auto range : ranges) {
2301 if (range.dwr_addr1 == DwarfState::Dwarf_Ranges::kSelection) {
2302 base = range.dwr_addr2;
2303 continue;
2305 if (base + range.dwr_addr1 == 0) continue;
2306 // Drop all the addresses under 2M
2307 if (base + range.dwr_addr2 < 2000000) continue;
2308 added = true;
2309 entries.push_back(
2310 AddressTableEntry {
2311 base + range.dwr_addr1,
2312 base + range.dwr_addr2,
2313 cu_index
2317 if (added) return;
2320 if (low && high) {
2321 high = is_high_udata ? *low + *high : *high;
2322 // Drop all the addresses under 2M
2323 if (*low != 0 && *high >= 2000000) {
2324 entries.push_back(AddressTableEntry{*low, *high, cu_index});
2325 return;
2329 dwarf.forEachChild(
2330 die,
2331 [&](Dwarf_Die child) {
2332 visit_die_for_address(dwarf, child, entries, cu_index);
2333 return true;
2338 std::vector<uint32_t>
2339 get_address(std::vector<AddressTableEntry>& entries) const {
2340 sort(entries.begin(), entries.end(), compareAddressTableEntry);
2342 // Split into little-endian formatting
2343 std::vector<uint32_t> result = {};
2344 for (auto& e : entries) {
2345 result.push_back(e.low_bottom);
2346 result.push_back(e.low_top);
2347 result.push_back(e.high_bottom);
2348 result.push_back(e.high_top);
2349 result.push_back(e.index);
2351 return result;
// One slot of the symbol hash table: a pair of 32-bit offsets, one for the
// symbol's name and one for its CU vector.
struct GDBSymbol {
  uint32_t name_offset{};
  uint32_t cu_vector_offset{};

  // A slot is in use once it has a non-zero name offset. Const so it can be
  // queried through const references as well.
  bool valid() const { return name_offset != 0; }
};
2361 struct GDBHashtable {
2362 GDBHashtable() : m_size(0), m_capacity(0), m_hashtable({}) {}
2363 size_t m_size;
2364 size_t m_capacity;
2365 std::vector<GDBSymbol> m_hashtable;
2368 void init(size_t size) {
2369 assertx(m_size == 0 && m_capacity == 0);
2371 auto const nextPowerOfTwo = [](size_t n) -> size_t {
2372 if (n == 0) return 1;
2373 n--;
2374 n |= n >> 1;
2375 n |= n >> 2;
2376 n |= n >> 4;
2377 n |= n >> 8;
2378 n |= n >> 16;
2379 n++;
2380 return n;
2383 auto initial_size = nextPowerOfTwo(size * 4 / 3);
2385 m_hashtable = std::vector<GDBSymbol>(initial_size, GDBSymbol{});
2386 m_capacity = initial_size;
2389 GDBSymbol* findSlot(uint32_t hash) {
2390 uint32_t index = hash;
2391 uint32_t step = ((hash * 17) & (m_capacity - 1)) | 1;
2393 while (true) {
2394 index &= m_capacity - 1;
2395 if (!m_hashtable[index].valid()) {
2396 return &m_hashtable[index];
2398 index += step;
2402 bool add(uint32_t hash, GDBSymbol s) {
2403 auto const loc = this->findSlot(hash);
2404 assert(!loc->valid());
2405 *loc = s;
2406 m_size++;
2407 return true;
2411 using SymbolMap = tbb::concurrent_hash_map<std::string,
2412 std::vector<uint32_t>,
2413 ::HPHP::stringHashCompare>;
2414 using SpecMap = folly::F14FastMap<GlobalOff, std::string>;
2416 void visit_die_for_symbols(const DwarfState& dwarf,
2417 const Dwarf_Die die,
2418 SymbolMap& symbols,
2419 SpecMap& spec_names,
2420 std::string parent_name,
2421 uint32_t language,
2422 uint32_t cu_index) const {
2424 bool is_declaration = false;
2425 bool is_external = false;
2426 std::string name;
2427 bool full_name = false;
2428 bool is_inlined = false;
2429 bool has_location = false;
2430 bool in_specification = false;
2431 auto specification = GlobalOff::fromRaw(0);
2432 auto collect_attributes = [&] (Dwarf_Attribute attr) {
2433 switch (dwarf.getAttributeType(attr)) {
2434 case DW_AT_declaration:
2435 if (!in_specification) {
2436 is_declaration = dwarf.getAttributeValueFlag(attr);
2438 break;
2439 case DW_AT_external:
2440 is_external = dwarf.getAttributeValueFlag(attr);
2441 break;
2442 case DW_AT_linkage_name:
2443 is_external = true;
2444 break;
2445 case DW_AT_location:
2446 has_location = true;
2447 break;
2448 case DW_AT_name:
2449 if (!full_name) {
2450 name = dwarf.getAttributeValueString(attr);
2452 break;
2453 case DW_AT_inline: {
2454 auto const val = dwarf.getAttributeValueUData(attr);
2455 is_inlined =
2456 (val == DW_INL_inlined) ||
2457 (val == DW_INL_declared_inlined);
2458 break;
2460 case DW_AT_language:
2461 language = dwarf.getAttributeValueUData(attr);
2462 break;
2463 case DW_AT_specification: {
2464 specification = dwarf.getAttributeValueRef(attr);
2465 auto const it = spec_names.find(specification);
2466 if (it != spec_names.end()) {
2467 name = it->second;
2468 auto const pos = name.rfind("::");
2469 if (pos != std::string::npos) {
2470 parent_name = name.substr(0, pos);
2472 full_name = true;
2474 break;
2476 default:
2477 return true;
2479 return true;
2481 dwarf.forEachAttribute(die, collect_attributes);
2482 if (specification.raw()) {
2483 dwarf.onDIEAtOffset(
2484 specification,
2485 [&] (Dwarf_Die d) {
2486 in_specification = true;
2487 dwarf.forEachAttribute(d, collect_attributes);
2492 struct IndexAndFlags {
2493 IndexAndFlags(uint32_t index, uint32_t kind, uint32_t is_static) {
2494 assertx((index >> 24) == 0);
2495 // Bits 0-23 is CU index
2496 // Bits 24-27 are reserved and must be 0
2497 // Bits 28-30 The kind of the symbol in the CU.
2498 // Bit 31 is zero if the value is global and one if it is static.
2499 m_data = (is_static << 31) | (kind << 28) | index;
2502 explicit IndexAndFlags(uint32_t data) : m_data(data) {}
2504 uint32_t m_data;
2506 uint32_t get_kind() const { return (m_data >> 28) & 7; }
2507 uint32_t get_is_static() const { return m_data >> 31; }
2511 constexpr int TYPE = 1;
2512 constexpr int VARIABLE = 2;
2513 //constexpr int ENUM = 2;
2514 constexpr int FUNCTION = 3;
2515 // constexpr int OTHER = 4;
2517 auto const index_and_flags = [&] {
2518 uint32_t kind = 0;
2519 auto is_static = false;
2520 switch (dwarf.getTag(die)) {
2521 case DW_TAG_typedef:
2522 case DW_TAG_base_type:
2523 case DW_TAG_subrange_type:
2524 kind = TYPE;
2525 is_static = 1;
2526 break;
2527 case DW_TAG_enumerator:
2528 kind = VARIABLE;
2529 is_static = language != DW_LANG_C_plus_plus;
2530 break;
2531 case DW_TAG_subprogram:
2532 kind = FUNCTION;
2533 is_static = !(is_external || language == DW_LANG_Ada83 ||
2534 language == DW_LANG_Ada95);
2535 break;
2536 case DW_TAG_constant:
2537 kind = VARIABLE;
2538 is_static = !is_external;
2539 break;
2540 case DW_TAG_variable:
2541 kind = VARIABLE;
2542 is_static = !is_external;
2543 break;
2544 case DW_TAG_namespace:
2545 kind = TYPE;
2546 is_static = 0;
2547 break;
2548 case DW_TAG_class_type:
2549 case DW_TAG_interface_type:
2550 case DW_TAG_structure_type:
2551 case DW_TAG_union_type:
2552 case DW_TAG_enumeration_type:
2553 kind = TYPE;
2554 is_static = language != DW_LANG_C_plus_plus;
2555 break;
2556 default:
2557 throw Exception{"Invalid tag"};
2559 return IndexAndFlags{cu_index, kind, is_static}.m_data;
2562 auto const hasSameFlags = [&](std::vector<uint32_t> v, uint32_t input) {
2563 auto const flags = IndexAndFlags{input};
2564 for (auto const e : v) {
2565 auto const f = IndexAndFlags{e};
2566 if (f.get_kind() == flags.get_kind()) {
2567 if ((f.get_kind() == TYPE &&
2568 f.get_is_static() == flags.get_is_static()) ||
2569 (!f.get_is_static() && !flags.get_is_static())) {
2570 return true;
2574 return false;
2577 auto const addSymbol = [&](std::string name) {
2578 auto value = index_and_flags();
2579 SymbolMap::accessor acc;
2580 if (symbols.insert(acc, name) || !hasSameFlags(acc->second, value)) {
2581 acc->second.push_back(value);
2585 auto const addParent = [&] {
2586 if (full_name) return;
2587 if (name.empty()) return;
2588 if (!parent_name.empty()) {
2589 name = folly::sformat("{}::{}", parent_name, name);
2591 if (is_declaration) {
2592 spec_names.emplace(dwarf.getDIEOffset(die), name);
2596 auto const visitChildren = [&](std::string name) {
2597 dwarf.forEachChild(
2598 die,
2599 [&](Dwarf_Die child) {
2600 visit_die_for_symbols(dwarf, child, symbols, spec_names, name,
2601 language, cu_index);
2602 return true;
2607 auto const tag = dwarf.getTag(die);
2608 switch (tag) {
2609 case DW_TAG_base_type:
2610 // don't canonicalize!
2611 addSymbol(name);
2612 break;
2613 case DW_TAG_member:
2614 // static members appear first here as a declaration, then
2615 // later as a DW_TAG_variable whose specification points
2616 // here. We need to note the name just in case.
2617 if (is_declaration) addParent();
2618 break;
2619 case DW_TAG_subprogram:
2620 if (is_inlined) break;
2621 case DW_TAG_constant:
2622 case DW_TAG_enumerator:
2623 if (name.empty()) break;
2624 addParent();
2625 if (is_declaration) break;
2626 addSymbol(name);
2627 break;
2628 case DW_TAG_variable:
2629 if (name.empty() || (!is_external && !has_location)) break;
2630 addParent();
2631 if (is_declaration) break;
2632 addSymbol(name);
2633 break;
2634 case DW_TAG_namespace:
2635 if (name.empty()) name = "(anonymous namespace)";
2636 addParent();
2637 visitChildren(name);
2638 break;
2639 case DW_TAG_typedef:
2640 case DW_TAG_subrange_type:
2641 addParent();
2642 if (is_declaration || name.empty()) break;
2643 addSymbol(name);
2644 break;
2645 case DW_TAG_union_type:
2646 case DW_TAG_class_type:
2647 case DW_TAG_interface_type:
2648 case DW_TAG_structure_type:
2649 case DW_TAG_enumeration_type:
2650 addParent();
2651 if (!is_declaration && !name.empty()) {
2652 addSymbol(name);
2654 if (tag == DW_TAG_enumeration_type || !name.empty()) {
2655 visitChildren(tag == DW_TAG_enumeration_type ? parent_name : name);
2657 break;
2658 case DW_TAG_compile_unit:
2659 case DW_TAG_type_unit:
2660 visitChildren(parent_name);
2661 break;
2662 default:
2663 break;
2667 std::pair<std::vector<AddressTableEntry>, SymbolMap>
2668 collect_addresses_and_symbols(const DwarfState& dwarf) const {
2669 auto time = ::HPHP::Timer::GetCurrentTimeMicros();
2671 folly::F14FastMap<uint32_t, uint32_t> unit_indices_cu;
2672 folly::F14FastMap<uint32_t, uint32_t> unit_indices_tu;
2674 uint32_t count = 0;
2675 dwarf.forEachTopLevelUnit(
2676 [&](Dwarf_Die die) {
2677 unit_indices_cu.insert({die->context->offset, count});
2678 count++;
2679 }, true /* Compilation Unit */
2681 size_t numCUs = count;
2682 dwarf.forEachTopLevelUnit(
2683 [&](Dwarf_Die die) {
2684 unit_indices_tu[die->context->offset] = count;
2685 count++;
2686 }, false /* Type Unit */
2690 std::vector<std::vector<AddressTableEntry>>
2691 entryList(numCUs, std::vector<AddressTableEntry>{});
2692 SymbolMap symbols;
2694 dwarf.forEachTopLevelUnitParallel(
2695 [&](Dwarf_Die die) {
2696 uint32_t index = unit_indices_cu[die->context->offset];
2697 assertx(index < entryList.size());
2698 std::vector<AddressTableEntry> entry;
2699 visit_die_for_address(dwarf, die, entry, index);
2701 sort(entry.begin(), entry.end(), compareAddressTableEntry);
2703 std::vector<AddressTableEntry> merged;
2704 for (auto& e : entry) {
2705 if (!merged.empty()) {
2706 auto& prev = merged.back();
2707 if (e.low <= prev.high) {
2708 if (e.high <= prev.high) continue;
2709 assertx(prev.index == e.index);
2710 prev.high = e.high;
2711 continue;
2714 merged.push_back(e);
2717 entryList[index] = std::move(merged);
2718 SpecMap spec_names;
2719 visit_die_for_symbols(dwarf, die, symbols, spec_names, "",
2720 0, index);
2721 }, true /* Compilation Unit */, m_numThreads
2724 std::vector<AddressTableEntry> entries;
2725 for (auto& list : entryList) {
2726 for (auto &e : list) {
2727 entries.push_back(e);
2731 time = log_time(time, "collect_addresses_and_symbols: Visit CUs");
2733 dwarf.forEachTopLevelUnitParallel(
2734 [&](Dwarf_Die die) {
2735 uint32_t index = unit_indices_tu[die->context->offset];
2736 SpecMap spec_names;
2737 visit_die_for_symbols(dwarf, die, symbols, spec_names, "",
2738 0, index);
2739 }, false /* Type Unit */, m_numThreads
2742 log_time(time, "collect_addresses_and_symbols: Visit TUs");
2744 return {std::move(entries), std::move(symbols)};
2747 struct SymbolAndConstantPool {
2748 GDBHashtable symbol_pool;
2749 std::vector<uint32_t> cu_vector_offsets;
2750 std::vector<std::string> strings;
2753 SymbolAndConstantPool
2754 get_symbol_and_constants(const SymbolMap& symbols) const {
2755 auto time = ::HPHP::Timer::GetCurrentTimeMicros();
2757 GDBHashtable symbol_hash_table;
2758 symbol_hash_table.init(symbols.size());
2760 auto const getHashVal = [](std::string name) {
2761 uint32_t r = 0;
2762 for (char& c : name) {
2763 c = tolower(c);
2764 r = r * 67 + c - 113;
2766 return r;
2769 // The first value is the number of CU indices in the vector
2770 std::vector<uint32_t> cu_vector_values;
2771 std::vector<std::string> strings;
2773 // set name_off to 1 so can use non-zero as the valid test for a
2774 // hash table entry.
2775 uint32_t name_off = 1;
2776 for (auto& entry : symbols) {
2777 uint32_t cu_vector_offset = cu_vector_values.size() * 4;
2778 cu_vector_values.push_back(entry.second.size());
2779 for (auto& elem : entry.second) {
2780 cu_vector_values.push_back(elem);
2782 strings.push_back(entry.first);
2783 symbol_hash_table.add(getHashVal(entry.first),
2784 GDBSymbol{name_off, cu_vector_offset});
2785 name_off += entry.first.length() + 1;
2788 time = log_time(time, "Get_symbol_and_constants: Populate hash table");
2790 auto const num_cu_vector_bytes =
2791 cu_vector_values.size() * sizeof(cu_vector_values[0]);
2792 for (auto& sym : symbol_hash_table.m_hashtable) {
2793 if (sym.valid()) {
2794 sym.name_offset += num_cu_vector_bytes - 1;
2798 log_time(time, "Get_symbol_and_constants: Update symbol pool");
2800 std::cout << "Hash Table Size: " << symbol_hash_table.m_size <<
2801 " Capacity: " << symbol_hash_table.m_capacity << std::endl;
2802 std::cout << "Strings Size: " << strings.size() << std::endl;
2803 std::cout << "CU Vector Values Size: " <<
2804 cu_vector_values.size() << std::endl;
2806 return {
2807 std::move(symbol_hash_table),
2808 std::move(cu_vector_values),
2809 std::move(strings)
2813 std::string m_filename;
2814 int m_numThreads;
2817 ////////////////////////////////////////////////////////////////////////////////
2821 std::unique_ptr<TypeParser>
2822 make_dwarf_type_parser(const std::string& filename, int num_threads) {
2823 return std::make_unique<TypeParserImpl>(filename, num_threads);
2826 std::unique_ptr<Printer> make_dwarf_printer(const std::string& filename) {
2827 return std::make_unique<PrinterImpl>(filename);
2830 std::unique_ptr<GDBIndexer>
2831 make_dwarf_gdb_indexer(const std::string& filename, int num_threads) {
2832 return std::make_unique<GDBIndexerImpl>(filename, num_threads);
2835 ////////////////////////////////////////////////////////////////////////////////
2839 #endif