Clear the container before decoding for inplace adapter
[hiphop-php.git] / hphp / tools / type-info-gens / gen-type-scanners.cpp
blob32f28b1740e87a26ae542e9d01f12b60af3db3da
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include <algorithm>
18 #include <atomic>
19 #include <fstream>
20 #include <iostream>
21 #include <iterator>
22 #include <memory>
24 #include <folly/Demangle.h>
25 #include <folly/Format.h>
26 #include <folly/Hash.h>
27 #include <folly/Memory.h>
28 #include <folly/Singleton.h>
29 #include <folly/String.h>
30 #include <folly/container/F14Map.h>
31 #include <folly/container/F14Set.h>
33 #include <boost/program_options.hpp>
34 #include <boost/variant.hpp>
36 #include <tbb/concurrent_unordered_map.h>
37 #include <tbb/concurrent_vector.h>
39 #include "hphp/tools/debug-parser/debug-parser.h"
41 #include "hphp/util/assertions.h"
42 #include "hphp/util/file.h"
43 #include "hphp/util/type-scan.h"
46 * Program responsible for taking the parsed debug-information from TypeParser,
47 * analyzing it (along with user-provided annotations), and generating C++ code
48 * for GC type-scanners. These type-scanners will then be compiled into a shared
49 * object which can be loaded at start-up if GC is enabled.
51 * Some general concepts:
53 * - "Scanner". A scanner is responsible for reporting pointer information about
54 * a set of types to the GC. Generally it will report a list of pointers to
55 * other potentially interesting types, as well as address ranges that should
56 * be conservatively scanned.
58 * - "Collectable". A type is collectable if MarkCollectable<> or
59 * MarkScannableCollectable<> is instantiated on it. Type-scanners are never
60 * generated for collectable types, it is assumed their scanners will be
61 * hand-written. The exception is if MarkScannableCollectable<> is used, in
62 * which case they'll be scanned if explicitly requested. The point of the
63 * type-scanners is to determine how to find pointers to collectable types
64 * from other types. Collectable types correspond to the set of types in HHVM
65 * which are explicitly managed by the GC.
67 * - "Indexed". An indexed type is a combination of a type and an action. These
68 * occur from an instantiation of Indexer<>. Any particular type can be part
69 * of multiple indexed types but with different actions. Every indexed type
70 * receives a unique type-index and will have a scanner generated for
71 * it. Belonging to an indexed type generally marks that the type is allocated
72 * on the req-heap (with one exception). The action influences the behavior of
73 * that particular scanner. They are:
75 * (*) Ignore -- Scanner is trivial.
76 * (*) Auto -- The default. Attempt to generate scanner automatically.
77 * (*) Conservative<T...> -- If any of T... is "interesting" (see below),
78 * conservative scan. Otherwise, ignore.
79 * If T... is an empty list, always
80 * conservative scan.
81 * (*) WithSuffix<T> -- Attempt to generate scanner for type as normal.
82 * However, any allocation of the type only contains
83 * one instance of that type. Any remaining
84 * allocated memory is assumed to be filled with
85 * instances of T. So, a scanner for T will be
86 * generated as well.
87 * (*) ForScan -- Similar to Auto, but this type should not be marked as
88 * "pointer followable" (see below) if this is the only
89 * indexed type for the type. (This is a marker that the
90 * type is never actually req-heap allocated).
92 * - "Interesting". A type is interesting if the scanner generated for it would
93 * be non-trivial. IE, it would have at least one action. Interesting
94 * types either contain interesting types, pointers to "pointer
95 * followable" types, or have some custom action defined on it.
97 * - "Pointer followable". A type is pointer followable if it is a collectable
98 * type, it has a collectable type as a base, or if it is a member of at least
99 * one indexed type which has an action which is not "ForScan" and is
100 * interesting. By these conditions, a pointer followable type is one which is
101 * known to be allocated out of the request heap, and transitively leads to a
102 * collectable type via some chain of pointers. Pointers to pointer followable
103 * types are enqueued inside scanners. Pointers to non-pointer followable
104 * types are ignored. All base classes of pointer followable object types are
105 * also pointer followable (to handle polymorphism).
107 * - "Layout". Abstract description of a scanner. A layout contains a list of
108 * offsets and actions to perform at those offsets. Since many types have
109 * similar structure, multiple types can share the same layout.
111 * The goal of the scanner generator is to find the maximal set of pointer
112 * followable types, use those to compute the layout of each indexed type, then
113 * output those layouts in the form of C++ code.
115 * Note: this entire scheme assumes that all pointer followable types can be
116 * reached via some indexed type. If not, that pointer followable type is a
117 * root, and must be dealt with specially by the GC.
120 namespace {
122 // fast_map/set maps to F14{Value,Vector}Map/Set depending on K+V size.
123 // Entries are moved (if possible) or copied (if necessary) on rehash & erase.
124 template<class K, class V, class H=std::hash<K>, class C=std::equal_to<K>>
125 using fast_map = folly::F14FastMap<K,V,H,C>;
126 template<class T, class H=std::hash<T>, class C=std::equal_to<T>>
127 using fast_set = folly::F14FastSet<T,H,C>;
129 // node_map/set allocate K+V separately like std::unordered_map; K+V don't
130 // move during rehash. Saves memory compared to fast_map/set when K+V
131 // is large.
132 template<class K, class V, class H=std::hash<K>, class C=std::equal_to<K>>
133 using node_map = folly::F14NodeMap<K,V,H,C>;
134 template<class T, class H=std::hash<T>, class C=std::equal_to<T>>
135 using node_set = folly::F14NodeSet<T,H,C>;
137 ////////////////////////////////////////////////////////////////////////////////
139 using namespace debug_parser;
141 struct Generator {
142 private:
143 struct IndexedType;
144 struct Layout;
145 struct LayoutError;
147 // Action is a description of any special behavior an object type needs when
148 // generating a scanner for it. It abstracts away how this behavior is
149 // communicated within the generator.
150 struct Action {
151 // Ignore everything, a trivial scanner
152 bool ignore_all = false;
154 // Conservative scan everything (not including bases)
155 bool conservative_all = false;
156 // Conservative scan all bases
157 bool conservative_all_bases = false;
159 // This type should be ignored (used for system types we can't modify). This
160 // is different from ignore_all where ignore_all will still process base
161 // classes as normal, but whitelisted will not process anything, even base
162 // classes.
163 bool whitelisted = false;
165 // This type is a template which should not be instantiated on any req
166 // allocated types.
167 bool forbidden_template = false;
169 // Symbol of the custom scanner function which handles the entire type. If
170 // present, but empty, the scanner does not have linkage (which is an
171 // error).
172 HPHP::Optional<std::string> custom_all;
174 // Symbol of the custom scanner function which handles scanning base
175 // classes. If present, but empty, the scanner does not have linkage (which
176 // is an error).
177 HPHP::Optional<std::string> custom_bases_scanner;
179 // If non-empty, the name of the field in the object which is a "flexible
180 // array" member (a trailing array of unbound size). Each object can only
181 // have one of these.
182 std::string flexible_array_field;
184 // If a custom scanner for the object type is specified, it will only be
185 // invoked if any of the types in the custom guards list is interesting. If
186 // the list is empty, the custom scanner is always invoked.
187 fast_set<const Type*> custom_guards;
189 // List of fields in the object which should be ignored.
190 fast_set<std::string> ignore_fields;
192 // List of fields in the object which should always be conservative scanned.
193 fast_set<std::string> conservative_fields;
195 // Map of field names to symbols of custom scanners for that field.
196 fast_map<std::string, std::string> custom_fields;
198 // List of immediate base classes which should be ignored.
199 fast_set<const Object*> ignored_bases;
201 // List of immediate bases which the "forbidden template" check should not
202 // be applied to. Mainly used internally.
203 fast_set<const Object*> silenced_bases;
205 // If a custom scanner function for bases is specified, the list of
206 // immediate bases which the scanner applies to.
207 fast_set<const Object*> custom_bases;
209 // For certain actions it can immediately be known that its associated
210 // object will always be interesting. Therefore, any indexed type with such
211 // an action can immediately be marked as pointer followable, which helps us
212 // reach a fixed point faster. This is only an optimization, so the criteria
213 // does not need to be exact.
214 bool isAlwaysNonTrivial() const {
215 return !ignore_all && !whitelisted && (
216 conservative_all || conservative_all_bases ||
217 (custom_all && custom_guards.empty()) ||
218 (custom_bases_scanner && !custom_bases.empty()) ||
219 !conservative_fields.empty() || !custom_fields.empty());
223 struct ObjectNameHasher {
224 size_t operator()(const ObjectTypeName& name) const {
225 return folly::Hash()(name.name, name.linkage);
228 struct ObjectNameEquals {
229 bool operator()(const ObjectTypeName& n1,
230 const ObjectTypeName& n2) const {
231 return std::tie(n1.name, n1.linkage) ==
232 std::tie(n2.name, n2.linkage);
236 // For initializing type indices, we can use either the symbol name of the
237 // Indexer<> instantiation (preferred), or its raw memory address. We have to
238 // use the raw memory address for Indexer<> instantiations which do not have
239 // external linkage.
240 using Address = boost::variant<std::string, uintptr_t>;
242 public:
243 // Parse out all the debug information out of the specified file and do the
244 // analysis generating the layouts.
245 explicit Generator(const std::string&, bool skip);
247 // Turn the layouts into C++ code, writing to the specified ostream.
248 void operator()(std::ostream&) const;
249 private:
250 static bool isTemplateName(const std::string& candidate,
251 const std::string& name);
253 static bool isMarkCollectableName(const std::string&);
254 static bool isMarkScannableCollectableName(const std::string&);
255 static bool isIndexerName(const std::string&);
256 static bool isConservativeActionName(const std::string&);
257 static bool isWithSuffixActionName(const std::string&);
259 static std::string splitFieldName(const std::string& input,
260 const std::string& prefix);
262 static const Type& stripModifiers(const Type&);
264 static int compareTypes(const Type&, const Type&);
266 static int compareIndexedTypes(const IndexedType&,
267 const IndexedType&,
268 bool for_merge = false);
270 static void sanityCheckTemplateParams(const Object&);
272 bool findMemberHelper(const std::string& field, const Object &a_object) const;
273 void genAllLayouts();
274 void checkForLayoutErrors() const;
275 void assignUniqueLayouts();
277 template <typename T, typename F, typename C> T extractFromMarkers(
278 const C&, F&&
279 ) const;
281 size_t determineSize(const Type&) const;
283 const Object& getObject(const ObjectType&) const;
285 const Object& getMarkedCollectable(const Object&) const;
287 void genLayout(const Type&, Layout&, size_t,
288 bool conservative_everything = false) const;
289 void genLayout(const Object&, Layout&, size_t,
290 bool do_forbidden_check = true,
291 bool conservative_everything = false) const;
292 bool checkMemberSpecialAction(const Object& base_object,
293 const Object::Member& member,
294 const Action& action,
295 Layout& layout,
296 size_t base_obj_offset,
297 size_t offset) const;
299 IndexedType getIndexedType(const Object&) const;
301 template <typename C>
302 std::vector<IndexedType> getIndexedTypes(const C&) const;
304 std::vector<IndexedType> mergeDupIndexedTypes(
305 std::vector<Generator::IndexedType>
306 ) const;
308 bool hasCollectableBase(const Object& object) const;
310 bool forbiddenTemplateCheck(const Type& type) const;
311 bool forbiddenTemplateCheck(const Object& object) const;
313 bool hasEmptyLayout(const Type& type) const;
314 bool hasEmptyLayout(const Object& object) const;
316 bool isPtrFollowable(const Type& type) const;
317 bool isPtrFollowable(const Object& object) const {
318 return m_ptr_followable.find(&object) != m_ptr_followable.end();
321 void makePtrFollowable(const Type& type);
322 void makePtrFollowable(const Object& object);
324 const Action& getAction(const Object& object,
325 bool conservative_everything = false) const;
326 Action inferAction(const Object& object,
327 bool conservative_everything = false) const;
329 void genMetrics(std::ostream&) const;
330 void genForwardDecls(std::ostream&) const;
331 void genDataTable(std::ostream&) const;
332 void genIndexInit(std::ostream&) const;
333 void genScannerFuncs(std::ostream&) const;
334 void genBuiltinScannerFuncs(std::ostream&) const;
335 void genScannerFunc(std::ostream&,
336 const Layout&,
337 size_t begin_offset = 0) const;
339 std::unique_ptr<TypeParser> m_parser;
342 * Maps manipulated by getObject(). These index the known object types in
343 * various ways to allow for the proper lookup of an object type according to
344 * its linkage. They also allow that getObject() will always return the same
345 * Object& for the same type, even if the object types' keys are different,
346 * which means we can use pointer equality to compare types.
348 * They are marked mutable so that getObject() can remain const.
351 mutable tbb::concurrent_unordered_map<
352 ObjectTypeName,
353 tbb::concurrent_vector<ObjectTypeKey>,
354 ObjectNameHasher,
355 ObjectNameEquals
356 > m_objects_by_name;
358 mutable node_map<
359 std::string,
360 Object
361 > m_external_objects;
363 mutable node_map<
364 CompileUnitId,
365 node_map<
366 std::string,
367 Object
369 > m_internal_objects;
371 mutable node_map<
372 ObjectTypeId,
373 Object
374 > m_unique_objects;
376 // Mapping of object types to their computed actions. We could compute the
377 // action every time we needed it, but they're stored in this table for
378 // memoization. This table is mutable as well since its a cache.
379 mutable node_map<const Object*, Action> m_actions; // XXX must be node
381 // List of all indexed types in the debug information.
382 std::vector<IndexedType> m_indexed_types;
384 // Set of all types which are currently known to be pointer followable or
385 // collectable. The collectable set is set once, and should never change,
386 // while the pointer followable set can grow as more pointer followable types
387 // are discovered (it must grow monotonically, never removing anything).
388 fast_set<const Object*> m_ptr_followable;
389 fast_set<const Object*> m_collectable;
390 fast_set<const Object*> m_scannable_collectable;
392 // List of all layouts. Once computed, the indexed types will have an index
393 // into this table for its associated layout.
394 std::vector<Layout> m_layouts;
396 // Set of objects whose layout is currently being generated. Used to detect
397 // possible infinite recursion.
398 mutable fast_set<const Object*> m_layout_being_generated;
400 // Static strings used to identify certain special types in the debug info,
401 // which serve as markers for special actions. These strings should stay in
402 // sync with the types in type-scan.h.
403 static constexpr const char* const s_mark_collectable_name =
404 "HPHP::type_scan::MarkCollectable";
405 static constexpr const char* const s_mark_scannable_collectable_name =
406 "HPHP::type_scan::MarkScannableCollectable";
407 static constexpr const char* const s_indexer_name =
408 "HPHP::type_scan::detail::Indexer";
409 static constexpr const char* const s_auto_action_name =
410 "HPHP::type_scan::Action::Auto";
411 static constexpr const char* const s_scan_action_name =
412 "HPHP::type_scan::detail::ScanAction";
413 static constexpr const char* const s_with_suffix_action_name =
414 "HPHP::type_scan::Action::WithSuffix";
415 static constexpr const char* const s_ignore_action_name =
416 "HPHP::type_scan::Action::Ignore";
417 static constexpr const char* const s_conservative_action_name =
418 "HPHP::type_scan::Action::Conservative";
419 static constexpr const char* const s_scanner_name =
420 "HPHP::type_scan::Scanner";
424 * As stated above, layout is a description of a particular scanner. It is a
425 * list of offsets, along with a certain action to perform at that
426 * offset. Computing layouts for indexed types is the purpose of the scanner
427 * generator. A layout is not inherently tied to any particular type, as many
428 * different types can share the same layout (this is especially common for
429 * templated types, like req::ptr<>).
431 * A layout can currently contain three different types of actions. The first is
432 * to report the value of a pointer at a certain offset. The second is to
433 * conservative scan a range given by a start offset and a length. The last is
434 * to invoke a custom function (with a certain symbol), passing a pointer formed
435 * by the offset as the first parameter (which serves as the this pointer).
437 * A layout can also have a suffix layout. Normally, for a given block of
438 * memory, the layout will be applied contiguously until the entire block is
439 * processed. However, if the layout has a suffix layout, the base layout is
440 * only applied once on the beginning of the block. The suffix layout will be
441 * applied contiguously on the remaining portion of the block. This is to handle
442 * object types with "flexible array members".
445 struct Generator::Layout {
446 struct Ptr {
447 size_t offset;
449 bool operator==(const Ptr& other) const {
450 return offset == other.offset;
452 bool operator!=(const Ptr& other) const {
453 return !(*this == other);
455 bool operator<(const Ptr& other) const {
456 return offset < other.offset;
460 struct Conservative {
461 size_t offset;
462 size_t size;
464 bool operator==(const Conservative& other) const {
465 return std::tie(offset, size) == std::tie(other.offset, other.size);
467 bool operator!=(const Conservative& other) const {
468 return !(*this == other);
470 bool operator<(const Conservative& other) const {
471 return std::tie(offset, size) < std::tie(other.offset, other.size);
475 struct Custom {
476 size_t offset;
477 std::string linkage_name;
479 bool operator==(const Custom& other) const {
480 return std::tie(offset, linkage_name) ==
481 std::tie(other.offset, other.linkage_name);
483 bool operator!=(const Custom& other) const {
484 return !(*this == other);
486 bool operator<(const Custom& other) const {
487 return std::tie(offset, linkage_name) <
488 std::tie(other.offset, other.linkage_name);
492 explicit Layout(size_t size) : size{size} {}
494 Layout(const Layout& other);
495 Layout(Layout&&) = default;
497 Layout& operator=(const Layout& other);
498 Layout& operator=(Layout&&) = default;
500 void addPtr(size_t offset) {
501 ptrs.emplace_back(Ptr{offset});
503 void addConservative(size_t offset, size_t size);
504 void addCustom(size_t offset, std::string linkage) {
505 custom.emplace_back(Custom{offset, linkage});
508 // A sub-layout is used to insert a layout into another layout N times. Used
509 // for arrays, where we compute the layout once, but want to insert it for the
510 // number of elements in the array.
511 void addSubLayout(size_t offset,
512 size_t count,
513 const Layout& layout) {
514 for (size_t i = 0; i < count; ++i) {
515 merge(layout, offset + i*layout.size);
519 void setSuffix(Layout other) {
520 suffix = std::make_unique<Layout>(std::move(other));
523 void merge(const Layout& other, size_t offset);
525 bool isEmpty() const {
526 return ptrs.empty() && conservative.empty() &&
527 custom.empty() && (!suffix || suffix->isEmpty());
530 // Is this layout made up of nothing but conservative scans?
531 bool isAllConservative() const;
533 void clear();
535 size_t totalCount() const;
537 bool operator==(const Layout& other) const;
538 bool operator!=(const Layout& other) const { return !(*this == other); }
539 bool operator<(const Layout& other) const;
541 size_t size;
542 std::vector<Ptr> ptrs;
543 std::vector<Conservative> conservative;
544 std::vector<Custom> custom;
545 std::unique_ptr<Layout> suffix;
546 // Offset where the suffix begins. This may be different than the size because
547 // of flexible array members (which sometimes start within the object).
548 size_t suffix_begin = size;
552 * If the generator cannot automatically create a layout, it will throw
553 * LayoutError. These LayoutErrors are gathered and reported together at the end
554 * of processing. They include context about why and where the error occurred.
556 struct Generator::LayoutError {
557 explicit LayoutError(std::string reason): reason{std::move(reason)} {}
559 void addContext(std::string str) {
560 context.emplace_back(std::move(str));
563 std::string reason;
564 std::vector<std::string> context;
568 * An indexed type is a combination of a type and a set of actions associated
569 * with it. Every indexed type is associated with a layout (hence a scanner), but a
570 * given type can be part of multiple indexed types. Type-indices are handed out
571 * on a per indexed type basis.
573 * It's important to distinguish the actions that are inherent to a type
574 * (embodied in the Action class), with the actions assigned inside an indexed
575 * type. The actions inherent to the type are used when automatically generating
576 * a scanner for that type, while the actions here override those. However,
577 * since indexed types are keyed by a type index, they are only applied on
578 * specific instances of the type with that type index at runtime. So, for
579 * example, a type T can have an automatically generated scanner for some
580 * instances of T, but a conservative scanner for others depending on which type
581 * index the instance has.
583 struct Generator::IndexedType {
584 IndexedType(const Type& type, size_t type_size, Address address)
585 : type{&type}
586 , layout{type_size} { addresses.emplace_back(address); }
588 // Underlying type
589 const Type* type;
591 // List of addresses for all the associated Indexer<> instances.
592 std::vector<Address> addresses;
594 // Marker that instances of the type with this type index do not represent
595 // actual heap allocations (we only want the scanner).
596 bool scan = false;
598 // Ignore instances of the type with this type index. If this is true, and
599 // conservative is true as well, it means that the conservative scan is still
600 // conditional.
601 bool ignore = false;
603 // Conservative scan instances of the type with this type index.
604 bool conservative = false;
606 // If conservative is true, only actually conservative scan if any of these
607 // types are interesting. If not, ignore. If the list is empty, always
608 // conservative scan.
609 std::vector<const Type*> conservative_guards;
611 // A suffix type is used as the basis for a suffix layout as described above.
612 const Type* suffix_type = nullptr;
614 // Generated layout. This is used when generating layouts, but after that,
615 // layout_index will index into the layout table for the actual layout to use.
616 Layout layout;
618 // Index into the layout table for the assigned layout.
619 size_t layout_index = 0;
621 // If there was an error while automatically generating the layout for this
622 // underlying type, it is stored here until they are all reported.
623 HPHP::Optional<LayoutError> errors;
626 size_t NumThreads = 24;
628 Generator::Generator(const std::string& filename, bool skip) {
629 // Either this platform has no support for parsing debug information, or the
630 // preprocessor symbol to enable actually building scanner isn't
631 // enabled. Either way, just bail out. Everything will get a conservative
632 // scanner by default if someone actually tries to use the scanners at
633 // runtime.
634 if (skip) return;
636 m_parser = TypeParser::make(filename, NumThreads);
638 tbb::concurrent_vector<ObjectType> indexer_types;
639 tbb::concurrent_vector<ObjectType> collectable_markers;
640 tbb::concurrent_vector<ObjectType> scannable_collectable_markers;
642 // Iterate through all the objects the debug info parser found, storing the
643 // MarkCollectable<> markers, and the Indexer<> instances. For everything,
644 // store in the appropriate getObject() maps, which will allow us to do
645 // getObject() lookups afterwards.
647 // There can be a lot of objects to iterate over, so do it concurrently.
649 auto const block_count = m_parser->getObjectBlockCount();
650 std::atomic<size_t> next_block{0};
652 auto const run = [&]{
653 while (true) {
654 auto const block = next_block++;
655 if (block >= block_count) break;
657 m_parser->forEachObjectInBlock(
658 block,
659 [&](const ObjectType& type) {
660 if (isIndexerName(type.name.name)) {
661 indexer_types.push_back(type);
662 } else if (isMarkCollectableName(type.name.name)) {
663 collectable_markers.push_back(type);
664 } else if (isMarkScannableCollectableName(type.name.name)) {
665 collectable_markers.push_back(type);
666 scannable_collectable_markers.push_back(type);
669 // Incomplete types are useless for our purposes, so just ignore
670 // them.
671 if (type.incomplete) return;
673 m_objects_by_name[type.name].push_back(type.key);
679 std::vector<std::thread> threads;
680 // No point in creating more threads than there are blocks.
681 for (auto i = size_t{0}; i < std::min(block_count, NumThreads); ++i) {
682 threads.emplace_back(std::thread(run));
684 for (auto& t : threads) t.join();
687 // Complain if it looks like we don't have any debug info enabled.
688 // (falls back to conservative scanning for everything)
689 if (collectable_markers.empty() && indexer_types.empty()) {
690 std::cerr << "gen-type-scanners: warning: "
691 "No collectable or indexed types found. "
692 "Is debug-info enabled?" << std::endl;
695 // Extract all the types that Mark[Scannable]Collectable<> was instantiated on
696 // to obtain all the types which are collectable. Since all collectable types
697 // are automatically pointer followable, mark them as such.
698 m_collectable = extractFromMarkers<decltype(m_collectable)>(
699 collectable_markers,
700 [&](const Object& o) { return &getMarkedCollectable(o); }
702 m_scannable_collectable =
703 extractFromMarkers<decltype(m_scannable_collectable)>(
704 scannable_collectable_markers,
705 [&](const Object& o) { return &getMarkedCollectable(o); }
707 for (const auto* obj : m_collectable) {
708 makePtrFollowable(*obj);
711 // Extract all the IndexedType information out of the Indexer<>
712 // instantiations.
713 m_indexed_types = getIndexedTypes(indexer_types);
715 // Before beginning the actual layout generation, we can speed things up a bit
716 // by marking any types which we know are always pointer followable. This will
717 // let us reach the fixed point in fewer iterations.
718 for (const auto& indexed : m_indexed_types) {
719 // Indexed types just for scanning are never pointer followable (because
720 // they're not actually heap allocated).
721 if (indexed.scan) continue;
723 // If the underlying type is an object type, and its associated action is
724 // always non-trivial, or if the object type has a collectable type as a
725 // base class, then the object type is always pointer followable. Same logic
726 // for any suffix type.
727 if (const auto* obj = stripModifiers(*indexed.type).asObject()) {
728 const auto& object = getObject(*obj);
729 if (getAction(object).isAlwaysNonTrivial() ||
730 hasCollectableBase(object)) {
731 makePtrFollowable(object);
735 if (indexed.suffix_type) {
736 const auto& suffix_type = stripModifiers(*indexed.suffix_type);
737 if (const auto* obj = suffix_type.asObject()) {
738 const auto& object = getObject(*obj);
739 if (getAction(object).isAlwaysNonTrivial()) {
740 makePtrFollowable(*indexed.type);
745 // If this indexed type is always going to be a complete conservative scan,
746 // than we're always going to have a non-trivial action for its scanner, so
747 // it's always pointer followable.
748 if (indexed.conservative && indexed.conservative_guards.empty()) {
749 makePtrFollowable(*indexed.type);
753 genAllLayouts();
754 checkForLayoutErrors();
755 assignUniqueLayouts();
758 // Helper function. Match a name against another name, ignoring the template
759 // parameter portion.
760 bool Generator::isTemplateName(const std::string& candidate,
761 const std::string& name) {
762 auto index = candidate.find(name);
763 if (index != 0) return false;
764 if (name.size() >= candidate.size()) return false;
765 if (candidate[name.size()] != '<') return false;
766 if (candidate.rfind('>') == std::string::npos) return false;
767 return true;
770 // Helper functions to check if an object type's name is that of a special
771 // marker type:
773 bool Generator::isMarkCollectableName(const std::string& name) {
774 return isTemplateName(name, s_mark_collectable_name);
776 bool Generator::isMarkScannableCollectableName(const std::string& name) {
777 return isTemplateName(name, s_mark_scannable_collectable_name);
779 bool Generator::isIndexerName(const std::string& name) {
780 return isTemplateName(name, s_indexer_name);
782 bool Generator::isConservativeActionName(const std::string& name) {
783 return isTemplateName(name, s_conservative_action_name);
785 bool Generator::isWithSuffixActionName(const std::string& name) {
786 return isTemplateName(name, s_with_suffix_action_name);
789 // Split a member function of the form "[prefix][suffix]_" into just "suffix"
790 // and return it. Returns the empty string if the input doesn't match that
791 // format.
792 std::string Generator::splitFieldName(const std::string& input,
793 const std::string& prefix) {
794 auto index = input.find(prefix);
795 if (index != 0) return std::string{};
796 if (prefix.size()+1 >= input.size()) return std::string{};
797 if (input[input.size()-1] != '_') return std::string{};
798 return input.substr(prefix.size(), input.size()-prefix.size()-1);
801 // Helper function to remove any cv (and restrict) qualifiers from a type.
802 const Type& Generator::stripModifiers(const Type& type) {
803 if (auto modifier = type.asConst()) {
804 return stripModifiers(modifier->modified);
805 } else if (auto modifier = type.asVolatile()) {
806 return stripModifiers(modifier->modified);
807 } else if (auto modifier = type.asRestrict()) {
808 return stripModifiers(modifier->modified);
809 } else {
810 return type;
814 // Compare two types, return -1 if t1 < t2, 1 if t1 > t2, and 0 if they are
815 // equal. Type doesn't have a builtin comparison operator because the ordering
816 // is rather arbitrary and application dependent.
817 int Generator::compareTypes(const Type& t1, const Type& t2) {
818 // First order types according to what category of type they are, using the
819 // below arbitrary ranking.
820 const auto rank = [](const Type& t) {
821 return t.match<int>(
822 [](const ObjectType*) { return 0; },
823 [](const PtrType*) { return 1; },
824 [](const RefType*) { return 2; },
825 [](const RValueRefType*) { return 3; },
826 [](const ArrType*) { return 4; },
827 [](const ConstType*) { return 5; },
828 [](const VolatileType*) { return 6; },
829 [](const RestrictType*) { return 7; },
830 [](const FuncType*) { return 8; },
831 [](const MemberType*) { return 9; },
832 [](const VoidType*) { return 10; }
835 const auto rank1 = rank(t1);
836 const auto rank2 = rank(t2);
837 if (rank1 < rank2) return -1;
838 else if (rank1 > rank2) return 1;
840 // At this point, both t1 and t2 are of the same type category. We can now do
841 // member-wise comparison of the types within these types.
843 return t1.match<int>(
844 [&](const ObjectType* t) {
845 const auto* other = t2.asObject();
847 if (t->name.name < other->name.name) return -1;
848 else if (t->name.name > other->name.name) return 1;
850 if (t->incomplete < other->incomplete) return -1;
851 else if (t->incomplete > other->incomplete) return 1;
853 if (t->name.linkage < other->name.linkage) return -1;
854 else if (t->name.linkage > other->name.linkage) return 1;
856 // The two object types have the same linkage and same name, so now
857 // examine which linkage that actually is to determine how to check for
858 // object equality.
859 switch (t->name.linkage) {
860 case ObjectTypeName::Linkage::external:
861 case ObjectTypeName::Linkage::pseudo:
862 // Same name and external linkage means they're the same.
863 return 0;
864 case ObjectTypeName::Linkage::internal:
865 // Objects types with internal linkage with the same name are only the
866 // same if they're in the same compile unit.
867 if (t->key.compile_unit_id < other->key.compile_unit_id) return -1;
868 if (t->key.compile_unit_id > other->key.compile_unit_id) return 1;
869 return 0;
870 case ObjectTypeName::Linkage::none: {
871 // Object types with no linkage are only the same if they have
872 // identical keys.
873 const auto& tie1 = std::tie(
874 t->key.compile_unit_id,
875 t->key.object_id
877 const auto& tie2 = std::tie(
878 other->key.compile_unit_id,
879 other->key.object_id
881 if (tie1 < tie2) return -1;
882 if (tie1 > tie2) return 1;
883 return 0;
887 return 0;
889 [&](const PtrType* t) {
890 return compareTypes(t->pointee, t2.asPtr()->pointee);
892 [&](const RefType* t) {
893 return compareTypes(t->referenced, t2.asRef()->referenced);
895 [&](const RValueRefType* t) {
896 return compareTypes(t->referenced, t2.asRValueRef()->referenced);
898 [&](const ArrType* t) {
899 const auto* other = t2.asArr();
900 const auto cmp = compareTypes(t->element, other->element);
901 if (cmp != 0) return cmp;
902 if (t->count < other->count) return -1;
903 if (t->count > other->count) return 1;
904 return 0;
906 [&](const ConstType* t) {
907 return compareTypes(t->modified, t2.asConst()->modified);
909 [&](const VolatileType* t) {
910 return compareTypes(t->modified, t2.asVolatile()->modified);
912 [&](const RestrictType* t) {
913 return compareTypes(t->modified, t2.asRestrict()->modified);
915 [&](const FuncType* t) {
916 const auto* other = t2.asFunc();
917 const auto cmp = compareTypes(t->ret, other->ret);
918 if (cmp != 0) return cmp;
919 if (t->args.size() < other->args.size()) return -1;
920 if (t->args.size() > other->args.size()) return 1;
921 for (size_t i = 0; i < t->args.size(); ++i) {
922 const auto cmp = compareTypes(t->args[i], other->args[i]);
923 if (cmp != 0) return cmp;
925 return 0;
927 [&](const MemberType* t) {
928 const auto* other = t2.asMember();
929 const auto cmp = compareTypes(t->obj, other->obj);
930 if (cmp != 0) return cmp;
931 return compareTypes(t->member, other->member);
933 [&](const VoidType*) { return 0; }
937 // Compare two indexed types, return -1 if t1 < t2, 1 if t1 > t2, and 0 if
938 // equal. If this comparison is for the purpose of trying to merge two indexed
939 // type together (see below), skip comparing certain fields.
940 int Generator::compareIndexedTypes(const IndexedType& t1,
941 const IndexedType& t2,
942 bool for_merge) {
943 const auto cmp = compareTypes(*t1.type, *t2.type);
944 if (cmp != 0) return cmp;
946 if (t1.ignore < t2.ignore) return -1;
947 else if (t1.ignore > t2.ignore) return 1;
949 if (t1.conservative < t2.conservative) return -1;
950 else if (t1.conservative > t2.conservative) return 1;
952 const auto compare_guards = [](const IndexedType& type1,
953 const IndexedType& type2) {
954 return std::lexicographical_compare(
955 type1.conservative_guards.begin(),
956 type1.conservative_guards.end(),
957 type2.conservative_guards.begin(),
958 type2.conservative_guards.end(),
959 [](const Type* tp1, const Type* tp2) {
960 return compareTypes(*tp1, *tp2) < 0;
964 if (compare_guards(t1, t2)) return -1;
965 else if (compare_guards(t2, t1)) return 1;
967 if (!t1.suffix_type) {
968 if (t2.suffix_type) return -1;
969 } else if (!t2.suffix_type) {
970 if (t1.suffix_type) return 1;
971 } else {
972 const auto cmp = compareTypes(*t1.suffix_type, *t2.suffix_type);
973 if (cmp != 0) return cmp;
976 // The whole point of merging two indexed types together are to coalesce
977 // Indexer<> addresses and to combine a "scan-only" one with a non-scan-only
978 // one, so don't compare these fields when checking for equality.
979 if (!for_merge) {
980 // This ordering is important! We want indexed types with scan set to false
981 // to come first. We always merge right into left, so scan-only should be
982 // merged into no-scan-only.
983 if (t1.scan < t2.scan) return -1;
984 else if (t1.scan > t2.scan) return 1;
986 if (t1.addresses < t2.addresses) return -1;
987 else if (t1.addresses > t2.addresses) return 1;
990 return 0;
993 // Helper function to extract types that markers are instantiated on from the
994 // markers. Given a list of a specific marker object type, apply the given
995 // callable f to it (which should extract the marked type out of it), sort and
996 // uniquify the resultant list, and return it.
997 template <typename T, typename F, typename C>
998 T Generator::extractFromMarkers(const C& types, F&& f) const {
999 std::vector<const Object*> objects;
1000 std::transform(
1001 types.begin(),
1002 types.end(),
1003 std::back_inserter(objects),
1004 [&](const ObjectType& t){ return &getObject(t); }
1006 std::sort(objects.begin(), objects.end());
1008 T out;
1009 auto ins = std::inserter(out, out.end());
1010 std::string msg;
1011 std::for_each(
1012 objects.begin(),
1013 std::unique(objects.begin(), objects.end()),
1014 [&](const Object* o) {
1015 try {
1016 ins = f(*o);
1017 } catch (Exception& e) {
1018 folly::format(&msg, " => {}\n", e.what());
1022 if (!msg.empty()) throw Exception(msg);
1023 return out;
1026 // Given a list of Indexer<> instantiation types, extract the marked types, and
1027 // create the appropriate IndexedType struct for them, merging duplicates
1028 // together.
1029 template <typename C>
1030 std::vector<Generator::IndexedType> Generator::getIndexedTypes(
1031 const C& indexers
1032 ) const {
1033 auto indexed = extractFromMarkers<std::vector<IndexedType>>(
1034 indexers,
1035 [&](const Object& o) { return getIndexedType(o); }
1037 return mergeDupIndexedTypes(std::move(indexed));
1040 // Merge "duplicate" indexed types together using some definition of duplicate,
1041 // returning the merged types.
1042 std::vector<Generator::IndexedType> Generator::mergeDupIndexedTypes(
1043 std::vector<Generator::IndexedType> indexed
1044 ) const {
1046 // First sort the types. The ordering has been set up so that indexed types
1047 // that are identical except for the "scan" field will have the indexed types
1048 // with scan set to false first.
1049 std::sort(
1050 indexed.begin(),
1051 indexed.end(),
1052 [](const IndexedType& t1, const IndexedType& t2)
1053 { return compareIndexedTypes(t1, t2) < 0; }
1056 // Only insert an indexed type into the unique vector if its not equal (for
1057 // the purposes of merging) as the last indexed type in the vector. If they
1058 // are equal, merge their addresses and their scan field anded together. Since
1059 // indexed types with scan set to false come first, this will ensure that
1060 // false is kept over true.
1061 std::vector<IndexedType> unique;
1062 for (const auto& i : indexed) {
1063 if (unique.empty() || compareIndexedTypes(unique.back(), i, true) != 0) {
1064 unique.emplace_back(std::move(i));
1065 } else {
1066 auto& back = unique.back();
1067 back.scan &= i.scan;
1068 back.addresses.insert(
1069 back.addresses.end(),
1070 i.addresses.begin(),
1071 i.addresses.end()
1076 return unique;
1080 * Certain compilers (usually Clang) will emit debug information for template
1081 * instantiations, but fail to emit information about the template
1082 * parameters. This usually happens if it thinks the template isn't "used". This
1083 * is bad for marker types like Conservative<...> because it will be interpreted
1084 * as an empty type list (which implies always conservative scan). Unfortunately
1085 * we cannot always detect this, but we can for certain cases.
1087 * If we know a-priori that an object-type is a template, and the debug
1088 * information indicates the object type has no template parameters, do a hacky
1089 * check on the object type's name. Look for the name to end with "<>",
1090 * indicating an empty parameter list. If not, the template actually has
1091 * parameters but is failing to emit them.
1093 * Unfortunately there's not much that can be done about this, but we want to
1094 * catch it quickly so that we can try to work around it.
1096 void Generator::sanityCheckTemplateParams(const Object& object) {
1097 if (!object.template_params.empty()) return;
1099 const auto index = object.name.name.rfind("<>");
1100 if (index == std::string::npos ||
1101 index != object.name.name.size()-2) {
1102 throw Exception{
1103 folly::sformat(
1104 "Object type '{}' at ({},{}) is reported as having "
1105 "no template parameters, but its name indicates "
1106 "otherwise. This usually indicates the compiler "
1107 "is not generating complete enough debug information.",
1108 object.name.name,
1109 object.key.object_id,
1110 object.key.compile_unit_id
1116 // Given a Mark[Scannable]Collectable<> marker instantiation, extract
1117 // the object-type its marking. Actually very simple, but do a lot of sanity
1118 // checking on the result.
1119 const Object& Generator::getMarkedCollectable(const Object& mark) const {
1120 if (mark.incomplete) {
1121 throw Exception{
1122 folly::sformat(
1123 "Collectable marker '{}' at ({},{}) is an incomplete type",
1124 mark.name.name,
1125 mark.key.object_id,
1126 mark.key.compile_unit_id
1131 if (mark.kind != Object::Kind::k_class) {
1132 throw Exception{
1133 folly::sformat(
1134 "Collectable marker '{}' at ({},{}) isn't a class type",
1135 mark.name.name,
1136 mark.key.object_id,
1137 mark.key.compile_unit_id
1142 if (!mark.bases.empty()) {
1143 throw Exception{
1144 folly::sformat(
1145 "Collectable marker '{}' at ({},{}) has base classes",
1146 mark.name.name,
1147 mark.key.object_id,
1148 mark.key.compile_unit_id
1153 if (!mark.members.empty()) {
1154 throw Exception{
1155 folly::sformat(
1156 "Collectable marker '{}' at ({},{}) has members",
1157 mark.name.name,
1158 mark.key.object_id,
1159 mark.key.compile_unit_id
1164 if (mark.name.linkage != ObjectTypeName::Linkage::external) {
1165 throw Exception{
1166 folly::sformat(
1167 "Collectable marker '{}' at ({},{}) does not have external linkage",
1168 mark.name.name,
1169 mark.key.object_id,
1170 mark.key.compile_unit_id
1175 if (mark.template_params.size() != 1) {
1176 throw Exception{
1177 folly::sformat(
1178 "Collectable marker '{}' at ({},{}) does not have exactly "
1179 "one template parameter",
1180 mark.name.name,
1181 mark.key.object_id,
1182 mark.key.compile_unit_id
1187 const auto& type = mark.template_params[0].type;
1189 const auto* obj_type = stripModifiers(type).asObject();
1190 if (!obj_type) {
1191 throw Exception{
1192 folly::sformat(
1193 "Collectable marker '{}' at ({},{}) is instantiated on type '{}', "
1194 "which is not an object",
1195 mark.name.name,
1196 mark.key.object_id,
1197 mark.key.compile_unit_id,
1198 type.toString()
1203 if (obj_type->name.linkage != ObjectTypeName::Linkage::external) {
1204 throw Exception{
1205 folly::sformat(
1206 "Collectable marker '{}' at ({},{}) is instantiated on object type '{}'"
1207 " at ({}, {}), which does not have external linkage",
1208 mark.name.name,
1209 mark.key.object_id,
1210 mark.key.compile_unit_id,
1211 obj_type->name.name,
1212 obj_type->key.object_id,
1213 obj_type->key.compile_unit_id
1218 const auto& obj = getObject(*obj_type);
1219 if (obj.incomplete) {
1220 throw Exception{
1221 folly::sformat(
1222 "Collectable marker '{}' at ({},{}) is instantiated on object type '{}'"
1223 " at ({}, {}), which is an incomplete type",
1224 mark.name.name,
1225 mark.key.object_id,
1226 mark.key.compile_unit_id,
1227 obj_type->name.name,
1228 obj_type->key.object_id,
1229 obj_type->key.compile_unit_id
1233 if (obj.kind != Object::Kind::k_class) {
1234 throw Exception{
1235 folly::sformat(
1236 "Collectable marker '{}' at ({},{}) is instantiated on object type '{}'"
1237 " at ({}, {}), which is not a class type",
1238 mark.name.name,
1239 mark.key.object_id,
1240 mark.key.compile_unit_id,
1241 obj_type->name.name,
1242 obj_type->key.object_id,
1243 obj_type->key.compile_unit_id
1248 return obj;
1251 // Given an Indexer<> marker instantiation, extract the type and action the
1252 // marker refers to, returning the appropriate IndexedType instance. Do a lot of
1253 // sanity checking on the result.
1254 Generator::IndexedType Generator::getIndexedType(const Object& indexer) const {
1255 if (indexer.incomplete) {
1256 throw Exception{
1257 folly::sformat(
1258 "Indexer '{}' at ({},{}) is an incomplete type",
1259 indexer.name.name,
1260 indexer.key.object_id,
1261 indexer.key.compile_unit_id
1266 if (indexer.kind != Object::Kind::k_class) {
1267 throw Exception{
1268 folly::sformat(
1269 "Indexer '{}' at ({},{}) is not a class type",
1270 indexer.name.name,
1271 indexer.key.object_id,
1272 indexer.key.compile_unit_id
1277 if (!indexer.bases.empty()) {
1278 throw Exception{
1279 folly::sformat(
1280 "Indexer '{}' at ({},{}) has base classes",
1281 indexer.name.name,
1282 indexer.key.object_id,
1283 indexer.key.compile_unit_id
1288 if (indexer.members.size() != 2) {
1289 throw Exception{
1290 folly::sformat(
1291 "Indexer '{}' at ({},{}) does not have exactly two members ({})",
1292 indexer.name.name,
1293 indexer.key.object_id,
1294 indexer.key.compile_unit_id,
1295 indexer.members.size()
1300 if (indexer.template_params.size() != 2) {
1301 throw Exception{
1302 folly::sformat(
1303 "Indexer '{}' at ({},{}) does not have exactly two "
1304 "template parameters ({})",
1305 indexer.name.name,
1306 indexer.key.object_id,
1307 indexer.key.compile_unit_id,
1308 indexer.template_params.size()
1313 const auto index_iter = std::find_if(
1314 indexer.members.begin(),
1315 indexer.members.end(),
1316 [](const Object::Member& m) { return m.name == "s_index"; }
1318 if (index_iter == indexer.members.end()) {
1319 throw Exception{
1320 folly::sformat(
1321 "Indexer '{}' at ({},{}) does not a s_index member",
1322 indexer.name.name,
1323 indexer.key.object_id,
1324 indexer.key.compile_unit_id
1328 const auto& index_member = *index_iter;
1330 if (index_member.offset) {
1331 throw Exception{
1332 folly::sformat(
1333 "Indexer '{}' at ({},{}) has a non-static s_index member",
1334 indexer.name.name,
1335 indexer.key.object_id,
1336 indexer.key.compile_unit_id
1341 // Since we want to put the assigned type index into Indexer<>::s_index, it
1342 // had better either have a symbol (preferred), or an absolute address.
1343 if (!index_member.address && index_member.linkage_name.empty()) {
1344 throw Exception{
1345 folly::sformat(
1346 "Indexer '{}' at ({},{}) has a s_index member which "
1347 "has neither a linkage name, nor an address",
1348 indexer.name.name,
1349 indexer.key.object_id,
1350 indexer.key.compile_unit_id
1355 // Extract the underlying type from the Indexer<>, doing sanity checking on
1356 // the type.
1357 const auto* type = [&] {
1359 * The type we want is just the first template parameter of Indexer<>, but
1360 * we want to do sanity checking on it, which involves walking the type
1361 * chain.
1363 * To avoid recursion, use a loop instead. Each invocation of the match
1364 * method returns the next type to check, returning nullptr if there's no
1365 * more types to check. This works because each type category only has at
1366 * most one sub-type to check.
1368 const auto* current = &indexer.template_params[0].type;
1369 while (current) {
1370 current = current->match<const Type*>(
1371 [&](const ObjectType* t) -> const Type* {
1372 if (t->name.linkage == ObjectTypeName::Linkage::pseudo) {
1373 throw Exception{
1374 folly::sformat(
1375 "Indexer '{}' at ({},{}) is instantiated on "
1376 "object type '{}' which is the pseudo-type",
1377 indexer.name.name,
1378 indexer.key.object_id,
1379 indexer.key.compile_unit_id,
1380 current->toString()
1385 const auto& obj = getObject(*t);
1386 if (obj.incomplete) {
1387 throw Exception{
1388 folly::sformat(
1389 "Indexer '{}' at ({},{}) is instantiated on "
1390 "object type '{}' which is an incomplete type",
1391 indexer.name.name,
1392 indexer.key.object_id,
1393 indexer.key.compile_unit_id,
1394 current->toString()
1399 if (obj.kind == Object::Kind::k_other) {
1400 throw Exception{
1401 folly::sformat(
1402 "Indexer '{}' at ({},{}) is instantiated on "
1403 "object type '{}' which has an 'other' kind",
1404 indexer.name.name,
1405 indexer.key.object_id,
1406 indexer.key.compile_unit_id,
1407 current->toString()
1412 return nullptr;
1414 [&](const PtrType*) -> const Type* { return nullptr; },
1415 [&](const RefType*) -> const Type* { return nullptr; },
1416 [&](const RValueRefType*) -> const Type* { return nullptr; },
1417 [&](const ArrType* t) {
1418 if (!t->count) {
1419 throw Exception{
1420 folly::sformat(
1421 "Indexer '{}' at ({},{}) is instantiated on "
1422 "unbounded array type '{}'",
1423 indexer.name.name,
1424 indexer.key.object_id,
1425 indexer.key.compile_unit_id,
1426 current->toString()
1430 return &t->element;
1432 [&](const ConstType* t) { return &t->modified; },
1433 [&](const VolatileType* t) { return &t->modified; },
1434 [&](const RestrictType* t) { return &t->modified; },
1435 [&](const FuncType*) -> const Type* {
1436 throw Exception{
1437 folly::sformat(
1438 "Indexer '{}' at ({},{}) is instantiated on function type '{}'",
1439 indexer.name.name,
1440 indexer.key.object_id,
1441 indexer.key.compile_unit_id,
1442 current->toString()
1445 return nullptr;
1447 [&](const MemberType*) -> const Type* {
1448 throw Exception{
1449 folly::sformat(
1450 "Indexer '{}' at ({},{}) is instantiated on member type '{}'",
1451 indexer.name.name,
1452 indexer.key.object_id,
1453 indexer.key.compile_unit_id,
1454 current->toString()
1457 return nullptr;
1459 [&](const VoidType*) -> const Type* {
1460 throw Exception{
1461 folly::sformat(
1462 "Indexer '{}' at ({},{}) is instantiated on void type",
1463 indexer.name.name,
1464 indexer.key.object_id,
1465 indexer.key.compile_unit_id
1468 return nullptr;
1473 return &stripModifiers(indexer.template_params[0].type);
1474 }();
1476 IndexedType indexed_type{
1477 *type,
1478 determineSize(*type),
1479 index_member.linkage_name.empty() ?
1480 Address{*index_member.address} :
1481 Address{index_member.linkage_name}
1484 // Now examine the action component:
1486 const auto* action_type =
1487 stripModifiers(indexer.template_params[1].type).asObject();
1488 if (!action_type) {
1489 throw Exception{
1490 folly::sformat(
1491 "Indexer '{}' at ({},{}) action type '{}' isn't an object type",
1492 indexer.name.name,
1493 indexer.key.object_id,
1494 indexer.key.compile_unit_id,
1495 indexer.template_params[1].type.toString()
1499 const auto& action = getObject(*action_type);
1501 if (action.incomplete) {
1502 throw Exception{
1503 folly::sformat(
1504 "Indexer '{}' at ({},{}) action type '{}' at ({},{}) is incomplete",
1505 indexer.name.name,
1506 indexer.key.object_id,
1507 indexer.key.compile_unit_id,
1508 action.name.name,
1509 action.key.object_id,
1510 action.key.compile_unit_id
1515 // Use the name of the action to determine what it is:
1516 if (action.name.name == s_auto_action_name) {
1517 // Nothing
1518 } else if (action.name.name == s_scan_action_name) {
1519 indexed_type.scan = true;
1520 } else if (action.name.name == s_ignore_action_name) {
1521 indexed_type.ignore = true;
1522 } else if (isConservativeActionName(action.name.name)) {
1523 indexed_type.conservative = true;
1525 // Conservative<> is a variadic template, so we'd better sanity check the
1526 // template parameters.
1527 sanityCheckTemplateParams(action);
1529 if (!action.template_params.empty()) {
1530 // If it has template parameters, the conservative scan is
1531 // conditional. Indicate this fact by setting ignore to true as well. Once
1532 // the guards have been evaluated, ignore will either be cleared (if any
1533 // guards pass), or conservative will be cleared (if none pass).
1534 indexed_type.ignore = true;
1535 for (const auto& param : action.template_params) {
1536 indexed_type.conservative_guards.emplace_back(
1537 &stripModifiers(param.type)
1541 } else if (isWithSuffixActionName(action.name.name)) {
1542 if (action.template_params.size() != 1) {
1543 throw Exception{
1544 folly::sformat(
1545 "Indexer '{}' at ({},{}) action type '{}' at ({},{}) does not "
1546 "have exactly one template parameter",
1547 indexer.name.name,
1548 indexer.key.object_id,
1549 indexer.key.compile_unit_id,
1550 action.name.name,
1551 action.key.object_id,
1552 action.key.compile_unit_id
1556 indexed_type.suffix_type = &stripModifiers(action.template_params[0].type);
1557 } else {
1558 throw Exception{
1559 folly::sformat(
1560 "Indexer '{}' at ({},{}) action type '{}' at ({},{}) is unknown",
1561 indexer.name.name,
1562 indexer.key.object_id,
1563 indexer.key.compile_unit_id,
1564 action.name.name,
1565 action.key.object_id,
1566 action.key.compile_unit_id
1571 return indexed_type;
1574 // Retrieve the action associated with the given object type, computing a new
1575 // one if it isn't already present.
1576 const Generator::Action& Generator::getAction(const Object& object,
1577 bool conservative) const {
1578 auto iter = m_actions.find(&object);
1579 if (iter != m_actions.end()) return iter->second;
1580 return m_actions.emplace(
1581 &object,
1582 inferAction(object, conservative)
1583 ).first->second;
1586 bool Generator::findMemberHelper(const std::string& field,
1587 const Object &a_object) const {
1588 return std::any_of(
1589 a_object.members.begin(),
1590 a_object.members.end(),
1591 [&](const Object::Member& m) {
1592 if (m.type.isObject()) {
1593 const Object &inner_object = getObject(*m.type.asObject());
1594 if (inner_object.kind == Object::Kind::k_union) {
1595 return findMemberHelper(field, inner_object);
1598 return m.offset && m.name == field;
1603 // Given an object type, examine it to infer all the needed actions for that
1604 // type. The actions are inferred by looking for member functions with special
1605 // names, and static members with special names.
1606 Generator::Action Generator::inferAction(const Object& object,
1607 bool conservative_everything) const {
1608 if (object.incomplete) {
1609 throw Exception{
1610 folly::sformat(
1611 "Trying to infer actions on object type '{}' at ({},{}) "
1612 "which is incomplete",
1613 object.name.name,
1614 object.key.object_id,
1615 object.key.compile_unit_id
1620 Action action;
1622 if (conservative_everything) {
1623 action.conservative_all = true;
1624 action.conservative_all_bases = true;
1625 return action;
1628 // White-listing and forbidden templates are determined by just checking the
1629 // name against explicit lists.
1630 if (HPHP::type_scan::detail::isIgnoredType(object.name.name)) {
1631 action.whitelisted = true;
1632 return action;
1635 if (HPHP::type_scan::detail::isForbiddenTemplate(object.name.name)) {
1636 sanityCheckTemplateParams(object);
1637 action.forbidden_template = true;
1638 return action;
1641 if (HPHP::type_scan::detail::isForcedConservativeTemplate(object.name.name)) {
1642 sanityCheckTemplateParams(object);
1643 action.conservative_all = true;
1644 action.conservative_all_bases = true;
1645 return action;
1648 const auto find_member = [&](const std::string& field) {
1649 return findMemberHelper(field, object);
1652 const auto find_base = [&](const Object& base) {
1653 return std::any_of(
1654 object.bases.begin(),
1655 object.bases.end(),
1656 [&](const Object::Base& b) { return &getObject(b.type) == &base; }
1660 for (const auto& fun : object.functions) {
1661 // Sanity check special member function. All the functions should take a
1662 // const pointer to the contained object type as the first parameter (the
1663 // this pointer), and a non-const reference to HPHP::type_scan::Scanner as
1664 // the second (and nothing else). The return type should be void.
1665 auto verify_func = [&](const Object::Function& func) {
1666 if (func.kind != Object::Function::Kind::k_member) {
1667 throw Exception{
1668 folly::sformat(
1669 "Object type '{}' at ({},{}) contains scanner func '{}' "
1670 "which is not a non-static, non-virtual member",
1671 object.name.name,
1672 object.key.object_id,
1673 object.key.compile_unit_id,
1674 func.name
1679 if (!func.ret_type.isVoid()) {
1680 throw Exception{
1681 folly::sformat(
1682 "Object type '{}' at ({},{}) contains scanner func '{}' "
1683 "which does not have a void return type",
1684 object.name.name,
1685 object.key.object_id,
1686 object.key.compile_unit_id,
1687 func.name
1692 if (func.arg_types.size() != 2) {
1693 throw Exception{
1694 folly::sformat(
1695 "Object type '{}' at ({},{}) contains scanner func '{}' "
1696 "which does not take exactly two parameter ({})",
1697 object.name.name,
1698 object.key.object_id,
1699 object.key.compile_unit_id,
1700 func.name,
1701 func.arg_types.size()
1706 const auto& this_arg = func.arg_types[0];
1707 const auto* this_ptr_arg = this_arg.asPtr();
1708 if (!this_ptr_arg) {
1709 throw Exception{
1710 folly::sformat(
1711 "Object type '{}' at ({},{}) contains scanner func '{}' "
1712 "whose first parameter isn't a pointer type '{}'",
1713 object.name.name,
1714 object.key.object_id,
1715 object.key.compile_unit_id,
1716 func.name,
1717 this_arg.toString()
1722 const auto* this_const_arg = this_ptr_arg->pointee.asConst();
1723 if (!this_const_arg) {
1724 throw Exception{
1725 folly::sformat(
1726 "Object type '{}' at ({},{}) contains scanner func '{}' "
1727 "whose first parameter isn't a const pointer type '{}'",
1728 object.name.name,
1729 object.key.object_id,
1730 object.key.compile_unit_id,
1731 func.name,
1732 this_arg.toString()
1737 const auto* this_obj_arg = this_const_arg->modified.asObject();
1738 if (!this_obj_arg) {
1739 throw Exception{
1740 folly::sformat(
1741 "Object type '{}' at ({},{}) contains scanner func '{}' "
1742 "whose first parameter isn't a pointer type to object type '{}'",
1743 object.name.name,
1744 object.key.object_id,
1745 object.key.compile_unit_id,
1746 func.name,
1747 this_arg.toString()
1752 if (&getObject(*this_obj_arg) != &object) {
1753 throw Exception{
1754 folly::sformat(
1755 "Object type '{}' at ({},{}) contains scanner func '{}' "
1756 "whose first parameter isn't a valid this pointer '{}'",
1757 object.name.name,
1758 object.key.object_id,
1759 object.key.compile_unit_id,
1760 func.name,
1761 this_arg.toString()
1766 const auto& scanner_arg = func.arg_types[1];
1767 const auto* scanner_ref_arg = scanner_arg.asRef();
1768 if (!scanner_ref_arg) {
1769 throw Exception{
1770 folly::sformat(
1771 "Object type '{}' at ({},{}) contains scanner func '{}' "
1772 "whose second parameter isn't a reference '{}'",
1773 object.name.name,
1774 object.key.object_id,
1775 object.key.compile_unit_id,
1776 func.name,
1777 scanner_arg.toString()
1782 const auto* scanner_obj_arg = scanner_ref_arg->referenced.asObject();
1783 if (!scanner_obj_arg) {
1784 throw Exception{
1785 folly::sformat(
1786 "Object type '{}' at ({},{}) contains scanner func '{}' "
1787 "whose second parameter isn't a reference to object-type '{}'",
1788 object.name.name,
1789 object.key.object_id,
1790 object.key.compile_unit_id,
1791 func.name,
1792 scanner_arg.toString()
1797 const auto& scanner_obj = getObject(*scanner_obj_arg);
1798 if (scanner_obj.name.name != s_scanner_name) {
1799 throw Exception{
1800 folly::sformat(
1801 "Object type '{}' at ({},{}) contains scanner func '{}' "
1802 "whose second parameter isn't a reference to "
1803 "{} '{}'",
1804 object.name.name,
1805 object.key.object_id,
1806 object.key.compile_unit_id,
1807 func.name,
1808 std::string{s_scanner_name},
1809 scanner_arg.toString()
1815 // Custom scanner for particular field.
1816 auto custom_field = splitFieldName(
1817 fun.name,
1818 HPHP::type_scan::detail::kCustomFieldName
1820 if (!custom_field.empty()) {
1821 verify_func(fun);
1823 if (!find_member(custom_field)) {
1824 throw Exception{
1825 folly::sformat(
1826 "Object type '{}' at ({},{}) contains custom field marker "
1827 "referring to unknown non-static field '{}'",
1828 object.name.name,
1829 object.key.object_id,
1830 object.key.compile_unit_id,
1831 custom_field
1836 action.custom_fields.emplace(
1837 std::move(custom_field),
1838 fun.linkage_name
1842 // Custom scanner for entire object.
1843 if (fun.name == HPHP::type_scan::detail::kCustomName) {
1844 verify_func(fun);
1845 action.custom_all = fun.linkage_name;
1846 continue;
1849 // Custom scanner for base classes.
1850 if (fun.name == HPHP::type_scan::detail::kCustomBasesScannerName) {
1851 verify_func(fun);
1852 action.custom_bases_scanner = fun.linkage_name;
1853 continue;
1857 for (const auto& member : object.members) {
1858 // All special member markers should be static, so ignore anything that's
1859 // not.
1860 if (member.offset) continue;
1862 // Ignore a field.
1863 auto ignore_field = splitFieldName(
1864 member.name,
1865 HPHP::type_scan::detail::kIgnoreFieldName
1867 if (!ignore_field.empty()) {
1868 if (!find_member(ignore_field)) {
1869 throw Exception{
1870 folly::sformat(
1871 "Object type '{}' at ({},{}) contains ignore field marker "
1872 "referring to unknown non-static field '{}'",
1873 object.name.name,
1874 object.key.object_id,
1875 object.key.compile_unit_id,
1876 ignore_field
1881 action.ignore_fields.emplace(std::move(ignore_field));
1882 continue;
1885 // Scan field conservatively.
1886 auto conservative_field = splitFieldName(
1887 member.name,
1888 HPHP::type_scan::detail::kConservativeFieldName
1890 if (!conservative_field.empty()) {
1891 if (!find_member(conservative_field)) {
1892 throw Exception{
1893 folly::sformat(
1894 "Object type '{}' at ({},{}) contains conservative field marker "
1895 "referring to unknown non-static field '{}'",
1896 object.name.name,
1897 object.key.object_id,
1898 object.key.compile_unit_id,
1899 conservative_field
1904 action.conservative_fields.emplace(std::move(conservative_field));
1905 continue;
1908 // Marks flexible array field. There can only be one of these per object
1909 // type.
1910 auto flexible_array_field = splitFieldName(
1911 member.name,
1912 HPHP::type_scan::detail::kFlexibleArrayFieldName
1914 if (!flexible_array_field.empty()) {
1915 if (!action.flexible_array_field.empty()) {
1916 throw Exception{
1917 folly::sformat(
1918 "Object type '{}' at ({},{}) contains more than one flexible "
1919 "array field marker",
1920 object.name.name,
1921 object.key.object_id,
1922 object.key.compile_unit_id
1927 if (!find_member(flexible_array_field)) {
1928 throw Exception{
1929 folly::sformat(
1930 "Object type '{}' at ({},{}) contains flexible array field marker "
1931 "referring to unknown non-static field '{}'",
1932 object.name.name,
1933 object.key.object_id,
1934 object.key.compile_unit_id,
1935 flexible_array_field
1940 action.flexible_array_field = std::move(flexible_array_field);
1943 // Ignore entire object.
1944 if (member.name == HPHP::type_scan::detail::kIgnoreName) {
1945 action.ignore_all = true;
1946 continue;
1949 // Conservative scan entire object.
1950 if (member.name == HPHP::type_scan::detail::kConservativeName) {
1951 action.conservative_all = true;
1952 continue;
1955 // Ignore specific base.
1956 if (member.name == HPHP::type_scan::detail::kIgnoreBaseName) {
1957 const auto* ignore_type = stripModifiers(member.type).asObject();
1958 if (!ignore_type) {
1959 throw Exception{
1960 folly::sformat(
1961 "Object type '{}' at ({},{}) contains an ignore base marker "
1962 "for a non-object type '{}'",
1963 object.name.name,
1964 object.key.object_id,
1965 object.key.compile_unit_id,
1966 member.type.toString()
1971 const auto& ignore = getObject(*ignore_type);
1972 // This is a variadic template, so sanity check it.
1973 sanityCheckTemplateParams(ignore);
1974 for (const auto& param : ignore.template_params) {
1975 const auto* ignored_type = stripModifiers(param.type).asObject();
1976 if (!ignored_type) {
1977 throw Exception{
1978 folly::sformat(
1979 "Object type '{}' at ({},{}) contains an ignore base marker "
1980 "instantiated on non-object type '{}'",
1981 object.name.name,
1982 object.key.object_id,
1983 object.key.compile_unit_id,
1984 param.type.toString()
1989 const auto& ignored = getObject(*ignored_type);
1990 if (!find_base(ignored)) {
1991 throw Exception{
1992 folly::sformat(
1993 "Object type '{}' at ({},{}) contains an ignore base marker "
1994 "instantiated on object-type '{}' which isn't a base class",
1995 object.name.name,
1996 object.key.object_id,
1997 object.key.compile_unit_id,
1998 ignored.name.name
2002 action.ignored_bases.emplace(&ignored);
2004 continue;
2007 // Don't complain about a particular base class violating a forbidden
2008 // template check.
2009 if (member.name == HPHP::type_scan::detail::kSilenceForbiddenBaseName) {
2010 const auto* silence_type = stripModifiers(member.type).asObject();
2011 if (!silence_type) {
2012 throw Exception{
2013 folly::sformat(
2014 "Object type '{}' at ({},{}) contains a silence base marker "
2015 "for a non-object type '{}'",
2016 object.name.name,
2017 object.key.object_id,
2018 object.key.compile_unit_id,
2019 member.type.toString()
2024 const auto& silence = getObject(*silence_type);
2025 // This is a variadic template, so sanity check it.
2026 sanityCheckTemplateParams(silence);
2027 for (const auto& param : silence.template_params) {
2028 const auto* silenced_type = stripModifiers(param.type).asObject();
2029 if (!silenced_type) {
2030 throw Exception{
2031 folly::sformat(
2032 "Object type '{}' at ({},{}) contains a silence base marker "
2033 "instantiated on non-object type '{}'",
2034 object.name.name,
2035 object.key.object_id,
2036 object.key.compile_unit_id,
2037 param.type.toString()
2042 const auto& silenced = getObject(*silenced_type);
2043 if (!find_base(silenced)) {
2044 throw Exception{
2045 folly::sformat(
2046 "Object type '{}' at ({},{}) contains a silence base marker "
2047 "instantiated on object-type '{}' which isn't a base class",
2048 object.name.name,
2049 object.key.object_id,
2050 object.key.compile_unit_id,
2051 silenced.name.name
2055 action.silenced_bases.emplace(&silenced);
2057 continue;
2060 // List of base classes to apply the custom bases scan to.
2061 if (action.custom_bases_scanner &&
2062 member.name == HPHP::type_scan::detail::kCustomBasesName) {
2063 const auto* custom_list_type = stripModifiers(member.type).asObject();
2064 if (!custom_list_type) {
2065 throw Exception{
2066 folly::sformat(
2067 "Object type '{}' at ({},{}) contains a custom base marker "
2068 "for a non-object type '{}'",
2069 object.name.name,
2070 object.key.object_id,
2071 object.key.compile_unit_id,
2072 member.type.toString()
2077 const auto& custom_list = getObject(*custom_list_type);
2078 // This is a variadic template, so sanity check it.
2079 sanityCheckTemplateParams(custom_list);
2080 for (const auto& param : custom_list.template_params) {
2081 const auto* custom_type = stripModifiers(param.type).asObject();
2082 if (!custom_type) {
2083 throw Exception{
2084 folly::sformat(
2085 "Object type '{}' at ({},{}) contains a custom base marker "
2086 "instantiated on non-object type '{}'",
2087 object.name.name,
2088 object.key.object_id,
2089 object.key.compile_unit_id,
2090 param.type.toString()
2095 const auto& custom = getObject(*custom_type);
2096 if (!find_base(custom)) {
2097 throw Exception{
2098 folly::sformat(
2099 "Object type '{}' at ({},{}) contains a custom base marker "
2100 "instantiated on object-type '{}' which isn't a base class",
2101 object.name.name,
2102 object.key.object_id,
2103 object.key.compile_unit_id,
2104 custom.name.name
2108 action.custom_bases.emplace(&custom);
2110 continue;
2113 // If there's a custom scanner for the entire object, list of types to guard
2114 // on.
2115 if (action.custom_all &&
2116 member.name == HPHP::type_scan::detail::kCustomGuardName) {
2117 const auto* guard_type = stripModifiers(member.type).asObject();
2118 if (!guard_type) {
2119 throw Exception{
2120 folly::sformat(
2121 "Object type '{}' at ({},{}) contains a custom guard marker "
2122 "instantiated on non-object type '{}'",
2123 object.name.name,
2124 object.key.object_id,
2125 object.key.compile_unit_id,
2126 member.type.toString()
2131 const auto& guard = getObject(*guard_type);
2132 // This is a variadic template, so sanity check it.
2133 sanityCheckTemplateParams(guard);
2134 for (const auto& param : guard.template_params) {
2135 action.custom_guards.emplace(&param.type);
2137 continue;
2141 return action;
2144 // Given an object type, return the matching Object representation. Even for
2145 // object types with different keys, if the underlying object type is the "same"
2146 // (according to the linkage rules), the same Object representation will be
2147 // returned. This means that one can then use pointer equality to check for
2148 // equality between two object types.
2149 const Object& Generator::getObject(const ObjectType& type) const {
2150 // First attempt to lookup the Object in our internal maps. Use the
2151 // appropriate map according to the object type's linkage. This ensures that
2152 // object types that represent the same underlying object always returns the
2153 // same Object&.
2154 switch (type.name.linkage) {
2155 case ObjectTypeName::Linkage::external: {
2156 auto iter = m_external_objects.find(type.name.name);
2157 if (iter != m_external_objects.end()) return iter->second;
2158 break;
2160 case ObjectTypeName::Linkage::internal: {
2161 auto cu_iter = m_internal_objects.find(type.key.compile_unit_id);
2162 if (cu_iter != m_internal_objects.end()) {
2163 auto iter = cu_iter->second.find(type.name.name);
2164 if (iter != cu_iter->second.end()) return iter->second;
2166 break;
2168 case ObjectTypeName::Linkage::none:
2169 case ObjectTypeName::Linkage::pseudo: {
2170 auto iter = m_unique_objects.find(type.key.object_id);
2171 if (iter != m_unique_objects.end()) return iter->second;
2172 break;
2176 const auto insert = [&](Object object) -> const Object& {
2177 switch (object.name.linkage) {
2178 case ObjectTypeName::Linkage::external: {
2179 return m_external_objects.emplace(
2180 type.name.name,
2181 std::move(object)
2182 ).first->second;
2184 case ObjectTypeName::Linkage::internal: {
2185 return m_internal_objects[type.key.compile_unit_id].emplace(
2186 type.name.name,
2187 std::move(object)
2188 ).first->second;
2190 case ObjectTypeName::Linkage::none:
2191 case ObjectTypeName::Linkage::pseudo: {
2192 return m_unique_objects.emplace(
2193 type.key.object_id,
2194 std::move(object)
2195 ).first->second;
2198 not_reached();
2201 // Check if the object is a valid indexer. LLVM for clang 15/16 will sometimes
2202 // produce a DW_TAG_structure_type for the same Indexer<...> type in a
2203 // compile_unit without a corresponding DW_TAG_variable. This check will skip
2204 // such cases and end up pick a different type.
2205 const auto isValid = [&](const Object& obj) -> bool {
2206 if (isIndexerName(type.name.name)) {
2207 const auto index_iter = std::find_if(
2208 obj.members.begin(),
2209 obj.members.end(),
2210 [](const Object::Member& m) { return m.name == "s_index"; }
2213 if (index_iter == obj.members.end()) {
2214 return false;
2217 const auto& index_member = *index_iter;
2218 if (!index_member.address && index_member.linkage_name.empty()) {
2219 return false;
2223 return true;
2226 // No direct matches in our internal maps, so we need to retrieve it from the
2227 // type parser. If the type is complete we can just retrieve it and use it
2228 // directly. If this type has no linkage or pseudo-linkage, it matches nothing
2229 // else, so just retrieve it. Store it in our maps for later lookup.
2230 if (isValid(m_parser->getObject(type.key)) && (!type.incomplete ||
2231 type.name.linkage == ObjectTypeName::Linkage::none ||
2232 type.name.linkage == ObjectTypeName::Linkage::pseudo)) {
2233 return insert(m_parser->getObject(type.key));
2236 // The object type is incomplete, but has internal or external linkage. We
2237 // only want to return an incomplete object as a last resort, so let's look
2238 // for any possible definitions of this type elsewhere.
2240 auto const name_iter = m_objects_by_name.find(type.name);
2241 if (name_iter != m_objects_by_name.end()) {
2242 auto const& keys = name_iter->second;
2243 // First look for a type with the same name in the same compilation unit. If
2244 // there's one that's a complete definition, use that.
2245 for (auto const& key : keys) {
2246 if (key.object_id == type.key.object_id) continue;
2247 if (key.compile_unit_id != type.key.compile_unit_id) continue;
2248 auto other = m_parser->getObject(key);
2249 if (!isValid(other)) continue;
2250 if (other.incomplete) continue;
2251 return insert(std::move(other));
2253 // Otherwise if the type has external linkage, look for any type in any
2254 // compilation unit (with external linkage) with the same name and having a
2255 // complete definition.
2256 if (type.name.linkage == ObjectTypeName::Linkage::internal) {
2257 // Newer clang seems to split some types into different units,
2258 // or at least we are not able to tell that they are the same.
2259 std::cout << "gen-type-scanners: warning: "
2260 "No matching type found for internal linkage type " <<
2261 type.name.name << " in same compile unit. "
2262 "Trying other compile units." << std::endl;
2264 for (auto const& key : keys) {
2265 if (key.object_id == type.key.object_id) continue;
2266 auto other = m_parser->getObject(key);
2267 if (!isValid(other)) continue;
2268 if (other.incomplete) continue;
2269 return insert(std::move(other));
2273 // There doesn't appear to be a complete definition of this type anywhere, so
2274 // just return the incomplete object representation. This will probably error
2275 // elsewhere, but there's nothing we can do.
2276 return insert(m_parser->getObject(type.key));
2279 // Given a type, fill the given layout (starting at the specified offset)
2280 // appropriate for that type. LayoutError will be thrown if an ambiguous
2281 // construct is encountered.
2282 void Generator::genLayout(const Type& type,
2283 Layout& layout,
2284 size_t offset,
2285 bool conservative_everything) const {
2286 return type.match<void>(
2287 [&](const ObjectType* t) {
2288 genLayout(getObject(*t), layout, offset, true, conservative_everything);
2290 [&](const PtrType* t) {
2291 // Don't care about pointers to non-pointer followable types.
2292 if (!isPtrFollowable(type)) return;
2293 if (t->pointee.isVoid()) {
2294 throw LayoutError{
2295 "Generic pointer to void. Add annotation to disambiguate."
2298 layout.addPtr(offset);
2300 [&](const RefType* t) {
2301 // Don't care about pointers to non-pointer followable types.
2302 if (!isPtrFollowable(type)) return;
2303 if (t->referenced.isVoid()) {
2304 throw LayoutError{
2305 "Generic pointer to void. Add annotation to disambiguate."
2308 layout.addPtr(offset);
2310 [&](const RValueRefType* t) {
2311 // Don't care about pointers to non-pointer followable types.
2312 if (!isPtrFollowable(type)) return;
2313 if (t->referenced.isVoid()) {
2314 throw LayoutError{
2315 "Generic pointer to void. Add annotation to disambiguate."
2318 layout.addPtr(offset);
2320 [&](const ArrType* t) {
2321 if (!t->count) {
2322 throw LayoutError{
2323 "Array of indeterminate size. Add annotation to disambiguate."
2326 Layout sublayout{determineSize(t->element)};
2327 genLayout(t->element, sublayout, 0, conservative_everything);
2328 layout.addSubLayout(
2329 offset,
2330 *t->count,
2331 sublayout
2334 [&](const ConstType* t) {
2335 genLayout(t->modified, layout, offset, conservative_everything);
2337 [&](const VolatileType* t) {
2338 genLayout(t->modified, layout, offset, conservative_everything);
2340 [&](const RestrictType* t) {
2341 genLayout(t->modified, layout, offset, conservative_everything);
2343 [&](const FuncType*) {},
2344 [&](const MemberType*) {},
2345 [&](const VoidType*) {}
2349 // Check if the given object member is associated with a special action,
2350 // recording it into the given Layout as needed. Unnamed unions are recursed
2351 // into with their members being treated as members of the enclosing
2352 // object. Returns true if the layout was modified, false otherwise.
2353 bool Generator::checkMemberSpecialAction(const Object& base_object,
2354 const Object::Member& member,
2355 const Action& action,
2356 Layout& layout,
2357 size_t base_obj_offset,
2358 size_t offset) const {
2359 if (member.type.isObject()) {
2360 auto const& object = getObject(*member.type.asObject());
2361 // Treat members of an unnamed union as members
2362 // of the enclosing struct.
2363 if (object.kind == Object::Kind::k_union) {
2364 for (auto const& obj_member : object.members) {
2365 if (!obj_member.offset) continue;
2366 // Recurse: the unions themselves might contain unnamed unions.
2367 if (checkMemberSpecialAction(base_object, obj_member, action,
2368 layout, base_obj_offset,
2369 offset + *obj_member.offset)) {
2370 return true;
2376 // The sole purpose of marking the flexible array member is so we know
2377 // where the suffix begins. The suffix usually begins at the end of the
2378 // object, but sometimes within it.
2379 if (member.type.isArr() && action.flexible_array_field == member.name) {
2380 layout.suffix_begin = offset;
2381 return true;
2384 if (action.ignore_fields.count(member.name) > 0) return true;
2386 if (action.conservative_fields.count(member.name) > 0) {
2387 layout.addConservative(offset, determineSize(member.type));
2388 return true;
2391 auto custom_iter = action.custom_fields.find(member.name);
2392 if (custom_iter != action.custom_fields.end()) {
2393 if (custom_iter->second.empty()) {
2394 throw LayoutError{
2395 folly::sformat(
2396 "'{}' needs to have external linkage (not in unnamed namespace)"
2397 " to use custom field scanner. If a template, template parameters"
2398 " must have external linkage as well.",
2399 base_object.name.name
2403 layout.addCustom(base_obj_offset, custom_iter->second);
2404 return true;
2407 return false;
2410 // Given an object type representation, fill the given Layout (starting at
2411 // specified offset) with the appropriate layout for that object
2412 // type. LayoutError will be thrown if an ambiguous construct is encountered.
2413 void Generator::genLayout(const Object& object,
2414 Layout& layout,
2415 size_t offset,
2416 bool do_forbidden_check,
2417 bool conservative_everything) const {
2418 // Never generate layout for collectable types, unless it was marked as
2419 // scannable.
2420 if (m_collectable.count(&object) > 0 &&
2421 !m_scannable_collectable.count(&object)) {
2422 return;
2425 if (!m_layout_being_generated.emplace(&object).second) {
2426 throw LayoutError{
2427 folly::sformat(
2428 "'{}' is contained within a recursive definition. "
2429 "This can only happen with invalid debug information "
2430 "or a type-scanner generator bug.",
2431 object.name.name
2435 SCOPE_EXIT { m_layout_being_generated.erase(&object); };
2437 const auto& action = getAction(object, conservative_everything);
2439 // A whitelisted type should be ignored entirely.
2440 if (action.whitelisted) return;
2442 // If this is a forbidden template (and forbidden template checking has been
2443 // enabled), check if any of the template's type parameters are
2444 // interesting. If so, this is an error, as the user shouldn't be using such
2445 // types in this template.
2446 if (do_forbidden_check &&
2447 action.forbidden_template &&
2448 forbiddenTemplateCheck(object)) {
2449 throw LayoutError{
2450 folly::sformat(
2451 "'{}' shouldn't be used with potentially req-heap "
2452 "allocated objects. Use req:: equivalents instead or add "
2453 "annotations.",
2454 object.name.name
2459 // Process the base classes first to maintain rough offset ordering.
2460 for (const auto& base : object.bases) {
2461 try {
2462 const auto& obj = getObject(base.type);
2463 if (action.ignored_bases.count(&obj)) continue;
2465 // Any base which has been included with the custom base scanner should be
2466 // ignored here, as we'll do one call to the custom scanner.
2467 if (action.custom_bases_scanner && action.custom_bases.count(&obj)) {
2468 continue;
2471 // Virtual inheritance. The generator doesn't know how to get the base
2472 // class from the derived (though in theory this could be inferred from
2473 // the debug information), so punt and make the user specify a custom base
2474 // scanner.
2475 if (!base.offset) {
2476 throw LayoutError{
2477 "Base is inherited virtually. "
2478 "Add annotations to convert manually."
2482 // Recursively generate layout for the base.
2483 genLayout(
2484 obj,
2485 layout,
2486 offset + *base.offset,
2487 !action.silenced_bases.count(&obj),
2488 action.conservative_all_bases
2490 } catch (LayoutError& exn) {
2491 exn.addContext(
2492 folly::sformat(
2493 "from base class '{}'",
2494 base.type.name.name
2497 throw;
2501 // Do the single call to the custom bases scanner if there is one.
2502 if (action.custom_bases_scanner && !action.custom_bases.empty()) {
2503 if (action.custom_bases_scanner->empty()) {
2504 throw LayoutError{
2505 folly::sformat(
2506 "'{}' needs to have external linkage (not in unnamed namespace)"
2507 " to use custom base scanner. If a template, template parameters"
2508 " must have external linkage as well.",
2509 object.name.name
2513 layout.addCustom(offset, *action.custom_bases_scanner);
2516 if (action.ignore_all) return;
2518 if (action.custom_all) {
2519 // We'll use the custom scanner if there's no guards, or if at least one of
2520 // the guards is interesting.
2521 if (action.custom_guards.empty() ||
2522 std::any_of(
2523 action.custom_guards.begin(),
2524 action.custom_guards.end(),
2525 [this](const Type* guard) { return !hasEmptyLayout(*guard); }
2529 // Ooops, the custom scanner function doesn't have a symbol, which
2530 // probably means it doesn't have external linkage. We can't reliably call
2531 // such things, so error out.
2532 if (action.custom_all->empty()) {
2533 throw LayoutError{
2534 folly::sformat(
2535 "'{}' needs to have external linkage (not in unnamed namespace)"
2536 " to use custom scanner. If a template, template parameters must"
2537 " have external linkage as well.",
2538 object.name.name
2542 layout.addCustom(offset, *action.custom_all);
2544 return;
2547 if (action.conservative_all) {
2548 // Determine the begin and end offsets of this type and set up a
2549 // conservative scan for that range. We can't simply use (0, object size)
2550 // because we do not want to include base classes, nor padding which we know
2551 // can't contain any fields.
2552 size_t begin = std::numeric_limits<size_t>::max();
2553 size_t end = std::numeric_limits<size_t>::min();
2554 for (const auto& member : object.members) {
2555 if (!member.offset) continue;
2556 begin = std::min(begin, *member.offset);
2557 end = std::max(end, *member.offset + determineSize(member.type));
2559 if (begin < end) {
2560 layout.addConservative(begin + offset, end - begin);
2562 return;
2565 // Unions are special. If all the members of the union have the same layout,
2566 // we can just use that. If not, its a LayoutError, as a custom scanner is
2567 // needed to disambiguate it.
2568 if (object.kind == Object::Kind::k_union) {
2569 Layout first_layout{object.size};
2570 bool first = true;
2571 for (const auto& member : object.members) {
2572 if (!member.offset) continue;
2574 if (first) {
2575 genLayout(member.type, first_layout, 0, conservative_everything);
2576 first = false;
2577 } else {
2578 Layout other_layout{object.size};
2579 genLayout(member.type, other_layout, 0, conservative_everything);
2580 if (first_layout != other_layout) {
2581 throw LayoutError{
2582 folly::sformat(
2583 "'{}' is a union containing potentially req-heap allocated "
2584 "objects with different layouts. Add annotation to disambiguate "
2585 "contents. (Conflicting members: '{}' and '{}')",
2586 object.name.name,
2587 object.members.front().name,
2588 member.name
2595 layout.merge(first_layout, offset);
2596 return;
2599 for (const auto& member : object.members) {
2600 // Only non-static members.
2601 if (!member.offset) continue;
2603 // Check if this member has a special action. If it does, we're done
2604 // processing it.
2605 if (checkMemberSpecialAction(object, member, action,
2606 layout, offset,
2607 offset + *member.offset)) {
2608 continue;
2611 // Otherwise generate its layout recursively.
2612 try {
2613 genLayout(
2614 member.type,
2615 layout,
2616 offset + *member.offset,
2617 conservative_everything
2619 } catch (LayoutError& exn) {
2620 exn.addContext(
2621 folly::sformat(
2622 "from member '{}' of type '{}'",
2623 member.name,
2624 member.type.toString()
2627 throw;
2632 // Given a type, determine if it is pointer followable. Only object types (which
2633 // are recorded as being pointer followable) pass, as do void pointers (though
2634 // we cannot generate layout for them).
2635 bool Generator::isPtrFollowable(const Type& type) const {
2636 return type.match<bool>(
2637 [&](const ObjectType* t) { return isPtrFollowable(getObject(*t)); },
2638 [&](const PtrType* t) {
2639 if (t->pointee.isVoid()) return true;
2640 return isPtrFollowable(t->pointee);
2642 [&](const RefType* t) {
2643 if (t->referenced.isVoid()) return true;
2644 return isPtrFollowable(t->referenced);
2646 [&](const RValueRefType* t) {
2647 if (t->referenced.isVoid()) return true;
2648 return isPtrFollowable(t->referenced);
2650 [&](const ArrType* t) { return isPtrFollowable(t->element); },
2651 [&](const ConstType* t) { return isPtrFollowable(t->modified); },
2652 [&](const VolatileType* t) { return isPtrFollowable(t->modified); },
2653 [&](const RestrictType* t) { return isPtrFollowable(t->modified); },
2654 [&](const FuncType*) { return false; },
2655 [&](const MemberType*) { return false; },
2656 [&](const VoidType*) { return false; }
2660 // Make a given type pointer followable. This just walks the type hierarchy,
2661 // marking the contained object type (if any) as being pointer followable.
2662 void Generator::makePtrFollowable(const Type& type) {
2663 return type.match<void>(
2664 [&](const ObjectType* t) { makePtrFollowable(getObject(*t)); },
2665 [&](const PtrType*) {},
2666 [&](const RefType*) {},
2667 [&](const RValueRefType*) {},
2668 [&](const ArrType* t) { makePtrFollowable(t->element); },
2669 [&](const ConstType* t) { makePtrFollowable(t->modified); },
2670 [&](const VolatileType* t) { makePtrFollowable(t->modified); },
2671 [&](const RestrictType* t) { makePtrFollowable(t->modified); },
2672 [&](const FuncType*) {},
2673 [&](const MemberType*) {},
2674 [&](const VoidType*) {}
2678 // Mark a given object type pointer followable. If an object type is marked
2679 // pointer followable, all its bases must be as well (because a pointer to a
2680 // base could be pointing towards this object type).
2681 void Generator::makePtrFollowable(const Object& obj) {
2682 m_ptr_followable.emplace(&obj);
2683 for (const auto& base : obj.bases) {
2684 makePtrFollowable(getObject(base.type));
2688 // Recursive function to check if a given object has a collectable base
2689 // somewhere in its type hierarchy.
2690 bool Generator::hasCollectableBase(const Object& object) const {
2691 if (m_collectable.count(&object)) return true;
2692 return std::any_of(
2693 object.bases.begin(),
2694 object.bases.end(),
2695 [this](const Object::Base& b) {
2696 return hasCollectableBase(getObject(b.type));
2701 // Given a type, check if this is an object type with any template parameters
2702 // being a pointer followable type.
2703 bool Generator::forbiddenTemplateCheck(const Type& type) const {
2704 return type.match<bool>(
2705 [&](const ObjectType* t) { return forbiddenTemplateCheck(getObject(*t)); },
2706 [&](const PtrType* t) { return forbiddenTemplateCheck(t->pointee); },
2707 [&](const RefType* t) { return forbiddenTemplateCheck(t->referenced); },
2708 [&](const RValueRefType* t) {
2709 return forbiddenTemplateCheck(t->referenced);
2711 [&](const ArrType* t) { return forbiddenTemplateCheck(t->element); },
2712 [&](const ConstType* t) { return forbiddenTemplateCheck(t->modified); },
2713 [&](const VolatileType* t) {
2714 return forbiddenTemplateCheck(t->modified);
2716 [&](const RestrictType* t) {
2717 return forbiddenTemplateCheck(t->modified);
2719 [&](const FuncType*) { return false; },
2720 [&](const MemberType*) { return false; },
2721 [&](const VoidType*) { return false; }
2725 // Given an object type, check if any template parameters are a pointer
2726 // followable type.
2727 bool Generator::forbiddenTemplateCheck(const Object& object) const {
2728 if (isPtrFollowable(object)) return true;
2729 for (const auto& param : object.template_params) {
2730 if (forbiddenTemplateCheck(param.type)) return true;
2732 return false;
2735 // Given a type, determine if it is non-interesting, IE, its generated layout is
2736 // empty.
2737 bool Generator::hasEmptyLayout(const Type& type) const {
2738 try {
2739 Layout layout{determineSize(type)};
2740 genLayout(type, layout, 0);
2741 return layout.isEmpty();
2742 } catch (const LayoutError&) {
2743 return false;
2747 // Given an object type, determine if it is non-interesting, IE, its generated
2748 // layout is empty.
2749 bool Generator::hasEmptyLayout(const Object& object) const {
2750 try {
2751 Layout layout{object.size};
2752 genLayout(object, layout, 0);
2753 return layout.isEmpty();
2754 } catch (const LayoutError&) {
2755 return false;
2759 // Given a type, determine how many bytes an instance of that type occupies.
2760 size_t Generator::determineSize(const Type& type) const {
2761 return type.match<size_t>(
2762 [&](const ObjectType* t) { return getObject(*t).size; },
2763 // This is valid because we run on architectures where all pointers are the
2764 // same size, and we always generate the heap scanners at the same time
2765 // we're building everything else.
2766 [&](const PtrType*) { return sizeof(void*); },
2767 [&](const RefType*) { return sizeof(void*); },
2768 [&](const RValueRefType*) { return sizeof(void*); },
2769 [&](const ArrType* t) {
2770 if (!t->count) return size_t{0};
2771 return determineSize(t->element) * *t->count;
2773 [&](const ConstType* t) { return determineSize(t->modified); },
2774 [&](const VolatileType* t) { return determineSize(t->modified); },
2775 [&](const RestrictType* t) { return determineSize(t->modified); },
2776 // These are somewhat dubious, and shouldn't really occur:
2777 [&](const FuncType*) { return size_t{0}; },
2778 [&](const MemberType*) { return size_t{0}; },
2779 [&](const VoidType*) { return size_t{0}; }
2784 * Generate layouts for all indexed types. This is an iterative method since the
2785 * layout depends on which types are pointer followable, and what types are
2786 * pointer followable depends on which types are interesting, and what types are
2787 * interesting depends on their layout.
2789 * For every indexed type, we generate its layout. If the type was previously
2790 * uninteresting, but the new layout makes it interesting, mark it as pointer
2791 * followable. Continue this process until we take a full pass through the
2792 * indexed type list without computing a different layout for all types.
2794 void Generator::genAllLayouts() {
2795 bool changed;
2796 do {
2797 changed = false;
2798 for (auto& indexed : m_indexed_types) {
2799 try {
2800 // No point in continuing with this one if we already have a
2801 // LayoutError.
2802 if (indexed.errors) continue;
2804 // If this indexed type's action is conservative, examine guards (if
2805 // any) to see if we want to ignore or conservative scan it.
2806 if (indexed.conservative) {
2807 // If ignore isn't set, the issue has already been decided
2808 // (conservative scan).
2809 if (!indexed.ignore) continue;
2810 // Otherwise, iterate over all the conservative guards, seeing if any
2811 // are interesting.
2812 for (const auto* guard : indexed.conservative_guards) {
2813 if (!hasEmptyLayout(*guard)) {
2814 indexed.ignore = false;
2815 makePtrFollowable(*indexed.type);
2816 changed = true;
2817 break;
2820 continue;
2823 if (indexed.ignore) continue;
2825 // Generate the new layout for this type, including any suffix.
2826 Layout new_layout{determineSize(*indexed.type)};
2827 genLayout(*indexed.type, new_layout, 0);
2828 if (indexed.suffix_type) {
2829 Layout suffix_layout{determineSize(*indexed.suffix_type)};
2830 genLayout(*indexed.suffix_type, suffix_layout, 0);
2831 new_layout.setSuffix(std::move(suffix_layout));
2834 if (indexed.layout != new_layout) {
2835 // This new layout is different. If this isn't a "scan-only" indexed
2836 // type (which can't be pointer followable), and the type was
2837 // previously un-interesting, make this pointer followable.
2838 if (!indexed.scan && indexed.layout.isEmpty()) {
2839 makePtrFollowable(*indexed.type);
2841 changed = true;
2842 indexed.layout = std::move(new_layout);
2844 } catch (LayoutError& exn) {
2845 indexed.errors = std::move(exn);
2848 } while (changed);
2851 // At this point, the layouts in the indexed types are correct. However, some of
2852 // the indexed types may have the same layout. Put all the unique and sorted
2853 // layouts into the m_layouts list. Assign each indexed type its appropriate
2854 // index into the list.
2855 void Generator::assignUniqueLayouts() {
2856 // First fix up some of the indexed types so that they're in a consistent
2857 // state. These transformations are safe to perform as we know the layouts at
2858 // maximal.
2859 for (auto& indexed : m_indexed_types) {
2860 // At this point, we definitely know which types will be conservative
2861 // scanned or not, so we do not need the guards anymore. Clearing these lets
2862 // us merge together more duplicates.
2863 indexed.conservative_guards.clear();
2865 if (indexed.ignore) {
2866 // If the indexed type is still marked as ignored, it cannot be
2867 // conservative scanned.
2868 indexed.conservative = false;
2869 continue;
2871 if (indexed.conservative) {
2872 // Likewise, if the indexed type is still marked for conservative
2873 // scanning, it cannot be ignored.
2874 indexed.ignore = false;
2875 continue;
2877 if (indexed.layout.isEmpty()) {
2878 // If this type isn't interesting, mark it as if it was explicitly
2879 // ignored.
2880 indexed.ignore = true;
2881 indexed.layout.clear();
2882 continue;
2884 if (indexed.layout.isAllConservative()) {
2885 // If the layout contains nothing but conservative scans, mark it as if it
2886 // was explicitly marked for conservative scans.
2887 indexed.conservative = true;
2888 indexed.layout.clear();
2889 continue;
2891 // Finally, if there's a suffix layout, and the suffix begins at offset 0,
2892 // than the suffix layout can completely subsume the original layout.
2893 if (indexed.layout.suffix && indexed.layout.suffix_begin == 0) {
2894 // avoid indeterminate evaluation order by moving indexed.layout.suffix
2895 // to a temp before overwriting indexed.layout
2896 auto suffix = std::move(*indexed.layout.suffix);
2897 indexed.layout = std::move(suffix);
2901 // Now that the indexed types are fixed up to be more consistent, merge
2902 // duplicates together.
2903 m_indexed_types = mergeDupIndexedTypes(std::move(m_indexed_types));
2905 // Record all generated layouts in m_layouts (ignoring ignored or conservative
2906 // ones since those have hard-coded scanners).
2907 for (const auto& indexed : m_indexed_types) {
2908 if (indexed.ignore || indexed.conservative) continue;
2909 m_layouts.emplace_back(indexed.layout);
2911 // Sort them and make them unique.
2912 std::sort(m_layouts.begin(), m_layouts.end());
2913 m_layouts.erase(
2914 std::unique(m_layouts.begin(), m_layouts.end()),
2915 m_layouts.end()
2918 // Record the appropriate offset into m_layouts for each indexed type to refer
2919 // to its layout.
2920 for (auto& indexed : m_indexed_types) {
2921 if (indexed.ignore || indexed.conservative) continue;
2922 auto result = std::equal_range(
2923 m_layouts.begin(),
2924 m_layouts.end(),
2925 indexed.layout
2927 assert(result.first != result.second);
2928 indexed.layout_index = std::distance(m_layouts.begin(), result.first);
2932 // Check for any errors while generating layouts. We don't want to report these
2933 // errors immediately, as we want to gather them up and report them all at the
2934 // end. This is helpful when there's several things wrong at once.
2935 void Generator::checkForLayoutErrors() const {
2936 std::ostringstream oss;
2937 size_t error_count = 0;
2938 for (const auto& indexed : m_indexed_types) {
2939 if (indexed.errors) {
2940 // Don't go overboard....
2941 if (++error_count > 15) break;
2942 const auto& errors = *indexed.errors;
2943 oss << "error: " << errors.reason << "\n";
2944 for (const auto& context : errors.context) {
2945 oss << "\t- " << context << "\n";
2947 oss << "\t- from type '" << *indexed.type << "'\n"
2948 "\t- if annotations are needed, see definitions in "
2949 "hphp/util/type-scan.h.\n\n";
2951 // Error if an indexed type had internal linkage.
2952 for (const auto& address : indexed.addresses) {
2953 HPHP::match<void>(address,
2954 [&](const std::string&) { /* ok */ },
2955 [&](uintptr_t) {
2956 ++error_count;
2957 oss << "error: type " << *indexed.type << " has internal linkage.\n"
2958 " Indexed types need external linkage.\n";
2963 if (error_count > 0) throw Exception{oss.str()};
2967 * C++ code generation functions:
2971 * Output all the needed C++ forward declarations. Any called custom scanners
2972 * need to forward declared, as well as any Indexer<>::s_index static instances
2973 * which have a symbol. Normally to forward declare these, we would have to
2974 * forward declare a lot of other types, but we employ a dirty trick to avoid
2975 * this. We forward declare the mangled names, wrapped in an extern C block.
2977 void Generator::genForwardDecls(std::ostream& os) const {
2978 std::set<std::string> decls;
2980 for (const auto& layout : m_layouts) {
2981 for (const auto& custom : layout.custom) {
2982 decls.emplace(custom.linkage_name);
2984 if (layout.suffix) {
2985 for (const auto& custom : layout.suffix->custom) {
2986 decls.emplace(custom.linkage_name);
2991 for (const auto& decl : decls) {
2992 os << "/* " << folly::demangle(decl.c_str()) << " */\n"
2993 << "extern \"C\" void " << decl << "(const void*, Scanner&);\n";
2995 os << "\n";
2997 decls.clear();
2998 for (const auto& indexed : m_indexed_types) {
2999 for (const auto& addr : indexed.addresses) {
3000 if (auto* decl = boost::get<std::string>(&addr)) {
3001 decls.emplace(*decl);
3006 for (const auto& decl : decls) {
3007 os << "/* " << folly::demangle(decl.c_str()) << " */\n"
3008 << "extern \"C\" Index " << decl << ";\n";
3010 os << "\n";
3013 // Output the initialization of the metadata table mapping the type indices to
3014 // the type name and scanner.
3015 void Generator::genDataTable(std::ostream& os) const {
3016 os << "const HPHP::type_scan::detail::Metadata g_table[] = {\n";
3017 os << " {\"(UNKNOWN)\", scanner_conservative},\n";
3018 os << " {\"(UNKNOWN NO-PTRS)\", scanner_noptrs},\n";
3019 for (const auto& indexed : m_indexed_types) {
3020 const auto get_scanner_name = [&]() -> std::string {
3021 if (indexed.ignore) return "scanner_noptrs";
3022 if (indexed.conservative) return "scanner_conservative";
3023 return folly::sformat("scanner_{}", indexed.layout_index);
3025 os << " {\"" << *indexed.type << "\", "
3026 << get_scanner_name() << "},\n";
3028 os << "};\n\n";
3031 // Output the initialization function which inserts the type indices to the
3032 // appropriate Indexer<>::s_index static instances.
3033 void Generator::genIndexInit(std::ostream& os) const {
3034 os << "void init_indices() {\n";
3035 size_t index = 2;
3036 for (const auto& indexed : m_indexed_types) {
3037 os << " /* " << *indexed.type << " */\n";
3038 for (const auto& address : indexed.addresses) {
3039 HPHP::match<void>(address,
3040 [&](const std::string& s) {
3041 os << " " << s << " = " << index << ";\n";
3043 [&](uintptr_t /*p*/) {
3044 os << " *reinterpret_cast<Index*>(0x" << std::hex
3045 << address << std::dec << ") = " << index << ";\n";
3048 ++index;
3050 os << "\n static_assert(" << index-1
3051 << " <= std::numeric_limits<Index>::max(), "
3052 << "\"type_scan::Index is too small for all types\");\n";
3053 os << "}\n\n";
3056 void Generator::genBuiltinScannerFuncs(std::ostream& os) const {
3057 os << "void scanner_conservative(Scanner& scanner, "
3058 << "const void* ptr, size_t size) {\n"
3059 << " scanner.m_conservative.emplace_back(ptr, size);\n"
3060 << "}\n\n";
3061 os << "void scanner_noptrs(Scanner& scanner, "
3062 << "const void* ptr, size_t size) {\n"
3063 << "}\n\n";
3066 void Generator::genScannerFuncs(std::ostream& os) const {
3067 genBuiltinScannerFuncs(os);
3068 std::vector<std::vector<std::string>> types(m_layouts.size());
3069 for (const auto& indexed : m_indexed_types) {
3070 if (indexed.ignore || indexed.conservative) continue;
3071 types[indexed.layout_index].push_back(indexed.type->toString());
3073 for (size_t i = 0; i < m_layouts.size(); ++i) {
3074 auto& type_list = types[i];
3075 std::sort(type_list.begin(), type_list.end());
3076 for (auto& t : type_list) {
3077 os << "// " << t << "\n";
3079 os << "void scanner_" << i << "(Scanner& scanner, "
3080 << "const void* ptr, size_t size) {\n";
3081 genScannerFunc(os, m_layouts[i]);
3082 os << "}\n\n";
3086 // For a given layout, output the matching C++ code to implement the scanner.
3087 void Generator::genScannerFunc(std::ostream& os,
3088 const Layout& layout,
3089 size_t begin_offset) const {
3090 // Assert that the size passed into the scanner is a multiple of the type
3091 // size.
3092 if (layout.size > 0) {
3093 if (begin_offset > 0) {
3094 os << " assert((size - " << begin_offset << ") % "
3095 << layout.size << " == 0);\n";
3096 } else if (!layout.suffix) {
3097 os << " assert(size % " << layout.size << " == 0);\n";
3101 // If there's no suffix, the scanner is wrapped within a for loop which loops
3102 // over the entire allocation given by the size parameter.
3103 if (!layout.suffix) {
3104 os << " for (size_t offset = "
3105 << begin_offset << "; offset < size; offset += "
3106 << layout.size << ") {\n";
3109 // Ident appropriately depending on whether we're inside a for loop or not.
3110 auto indent = [&](int level = 0) -> std::ostream& {
3111 if (!layout.suffix) level += 2;
3112 for (size_t i = 0; i < level; ++i) {
3113 os << " ";
3115 return os;
3118 // If we're in a for loop, the offsets need to be biased by the loop
3119 // iteration.
3120 const auto* offset_str = layout.suffix ? "" : "+offset";
3122 // First generate calls to the scanner to record all the pointers. We use the
3123 // version of insert() which takes an initializer list because it is more
3124 // efficient.
3125 if (layout.ptrs.size() == 1) {
3126 indent(2) << "scanner.m_addrs.emplace_back(\n";
3127 const auto& ptr = layout.ptrs.back();
3128 indent(4) << "((const void**)(uintptr_t(ptr)"
3129 << offset_str << "+" << ptr.offset << "))\n";
3130 indent(2) << ");\n";
3131 } else if (!layout.ptrs.empty()) {
3132 indent(2) << "scanner.m_addrs.insert(scanner.m_addrs.end(), {\n";
3133 for (const auto& ptr : layout.ptrs) {
3134 indent(4) << "((const void**)(uintptr_t(ptr)"
3135 << offset_str << "+" << ptr.offset
3136 << ((&ptr == &layout.ptrs.back()) ? "))\n" : ")),\n");
3138 indent(2) << "});\n";
3141 // In a similar manner, insert conservative ranges.
3142 if (layout.conservative.size() == 1) {
3143 indent(2) << "scanner.m_conservative.emplace_back(\n";
3144 const auto& conservative = layout.conservative.back();
3145 indent(4) << "(const void*)(uintptr_t(ptr)"
3146 << offset_str << "+" << conservative.offset << "), "
3147 << conservative.size << "\n";
3148 indent(2) << ");\n";
3149 } else if (!layout.conservative.empty()) {
3150 indent(2) << "scanner.m_conservative.insert(scanner.m_conservative.end(), "
3151 << "{\n";
3152 for (const auto& conservative : layout.conservative) {
3153 indent(4) << "{(const void*)(uintptr_t(ptr)"
3154 << offset_str << "+" << conservative.offset << "), "
3155 << conservative.size
3156 << ((&conservative == &layout.conservative.back()) ?
3157 "}\n" : "},\n");
3159 indent(2) << "});\n";
3162 // Finally generate calls to all custom functions. Use the current offset to
3163 // form the this pointer for the method call.
3164 for (const auto& custom : layout.custom) {
3165 indent(2) << custom.linkage_name << "((const void*)(uintptr_t(ptr)"
3166 << offset_str << "+" << custom.offset << "), scanner);\n";
3169 if (!layout.suffix) {
3170 os << " }\n";
3171 } else {
3172 // If we have a suffix, we didn't generate a for loop, just straightline
3173 // code. Now generate code for the suffix portion.
3174 genScannerFunc(os, *layout.suffix, layout.suffix_begin);
3178 void Generator::genMetrics(std::ostream& os) const {
3179 os << "// type_scan Metrics:\n";
3180 os << "// unique layouts: " << m_layouts.size() << std::endl;
3181 os << "// indexed types: " << m_indexed_types.size() << std::endl;
3182 os << "// pointer followable types: " << m_ptr_followable.size() << std::endl;
3183 os << "// collectable types: " << m_collectable.size() << std::endl;
3184 os << "// scannable collectable types: " << m_scannable_collectable.size()
3185 << std::endl;
3187 size_t conservative_fields{0};
3188 size_t conservative_types{0};
3189 size_t custom_fields{0};
3190 size_t custom_types{0};
3191 size_t custom_bases{0};
3192 size_t ignored_fields{0};
3193 size_t ignored_types{0};
3194 size_t whitelisted_types{0};
3195 size_t forbidden_templates{0};
3196 size_t flexible_arrays{0};
3197 for (auto& e : m_actions) {
3198 auto& action = e.second;
3199 if (action.ignore_all) ignored_types++;
3200 if (action.conservative_all) conservative_types++;
3201 if (action.whitelisted) whitelisted_types++;
3202 if (action.forbidden_template) forbidden_templates++;
3203 if (action.custom_all) custom_types++;
3204 if (!action.flexible_array_field.empty()) flexible_arrays++;
3205 // count custom guards?
3206 ignored_fields += action.ignore_fields.size();
3207 conservative_fields += action.conservative_fields.size();
3208 custom_fields += action.custom_fields.size();
3209 custom_bases += action.custom_bases.size();
3212 os << "// object types: " << m_actions.size() << std::endl;
3213 os << "// conservative-scanned types: " << conservative_types << std::endl;
3214 os << "// conservative-scanned fields: " << conservative_fields << std::endl;
3215 os << "// custom-scanned types: " << custom_types << std::endl;
3216 os << "// custom-scanned fields: " << custom_fields << std::endl;
3217 os << "// custom-scanned bases: " << custom_bases << std::endl;
3218 os << "// ignored types: " << ignored_types << std::endl;
3219 os << "// ignored fields: " << ignored_fields << std::endl;
3220 os << "// whitelisted types: " << whitelisted_types << std::endl;
3221 os << "// forbidden templates: " << forbidden_templates << std::endl;
3222 os << "// flexible arrays: " << flexible_arrays << std::endl;
3223 os << std::endl;
3226 // Generate the entire C++ file.
3227 void Generator::operator()(std::ostream& os) const {
3228 os << "#include <limits>\n\n";
3230 os << "#include \"hphp/util/assertions.h\"\n";
3231 os << "#include \"hphp/util/portability.h\"\n";
3232 os << "#include \"hphp/util/type-scan.h\"\n\n";
3234 os << "using namespace HPHP::type_scan;\n";
3235 os << "using namespace HPHP::type_scan::detail;\n\n";
3237 genMetrics(os);
3238 genForwardDecls(os);
3240 os << "namespace {\n\n";
3241 genScannerFuncs(os);
3242 genDataTable(os);
3243 genIndexInit(os);
3244 os << "}\n\n";
3246 os << "extern \"C\" {\n\n"
3247 << "EXTERNALLY_VISIBLE const Metadata* "
3248 << HPHP::type_scan::detail::kInitFuncName
3249 << "(size_t& table_size) {\n"
3250 << " init_indices();\n"
3251 << " table_size = " << m_indexed_types.size()+2 << ";\n"
3252 << " return g_table;\n"
3253 << "}\n\n"
3254 << "}" << std::endl;
3257 Generator::Layout::Layout(const Layout& other)
3258 : size{other.size}
3259 , ptrs{other.ptrs}
3260 , conservative{other.conservative}
3261 , custom{other.custom}
3262 , suffix{other.suffix ? std::make_unique<Layout>(*other.suffix) : nullptr}
3263 , suffix_begin{other.suffix_begin}
3267 Generator::Layout& Generator::Layout::operator=(const Layout& other) {
3268 size = other.size;
3269 ptrs = other.ptrs;
3270 conservative = other.conservative;
3271 custom = other.custom;
3272 suffix = other.suffix ? std::make_unique<Layout>(*other.suffix) : nullptr;
3273 suffix_begin = other.suffix_begin;
3274 return *this;
3277 void Generator::Layout::addConservative(size_t offset, size_t size) {
3278 if (!size) return;
3279 // Eagerly coalesce adjacent conservative ranges.
3280 if (!conservative.empty()) {
3281 auto& back = conservative.back();
3282 if (back.offset + back.size == offset) {
3283 back.size += size;
3284 return;
3287 conservative.emplace_back(Conservative{offset, size});
3290 bool Generator::Layout::isAllConservative() const {
3291 if (!ptrs.empty() || !custom.empty()) return false;
3292 if (conservative.size() != 1) return false;
3293 if (suffix && !suffix->isAllConservative()) return false;
3294 const auto& back = conservative.back();
3295 return back.offset == 0 && back.size == size;
3298 void Generator::Layout::clear() {
3299 ptrs.clear();
3300 conservative.clear();
3301 custom.clear();
3302 suffix.reset();
3303 suffix_begin = 0;
3306 size_t Generator::Layout::totalCount() const {
3307 return ptrs.size() +
3308 conservative.size() +
3309 custom.size() +
3310 (suffix ? suffix->totalCount() : 0);
3313 void Generator::Layout::merge(const Layout& other, size_t offset) {
3314 for (const auto& entry : other.ptrs) {
3315 addPtr(offset + entry.offset);
3317 for (const auto& entry : other.conservative) {
3318 addConservative(offset + entry.offset, entry.size);
3320 for (const auto& entry : other.custom) {
3321 addCustom(offset + entry.offset, entry.linkage_name);
3325 bool Generator::Layout::operator==(const Layout& other) const {
3326 if (std::tie(size, ptrs, conservative,
3327 custom, suffix_begin) !=
3328 std::tie(other.size, other.ptrs, other.conservative,
3329 other.custom, other.suffix_begin)) {
3330 return false;
3332 if (!suffix) return !other.suffix;
3333 if (!other.suffix) return !suffix;
3334 return *suffix == *other.suffix;
3337 // Arbitrary ordering of layouts. This ordering was chosen to satisfy my
3338 // aesthetics of having "simpler" scanners come first.
3339 bool Generator::Layout::operator<(const Layout& other) const {
3340 const auto count1 = totalCount();
3341 const auto count2 = other.totalCount();
3342 if (count1 != count2) return count1 < count2;
3344 if (ptrs.size() != other.ptrs.size()) {
3345 return ptrs.size() < other.ptrs.size();
3347 if (conservative.size() != other.conservative.size()) {
3348 return conservative.size() < other.conservative.size();
3350 if (custom.size() != other.custom.size()) {
3351 return custom.size() < other.custom.size();
3354 if (ptrs != other.ptrs) return ptrs < other.ptrs;
3355 if (conservative != other.conservative) {
3356 return conservative < other.conservative;
3358 if (custom != other.custom) return custom < other.custom;
3360 if (size != other.size) return size < other.size;
3362 if (suffix) {
3363 if (!other.suffix) return false;
3364 if (*suffix != *other.suffix) return *suffix < *other.suffix;
3365 return suffix_begin < other.suffix_begin;
3366 } else {
3367 return static_cast<bool>(other.suffix);
// One-line summary of the tool, printed with the option listing.
const std::string kProgramDescription{"Generate type-scanners from debug-info"};
3374 ////////////////////////////////////////////////////////////////////////////////
3378 int main(int argc, char** argv) {
3379 folly::SingletonVault::singleton()->registrationComplete();
3381 namespace po = boost::program_options;
3383 po::options_description desc{"Allowed options"};
3384 desc.add_options()
3385 ("help", "produce help message")
3386 ("install_dir",
3387 po::value<std::string>(),
3388 "directory to put generated scanners")
3389 ("fbcode_dir", po::value<std::string>(), "ignored")
3390 ("source_file",
3391 po::value<std::string>()->required(),
3392 "filename to read debug-info from")
3393 ("output_file",
3394 po::value<std::string>()->required(),
3395 "filename of generated scanners")
3396 ("skip", "do not scan dwarf, generate conservative scanners")
3397 ("num_threads", po::value<int>(), "number of parallel threads")
3398 ("print", "dump the dwarf to stdout")
3401 try {
3402 po::variables_map vm;
3403 po::store(po::parse_command_line(argc, argv, desc), vm);
3405 if (vm.count("help")) {
3406 std::cout << kProgramDescription << "\n\n"
3407 << desc << std::endl;
3408 return 1;
3411 #if defined(__clang__) && !defined(CLANG_STANDALONE_DEBUG)
3412 // Doesn't work with older Clang that don't support attribute used
3413 // in member functions of template classes.
3414 // Fixed in https://reviews.llvm.org/D56928
3415 // Doesn't work with Clang without -fstandalone-debug
3416 auto skip = true;
3417 #else
3418 auto skip = vm.count("skip") || getenv("HHVM_DISABLE_TYPE_SCANNERS");
3419 #endif
3421 po::notify(vm);
3422 auto const print = vm.count("print") != 0;
3424 const auto output_filename =
3425 vm.count("install_dir") ?
3426 folly::sformat(
3427 "{}{}{}",
3428 vm["install_dir"].as<std::string>(),
3429 HPHP::FileUtil::getDirSeparator(),
3430 vm["output_file"].as<std::string>()
3432 vm["output_file"].as<std::string>();
3434 if (vm.count("num_threads")) {
3435 auto n = vm["num_threads"].as<int>();
3436 if (n > 0) {
3437 NumThreads = n;
3438 } else {
3439 std::cerr << "\nIllegal num_threads=" << n << "\n";
3440 return 1;
3444 try {
3445 const auto source_executable = vm["source_file"].as<std::string>();
3446 if (print) {
3447 auto const printer = debug_parser::Printer::make(source_executable);
3448 (*printer)(std::cout);
3450 Generator generator{source_executable, skip};
3451 std::ofstream output_file{output_filename};
3452 generator(output_file);
3453 } catch (const debug_parser::Exception& exn) {
3454 std::cerr << "\nError generating type scanners:\n"
3455 << exn.what() << std::endl << std::endl;
3456 return 1;
3458 } catch (const po::error& e) {
3459 std::cerr << e.what() << "\n\n"
3460 << kProgramDescription << "\n\n"
3461 << desc << std::endl;
3462 return 1;
3465 return 0;