2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
22 #include <type_traits>
25 #include <folly/CPortability.h>
26 #include <folly/gen/String.h>
28 #include "hphp/util/compilation-flags.h"
29 #include "hphp/util/safe-cast.h"
33 //////////////////////////////////////////////////////////////////////
36 * Time-efficient representations for sparse sets and sparse maps keyed with
37 * integers from a known universe (i.e. values from zero to some maximum). It
38 * also has the peculiar (but maybe occasionally useful) property of iteration
39 * order matching insertion order as long as you haven't erased anything yet.
41 * Space consumption is O(universe), where universe is the maximum value the
42 * set can hold. See the constructor for more information on this. The set
43 * version has several member functions with preconditions relating to universe
46 * The datastructure implemented here is from this:
48 * http://dl.acm.org/citation.cfm?id=176484
50 * Some notes about this implementation:
52 * o The set doesn't support set complement. It's O(universe), so if you
53 * need it it's probably better to use a bitset of some sort.
55 * o Iterators are invalidated on any call to a non-const member function.
57 * o Moving from a set or map leaves it in an undetermined (but valid)
58 * state. Notably this includes potentially changing its universe size.
60 * o Set operations involving multiple sets are generally only legal if both
61 * sets have the same universe size. Exceptions are expressions relating
62 * to EqualityComparable, Assignable, and swap(), for reasons relating to
63 * moving potentially changing universe size.
65 * o Lookups and insertions can be done through a different type than the
66 * containers actually hold. This is to ease using non-integer types as
67 * "keys" in these classes, as long as they can be mapped down to ids. An
68 * 'extractor' function object type can be provided as a template
69 * parameter to control how the mapping works.
71 * Also, note that for very small universes, even if the bits are sparse
72 * there's a good chance you'll be better off with some kind of bitset than the
73 * set version of this.
77 //////////////////////////////////////////////////////////////////////
79 namespace sparse_id_detail
{
81 template<class T
, class Lookup
>
82 struct default_extract
{
83 T
operator()(Lookup l
) const {
85 return safe_cast
<T
>(convert
);
91 //////////////////////////////////////////////////////////////////////
96 class Extract
= sparse_id_detail::default_extract
<T
,LookupT
>
98 struct sparse_id_set
{
100 using size_type
= value_type
;
101 using const_iterator
= const T
*;
104 std::is_integral
<T
>::value
&& std::is_unsigned
<T
>::value
,
105 "sparse_id_set is intended for use with unsigned integer types"
109 * When constructing a sparse_id_set, you must provide a 'universe size' for
110 * the ids. This is one greater than the maximum value you'll insert into
113 * All functions dealing with values have a precondition that the values fit
114 * in the universe size, and most functions involving multiple sparse_id_sets
115 * (essentially everything except swap) will have a precondition that the two
116 * sets have the same universe size.
118 explicit sparse_id_set(T universe_size
)
119 : m_universe_size
{universe_size
}
123 ? static_cast<T
*>(std::malloc(sizeof(T
) * universe_size
* 2))
127 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
128 // it for valgrind or asan builds.
129 #if FOLLY_SANITIZE || defined(VALGRIND)
131 std::memset(m_mem
, 0, sizeof(T
) * universe_size
);
135 ~sparse_id_set() { if (m_universe_size
) std::free(m_mem
); }
138 * Copy this set from `o'.
140 * Post: operator==(o)
142 sparse_id_set(const sparse_id_set
& o
)
143 : sparse_id_set(o
.m_universe_size
)
149 * Move construct a set from `o'.
151 * The set `o' is left in an unspecified but valid state. It's
152 * universe_size() is not even guaranteed to be the same after it is
155 sparse_id_set(sparse_id_set
&& o
) noexcept
156 : m_universe_size
{o
.m_universe_size
}
160 o
.m_universe_size
= 0;
170 * Post: operator==(o)
172 sparse_id_set
& operator=(const sparse_id_set
& o
) {
173 if (m_universe_size
== o
.m_universe_size
) {
178 sparse_id_set
tmp(o
);
186 * Leaves `o' in an unspecified, but valid state. It may not have the same
187 * universe size that it had before being moved from.
189 sparse_id_set
& operator=(sparse_id_set
&& o
) noexcept
{
192 // Make sure no one relies on the universe staying the same.
193 sparse_id_set
tmp(0);
200 * Returns the universe size of this sparse_id_set. Once created, a set's
201 * universe size can not change unless you move-construct or move-assign from
204 size_type
universe_size() const { return m_universe_size
; }
207 * Iteration. Make sure you don't mutate the set while you're using its
210 * The order of elements in the set is guaranteed to be the same as the order
213 const_iterator
begin() const { return const_iterator(dense()); }
214 const_iterator
end() const { return const_iterator(dense() + m_next
); }
215 const_iterator
cbegin() const { return const_iterator(dense()); }
216 const_iterator
cend() const { return const_iterator(dense() + m_next
); }
219 * Since we iterate in insertion order, it might be convenient to ask what's
220 * at the front or the back. This class is definitely not a full model of
223 T
front() const { assert(!empty()); return dense()[0]; }
224 T
back() const { assert(!empty()); return dense()[m_next
- 1]; }
227 * Number of elements in this set.
229 size_type
size() const { return m_next
; }
232 * Returns: size() != 0
234 bool empty() const { return !size(); }
237 * Clear all members from the set. O(1).
241 void clear() { m_next
= 0; }
244 * Returns: whether this sparse_id_set contains a particular value. O(1).
246 bool contains(LookupT lt
) const {
247 return containsImpl(Extract()(lt
));
251 * Returns: whether this sparse_id_set contains a particular value. O(1).
252 * Does not require that the id is in range.
254 bool contains_safe(LookupT lt
) const {
255 auto const t
= Extract()(lt
);
256 return t
< m_universe_size
&& containsImpl(t
);
260 * Insert a new value into the set. O(1)
262 * Post: contains an element with the id of `lt'
264 void insert(LookupT lt
) {
265 auto const t
= Extract()(lt
);
266 assert(t
< m_universe_size
);
267 if (containsImpl(t
)) return;
269 sparse()[t
] = m_next
;
274 * Remove an element from the set, if it is a member. (Does not assume that
277 * Post: !contains(lt)
279 void erase(LookupT lt
) {
280 auto const t
= Extract()(lt
);
281 assert(t
< m_universe_size
);
282 // Swap with back element and update sparse ptrs.
283 auto const didx
= sparse()[t
]; // possibly reads uninitialized mem
284 if (didx
>= m_next
|| dense()[didx
] != t
) return;
285 auto const moving
= dense()[m_next
- 1];
286 sparse()[moving
] = didx
;
287 dense()[didx
] = moving
;
289 // No need to write to sparse()[t]. If it's read, next and dense are
290 // rechecked to ensure it's actually relevant.
294 * These sets are EqualityComparable, even if they aren't from the save
295 * universe. (Rationale: if you move construct from something it's nice that
296 * it is still legal to compare to other things it was legally comparable to
299 * This returns whether the two sets have the same elements, regardless of
300 * the order they were inserted.
302 * But note that it's O(size()) worst case. You probably should just never
305 bool operator==(const sparse_id_set
& o
) const {
306 if (universe_size() != o
.universe_size()) return false;
307 if (size() != o
.size()) return false;
308 for (auto v
: *this) if (!o
.containsImpl(v
)) return false;
311 bool operator!=(const sparse_id_set
& o
) const { return !(*this == o
); }
314 * Union, difference and intersection operators.
316 * All of these operators are only provided as versions that modify the lhs
317 * in place. Idiomatic uses are going to involve updating id sets that
318 * already exist, so even with our move-construction support it will tend to
319 * involve allocations compared to mutation-based usage-styles.
321 * Union (|=) and difference (-=) are O(o.size()), while intersection (&=) is
324 * Pre: universe_size() == o.universe_size()
326 sparse_id_set
& operator|=(const sparse_id_set
& o
) {
327 assert(m_universe_size
== o
.m_universe_size
);
328 for (auto t
: o
) insert(t
);
331 sparse_id_set
& operator-=(const sparse_id_set
& o
) {
332 assert(m_universe_size
== o
.m_universe_size
);
333 for (auto t
: o
) erase(t
);
336 sparse_id_set
& operator&=(const sparse_id_set
& o
) {
337 assert(m_universe_size
== o
.m_universe_size
);
340 while (fwd
!= back
) {
342 if (!o
.containsImpl(dense()[fwd
])) {
343 auto const val
= dense()[--back
];
355 * Swap the contents of two sets.
357 * This function is unusual in that it does not have any preconditions about
358 * universe sizes matching.
360 void swap(sparse_id_set
& o
) noexcept
{
361 std::swap(m_universe_size
, o
.m_universe_size
);
362 std::swap(m_mem
, o
.m_mem
);
363 std::swap(m_next
, o
.m_next
);
367 * Convert a sparse id set to a std::string, intended for debug printing.
369 friend std::string
show(const sparse_id_set
& set
) {
370 using namespace folly::gen
;
372 | eachTo
<std::string
>()
373 | unsplit
<std::string
>(" ")
378 bool containsImpl(T t
) const {
379 assert(t
< m_universe_size
);
380 auto const didx
= sparse()[t
]; // may read uninitialized memory
381 return didx
< m_next
&& dense()[didx
] == t
;
385 T
* sparse() { return m_mem
; }
386 T
* dense() { return m_mem
+ m_universe_size
; }
387 const T
* sparse() const { return m_mem
; }
388 const T
* dense() const { return m_mem
+ m_universe_size
; }
396 //////////////////////////////////////////////////////////////////////
402 class KExtract
= sparse_id_detail::default_extract
<K
,LookupKey
>
404 struct sparse_id_map
{
405 using value_type
= std::pair
<const K
,V
>;
407 using const_iterator
= const value_type
*;
410 std::is_integral
<K
>::value
&& std::is_unsigned
<K
>::value
,
411 "sparse_id_set is intended for use with unsigned integer types"
415 * When constructing a sparse_id_map, you must provide a 'universe size' for
416 * the ids. This is one greater than the maximum key you'll insert into the
419 explicit sparse_id_map(K universe_size
)
420 : m_universe_size(universe_size
)
424 ? std::malloc(sizeof(K
) * universe_size
+
425 sizeof(value_type
) * universe_size
)
429 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
430 // it for valgrind or asan builds.
431 #if FOLLY_SANITIZE || defined(VALGRIND)
433 std::memset(m_mem
, 0, sizeof(K
) * universe_size
);
438 if (!m_universe_size
) return;
439 if (!std::is_trivially_destructible
<V
>::value
) {
440 for (auto& kv
: *this) {
448 * Copy this map from `o'. Nothrow as long as V has a nothrow copy
451 * Post: operator==(o)
453 sparse_id_map(const sparse_id_map
& o
)
454 : m_universe_size
{o
.m_universe_size
}
458 ? std::malloc(sizeof(K
) * m_universe_size
+
459 sizeof(value_type
) * m_universe_size
)
464 auto initialize
= [&] {
465 for (; idx
< m_next
; ++idx
) {
466 new (&dense()[idx
]) value_type(o
.dense()[idx
]);
467 sparse()[o
.dense()[idx
].first
] = idx
;
470 if (std::is_trivially_destructible
<V
>::value
||
471 std::is_nothrow_copy_constructible
<V
>::value
) {
480 dense()[idx
].~value_type();
487 * Move construct a map from `o'. Leaves `o' in an unspecified but valid
488 * state. (Notably the universe size may be changed.)
492 sparse_id_map(sparse_id_map
&& o
) noexcept
493 : m_universe_size
{o
.m_universe_size
}
497 o
.m_universe_size
= 0;
505 * Copy assignment. Make this map equivalent to `o'.
507 * Strong exception guarantee.
509 sparse_id_map
& operator=(const sparse_id_map
& o
) {
510 sparse_id_map
tmp(o
);
516 * Move assign from `o'.
518 * Leaves `o' in an unspecified but valid state. Notably the universe size
523 sparse_id_map
& operator=(sparse_id_map
&& o
) noexcept
{
529 * Returns the universe size of this sparse_id_map. Once created, a map's
530 * universe size can not change unless you move-construct or move-assign from
533 size_type
universe_size() const { return m_universe_size
; }
536 * Iteration. Make sure you don't mutate the map while you're using its
539 * The order of elements in the map is guaranteed to be the same as the order
542 const_iterator
begin() const { return const_iterator(dense()); }
543 const_iterator
end() const { return const_iterator(dense() + m_next
); }
544 const_iterator
cbegin() const { return const_iterator(dense()); }
545 const_iterator
cend() const { return const_iterator(dense() + m_next
); }
548 * Since we iterate in insertion order, it might be convenient to ask what's
549 * at the front or the back. This class is definitely not a full model of
552 const value_type
& front() const {
556 const value_type
& back() const {
558 return dense()[m_next
- 1];
562 * Number of elements in this map.
564 size_type
size() const { return m_next
; }
567 * Returns: size() != 0
569 bool empty() const { return !size(); }
572 * Clear all members from the map. O(1) if V is trivially destructible,
578 if (!std::is_trivially_destructible
<V
>::value
) {
579 for (auto& kv
: *this) {
587 * Returns: whether this sparse_id_map contains a particular key. O(1).
589 bool contains(LookupKey lk
) const {
590 return containsImpl(KExtract()(lk
));
594 * Returns: whether this sparse_id_map contains a particular value. O(1).
595 * Does not require that the id is in range.
597 bool contains_safe(LookupKey lk
) const {
598 auto const k
= KExtract()(lk
);
599 return k
< m_universe_size
&& containsImpl(k
);
603 * Get a reference to the value for key `k', inserting it with a default
604 * constructed value if it doesn't exist. Strong guarantee.
606 V
& operator[](LookupKey lk
) {
607 auto const k
= KExtract()(lk
);
608 if (!containsImpl(k
)) insert(std::make_pair(k
, V
{}));
609 return dense()[sparse()[k
]].second
;
613 * Insert a new value into the set. O(1). Strong exception guarantee.
615 * Post: contains an element with id v.first
617 void insert(const value_type
& v
) {
618 assert(v
.first
< m_universe_size
);
619 if (containsImpl(v
.first
)) return;
620 new (&dense()[m_next
]) value_type(v
);
621 sparse()[v
.first
] = m_next
;
626 * Insert a new value into the set, moving it if we need it. O(1). Strong
627 * exception guarantee.
629 * Post: contains an element with id v.first
631 void insert(value_type
&& v
) {
632 assert(v
.first
< m_universe_size
);
633 if (containsImpl(v
.first
)) return;
634 new (&dense()[m_next
]) value_type(std::move(v
));
635 sparse()[v
.first
] = m_next
;
640 * Remove an element from the set, if it is a member. (Does not assume that
641 * it is.) No throw as long as V has a nothrow move assignment operator.
642 * Strong guarantee otherwise.
644 * Post: !contains(lk)
646 void erase(LookupKey lk
) {
647 auto const key
= KExtract()(lk
);
648 assert(key
< m_universe_size
);
649 // Move in back element and update sparse ptrs.
650 auto const didx
= sparse()[key
]; // possibly reads uninitialized mem
651 if (didx
>= m_next
|| dense()[didx
].first
!= key
) return;
652 auto& moving
= dense()[m_next
- 1];
653 auto const moved_key
= moving
.first
;
654 if (didx
< m_next
- 1) {
655 dense()[didx
].second
= std::move(moving
.second
);
656 const_cast<K
&>(dense()[didx
].first
) = moved_key
;
658 sparse()[moved_key
] = didx
;
659 dense()[m_next
- 1].~value_type();
661 // No need to write to sparse()[t]. If it's read, next and dense are
662 // rechecked to ensure it's actually relevant.
666 * Model EqualityComparable, as long as the value is EqualityComparable.
667 * Note that it's O(size()) worst case.
669 * This returns whether the two maps have equivalent key value pairs,
670 * regardless of the order they were inserted.
672 bool operator==(const sparse_id_map
& o
) const {
673 if (universe_size() != o
.universe_size()) return false;
674 if (size() != o
.size()) return false;
675 for (auto& kv
: *this) {
676 if (!o
.containsImpl(kv
.first
)) return false;
677 if (!(o
.dense()[o
.sparse()[kv
.first
]].second
== kv
.second
)) {
683 bool operator!=(const sparse_id_map
& o
) const { return !(*this == o
); }
686 * Merge a map into this one by intersecting the keys, and using a
687 * user-defined function to merge values that were present in both this and
688 * `o'. The user-defined value merge function should return a bool,
689 * indicating whether the value should be considered changed.
691 * Basic guarantee only. Nothrow if V has a nothrow move constructor or a
692 * nothrow copy constructor.
694 * Complexity: O(size()).
696 * Returns: true if this map changed keys, or if the user-supplied function
697 * returned true for any pair of values.
699 * Pre: universe_size() == o.universe_size()
702 bool merge(const sparse_id_map
& o
, Fun val_merge
) {
703 assert(m_universe_size
== o
.m_universe_size
);
705 auto changed
= false;
706 while (fwd
!= m_next
) {
707 assert(fwd
< m_next
);
708 auto const k
= dense()[fwd
].first
;
709 if (!o
.containsImpl(k
)) {
711 if (fwd
== m_next
- 1) { // Avoid self-move assigning values.
715 // Order here is important for exception safety: we can't decrement
716 // m_next until we've moved-from and then destroyed the old value.
717 auto& tomove
= dense()[m_next
- 1];
718 sparse()[tomove
.first
] = fwd
;
719 dense()[fwd
].second
= std::move(tomove
.second
);
720 const_cast<K
&>(dense()[fwd
].first
) = tomove
.first
;
721 tomove
.~value_type();
725 if (val_merge(dense()[fwd
].second
, o
.dense()[o
.sparse()[k
]].second
)) {
734 * Swap the contents of two maps.
736 void swap(sparse_id_map
& o
) noexcept
{
737 std::swap(m_universe_size
, o
.m_universe_size
);
738 std::swap(m_mem
, o
.m_mem
);
739 std::swap(m_next
, o
.m_next
);
743 bool containsImpl(K k
) const {
744 assert(k
< m_universe_size
);
745 auto const didx
= sparse()[k
]; // may read uninitialized memory
746 return didx
< m_next
&& dense()[didx
].first
== k
;
750 K
* sparse() { return static_cast<K
*>(m_mem
); }
751 const K
* sparse() const { return static_cast<K
*>(m_mem
); }
752 value_type
* dense() {
753 void* vpDense
= sparse() + m_universe_size
;
754 return static_cast<value_type
*>(vpDense
);
756 const value_type
* dense() const {
757 return const_cast<sparse_id_map
*>(this)->dense();
766 //////////////////////////////////////////////////////////////////////
768 // Non-member swaps for ADL swap idiom.
770 template<class T
, class LT
, class EX
>
771 void swap(sparse_id_set
<T
,LT
,EX
>& a
, sparse_id_set
<T
,LT
,EX
>& b
) {
775 template<class K
, class V
, class LK
, class LKE
>
776 void swap(sparse_id_map
<K
,V
,LK
,LKE
>& a
, sparse_id_map
<K
,V
,LK
,LKE
>& b
) {
780 //////////////////////////////////////////////////////////////////////