2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HPHP_SPARSE_ID_CONTAINERS_H_
17 #define incl_HPHP_SPARSE_ID_CONTAINERS_H_
23 #include <type_traits>
26 #include <folly/CPortability.h>
27 #include <folly/gen/String.h>
29 #include "hphp/util/compilation-flags.h"
30 #include "hphp/util/safe-cast.h"
34 //////////////////////////////////////////////////////////////////////
37 * Time-efficient representations for sparse sets and sparse maps keyed with
38 * integers from a known universe (i.e. values from zero to some maximum). It
39 * also has the peculiar (but maybe occasionally useful) property of iteration
40 * order matching insertion order as long as you haven't erased anything yet.
42 * Space consumption is O(universe), where universe is the maximum value the
43 * set can hold. See the constructor for more information on this. The set
44 * version has several member functions with preconditions relating to universe
47 * The datastructure implemented here is from this:
49 * http://dl.acm.org/citation.cfm?id=176484
51 * Some notes about this implementation:
53 * o The set doesn't support set complement. It's O(universe), so if you
54 * need it it's probably better to use a bitset of some sort.
56 * o Iterators are invalidated on any call to a non-const member function.
58 * o Moving from a set or map leaves it in an undetermined (but valid)
59 * state. Notably this includes potentially changing its universe size.
61 * o Set operations involving multiple sets are generally only legal if both
62 * sets have the same universe size. Exceptions are expressions relating
63 * to EqualityComparable, Assignable, and swap(), for reasons relating to
64 * moving potentially changing universe size.
66 * o Lookups and insertions can be done through a different type than the
67 * containers actually hold. This is to ease using non-integer types as
68 * "keys" in these classes, as long as they can be mapped down to ids. An
69 * 'extractor' function object type can be provided as a template
70 * parameter to control how the mapping works.
72 * Also, note that for very small universes, even if the bits are sparse
73 * there's a good chance you'll be better off with some kind of bitset than the
74 * set version of this.
78 //////////////////////////////////////////////////////////////////////
80 namespace sparse_id_detail
{
82 template<class T
, class Lookup
>
83 struct default_extract
{
84 T
operator()(Lookup l
) const {
86 return safe_cast
<T
>(convert
);
92 //////////////////////////////////////////////////////////////////////
97 class Extract
= sparse_id_detail::default_extract
<T
,LookupT
>
99 struct sparse_id_set
{
100 using value_type
= T
;
101 using size_type
= value_type
;
102 using const_iterator
= const T
*;
105 std::is_integral
<T
>::value
&& std::is_unsigned
<T
>::value
,
106 "sparse_id_set is intended for use with unsigned integer types"
110 * When constructing a sparse_id_set, you must provide a 'universe size' for
111 * the ids. This is one greater than the maximum value you'll insert into
114 * All functions dealing with values have a precondition that the values fit
115 * in the universe size, and most functions involving multiple sparse_id_sets
116 * (essentially everything except swap) will have a precondition that the two
117 * sets have the same universe size.
119 explicit sparse_id_set(T universe_size
)
120 : m_universe_size
{universe_size
}
124 ? static_cast<T
*>(std::malloc(sizeof(T
) * universe_size
* 2))
128 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
129 // it for valgrind or asan builds.
130 #if FOLLY_SANITIZE || defined(VALGRIND)
132 std::memset(m_mem
, 0, sizeof(T
) * universe_size
);
136 ~sparse_id_set() { if (m_universe_size
) std::free(m_mem
); }
139 * Copy this set from `o'.
141 * Post: operator==(o)
143 sparse_id_set(const sparse_id_set
& o
)
144 : sparse_id_set(o
.m_universe_size
)
150 * Move construct a set from `o'.
152 * The set `o' is left in an unspecified but valid state. It's
153 * universe_size() is not even guaranteed to be the same after it is
156 sparse_id_set(sparse_id_set
&& o
) noexcept
157 : m_universe_size
{o
.m_universe_size
}
161 o
.m_universe_size
= 0;
171 * Post: operator==(o)
173 sparse_id_set
& operator=(const sparse_id_set
& o
) {
174 if (m_universe_size
== o
.m_universe_size
) {
179 sparse_id_set
tmp(o
);
187 * Leaves `o' in an unspecified, but valid state. It may not have the same
188 * universe size that it had before being moved from.
190 sparse_id_set
& operator=(sparse_id_set
&& o
) noexcept
{
193 // Make sure no one relies on the universe staying the same.
194 sparse_id_set
tmp(0);
201 * Returns the universe size of this sparse_id_set. Once created, a set's
202 * universe size can not change unless you move-construct or move-assign from
205 size_type
universe_size() const { return m_universe_size
; }
208 * Iteration. Make sure you don't mutate the set while you're using its
211 * The order of elements in the set is guaranteed to be the same as the order
214 const_iterator
begin() const { return const_iterator(dense()); }
215 const_iterator
end() const { return const_iterator(dense() + m_next
); }
216 const_iterator
cbegin() const { return const_iterator(dense()); }
217 const_iterator
cend() const { return const_iterator(dense() + m_next
); }
220 * Since we iterate in insertion order, it might be convenient to ask what's
221 * at the front or the back. This class is definitely not a full model of
224 T
front() const { assert(!empty()); return dense()[0]; }
225 T
back() const { assert(!empty()); return dense()[m_next
- 1]; }
228 * Number of elements in this set.
230 size_type
size() const { return m_next
; }
233 * Returns: size() != 0
235 bool empty() const { return !size(); }
238 * Clear all members from the set. O(1).
242 void clear() { m_next
= 0; }
245 * Returns: whether this sparse_id_set contains a particular value. O(1).
247 bool contains(LookupT lt
) const {
248 return containsImpl(Extract()(lt
));
252 * Returns: whether this sparse_id_set contains a particular value. O(1).
253 * Does not require that the id is in range.
255 bool contains_safe(LookupT lt
) const {
256 auto const t
= Extract()(lt
);
257 return t
< m_universe_size
&& containsImpl(t
);
261 * Insert a new value into the set. O(1)
263 * Post: contains an element with the id of `lt'
265 void insert(LookupT lt
) {
266 auto const t
= Extract()(lt
);
267 assert(t
< m_universe_size
);
268 if (containsImpl(t
)) return;
270 sparse()[t
] = m_next
;
275 * Remove an element from the set, if it is a member. (Does not assume that
278 * Post: !contains(lt)
280 void erase(LookupT lt
) {
281 auto const t
= Extract()(lt
);
282 assert(t
< m_universe_size
);
283 // Swap with back element and update sparse ptrs.
284 auto const didx
= sparse()[t
]; // possibly reads uninitialized mem
285 if (didx
>= m_next
|| dense()[didx
] != t
) return;
286 auto const moving
= dense()[m_next
- 1];
287 sparse()[moving
] = didx
;
288 dense()[didx
] = moving
;
290 // No need to write to sparse()[t]. If it's read, next and dense are
291 // rechecked to ensure it's actually relevant.
295 * These sets are EqualityComparable, even if they aren't from the save
296 * universe. (Rationale: if you move construct from something it's nice that
297 * it is still legal to compare to other things it was legally comparable to
300 * This returns whether the two sets have the same elements, regardless of
301 * the order they were inserted.
303 * But note that it's O(size()) worst case. You probably should just never
306 bool operator==(const sparse_id_set
& o
) const {
307 if (universe_size() != o
.universe_size()) return false;
308 if (size() != o
.size()) return false;
309 for (auto v
: *this) if (!o
.containsImpl(v
)) return false;
312 bool operator!=(const sparse_id_set
& o
) const { return !(*this == o
); }
315 * Union, difference and intersection operators.
317 * All of these operators are only provided as versions that modify the lhs
318 * in place. Idiomatic uses are going to involve updating id sets that
319 * already exist, so even with our move-construction support it will tend to
320 * involve allocations compared to mutation-based usage-styles.
322 * Union (|=) and difference (-=) are O(o.size()), while intersection (&=) is
325 * Pre: universe_size() == o.universe_size()
327 sparse_id_set
& operator|=(const sparse_id_set
& o
) {
328 assert(m_universe_size
== o
.m_universe_size
);
329 for (auto t
: o
) insert(t
);
332 sparse_id_set
& operator-=(const sparse_id_set
& o
) {
333 assert(m_universe_size
== o
.m_universe_size
);
334 for (auto t
: o
) erase(t
);
337 sparse_id_set
& operator&=(const sparse_id_set
& o
) {
338 assert(m_universe_size
== o
.m_universe_size
);
341 while (fwd
!= back
) {
343 if (!o
.containsImpl(dense()[fwd
])) {
344 auto const val
= dense()[--back
];
356 * Swap the contents of two sets.
358 * This function is unusual in that it does not have any preconditions about
359 * universe sizes matching.
361 void swap(sparse_id_set
& o
) noexcept
{
362 std::swap(m_universe_size
, o
.m_universe_size
);
363 std::swap(m_mem
, o
.m_mem
);
364 std::swap(m_next
, o
.m_next
);
368 * Convert a sparse id set to a std::string, intended for debug printing.
370 friend std::string
show(const sparse_id_set
& set
) {
371 using namespace folly::gen
;
373 | eachTo
<std::string
>()
374 | unsplit
<std::string
>(" ")
379 bool containsImpl(T t
) const {
380 assert(t
< m_universe_size
);
381 auto const didx
= sparse()[t
]; // may read uninitialized memory
382 return didx
< m_next
&& dense()[didx
] == t
;
386 T
* sparse() { return m_mem
; }
387 T
* dense() { return m_mem
+ m_universe_size
; }
388 const T
* sparse() const { return m_mem
; }
389 const T
* dense() const { return m_mem
+ m_universe_size
; }
397 //////////////////////////////////////////////////////////////////////
403 class KExtract
= sparse_id_detail::default_extract
<K
,LookupKey
>
405 struct sparse_id_map
{
406 using value_type
= std::pair
<const K
,V
>;
408 using const_iterator
= const value_type
*;
411 std::is_integral
<K
>::value
&& std::is_unsigned
<K
>::value
,
412 "sparse_id_set is intended for use with unsigned integer types"
416 * When constructing a sparse_id_map, you must provide a 'universe size' for
417 * the ids. This is one greater than the maximum key you'll insert into the
420 explicit sparse_id_map(K universe_size
)
421 : m_universe_size(universe_size
)
425 ? std::malloc(sizeof(K
) * universe_size
+
426 sizeof(value_type
) * universe_size
)
430 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
431 // it for valgrind or asan builds.
432 #if FOLLY_SANITIZE || defined(VALGRIND)
434 std::memset(m_mem
, 0, sizeof(K
) * universe_size
);
439 if (!m_universe_size
) return;
440 if (!std::is_trivially_destructible
<V
>::value
) {
441 for (auto& kv
: *this) {
449 * Copy this map from `o'. Nothrow as long as V has a nothrow copy
452 * Post: operator==(o)
454 sparse_id_map(const sparse_id_map
& o
)
455 : m_universe_size
{o
.m_universe_size
}
459 ? std::malloc(sizeof(K
) * m_universe_size
+
460 sizeof(value_type
) * m_universe_size
)
465 auto initialize
= [&] {
466 for (; idx
< m_next
; ++idx
) {
467 new (&dense()[idx
]) value_type(o
.dense()[idx
]);
468 sparse()[o
.dense()[idx
].first
] = idx
;
471 if (std::is_trivially_destructible
<V
>::value
||
472 std::is_nothrow_copy_constructible
<V
>::value
) {
481 dense()[idx
].~value_type();
488 * Move construct a map from `o'. Leaves `o' in an unspecified but valid
489 * state. (Notably the universe size may be changed.)
493 sparse_id_map(sparse_id_map
&& o
) noexcept
494 : m_universe_size
{o
.m_universe_size
}
498 o
.m_universe_size
= 0;
506 * Copy assignment. Make this map equivalent to `o'.
508 * Strong exception guarantee.
510 sparse_id_map
& operator=(const sparse_id_map
& o
) {
511 sparse_id_map
tmp(o
);
517 * Move assign from `o'.
519 * Leaves `o' in an unspecified but valid state. Notably the universe size
524 sparse_id_map
& operator=(sparse_id_map
&& o
) noexcept
{
530 * Returns the universe size of this sparse_id_map. Once created, a map's
531 * universe size can not change unless you move-construct or move-assign from
534 size_type
universe_size() const { return m_universe_size
; }
537 * Iteration. Make sure you don't mutate the map while you're using its
540 * The order of elements in the map is guaranteed to be the same as the order
543 const_iterator
begin() const { return const_iterator(dense()); }
544 const_iterator
end() const { return const_iterator(dense() + m_next
); }
545 const_iterator
cbegin() const { return const_iterator(dense()); }
546 const_iterator
cend() const { return const_iterator(dense() + m_next
); }
549 * Since we iterate in insertion order, it might be convenient to ask what's
550 * at the front or the back. This class is definitely not a full model of
553 const value_type
& front() const {
557 const value_type
& back() const {
559 return dense()[m_next
- 1];
563 * Number of elements in this map.
565 size_type
size() const { return m_next
; }
568 * Returns: size() != 0
570 bool empty() const { return !size(); }
573 * Clear all members from the map. O(1) if V is trivially destructable,
579 if (!std::is_trivially_destructible
<V
>::value
) {
580 for (auto& kv
: *this) {
588 * Returns: whether this sparse_id_map contains a particular key. O(1).
590 bool contains(LookupKey lk
) const {
591 return containsImpl(KExtract()(lk
));
595 * Returns: whether this sparse_id_map contains a particular value. O(1).
596 * Does not require that the id is in range.
598 bool contains_safe(LookupKey lk
) const {
599 auto const k
= KExtract()(lk
);
600 return k
< m_universe_size
&& containsImpl(k
);
604 * Get a reference to the value for key `k', inserting it with a default
605 * constructed value if it doesn't exist. Strong guarantee.
607 V
& operator[](LookupKey lk
) {
608 auto const k
= KExtract()(lk
);
609 if (!containsImpl(k
)) insert(std::make_pair(k
, V
{}));
610 return dense()[sparse()[k
]].second
;
614 * Insert a new value into the set. O(1). Strong exception guarantee.
616 * Post: contains an element with id v.first
618 void insert(const value_type
& v
) {
619 assert(v
.first
< m_universe_size
);
620 if (containsImpl(v
.first
)) return;
621 new (&dense()[m_next
]) value_type(v
);
622 sparse()[v
.first
] = m_next
;
627 * Insert a new value into the set, moving it if we need it. O(1). Strong
628 * exception guarantee.
630 * Post: contains an element with id v.first
632 void insert(value_type
&& v
) {
633 assert(v
.first
< m_universe_size
);
634 if (containsImpl(v
.first
)) return;
635 new (&dense()[m_next
]) value_type(std::move(v
));
636 sparse()[v
.first
] = m_next
;
641 * Remove an element from the set, if it is a member. (Does not assume that
642 * it is.) No throw as long as V has a nothrow move assignment operator.
643 * Strong guarantee otherwise.
645 * Post: !contains(lk)
647 void erase(LookupKey lk
) {
648 auto const key
= KExtract()(lk
);
649 assert(key
< m_universe_size
);
650 // Move in back element and update sparse ptrs.
651 auto const didx
= sparse()[key
]; // possibly reads uninitialized mem
652 if (didx
>= m_next
|| dense()[didx
].first
!= key
) return;
653 auto& moving
= dense()[m_next
- 1];
654 auto const moved_key
= moving
.first
;
655 if (didx
< m_next
- 1) {
656 dense()[didx
].second
= std::move(moving
.second
);
657 const_cast<K
&>(dense()[didx
].first
) = moved_key
;
659 sparse()[moved_key
] = didx
;
660 dense()[m_next
- 1].~value_type();
662 // No need to write to sparse()[t]. If it's read, next and dense are
663 // rechecked to ensure it's actually relevant.
667 * Model EqualityComparable, as long as the value is EqualityComparable.
668 * Note that it's O(size()) worst case.
670 * This returns whether the two maps have equivalent key value pairs,
671 * regardless of the order they were inserted.
673 bool operator==(const sparse_id_map
& o
) const {
674 if (universe_size() != o
.universe_size()) return false;
675 if (size() != o
.size()) return false;
676 for (auto& kv
: *this) {
677 if (!o
.containsImpl(kv
.first
)) return false;
678 if (!(o
.dense()[o
.sparse()[kv
.first
]].second
== kv
.second
)) {
684 bool operator!=(const sparse_id_map
& o
) const { return !(*this == o
); }
687 * Merge a map into this one by intersecting the keys, and using a
688 * user-defined function to merge values that were present in both this and
689 * `o'. The user-defined value merge function should return a bool,
690 * indicating whether the value should be considered changed.
692 * Basic guarantee only. Nothrow if V has a nothrow move constructor or a
693 * nothrow copy constructor.
695 * Complexity: O(size()).
697 * Returns: true if this map changed keys, or if the user-supplied function
698 * returned true for any pair of values.
700 * Pre: universe_size() == o.universe_size()
703 bool merge(const sparse_id_map
& o
, Fun val_merge
) {
704 assert(m_universe_size
== o
.m_universe_size
);
706 auto changed
= false;
707 while (fwd
!= m_next
) {
708 assert(fwd
< m_next
);
709 auto const k
= dense()[fwd
].first
;
710 if (!o
.containsImpl(k
)) {
712 if (fwd
== m_next
- 1) { // Avoid self-move assigning values.
716 // Order here is important for exception safety: we can't decrement
717 // m_next until we've moved-from and then destroyed the old value.
718 auto& tomove
= dense()[m_next
- 1];
719 sparse()[tomove
.first
] = fwd
;
720 dense()[fwd
].second
= std::move(tomove
.second
);
721 const_cast<K
&>(dense()[fwd
].first
) = tomove
.first
;
722 tomove
.~value_type();
726 if (val_merge(dense()[fwd
].second
, o
.dense()[o
.sparse()[k
]].second
)) {
735 * Swap the contents of two maps.
737 void swap(sparse_id_map
& o
) noexcept
{
738 std::swap(m_universe_size
, o
.m_universe_size
);
739 std::swap(m_mem
, o
.m_mem
);
740 std::swap(m_next
, o
.m_next
);
744 bool containsImpl(K k
) const {
745 assert(k
< m_universe_size
);
746 auto const didx
= sparse()[k
]; // may read uninitialized memory
747 return didx
< m_next
&& dense()[didx
].first
== k
;
751 K
* sparse() { return static_cast<K
*>(m_mem
); }
752 const K
* sparse() const { return static_cast<K
*>(m_mem
); }
753 value_type
* dense() {
754 void* vpDense
= sparse() + m_universe_size
;
755 return static_cast<value_type
*>(vpDense
);
757 const value_type
* dense() const {
758 return const_cast<sparse_id_map
*>(this)->dense();
767 //////////////////////////////////////////////////////////////////////
769 // Non-member swaps for ADL swap idiom.
771 template<class T
, class LT
, class EX
>
772 void swap(sparse_id_set
<T
,LT
,EX
>& a
, sparse_id_set
<T
,LT
,EX
>& b
) {
776 template<class K
, class V
, class LK
, class LKE
>
777 void swap(sparse_id_map
<K
,V
,LK
,LKE
>& a
, sparse_id_map
<K
,V
,LK
,LKE
>& b
) {
781 //////////////////////////////////////////////////////////////////////