enable coroutine for rust emitter
[hiphop-php.git] / hphp / util / sparse-id-containers.h
blobaad23ce781e7643d9091e2f256c30d73b0b92682
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #ifndef incl_HPHP_SPARSE_ID_CONTAINERS_H_
17 #define incl_HPHP_SPARSE_ID_CONTAINERS_H_
19 #include <cstdlib>
20 #include <cstring>
21 #include <cassert>
22 #include <algorithm>
23 #include <type_traits>
24 #include <utility>
26 #include <folly/CPortability.h>
27 #include <folly/gen/String.h>
29 #include "hphp/util/compilation-flags.h"
30 #include "hphp/util/safe-cast.h"
32 namespace HPHP {
34 //////////////////////////////////////////////////////////////////////
37 * Time-efficient representations for sparse sets and sparse maps keyed with
38 * integers from a known universe (i.e. values from zero to some maximum). It
39 * also has the peculiar (but maybe occasionally useful) property of iteration
40 * order matching insertion order as long as you haven't erased anything yet.
42 * Space consumption is O(universe), where universe is the maximum value the
43 * set can hold. See the constructor for more information on this. The set
44 * version has several member functions with preconditions relating to universe
45 * size.
47 * The datastructure implemented here is from this:
49 * http://dl.acm.org/citation.cfm?id=176484
51 * Some notes about this implementation:
53 * o The set doesn't support set complement. It's O(universe), so if you
54 * need it it's probably better to use a bitset of some sort.
56 * o Iterators are invalidated on any call to a non-const member function.
58 * o Moving from a set or map leaves it in an undetermined (but valid)
59 * state. Notably this includes potentially changing its universe size.
61 * o Set operations involving multiple sets are generally only legal if both
62 * sets have the same universe size. Exceptions are expressions relating
63 * to EqualityComparable, Assignable, and swap(), for reasons relating to
64 * moving potentially changing universe size.
66 * o Lookups and insertions can be done through a different type than the
67 * containers actually hold. This is to ease using non-integer types as
68 * "keys" in these classes, as long as they can be mapped down to ids. An
69 * 'extractor' function object type can be provided as a template
70 * parameter to control how the mapping works.
72 * Also, note that for very small universes, even if the bits are sparse
73 * there's a good chance you'll be better off with some kind of bitset than the
74 * set version of this.
78 //////////////////////////////////////////////////////////////////////
80 namespace sparse_id_detail {
82 template<class T, class Lookup>
83 struct default_extract {
84 T operator()(Lookup l) const {
85 size_t convert = l;
86 return safe_cast<T>(convert);
92 //////////////////////////////////////////////////////////////////////
94 template<
95 class T,
96 class LookupT = T,
97 class Extract = sparse_id_detail::default_extract<T,LookupT>
99 struct sparse_id_set {
100 using value_type = T;
101 using size_type = value_type;
102 using const_iterator = const T*;
104 static_assert(
105 std::is_integral<T>::value && std::is_unsigned<T>::value,
106 "sparse_id_set is intended for use with unsigned integer types"
110 * When constructing a sparse_id_set, you must provide a 'universe size' for
111 * the ids. This is one greater than the maximum value you'll insert into
112 * the set.
114 * All functions dealing with values have a precondition that the values fit
115 * in the universe size, and most functions involving multiple sparse_id_sets
116 * (essentially everything except swap) will have a precondition that the two
117 * sets have the same universe size.
119 explicit sparse_id_set(T universe_size)
120 : m_universe_size{universe_size}
121 , m_next{0}
122 , m_mem{
123 universe_size
124 ? static_cast<T*>(std::malloc(sizeof(T) * universe_size * 2))
125 : nullptr
128 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
129 // it for valgrind or asan builds.
130 #if FOLLY_SANITIZE || defined(VALGRIND)
131 if (m_mem) {
132 std::memset(m_mem, 0, sizeof(T) * universe_size);
134 #endif
136 ~sparse_id_set() { if (m_universe_size) std::free(m_mem); }
139 * Copy this set from `o'.
141 * Post: operator==(o)
143 sparse_id_set(const sparse_id_set& o)
144 : sparse_id_set(o.m_universe_size)
146 *this |= o;
150 * Move construct a set from `o'.
152 * The set `o' is left in an unspecified but valid state. It's
153 * universe_size() is not even guaranteed to be the same after it is
154 * moved from.
156 sparse_id_set(sparse_id_set&& o) noexcept
157 : m_universe_size{o.m_universe_size}
158 , m_next{o.m_next}
159 , m_mem{o.m_mem}
161 o.m_universe_size = 0;
162 if (debug) {
163 o.m_mem = nullptr;
164 o.m_next = 0;
169 * Copy assignment.
171 * Post: operator==(o)
173 sparse_id_set& operator=(const sparse_id_set& o) {
174 if (m_universe_size == o.m_universe_size) {
175 clear();
176 *this |= o;
177 return *this;
179 sparse_id_set tmp(o);
180 swap(o);
181 return *this;
185 * Move assignment.
187 * Leaves `o' in an unspecified, but valid state. It may not have the same
188 * universe size that it had before being moved from.
190 sparse_id_set& operator=(sparse_id_set&& o) noexcept {
191 swap(o);
192 if (debug) {
193 // Make sure no one relies on the universe staying the same.
194 sparse_id_set tmp(0);
195 tmp.swap(o);
197 return *this;
201 * Returns the universe size of this sparse_id_set. Once created, a set's
202 * universe size can not change unless you move-construct or move-assign from
203 * it.
205 size_type universe_size() const { return m_universe_size; }
208 * Iteration. Make sure you don't mutate the set while you're using its
209 * iterators.
211 * The order of elements in the set is guaranteed to be the same as the order
212 * of insertion.
214 const_iterator begin() const { return const_iterator(dense()); }
215 const_iterator end() const { return const_iterator(dense() + m_next); }
216 const_iterator cbegin() const { return const_iterator(dense()); }
217 const_iterator cend() const { return const_iterator(dense() + m_next); }
220 * Since we iterate in insertion order, it might be convenient to ask what's
221 * at the front or the back. This class is definitely not a full model of
222 * Sequence, however.
224 T front() const { assert(!empty()); return dense()[0]; }
225 T back() const { assert(!empty()); return dense()[m_next - 1]; }
228 * Number of elements in this set.
230 size_type size() const { return m_next; }
233 * Returns: size() != 0
235 bool empty() const { return !size(); }
238 * Clear all members from the set. O(1).
240 * Post: empty()
242 void clear() { m_next = 0; }
245 * Returns: whether this sparse_id_set contains a particular value. O(1).
247 bool contains(LookupT lt) const {
248 return containsImpl(Extract()(lt));
252 * Returns: whether this sparse_id_set contains a particular value. O(1).
253 * Does not require that the id is in range.
255 bool contains_safe(LookupT lt) const {
256 auto const t = Extract()(lt);
257 return t < m_universe_size && containsImpl(t);
261 * Insert a new value into the set. O(1)
263 * Post: contains an element with the id of `lt'
265 void insert(LookupT lt) {
266 auto const t = Extract()(lt);
267 assert(t < m_universe_size);
268 if (containsImpl(t)) return;
269 dense()[m_next] = t;
270 sparse()[t] = m_next;
271 ++m_next;
275 * Remove an element from the set, if it is a member. (Does not assume that
276 * it is.)
278 * Post: !contains(lt)
280 void erase(LookupT lt) {
281 auto const t = Extract()(lt);
282 assert(t < m_universe_size);
283 // Swap with back element and update sparse ptrs.
284 auto const didx = sparse()[t]; // possibly reads uninitialized mem
285 if (didx >= m_next || dense()[didx] != t) return;
286 auto const moving = dense()[m_next - 1];
287 sparse()[moving] = didx;
288 dense()[didx] = moving;
289 --m_next;
290 // No need to write to sparse()[t]. If it's read, next and dense are
291 // rechecked to ensure it's actually relevant.
295 * These sets are EqualityComparable, even if they aren't from the save
296 * universe. (Rationale: if you move construct from something it's nice that
297 * it is still legal to compare to other things it was legally comparable to
298 * before that.)
300 * This returns whether the two sets have the same elements, regardless of
301 * the order they were inserted.
303 * But note that it's O(size()) worst case. You probably should just never
304 * use this function.
306 bool operator==(const sparse_id_set& o) const {
307 if (universe_size() != o.universe_size()) return false;
308 if (size() != o.size()) return false;
309 for (auto v : *this) if (!o.containsImpl(v)) return false;
310 return true;
312 bool operator!=(const sparse_id_set& o) const { return !(*this == o); }
315 * Union, difference and intersection operators.
317 * All of these operators are only provided as versions that modify the lhs
318 * in place. Idiomatic uses are going to involve updating id sets that
319 * already exist, so even with our move-construction support it will tend to
320 * involve allocations compared to mutation-based usage-styles.
322 * Union (|=) and difference (-=) are O(o.size()), while intersection (&=) is
323 * O(this->size()).
325 * Pre: universe_size() == o.universe_size()
327 sparse_id_set& operator|=(const sparse_id_set& o) {
328 assert(m_universe_size == o.m_universe_size);
329 for (auto t : o) insert(t);
330 return *this;
332 sparse_id_set& operator-=(const sparse_id_set& o) {
333 assert(m_universe_size == o.m_universe_size);
334 for (auto t : o) erase(t);
335 return *this;
337 sparse_id_set& operator&=(const sparse_id_set& o) {
338 assert(m_universe_size == o.m_universe_size);
339 auto fwd = T{0};
340 auto back = m_next;
341 while (fwd != back) {
342 assert(fwd < back);
343 if (!o.containsImpl(dense()[fwd])) {
344 auto const val = dense()[--back];
345 sparse()[val] = fwd;
346 dense()[fwd] = val;
347 } else {
348 ++fwd;
351 m_next = back;
352 return *this;
356 * Swap the contents of two sets.
358 * This function is unusual in that it does not have any preconditions about
359 * universe sizes matching.
361 void swap(sparse_id_set& o) noexcept {
362 std::swap(m_universe_size, o.m_universe_size);
363 std::swap(m_mem, o.m_mem);
364 std::swap(m_next, o.m_next);
368 * Convert a sparse id set to a std::string, intended for debug printing.
370 friend std::string show(const sparse_id_set& set) {
371 using namespace folly::gen;
372 return from(set)
373 | eachTo<std::string>()
374 | unsplit<std::string>(" ")
378 private:
379 bool containsImpl(T t) const {
380 assert(t < m_universe_size);
381 auto const didx = sparse()[t]; // may read uninitialized memory
382 return didx < m_next && dense()[didx] == t;
385 private:
386 T* sparse() { return m_mem; }
387 T* dense() { return m_mem + m_universe_size; }
388 const T* sparse() const { return m_mem; }
389 const T* dense() const { return m_mem + m_universe_size; }
391 private:
392 T m_universe_size;
393 T m_next;
394 T* m_mem;
397 //////////////////////////////////////////////////////////////////////
399 template<
400 class K,
401 class V,
402 class LookupKey = K,
403 class KExtract = sparse_id_detail::default_extract<K,LookupKey>
405 struct sparse_id_map {
406 using value_type = std::pair<const K,V>;
407 using size_type = K;
408 using const_iterator = const value_type*;
410 static_assert(
411 std::is_integral<K>::value && std::is_unsigned<K>::value,
412 "sparse_id_set is intended for use with unsigned integer types"
416 * When constructing a sparse_id_map, you must provide a 'universe size' for
417 * the ids. This is one greater than the maximum key you'll insert into the
418 * map.
420 explicit sparse_id_map(K universe_size)
421 : m_universe_size(universe_size)
422 , m_next{0}
423 , m_mem{
424 universe_size
425 ? std::malloc(sizeof(K) * universe_size +
426 sizeof(value_type) * universe_size)
427 : nullptr
430 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
431 // it for valgrind or asan builds.
432 #if FOLLY_SANITIZE || defined(VALGRIND)
433 if (m_mem) {
434 std::memset(m_mem, 0, sizeof(K) * universe_size);
436 #endif
438 ~sparse_id_map() {
439 if (!m_universe_size) return;
440 if (!std::is_trivially_destructible<V>::value) {
441 for (auto& kv : *this) {
442 kv.~value_type();
445 std::free(m_mem);
449 * Copy this map from `o'. Nothrow as long as V has a nothrow copy
450 * constructor.
452 * Post: operator==(o)
454 sparse_id_map(const sparse_id_map& o)
455 : m_universe_size{o.m_universe_size}
456 , m_next{o.m_next}
457 , m_mem{
458 m_universe_size
459 ? std::malloc(sizeof(K) * m_universe_size +
460 sizeof(value_type) * m_universe_size)
461 : nullptr
464 auto idx = K{0};
465 auto initialize = [&] {
466 for (; idx < m_next; ++idx) {
467 new (&dense()[idx]) value_type(o.dense()[idx]);
468 sparse()[o.dense()[idx].first] = idx;
471 if (std::is_trivially_destructible<V>::value ||
472 std::is_nothrow_copy_constructible<V>::value) {
473 initialize();
474 return;
477 try {
478 initialize();
479 } catch (...) {
480 while (idx-- > 0) {
481 dense()[idx].~value_type();
483 throw;
488 * Move construct a map from `o'. Leaves `o' in an unspecified but valid
489 * state. (Notably the universe size may be changed.)
491 * Nothrow guarantee.
493 sparse_id_map(sparse_id_map&& o) noexcept
494 : m_universe_size{o.m_universe_size}
495 , m_next{o.m_next}
496 , m_mem{o.m_mem}
498 o.m_universe_size = 0;
499 if (debug) {
500 o.m_mem = nullptr;
501 o.m_next = 0;
506 * Copy assignment. Make this map equivalent to `o'.
508 * Strong exception guarantee.
510 sparse_id_map& operator=(const sparse_id_map& o) {
511 sparse_id_map tmp(o);
512 swap(tmp);
513 return *this;
517 * Move assign from `o'.
519 * Leaves `o' in an unspecified but valid state. Notably the universe size
520 * may be changed.
522 * Nothrow guarantee.
524 sparse_id_map& operator=(sparse_id_map&& o) noexcept {
525 swap(o);
526 return *this;
530 * Returns the universe size of this sparse_id_map. Once created, a map's
531 * universe size can not change unless you move-construct or move-assign from
532 * it.
534 size_type universe_size() const { return m_universe_size; }
537 * Iteration. Make sure you don't mutate the map while you're using its
538 * iterators.
540 * The order of elements in the map is guaranteed to be the same as the order
541 * of insertion.
543 const_iterator begin() const { return const_iterator(dense()); }
544 const_iterator end() const { return const_iterator(dense() + m_next); }
545 const_iterator cbegin() const { return const_iterator(dense()); }
546 const_iterator cend() const { return const_iterator(dense() + m_next); }
549 * Since we iterate in insertion order, it might be convenient to ask what's
550 * at the front or the back. This class is definitely not a full model of
551 * Sequence, however.
553 const value_type& front() const {
554 assert(!empty());
555 return dense()[0];
557 const value_type& back() const {
558 assert(!empty());
559 return dense()[m_next - 1];
563 * Number of elements in this map.
565 size_type size() const { return m_next; }
568 * Returns: size() != 0
570 bool empty() const { return !size(); }
573 * Clear all members from the map. O(1) if V is trivially destructable,
574 * O(size()) if not.
576 * Post: empty()
578 void clear() {
579 if (!std::is_trivially_destructible<V>::value) {
580 for (auto& kv : *this) {
581 kv.~value_type();
584 m_next = 0;
588 * Returns: whether this sparse_id_map contains a particular key. O(1).
590 bool contains(LookupKey lk) const {
591 return containsImpl(KExtract()(lk));
595 * Returns: whether this sparse_id_map contains a particular value. O(1).
596 * Does not require that the id is in range.
598 bool contains_safe(LookupKey lk) const {
599 auto const k = KExtract()(lk);
600 return k < m_universe_size && containsImpl(k);
604 * Get a reference to the value for key `k', inserting it with a default
605 * constructed value if it doesn't exist. Strong guarantee.
607 V& operator[](LookupKey lk) {
608 auto const k = KExtract()(lk);
609 if (!containsImpl(k)) insert(std::make_pair(k, V{}));
610 return dense()[sparse()[k]].second;
614 * Insert a new value into the set. O(1). Strong exception guarantee.
616 * Post: contains an element with id v.first
618 void insert(const value_type& v) {
619 assert(v.first < m_universe_size);
620 if (containsImpl(v.first)) return;
621 new (&dense()[m_next]) value_type(v);
622 sparse()[v.first] = m_next;
623 ++m_next;
627 * Insert a new value into the set, moving it if we need it. O(1). Strong
628 * exception guarantee.
630 * Post: contains an element with id v.first
632 void insert(value_type&& v) {
633 assert(v.first < m_universe_size);
634 if (containsImpl(v.first)) return;
635 new (&dense()[m_next]) value_type(std::move(v));
636 sparse()[v.first] = m_next;
637 ++m_next;
641 * Remove an element from the set, if it is a member. (Does not assume that
642 * it is.) No throw as long as V has a nothrow move assignment operator.
643 * Strong guarantee otherwise.
645 * Post: !contains(lk)
647 void erase(LookupKey lk) {
648 auto const key = KExtract()(lk);
649 assert(key < m_universe_size);
650 // Move in back element and update sparse ptrs.
651 auto const didx = sparse()[key]; // possibly reads uninitialized mem
652 if (didx >= m_next || dense()[didx].first != key) return;
653 auto& moving = dense()[m_next - 1];
654 auto const moved_key = moving.first;
655 if (didx < m_next - 1) {
656 dense()[didx].second = std::move(moving.second);
657 const_cast<K&>(dense()[didx].first) = moved_key;
659 sparse()[moved_key] = didx;
660 dense()[m_next - 1].~value_type();
661 --m_next;
662 // No need to write to sparse()[t]. If it's read, next and dense are
663 // rechecked to ensure it's actually relevant.
667 * Model EqualityComparable, as long as the value is EqualityComparable.
668 * Note that it's O(size()) worst case.
670 * This returns whether the two maps have equivalent key value pairs,
671 * regardless of the order they were inserted.
673 bool operator==(const sparse_id_map& o) const {
674 if (universe_size() != o.universe_size()) return false;
675 if (size() != o.size()) return false;
676 for (auto& kv : *this) {
677 if (!o.containsImpl(kv.first)) return false;
678 if (!(o.dense()[o.sparse()[kv.first]].second == kv.second)) {
679 return false;
682 return true;
684 bool operator!=(const sparse_id_map& o) const { return !(*this == o); }
687 * Merge a map into this one by intersecting the keys, and using a
688 * user-defined function to merge values that were present in both this and
689 * `o'. The user-defined value merge function should return a bool,
690 * indicating whether the value should be considered changed.
692 * Basic guarantee only. Nothrow if V has a nothrow move constructor or a
693 * nothrow copy constructor.
695 * Complexity: O(size()).
697 * Returns: true if this map changed keys, or if the user-supplied function
698 * returned true for any pair of values.
700 * Pre: universe_size() == o.universe_size()
702 template<class Fun>
703 bool merge(const sparse_id_map& o, Fun val_merge) {
704 assert(m_universe_size == o.m_universe_size);
705 auto fwd = K{0};
706 auto changed = false;
707 while (fwd != m_next) {
708 assert(fwd < m_next);
709 auto const k = dense()[fwd].first;
710 if (!o.containsImpl(k)) {
711 changed = true;
712 if (fwd == m_next - 1) { // Avoid self-move assigning values.
713 --m_next;
714 continue;
716 // Order here is important for exception safety: we can't decrement
717 // m_next until we've moved-from and then destroyed the old value.
718 auto& tomove = dense()[m_next - 1];
719 sparse()[tomove.first] = fwd;
720 dense()[fwd].second = std::move(tomove.second);
721 const_cast<K&>(dense()[fwd].first) = tomove.first;
722 tomove.~value_type();
723 --m_next;
724 continue;
726 if (val_merge(dense()[fwd].second, o.dense()[o.sparse()[k]].second)) {
727 changed = true;
729 ++fwd;
731 return changed;
735 * Swap the contents of two maps.
737 void swap(sparse_id_map& o) noexcept {
738 std::swap(m_universe_size, o.m_universe_size);
739 std::swap(m_mem, o.m_mem);
740 std::swap(m_next, o.m_next);
743 private:
744 bool containsImpl(K k) const {
745 assert(k < m_universe_size);
746 auto const didx = sparse()[k]; // may read uninitialized memory
747 return didx < m_next && dense()[didx].first == k;
750 private:
751 K* sparse() { return static_cast<K*>(m_mem); }
752 const K* sparse() const { return static_cast<K*>(m_mem); }
753 value_type* dense() {
754 void* vpDense = sparse() + m_universe_size;
755 return static_cast<value_type*>(vpDense);
757 const value_type* dense() const {
758 return const_cast<sparse_id_map*>(this)->dense();
761 private:
762 K m_universe_size;
763 K m_next;
764 void* m_mem;
767 //////////////////////////////////////////////////////////////////////
769 // Non-member swaps for ADL swap idiom.
771 template<class T, class LT, class EX>
772 void swap(sparse_id_set<T,LT,EX>& a, sparse_id_set<T,LT,EX>& b) {
773 a.swap(b);
776 template<class K, class V, class LK, class LKE>
777 void swap(sparse_id_map<K,V,LK,LKE>& a, sparse_id_map<K,V,LK,LKE>& b) {
778 a.swap(b);
781 //////////////////////////////////////////////////////////////////////
785 #endif