This fixes a bug in PHP/HH's crypt_blowfish implementation that can cause a short...
[hiphop-php.git] / hphp / util / sparse-id-containers.h
blob77d8c61d895cbea12c8a0592a5ffc69d97420111
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #pragma once
18 #include <cstdlib>
19 #include <cstring>
20 #include <cassert>
21 #include <algorithm>
22 #include <type_traits>
23 #include <utility>
25 #include <folly/CPortability.h>
26 #include <folly/gen/String.h>
28 #include "hphp/util/compilation-flags.h"
29 #include "hphp/util/safe-cast.h"
31 namespace HPHP {
33 //////////////////////////////////////////////////////////////////////
36 * Time-efficient representations for sparse sets and sparse maps keyed with
37 * integers from a known universe (i.e. values from zero to some maximum). It
38 * also has the peculiar (but maybe occasionally useful) property of iteration
39 * order matching insertion order as long as you haven't erased anything yet.
41 * Space consumption is O(universe), where universe is the maximum value the
42 * set can hold. See the constructor for more information on this. The set
43 * version has several member functions with preconditions relating to universe
44 * size.
46 * The datastructure implemented here is from this:
48 * http://dl.acm.org/citation.cfm?id=176484
50 * Some notes about this implementation:
52 * o The set doesn't support set complement. It's O(universe), so if you
53 * need it it's probably better to use a bitset of some sort.
55 * o Iterators are invalidated on any call to a non-const member function.
57 * o Moving from a set or map leaves it in an undetermined (but valid)
58 * state. Notably this includes potentially changing its universe size.
60 * o Set operations involving multiple sets are generally only legal if both
61 * sets have the same universe size. Exceptions are expressions relating
62 * to EqualityComparable, Assignable, and swap(), for reasons relating to
63 * moving potentially changing universe size.
65 * o Lookups and insertions can be done through a different type than the
66 * containers actually hold. This is to ease using non-integer types as
67 * "keys" in these classes, as long as they can be mapped down to ids. An
68 * 'extractor' function object type can be provided as a template
69 * parameter to control how the mapping works.
71 * Also, note that for very small universes, even if the bits are sparse
72 * there's a good chance you'll be better off with some kind of bitset than the
73 * set version of this.
77 //////////////////////////////////////////////////////////////////////
79 namespace sparse_id_detail {
81 template<class T, class Lookup>
82 struct default_extract {
83 T operator()(Lookup l) const {
84 size_t convert = l;
85 return safe_cast<T>(convert);
91 //////////////////////////////////////////////////////////////////////
93 template<
94 class T,
95 class LookupT = T,
96 class Extract = sparse_id_detail::default_extract<T,LookupT>
98 struct sparse_id_set {
99 using value_type = T;
100 using size_type = value_type;
101 using const_iterator = const T*;
103 static_assert(
104 std::is_integral<T>::value && std::is_unsigned<T>::value,
105 "sparse_id_set is intended for use with unsigned integer types"
109 * When constructing a sparse_id_set, you must provide a 'universe size' for
110 * the ids. This is one greater than the maximum value you'll insert into
111 * the set.
113 * All functions dealing with values have a precondition that the values fit
114 * in the universe size, and most functions involving multiple sparse_id_sets
115 * (essentially everything except swap) will have a precondition that the two
116 * sets have the same universe size.
118 explicit sparse_id_set(T universe_size)
119 : m_universe_size{universe_size}
120 , m_next{0}
121 , m_mem{
122 universe_size
123 ? static_cast<T*>(std::malloc(sizeof(T) * universe_size * 2))
124 : nullptr
127 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
128 // it for valgrind or asan builds.
129 #if FOLLY_SANITIZE || defined(VALGRIND)
130 if (m_mem) {
131 std::memset(m_mem, 0, sizeof(T) * universe_size);
133 #endif
135 ~sparse_id_set() { if (m_universe_size) std::free(m_mem); }
138 * Copy this set from `o'.
140 * Post: operator==(o)
142 sparse_id_set(const sparse_id_set& o)
143 : sparse_id_set(o.m_universe_size)
145 *this |= o;
149 * Move construct a set from `o'.
151 * The set `o' is left in an unspecified but valid state. It's
152 * universe_size() is not even guaranteed to be the same after it is
153 * moved from.
155 sparse_id_set(sparse_id_set&& o) noexcept
156 : m_universe_size{o.m_universe_size}
157 , m_next{o.m_next}
158 , m_mem{o.m_mem}
160 o.m_universe_size = 0;
161 if (debug) {
162 o.m_mem = nullptr;
163 o.m_next = 0;
168 * Copy assignment.
170 * Post: operator==(o)
172 sparse_id_set& operator=(const sparse_id_set& o) {
173 if (m_universe_size == o.m_universe_size) {
174 clear();
175 *this |= o;
176 return *this;
178 sparse_id_set tmp(o);
179 swap(o);
180 return *this;
184 * Move assignment.
186 * Leaves `o' in an unspecified, but valid state. It may not have the same
187 * universe size that it had before being moved from.
189 sparse_id_set& operator=(sparse_id_set&& o) noexcept {
190 swap(o);
191 if (debug) {
192 // Make sure no one relies on the universe staying the same.
193 sparse_id_set tmp(0);
194 tmp.swap(o);
196 return *this;
200 * Returns the universe size of this sparse_id_set. Once created, a set's
201 * universe size can not change unless you move-construct or move-assign from
202 * it.
204 size_type universe_size() const { return m_universe_size; }
207 * Iteration. Make sure you don't mutate the set while you're using its
208 * iterators.
210 * The order of elements in the set is guaranteed to be the same as the order
211 * of insertion.
213 const_iterator begin() const { return const_iterator(dense()); }
214 const_iterator end() const { return const_iterator(dense() + m_next); }
215 const_iterator cbegin() const { return const_iterator(dense()); }
216 const_iterator cend() const { return const_iterator(dense() + m_next); }
219 * Since we iterate in insertion order, it might be convenient to ask what's
220 * at the front or the back. This class is definitely not a full model of
221 * Sequence, however.
223 T front() const { assert(!empty()); return dense()[0]; }
224 T back() const { assert(!empty()); return dense()[m_next - 1]; }
227 * Number of elements in this set.
229 size_type size() const { return m_next; }
232 * Returns: size() != 0
234 bool empty() const { return !size(); }
237 * Clear all members from the set. O(1).
239 * Post: empty()
241 void clear() { m_next = 0; }
244 * Returns: whether this sparse_id_set contains a particular value. O(1).
246 bool contains(LookupT lt) const {
247 return containsImpl(Extract()(lt));
251 * Returns: whether this sparse_id_set contains a particular value. O(1).
252 * Does not require that the id is in range.
254 bool contains_safe(LookupT lt) const {
255 auto const t = Extract()(lt);
256 return t < m_universe_size && containsImpl(t);
260 * Insert a new value into the set. O(1)
262 * Post: contains an element with the id of `lt'
264 void insert(LookupT lt) {
265 auto const t = Extract()(lt);
266 assert(t < m_universe_size);
267 if (containsImpl(t)) return;
268 dense()[m_next] = t;
269 sparse()[t] = m_next;
270 ++m_next;
274 * Remove an element from the set, if it is a member. (Does not assume that
275 * it is.)
277 * Post: !contains(lt)
279 void erase(LookupT lt) {
280 auto const t = Extract()(lt);
281 assert(t < m_universe_size);
282 // Swap with back element and update sparse ptrs.
283 auto const didx = sparse()[t]; // possibly reads uninitialized mem
284 if (didx >= m_next || dense()[didx] != t) return;
285 auto const moving = dense()[m_next - 1];
286 sparse()[moving] = didx;
287 dense()[didx] = moving;
288 --m_next;
289 // No need to write to sparse()[t]. If it's read, next and dense are
290 // rechecked to ensure it's actually relevant.
294 * These sets are EqualityComparable, even if they aren't from the save
295 * universe. (Rationale: if you move construct from something it's nice that
296 * it is still legal to compare to other things it was legally comparable to
297 * before that.)
299 * This returns whether the two sets have the same elements, regardless of
300 * the order they were inserted.
302 * But note that it's O(size()) worst case. You probably should just never
303 * use this function.
305 bool operator==(const sparse_id_set& o) const {
306 if (universe_size() != o.universe_size()) return false;
307 if (size() != o.size()) return false;
308 for (auto v : *this) if (!o.containsImpl(v)) return false;
309 return true;
311 bool operator!=(const sparse_id_set& o) const { return !(*this == o); }
314 * Union, difference and intersection operators.
316 * All of these operators are only provided as versions that modify the lhs
317 * in place. Idiomatic uses are going to involve updating id sets that
318 * already exist, so even with our move-construction support it will tend to
319 * involve allocations compared to mutation-based usage-styles.
321 * Union (|=) and difference (-=) are O(o.size()), while intersection (&=) is
322 * O(this->size()).
324 * Pre: universe_size() == o.universe_size()
326 sparse_id_set& operator|=(const sparse_id_set& o) {
327 assert(m_universe_size == o.m_universe_size);
328 for (auto t : o) insert(t);
329 return *this;
331 sparse_id_set& operator-=(const sparse_id_set& o) {
332 assert(m_universe_size == o.m_universe_size);
333 for (auto t : o) erase(t);
334 return *this;
336 sparse_id_set& operator&=(const sparse_id_set& o) {
337 assert(m_universe_size == o.m_universe_size);
338 auto fwd = T{0};
339 auto back = m_next;
340 while (fwd != back) {
341 assert(fwd < back);
342 if (!o.containsImpl(dense()[fwd])) {
343 auto const val = dense()[--back];
344 sparse()[val] = fwd;
345 dense()[fwd] = val;
346 } else {
347 ++fwd;
350 m_next = back;
351 return *this;
355 * Swap the contents of two sets.
357 * This function is unusual in that it does not have any preconditions about
358 * universe sizes matching.
360 void swap(sparse_id_set& o) noexcept {
361 std::swap(m_universe_size, o.m_universe_size);
362 std::swap(m_mem, o.m_mem);
363 std::swap(m_next, o.m_next);
367 * Convert a sparse id set to a std::string, intended for debug printing.
369 friend std::string show(const sparse_id_set& set) {
370 using namespace folly::gen;
371 return from(set)
372 | eachTo<std::string>()
373 | unsplit<std::string>(" ")
377 private:
378 bool containsImpl(T t) const {
379 assert(t < m_universe_size);
380 auto const didx = sparse()[t]; // may read uninitialized memory
381 return didx < m_next && dense()[didx] == t;
384 private:
385 T* sparse() { return m_mem; }
386 T* dense() { return m_mem + m_universe_size; }
387 const T* sparse() const { return m_mem; }
388 const T* dense() const { return m_mem + m_universe_size; }
390 private:
391 T m_universe_size;
392 T m_next;
393 T* m_mem;
396 //////////////////////////////////////////////////////////////////////
398 template<
399 class K,
400 class V,
401 class LookupKey = K,
402 class KExtract = sparse_id_detail::default_extract<K,LookupKey>
404 struct sparse_id_map {
405 using value_type = std::pair<const K,V>;
406 using size_type = K;
407 using const_iterator = const value_type*;
409 static_assert(
410 std::is_integral<K>::value && std::is_unsigned<K>::value,
411 "sparse_id_set is intended for use with unsigned integer types"
415 * When constructing a sparse_id_map, you must provide a 'universe size' for
416 * the ids. This is one greater than the maximum key you'll insert into the
417 * map.
419 explicit sparse_id_map(K universe_size)
420 : m_universe_size(universe_size)
421 , m_next{0}
422 , m_mem{
423 universe_size
424 ? std::malloc(sizeof(K) * universe_size +
425 sizeof(value_type) * universe_size)
426 : nullptr
429 // Note: the sparse part of m_mem is deliberately uninitialized, but we do
430 // it for valgrind or asan builds.
431 #if FOLLY_SANITIZE || defined(VALGRIND)
432 if (m_mem) {
433 std::memset(m_mem, 0, sizeof(K) * universe_size);
435 #endif
437 ~sparse_id_map() {
438 if (!m_universe_size) return;
439 if (!std::is_trivially_destructible<V>::value) {
440 for (auto& kv : *this) {
441 kv.~value_type();
444 std::free(m_mem);
448 * Copy this map from `o'. Nothrow as long as V has a nothrow copy
449 * constructor.
451 * Post: operator==(o)
453 sparse_id_map(const sparse_id_map& o)
454 : m_universe_size{o.m_universe_size}
455 , m_next{o.m_next}
456 , m_mem{
457 m_universe_size
458 ? std::malloc(sizeof(K) * m_universe_size +
459 sizeof(value_type) * m_universe_size)
460 : nullptr
463 auto idx = K{0};
464 auto initialize = [&] {
465 for (; idx < m_next; ++idx) {
466 new (&dense()[idx]) value_type(o.dense()[idx]);
467 sparse()[o.dense()[idx].first] = idx;
470 if (std::is_trivially_destructible<V>::value ||
471 std::is_nothrow_copy_constructible<V>::value) {
472 initialize();
473 return;
476 try {
477 initialize();
478 } catch (...) {
479 while (idx-- > 0) {
480 dense()[idx].~value_type();
482 throw;
487 * Move construct a map from `o'. Leaves `o' in an unspecified but valid
488 * state. (Notably the universe size may be changed.)
490 * Nothrow guarantee.
492 sparse_id_map(sparse_id_map&& o) noexcept
493 : m_universe_size{o.m_universe_size}
494 , m_next{o.m_next}
495 , m_mem{o.m_mem}
497 o.m_universe_size = 0;
498 if (debug) {
499 o.m_mem = nullptr;
500 o.m_next = 0;
505 * Copy assignment. Make this map equivalent to `o'.
507 * Strong exception guarantee.
509 sparse_id_map& operator=(const sparse_id_map& o) {
510 sparse_id_map tmp(o);
511 swap(tmp);
512 return *this;
516 * Move assign from `o'.
518 * Leaves `o' in an unspecified but valid state. Notably the universe size
519 * may be changed.
521 * Nothrow guarantee.
523 sparse_id_map& operator=(sparse_id_map&& o) noexcept {
524 swap(o);
525 return *this;
529 * Returns the universe size of this sparse_id_map. Once created, a map's
530 * universe size can not change unless you move-construct or move-assign from
531 * it.
533 size_type universe_size() const { return m_universe_size; }
536 * Iteration. Make sure you don't mutate the map while you're using its
537 * iterators.
539 * The order of elements in the map is guaranteed to be the same as the order
540 * of insertion.
542 const_iterator begin() const { return const_iterator(dense()); }
543 const_iterator end() const { return const_iterator(dense() + m_next); }
544 const_iterator cbegin() const { return const_iterator(dense()); }
545 const_iterator cend() const { return const_iterator(dense() + m_next); }
548 * Since we iterate in insertion order, it might be convenient to ask what's
549 * at the front or the back. This class is definitely not a full model of
550 * Sequence, however.
552 const value_type& front() const {
553 assert(!empty());
554 return dense()[0];
556 const value_type& back() const {
557 assert(!empty());
558 return dense()[m_next - 1];
562 * Number of elements in this map.
564 size_type size() const { return m_next; }
567 * Returns: size() != 0
569 bool empty() const { return !size(); }
572 * Clear all members from the map. O(1) if V is trivially destructible,
573 * O(size()) if not.
575 * Post: empty()
577 void clear() {
578 if (!std::is_trivially_destructible<V>::value) {
579 for (auto& kv : *this) {
580 kv.~value_type();
583 m_next = 0;
587 * Returns: whether this sparse_id_map contains a particular key. O(1).
589 bool contains(LookupKey lk) const {
590 return containsImpl(KExtract()(lk));
594 * Returns: whether this sparse_id_map contains a particular value. O(1).
595 * Does not require that the id is in range.
597 bool contains_safe(LookupKey lk) const {
598 auto const k = KExtract()(lk);
599 return k < m_universe_size && containsImpl(k);
603 * Get a reference to the value for key `k', inserting it with a default
604 * constructed value if it doesn't exist. Strong guarantee.
606 V& operator[](LookupKey lk) {
607 auto const k = KExtract()(lk);
608 if (!containsImpl(k)) insert(std::make_pair(k, V{}));
609 return dense()[sparse()[k]].second;
613 * Insert a new value into the set. O(1). Strong exception guarantee.
615 * Post: contains an element with id v.first
617 void insert(const value_type& v) {
618 assert(v.first < m_universe_size);
619 if (containsImpl(v.first)) return;
620 new (&dense()[m_next]) value_type(v);
621 sparse()[v.first] = m_next;
622 ++m_next;
626 * Insert a new value into the set, moving it if we need it. O(1). Strong
627 * exception guarantee.
629 * Post: contains an element with id v.first
631 void insert(value_type&& v) {
632 assert(v.first < m_universe_size);
633 if (containsImpl(v.first)) return;
634 new (&dense()[m_next]) value_type(std::move(v));
635 sparse()[v.first] = m_next;
636 ++m_next;
640 * Remove an element from the set, if it is a member. (Does not assume that
641 * it is.) No throw as long as V has a nothrow move assignment operator.
642 * Strong guarantee otherwise.
644 * Post: !contains(lk)
646 void erase(LookupKey lk) {
647 auto const key = KExtract()(lk);
648 assert(key < m_universe_size);
649 // Move in back element and update sparse ptrs.
650 auto const didx = sparse()[key]; // possibly reads uninitialized mem
651 if (didx >= m_next || dense()[didx].first != key) return;
652 auto& moving = dense()[m_next - 1];
653 auto const moved_key = moving.first;
654 if (didx < m_next - 1) {
655 dense()[didx].second = std::move(moving.second);
656 const_cast<K&>(dense()[didx].first) = moved_key;
658 sparse()[moved_key] = didx;
659 dense()[m_next - 1].~value_type();
660 --m_next;
661 // No need to write to sparse()[t]. If it's read, next and dense are
662 // rechecked to ensure it's actually relevant.
666 * Model EqualityComparable, as long as the value is EqualityComparable.
667 * Note that it's O(size()) worst case.
669 * This returns whether the two maps have equivalent key value pairs,
670 * regardless of the order they were inserted.
672 bool operator==(const sparse_id_map& o) const {
673 if (universe_size() != o.universe_size()) return false;
674 if (size() != o.size()) return false;
675 for (auto& kv : *this) {
676 if (!o.containsImpl(kv.first)) return false;
677 if (!(o.dense()[o.sparse()[kv.first]].second == kv.second)) {
678 return false;
681 return true;
683 bool operator!=(const sparse_id_map& o) const { return !(*this == o); }
686 * Merge a map into this one by intersecting the keys, and using a
687 * user-defined function to merge values that were present in both this and
688 * `o'. The user-defined value merge function should return a bool,
689 * indicating whether the value should be considered changed.
691 * Basic guarantee only. Nothrow if V has a nothrow move constructor or a
692 * nothrow copy constructor.
694 * Complexity: O(size()).
696 * Returns: true if this map changed keys, or if the user-supplied function
697 * returned true for any pair of values.
699 * Pre: universe_size() == o.universe_size()
701 template<class Fun>
702 bool merge(const sparse_id_map& o, Fun val_merge) {
703 assert(m_universe_size == o.m_universe_size);
704 auto fwd = K{0};
705 auto changed = false;
706 while (fwd != m_next) {
707 assert(fwd < m_next);
708 auto const k = dense()[fwd].first;
709 if (!o.containsImpl(k)) {
710 changed = true;
711 if (fwd == m_next - 1) { // Avoid self-move assigning values.
712 --m_next;
713 continue;
715 // Order here is important for exception safety: we can't decrement
716 // m_next until we've moved-from and then destroyed the old value.
717 auto& tomove = dense()[m_next - 1];
718 sparse()[tomove.first] = fwd;
719 dense()[fwd].second = std::move(tomove.second);
720 const_cast<K&>(dense()[fwd].first) = tomove.first;
721 tomove.~value_type();
722 --m_next;
723 continue;
725 if (val_merge(dense()[fwd].second, o.dense()[o.sparse()[k]].second)) {
726 changed = true;
728 ++fwd;
730 return changed;
734 * Swap the contents of two maps.
736 void swap(sparse_id_map& o) noexcept {
737 std::swap(m_universe_size, o.m_universe_size);
738 std::swap(m_mem, o.m_mem);
739 std::swap(m_next, o.m_next);
742 private:
743 bool containsImpl(K k) const {
744 assert(k < m_universe_size);
745 auto const didx = sparse()[k]; // may read uninitialized memory
746 return didx < m_next && dense()[didx].first == k;
749 private:
750 K* sparse() { return static_cast<K*>(m_mem); }
751 const K* sparse() const { return static_cast<K*>(m_mem); }
752 value_type* dense() {
753 void* vpDense = sparse() + m_universe_size;
754 return static_cast<value_type*>(vpDense);
756 const value_type* dense() const {
757 return const_cast<sparse_id_map*>(this)->dense();
760 private:
761 K m_universe_size;
762 K m_next;
763 void* m_mem;
766 //////////////////////////////////////////////////////////////////////
768 // Non-member swaps for ADL swap idiom.
770 template<class T, class LT, class EX>
771 void swap(sparse_id_set<T,LT,EX>& a, sparse_id_set<T,LT,EX>& b) {
772 a.swap(b);
775 template<class K, class V, class LK, class LKE>
776 void swap(sparse_id_map<K,V,LK,LKE>& a, sparse_id_map<K,V,LK,LKE>& b) {
777 a.swap(b);
780 //////////////////////////////////////////////////////////////////////