1 /* Copyright (C) 2012-2018 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
10 GCC is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
13 License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License
20 and a copy of the GCC Runtime Library Exception along with this
21 program; see the files COPYING3 and COPYING.RUNTIME respectively.
22 If not, see <http://www.gnu.org/licenses/>. */
27 /* Code in this file manages a collection of insert-only sets. We
28 have only tested the case where Key is uintptr_t, though it
29 theoretically should work for some other cases. All odd keys are
30 reserved, and must not be inserted into any of the sets. This code
31 is intended primarily for sets of pointers, and the code is
32 optimized for small sets (including size 0 and 1), but regardless
33 of the set size, insert() and contains() have close to O(1) speed
36 TODO(gpike): fix this comment.
38 Recommended multithreaded use of a set:
40 For speed, we want to use a lock-free test for set membership. The
41 code handles simultaneous reads and inserts, as long as at most one
42 insertion is in progress at a time. After an insert, other threads
43 may not immediately "see" the inserted key if they perform a
44 lock-free read, so we recommend retrying, as explained below.
46 Also, to make data corruption less likely, we recommend using a
47 "normal" RW page as well as one or pages that are typically RO
48 but that can be switched to RW and back as needed. The latter
49 pages should contain sets. The former should contain a lock, L,
50 and an int or similar, num_writers. Then, to insert, something
51 like this would be safe:
53 o Increment num_writers; if that made it 1, change pages to RW.
55 o while (there are insertions to do in some set, S) {
57 do some insertions in S;
61 o Decrement num_writers; if that made it 0, change pages to RO.
64 And to check if the set contains some key, one could use
66 ({ Acquire L; bool b = set.contains(key); Release L; b; })
68 In this scheme, the number of threads with reads in progress isn't
69 tracked, so old sets can never be deleted. In addition, on some
70 architectures the intentionally racy reads might cause contains()
71 to return true when it should have returned false. This should be
72 no problem on x86, and most other machines, where reading or
73 writing an aligned uintptr_t is atomic. E.g., on those machines,
74 if *p is 0 and one thread does *p = x while another reads *p, the
75 read will see either 0 or x.
77 To make the above easier, the insert_only_hash_sets class provides
78 an interface to manipulate any number of hash sets. One shouldn't
79 create objects of that class, as it has no member data and its
82 So the recommended model is to have a single lock, a single
83 num_writers variable, and some number of sets. If lock contention
84 becomes a problem then the sets can be divided into k groups, each
85 of which has a lock and a num_writers variable; or each set can be
86 represented as a set of values that equal 0 mod m, a set of values
87 that equal 1 mod m, ..., plus a set of values that equal m-1 mod m.
89 However, we expect most or all uses of this code to call contains()
90 much more frequently than anything else, so lock contention is
95 #ifndef HASHTABLE_STATS
96 #define HASHTABLE_STATS 0
99 #ifndef HASHTABLE_STATS_ATOMIC
100 #define HASHTABLE_STATS_ATOMIC 0
104 #if HASHTABLE_STATS_ATOMIC
105 /* Stat counters, with atomics. */
106 #include <bits/atomic_word.h>
108 typedef _Atomic_word _AtomicStatCounter
;
111 inc_by (_AtomicStatCounter
&stat
, int amount
)
113 __atomic_add_fetch (&stat
, amount
, __ATOMIC_ACQ_REL
);
118 /* Stat counters, but without atomics. */
119 typedef int _AtomicStatCounter
;
122 inc_by (_AtomicStatCounter
& stat
, int amount
)
130 /* Number of calls to contains(), insert(), etc. */
131 extern _AtomicStatCounter stat_insert
;
132 extern _AtomicStatCounter stat_contains
;
133 extern _AtomicStatCounter stat_resize
;
134 extern _AtomicStatCounter stat_create
;
136 /* Sum of set size over all calls to contains(). */
137 extern _AtomicStatCounter stat_contains_sizes
;
139 /* contains() calls in a set whose capacity is more than 1. */
140 extern _AtomicStatCounter stat_contains_in_non_trivial_set
;
142 /* Probes in a set whose capacity is more than 1. Ideally, this will
143 be pretty close to stat_contains_in_non_trivial_set. That will
144 happen if our hash function is good and/or important keys were
145 inserted before unimportant keys. */
146 extern _AtomicStatCounter stat_probes_in_non_trivial_set
;
148 /* number of calls to contains() with size=0, 1, etc. */
149 extern _AtomicStatCounter stat_contains_size0
;
150 extern _AtomicStatCounter stat_contains_size1
;
151 extern _AtomicStatCounter stat_contains_size2
;
152 extern _AtomicStatCounter stat_contains_size3
;
153 extern _AtomicStatCounter stat_contains_size4
;
154 extern _AtomicStatCounter stat_contains_size5
;
155 extern _AtomicStatCounter stat_contains_size6
;
156 extern _AtomicStatCounter stat_contains_size7
;
157 extern _AtomicStatCounter stat_contains_size8
;
158 extern _AtomicStatCounter stat_contains_size9
;
159 extern _AtomicStatCounter stat_contains_size10
;
160 extern _AtomicStatCounter stat_contains_size11
;
161 extern _AtomicStatCounter stat_contains_size12
;
162 extern _AtomicStatCounter stat_contains_size13_or_more
;
163 extern _AtomicStatCounter stat_grow_from_size0_to_1
;
164 extern _AtomicStatCounter stat_grow_from_size1_to_2
;
165 extern _AtomicStatCounter stat_double_the_number_of_buckets
;
166 extern _AtomicStatCounter stat_insert_key_that_was_already_present
;
168 /* Hash collisions detected during insert_no_resize(). Only counts
169 hasher(k) == hasher(k'); hasher(k) % tablesize == hasher(k') %
170 tablesize is not sufficient. Will count collisions that are
171 detected during table resizes etc., so the same two keys may add to
172 this stat multiple times. */
173 extern _AtomicStatCounter stat_insert_found_hash_collision
;
177 struct insert_only_hash_sets_logger
180 log (char c
, char *buf
)
187 log (const char *s
, char *buf
)
188 { return strcpy (buf
, s
) + strlen (s
); }
191 log (_AtomicStatCounter i
, char *buf
)
194 return log ((char) ('0' + i
), buf
);
196 return log ((char) ('0' + i
% 10), log (i
/ 10, buf
));
200 log (const char *label
, _AtomicStatCounter i
, char *buf
)
202 buf
= log (label
, buf
);
203 buf
= log (": ", buf
);
205 return log ('\n', buf
);
209 // Write stats to the given buffer, which should be at least 4000 bytes.
211 insert_only_hash_tables_stats (char *buf
)
213 buf
= insert_only_hash_sets_logger::log ("insert", stat_insert
, buf
);
214 buf
= insert_only_hash_sets_logger::log ("contains", stat_contains
, buf
);
215 buf
= insert_only_hash_sets_logger::log ("resize", stat_resize
, buf
);
216 buf
= insert_only_hash_sets_logger::log ("create", stat_create
, buf
);
217 buf
= insert_only_hash_sets_logger::log ("insert_key_that_was_already_"
219 stat_insert_key_that_was_already_present
,
221 buf
= insert_only_hash_sets_logger::log ("contains_sizes",
222 stat_contains_sizes
, buf
);
223 buf
= insert_only_hash_sets_logger::log ("contains_in_non_trivial_set",
224 stat_contains_in_non_trivial_set
,
226 buf
= insert_only_hash_sets_logger::log ("probes_in_non_trivial_set",
227 stat_probes_in_non_trivial_set
,
229 buf
= insert_only_hash_sets_logger::log ("contains_size0",
230 stat_contains_size0
, buf
);
231 buf
= insert_only_hash_sets_logger::log ("contains_size1",
232 stat_contains_size1
, buf
);
233 buf
= insert_only_hash_sets_logger::log ("contains_size2",
234 stat_contains_size2
, buf
);
235 buf
= insert_only_hash_sets_logger::log ("contains_size3",
236 stat_contains_size3
, buf
);
237 buf
= insert_only_hash_sets_logger::log ("contains_size4",
238 stat_contains_size4
, buf
);
239 buf
= insert_only_hash_sets_logger::log ("contains_size5",
240 stat_contains_size5
, buf
);
241 buf
= insert_only_hash_sets_logger::log ("contains_size6",
242 stat_contains_size6
, buf
);
243 buf
= insert_only_hash_sets_logger::log ("contains_size7",
244 stat_contains_size7
, buf
);
245 buf
= insert_only_hash_sets_logger::log ("contains_size8",
246 stat_contains_size8
, buf
);
247 buf
= insert_only_hash_sets_logger::log ("contains_size9",
248 stat_contains_size9
, buf
);
249 buf
= insert_only_hash_sets_logger::log ("contains_size10",
250 stat_contains_size10
, buf
);
251 buf
= insert_only_hash_sets_logger::log ("contains_size11",
252 stat_contains_size11
, buf
);
253 buf
= insert_only_hash_sets_logger::log ("contains_size12",
254 stat_contains_size12
, buf
);
255 buf
= insert_only_hash_sets_logger::log ("contains_size13_or_more",
256 stat_contains_size13_or_more
, buf
);
257 buf
= insert_only_hash_sets_logger::log ("grow_from_size0_to_1",
258 stat_grow_from_size0_to_1
, buf
);
259 buf
= insert_only_hash_sets_logger::log ("grow_from_size1_to_2",
260 stat_grow_from_size1_to_2
, buf
);
261 buf
= insert_only_hash_sets_logger::log ("insert_found_hash_collision",
262 stat_insert_found_hash_collision
,
264 buf
= insert_only_hash_sets_logger::log ("double_the_number_of_buckets",
265 stat_double_the_number_of_buckets
,
273 #define inc_by(statname, amount) do { } while (false && (amount))
277 #define inc(statname) inc_by (statname, 1)
279 template <typename Key
, class HashFcn
, class Alloc
>
280 class insert_only_hash_sets
283 typedef Key key_type
;
284 typedef size_t size_type
;
285 typedef Alloc alloc_type
;
286 enum { illegal_key
= 1 };
287 enum { min_capacity
= 4 };
289 enum { stats
= true };
291 enum { stats
= false };
294 /* Do not directly use insert_only_hash_set. Instead, use the
295 static methods below to create and manipulate objects of the
298 Implementation details: each set is represented by a pointer
299 plus, perhaps, out-of-line data, which would be an object of type
300 insert_only_hash_set. For a pointer, s, the interpretation is: s
301 == NULL means empty set, lsb(s) == 1 means a set with one
302 element, which is (uintptr_t)s - 1, and otherwise s is a pointer
303 of type insert_only_hash_set*. So, to increase the size of a set
304 we have to change s and/or *s. To check if a set contains some
305 key we have to examine s and possibly *s. */
306 class insert_only_hash_set
309 /* Insert a key. The key must not be a reserved key. */
310 static inline insert_only_hash_set
*insert (key_type key
,
311 insert_only_hash_set
*s
);
314 /* Create an empty set. */
315 static inline insert_only_hash_set
*create (size_type capacity
);
317 /* Return whether the given key is present. If key is illegal_key
318 then either true or false may be returned, but for all other
319 reserved keys false will be returned. */
321 contains (key_type key
, const insert_only_hash_set
*s
)
328 case 0: inc (stat_contains_size0
); break;
329 case 1: inc (stat_contains_size1
); break;
330 case 2: inc (stat_contains_size2
); break;
331 case 3: inc (stat_contains_size3
); break;
332 case 4: inc (stat_contains_size4
); break;
333 case 5: inc (stat_contains_size5
); break;
334 case 6: inc (stat_contains_size6
); break;
335 case 7: inc (stat_contains_size7
); break;
336 case 8: inc (stat_contains_size8
); break;
337 case 9: inc (stat_contains_size9
); break;
338 case 10: inc (stat_contains_size10
); break;
339 case 11: inc (stat_contains_size11
); break;
340 case 12: inc (stat_contains_size12
); break;
341 default: inc (stat_contains_size13_or_more
); break;
343 inc_by (stat_contains_sizes
, size (s
));
346 return (singleton (s
) ?
347 singleton_key (key
) == s
:
348 ((s
!= NULL
) && s
->contains (key
)));
351 /* Return a set's size. */
353 size (const insert_only_hash_set
*s
)
354 { return (s
== NULL
) ? 0 : (singleton (s
) ? 1 : s
->num_entries
); }
356 static inline insert_only_hash_set
*resize (size_type target_num_buckets
,
357 insert_only_hash_set
*s
);
361 /* Return whether a set has size 1. */
363 singleton (const insert_only_hash_set
*s
)
364 { return (uintptr_t) s
& 1; }
366 /* Return the representation of a singleton set containing the
368 static insert_only_hash_set
*
369 singleton_key (key_type key
)
370 { return (insert_only_hash_set
*) ((uintptr_t) key
+ 1); }
372 /* Given a singleton set, what key does it contain? */
374 extract_singleton_key (const insert_only_hash_set
*s
)
376 VTV_DEBUG_ASSERT (singleton (s
));
377 return (key_type
) ((uintptr_t) s
- 1);
381 key_at_index (size_type index
)
382 { return buckets
[index
]; }
385 key_at_index (size_type index
) const
386 { return buckets
[index
]; }
389 next_index (size_type index
, size_type indices_examined
) const
390 { return (index
+ indices_examined
) & (num_buckets
- 1); }
392 inline void insert_no_resize (key_type key
);
394 inline bool contains (key_type key
) const;
396 inline insert_only_hash_set
*resize_if_necessary (void);
398 size_type num_buckets
; /* Must be a power of 2 not less than
400 volatile size_type num_entries
;
401 volatile key_type buckets
[0]; /* Actual array size is num_buckets. */
404 /* Create an empty set with the given capacity. Requires that n be
405 0 or a power of 2. If 1 < n < min_capacity then treat n as
406 min_capacity. Sets *handle. Returns true unless the allocator
407 fails. Subsequent operations on this set should use the same
410 static inline bool create (size_type n
, insert_only_hash_set
**handle
);
412 /* Force the capacity of a set to be n, unless it was more than n
413 already. Requires that n be 0 or a power of 2. Sets *handle
414 unless the current capacity is n or more. Returns true unless
415 the allocator fails. */
417 static inline bool resize (size_type n
, insert_only_hash_set
**handle
);
419 /* Insert a key. *handle is unmodified unless (1) a resize occurs,
420 or (2) the set was initially empty. Returns true unless the
421 allocator fails during a resize. If the allocator fails during a
422 resize then the set is reset to be the empty set. The key must
423 not be a reserved key. */
425 static inline bool insert (key_type key
, insert_only_hash_set
**handle
);
427 /* Check for the presence of a key. If key is illegal_key then
428 either true or false may be returned, but for all other reserved
429 keys false will be returned. */
432 contains (key_type key
, /* const */ insert_only_hash_set
**handle
)
433 { return insert_only_hash_set::contains (key
, *handle
); }
435 /* Return the size of the given set. */
437 size (const insert_only_hash_set
**handle
)
438 { return insert_only_hash_set::size (*handle
); }
441 is_reserved_key (key_type key
)
442 { return ((uintptr_t) key
% 2) == 1; }
445 template <typename Key
, class HashFcn
, class Alloc
>
446 typename insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set
*
447 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set::resize
448 (size_type n
, insert_only_hash_set
*s
)
453 size_type capacity
= singleton (s
) ? 1 : s
->num_buckets
;
458 insert_only_hash_set
*result
=
459 create (std::max
<size_type
> (n
, min_capacity
));
464 result
->insert_no_resize (extract_singleton_key (s
));
468 for (size_type i
= 0; i
< s
->num_buckets
; i
++)
469 if (s
->buckets
[i
] != (key_type
) illegal_key
)
470 result
->insert_no_resize (s
->buckets
[i
]);
472 VTV_DEBUG_ASSERT (size (result
) == size (s
));
477 template <typename Key
, class HashFcn
, class Alloc
>
478 typename insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set
*
479 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set::insert
480 (key_type key
, insert_only_hash_set
*s
)
482 VTV_DEBUG_ASSERT (!is_reserved_key (key
));
484 inc_by (stat_grow_from_size0_to_1
, s
== NULL
);
487 return singleton_key (key
);
491 const key_type old_key
= extract_singleton_key (s
);
495 /* Grow from size 1 to size 2. */
496 inc (stat_grow_from_size1_to_2
);
501 s
->insert_no_resize (old_key
);
502 s
->insert_no_resize (key
);
503 VTV_DEBUG_ASSERT (size (s
) == 2);
506 s
= s
->resize_if_necessary();
508 s
->insert_no_resize (key
);
512 template <typename Key
, class HashFcn
, class Alloc
>
513 typename insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set
*
514 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set::create
520 VTV_DEBUG_ASSERT (capacity
> 1 && (capacity
& (capacity
- 1)) == 0);
521 VTV_DEBUG_ASSERT (sizeof (insert_only_hash_set
) == 2 * sizeof (size_type
));
522 capacity
= std::max
<size_type
> (capacity
, min_capacity
);
523 const size_t num_bytes
= sizeof (insert_only_hash_set
) +
524 sizeof (key_type
) * capacity
;
526 insert_only_hash_set
*result
= (insert_only_hash_set
*) alloc (num_bytes
);
527 result
->num_buckets
= capacity
;
528 result
->num_entries
= 0;
529 for (size_type i
= 0; i
< capacity
; i
++)
530 result
->buckets
[i
] = (key_type
) illegal_key
;
534 template <typename Key
, class HashFcn
, class Alloc
>
536 insert_only_hash_sets
<Key
, HashFcn
,
537 Alloc
>::insert_only_hash_set::insert_no_resize
541 const size_type capacity
= num_buckets
;
542 VTV_DEBUG_ASSERT (capacity
>= min_capacity
);
543 VTV_DEBUG_ASSERT (!is_reserved_key (key
));
544 size_type index
= hasher (key
) & (capacity
- 1);
545 key_type k
= key_at_index (index
);
546 size_type indices_examined
= 0;
550 if (k
== (key_type
) illegal_key
)
552 key_at_index (index
) = key
;
558 inc_by (stat_insert_found_hash_collision
,
559 hasher (k
) == hasher (key
));
561 VTV_DEBUG_ASSERT (indices_examined
< capacity
);
562 index
= next_index (index
, indices_examined
);
563 k
= key_at_index (index
);
567 template<typename Key
, class HashFcn
, class Alloc
>
569 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set::contains
572 inc (stat_contains_in_non_trivial_set
);
574 const size_type capacity
= num_buckets
;
575 size_type index
= hasher (key
) & (capacity
- 1);
576 key_type k
= key_at_index (index
);
577 size_type indices_examined
= 0;
578 inc (stat_probes_in_non_trivial_set
);
582 if (/*UNLIKELY*/(k
== (key_type
) illegal_key
583 || indices_examined
== capacity
))
586 index
= next_index (index
, indices_examined
);
587 k
= key_at_index (index
);
588 inc (stat_probes_in_non_trivial_set
);
593 template <typename Key
, class HashFcn
, class Alloc
>
594 typename insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert_only_hash_set
*
595 insert_only_hash_sets
<Key
, HashFcn
,
596 Alloc
>::insert_only_hash_set::resize_if_necessary (void)
598 VTV_DEBUG_ASSERT (num_buckets
>= min_capacity
);
599 size_type unused
= num_buckets
- num_entries
;
600 if (unused
< (num_buckets
>> 2))
602 inc (stat_double_the_number_of_buckets
);
603 size_type new_num_buckets
= num_buckets
* 2;
604 insert_only_hash_set
*s
= create (new_num_buckets
);
605 for (size_type i
= 0; i
< num_buckets
; i
++)
606 if (buckets
[i
] != (key_type
) illegal_key
)
607 s
->insert_no_resize (buckets
[i
]);
608 VTV_DEBUG_ASSERT (size (this) == size (s
));
615 template<typename Key
, class HashFcn
, class Alloc
>
617 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::create (size_type n
,
618 insert_only_hash_set
**handle
)
622 *handle
= insert_only_hash_set::create (n
);
623 return (n
<= 1) || (*handle
!= NULL
);
626 template<typename Key
, class HashFcn
, class Alloc
>
628 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::resize (size_type n
,
629 insert_only_hash_set
**handle
)
632 *handle
= insert_only_hash_set::resize (n
, *handle
);
633 return (n
<= 1) || (*handle
!= NULL
);
636 template<typename Key
, class HashFcn
, class Alloc
>
638 insert_only_hash_sets
<Key
, HashFcn
, Alloc
>::insert (key_type key
,
639 insert_only_hash_set
**handle
)
642 const size_type old_size
= insert_only_hash_set::size (*handle
);
643 *handle
= insert_only_hash_set::insert (key
, *handle
);
646 const size_type delta
= insert_only_hash_set::size (*handle
) - old_size
;
647 inc_by (stat_insert_key_that_was_already_present
, delta
== 0);
649 return *handle
!= NULL
;
652 #endif /* VTV_SET_H */