Reduce pinned object table size, part 1 of 2.
[sbcl.git] / src / runtime / hopscotch.c
/*
 * This software is part of the SBCL system. See the README file for
 * more information.
 *
 * This software is derived from the CMU CL system, which was
 * written at Carnegie Mellon University and released into the
 * public domain. The software is in the public domain and is
 * provided with absolutely no warranty. See the COPYING and CREDITS
 * files for more information.
 */
/*
 * Our implementation of the hopscotch algorithm described in
 * http://people.csail.mit.edu/shanir/publications/disc2008_submission_98.pdf
 * which is an extremely simple variation on linear probing
 * that provides a guaranteed bound on the number of probes.
 */
#include <pthread.h> // only because of our dang non-self-contained .h files
#ifdef COLLECT_STATISTICS
#include <stdio.h>
#endif

#include "genesis/constants.h"
#include "runtime.h"
#include "gc-internal.h" // for os_validate()
#include "hopscotch.h"
typedef struct hopscotch_table* tableptr;
void hopscotch_integrity_check(tableptr,char*,int);

#define table_size(table) (1+(table).mask)
#define hash(x) x
#define INTEGRITY_CHECK(when) {}
/// The following "good" hash function slows GC down a lot.
/// Don't use it.
/// (Maybe there will be other places where we want it)
#if 0
/// C translation of Java code available at
/// https://android.googlesource.com/platform/libcore/+/49965c1/ojluni/src/main/java/sun/misc/Hashing.java
static uword_t hash(uword_t h)
{
    h += (h << 15) ^ 0xffffcd7d;
    h ^= (h >> 10);
    h += (h << 3);
    h ^= (h >> 6);
    h += (h << 2) + (h << 14);
    return h ^ (h >> 16);
}
#endif
/// Set a single bit in the hop mask for logical cell at 'index'
static inline void set_hop_bit(tableptr ht, unsigned index, int bit)
{
    unsigned mask = 1<<bit;
    ht->hops[index] |= mask;
}
/// Set all the bits in the hop mask for logical cell at 'index'
static inline void set_hop_mask(tableptr ht, unsigned index, unsigned bits)
{
    ht->hops[index] = bits;
}
static inline unsigned get_hop_mask(tableptr ht, unsigned index)
{
    return ht->hops[index];
}
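/// Note on hop masks (a summary of how the code above and below uses them,
/// not additional behavior): bit K of hops[B] being set means that the key
/// stored in physical cell B+K hashes to logical bin B. Each logical bin
/// therefore "owns" up to hop_range consecutive physical cells, and lookup
/// only ever probes the cells named by that bitmask (or, in practice, a
/// small superset of them - see the comment above the probe() macro).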
/// Hopscotch storage allocation granularity.
/// Our usual value of "page size" is the GC page size, which is
/// coarser than necessary (cf {target}/backend-parms.lisp).
static int hh_allocation_granularity = 4096;
#define ALLOCATION_OVERHEAD (2*sizeof(unsigned int))
/// Return the number of usable bytes (excluding the header) in an allocation
#define usable_size(x) ((unsigned int*)x)[-1]
/// Sizing up a table can't be done in-place, so reserve a few blocks
/// of memory for when resize has to happen during GC. We don't return
/// these blocks to the OS. If even more is required, it will be allocated
/// as needed, but we'll only keep on reserve at most two blocks.
#define N_CACHED_ALLOCS 2
char* cached_alloc[N_CACHED_ALLOCS];
void hopscotch_init() // Called once on runtime startup, from gc_init().
{
    // Get 16KB from the OS and evenly divide it into two pieces.
    int n_bytes_per_slice = 8 * 1024;
    int n_bytes_total = N_CACHED_ALLOCS * n_bytes_per_slice;
    char* mem = (char*)os_validate(0, n_bytes_total);
    gc_assert(mem);
    cached_alloc[0] = mem + ALLOCATION_OVERHEAD;
    cached_alloc[1] = cached_alloc[0] + n_bytes_per_slice;
    // Write the user-visible size of each allocation into the block header
    usable_size(cached_alloc[0]) = n_bytes_per_slice - ALLOCATION_OVERHEAD;
    usable_size(cached_alloc[1]) = n_bytes_per_slice - ALLOCATION_OVERHEAD;
}

#if 0
void show_cache(char* when)
{
    printf("cache %s is %p: %lx and %p: %lx\n",
           when,
           cached_alloc[0], (cached_alloc[0] ? usable_size(cached_alloc[0]) : 0),
           cached_alloc[1], (cached_alloc[1] ? usable_size(cached_alloc[1]) : 0));
}
#endif
/* Return the address of at least 'nbytes' of storage.
 * This is not a general-purpose thing - it's only intended to keep
 * one or perhaps two hopscotch hash tables around during GC,
 * for pinned objects, and maybe something else.
 * As such, no attempt is made to minimize storage use,
 * and if used more generally, it would suffer badly from fragmentation.
 */
static char* cached_allocate(os_vm_size_t nbytes)
{
    // See if either cached allocation is large enough.
    if (cached_alloc[0] && usable_size(cached_alloc[0]) >= nbytes) {
        // Yup, just give the consumer the whole thing.
        char* result = cached_alloc[0];
        cached_alloc[0] = 0; // Remove from the pool
        return result;
    }
    if (cached_alloc[1] && usable_size(cached_alloc[1]) >= nbytes) { // Ditto.
        char* result = cached_alloc[1];
        cached_alloc[1] = 0;
        return result;
    }
    // Request more memory, not using malloc().
    // Round up, since the OS will give more than asked if the request is
    // not a multiple of the mmap granularity, which we'll assume is 4K.
    // (It doesn't actually matter.)
    nbytes = CEILING(nbytes, hh_allocation_granularity);
    char* result = os_validate(0, nbytes);
    gc_assert(result);
    result += ALLOCATION_OVERHEAD;
    usable_size(result) = nbytes - ALLOCATION_OVERHEAD;
    return result;
}

/* Return 'mem' to the cache, first zero-filling to the specified length.
 * Though the memory size is recorded in the header of the memory block,
 * the allocator doesn't know how many bytes were touched by the requestor,
 * which is why the length is specified again.
 * If returning it to the OS and not the cache, then don't bother 0-filling.
 */
static void cached_deallocate(char* mem, int zero_fill_length)
{
    int line = 0;
    if (!cached_alloc[0]) {
    } else if (!cached_alloc[1])
        line = 1;
    else {
        // Try to retain whichever 2 blocks are largest (the given one and
        // cached ones) in the hope of fulfilling future requests from cache.
        int this_size = usable_size(mem);
        int cached_size0 = usable_size(cached_alloc[0]);
        int cached_size1 = usable_size(cached_alloc[1]);
        if (!(this_size > cached_size0 || this_size > cached_size1)) {
            // mem is not strictly larger than either cached block. Release it.
            os_deallocate(mem - ALLOCATION_OVERHEAD,
                          usable_size(mem) + ALLOCATION_OVERHEAD);
            return;
        }
        // Evict and replace the smaller of the two cache entries.
        if (cached_size1 < cached_size0)
            line = 1;
        os_deallocate(cached_alloc[line] - ALLOCATION_OVERHEAD,
                      usable_size(cached_alloc[line]) + ALLOCATION_OVERHEAD);
    }
    bzero(mem, zero_fill_length);
    cached_alloc[line] = mem;
}

/* Initialize 'ht' for 'size' logical bins with a max hop of 'hop_range'.
 * 'valuesp' makes a hash-map if true; a hash-set if false.
 * Hop range will be selected automatically if specified as 0.
 */
static void hopscotch_realloc(tableptr ht, boolean valuesp, int size, char hop_range)
{
    // Somewhat arbitrary criteria that improve the worst-case probes for
    // small hashtables. The reference algorithm uses a fixed max hop of 32,
    // but fewer is better, and our pinned object table tends to be very small.
    if (hop_range == 0) { // Let us pick.
        if (size <= 1024) hop_range = 8;
        else if (size <= 16384) hop_range = 16;
        else hop_range = 32;
    }

    // The key/value arrays are *not* circular.
    // The last few logical cells in the key array can use physical cells
    // at indices greater than 'size'; there's no wrapping back to index 0.
    int n_keys = size + (hop_range - 1);
    unsigned storage_size = sizeof (uword_t) * n_keys
        + sizeof (int) * size // hop bitmasks
        + (valuesp ? (sizeof (int) * n_keys) : 0); // values

    if (ht->keys)
        gc_assert(usable_size(ht->keys) >= storage_size);
    else
        ht->keys = (uword_t*)cached_allocate(storage_size);

    ht->mem_size  = storage_size;
    ht->mask      = size - 1;
    ht->hop_range = hop_range;
    ht->hops      = (unsigned*)((char*)ht->keys + sizeof (uword_t) * n_keys);
    ht->values    = !valuesp ? 0 :
        (unsigned*)((char*)ht->hops + sizeof (int) * size);
}

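/* For reference, the single allocation set up above is laid out as three
 * consecutive arrays, matching the 'storage_size' computation:
 *   keys   : n_keys uword_t cells (logical bins plus hop_range-1 spillover)
 *   hops   : size   unsigned ints (one hop bitmask per logical bin)
 *   values : n_keys unsigned ints (present only when 'valuesp' is true)
 */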
/* Initialize 'ht' for first use, which entails zeroing the counters
 * and allocating storage.
 */
void hopscotch_create(tableptr ht, boolean valuesp, int size, char hop_range)
{
    gc_assert((size & (size-1)) == 0); // ensure power-of-2
    ht->count = 0;
    ht->rehashing = 0;
    ht->resized = 0;
    // Ensure that the first reset() doesn't do something screwy.
    ht->prev_size = size;
    // Clear these even if not collecting statistics,
    // because it looks ugly if we don't.
    ht->hit .n_seeks = ht->hit .n_probes = 0;
    ht->miss.n_seeks = ht->miss.n_probes = 0;

    ht->keys = 0; // Forces allocation of backing storage.
    hopscotch_realloc(ht, valuesp, size, hop_range);
}

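/* Illustrative call sequence for the table API in this file (a sketch, not
 * code that appears among the real callers; 'pins' and 'key' are hypothetical):
 *
 *   struct hopscotch_table pins;
 *   hopscotch_create(&pins, 0, 32, 0);   // hash-set, 32 logical bins, auto hop range
 *   hopscotch_put(&pins, key, 1);        // 'val' is ignored for a hash-set
 *   if (hopscotch_containsp(&pins, key)) { ... }
 *   hopscotch_reset(&pins);              // like CLRHASH; keeps (or shrinks) storage
 *   hopscotch_delete(&pins);             // return the storage to the cache/OS
 */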
/* Delete the storage associated with 'ht' */
void hopscotch_delete(tableptr ht)
{
    if (ht->mem_size) { // Free it, zero-filling if ever used.
        cached_deallocate((char*)ht->keys, ht->count ? ht->mem_size : 0);
        ht->keys = 0;
        ht->hops = 0;
        ht->values = 0;
    }
}

/* Prepare 'ht' for re-use. Same as CLRHASH */
void hopscotch_reset(tableptr ht)
{
    if (ht->count) {
        bzero(ht->keys, ht->mem_size);
        ht->count = 0;
    }
    // If the size exceeds twice the final size from the prior run,
    // or is the same size and was not enlarged, then downsize,
    // but don't go below a certain minimal size.
    int size = table_size(*ht);
#if 0
    fprintf(stderr, "hh reset: size=%d prev=%d upsized=%d\n",
            size, ht->prev_size, ht->resized);
#endif
    if (size > (ht->prev_size << 1)
        || (size == ht->prev_size && !ht->resized && size > 8))
        // Halve the size for the next GC cycle
        hopscotch_realloc(ht, ht->values != 0, size >> 1, 0);
    ht->prev_size = size;
    ht->resized = 0;
    INTEGRITY_CHECK("after reset");
}

/* Double the size of 'ht'. Called when an insertion overflows the table */
tableptr hopscotch_resize_up(tableptr ht)
{
    int size = ht->mask + 1; // Logical bin count
    int old_max_index = hopscotch_max_key_index(*ht);
    struct hopscotch_table copy;

#if 0
    fprintf(stderr, "resize up: ct=%d cap=%d hop=%d LF=%f\n",
            ht->count, 1+old_max_index, ht->hop_range,
            (float)ht->count/(1+old_max_index));
#endif
    INTEGRITY_CHECK("before resize");
    // Copy the keys or key/value pairs.
    //
    // It's conceivable, however improbable, that there is a hash function
    // which causes more collisions at the new size than the old size.
    // Due to the fixed hop range, failure to insert while rehashing
    // must be caught so that we can try again with a larger size.
    // But usually this loop will execute exactly once.
    int i;
    do {
        size *= 2;
        hopscotch_create(&copy, ht->values != 0, size, 0);
        copy.rehashing = 1; // Causes put() to return 0 on failure
        if (ht->values) {
            for(i=old_max_index ; i >= 0 ; --i)
                if (ht->keys[i])
                    if (!hopscotch_put(&copy, ht->keys[i], ht->values[i]))
                        break;
        } else {
            for(i=old_max_index ; i >= 0 ; --i)
                if (ht->keys[i])
                    if (!hopscotch_put(&copy, ht->keys[i], 1)) {
#if 0
                        fprintf(stderr, "resize failed with new size %d, hop_range %d\n",
                                size, copy.hop_range);
#endif
                        break;
                    }
        }
    } while (i >= 0 && (hopscotch_delete(&copy), 1));

    // Zero-fill and release the old storage.
    cached_deallocate((char*)ht->keys, ht->mem_size);

    // Move all of the data pointers from 'copy' into ht.
    // mem_size is passed to bzero() when resetting the table,
    // so definitely be sure to use the new, not the old.
    // And of course _don't_ hopscotch_delete() copy when done.
    ht->mem_size  = copy.mem_size;
    ht->mask      = copy.mask;
    ht->hop_range = copy.hop_range;
    ht->keys      = copy.keys;
    ht->hops      = copy.hops;
    ht->values    = copy.values;
    ht->resized   = 1;
    INTEGRITY_CHECK("after resize");
    return ht;
}

void hopscotch_log_stats(tableptr ht)
{
#ifdef COLLECT_STATISTICS
    static FILE *hh_logfile;
    if (!hh_logfile)
        hh_logfile = fopen("hash-stats.txt","a");
    fprintf(hh_logfile,
            "hopscotch: ct=%5d cap=%5d LF=%f seek=%5d+%5d probe/seek=%f+%f (hit+miss)\n",
            ht->count,
            (ht->mask + ht->hop_range),
            (float)ht->count / (ht->mask + ht->hop_range),
            ht->hit.n_seeks, ht->miss.n_seeks,
            ht->hit.n_seeks>0 ? (float)ht->hit.n_probes / ht->hit.n_seeks : 0.0,
            ht->miss.n_seeks>0 ? (float)ht->miss.n_probes / ht->miss.n_seeks : 0.0);
    fflush(hh_logfile);
    ht->hit.n_seeks = ht->hit.n_probes = 0;
    ht->miss.n_seeks = ht->miss.n_probes = 0;
#endif
}

/* Return an integer with 'n' low-order 1 bits.
 * This does not do the right thing for n = 0, but that's fine!
 * (Shifting an unsigned 32-bit integer rightward by 32 is not defined.
 * 32-bit x86 masks the shift amount to 5 bits, so you get 0 shift.)
 */
static inline unsigned int bitmask_of_width(int n) {
    return (0xFFFFFFFFU >> (32 - n));
}
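/// For example, bitmask_of_width(3) == 0x7. hopscotch_put() uses this to
/// select the hop bits that lie within 'displacement' cells of a free cell.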

#define put_pair(i,k,v) ht->keys[i] = k; if(ht->values) ht->values[i] = v
/* Add key/val to 'ht'. 'val' is ignored for a hash-set */
int hopscotch_put(tableptr ht, uword_t key, unsigned int val)
{
    // 'desired_index' is where 'key' logically belongs, but it
    // may physically go in any cell to the right up to (range-1) away.
    int desired_index = hash(key) & ht->mask;
    if (ht->keys[desired_index] == 0) { // Instant win
        put_pair(desired_index, key, val);
        set_hop_bit(ht, desired_index, 0);
        return ++ht->count;
    }
    // 'limit' is the inclusive bound on cell indices.
    int limit = hopscotch_max_key_index(*ht);
    int free_index = desired_index;
    int displacement;
    while (ht->keys[++free_index] != 0) // While cell is occupied
        if (free_index == limit)
            return ht->rehashing ? 0 : // fail if rehash table is too small
                hopscotch_put(hopscotch_resize_up(ht), key, val);

    // 'free_index' is where *some* item could go,
    // but it might be too far away for this key.
 retry:
    if ((displacement = free_index - desired_index) < ht->hop_range) {
        put_pair(free_index, key, val);
        set_hop_bit(ht, desired_index, displacement);
        return ++ht->count;
    }
    // Find the empty cell furthest away from and to the left of free_index,
    // within the hop_range, that contains an item that can be moved.
    int logical_bin = free_index - (ht->hop_range - 1);
    // limit is the max index (inclusive) of the available free cells
    // up to but excluding 'free_index'
    limit = free_index - 1;
    // In case free_index currently points to a physical bin "off the end"
    // of the logical bins, confine to the highest logical bin,
    // which is just the table mask.
    if (limit >= (int)ht->mask)
        limit = ht->mask;
    // Now 'free_index' is fixed, and 'logical_bin' is what we search
    // over to find something to displace into the free_index.
    // Example:                              v----- free index
    //     |     |  X  |  X  |  O  |     |     [X = filled. O = filled + owned]
    //           ^--- logical bin (displacement = 3)
    // Look at the low 3 bits of the hop bits for 'logical_bin'.
    // Those indicate the physical cells "owned" by the logical cell
    // and within the needed distance to the free cell.
    // If any are set, the leftmost bit is robbed to make room.
    // In the above example, bit index 2 (0-based index) would be claimed.
    for ( ; logical_bin <= limit ; ++logical_bin ) {
        displacement = free_index - logical_bin;
        unsigned bits = get_hop_mask(ht, logical_bin);
        unsigned masked_bits = bits & bitmask_of_width(displacement);
        if (masked_bits) {
            int victim = ffs(masked_bits) - 1; // ffs() is 1-based
            int physical_elt = logical_bin + victim;
            // Relocate the contents of 'physical_elt' to 'free_index'
            put_pair(free_index, ht->keys[physical_elt], ht->values[physical_elt]);
            put_pair(physical_elt, 0, 0);
            // This logical bin no longer owns the index where the victim was,
            // but does own the index where it got moved to.
            set_hop_mask(ht, logical_bin, bits ^ (1<<displacement | 1<<victim));
            // Now free_index gets smaller, and we try again from the top.
            free_index = physical_elt;
            goto retry;
        }
    }
    // Too many collisions and not enough room to move things around.
    return ht->rehashing ? 0 : hopscotch_put(hopscotch_resize_up(ht), key, val);
}
#undef put_pair

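/* Worked example for the displacement step in hopscotch_put() above
 * (hypothetical values): suppose get_hop_mask(ht, logical_bin) == 0b1010
 * (the bin owns the cells at offsets 1 and 3) and displacement == 4.
 * Then masked_bits == 0b1010 & bitmask_of_width(4) == 0b1010, so
 * victim == ffs(0b1010)-1 == 1: the key at logical_bin+1 moves into
 * free_index == logical_bin+4, and the hop mask becomes
 * 0b1010 ^ (1<<4 | 1<<1) == 0b11000, i.e. the bin now owns offsets 3 and 4. */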
/// When probing on lookup, while we could use the mask bits in the
/// desired logical bin to restrict the number of key comparisons made,
/// this turns out to be worse. Though slightly counter-intuitive,
/// it is likely due to one fewer conditional branch when we hit the
/// first-choice physical cell. The probe() macro will decide whether
/// to use the mask bits and/or record the number of key comparisons.
/// Statistics gathering also slows us down a lot, so only do it when
/// making comparative benchmarks, not in real-world use.
#ifdef COLLECT_STATISTICS
#define probe(mask,i,retval) ++probes; if (ht->keys[i] == key) { \
    ++ht->hit.n_seeks; ht->hit.n_probes += probes; \
    return retval; }
#else
#define probe(mask,i,retval) if (ht->keys[i] == key) return retval
#endif

/* Test for membership in a hashset. Return 1 or 0. */
int hopscotch_containsp(tableptr ht, uword_t key)
{
    // index needn't be 'long' but the code generated is better with it.
    unsigned long index = hash(key) & ht->mask;
    unsigned bits = get_hop_mask(ht, index);
#ifdef COLLECT_STATISTICS
    int probes = 0;
#endif
    // *** Use care when modifying this code, and benchmark it thoroughly! ***
    // TODO: use XMM register to test 2 keys at once if properly aligned.
    if (bits & 0xff) {
        probe((1<<0), index+0, 1);
        probe((1<<1), index+1, 1);
        probe((1<<2), index+2, 1);
        probe((1<<3), index+3, 1);
        probe((1<<4), index+4, 1);
        probe((1<<5), index+5, 1);
        probe((1<<6), index+6, 1);
        probe((1<<7), index+7, 1);
    }
    // There's a trade-off to be made: checking for fewer bits at a time
    // (such as by "bits & 0x0f") would give finer grain to the set of
    // physical cells tested, but would mean more iterations.
    // It seems like 8 bits at a time is a good number, especially if the
    // hop range is 8, because this general case need never execute.
    while ((bits >>= 8) != 0) {
        index += 8;
        if (bits & 0xff) {
            probe((1<<0), index+0, 1);
            probe((1<<1), index+1, 1);
            probe((1<<2), index+2, 1);
            probe((1<<3), index+3, 1);
            probe((1<<4), index+4, 1);
            probe((1<<5), index+5, 1);
            probe((1<<6), index+6, 1);
            probe((1<<7), index+7, 1);
        }
    }
#ifdef COLLECT_STATISTICS
    ++ht->miss.n_seeks;
    ht->miss.n_probes += probes;
#endif
    return 0;
}

/* Return the value associated with 'key', or -1 if not found */
int hopscotch_get(tableptr ht, uword_t key)
{
    int index = hash(key) & ht->mask;
    unsigned bits = get_hop_mask(ht, index);
#ifdef COLLECT_STATISTICS
    int probes = 0;
#endif
    // This is not as blazingly fast as the hand-unrolled loop
    // in containsp(), but the GC does not need it, so ...
    while (bits) {
        if (bits & 0xf) {
            probe(1, index+0, ht->values[index+0]);
            probe(2, index+1, ht->values[index+1]);
            probe(4, index+2, ht->values[index+2]);
            probe(8, index+3, ht->values[index+3]);
        }
        index += 4;
        bits >>= 4;
    }
#ifdef COLLECT_STATISTICS
    ++ht->miss.n_seeks;
    ht->miss.n_probes += probes;
#endif
    return -1;
}

#undef probe

#if 0
int popcount(unsigned x)
{
    int count = 0;
    for ( ; x != 0 ; x >>= 1 )
        if (x&1) ++count;
    return count;
}

/* Perform a bunch of sanity checks on 'ht' */
void hopscotch_integrity_check(tableptr ht, char*when, int verbose)
{
    int n_items = 0, tot_bits_set = 0, i;
    int size = table_size(*ht);
    int n_kv_pairs = size + ht->hop_range-1;
    int fail = 0;
    FILE * s = stderr;

    for(i=n_kv_pairs-1 ; i >= 0 ; --i) if (ht->keys[i]) ++n_items;
    for(i=ht->mask; i>= 0; --i) tot_bits_set += popcount(get_hop_mask(ht,i));
    if (verbose)
        fprintf(s, "(%s) Verifying table @ %p. count=%d actual=%d bits=%d\n",
                when, ht, ht->count, n_items, tot_bits_set);
    for (i=0;i<n_kv_pairs;++i) {
        uword_t key = ht->keys[i];
        int claimed;
        if (key != 0 || (i<=ht->mask && get_hop_mask(ht,i) != 0)) {
            // Compute the logical cell that owns this physical cell.
            int start_index = i - (ht->hop_range-1);
            if (start_index < 0) start_index = 0;
            int end_index = i;
            if (end_index > ht->mask) end_index = ht->mask;
            claimed = -1;
            int logical_cell;
            for (logical_cell = start_index ; logical_cell <= end_index ; ++logical_cell) {
                unsigned hop_bits = get_hop_mask(ht, logical_cell);
                if (hop_bits & (1<<(i - logical_cell))) {
                    if (claimed == -1)
                        claimed = logical_cell;
                    else {
                        fprintf(stderr,
                                "physical cell %d duplicately claimed: %d and %d",
                                i, claimed, logical_cell);
                        fail = 1;
                    }
                }
            }
            if (verbose) {
                if (claimed==i || (claimed==-1 && !key))
                    fprintf(s, "      ");
                else if (claimed!=-1) {
                    fprintf(s, "[%4d]", claimed);
                    if ((int)(ht->mask & hash(key)) != claimed)
                        lose("key hashes to wrong logical cell?");
                } else { // should have been claimed
                    fprintf(s, " **** ");
                    fail = 1;
                }
                fprintf(s, " %4d: %04x", i, i <= ht->mask ? get_hop_mask(ht,i) : 0);
                if (key)
                    fprintf(s, " %12p -> %d",
                            (void*)(key<<(1+WORD_SHIFT)),
                            (int)(ht->mask & hash(key)));
                putc('\n', s);
            }
        }
    }
    if (ht->count != n_items || tot_bits_set != n_items || fail)
        lose("integrity check on hashtable %p failed", ht);
    fflush(s);
}
#endif