/*
 * src/runtime/hopscotch.c
 * (from SBCL commit "Fixes for running with undefined-behavior sanitizer")
 */
/*
 * This software is part of the SBCL system. See the README file for
 * more information.
 *
 * This software is derived from the CMU CL system, which was
 * written at Carnegie Mellon University and released into the
 * public domain. The software is in the public domain and is
 * provided with absolutely no warranty. See the COPYING and CREDITS
 * files for more information.
 */

/*
 * Our implementation of the hopscotch algorithm described in
 * http://people.csail.mit.edu/shanir/publications/disc2008_submission_98.pdf
 * which is extremely simple variation on linear probing
 * that provides a guaranteed bound on number of probes.
 */
19 #include "os.h"
20 #include "gc-internal.h" // for os_allocate()
21 #include "hopscotch.h"
22 #include <stdint.h>
23 #include <stdio.h>
24 #include "genesis/vector.h"
25 #include "murmur_hash.h"
27 typedef struct hopscotch_table* tableptr;
28 void hopscotch_integrity_check(tableptr,char*,int);
30 #define table_size(table) (1+(table).mask)
31 #define INTEGRITY_CHECK(when) {}
33 /// By default, XOR values based on the page number and the
34 /// relative index into the page, disregarding lowtag bits.
35 /// If a specific function has been set, then use that.
36 static inline uint32_t hash(tableptr ht, lispobj x) {
37 return ht->hash ? ht->hash(x) :
38 #ifdef LISP_FEATURE_GENCGC
39 (x >> GENCGC_CARD_SHIFT) ^ (x >> (1+WORD_SHIFT));
40 #else
41 (x >> (1+WORD_SHIFT));
42 #endif
45 /// From https://github.com/google/cityhash/blob/master/src/city.cc
46 /// which attributes it in turn to
47 /// https://github.com/PeterScott/murmur3/blob/master/murmur3.c
48 /// The right shift by N_LOWTAG_BITS is specific to SBCL.
49 uint32_t hopscotch_hmix(uword_t key)
51 uint32_t h = key >> N_LOWTAG_BITS;
52 h ^= h >> 16;
53 h *= 0x85ebca6b;
54 h ^= h >> 13;
55 h *= 0xc2b2ae35;
56 h ^= h >> 16;
57 return h;
60 /// Set a single bit in the hop mask for logical cell at 'index'
61 static inline void set_hop_bit(tableptr ht, unsigned index, int bit)
63 unsigned mask = 1U<<bit;
64 ht->hops[index] |= mask;
66 /// Set all the bits in the hop mask for logical cell at 'index'
67 static inline void set_hop_mask(tableptr ht, unsigned index, unsigned bits)
69 ht->hops[index] = bits;
71 static inline unsigned get_hop_mask(tableptr ht, unsigned index)
73 return ht->hops[index];
75 static void set_val(tableptr ht, int index, sword_t val)
77 switch(ht->value_size) {
78 #ifdef LISP_FEATURE_64_BIT
79 case 8: ht->values[index] = val; break;
80 #endif
81 case 4: ((int32_t*)ht->values)[index] = val; break;
82 case 2: ((int16_t*)ht->values)[index] = val; break;
83 case 1: ((int8_t* )ht->values)[index] = val; break;
86 static sword_t get_val(tableptr ht, int index)
88 switch(ht->value_size) {
89 #ifdef LISP_FEATURE_64_BIT
90 case 8: return ht->values[index];
91 #endif
92 case 4: return ((int32_t*)ht->values)[index];
93 case 2: return ((int16_t*)ht->values)[index];
94 case 1: return ((int8_t *)ht->values)[index];
96 // For a hashset, return the found index + 1
97 // so that 0 can mean "not found"
98 return index + 1;
100 #ifdef LISP_FEATURE_64_BIT
101 static sword_t get_val8(tableptr ht, int index) { return ht->values[index]; }
102 #endif
103 static sword_t get_val4(tableptr ht, int index) { return ((int32_t*)ht->values)[index]; }
104 static sword_t get_val2(tableptr ht, int index) { return ((int16_t*)ht->values)[index]; }
105 static sword_t get_val1(tableptr ht, int index) { return ((int8_t *)ht->values)[index]; }
/// Hopscotch storage allocation granularity.
/// Our usual value of "page size" is the GC page size, which is
/// coarser than necessary (cf {target}/backend-parms.lisp).
static int hh_allocation_granularity = 4096;
// Each allocation is preceded by a small header holding its usable size.
#define ALLOCATION_OVERHEAD (2*sizeof(unsigned int))
/// Return the number of usable bytes (excluding the header) in an allocation
#define usable_size(x) ((unsigned int*)x)[-1]

/// Sizing up a table can't be done in-place, so reserve a few blocks
/// of memory for when resize has to happen during GC. We don't return
/// these blocks to the OS. If even more is required, it will be allocated
/// as needed, but we'll only keep on reserve at most two blocks.
#define N_CACHED_ALLOCS 2
char* cached_alloc[N_CACHED_ALLOCS];
121 void hopscotch_init() // Called once on runtime startup, from gc_init().
123 // Prefill the cache with 2 entries, each the size of a kernel page.
124 int n_bytes_per_slice = getpagesize();
125 int n_bytes_total = N_CACHED_ALLOCS * n_bytes_per_slice;
126 char* mem = os_allocate(n_bytes_total);
127 gc_assert(mem);
128 cached_alloc[0] = mem + ALLOCATION_OVERHEAD;
129 cached_alloc[1] = cached_alloc[0] + n_bytes_per_slice;
130 // Write the user-visible size of each allocation into the block header
131 usable_size(cached_alloc[0]) = n_bytes_per_slice - ALLOCATION_OVERHEAD;
132 usable_size(cached_alloc[1]) = n_bytes_per_slice - ALLOCATION_OVERHEAD;
135 /* Return the address of at least 'nbytes' of storage.
136 * This is not a general-purpose thing - it's only intended to keep
137 * one or perhaps two hopscotch hash tables around during GC,
138 * for pinned objects, and maybe something else.
139 * As such, no attempt is made to minimize storage use,
140 * and if used more generally, would badly suffer from fragmentation.
142 static char* cached_allocate(os_vm_size_t nbytes)
144 // See if either cached allocation is large enough.
145 if (cached_alloc[0] && usable_size(cached_alloc[0]) >= nbytes) {
146 // Yup, just give the consumer the whole thing.
147 char* result = cached_alloc[0];
148 cached_alloc[0] = 0; // Remove from the pool
149 return result;
151 if (cached_alloc[1] && usable_size(cached_alloc[1]) >= nbytes) { // Ditto.
152 char* result = cached_alloc[1];
153 cached_alloc[1] = 0;
154 return result;
156 // Request more memory, not using malloc().
157 // Round up, since the OS will give more than asked if the request is
158 // not a multiple of the mmap granularity, which we'll assume is 4K.
159 // (It doesn't actually matter.)
160 nbytes = ALIGN_UP(nbytes, hh_allocation_granularity);
161 char* result = os_allocate(nbytes);
162 gc_assert(result);
163 result += ALLOCATION_OVERHEAD;
164 usable_size(result) = nbytes - ALLOCATION_OVERHEAD;
165 return result;
168 /* Return 'mem' to the cache, first zero-filling to the specified length.
169 * Though the memory size is recorded in the header of the memory block,
170 * the allocator doesn't know how many bytes were touched by the requestor,
171 * which is why the length is specified again.
172 * If returning it to the OS and not the cache, then don't bother 0-filling.
174 static void cached_deallocate(char* mem, int zero_fill_length)
176 int line = 0;
177 if (!cached_alloc[0]) {
178 } else if (!cached_alloc[1])
179 line = 1;
180 else {
181 // Try to retain whichever 2 blocks are largest (the given one and
182 // cached ones) in the hope of fulfilling future requests from cache.
183 int this_size = usable_size(mem);
184 int cached_size0 = usable_size(cached_alloc[0]);
185 int cached_size1 = usable_size(cached_alloc[1]);
186 if (!(this_size > cached_size0 || this_size > cached_size1)) {
187 // mem is not strictly larger than either cached block. Release it.
188 os_deallocate(mem - ALLOCATION_OVERHEAD,
189 usable_size(mem) + ALLOCATION_OVERHEAD);
190 return;
192 // Evict and replace the smaller of the two cache entries.
193 if (cached_size1 < cached_size0)
194 line = 1;
195 os_deallocate(cached_alloc[line] - ALLOCATION_OVERHEAD,
196 usable_size(cached_alloc[line]) + ALLOCATION_OVERHEAD);
198 bzero(mem, zero_fill_length);
199 cached_alloc[line] = mem;
202 /* Initialize 'ht' for 'size' logical bins with a max hop of 'hop_range'.
203 * 'valuesp' makes a hash-map if true; a hash-set if false.
204 * Hop range will be selected automatically if specified as 0.
206 static void hopscotch_realloc(tableptr ht, int size, char hop_range)
208 // Somewhat arbitrary criteria that improve the worst-case probes for
209 // small hashtables. The reference algorithm uses a fixed max hop of 32,
210 // but fewer is better, and our pinned object table tends to be very small.
211 if (hop_range == 0) { // Let us pick.
212 // The arbitrary cutoff of 1023 is based on the observed final size
213 // of 2063 (=2048+15) with commonly fewer than 80 items.
214 // Collisions must have been so frequent that the only way out
215 // was to bump the table size and rehash. We may as well allow more
216 // probes at smaller sizes for the sake of improved table density.
217 if (size <= 1023) hop_range = 8;
218 else if (size <= 16384) hop_range = 16;
219 else hop_range = 32;
222 // The key/value arrays are *not* circular.
223 // The last few logical cells in the key array can use physical cells
224 // at indices greater than 'size'; there's no wrapping back to index 0.
225 int n_keys = size + (hop_range - 1);
226 unsigned storage_size = (sizeof (uword_t) + ht->value_size) * n_keys
227 + sizeof (int) * size; // hop bitmasks
229 if (ht->keys)
230 gc_assert(usable_size(ht->keys) >= storage_size);
231 else
232 ht->keys = (uword_t*)cached_allocate(storage_size);
234 ht->mem_size = storage_size;
235 ht->mask = size - 1;
236 ht->hop_range = hop_range;
237 ht->threshold = n_keys * 13 / 16; // target load ~= 81.25%
238 ht->hops = (unsigned*)(ht->keys + n_keys);
239 // Values will be properly aligned no matter what,
240 // because the largest alignment we'd need is 8 bytes,
241 // which is twice the alignment of a hop entry,
242 // and 'size' is an even number.
243 ht->values = ht->value_size ? (sword_t*)(ht->hops + size) : 0;
246 /// Same as SB-KERNEL:%SXHASH-SIMPLE-STRING
247 uword_t sxhash_simple_string(struct vector* string)
249 #ifdef SIMPLE_CHARACTER_STRING_WIDETAG
250 unsigned int* char_string = (unsigned int*)(string->data);
251 #endif
252 unsigned char* base_string = (unsigned char*)(string->data);
253 sword_t len = fixnum_value(string->length);
254 uword_t result = 0;
255 sword_t i;
256 switch (widetag_of(string->header)) {
257 #define MIX(ch) {result += ch; result += result<<10; result ^= (result>>6);}
258 #ifdef SIMPLE_CHARACTER_STRING_WIDETAG
259 case SIMPLE_CHARACTER_STRING_WIDETAG:
260 for(i=0;i<len;++i) MIX(char_string[i])
261 #endif
262 case SIMPLE_BASE_STRING_WIDETAG:
263 for(i=0;i<len;++i) MIX(base_string[i])
264 break;
266 result += result << 3;
267 result ^= result >> 11;
268 result ^= result << 15;
269 result &= (~(uword_t)0) >> (1+N_FIXNUM_TAG_BITS);
270 return result;
273 // This works on vector-like objects, which includes most numerics.
274 static uword_t vector_sxhash(lispobj* object)
276 lispobj header = *object;
277 int widetag = widetag_of(header);
278 sword_t nwords = sizetab[widetag](object);
279 // Mix words. At present this works correctly only for specialized vectors,
280 // general vectors with eq-comparable elements,
281 // and numbers that do not contain pointers.
282 return gpr_murmur_hash3(object+1,
283 (nwords-1) * N_WORD_BYTES, // exclude header word
284 // Ignore vector header data bits except for the widetag
285 widetag >= SIMPLE_ARRAY_WIDETAG
286 ? hopscotch_hmix(widetag)
287 : hopscotch_hmix(header & 0xFFFFFFFF));
290 /* Compare vectors elementwise for EQLness.
291 * This is unlike EQUAL because it works on any specialized vector,
292 * and unlike EQUALP because it compares strings case-sensitively.
293 * Note: this works on most numbers too, not just vectors.
295 static boolean vector_eql(uword_t arg1, uword_t arg2)
297 if (arg1 == arg2) return 1;
298 lispobj* obj1 = (lispobj*)arg1;
299 lispobj* obj2 = (lispobj*)arg2;
300 lispobj header1 = *obj1;
301 if (((header1 ^ *obj2) & WIDETAG_MASK) != 0)
302 return 0; // not eql - different type objects
304 int widetag1 = widetag_of(header1);
305 sword_t nwords = sizetab[widetag1](obj1);
306 if (widetag1 < SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG)
307 // All words must match exactly. Start by comparing the length
308 // (as encoded in the header) since we don't yet know that obj2
309 // occupies the correct number of words.
310 return header1 == *obj2
311 && !memcmp(obj1 + 1, obj2 + 1, (nwords-1) << WORD_SHIFT);
313 // Vector elements must have already been coalesced
314 // when comparing simple-vectors for similarity.
315 return (obj1[1] == obj2[1]) // same length vectors
316 && !memcmp(obj1 + 2, obj2 + 2, (nwords-2) << WORD_SHIFT);
319 /* Initialize 'ht' for first use, which entails zeroing the counters
320 * and allocating storage.
322 void hopscotch_create(tableptr ht, int hashfun,
323 int bytes_per_value, int size, char hop_range)
325 gc_assert((size & (size-1)) == 0); // ensure power-of-2
326 ht->hashfun = hashfun;
327 switch (hashfun) {
328 case HOPSCOTCH_HASH_FUN_DEFAULT:
329 ht->compare = 0; ht->hash = 0; break;
330 case HOPSCOTCH_HASH_FUN_MIX:
331 ht->compare = 0; ht->hash = hopscotch_hmix; break;
332 case HOPSCOTCH_STRING_HASH:
333 ht->compare = vector_eql;
334 ht->hash = (uint32_t(*)(uword_t))sxhash_simple_string;
335 break;
336 case HOPSCOTCH_VECTOR_HASH:
337 ht->compare = vector_eql;
338 ht->hash = (uint32_t(*)(uword_t))vector_sxhash;
339 break;
341 default: lose("Bad hash function");
343 switch (bytes_per_value) {
344 case 0: ht->get_value = 0; break; // no value getter
345 case 1: ht->get_value = get_val1; break;
346 case 2: ht->get_value = get_val2; break;
347 case 4: ht->get_value = get_val4; break;
348 #ifdef LISP_FEATURE_64_BIT
349 case 8: ht->get_value = get_val8; break;
350 #endif
351 default: lose("Bad value size");
353 ht->count = 0;
354 ht->rehashing = 0;
355 ht->resized = 0;
356 // Ensure that the first reset() doesn't do something screwy.
357 ht->prev_size = size;
358 // Clear these even if not collecting statistics,
359 // because it looks ugly if we don't.
360 ht->hit .n_seeks = ht->hit .n_probes = 0;
361 ht->miss.n_seeks = ht->miss.n_probes = 0;
363 ht->value_size = bytes_per_value;
364 ht->keys = 0; // Forces allocation of backing storage.
365 hopscotch_realloc(ht, size, hop_range);
368 /* Delete the storage associated with 'ht' */
369 void hopscotch_destroy(tableptr ht)
371 if (ht->mem_size) { // Free it, zero-filling if ever used.
372 cached_deallocate((char*)ht->keys, ht->count ? ht->mem_size : 0);
373 ht->keys = 0;
374 ht->hops = 0;
375 ht->values = 0;
379 /* Prepare 'ht' for re-use. Same as CLRHASH */
380 void hopscotch_reset(tableptr ht)
382 if (ht->count) {
383 bzero(ht->keys, ht->mem_size);
384 ht->count = 0;
386 // If the size exceeds twice the final size from the prior run,
387 // or is the same size and was not enlarged, then downsize,
388 // but don't go below a certain minimal size.
389 int size = table_size(*ht);
390 #if 0
391 fprintf(stderr, "hh reset: size=%d prev=%d upsized=%d\n",
392 size, ht->prev_size, ht->resized);
393 #endif
394 if (size > (ht->prev_size << 1)
395 || (size == ht->prev_size && !ht->resized && size > 8))
396 // Halve the size for the next GC cycle
397 hopscotch_realloc(ht, size >> 1, 0);
398 ht->prev_size = size;
399 ht->resized = 0;
400 // Possibly reset the hash function to the fast-but-dumb one
401 if (ht->hashfun == HOPSCOTCH_HASH_FUN_DEFAULT)
402 ht->hash = 0;
403 INTEGRITY_CHECK("after reset");
406 /* Double the size of 'ht'. Called when an insertion overflows the table */
407 tableptr hopscotch_resize_up(tableptr ht)
409 int size = ht->mask + 1; // Logical bin count
410 int old_max_index = hopscotch_max_key_index(*ht);
411 struct hopscotch_table copy;
413 #if 0
414 fprintf(stderr, "resize up: ct=%d cap=%d hop=%d LF=%f\n",
415 ht->count, 1+old_max_index, ht->hop_range,
416 (float)ht->count/(1+old_max_index));
417 #endif
418 INTEGRITY_CHECK("before resize");
419 // Copy the keys or key/value pairs.
421 // It's conceivable, however improbable, that there is a hash function
422 // which causes more collisions at the new size than the old size.
423 // Due to the fixed hop range, failure to insert while rehashing
424 // must be caught so that we can try again with a larger size.
425 // But usually this loop will execute exactly once.
426 int i;
427 do {
428 size *= 2;
429 hopscotch_create(&copy, ht->hashfun, ht->value_size, size, 0);
430 // Maybe change the hash function if it's the dumb one
431 if (copy.hop_range > 16 && copy.hash == 0)
432 copy.hash = hopscotch_hmix;
433 copy.rehashing = 1; // Causes put() to return 0 on failure
434 if (ht->values) {
435 for(i=old_max_index ; i >= 0 ; --i)
436 if (ht->keys[i])
437 if (!hopscotch_insert(&copy, ht->keys[i], get_val(ht,i)))
438 break;
439 } else {
440 for(i=old_max_index ; i >= 0 ; --i)
441 if (ht->keys[i])
442 if (!hopscotch_insert(&copy, ht->keys[i], 1)) {
443 #if 0
444 fprintf(stderr, "resize failed with new size %d, hop_range %d\n",
445 size, copy.hop_range);
446 #endif
447 break;
450 } while (i >= 0 && (hopscotch_destroy(&copy), 1));
452 // Zero-fill and release the old storage.
453 cached_deallocate((char*)ht->keys, ht->mem_size);
455 // Move all of the data pointers from 'copy' into ht.
456 // mem_size is passed to bzero() when resetting the table,
457 // so definitely be sure to use the new, not the old.
458 // And of course _don't_ hopscotch_destroy() copy when done.
459 ht->hash = copy.hash;
460 ht->mem_size = copy.mem_size;
461 ht->mask = copy.mask;
462 ht->hop_range = copy.hop_range;
463 ht->threshold = copy.threshold;
464 ht->keys = copy.keys;
465 ht->hops = copy.hops;
466 ht->values = copy.values;
467 ht->resized = 1;
468 INTEGRITY_CHECK("after resize");
469 return ht;
472 void hopscotch_log_stats(tableptr ht, char *tablename)
474 #ifdef HOPSCOTCH_INSTRUMENT
475 static FILE *hh_logfile;
476 if (!hh_logfile)
477 hh_logfile = fopen("hash-stats.txt","a");
478 fprintf(hh_logfile,
479 "[hopscotch]: %s ct=%5d cap=%5d LF=%f seek=%5d+%5d probe/seek=%f+%f (hit+miss)\n",
480 tablename, ht->count,
481 (ht->mask + ht->hop_range),
482 (float)ht->count / (ht->mask + ht->hop_range),
483 ht->hit.n_seeks, ht->miss.n_seeks,
484 ht->hit.n_seeks>0 ? (float)ht->hit.n_probes / ht->hit.n_seeks : 0.0,
485 ht->miss.n_seeks>0 ? (float)ht->miss.n_probes / ht->miss.n_seeks : 0.0);
486 fflush(hh_logfile);
487 ht->hit.n_seeks = ht->hit.n_probes = 0;
488 ht->miss.n_seeks = ht->miss.n_probes = 0;
489 #endif
/* Return an integer with 'n' low-order 1 bits.
 * This does not do the right thing for n = 0, but that's fine!
 * (Shifting an unsigned 32-bit integer rightward by 32 is not defined.
 * 32-bit x86 masks the shift amount to 5 bits, so you get 0 shift)
 */
static inline unsigned int bitmask_of_width(int n) {
    return (0xFFFFFFFFU >> (32 - n));
}
501 #define put_pair(i,k,v) ht->keys[i] = k; if(ht->values) set_val(ht, i, v)
503 /* Add key/val to 'ht'. 'val' is ignored for a hash-set */
504 int hopscotch_insert(tableptr ht, uword_t key, sword_t val)
506 gc_dcheck(key);
507 // 'desired_index' is where 'key' logically belongs, but it
508 // may physically go in any cell to the right up to (range-1) away.
509 int desired_index = hash(ht, key) & ht->mask;
510 if (ht->keys[desired_index] == 0) { // Instant win
511 put_pair(desired_index, key, val);
512 set_hop_bit(ht, desired_index, 0);
513 return ++ht->count; // Allow rehash threshold to be exceeded
515 if (!ht->rehashing && ht->count >= ht->threshold)
516 return hopscotch_insert(hopscotch_resize_up(ht), key, val);
517 // 'limit' is the inclusive bound on cell indices.
518 int limit = hopscotch_max_key_index(*ht);
519 int free_index = desired_index;
520 int displacement;
521 while (ht->keys[++free_index] != 0) // While cell is occupied
522 if (free_index == limit)
523 return ht->rehashing ? 0 : // fail if rehash table is too small
524 hopscotch_insert(hopscotch_resize_up(ht), key, val);
526 // 'free_index' is where *some* item could go,
527 // but it might be too far away for this key.
528 retry:
529 if ((displacement = free_index - desired_index) < ht->hop_range) {
530 put_pair(free_index, key, val);
531 set_hop_bit(ht, desired_index, displacement);
532 return ++ht->count;
534 // Find the empty cell furthest away from and to the left of free_index,
535 // within the hop_range, that contains an item that can be moved.
536 int logical_bin = free_index - (ht->hop_range - 1);
537 // limit is the max index (inclusive) of the available free cells
538 // up to but excluding 'free_index'
539 limit = free_index - 1;
540 // In case free_index currently points to a physical bin "off the end"
541 // of the logical bins, confine to the highest logical bin,
542 // which is just the table mask.
543 if (limit >= (int)ht->mask)
544 limit = ht->mask;
545 // Now 'free_index' is fixed, and 'logical_bin' is what we search
546 // over to find something to displace into the free_index.
547 // Example: v----- free index
548 // | | X | X | O | | [X = filled. O = filled + owned]
549 // ^--- logical bin (displacement = 3)
550 // Look at the low 3 bits of the hop bits for 'logical_bin'.
551 // Those indicate the physical cells "owned" by the logical cell
552 // and within the needed distance to the free cell.
553 // If any are set, the leftmost bit is robbed to make room.
554 // In the above example, bit index 2 (0-based index) would be claimed.
555 for ( ; logical_bin <= limit ; ++logical_bin ) {
556 displacement = free_index - logical_bin;
557 unsigned bits = get_hop_mask(ht, logical_bin);
558 unsigned masked_bits = bits & bitmask_of_width(displacement);
559 if (masked_bits) {
560 int victim = ffs(masked_bits) - 1; // ffs() is 1-based
561 int physical_elt = logical_bin + victim;
562 // Relocate the contents of 'physical_elt' to 'free_index'
563 put_pair(free_index, ht->keys[physical_elt], get_val(ht, physical_elt));
564 put_pair(physical_elt, 0, 0);
565 // This logical bin no longer owns the index where the victim was,
566 // but does own the index where it got moved to.
567 set_hop_mask(ht, logical_bin, bits ^ (1U<<displacement | 1U<<victim));
568 // Now free_index gets smaller, and we try again from the top.
569 free_index = physical_elt;
570 goto retry;
573 // Too many collisions and not enough room to move things around.
574 return ht->rehashing ? 0 : hopscotch_insert(hopscotch_resize_up(ht), key, val);
575 #undef put_pair
/// When probing on lookup, while we could use the mask bits in the
/// desired logical bin to restrict the number of key comparisons made,
/// this turns out to be worse. Though slightly counter-intuitive,
/// it is likely due to one fewer conditional branch when we hit the
/// first choice physical cell. The probe() macro will decide whether
/// to use the mask bits and/or record the number of key comparisons.
/// Statistics gathering also slows us down a lot, so only do it when
/// making comparative benchmarks, not in real-world use.
#ifdef HOPSCOTCH_INSTRUMENT
#define probe(mask,i,action) ++probes; if (ht->keys[i] == key) { \
    ++ht->hit.n_seeks; ht->hit.n_probes += probes; action; }
#define tally_miss(table,n) ++table->miss.n_seeks; table->miss.n_probes += n
#else
#define probe(mask,i,action) if (ht->keys[i] == key) action;
#define tally_miss(table,n)
#endif
595 /* Test for membership in a hashset. Return 1 or 0. */
596 int hopscotch_containsp(tableptr ht, uword_t key)
598 // index needn't be 'long' but the code generated is better with it.
599 unsigned long index = hash(ht, key) & ht->mask;
600 unsigned bits = get_hop_mask(ht, index);
601 int __attribute__((unused)) probes = 0;
603 if (ht->compare) { // Custom comparator
604 for ( ; bits ; bits >>= 1, ++index )
605 if ((bits & 1) && ht->compare(ht->keys[index], key))
606 return 1;
607 return 0;
609 // *** Use care when modifying this code, and benchmark it thoroughly! ***
610 // TODO: use XMM register to test 2 keys at once if properly aligned.
611 if (bits & 0xff) {
612 probe((1<<0), index+0, return 1);
613 probe((1<<1), index+1, return 1);
614 probe((1<<2), index+2, return 1);
615 probe((1<<3), index+3, return 1);
616 probe((1<<4), index+4, return 1);
617 probe((1<<5), index+5, return 1);
618 probe((1<<6), index+6, return 1);
619 probe((1<<7), index+7, return 1);
621 // There's a trade-off to be made: checking for fewer bits at a time
622 // (such as by "bits & 0x0f") would give finer grain to the set of
623 // physical cells tested, but would mean more iterations.
624 // It seems like 8 bits at a time is a good number, especially if the
625 // hop range is 8, because this general case need never execute.
626 while ((bits >>= 8) != 0) {
627 index += 8;
628 if (bits & 0xff) {
629 probe((1<<0), index+0, return 1);
630 probe((1<<1), index+1, return 1);
631 probe((1<<2), index+2, return 1);
632 probe((1<<3), index+3, return 1);
633 probe((1<<4), index+4, return 1);
634 probe((1<<5), index+5, return 1);
635 probe((1<<6), index+6, return 1);
636 probe((1<<7), index+7, return 1);
639 tally_miss(ht, probes);
640 return 0;
643 /* Return the value associated with 'key', or 'notfound' if not found */
644 sword_t hopscotch_get(tableptr ht, uword_t key, sword_t notfound)
646 gc_dcheck(key);
647 int index = hash(ht, key) & ht->mask;
648 unsigned bits = get_hop_mask(ht, index);
649 int __attribute__((unused)) probes = 0;
650 // This is not as blazingly fast as the hand-unrolled loop
651 // in containsp(), but the GC does not need it, so ...
652 if (ht->compare) // Custom comparator
653 for ( ; bits ; bits >>= 1, ++index ) {
654 if ((bits & 1) && ht->compare(ht->keys[index], key))
655 goto found0;
657 else for ( ; bits ; bits >>= 4, index += 4)
658 if (bits & 0xf) {
659 probe(1, index+0, goto found0);
660 probe(2, index+1, goto found1);
661 probe(4, index+2, goto found2);
662 probe(8, index+3, goto found3);
664 tally_miss(ht, probes);
665 return notfound;
666 found3: ++index;
667 found2: ++index;
668 found1: ++index;
669 found0:
670 return get_val(ht, index);
673 /* Update or insert a key/value pair. Return nonzero if
674 * the key was inserted, or zero if the key existed. */
675 int hopscotch_put(tableptr ht, uword_t key, sword_t val)
677 gc_dcheck(key);
678 int index = hash(ht, key) & ht->mask;
679 unsigned bits = get_hop_mask(ht, index);
680 int __attribute__((unused)) probes = 0;
681 // This is not as blazingly fast as the hand-unrolled loop
682 // in containsp(), but the GC does not need it, so ...
683 if (ht->compare) // Custom comparator
684 for ( ; bits ; bits >>= 1, ++index ) {
685 if ((bits & 1) && ht->compare(ht->keys[index], key))
686 goto found0;
688 else for ( ; bits ; bits >>= 4, index += 4 )
689 if (bits & 0xf) {
690 probe(1, index+0, goto found0);
691 probe(2, index+1, goto found1);
692 probe(4, index+2, goto found2);
693 probe(8, index+3, goto found3);
695 tally_miss(ht, probes);
696 return hopscotch_insert(ht, key, val);
697 found3: ++index;
698 found2: ++index;
699 found1: ++index;
700 found0:
701 set_val(ht, index, val);
702 return 0;
705 #undef probe
#if 0
#include <stdio.h>
int popcount(unsigned x)
{
    int count = 0;
    for ( ; x != 0 ; x >>= 1 )
        if (x&1) ++count;
    return count;
}

/* Perform a bunch of sanity checks on 'ht' */
void hopscotch_integrity_check(tableptr ht, char*when, int verbose)
{
    int n_items = 0, tot_bits_set = 0, i;
    int size = table_size(*ht);
    int n_kv_pairs = size + ht->hop_range-1;
    int fail = 0;
    FILE * s = stderr;

    for(i=n_kv_pairs-1 ; i >= 0 ; --i) if (ht->keys[i]) ++n_items;
    for(i=ht->mask; i>= 0; --i) tot_bits_set += popcount(get_hop_mask(ht,i));
    if (verbose)
        fprintf(s, "(%s) Verifying table @ %p. count=%d actual=%d bits=%d\n",
                when, ht, ht->count, n_items, tot_bits_set);
    for (i=0;i<n_kv_pairs;++i) {
        uword_t key = ht->keys[i];
        int claimed;
        if (key != 0 || (i<=ht->mask && get_hop_mask(ht,i) != 0)) {
            // Compute the logical cell that owns this physical cell.
            int start_index = i - (ht->hop_range-1);
            if (start_index < 0) start_index = 0;
            int end_index = i;
            if (end_index > ht->mask) end_index = ht->mask;
            claimed = -1;
            int logical_cell;
            for (logical_cell = start_index ; logical_cell <= end_index ; ++logical_cell) {
                unsigned hop_bits = get_hop_mask(ht, logical_cell);
                if (hop_bits & (1<<(i - logical_cell))) {
                    if (claimed == -1)
                        claimed = logical_cell;
                    else {
                        fprintf(stderr,
                                "physical cell %d duplicately claimed: %d and %d",
                                i, claimed, logical_cell);
                        fail = 1;
                    }
                }
            }
            if (verbose) {
                if (claimed==i || (claimed==-1 && !key))
                    // Blank field, width matching "[%6d]" below.
                    // NOTE(review): exact width lost in extraction - confirm.
                    fprintf(s, "        ");
                else if (claimed!=-1) {
                    fprintf(s, "[%6d]", claimed);
                    if ((int)(ht->mask & hash(ht, key)) != claimed)
                        lose("key hashes to wrong logical cell?");
                } else { // should have been claimed
                    fprintf(s, " **** ");
                    fail = 1;
                }
                fprintf(s, " %6d: %04x", i, i <= ht->mask ? get_hop_mask(ht,i) : 0);
                if (key) {
                    fprintf(s, " %lx -> %d", key, (int)(ht->mask & hash(ht, key)));
                    if (ht->values)
                        fprintf(s, " {val=%lx}", hopscotch_get(ht, key, -1));
                }
                putc('\n', s);
            }
        }
    }
    if (ht->count != n_items || tot_bits_set != n_items || fail)
        lose("integrity check on hashtable %p failed", ht);
    fflush(s);
}
#endif