2 * This software is part of the SBCL system. See the README file for
5 * This software is derived from the CMU CL system, which was
6 * written at Carnegie Mellon University and released into the
7 * public domain. The software is in the public domain and is
8 * provided with absolutely no warranty. See the COPYING and CREDITS
9 * files for more information.
13 * Our implementation of the hopscotch algorithm described in
14 * http://people.csail.mit.edu/shanir/publications/disc2008_submission_98.pdf
15 * which is an extremely simple variation on linear probing
16 * that provides a guaranteed bound on number of probes.
// NOTE(review): the text in this chunk is corrupted -- lines are split
// mid-token and some original lines are missing (e.g. whatever followed
// the #ifdef below). Code left byte-identical; comments only.
20 #include "gc-internal.h" // for os_validate()
21 #include "hopscotch.h"
23 #ifdef HOPSCOTCH_INSTRUMENT
// Shorthand for a pointer to the table struct declared in hopscotch.h.
27 typedef struct hopscotch_table
* tableptr
;
28 void hopscotch_integrity_check(tableptr
,char*,int);
// Logical bin count: 'mask' is size-1 where size is a power of 2.
30 #define table_size(table) (1+(table).mask)
// Integrity checking compiles to nothing in normal builds.
31 #define INTEGRITY_CHECK(when) {}
33 /// By default, XOR values based on the page number and the
34 /// relative index into the page, disregarding lowtag bits.
35 /// If a specific function has been set, then use that.
36 static inline uint32_t hash(tableptr ht
, lispobj x
) {
37 return ht
->hash
? ht
->hash(x
) :
38 (x
>> GENCGC_CARD_SHIFT
) ^ (x
>> (1+WORD_SHIFT
));
41 /// From https://github.com/google/cityhash/blob/master/src/city.cc
42 /// which attributes it in turn to
43 /// https://github.com/PeterScott/murmur3/blob/master/murmur3.c
44 /// The right shift by N_LOWTAG_BITS is specific to SBCL.
45 uint32_t hopscotch_hmix(uword_t key
)
47 uint32_t h
= key
>> N_LOWTAG_BITS
;
56 /// Set a single bit in the hop mask for logical cell at 'index'
57 static inline void set_hop_bit(tableptr ht
, unsigned index
, int bit
)
59 unsigned mask
= 1<<bit
;
60 ht
->hops
[index
] |= mask
;
62 /// Set all the bits in the hop mask for logical cell at 'index'
63 static inline void set_hop_mask(tableptr ht
, unsigned index
, unsigned bits
)
65 ht
->hops
[index
] = bits
;
67 static inline unsigned get_hop_mask(tableptr ht
, unsigned index
)
69 return ht
->hops
[index
];
71 static void set_val(tableptr ht
, int index
, sword_t val
)
73 switch(ht
->value_size
) {
74 #ifdef LISP_FEATURE_64_BIT
75 case 8: ht
->values
[index
] = val
; break;
77 case 4: ((int32_t*)ht
->values
)[index
] = val
; break;
78 case 2: ((int16_t*)ht
->values
)[index
] = val
; break;
79 case 1: ((int8_t* )ht
->values
)[index
] = val
; break;
82 static sword_t
get_val(tableptr ht
, int index
)
84 switch(ht
->value_size
) {
85 #ifdef LISP_FEATURE_64_BIT
86 case 8: return ht
->values
[index
];
88 case 4: return ((int32_t*)ht
->values
)[index
];
89 case 2: return ((int16_t*)ht
->values
)[index
];
90 case 1: return ((int8_t *)ht
->values
)[index
];
94 #ifdef LISP_FEATURE_64_BIT
95 static sword_t
get_val8(tableptr ht
, int index
) { return ht
->values
[index
]; }
97 static sword_t
get_val4(tableptr ht
, int index
) { return ((int32_t*)ht
->values
)[index
]; }
98 static sword_t
get_val2(tableptr ht
, int index
) { return ((int16_t*)ht
->values
)[index
]; }
99 static sword_t
get_val1(tableptr ht
, int index
) { return ((int8_t *)ht
->values
)[index
]; }
/// Hopscotch storage allocation granularity.
/// Our usual value of "page size" is the GC page size, which is
/// coarser than necessary (cf {target}/backend-parms.lisp).
static int hh_allocation_granularity = 4096;
#define ALLOCATION_OVERHEAD (2*sizeof(unsigned int))
/// Return the number of usable bytes (excluding the header) in an allocation
#define usable_size(x) ((unsigned int*)x)[-1]

/// Sizing up a table can't be done in-place, so reserve a few blocks
/// of memory for when resize has to happen during GC. We don't return
/// these blocks to the OS. If even more is required, it will be allocated
/// as needed, but we'll only keep on reserve at most two blocks.
#define N_CACHED_ALLOCS 2
char* cached_alloc[N_CACHED_ALLOCS];
115 void hopscotch_init() // Called once on runtime startup, from gc_init().
117 // Get 16KB from the OS and evenly divide it into two pieces.
118 int n_bytes_per_slice
= 8 * 1024;
119 int n_bytes_total
= N_CACHED_ALLOCS
* n_bytes_per_slice
;
120 char* mem
= (char*)os_validate(0, n_bytes_total
);
122 cached_alloc
[0] = mem
+ ALLOCATION_OVERHEAD
;
123 cached_alloc
[1] = cached_alloc
[0] + n_bytes_per_slice
;
124 // Write the user-visible size of each allocation into the block header
125 usable_size(cached_alloc
[0]) = n_bytes_per_slice
- ALLOCATION_OVERHEAD
;
126 usable_size(cached_alloc
[1]) = n_bytes_per_slice
- ALLOCATION_OVERHEAD
;
129 /* Return the address of at least 'nbytes' of storage.
130 * This is not a general-purpose thing - it's only intended to keep
131 * one or perhaps two hopscotch hash tables around during GC,
132 * for pinned objects, and maybe something else.
133 * As such, no attempt is made to minimize storage use,
134 * and if used more generally, would badly suffer from fragmentation.
136 static char* cached_allocate(os_vm_size_t nbytes
)
138 // See if either cached allocation is large enough.
139 if (cached_alloc
[0] && usable_size(cached_alloc
[0]) >= nbytes
) {
140 // Yup, just give the consumer the whole thing.
141 char* result
= cached_alloc
[0];
142 cached_alloc
[0] = 0; // Remove from the pool
145 if (cached_alloc
[1] && usable_size(cached_alloc
[1]) >= nbytes
) { // Ditto.
146 char* result
= cached_alloc
[1];
150 // Request more memory, not using malloc().
151 // Round up, since the OS will give more than asked if the request is
152 // not a multiple of the mmap granularity, which we'll assume is 4K.
153 // (It doesn't actually matter.)
154 nbytes
= CEILING(nbytes
, hh_allocation_granularity
);
155 char* result
= os_validate(0, nbytes
);
157 result
+= ALLOCATION_OVERHEAD
;
158 usable_size(result
) = nbytes
- ALLOCATION_OVERHEAD
;
162 /* Return 'mem' to the cache, first zero-filling to the specified length.
163 * Though the memory size is recorded in the header of the memory block,
164 * the allocator doesn't know how many bytes were touched by the requestor,
165 * which is why the length is specified again.
166 * If returning it to the OS and not the cache, then don't bother 0-filling.
168 static void cached_deallocate(char* mem
, int zero_fill_length
)
171 if (!cached_alloc
[0]) {
172 } else if (!cached_alloc
[1])
175 // Try to retain whichever 2 blocks are largest (the given one and
176 // cached ones) in the hope of fulfilling future requests from cache.
177 int this_size
= usable_size(mem
);
178 int cached_size0
= usable_size(cached_alloc
[0]);
179 int cached_size1
= usable_size(cached_alloc
[1]);
180 if (!(this_size
> cached_size0
|| this_size
> cached_size1
)) {
181 // mem is not strictly larger than either cached block. Release it.
182 os_deallocate(mem
- ALLOCATION_OVERHEAD
,
183 usable_size(mem
) + ALLOCATION_OVERHEAD
);
186 // Evict and replace the smaller of the two cache entries.
187 if (cached_size1
< cached_size0
)
189 os_deallocate(cached_alloc
[line
] - ALLOCATION_OVERHEAD
,
190 usable_size(cached_alloc
[line
]) + ALLOCATION_OVERHEAD
);
192 bzero(mem
, zero_fill_length
);
193 cached_alloc
[line
] = mem
;
// NOTE(review): text corrupted -- lines split mid-token and several
// original lines missing (the opening '{', the 'else' arm of the
// hop_range selection, the keys-allocation branch around the
// gc_assert, and the ht->mask assignment). Code left byte-identical.
196 /* Initialize 'ht' for 'size' logical bins with a max hop of 'hop_range'.
197 * 'valuesp' makes a hash-map if true; a hash-set if false.
198 * Hop range will be selected automatically if specified as 0.
200 static void hopscotch_realloc(tableptr ht
, int size
, char hop_range
)
202 // Somewhat arbitrary criteria that improve the worst-case probes for
203 // small hashtables. The reference algorithm uses a fixed max hop of 32,
204 // but fewer is better, and our pinned object table tends to be very small.
205 if (hop_range
== 0) { // Let us pick.
206 // The arbitrary cutoff of 1023 is based on the observed final size
207 // of 2063 (=2048+15) with commonly fewer than 80 items.
208 // Collisions must have been so frequent that the only way out
209 // was to bump the table size and rehash. We may as well allow more
210 // probes at smaller sizes for the sake of improved table density.
211 if (size
<= 1023) hop_range
= 8;
212 else if (size
<= 16384) hop_range
= 16;
216 // The key/value arrays are *not* circular.
217 // The last few logical cells in the key array can use physical cells
218 // at indices greater than 'size'; there's no wrapping back to index 0.
// Layout: n_keys keys, then 'size' hop bitmasks, then (optionally) values.
219 int n_keys
= size
+ (hop_range
- 1);
220 unsigned storage_size
= (sizeof (uword_t
) + ht
->value_size
) * n_keys
221 + sizeof (int) * size
; // hop bitmasks
224 gc_assert(usable_size(ht
->keys
) >= storage_size
);
226 ht
->keys
= (uword_t
*)cached_allocate(storage_size
);
228 ht
->mem_size
= storage_size
;
230 ht
->hop_range
= hop_range
;
231 ht
->threshold
= n_keys
* 13 / 16; // target load ~= 81.25%
232 ht
->hops
= (unsigned*)(ht
->keys
+ n_keys
);
233 // Values will be properly aligned no matter what,
234 // because the largest alignment we'd need is 8 bytes,
235 // which is twice the alignment of a hop entry,
236 // and 'size' is an even number.
237 ht
->values
= ht
->value_size
? (sword_t
*)(ht
->hops
+ size
) : 0;
// NOTE(review): text corrupted -- lines split mid-token and several
// original lines missing (the 'switch (hashfun)' header, the DEFAULT
// case's body, the non-64-bit '#endif', and the closing braces).
// Code left byte-identical; comments only.
240 /* Initialize 'ht' for first use, which entails zeroing the counters
241 * and allocating storage.
243 void hopscotch_create(tableptr ht
, int hashfun
,
244 int bytes_per_value
, int size
, char hop_range
)
246 gc_assert((size
& (size
-1)) == 0); // ensure power-of-2
247 ht
->hashfun
= hashfun
;
// Select the hash function: DEFAULT leaves ht->hash unset (the fast
// address-based hash in hash()); MIX installs the murmur finalizer.
249 case HOPSCOTCH_HASH_FUN_DEFAULT
:
251 case HOPSCOTCH_HASH_FUN_MIX
:
252 ht
->hash
= hopscotch_hmix
; break;
253 default: lose("Bad hash function");
// Install the width-specific value getter matching bytes_per_value.
255 switch (bytes_per_value
) {
256 case 0: ht
->get_value
= 0; break; // no value getter
257 case 1: ht
->get_value
= get_val1
; break;
258 case 2: ht
->get_value
= get_val2
; break;
259 case 4: ht
->get_value
= get_val4
; break;
260 #ifdef LISP_FEATURE_64_BIT
261 case 8: ht
->get_value
= get_val8
; break;
263 default: lose("Bad value size");
268 // Ensure that the first reset() doesn't do something screwy.
269 ht
->prev_size
= size
;
270 // Clear these even if not collecting statistics,
271 // because it looks ugly if we don't.
272 ht
->hit
.n_seeks
= ht
->hit
.n_probes
= 0;
273 ht
->miss
.n_seeks
= ht
->miss
.n_probes
= 0;
275 ht
->value_size
= bytes_per_value
;
276 ht
->keys
= 0; // Forces allocation of backing storage.
277 hopscotch_realloc(ht
, size
, hop_range
);
// NOTE(review): text corrupted and truncated -- the function's tail
// (original lines 285+) is missing from this copy. Left byte-identical.
280 /* Delete the storage associated with 'ht' */
281 void hopscotch_delete(tableptr ht
)
// Zero-fill only if the table was ever populated (count != 0);
// cached_deallocate() skips the bzero when given length 0.
283 if (ht
->mem_size
) { // Free it, zero-filling if ever used.
284 cached_deallocate((char*)ht
->keys
, ht
->count
? ht
->mem_size
: 0);
// NOTE(review): text corrupted -- lines split mid-token, opening brace
// and a few lines (e.g. the #ifdef around the stderr trace, the body of
// the hash-function reset) missing. Code left byte-identical.
291 /* Prepare 'ht' for re-use. Same as CLRHASH */
292 void hopscotch_reset(tableptr ht
)
// Clear all keys/hops/values in one stroke.
295 bzero(ht
->keys
, ht
->mem_size
);
298 // If the size exceeds twice the final size from the prior run,
299 // or is the same size and was not enlarged, then downsize,
300 // but don't go below a certain minimal size.
301 int size
= table_size(*ht
);
303 fprintf(stderr
, "hh reset: size=%d prev=%d upsized=%d\n",
304 size
, ht
->prev_size
, ht
->resized
);
306 if (size
> (ht
->prev_size
<< 1)
307 || (size
== ht
->prev_size
&& !ht
->resized
&& size
> 8))
308 // Halve the size for the next GC cycle
309 hopscotch_realloc(ht
, size
>> 1, 0);
310 ht
->prev_size
= size
;
312 // Possibly reset the hash function to the fast-but-dumb one
313 if (ht
->hashfun
== HOPSCOTCH_HASH_FUN_DEFAULT
)
315 INTEGRITY_CHECK("after reset");
// NOTE(review): text corrupted -- lines split mid-token and several
// original lines missing (the do { ... } loop header, 'int i;', the
// break statements, and the return). Fix applied here: the HTML entity
// for '&copy' had been rendered as the copyright sign at four call
// sites, corrupting the address-of expressions; restored to '&copy'.
// Everything else left byte-identical.
318 /* Double the size of 'ht'. Called when an insertion overflows the table */
319 tableptr
hopscotch_resize_up(tableptr ht
)
321 int size
= ht
->mask
+ 1; // Logical bin count
322 int old_max_index
= hopscotch_max_key_index(*ht
);
323 struct hopscotch_table copy
;
326 fprintf(stderr
, "resize up: ct=%d cap=%d hop=%d LF=%f\n",
327 ht
->count
, 1+old_max_index
, ht
->hop_range
,
328 (float)ht
->count
/(1+old_max_index
));
330 INTEGRITY_CHECK("before resize");
331 // Copy the keys or key/value pairs.
333 // It's conceivable, however improbable, that there is a hash function
334 // which causes more collisions at the new size than the old size.
335 // Due to the fixed hop range, failure to insert while rehashing
336 // must be caught so that we can try again with a larger size.
337 // But usually this loop will execute exactly once.
341 hopscotch_create(&copy
, ht
->hashfun
, ht
->value_size
, size
, 0);
342 // Maybe change the hash function if it's the dumb one
343 if (ht
->hop_range
> 16 && ht
->hash
== 0)
344 ht
->hash
= hopscotch_hmix
;
345 copy
.hash
= ht
->hash
; // in case DEFAULT was upgraded to MIX
346 copy
.rehashing
= 1; // Causes put() to return 0 on failure
348 for(i
=old_max_index
; i
>= 0 ; --i
)
350 if (!hopscotch_insert(&copy
, ht
->keys
[i
], get_val(ht
,i
)))
353 for(i
=old_max_index
; i
>= 0 ; --i
)
355 if (!hopscotch_insert(&copy
, ht
->keys
[i
], 1)) {
357 fprintf(stderr
, "resize failed with new size %d, hop_range %d\n",
358 size
, copy
.hop_range
);
363 } while (i
>= 0 && (hopscotch_delete(&copy
), 1));
365 // Zero-fill and release the old storage.
366 cached_deallocate((char*)ht
->keys
, ht
->mem_size
);
368 // Move all of the data pointers from 'copy' into ht.
369 // mem_size is passed to bzero() when resetting the table,
370 // so definitely be sure to use the new, not the old.
371 // And of course _don't_ hopscotch_delete() copy when done.
372 ht
->hash
= copy
.hash
;
373 ht
->mem_size
= copy
.mem_size
;
374 ht
->mask
= copy
.mask
;
375 ht
->hop_range
= copy
.hop_range
;
376 ht
->threshold
= copy
.threshold
;
377 ht
->keys
= copy
.keys
;
378 ht
->hops
= copy
.hops
;
379 ht
->values
= copy
.values
;
381 INTEGRITY_CHECK("after resize");
// NOTE(review): text corrupted -- lines split mid-token and some lines
// missing (the fprintf call that consumes the format string below, the
// fclose/#endif tail). Code left byte-identical; comments only.
385 void hopscotch_log_stats(tableptr ht
, char *tablename
)
387 #ifdef HOPSCOTCH_INSTRUMENT
// Lazily-opened, append-mode statistics log shared by all tables.
388 static FILE *hh_logfile
;
390 hh_logfile
= fopen("hash-stats.txt","a");
392 "[hopscotch]: %s ct=%5d cap=%5d LF=%f seek=%5d+%5d probe/seek=%f+%f (hit+miss)\n",
393 tablename
, ht
->count
,
394 (ht
->mask
+ ht
->hop_range
),
395 (float)ht
->count
/ (ht
->mask
+ ht
->hop_range
),
396 ht
->hit
.n_seeks
, ht
->miss
.n_seeks
,
397 ht
->hit
.n_seeks
>0 ? (float)ht
->hit
.n_probes
/ ht
->hit
.n_seeks
: 0.0,
398 ht
->miss
.n_seeks
>0 ? (float)ht
->miss
.n_probes
/ ht
->miss
.n_seeks
: 0.0);
// Reset the counters so each log line covers one interval.
400 ht
->hit
.n_seeks
= ht
->hit
.n_probes
= 0;
401 ht
->miss
.n_seeks
= ht
->miss
.n_probes
= 0;
/* Return an integer with 'n' low-order 1 bits.
 * This does not do the right thing for n = 0, but that's fine!
 * (Shifting an unsigned 32-bit integer rightward by 32 is not defined.
 * 32-bit x86 masks the shift amount to 5 bits, so you get 0 shift)
 */
static inline unsigned int bitmask_of_width(int n) {
    return (0xFFFFFFFFU >> (32 - n));
}
414 #define put_pair(i,k,v) ht->keys[i] = k; if(ht->values) set_val(ht, i, v)
// NOTE(review): text corrupted -- lines split mid-token and several
// original lines missing (the opening '{', 'int displacement;', the
// 'retry:'-style control flow around the displacement loop, closing
// braces). Code left byte-identical; comments only.
// Returns the new count on success; during rehashing (ht->rehashing)
// returns 0 instead of growing, so resize_up can retry with a bigger table.
416 /* Add key/val to 'ht'. 'val' is ignored for a hash-set */
417 int hopscotch_insert(tableptr ht
, uword_t key
, sword_t val
)
419 // 'desired_index' is where 'key' logically belongs, but it
420 // may physically go in any cell to the right up to (range-1) away.
421 int desired_index
= hash(ht
, key
) & ht
->mask
;
422 if (ht
->keys
[desired_index
] == 0) { // Instant win
423 put_pair(desired_index
, key
, val
);
424 set_hop_bit(ht
, desired_index
, 0);
425 return ++ht
->count
; // Allow rehash threshold to be exceeded
427 if (!ht
->rehashing
&& ht
->count
>= ht
->threshold
)
428 return hopscotch_insert(hopscotch_resize_up(ht
), key
, val
);
429 // 'limit' is the inclusive bound on cell indices.
430 int limit
= hopscotch_max_key_index(*ht
);
431 int free_index
= desired_index
;
// Linear scan rightward for the first empty physical cell.
433 while (ht
->keys
[++free_index
] != 0) // While cell is occupied
434 if (free_index
== limit
)
435 return ht
->rehashing
? 0 : // fail if rehash table is too small
436 hopscotch_insert(hopscotch_resize_up(ht
), key
, val
);
438 // 'free_index' is where *some* item could go,
439 // but it might be too far away for this key.
441 if ((displacement
= free_index
- desired_index
) < ht
->hop_range
) {
442 put_pair(free_index
, key
, val
);
443 set_hop_bit(ht
, desired_index
, displacement
);
446 // Find the empty cell furthest away from and to the left of free_index,
447 // within the hop_range, that contains an item that can be moved.
448 int logical_bin
= free_index
- (ht
->hop_range
- 1);
449 // limit is the max index (inclusive) of the available free cells
450 // up to but excluding 'free_index'
451 limit
= free_index
- 1;
452 // In case free_index currently points to a physical bin "off the end"
453 // of the logical bins, confine to the highest logical bin,
454 // which is just the table mask.
455 if (limit
>= (int)ht
->mask
)
457 // Now 'free_index' is fixed, and 'logical_bin' is what we search
458 // over to find something to displace into the free_index.
459 // Example: v----- free index
460 // | | X | X | O | | [X = filled. O = filled + owned]
461 // ^--- logical bin (displacement = 3)
462 // Look at the low 3 bits of the hop bits for 'logical_bin'.
463 // Those indicate the physical cells "owned" by the logical cell
464 // and within the needed distance to the free cell.
465 // If any are set, the leftmost bit is robbed to make room.
466 // In the above example, bit index 2 (0-based index) would be claimed.
467 for ( ; logical_bin
<= limit
; ++logical_bin
) {
468 displacement
= free_index
- logical_bin
;
469 unsigned bits
= get_hop_mask(ht
, logical_bin
);
470 unsigned masked_bits
= bits
& bitmask_of_width(displacement
);
472 int victim
= ffs(masked_bits
) - 1; // ffs() is 1-based
473 int physical_elt
= logical_bin
+ victim
;
474 // Relocate the contents of 'physical_elt' to 'free_index'
475 put_pair(free_index
, ht
->keys
[physical_elt
], get_val(ht
, physical_elt
));
476 put_pair(physical_elt
, 0, 0);
477 // This logical bin no longer owns the index where the victim was,
478 // but does own the index where it got moved to.
479 set_hop_mask(ht
, logical_bin
, bits
^ (1<<displacement
| 1<<victim
));
480 // Now free_index gets smaller, and we try again from the top.
481 free_index
= physical_elt
;
485 // Too many collisions and not enough room to move things around.
486 return ht
->rehashing
? 0 : hopscotch_insert(hopscotch_resize_up(ht
), key
, val
);
/// When probing on lookup, while we could use the mask bits in the
/// desired logical bin to restrict the number of key comparisons made,
/// this turns out to be worse. Though slightly counter-intuitive,
/// it is likely due to one fewer conditional branch when we hit the
/// first choice physical cell. The probe() macro will decide whether
/// to use the mask bits and/or record the number of key comparisons.
/// Statistics gathering also slows us down a lot, so only do it when
/// making comparative benchmarks, not in real-world use.
#ifdef HOPSCOTCH_INSTRUMENT
#define probe(mask,i,action) ++probes; if (ht->keys[i] == key) { \
    ++ht->hit.n_seeks; ht->hit.n_probes += probes; action; }
#define tally_miss(table,n) ++table->miss.n_seeks; table->miss.n_probes += n
#else
#define probe(mask,i,action) if (ht->keys[i] == key) action;
#define tally_miss(table,n)
#endif
// NOTE(review): text corrupted -- lines split mid-token and a few lines
// missing (the opening '{', the index/bits advance inside the while
// loop, the final 'return 0;'). Code left byte-identical; comments only.
507 /* Test for membership in a hashset. Return 1 or 0. */
508 int hopscotch_containsp(tableptr ht
, uword_t key
)
510 // index needn't be 'long' but the code generated is better with it.
511 unsigned long index
= hash(ht
, key
) & ht
->mask
;
512 unsigned bits
= get_hop_mask(ht
, index
);
513 int __attribute__((unused
)) probes
= 0;
514 // *** Use care when modifying this code, and benchmark it thoroughly! ***
515 // TODO: use XMM register to test 2 keys at once if properly aligned.
// Hand-unrolled fast path: test the first 8 physical cells.
517 probe((1<<0), index
+0, return 1);
518 probe((1<<1), index
+1, return 1);
519 probe((1<<2), index
+2, return 1);
520 probe((1<<3), index
+3, return 1);
521 probe((1<<4), index
+4, return 1);
522 probe((1<<5), index
+5, return 1);
523 probe((1<<6), index
+6, return 1);
524 probe((1<<7), index
+7, return 1);
526 // There's a trade-off to be made: checking for fewer bits at a time
527 // (such as by "bits & 0x0f") would give finer grain to the set of
528 // physical cells tested, but would mean more iterations.
529 // It seems like 8 bits at a time is a good number, especially if the
530 // hop range is 8, because this general case need never execute.
531 while ((bits
>>= 8) != 0) {
534 probe((1<<0), index
+0, return 1);
535 probe((1<<1), index
+1, return 1);
536 probe((1<<2), index
+2, return 1);
537 probe((1<<3), index
+3, return 1);
538 probe((1<<4), index
+4, return 1);
539 probe((1<<5), index
+5, return 1);
540 probe((1<<6), index
+6, return 1);
541 probe((1<<7), index
+7, return 1);
544 tally_miss(ht
, probes
);
// NOTE(review): text corrupted -- lines split mid-token; the wider probe
// loop, the 'return notfound;' path, and the found0..found3 labels that
// the gotos below target are missing from this copy. Left byte-identical.
548 /* Return the value associated with 'key', or 'notfound' if not found */
549 sword_t
hopscotch_get(tableptr ht
, uword_t key
, sword_t notfound
)
551 int index
= hash(ht
, key
) & ht
->mask
;
552 unsigned bits
= get_hop_mask(ht
, index
);
553 int __attribute__((unused
)) probes
= 0;
554 // This is not as blazingly fast as the hand-unrolled loop
555 // in containsp(), but the GC does not need it, so ...
558 probe(1, index
+0, goto found0
);
559 probe(2, index
+1, goto found1
);
560 probe(4, index
+2, goto found2
);
561 probe(8, index
+3, goto found3
);
566 tally_miss(ht
, probes
);
// Fall-through from the foundN labels: 'index' has been adjusted to
// the matching physical cell before reaching here.
572 return get_val(ht
, index
);
// NOTE(review): text corrupted -- lines split mid-token; the wider probe
// loop, the found0..found3 labels targeted by the gotos, and the final
// 'return 0;' are missing from this copy. Left byte-identical.
575 /* Update or insert a key/value pair. Return nonzero if
576 * the key was inserted, or zero if the key existed. */
577 int hopscotch_put(tableptr ht
, uword_t key
, sword_t val
)
579 int index
= hash(ht
, key
) & ht
->mask
;
580 unsigned bits
= get_hop_mask(ht
, index
);
581 int __attribute__((unused
)) probes
= 0;
582 // This is not as blazingly fast as the hand-unrolled loop
583 // in containsp(), but the GC does not need it, so ...
586 probe(1, index
+0, goto found0
);
587 probe(2, index
+1, goto found1
);
588 probe(4, index
+2, goto found2
);
589 probe(8, index
+3, goto found3
);
// Key not present: defer to the insertion path.
594 tally_miss(ht
, probes
);
595 return hopscotch_insert(ht
, key
, val
);
// Key present: overwrite its value in place.
600 set_val(ht
, index
, val
);
/* Count the 1 bits in 'x'. Used only by the integrity check, so the
 * simple one-bit-per-iteration loop is fine. */
int popcount(unsigned x)
{
    int count = 0;
    // Peel off one bit per iteration, low to high.
    for ( ; x != 0 ; x >>= 1 )
        count += x & 1;
    return count;
}
// NOTE(review): text corrupted -- lines split mid-token and many lines
// missing (declarations of 's', 'fail', 'claimed', 'end_index',
// 'logical_cell', the lose() call header for the duplicate-claim case,
// and assorted braces). Code left byte-identical; comments only.
// Verifies that ht->count matches the number of nonzero keys, that the
// total of all hop bits equals the item count, and that every occupied
// physical cell is claimed by exactly one logical cell.
616 /* Perform a bunch of sanity checks on 'ht' */
617 void hopscotch_integrity_check(tableptr ht
, char*when
, int verbose
)
619 int n_items
= 0, tot_bits_set
= 0, i
;
620 int size
= table_size(*ht
);
621 int n_kv_pairs
= size
+ ht
->hop_range
-1;
// Count occupied physical cells and total ownership bits.
625 for(i
=n_kv_pairs
-1 ; i
>= 0 ; --i
) if (ht
->keys
[i
]) ++n_items
;
626 for(i
=ht
->mask
; i
>= 0; --i
) tot_bits_set
+= popcount(get_hop_mask(ht
,i
));
628 fprintf(s
, "(%s) Verifying table @ %p. count=%d actual=%d bits=%d\n",
629 when
, ht
, ht
->count
, n_items
, tot_bits_set
);
// For each physical cell, find the (unique) logical cell claiming it.
630 for (i
=0;i
<n_kv_pairs
;++i
) {
631 uword_t key
= ht
->keys
[i
];
633 if (key
!= 0 || (i
<=ht
->mask
&& get_hop_mask(ht
,i
) != 0)) {
634 // Compute the logical cell that owns this physical cell.
635 int start_index
= i
- (ht
->hop_range
-1);
636 if (start_index
< 0) start_index
= 0;
638 if (end_index
> ht
->mask
) end_index
= ht
->mask
;
641 for (logical_cell
= start_index
; logical_cell
<= end_index
; ++logical_cell
) {
642 unsigned hop_bits
= get_hop_mask(ht
, logical_cell
);
643 if (hop_bits
& (1<<(i
- logical_cell
))) {
645 claimed
= logical_cell
;
648 "physical cell %d duplicately claimed: %d and %d",
649 i
, claimed
, logical_cell
);
655 if (claimed
==i
|| (claimed
==-1 && !key
))
657 else if (claimed
!=-1) {
658 fprintf(s
, "[%4d]", claimed
);
659 if ((int)(ht
->mask
& hash(ht
, key
)) != claimed
)
660 lose("key hashes to wrong logical cell?");
661 } else { // should have been claimed
662 fprintf(s
, " **** ");
665 fprintf(s
, " %4d: %04x", i
, i
<= ht
->mask
? get_hop_mask(ht
,i
) : 0);
667 fprintf(s
, " %lx -> %d", key
, (int)(ht
->mask
& hash(ht
, key
)));
669 fprintf(s
, " {val=%lx}", hopscotch_get(ht
, key
, -1));
675 if (ht
->count
!= n_items
|| tot_bits_set
!= n_items
|| fail
)
676 lose("integrity check on hashtable %p failed", ht
);