* 2022-01-18 [ci skip]
[ruby-80x24.org.git] / st.c
blob53e9dc832029b4db80505a25c2206f0bedb7eae5
1 /* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
7 /* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
15 The hash table has two arrays called *bins* and *entries*.
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
32 o The entry array contains table entries in the same order as they
33 were inserted.
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
103 #ifdef NOT_RUBY
104 #include "regint.h"
105 #include "st.h"
106 #else
107 #include "internal.h"
108 #include "internal/bits.h"
109 #include "internal/hash.h"
110 #include "internal/sanitizers.h"
111 #endif
113 #include <stdio.h>
114 #ifdef HAVE_STDLIB_H
115 #include <stdlib.h>
116 #endif
117 #include <string.h>
118 #include <assert.h>
120 #ifdef __GNUC__
121 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
122 #define EXPECT(expr, val) __builtin_expect(expr, val)
123 #define ATTRIBUTE_UNUSED __attribute__((unused))
124 #else
125 #define PREFETCH(addr, write_p)
126 #define EXPECT(expr, val) (expr)
127 #define ATTRIBUTE_UNUSED
128 #endif
130 /* The type of hashes. */
131 typedef st_index_t st_hash_t;
133 struct st_table_entry {
134 st_hash_t hash;
135 st_data_t key;
136 st_data_t record;
139 #define type_numhash st_hashtype_num
140 static const struct st_hash_type st_hashtype_num = {
141 st_numcmp,
142 st_numhash,
145 static int st_strcmp(st_data_t, st_data_t);
146 static st_index_t strhash(st_data_t);
147 static const struct st_hash_type type_strhash = {
148 st_strcmp,
149 strhash,
152 static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
153 static st_index_t strcasehash(st_data_t);
154 static const struct st_hash_type type_strcasehash = {
155 st_locale_insensitive_strcasecmp_i,
156 strcasehash,
159 /* Value used to catch uninitialized entries/bins during debugging.
160 There is a possibility for a false alarm, but its probability is
161 extremely small. */
162 #define ST_INIT_VAL 0xafafafafafafafaf
163 #define ST_INIT_VAL_BYTE 0xafa
165 #ifdef RUBY
166 #undef malloc
167 #undef realloc
168 #undef calloc
169 #undef free
170 #define malloc ruby_xmalloc
171 #define calloc ruby_xcalloc
172 #define realloc ruby_xrealloc
173 #define free ruby_xfree
174 #endif
176 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
177 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
178 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
180 /* As PRT_EQUAL only its result is returned in RES. REBUILT_P is set
181 up to TRUE if the table is rebuilt during the comparison. */
182 #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
183 do { \
184 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
185 res = PTR_EQUAL(tab, ptr, hash_val, key); \
186 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
187 } while (FALSE)
189 /* Features of a table. */
190 struct st_features {
191 /* Power of 2 used for number of allocated entries. */
192 unsigned char entry_power;
193 /* Power of 2 used for number of allocated bins. Depending on the
194 table size, the number of bins is 2-4 times more than the
195 number of entries. */
196 unsigned char bin_power;
197 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
198 unsigned char size_ind;
199 /* Bins are packed in words of type st_index_t. The following is
200 a size of bins counted by words. */
201 st_index_t bins_words;
204 /* Features of all possible size tables. */
205 #if SIZEOF_ST_INDEX_T == 8
206 #define MAX_POWER2 62
207 static const struct st_features features[] = {
208 {0, 1, 0, 0x0},
209 {1, 2, 0, 0x1},
210 {2, 3, 0, 0x1},
211 {3, 4, 0, 0x2},
212 {4, 5, 0, 0x4},
213 {5, 6, 0, 0x8},
214 {6, 7, 0, 0x10},
215 {7, 8, 0, 0x20},
216 {8, 9, 1, 0x80},
217 {9, 10, 1, 0x100},
218 {10, 11, 1, 0x200},
219 {11, 12, 1, 0x400},
220 {12, 13, 1, 0x800},
221 {13, 14, 1, 0x1000},
222 {14, 15, 1, 0x2000},
223 {15, 16, 1, 0x4000},
224 {16, 17, 2, 0x10000},
225 {17, 18, 2, 0x20000},
226 {18, 19, 2, 0x40000},
227 {19, 20, 2, 0x80000},
228 {20, 21, 2, 0x100000},
229 {21, 22, 2, 0x200000},
230 {22, 23, 2, 0x400000},
231 {23, 24, 2, 0x800000},
232 {24, 25, 2, 0x1000000},
233 {25, 26, 2, 0x2000000},
234 {26, 27, 2, 0x4000000},
235 {27, 28, 2, 0x8000000},
236 {28, 29, 2, 0x10000000},
237 {29, 30, 2, 0x20000000},
238 {30, 31, 2, 0x40000000},
239 {31, 32, 2, 0x80000000},
240 {32, 33, 3, 0x200000000},
241 {33, 34, 3, 0x400000000},
242 {34, 35, 3, 0x800000000},
243 {35, 36, 3, 0x1000000000},
244 {36, 37, 3, 0x2000000000},
245 {37, 38, 3, 0x4000000000},
246 {38, 39, 3, 0x8000000000},
247 {39, 40, 3, 0x10000000000},
248 {40, 41, 3, 0x20000000000},
249 {41, 42, 3, 0x40000000000},
250 {42, 43, 3, 0x80000000000},
251 {43, 44, 3, 0x100000000000},
252 {44, 45, 3, 0x200000000000},
253 {45, 46, 3, 0x400000000000},
254 {46, 47, 3, 0x800000000000},
255 {47, 48, 3, 0x1000000000000},
256 {48, 49, 3, 0x2000000000000},
257 {49, 50, 3, 0x4000000000000},
258 {50, 51, 3, 0x8000000000000},
259 {51, 52, 3, 0x10000000000000},
260 {52, 53, 3, 0x20000000000000},
261 {53, 54, 3, 0x40000000000000},
262 {54, 55, 3, 0x80000000000000},
263 {55, 56, 3, 0x100000000000000},
264 {56, 57, 3, 0x200000000000000},
265 {57, 58, 3, 0x400000000000000},
266 {58, 59, 3, 0x800000000000000},
267 {59, 60, 3, 0x1000000000000000},
268 {60, 61, 3, 0x2000000000000000},
269 {61, 62, 3, 0x4000000000000000},
270 {62, 63, 3, 0x8000000000000000},
273 #else
274 #define MAX_POWER2 30
276 static const struct st_features features[] = {
277 {0, 1, 0, 0x1},
278 {1, 2, 0, 0x1},
279 {2, 3, 0, 0x2},
280 {3, 4, 0, 0x4},
281 {4, 5, 0, 0x8},
282 {5, 6, 0, 0x10},
283 {6, 7, 0, 0x20},
284 {7, 8, 0, 0x40},
285 {8, 9, 1, 0x100},
286 {9, 10, 1, 0x200},
287 {10, 11, 1, 0x400},
288 {11, 12, 1, 0x800},
289 {12, 13, 1, 0x1000},
290 {13, 14, 1, 0x2000},
291 {14, 15, 1, 0x4000},
292 {15, 16, 1, 0x8000},
293 {16, 17, 2, 0x20000},
294 {17, 18, 2, 0x40000},
295 {18, 19, 2, 0x80000},
296 {19, 20, 2, 0x100000},
297 {20, 21, 2, 0x200000},
298 {21, 22, 2, 0x400000},
299 {22, 23, 2, 0x800000},
300 {23, 24, 2, 0x1000000},
301 {24, 25, 2, 0x2000000},
302 {25, 26, 2, 0x4000000},
303 {26, 27, 2, 0x8000000},
304 {27, 28, 2, 0x10000000},
305 {28, 29, 2, 0x20000000},
306 {29, 30, 2, 0x40000000},
307 {30, 31, 2, 0x80000000},
310 #endif
312 /* The reserved hash value and its substitution. */
313 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
314 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
316 /* Return hash value of KEY for table TAB. */
317 static inline st_hash_t
318 do_hash(st_data_t key, st_table *tab)
320 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
327 /* Power of 2 defining the minimal number of allocated entries. */
328 #define MINIMAL_POWER2 2
330 #if MINIMAL_POWER2 < 2
331 #error "MINIMAL_POWER2 should be >= 2"
332 #endif
334 /* If the power2 of the allocated `entries` is less than the following
335 value, don't allocate bins and use a linear search. */
336 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
338 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
339 static int
340 get_power2(st_index_t size)
342 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
343 if (n <= MAX_POWER2)
344 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
345 #ifndef NOT_RUBY
346 /* Ran out of the table entries */
347 rb_raise(rb_eRuntimeError, "st_table too big");
348 #endif
349 /* should raise exception */
350 return -1;
353 /* Return value of N-th bin in array BINS of table with bins size
354 index S. */
355 static inline st_index_t
356 get_bin(st_index_t *bins, int s, st_index_t n)
358 return (s == 0 ? ((unsigned char *) bins)[n]
359 : s == 1 ? ((unsigned short *) bins)[n]
360 : s == 2 ? ((unsigned int *) bins)[n]
361 : ((st_index_t *) bins)[n]);
364 /* Set up N-th bin in array BINS of table with bins size index S to
365 value V. */
366 static inline void
367 set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
369 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
370 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
371 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
372 else ((st_index_t *) bins)[n] = v;
375 /* These macros define reserved values for empty table bin and table
376 bin which contains a deleted entry. We will never use such values
377 for an entry index in bins. */
378 #define EMPTY_BIN 0
379 #define DELETED_BIN 1
380 /* Base of a real entry index in the bins. */
381 #define ENTRY_BASE 2
383 /* Mark I-th bin of table TAB as empty, in other words not
384 corresponding to any entry. */
385 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
387 /* Values used for not found entry and bin with given
388 characteristics. */
389 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
390 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
392 /* Entry and bin values returned when we found a table rebuild during
393 the search. */
394 #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
395 #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
397 /* Mark I-th bin of table TAB as corresponding to a deleted table
398 entry. Update number of entries in the table and number of bins
399 corresponding to deleted entries. */
400 #define MARK_BIN_DELETED(tab, i) \
401 do { \
402 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
403 } while (0)
405 /* Macros to check that value B is used empty bins and bins
406 corresponding deleted entries. */
407 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
408 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
409 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
411 /* Macros to check empty bins and bins corresponding to deleted
412 entries. Bins are given by their index I in table TAB. */
413 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
414 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
417 /* Macros for marking and checking deleted entries given by their
418 pointer E_PTR. */
419 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
420 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
422 /* Return bin size index of table TAB. */
423 static inline unsigned int
424 get_size_ind(const st_table *tab)
426 return tab->size_ind;
429 /* Return the number of allocated bins of table TAB. */
430 static inline st_index_t
431 get_bins_num(const st_table *tab)
433 return ((st_index_t) 1)<<tab->bin_power;
436 /* Return mask for a bin index in table TAB. */
437 static inline st_index_t
438 bins_mask(const st_table *tab)
440 return get_bins_num(tab) - 1;
443 /* Return the index of table TAB bin corresponding to
444 HASH_VALUE. */
445 static inline st_index_t
446 hash_bin(st_hash_t hash_value, st_table *tab)
448 return hash_value & bins_mask(tab);
451 /* Return the number of allocated entries of table TAB. */
452 static inline st_index_t
453 get_allocated_entries(const st_table *tab)
455 return ((st_index_t) 1)<<tab->entry_power;
458 /* Return size of the allocated bins of table TAB. */
459 static inline st_index_t
460 bins_size(const st_table *tab)
462 return features[tab->entry_power].bins_words * sizeof (st_index_t);
465 /* Mark all bins of table TAB as empty. */
466 static void
467 initialize_bins(st_table *tab)
469 memset(tab->bins, 0, bins_size(tab));
472 /* Make table TAB empty. */
473 static void
474 make_tab_empty(st_table *tab)
476 tab->num_entries = 0;
477 tab->entries_start = tab->entries_bound = 0;
478 if (tab->bins != NULL)
479 initialize_bins(tab);
482 #ifdef HASH_LOG
483 #ifdef HAVE_UNISTD_H
484 #include <unistd.h>
485 #endif
486 static struct {
487 int all, total, num, str, strcase;
488 } collision;
490 /* Flag switching off output of package statistics at the end of
491 program. */
492 static int init_st = 0;
494 /* Output overall number of table searches and collisions into a
495 temporary file. */
496 static void
497 stat_col(void)
499 char fname[10+sizeof(long)*3];
500 FILE *f;
501 if (!collision.total) return;
502 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
503 if (f == NULL)
504 return;
505 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
506 ((double)collision.all / (collision.total)) * 100);
507 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
508 fclose(f);
510 #endif
512 /* Create and return table with TYPE which can hold at least SIZE
513 entries. The real number of entries which the table can hold is
514 the nearest power of two for SIZE. */
515 st_table *
516 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
518 st_table *tab;
519 int n;
521 #ifdef HASH_LOG
522 #if HASH_LOG+0 < 0
524 const char *e = getenv("ST_HASH_LOG");
525 if (!e || !*e) init_st = 1;
527 #endif
528 if (init_st == 0) {
529 init_st = 1;
530 atexit(stat_col);
532 #endif
534 n = get_power2(size);
535 #ifndef RUBY
536 if (n < 0)
537 return NULL;
538 #endif
539 tab = (st_table *) malloc(sizeof (st_table));
540 #ifndef RUBY
541 if (tab == NULL)
542 return NULL;
543 #endif
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552 #ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
557 #endif
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561 #ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
566 #endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
572 /* Create and return table with TYPE which can hold a minimal number
573 of entries (see comments for get_power2). */
574 st_table *
575 st_init_table(const struct st_hash_type *type)
577 return st_init_table_with_size(type, 0);
580 /* Create and return table which can hold a minimal number of
581 numbers. */
582 st_table *
583 st_init_numtable(void)
585 return st_init_table(&type_numhash);
588 /* Create and return table which can hold SIZE numbers. */
589 st_table *
590 st_init_numtable_with_size(st_index_t size)
592 return st_init_table_with_size(&type_numhash, size);
595 /* Create and return table which can hold a minimal number of
596 strings. */
597 st_table *
598 st_init_strtable(void)
600 return st_init_table(&type_strhash);
603 /* Create and return table which can hold SIZE strings. */
604 st_table *
605 st_init_strtable_with_size(st_index_t size)
607 return st_init_table_with_size(&type_strhash, size);
610 /* Create and return table which can hold a minimal number of strings
611 whose character case is ignored. */
612 st_table *
613 st_init_strcasetable(void)
615 return st_init_table(&type_strcasehash);
618 /* Create and return table which can hold SIZE strings whose character
619 case is ignored. */
620 st_table *
621 st_init_strcasetable_with_size(st_index_t size)
623 return st_init_table_with_size(&type_strcasehash, size);
626 /* Make table TAB empty. */
627 void
628 st_clear(st_table *tab)
630 make_tab_empty(tab);
631 tab->rebuilds_num++;
634 /* Free table TAB space. */
635 void
636 st_free_table(st_table *tab)
638 if (tab->bins != NULL)
639 free(tab->bins);
640 free(tab->entries);
641 free(tab);
644 /* Return byte size of memory allocated for table TAB. */
645 size_t
646 st_memsize(const st_table *tab)
648 return(sizeof(st_table)
649 + (tab->bins == NULL ? 0 : bins_size(tab))
650 + get_allocated_entries(tab) * sizeof(st_table_entry));
653 static st_index_t
654 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
656 static st_index_t
657 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
659 static st_index_t
660 find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
662 static st_index_t
663 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
664 st_data_t key, st_index_t *bin_ind);
666 #ifdef HASH_LOG
667 static void
668 count_collision(const struct st_hash_type *type)
670 collision.all++;
671 if (type == &type_numhash) {
672 collision.num++;
674 else if (type == &type_strhash) {
675 collision.strcase++;
677 else if (type == &type_strcasehash) {
678 collision.str++;
682 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
683 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
684 #define collision_check 0
685 #else
686 #define COLLISION
687 #define FOUND_BIN
688 #endif
690 /* If the number of entries in the table is at least REBUILD_THRESHOLD
691 times less than the entry array length, decrease the table
692 size. */
693 #define REBUILD_THRESHOLD 4
695 #if REBUILD_THRESHOLD < 2
696 #error "REBUILD_THRESHOLD should be >= 2"
697 #endif
699 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
700 and can change size of the table entries and bins arrays.
701 Rebuilding is implemented by creation of a new table or by
702 compaction of the existing one. */
703 static void
704 rebuild_table(st_table *tab)
706 st_index_t i, ni, bound;
707 unsigned int size_ind;
708 st_table *new_tab;
709 st_table_entry *entries, *new_entries;
710 st_table_entry *curr_entry_ptr;
711 st_index_t *bins;
712 st_index_t bin_ind;
714 bound = tab->entries_bound;
715 entries = tab->entries;
716 if ((2 * tab->num_entries <= get_allocated_entries(tab)
717 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
718 || tab->num_entries < (1 << MINIMAL_POWER2)) {
719 /* Compaction: */
720 tab->num_entries = 0;
721 if (tab->bins != NULL)
722 initialize_bins(tab);
723 new_tab = tab;
724 new_entries = entries;
726 else {
727 new_tab = st_init_table_with_size(tab->type,
728 2 * tab->num_entries - 1);
729 new_entries = new_tab->entries;
731 ni = 0;
732 bins = new_tab->bins;
733 size_ind = get_size_ind(new_tab);
734 for (i = tab->entries_start; i < bound; i++) {
735 curr_entry_ptr = &entries[i];
736 PREFETCH(entries + i + 1, 0);
737 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
738 continue;
739 if (&new_entries[ni] != curr_entry_ptr)
740 new_entries[ni] = *curr_entry_ptr;
741 if (EXPECT(bins != NULL, 1)) {
742 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
743 curr_entry_ptr->key);
744 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
746 new_tab->num_entries++;
747 ni++;
749 if (new_tab != tab) {
750 tab->entry_power = new_tab->entry_power;
751 tab->bin_power = new_tab->bin_power;
752 tab->size_ind = new_tab->size_ind;
753 if (tab->bins != NULL)
754 free(tab->bins);
755 tab->bins = new_tab->bins;
756 free(tab->entries);
757 tab->entries = new_tab->entries;
758 free(new_tab);
760 tab->entries_start = 0;
761 tab->entries_bound = tab->num_entries;
762 tab->rebuilds_num++;
765 /* Return the next secondary hash index for table TAB using previous
766 index IND and PERTERB. Finally modulo of the function becomes a
767 full *cycle linear congruential generator*, in other words it
768 guarantees traversing all table bins in extreme case.
770 According the Hull-Dobell theorem a generator
771 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
772 o m and c are relatively prime
773 o a-1 is divisible by all prime factors of m
774 o a-1 is divisible by 4 if m is divisible by 4.
776 For our case a is 5, c is 1, and m is a power of two. */
777 static inline st_index_t
778 secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
780 *perterb >>= 11;
781 ind = (ind << 2) + ind + *perterb + 1;
782 return hash_bin(ind, tab);
785 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
786 search. Return the index of the found entry in array `entries`.
787 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
788 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
789 static inline st_index_t
790 find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
792 int eq_p, rebuilt_p;
793 st_index_t i, bound;
794 st_table_entry *entries;
796 bound = tab->entries_bound;
797 entries = tab->entries;
798 for (i = tab->entries_start; i < bound; i++) {
799 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
800 if (EXPECT(rebuilt_p, 0))
801 return REBUILT_TABLE_ENTRY_IND;
802 if (eq_p)
803 return i;
805 return UNDEFINED_ENTRY_IND;
808 /* Use the quadratic probing. The method has a better data locality
809 but more collisions than the current approach. In average it
810 results in a bit slower search. */
811 /*#define QUADRATIC_PROBE*/
813 /* Return index of entry with HASH_VALUE and KEY in table TAB. If
814 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
815 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
816 static st_index_t
817 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
819 int eq_p, rebuilt_p;
820 st_index_t ind;
821 #ifdef QUADRATIC_PROBE
822 st_index_t d;
823 #else
824 st_index_t peterb;
825 #endif
826 st_index_t bin;
827 st_table_entry *entries = tab->entries;
829 ind = hash_bin(hash_value, tab);
830 #ifdef QUADRATIC_PROBE
831 d = 1;
832 #else
833 peterb = hash_value;
834 #endif
835 FOUND_BIN;
836 for (;;) {
837 bin = get_bin(tab->bins, get_size_ind(tab), ind);
838 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
839 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
840 if (EXPECT(rebuilt_p, 0))
841 return REBUILT_TABLE_ENTRY_IND;
842 if (eq_p)
843 break;
845 else if (EMPTY_BIN_P(bin))
846 return UNDEFINED_ENTRY_IND;
847 #ifdef QUADRATIC_PROBE
848 ind = hash_bin(ind + d, tab);
849 d++;
850 #else
851 ind = secondary_hash(ind, tab, &peterb);
852 #endif
853 COLLISION;
855 return bin;
858 /* Find and return index of table TAB bin corresponding to an entry
859 with HASH_VALUE and KEY. If there is no such bin, return
860 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
861 return REBUILT_TABLE_BIN_IND. */
862 static st_index_t
863 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
865 int eq_p, rebuilt_p;
866 st_index_t ind;
867 #ifdef QUADRATIC_PROBE
868 st_index_t d;
869 #else
870 st_index_t peterb;
871 #endif
872 st_index_t bin;
873 st_table_entry *entries = tab->entries;
875 ind = hash_bin(hash_value, tab);
876 #ifdef QUADRATIC_PROBE
877 d = 1;
878 #else
879 peterb = hash_value;
880 #endif
881 FOUND_BIN;
882 for (;;) {
883 bin = get_bin(tab->bins, get_size_ind(tab), ind);
884 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
885 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
886 if (EXPECT(rebuilt_p, 0))
887 return REBUILT_TABLE_BIN_IND;
888 if (eq_p)
889 break;
891 else if (EMPTY_BIN_P(bin))
892 return UNDEFINED_BIN_IND;
893 #ifdef QUADRATIC_PROBE
894 ind = hash_bin(ind + d, tab);
895 d++;
896 #else
897 ind = secondary_hash(ind, tab, &peterb);
898 #endif
899 COLLISION;
901 return ind;
904 /* Find and return index of table TAB bin corresponding to an entry
905 with HASH_VALUE and KEY. The entry should be in the table
906 already. */
907 static st_index_t
908 find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
910 st_index_t ind;
911 #ifdef QUADRATIC_PROBE
912 st_index_t d;
913 #else
914 st_index_t peterb;
915 #endif
916 st_index_t bin;
918 ind = hash_bin(hash_value, tab);
919 #ifdef QUADRATIC_PROBE
920 d = 1;
921 #else
922 peterb = hash_value;
923 #endif
924 FOUND_BIN;
925 for (;;) {
926 bin = get_bin(tab->bins, get_size_ind(tab), ind);
927 if (EMPTY_OR_DELETED_BIN_P(bin))
928 return ind;
929 #ifdef QUADRATIC_PROBE
930 ind = hash_bin(ind + d, tab);
931 d++;
932 #else
933 ind = secondary_hash(ind, tab, &peterb);
934 #endif
935 COLLISION;
939 /* Return index of table TAB bin for HASH_VALUE and KEY through
940 BIN_IND and the pointed value as the function result. Reserve the
941 bin for inclusion of the corresponding entry into the table if it
942 is not there yet. We always find such bin as bins array length is
943 bigger entries array. Although we can reuse a deleted bin, the
944 result bin value is always empty if the table has no entry with
945 KEY. Return the entries array index of the found entry or
946 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
947 during the search, return REBUILT_TABLE_ENTRY_IND. */
948 static st_index_t
949 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
950 st_data_t key, st_index_t *bin_ind)
952 int eq_p, rebuilt_p;
953 st_index_t ind;
954 st_hash_t curr_hash_value = *hash_value;
955 #ifdef QUADRATIC_PROBE
956 st_index_t d;
957 #else
958 st_index_t peterb;
959 #endif
960 st_index_t entry_index;
961 st_index_t first_deleted_bin_ind;
962 st_table_entry *entries;
964 ind = hash_bin(curr_hash_value, tab);
965 #ifdef QUADRATIC_PROBE
966 d = 1;
967 #else
968 peterb = curr_hash_value;
969 #endif
970 FOUND_BIN;
971 first_deleted_bin_ind = UNDEFINED_BIN_IND;
972 entries = tab->entries;
973 for (;;) {
974 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
975 if (EMPTY_BIN_P(entry_index)) {
976 tab->num_entries++;
977 entry_index = UNDEFINED_ENTRY_IND;
978 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
979 /* We can reuse bin of a deleted entry. */
980 ind = first_deleted_bin_ind;
981 MARK_BIN_EMPTY(tab, ind);
983 break;
985 else if (! DELETED_BIN_P(entry_index)) {
986 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
987 if (EXPECT(rebuilt_p, 0))
988 return REBUILT_TABLE_ENTRY_IND;
989 if (eq_p)
990 break;
992 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
993 first_deleted_bin_ind = ind;
994 #ifdef QUADRATIC_PROBE
995 ind = hash_bin(ind + d, tab);
996 d++;
997 #else
998 ind = secondary_hash(ind, tab, &peterb);
999 #endif
1000 COLLISION;
1002 *bin_ind = ind;
1003 return entry_index;
1006 /* Find an entry with KEY in table TAB. Return non-zero if we found
1007 it. Set up *RECORD to the found entry record. */
1009 st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1011 st_index_t bin;
1012 st_hash_t hash = do_hash(key, tab);
1014 retry:
1015 if (tab->bins == NULL) {
1016 bin = find_entry(tab, hash, key);
1017 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1018 goto retry;
1019 if (bin == UNDEFINED_ENTRY_IND)
1020 return 0;
1022 else {
1023 bin = find_table_entry_ind(tab, hash, key);
1024 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1025 goto retry;
1026 if (bin == UNDEFINED_ENTRY_IND)
1027 return 0;
1028 bin -= ENTRY_BASE;
1030 if (value != 0)
1031 *value = tab->entries[bin].record;
1032 return 1;
1035 /* Find an entry with KEY in table TAB. Return non-zero if we found
1036 it. Set up *RESULT to the found table entry key. */
1038 st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1040 st_index_t bin;
1041 st_hash_t hash = do_hash(key, tab);
1043 retry:
1044 if (tab->bins == NULL) {
1045 bin = find_entry(tab, hash, key);
1046 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1047 goto retry;
1048 if (bin == UNDEFINED_ENTRY_IND)
1049 return 0;
1051 else {
1052 bin = find_table_entry_ind(tab, hash, key);
1053 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1054 goto retry;
1055 if (bin == UNDEFINED_ENTRY_IND)
1056 return 0;
1057 bin -= ENTRY_BASE;
1059 if (result != 0)
1060 *result = tab->entries[bin].key;
1061 return 1;
1064 /* Check the table and rebuild it if it is necessary. */
1065 static inline void
1066 rebuild_table_if_necessary (st_table *tab)
1068 st_index_t bound = tab->entries_bound;
1070 if (bound == get_allocated_entries(tab))
1071 rebuild_table(tab);
1074 /* Insert (KEY, VALUE) into table TAB and return zero. If there is
1075 already entry with KEY in the table, return nonzero and update
1076 the value of the found entry. */
1078 st_insert(st_table *tab, st_data_t key, st_data_t value)
1080 st_table_entry *entry;
1081 st_index_t bin;
1082 st_index_t ind;
1083 st_hash_t hash_value;
1084 st_index_t bin_ind;
1085 int new_p;
1087 hash_value = do_hash(key, tab);
1088 retry:
1089 rebuild_table_if_necessary(tab);
1090 if (tab->bins == NULL) {
1091 bin = find_entry(tab, hash_value, key);
1092 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1093 goto retry;
1094 new_p = bin == UNDEFINED_ENTRY_IND;
1095 if (new_p)
1096 tab->num_entries++;
1097 bin_ind = UNDEFINED_BIN_IND;
1099 else {
1100 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1101 key, &bin_ind);
1102 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1103 goto retry;
1104 new_p = bin == UNDEFINED_ENTRY_IND;
1105 bin -= ENTRY_BASE;
1107 if (new_p) {
1108 ind = tab->entries_bound++;
1109 entry = &tab->entries[ind];
1110 entry->hash = hash_value;
1111 entry->key = key;
1112 entry->record = value;
1113 if (bin_ind != UNDEFINED_BIN_IND)
1114 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1115 return 0;
1117 tab->entries[bin].record = value;
1118 return 1;
1121 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1122 entry with KEY before the insertion. */
1123 static inline void
1124 st_add_direct_with_hash(st_table *tab,
1125 st_data_t key, st_data_t value, st_hash_t hash)
1127 st_table_entry *entry;
1128 st_index_t ind;
1129 st_index_t bin_ind;
1131 rebuild_table_if_necessary(tab);
1132 ind = tab->entries_bound++;
1133 entry = &tab->entries[ind];
1134 entry->hash = hash;
1135 entry->key = key;
1136 entry->record = value;
1137 tab->num_entries++;
1138 if (tab->bins != NULL) {
1139 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1140 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1144 /* Insert (KEY, VALUE) into table TAB. The table should not have
1145 entry with KEY before the insertion. */
1146 void
1147 st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1149 st_hash_t hash_value;
1151 hash_value = do_hash(key, tab);
1152 st_add_direct_with_hash(tab, key, value, hash_value);
1155 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1156 there is already entry with KEY in the table, return nonzero and
1157 update the value of the found entry. */
1159 st_insert2(st_table *tab, st_data_t key, st_data_t value,
1160 st_data_t (*func)(st_data_t))
1162 st_table_entry *entry;
1163 st_index_t bin;
1164 st_index_t ind;
1165 st_hash_t hash_value;
1166 st_index_t bin_ind;
1167 int new_p;
1169 hash_value = do_hash(key, tab);
1170 retry:
1171 rebuild_table_if_necessary (tab);
1172 if (tab->bins == NULL) {
1173 bin = find_entry(tab, hash_value, key);
1174 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1175 goto retry;
1176 new_p = bin == UNDEFINED_ENTRY_IND;
1177 if (new_p)
1178 tab->num_entries++;
1179 bin_ind = UNDEFINED_BIN_IND;
1181 else {
1182 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1183 key, &bin_ind);
1184 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1185 goto retry;
1186 new_p = bin == UNDEFINED_ENTRY_IND;
1187 bin -= ENTRY_BASE;
1189 if (new_p) {
1190 key = (*func)(key);
1191 ind = tab->entries_bound++;
1192 entry = &tab->entries[ind];
1193 entry->hash = hash_value;
1194 entry->key = key;
1195 entry->record = value;
1196 if (bin_ind != UNDEFINED_BIN_IND)
1197 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1198 return 0;
1200 tab->entries[bin].record = value;
1201 return 1;
1204 /* Create and return a copy of table OLD_TAB. */
1205 st_table *
1206 st_copy(st_table *old_tab)
1208 st_table *new_tab;
1210 new_tab = (st_table *) malloc(sizeof(st_table));
1211 #ifndef RUBY
1212 if (new_tab == NULL)
1213 return NULL;
1214 #endif
1215 *new_tab = *old_tab;
1216 if (old_tab->bins == NULL)
1217 new_tab->bins = NULL;
1218 else {
1219 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1220 #ifndef RUBY
1221 if (new_tab->bins == NULL) {
1222 free(new_tab);
1223 return NULL;
1225 #endif
1227 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1228 * sizeof(st_table_entry));
1229 #ifndef RUBY
1230 if (new_tab->entries == NULL) {
1231 st_free_table(new_tab);
1232 return NULL;
1234 #endif
1235 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1236 get_allocated_entries(old_tab));
1237 if (old_tab->bins != NULL)
1238 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1239 return new_tab;
1242 /* Update the entries start of table TAB after removing an entry
1243 with index N in the array entries. */
1244 static inline void
1245 update_range_for_deleted(st_table *tab, st_index_t n)
1247 /* Do not update entries_bound here. Otherwise, we can fill all
1248 bins by deleted entry value before rebuilding the table. */
1249 if (tab->entries_start == n) {
1250 st_index_t start = n + 1;
1251 st_index_t bound = tab->entries_bound;
1252 st_table_entry *entries = tab->entries;
1253 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1254 tab->entries_start = start;
1258 /* Delete entry with KEY from table TAB, set up *VALUE (unless
1259 VALUE is zero) from deleted table entry, and return non-zero. If
1260 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1261 is zero), and return zero. */
1262 static int
1263 st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1265 st_table_entry *entry;
1266 st_index_t bin;
1267 st_index_t bin_ind;
1268 st_hash_t hash;
1270 hash = do_hash(*key, tab);
1271 retry:
1272 if (tab->bins == NULL) {
1273 bin = find_entry(tab, hash, *key);
1274 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1275 goto retry;
1276 if (bin == UNDEFINED_ENTRY_IND) {
1277 if (value != 0) *value = 0;
1278 return 0;
1281 else {
1282 bin_ind = find_table_bin_ind(tab, hash, *key);
1283 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1284 goto retry;
1285 if (bin_ind == UNDEFINED_BIN_IND) {
1286 if (value != 0) *value = 0;
1287 return 0;
1289 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1290 MARK_BIN_DELETED(tab, bin_ind);
1292 entry = &tab->entries[bin];
1293 *key = entry->key;
1294 if (value != 0) *value = entry->record;
1295 MARK_ENTRY_DELETED(entry);
1296 tab->num_entries--;
1297 update_range_for_deleted(tab, bin);
1298 return 1;
1302 st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1304 return st_general_delete(tab, key, value);
1307 /* The function and other functions with suffix '_safe' or '_check'
1308 are originated from the previous implementation of the hash tables.
1309 It was necessary for correct deleting entries during traversing
1310 tables. The current implementation permits deletion during
1311 traversing without a specific way to do this. */
1313 st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1314 st_data_t never ATTRIBUTE_UNUSED)
1316 return st_general_delete(tab, key, value);
1319 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1320 return zero. Otherwise, remove the first entry in the table.
1321 Return its key through KEY and its record through VALUE (unless
1322 VALUE is zero). */
1324 st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1326 st_index_t i, bound;
1327 st_index_t bin;
1328 st_table_entry *entries, *curr_entry_ptr;
1329 st_index_t bin_ind;
1331 entries = tab->entries;
1332 bound = tab->entries_bound;
1333 for (i = tab->entries_start; i < bound; i++) {
1334 curr_entry_ptr = &entries[i];
1335 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1336 st_hash_t entry_hash = curr_entry_ptr->hash;
1337 st_data_t entry_key = curr_entry_ptr->key;
1339 if (value != 0) *value = curr_entry_ptr->record;
1340 *key = entry_key;
1341 retry:
1342 if (tab->bins == NULL) {
1343 bin = find_entry(tab, entry_hash, entry_key);
1344 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1345 entries = tab->entries;
1346 goto retry;
1348 curr_entry_ptr = &entries[bin];
1350 else {
1351 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1352 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1353 entries = tab->entries;
1354 goto retry;
1356 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1357 - ENTRY_BASE];
1358 MARK_BIN_DELETED(tab, bin_ind);
1360 MARK_ENTRY_DELETED(curr_entry_ptr);
1361 tab->num_entries--;
1362 update_range_for_deleted(tab, i);
1363 return 1;
1366 tab->entries_start = tab->entries_bound = 0;
1367 if (value != 0) *value = 0;
1368 return 0;
1371 /* See comments for function st_delete_safe. */
1372 void
1373 st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1374 st_data_t never ATTRIBUTE_UNUSED)
1378 /* Find entry with KEY in table TAB, call FUNC with pointers to copies
1379 of the key and the value of the found entry, and non-zero as the
1380 3rd argument. If the entry is not found, call FUNC with a pointer
1381 to KEY, a pointer to zero, and a zero argument. If the call
1382 returns ST_CONTINUE, the table will have an entry with key and
1383 value returned by FUNC through the 1st and 2nd parameters. If the
1384 call of FUNC returns ST_DELETE, the table will not have entry with
1385 KEY. The function returns flag of that the entry with KEY was in
1386 the table before the call. */
1388 st_update(st_table *tab, st_data_t key,
1389 st_update_callback_func *func, st_data_t arg)
1391 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1392 st_index_t bin = 0; /* Ditto */
1393 st_table_entry *entries;
1394 st_index_t bin_ind;
1395 st_data_t value = 0, old_key;
1396 int retval, existing;
1397 st_hash_t hash = do_hash(key, tab);
1399 retry:
1400 entries = tab->entries;
1401 if (tab->bins == NULL) {
1402 bin = find_entry(tab, hash, key);
1403 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1404 goto retry;
1405 existing = bin != UNDEFINED_ENTRY_IND;
1406 entry = &entries[bin];
1407 bin_ind = UNDEFINED_BIN_IND;
1409 else {
1410 bin_ind = find_table_bin_ind(tab, hash, key);
1411 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1412 goto retry;
1413 existing = bin_ind != UNDEFINED_BIN_IND;
1414 if (existing) {
1415 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1416 entry = &entries[bin];
1419 if (existing) {
1420 key = entry->key;
1421 value = entry->record;
1423 old_key = key;
1424 retval = (*func)(&key, &value, arg, existing);
1425 switch (retval) {
1426 case ST_CONTINUE:
1427 if (! existing) {
1428 st_add_direct_with_hash(tab, key, value, hash);
1429 break;
1431 if (old_key != key) {
1432 entry->key = key;
1434 entry->record = value;
1435 break;
1436 case ST_DELETE:
1437 if (existing) {
1438 if (bin_ind != UNDEFINED_BIN_IND)
1439 MARK_BIN_DELETED(tab, bin_ind);
1440 MARK_ENTRY_DELETED(entry);
1441 tab->num_entries--;
1442 update_range_for_deleted(tab, bin);
1444 break;
1446 return existing;
1449 /* Traverse all entries in table TAB calling FUNC with current entry
1450 key and value and zero. If the call returns ST_STOP, stop
1451 traversing. If the call returns ST_DELETE, delete the current
1452 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1453 traversing. The function returns zero unless an error is found.
1454 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1455 different for ST_CHECK and when the current element is removed
1456 during traversing. */
1457 static inline int
1458 st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1459 int check_p)
1461 st_index_t bin;
1462 st_index_t bin_ind;
1463 st_table_entry *entries, *curr_entry_ptr;
1464 enum st_retval retval;
1465 st_index_t i, rebuilds_num;
1466 st_hash_t hash;
1467 st_data_t key;
1468 int error_p, packed_p = tab->bins == NULL;
1470 entries = tab->entries;
1471 /* The bound can change inside the loop even without rebuilding
1472 the table, e.g. by an entry insertion. */
1473 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1474 curr_entry_ptr = &entries[i];
1475 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1476 continue;
1477 key = curr_entry_ptr->key;
1478 rebuilds_num = tab->rebuilds_num;
1479 hash = curr_entry_ptr->hash;
1480 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1482 if (retval == ST_REPLACE && replace) {
1483 st_data_t value;
1484 value = curr_entry_ptr->record;
1485 retval = (*replace)(&key, &value, arg, TRUE);
1486 curr_entry_ptr->key = key;
1487 curr_entry_ptr->record = value;
1490 if (rebuilds_num != tab->rebuilds_num) {
1491 retry:
1492 entries = tab->entries;
1493 packed_p = tab->bins == NULL;
1494 if (packed_p) {
1495 i = find_entry(tab, hash, key);
1496 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1497 goto retry;
1498 error_p = i == UNDEFINED_ENTRY_IND;
1500 else {
1501 i = find_table_entry_ind(tab, hash, key);
1502 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1503 goto retry;
1504 error_p = i == UNDEFINED_ENTRY_IND;
1505 i -= ENTRY_BASE;
1507 if (error_p && check_p) {
1508 /* call func with error notice */
1509 retval = (*func)(0, 0, arg, 1);
1510 return 1;
1512 curr_entry_ptr = &entries[i];
1514 switch (retval) {
1515 case ST_REPLACE:
1516 break;
1517 case ST_CONTINUE:
1518 break;
1519 case ST_CHECK:
1520 if (check_p)
1521 break;
1522 case ST_STOP:
1523 return 0;
1524 case ST_DELETE: {
1525 st_data_t key = curr_entry_ptr->key;
1527 again:
1528 if (packed_p) {
1529 bin = find_entry(tab, hash, key);
1530 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1531 goto again;
1532 if (bin == UNDEFINED_ENTRY_IND)
1533 break;
1535 else {
1536 bin_ind = find_table_bin_ind(tab, hash, key);
1537 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1538 goto again;
1539 if (bin_ind == UNDEFINED_BIN_IND)
1540 break;
1541 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1542 MARK_BIN_DELETED(tab, bin_ind);
1544 curr_entry_ptr = &entries[bin];
1545 MARK_ENTRY_DELETED(curr_entry_ptr);
1546 tab->num_entries--;
1547 update_range_for_deleted(tab, bin);
1548 break;
1552 return 0;
1556 st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1558 return st_general_foreach(tab, func, replace, arg, TRUE);
1561 struct functor {
1562 st_foreach_callback_func *func;
1563 st_data_t arg;
1566 static int
1567 apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1569 const struct functor *f = (void *)d;
1570 return f->func(k, v, f->arg);
1574 st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1576 const struct functor f = { func, arg };
1577 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1580 /* See comments for function st_delete_safe. */
1582 st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1583 st_data_t never ATTRIBUTE_UNUSED)
1585 return st_general_foreach(tab, func, 0, arg, TRUE);
1588 /* Set up array KEYS by at most SIZE keys of head table TAB entries.
1589 Return the number of keys set up in array KEYS. */
1590 static inline st_index_t
1591 st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1593 st_index_t i, bound;
1594 st_data_t key, *keys_start, *keys_end;
1595 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1597 bound = tab->entries_bound;
1598 keys_start = keys;
1599 keys_end = keys + size;
1600 for (i = tab->entries_start; i < bound; i++) {
1601 if (keys == keys_end)
1602 break;
1603 curr_entry_ptr = &entries[i];
1604 key = curr_entry_ptr->key;
1605 if (! DELETED_ENTRY_P(curr_entry_ptr))
1606 *keys++ = key;
1609 return keys - keys_start;
1612 st_index_t
1613 st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1615 return st_general_keys(tab, keys, size);
1618 /* See comments for function st_delete_safe. */
1619 st_index_t
1620 st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1621 st_data_t never ATTRIBUTE_UNUSED)
1623 return st_general_keys(tab, keys, size);
1626 /* Set up array VALUES by at most SIZE values of head table TAB
1627 entries. Return the number of values set up in array VALUES. */
1628 static inline st_index_t
1629 st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1631 st_index_t i, bound;
1632 st_data_t *values_start, *values_end;
1633 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1635 values_start = values;
1636 values_end = values + size;
1637 bound = tab->entries_bound;
1638 for (i = tab->entries_start; i < bound; i++) {
1639 if (values == values_end)
1640 break;
1641 curr_entry_ptr = &entries[i];
1642 if (! DELETED_ENTRY_P(curr_entry_ptr))
1643 *values++ = curr_entry_ptr->record;
1646 return values - values_start;
1649 st_index_t
1650 st_values(st_table *tab, st_data_t *values, st_index_t size)
1652 return st_general_values(tab, values, size);
1655 /* See comments for function st_delete_safe. */
1656 st_index_t
1657 st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1658 st_data_t never ATTRIBUTE_UNUSED)
1660 return st_general_values(tab, values, size);
1663 #define FNV1_32A_INIT 0x811c9dc5
1666 * 32 bit magic FNV-1a prime
1668 #define FNV_32_PRIME 0x01000193
1670 #ifndef UNALIGNED_WORD_ACCESS
1671 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1672 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1673 defined(__powerpc64__) || defined(__aarch64__) || \
1674 defined(__mc68020__)
1675 # define UNALIGNED_WORD_ACCESS 1
1676 # endif
1677 #endif
1678 #ifndef UNALIGNED_WORD_ACCESS
1679 # define UNALIGNED_WORD_ACCESS 0
1680 #endif
1682 /* This hash function is quite simplified MurmurHash3
1683 * Simplification is legal, cause most of magic still happens in finalizator.
1684 * And finalizator is almost the same as in MurmurHash3 */
1685 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1686 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1688 #if ST_INDEX_BITS <= 32
1689 #define C1 (st_index_t)0xcc9e2d51
1690 #define C2 (st_index_t)0x1b873593
1691 #else
1692 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1693 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1694 #endif
1695 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1696 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1697 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1699 static inline st_index_t
1700 murmur_step(st_index_t h, st_index_t k)
1702 #if ST_INDEX_BITS <= 32
1703 #define r1 (17)
1704 #define r2 (11)
1705 #else
1706 #define r1 (33)
1707 #define r2 (24)
1708 #endif
1709 k *= C1;
1710 h ^= ROTL(k, r1);
1711 h *= C2;
1712 h = ROTL(h, r2);
1713 return h;
1715 #undef r1
1716 #undef r2
1718 static inline st_index_t
1719 murmur_finish(st_index_t h)
1721 #if ST_INDEX_BITS <= 32
1722 #define r1 (16)
1723 #define r2 (13)
1724 #define r3 (16)
1725 const st_index_t c1 = 0x85ebca6b;
1726 const st_index_t c2 = 0xc2b2ae35;
1727 #else
1728 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1729 #define r1 (30)
1730 #define r2 (27)
1731 #define r3 (31)
1732 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1733 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1734 #endif
1735 #if ST_INDEX_BITS > 64
1736 h ^= h >> 64;
1737 h *= c2;
1738 h ^= h >> 65;
1739 #endif
1740 h ^= h >> r1;
1741 h *= c1;
1742 h ^= h >> r2;
1743 h *= c2;
1744 h ^= h >> r3;
1745 return h;
1747 #undef r1
1748 #undef r2
1749 #undef r3
1751 st_index_t
1752 st_hash(const void *ptr, size_t len, st_index_t h)
1754 const char *data = ptr;
1755 st_index_t t = 0;
1756 size_t l = len;
1758 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1759 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1760 #if SIZEOF_ST_INDEX_T > 4
1761 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1762 #if SIZEOF_ST_INDEX_T > 8
1763 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1764 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1765 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1766 #endif
1767 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1768 #else
1769 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1770 #endif
1771 #undef SKIP_TAIL
1772 if (len >= sizeof(st_index_t)) {
1773 #if !UNALIGNED_WORD_ACCESS
1774 int align = (int)((st_data_t)data % sizeof(st_index_t));
1775 if (align) {
1776 st_index_t d = 0;
1777 int sl, sr, pack;
1779 switch (align) {
1780 #ifdef WORDS_BIGENDIAN
1781 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1782 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1783 #else
1784 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1785 t |= data_at(n) << CHAR_BIT*(n)
1786 #endif
1787 UNALIGNED_ADD_ALL;
1788 #undef UNALIGNED_ADD
1791 #ifdef WORDS_BIGENDIAN
1792 t >>= (CHAR_BIT * align) - CHAR_BIT;
1793 #else
1794 t <<= (CHAR_BIT * align);
1795 #endif
1797 data += sizeof(st_index_t)-align;
1798 len -= sizeof(st_index_t)-align;
1800 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1801 sr = CHAR_BIT * align;
1803 while (len >= sizeof(st_index_t)) {
1804 d = *(st_index_t *)data;
1805 #ifdef WORDS_BIGENDIAN
1806 t = (t << sr) | (d >> sl);
1807 #else
1808 t = (t >> sr) | (d << sl);
1809 #endif
1810 h = murmur_step(h, t);
1811 t = d;
1812 data += sizeof(st_index_t);
1813 len -= sizeof(st_index_t);
1816 pack = len < (size_t)align ? (int)len : align;
1817 d = 0;
1818 switch (pack) {
1819 #ifdef WORDS_BIGENDIAN
1820 # define UNALIGNED_ADD(n) case (n) + 1: \
1821 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1822 #else
1823 # define UNALIGNED_ADD(n) case (n) + 1: \
1824 d |= data_at(n) << CHAR_BIT*(n)
1825 #endif
1826 UNALIGNED_ADD_ALL;
1827 #undef UNALIGNED_ADD
1829 #ifdef WORDS_BIGENDIAN
1830 t = (t << sr) | (d >> sl);
1831 #else
1832 t = (t >> sr) | (d << sl);
1833 #endif
1835 if (len < (size_t)align) goto skip_tail;
1836 # define SKIP_TAIL 1
1837 h = murmur_step(h, t);
1838 data += pack;
1839 len -= pack;
1841 else
1842 #endif
1843 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1844 #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1845 #else
1846 #define aligned_data data
1847 #endif
1849 do {
1850 h = murmur_step(h, *(st_index_t *)aligned_data);
1851 data += sizeof(st_index_t);
1852 len -= sizeof(st_index_t);
1853 } while (len >= sizeof(st_index_t));
1857 t = 0;
1858 switch (len) {
1859 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1860 /* in this case byteorder doesn't really matter */
1861 #if SIZEOF_ST_INDEX_T > 4
1862 case 7: t |= data_at(6) << 48;
1863 case 6: t |= data_at(5) << 40;
1864 case 5: t |= data_at(4) << 32;
1865 case 4:
1866 t |= (st_index_t)*(uint32_t*)aligned_data;
1867 goto skip_tail;
1868 # define SKIP_TAIL 1
1869 #endif
1870 case 3: t |= data_at(2) << 16;
1871 case 2: t |= data_at(1) << 8;
1872 case 1: t |= data_at(0);
1873 #else
1874 #ifdef WORDS_BIGENDIAN
1875 # define UNALIGNED_ADD(n) case (n) + 1: \
1876 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1877 #else
1878 # define UNALIGNED_ADD(n) case (n) + 1: \
1879 t |= data_at(n) << CHAR_BIT*(n)
1880 #endif
1881 UNALIGNED_ADD_ALL;
1882 #undef UNALIGNED_ADD
1883 #endif
1884 #ifdef SKIP_TAIL
1885 skip_tail:
1886 #endif
1887 h ^= t; h -= ROTL(t, 7);
1888 h *= C2;
1890 h ^= l;
1891 #undef aligned_data
1893 return murmur_finish(h);
1896 st_index_t
1897 st_hash_uint32(st_index_t h, uint32_t i)
1899 return murmur_step(h, i);
1902 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1903 st_index_t
1904 st_hash_uint(st_index_t h, st_index_t i)
1906 i += h;
1907 /* no matter if it is BigEndian or LittleEndian,
1908 * we hash just integers */
1909 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1910 h = murmur_step(h, i >> 8*8);
1911 #endif
1912 h = murmur_step(h, i);
1913 return h;
1916 st_index_t
1917 st_hash_end(st_index_t h)
1919 h = murmur_finish(h);
1920 return h;
1923 #undef st_hash_start
1924 st_index_t
1925 rb_st_hash_start(st_index_t h)
1927 return h;
1930 static st_index_t
1931 strhash(st_data_t arg)
1933 register const char *string = (const char *)arg;
1934 return st_hash(string, strlen(string), FNV1_32A_INIT);
1938 st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
1940 char c1, c2;
1942 while (1) {
1943 c1 = *s1++;
1944 c2 = *s2++;
1945 if (c1 == '\0' || c2 == '\0') {
1946 if (c1 != '\0') return 1;
1947 if (c2 != '\0') return -1;
1948 return 0;
1950 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
1951 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
1952 if (c1 != c2) {
1953 if (c1 > c2)
1954 return 1;
1955 else
1956 return -1;
1962 st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
1964 char c1, c2;
1965 size_t i;
1967 for (i = 0; i < n; i++) {
1968 c1 = *s1++;
1969 c2 = *s2++;
1970 if (c1 == '\0' || c2 == '\0') {
1971 if (c1 != '\0') return 1;
1972 if (c2 != '\0') return -1;
1973 return 0;
1975 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
1976 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
1977 if (c1 != c2) {
1978 if (c1 > c2)
1979 return 1;
1980 else
1981 return -1;
1984 return 0;
1987 static int
1988 st_strcmp(st_data_t lhs, st_data_t rhs)
1990 const char *s1 = (char *)lhs;
1991 const char *s2 = (char *)rhs;
1992 return strcmp(s1, s2);
1995 static int
1996 st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
1998 const char *s1 = (char *)lhs;
1999 const char *s2 = (char *)rhs;
2000 return st_locale_insensitive_strcasecmp(s1, s2);
2003 NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2004 static st_index_t
2005 strcasehash(st_data_t arg)
2007 register const char *string = (const char *)arg;
2008 register st_index_t hval = FNV1_32A_INIT;
2011 * FNV-1a hash each octet in the buffer
2013 while (*string) {
2014 unsigned int c = (unsigned char)*string++;
2015 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2016 hval ^= c;
2018 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2019 hval *= FNV_32_PRIME;
2021 return hval;
2025 st_numcmp(st_data_t x, st_data_t y)
2027 return x != y;
2030 st_index_t
2031 st_numhash(st_data_t n)
2033 enum {s1 = 11, s2 = 3};
2034 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2037 /* Expand TAB to be suitable for holding SIZ entries in total.
2038 Pre-existing entries remain not deleted inside of TAB, but its bins
2039 are cleared to expect future reconstruction. See rehash below. */
2040 static void
2041 st_expand_table(st_table *tab, st_index_t siz)
2043 st_table *tmp;
2044 st_index_t n;
2046 if (siz <= get_allocated_entries(tab))
2047 return; /* enough room already */
2049 tmp = st_init_table_with_size(tab->type, siz);
2050 n = get_allocated_entries(tab);
2051 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2052 free(tab->entries);
2053 if (tab->bins != NULL)
2054 free(tab->bins);
2055 if (tmp->bins != NULL)
2056 free(tmp->bins);
2057 tab->entry_power = tmp->entry_power;
2058 tab->bin_power = tmp->bin_power;
2059 tab->size_ind = tmp->size_ind;
2060 tab->entries = tmp->entries;
2061 tab->bins = NULL;
2062 tab->rebuilds_num++;
2063 free(tmp);
2066 /* Rehash using linear search. Return TRUE if we found that the table
2067 was rebuilt. */
2068 static int
2069 st_rehash_linear(st_table *tab)
2071 int eq_p, rebuilt_p;
2072 st_index_t i, j;
2073 st_table_entry *p, *q;
2074 if (tab->bins) {
2075 free(tab->bins);
2076 tab->bins = NULL;
2078 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2079 p = &tab->entries[i];
2080 if (DELETED_ENTRY_P(p))
2081 continue;
2082 for (j = i + 1; j < tab->entries_bound; j++) {
2083 q = &tab->entries[j];
2084 if (DELETED_ENTRY_P(q))
2085 continue;
2086 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2087 if (EXPECT(rebuilt_p, 0))
2088 return TRUE;
2089 if (eq_p) {
2090 *p = *q;
2091 MARK_ENTRY_DELETED(q);
2092 tab->num_entries--;
2093 update_range_for_deleted(tab, j);
2097 return FALSE;
2100 /* Rehash using index. Return TRUE if we found that the table was
2101 rebuilt. */
2102 static int
2103 st_rehash_indexed(st_table *tab)
2105 int eq_p, rebuilt_p;
2106 st_index_t i;
2107 st_index_t const n = bins_size(tab);
2108 unsigned int const size_ind = get_size_ind(tab);
2109 st_index_t *bins = realloc(tab->bins, n);
2110 tab->bins = bins;
2111 initialize_bins(tab);
2112 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2113 st_table_entry *p = &tab->entries[i];
2114 st_index_t ind;
2115 #ifdef QUADRATIC_PROBE
2116 st_index_t d = 1;
2117 #else
2118 st_index_t peterb = p->hash;
2119 #endif
2121 if (DELETED_ENTRY_P(p))
2122 continue;
2124 ind = hash_bin(p->hash, tab);
2125 for (;;) {
2126 st_index_t bin = get_bin(bins, size_ind, ind);
2127 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2128 /* ok, new room */
2129 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2130 break;
2132 else {
2133 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2134 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2135 if (EXPECT(rebuilt_p, 0))
2136 return TRUE;
2137 if (eq_p) {
2138 /* duplicated key; delete it */
2139 q->record = p->record;
2140 MARK_ENTRY_DELETED(p);
2141 tab->num_entries--;
2142 update_range_for_deleted(tab, bin);
2143 break;
2145 else {
2146 /* hash collision; skip it */
2147 #ifdef QUADRATIC_PROBE
2148 ind = hash_bin(ind + d, tab);
2149 d++;
2150 #else
2151 ind = secondary_hash(ind, tab, &peterb);
2152 #endif
2157 return FALSE;
2160 /* Reconstruct TAB's bins according to TAB's entries. This function
2161 permits conflicting keys inside of entries. No errors are reported
2162 then. All but one of them are discarded silently. */
2163 static void
2164 st_rehash(st_table *tab)
2166 int rebuilt_p;
2168 do {
2169 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2170 rebuilt_p = st_rehash_linear(tab);
2171 else
2172 rebuilt_p = st_rehash_indexed(tab);
2173 } while (rebuilt_p);
2176 #ifdef RUBY
2177 static st_data_t
2178 st_stringify(VALUE key)
2180 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2181 rb_hash_key_str(key) : key;
2184 static void
2185 st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2187 st_data_t k = st_stringify(key);
2188 st_table_entry e;
2189 e.hash = do_hash(k, tab);
2190 e.key = k;
2191 e.record = val;
2193 tab->entries[tab->entries_bound++] = e;
2194 tab->num_entries++;
2195 RB_OBJ_WRITTEN(hash, Qundef, k);
2196 RB_OBJ_WRITTEN(hash, Qundef, val);
2199 static void
2200 st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2202 long i;
2204 for (i = 0; i < argc; /* */) {
2205 st_data_t k = st_stringify(argv[i++]);
2206 st_data_t v = argv[i++];
2207 st_insert(tab, k, v);
2208 RB_OBJ_WRITTEN(hash, Qundef, k);
2209 RB_OBJ_WRITTEN(hash, Qundef, v);
2213 static void
2214 st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2216 long i;
2218 /* push elems */
2219 for (i = 0; i < argc; /* */) {
2220 VALUE key = argv[i++];
2221 VALUE val = argv[i++];
2222 st_insert_single(tab, hash, key, val);
2225 /* reindex */
2226 st_rehash(tab);
2229 /* Mimics ruby's { foo => bar } syntax. This function is subpart
2230 of rb_hash_bulk_insert. */
2231 void
2232 rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2234 st_index_t n, size = argc / 2;
2235 st_table *tab = RHASH_ST_TABLE(hash);
2237 tab = RHASH_TBL_RAW(hash);
2238 n = tab->entries_bound + size;
2239 st_expand_table(tab, n);
2240 if (UNLIKELY(tab->num_entries))
2241 st_insert_generic(tab, argc, argv, hash);
2242 else if (argc <= 2)
2243 st_insert_single(tab, hash, argv[0], argv[1]);
2244 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2245 st_insert_linear(tab, argc, argv, hash);
2246 else
2247 st_insert_generic(tab, argc, argv, hash);
2250 // to iterate iv_index_tbl
2251 st_data_t
2252 rb_st_nth_key(st_table *tab, st_index_t index)
2254 if (LIKELY(tab->entries_start == 0 &&
2255 tab->num_entries == tab->entries_bound &&
2256 index < tab->num_entries)) {
2257 return tab->entries[index].key;
2259 else {
2260 rb_bug("unreachable");
2264 #endif