[rubygems/rubygems] Use a constant empty tar header to avoid extra allocations
[ruby.git] / st.c
blob4f0259a237325a334f91230bf9d7121afda0cb77
1 /* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
7 /* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
15 The hash table has two arrays called *bins* and *entries*.
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
32 o The entry array contains table entries in the same order as they
33 were inserted.
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
103 #ifdef NOT_RUBY
104 #include "regint.h"
105 #include "st.h"
106 #elif defined RUBY_EXPORT
107 #include "internal.h"
108 #include "internal/bits.h"
109 #include "internal/hash.h"
110 #include "internal/sanitizers.h"
111 #include "internal/st.h"
112 #endif
114 #include <stdio.h>
115 #ifdef HAVE_STDLIB_H
116 #include <stdlib.h>
117 #endif
118 #include <string.h>
119 #include <assert.h>
121 #ifdef __GNUC__
122 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
123 #define EXPECT(expr, val) __builtin_expect(expr, val)
124 #define ATTRIBUTE_UNUSED __attribute__((unused))
125 #else
126 #define PREFETCH(addr, write_p)
127 #define EXPECT(expr, val) (expr)
128 #define ATTRIBUTE_UNUSED
129 #endif
131 /* The type of hashes. */
132 typedef st_index_t st_hash_t;
134 struct st_table_entry {
135 st_hash_t hash;
136 st_data_t key;
137 st_data_t record;
140 #define type_numhash st_hashtype_num
141 static const struct st_hash_type st_hashtype_num = {
142 st_numcmp,
143 st_numhash,
146 static int st_strcmp(st_data_t, st_data_t);
147 static st_index_t strhash(st_data_t);
148 static const struct st_hash_type type_strhash = {
149 st_strcmp,
150 strhash,
153 static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
154 static st_index_t strcasehash(st_data_t);
155 static const struct st_hash_type type_strcasehash = {
156 st_locale_insensitive_strcasecmp_i,
157 strcasehash,
160 /* Value used to catch uninitialized entries/bins during debugging.
161 There is a possibility for a false alarm, but its probability is
162 extremely small. */
163 #define ST_INIT_VAL 0xafafafafafafafaf
164 #define ST_INIT_VAL_BYTE 0xafa
166 #ifdef RUBY
167 #undef malloc
168 #undef realloc
169 #undef calloc
170 #undef free
171 #define malloc ruby_xmalloc
172 #define calloc ruby_xcalloc
173 #define realloc ruby_xrealloc
174 #define free ruby_xfree
175 #endif
177 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
178 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
179 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
181 /* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
182 up to TRUE if the table is rebuilt during the comparison. */
183 #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
184 do { \
185 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
186 res = PTR_EQUAL(tab, ptr, hash_val, key); \
187 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
188 } while (FALSE)
190 /* Features of a table. */
191 struct st_features {
192 /* Power of 2 used for number of allocated entries. */
193 unsigned char entry_power;
194 /* Power of 2 used for number of allocated bins. Depending on the
195 table size, the number of bins is 2-4 times more than the
196 number of entries. */
197 unsigned char bin_power;
198 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
199 unsigned char size_ind;
200 /* Bins are packed in words of type st_index_t. The following is
201 a size of bins counted by words. */
202 st_index_t bins_words;
205 /* Features of all possible size tables. */
206 #if SIZEOF_ST_INDEX_T == 8
207 #define MAX_POWER2 62
208 static const struct st_features features[] = {
209 {0, 1, 0, 0x0},
210 {1, 2, 0, 0x1},
211 {2, 3, 0, 0x1},
212 {3, 4, 0, 0x2},
213 {4, 5, 0, 0x4},
214 {5, 6, 0, 0x8},
215 {6, 7, 0, 0x10},
216 {7, 8, 0, 0x20},
217 {8, 9, 1, 0x80},
218 {9, 10, 1, 0x100},
219 {10, 11, 1, 0x200},
220 {11, 12, 1, 0x400},
221 {12, 13, 1, 0x800},
222 {13, 14, 1, 0x1000},
223 {14, 15, 1, 0x2000},
224 {15, 16, 1, 0x4000},
225 {16, 17, 2, 0x10000},
226 {17, 18, 2, 0x20000},
227 {18, 19, 2, 0x40000},
228 {19, 20, 2, 0x80000},
229 {20, 21, 2, 0x100000},
230 {21, 22, 2, 0x200000},
231 {22, 23, 2, 0x400000},
232 {23, 24, 2, 0x800000},
233 {24, 25, 2, 0x1000000},
234 {25, 26, 2, 0x2000000},
235 {26, 27, 2, 0x4000000},
236 {27, 28, 2, 0x8000000},
237 {28, 29, 2, 0x10000000},
238 {29, 30, 2, 0x20000000},
239 {30, 31, 2, 0x40000000},
240 {31, 32, 2, 0x80000000},
241 {32, 33, 3, 0x200000000},
242 {33, 34, 3, 0x400000000},
243 {34, 35, 3, 0x800000000},
244 {35, 36, 3, 0x1000000000},
245 {36, 37, 3, 0x2000000000},
246 {37, 38, 3, 0x4000000000},
247 {38, 39, 3, 0x8000000000},
248 {39, 40, 3, 0x10000000000},
249 {40, 41, 3, 0x20000000000},
250 {41, 42, 3, 0x40000000000},
251 {42, 43, 3, 0x80000000000},
252 {43, 44, 3, 0x100000000000},
253 {44, 45, 3, 0x200000000000},
254 {45, 46, 3, 0x400000000000},
255 {46, 47, 3, 0x800000000000},
256 {47, 48, 3, 0x1000000000000},
257 {48, 49, 3, 0x2000000000000},
258 {49, 50, 3, 0x4000000000000},
259 {50, 51, 3, 0x8000000000000},
260 {51, 52, 3, 0x10000000000000},
261 {52, 53, 3, 0x20000000000000},
262 {53, 54, 3, 0x40000000000000},
263 {54, 55, 3, 0x80000000000000},
264 {55, 56, 3, 0x100000000000000},
265 {56, 57, 3, 0x200000000000000},
266 {57, 58, 3, 0x400000000000000},
267 {58, 59, 3, 0x800000000000000},
268 {59, 60, 3, 0x1000000000000000},
269 {60, 61, 3, 0x2000000000000000},
270 {61, 62, 3, 0x4000000000000000},
271 {62, 63, 3, 0x8000000000000000},
274 #else
275 #define MAX_POWER2 30
277 static const struct st_features features[] = {
278 {0, 1, 0, 0x1},
279 {1, 2, 0, 0x1},
280 {2, 3, 0, 0x2},
281 {3, 4, 0, 0x4},
282 {4, 5, 0, 0x8},
283 {5, 6, 0, 0x10},
284 {6, 7, 0, 0x20},
285 {7, 8, 0, 0x40},
286 {8, 9, 1, 0x100},
287 {9, 10, 1, 0x200},
288 {10, 11, 1, 0x400},
289 {11, 12, 1, 0x800},
290 {12, 13, 1, 0x1000},
291 {13, 14, 1, 0x2000},
292 {14, 15, 1, 0x4000},
293 {15, 16, 1, 0x8000},
294 {16, 17, 2, 0x20000},
295 {17, 18, 2, 0x40000},
296 {18, 19, 2, 0x80000},
297 {19, 20, 2, 0x100000},
298 {20, 21, 2, 0x200000},
299 {21, 22, 2, 0x400000},
300 {22, 23, 2, 0x800000},
301 {23, 24, 2, 0x1000000},
302 {24, 25, 2, 0x2000000},
303 {25, 26, 2, 0x4000000},
304 {26, 27, 2, 0x8000000},
305 {27, 28, 2, 0x10000000},
306 {28, 29, 2, 0x20000000},
307 {29, 30, 2, 0x40000000},
308 {30, 31, 2, 0x80000000},
311 #endif
313 /* The reserved hash value and its substitution. */
314 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
315 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
317 /* Return hash value of KEY for table TAB. */
318 static inline st_hash_t
319 do_hash(st_data_t key, st_table *tab)
321 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
323 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
324 another value. Such mapping should be extremely rare. */
325 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
328 /* Power of 2 defining the minimal number of allocated entries. */
329 #define MINIMAL_POWER2 2
331 #if MINIMAL_POWER2 < 2
332 #error "MINIMAL_POWER2 should be >= 2"
333 #endif
335 /* If the power2 of the allocated `entries` is less than the following
336 value, don't allocate bins and use a linear search. */
337 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
339 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
340 static int
341 get_power2(st_index_t size)
343 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
344 if (n <= MAX_POWER2)
345 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
346 #ifdef RUBY
347 /* Ran out of the table entries */
348 rb_raise(rb_eRuntimeError, "st_table too big");
349 #endif
350 /* should raise exception */
351 return -1;
354 /* Return value of N-th bin in array BINS of table with bins size
355 index S. */
356 static inline st_index_t
357 get_bin(st_index_t *bins, int s, st_index_t n)
359 return (s == 0 ? ((unsigned char *) bins)[n]
360 : s == 1 ? ((unsigned short *) bins)[n]
361 : s == 2 ? ((unsigned int *) bins)[n]
362 : ((st_index_t *) bins)[n]);
365 /* Set up N-th bin in array BINS of table with bins size index S to
366 value V. */
367 static inline void
368 set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
370 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
371 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
372 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
373 else ((st_index_t *) bins)[n] = v;
376 /* These macros define reserved values for empty table bin and table
377 bin which contains a deleted entry. We will never use such values
378 for an entry index in bins. */
379 #define EMPTY_BIN 0
380 #define DELETED_BIN 1
381 /* Base of a real entry index in the bins. */
382 #define ENTRY_BASE 2
384 /* Mark I-th bin of table TAB as empty, in other words not
385 corresponding to any entry. */
386 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
388 /* Values used for not found entry and bin with given
389 characteristics. */
390 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
391 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
393 /* Entry and bin values returned when we found a table rebuild during
394 the search. */
395 #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
396 #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
398 /* Mark I-th bin of table TAB as corresponding to a deleted table
399 entry. Update number of entries in the table and number of bins
400 corresponding to deleted entries. */
401 #define MARK_BIN_DELETED(tab, i) \
402 do { \
403 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
404 } while (0)
406 /* Macros to check that value B is used empty bins and bins
407 corresponding deleted entries. */
408 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
409 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
410 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
412 /* Macros to check empty bins and bins corresponding to deleted
413 entries. Bins are given by their index I in table TAB. */
414 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
416 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
418 /* Macros for marking and checking deleted entries given by their
419 pointer E_PTR. */
420 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
421 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
423 /* Return bin size index of table TAB. */
424 static inline unsigned int
425 get_size_ind(const st_table *tab)
427 return tab->size_ind;
430 /* Return the number of allocated bins of table TAB. */
431 static inline st_index_t
432 get_bins_num(const st_table *tab)
434 return ((st_index_t) 1)<<tab->bin_power;
437 /* Return mask for a bin index in table TAB. */
438 static inline st_index_t
439 bins_mask(const st_table *tab)
441 return get_bins_num(tab) - 1;
444 /* Return the index of table TAB bin corresponding to
445 HASH_VALUE. */
446 static inline st_index_t
447 hash_bin(st_hash_t hash_value, st_table *tab)
449 return hash_value & bins_mask(tab);
452 /* Return the number of allocated entries of table TAB. */
453 static inline st_index_t
454 get_allocated_entries(const st_table *tab)
456 return ((st_index_t) 1)<<tab->entry_power;
459 /* Return size of the allocated bins of table TAB. */
460 static inline st_index_t
461 bins_size(const st_table *tab)
463 return features[tab->entry_power].bins_words * sizeof (st_index_t);
466 /* Mark all bins of table TAB as empty. */
467 static void
468 initialize_bins(st_table *tab)
470 memset(tab->bins, 0, bins_size(tab));
473 /* Make table TAB empty. */
474 static void
475 make_tab_empty(st_table *tab)
477 tab->num_entries = 0;
478 tab->entries_start = tab->entries_bound = 0;
479 if (tab->bins != NULL)
480 initialize_bins(tab);
483 #ifdef HASH_LOG
484 #ifdef HAVE_UNISTD_H
485 #include <unistd.h>
486 #endif
487 static struct {
488 int all, total, num, str, strcase;
489 } collision;
491 /* Flag switching off output of package statistics at the end of
492 program. */
493 static int init_st = 0;
495 /* Output overall number of table searches and collisions into a
496 temporary file. */
497 static void
498 stat_col(void)
500 char fname[10+sizeof(long)*3];
501 FILE *f;
502 if (!collision.total) return;
503 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
504 if (f == NULL)
505 return;
506 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
507 ((double)collision.all / (collision.total)) * 100);
508 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
509 fclose(f);
511 #endif
513 st_table *
514 st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
516 int n;
518 #ifdef HASH_LOG
519 #if HASH_LOG+0 < 0
521 const char *e = getenv("ST_HASH_LOG");
522 if (!e || !*e) init_st = 1;
524 #endif
525 if (init_st == 0) {
526 init_st = 1;
527 atexit(stat_col);
529 #endif
531 n = get_power2(size);
532 #ifndef RUBY
533 if (n < 0)
534 return NULL;
535 #endif
537 tab->type = type;
538 tab->entry_power = n;
539 tab->bin_power = features[n].bin_power;
540 tab->size_ind = features[n].size_ind;
541 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
542 tab->bins = NULL;
543 else {
544 tab->bins = (st_index_t *) malloc(bins_size(tab));
545 #ifndef RUBY
546 if (tab->bins == NULL) {
547 free(tab);
548 return NULL;
550 #endif
552 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
553 * sizeof(st_table_entry));
554 #ifndef RUBY
555 if (tab->entries == NULL) {
556 st_free_table(tab);
557 return NULL;
559 #endif
560 make_tab_empty(tab);
561 tab->rebuilds_num = 0;
562 return tab;
565 /* Create and return table with TYPE which can hold at least SIZE
566 entries. The real number of entries which the table can hold is
567 the nearest power of two for SIZE. */
568 st_table *
569 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
571 st_table *tab = malloc(sizeof(st_table));
572 #ifndef RUBY
573 if (tab == NULL)
574 return NULL;
575 #endif
577 #ifdef RUBY
578 st_init_existing_table_with_size(tab, type, size);
579 #else
580 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
581 free(tab);
582 return NULL;
584 #endif
586 return tab;
589 size_t
590 st_table_size(const struct st_table *tbl)
592 return tbl->num_entries;
595 /* Create and return table with TYPE which can hold a minimal number
596 of entries (see comments for get_power2). */
597 st_table *
598 st_init_table(const struct st_hash_type *type)
600 return st_init_table_with_size(type, 0);
603 /* Create and return table which can hold a minimal number of
604 numbers. */
605 st_table *
606 st_init_numtable(void)
608 return st_init_table(&type_numhash);
611 /* Create and return table which can hold SIZE numbers. */
612 st_table *
613 st_init_numtable_with_size(st_index_t size)
615 return st_init_table_with_size(&type_numhash, size);
618 /* Create and return table which can hold a minimal number of
619 strings. */
620 st_table *
621 st_init_strtable(void)
623 return st_init_table(&type_strhash);
626 /* Create and return table which can hold SIZE strings. */
627 st_table *
628 st_init_strtable_with_size(st_index_t size)
630 return st_init_table_with_size(&type_strhash, size);
633 /* Create and return table which can hold a minimal number of strings
634 whose character case is ignored. */
635 st_table *
636 st_init_strcasetable(void)
638 return st_init_table(&type_strcasehash);
641 /* Create and return table which can hold SIZE strings whose character
642 case is ignored. */
643 st_table *
644 st_init_strcasetable_with_size(st_index_t size)
646 return st_init_table_with_size(&type_strcasehash, size);
649 /* Make table TAB empty. */
650 void
651 st_clear(st_table *tab)
653 make_tab_empty(tab);
654 tab->rebuilds_num++;
657 /* Free table TAB space. */
658 void
659 st_free_table(st_table *tab)
661 free(tab->bins);
662 free(tab->entries);
663 free(tab);
666 /* Return byte size of memory allocated for table TAB. */
667 size_t
668 st_memsize(const st_table *tab)
670 return(sizeof(st_table)
671 + (tab->bins == NULL ? 0 : bins_size(tab))
672 + get_allocated_entries(tab) * sizeof(st_table_entry));
675 static st_index_t
676 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
678 static st_index_t
679 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
681 static st_index_t
682 find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
684 static st_index_t
685 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
686 st_data_t key, st_index_t *bin_ind);
688 #ifdef HASH_LOG
689 static void
690 count_collision(const struct st_hash_type *type)
692 collision.all++;
693 if (type == &type_numhash) {
694 collision.num++;
696 else if (type == &type_strhash) {
697 collision.strcase++;
699 else if (type == &type_strcasehash) {
700 collision.str++;
704 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
705 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
706 #define collision_check 0
707 #else
708 #define COLLISION
709 #define FOUND_BIN
710 #endif
712 /* If the number of entries in the table is at least REBUILD_THRESHOLD
713 times less than the entry array length, decrease the table
714 size. */
715 #define REBUILD_THRESHOLD 4
717 #if REBUILD_THRESHOLD < 2
718 #error "REBUILD_THRESHOLD should be >= 2"
719 #endif
721 static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
722 static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
723 static void rebuild_cleanup(st_table *const tab);
725 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
726 and can change size of the table entries and bins arrays.
727 Rebuilding is implemented by creation of a new table or by
728 compaction of the existing one. */
729 static void
730 rebuild_table(st_table *tab)
732 if ((2 * tab->num_entries <= get_allocated_entries(tab)
733 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
734 || tab->num_entries < (1 << MINIMAL_POWER2)) {
735 /* Compaction: */
736 tab->num_entries = 0;
737 if (tab->bins != NULL)
738 initialize_bins(tab);
739 rebuild_table_with(tab, tab);
741 else {
742 st_table *new_tab;
743 /* This allocation could trigger GC and compaction. If tab is the
744 * gen_iv_tbl, then tab could have changed in size due to objects being
745 * freed and/or moved. Do not store attributes of tab before this line. */
746 new_tab = st_init_table_with_size(tab->type,
747 2 * tab->num_entries - 1);
748 rebuild_table_with(new_tab, tab);
749 rebuild_move_table(new_tab, tab);
751 rebuild_cleanup(tab);
754 static void
755 rebuild_table_with(st_table *const new_tab, st_table *const tab)
757 st_index_t i, ni;
758 unsigned int size_ind;
759 st_table_entry *new_entries;
760 st_table_entry *curr_entry_ptr;
761 st_index_t *bins;
762 st_index_t bin_ind;
764 new_entries = new_tab->entries;
766 ni = 0;
767 bins = new_tab->bins;
768 size_ind = get_size_ind(new_tab);
769 st_index_t bound = tab->entries_bound;
770 st_table_entry *entries = tab->entries;
772 for (i = tab->entries_start; i < bound; i++) {
773 curr_entry_ptr = &entries[i];
774 PREFETCH(entries + i + 1, 0);
775 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
776 continue;
777 if (&new_entries[ni] != curr_entry_ptr)
778 new_entries[ni] = *curr_entry_ptr;
779 if (EXPECT(bins != NULL, 1)) {
780 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
781 curr_entry_ptr->key);
782 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
784 new_tab->num_entries++;
785 ni++;
789 static void
790 rebuild_move_table(st_table *const new_tab, st_table *const tab)
792 tab->entry_power = new_tab->entry_power;
793 tab->bin_power = new_tab->bin_power;
794 tab->size_ind = new_tab->size_ind;
795 free(tab->bins);
796 tab->bins = new_tab->bins;
797 free(tab->entries);
798 tab->entries = new_tab->entries;
799 free(new_tab);
802 static void
803 rebuild_cleanup(st_table *const tab)
805 tab->entries_start = 0;
806 tab->entries_bound = tab->num_entries;
807 tab->rebuilds_num++;
810 /* Return the next secondary hash index for table TAB using previous
811 index IND and PERTURB. Finally modulo of the function becomes a
812 full *cycle linear congruential generator*, in other words it
813 guarantees traversing all table bins in extreme case.
815 According the Hull-Dobell theorem a generator
816 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
817 o m and c are relatively prime
818 o a-1 is divisible by all prime factors of m
819 o a-1 is divisible by 4 if m is divisible by 4.
821 For our case a is 5, c is 1, and m is a power of two. */
822 static inline st_index_t
823 secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
825 *perturb >>= 11;
826 ind = (ind << 2) + ind + *perturb + 1;
827 return hash_bin(ind, tab);
830 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
831 search. Return the index of the found entry in array `entries`.
832 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
833 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
834 static inline st_index_t
835 find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
837 int eq_p, rebuilt_p;
838 st_index_t i, bound;
839 st_table_entry *entries;
841 bound = tab->entries_bound;
842 entries = tab->entries;
843 for (i = tab->entries_start; i < bound; i++) {
844 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
845 if (EXPECT(rebuilt_p, 0))
846 return REBUILT_TABLE_ENTRY_IND;
847 if (eq_p)
848 return i;
850 return UNDEFINED_ENTRY_IND;
853 /* Use the quadratic probing. The method has a better data locality
854 but more collisions than the current approach. In average it
855 results in a bit slower search. */
856 /*#define QUADRATIC_PROBE*/
858 /* Return index of entry with HASH_VALUE and KEY in table TAB. If
859 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
860 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
861 static st_index_t
862 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
864 int eq_p, rebuilt_p;
865 st_index_t ind;
866 #ifdef QUADRATIC_PROBE
867 st_index_t d;
868 #else
869 st_index_t perturb;
870 #endif
871 st_index_t bin;
872 st_table_entry *entries = tab->entries;
874 ind = hash_bin(hash_value, tab);
875 #ifdef QUADRATIC_PROBE
876 d = 1;
877 #else
878 perturb = hash_value;
879 #endif
880 FOUND_BIN;
881 for (;;) {
882 bin = get_bin(tab->bins, get_size_ind(tab), ind);
883 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
884 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
885 if (EXPECT(rebuilt_p, 0))
886 return REBUILT_TABLE_ENTRY_IND;
887 if (eq_p)
888 break;
890 else if (EMPTY_BIN_P(bin))
891 return UNDEFINED_ENTRY_IND;
892 #ifdef QUADRATIC_PROBE
893 ind = hash_bin(ind + d, tab);
894 d++;
895 #else
896 ind = secondary_hash(ind, tab, &perturb);
897 #endif
898 COLLISION;
900 return bin;
903 /* Find and return index of table TAB bin corresponding to an entry
904 with HASH_VALUE and KEY. If there is no such bin, return
905 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
906 return REBUILT_TABLE_BIN_IND. */
907 static st_index_t
908 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
910 int eq_p, rebuilt_p;
911 st_index_t ind;
912 #ifdef QUADRATIC_PROBE
913 st_index_t d;
914 #else
915 st_index_t perturb;
916 #endif
917 st_index_t bin;
918 st_table_entry *entries = tab->entries;
920 ind = hash_bin(hash_value, tab);
921 #ifdef QUADRATIC_PROBE
922 d = 1;
923 #else
924 perturb = hash_value;
925 #endif
926 FOUND_BIN;
927 for (;;) {
928 bin = get_bin(tab->bins, get_size_ind(tab), ind);
929 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
930 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
931 if (EXPECT(rebuilt_p, 0))
932 return REBUILT_TABLE_BIN_IND;
933 if (eq_p)
934 break;
936 else if (EMPTY_BIN_P(bin))
937 return UNDEFINED_BIN_IND;
938 #ifdef QUADRATIC_PROBE
939 ind = hash_bin(ind + d, tab);
940 d++;
941 #else
942 ind = secondary_hash(ind, tab, &perturb);
943 #endif
944 COLLISION;
946 return ind;
949 /* Find and return index of table TAB bin corresponding to an entry
950 with HASH_VALUE and KEY. The entry should be in the table
951 already. */
952 static st_index_t
953 find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
955 st_index_t ind;
956 #ifdef QUADRATIC_PROBE
957 st_index_t d;
958 #else
959 st_index_t perturb;
960 #endif
961 st_index_t bin;
963 ind = hash_bin(hash_value, tab);
964 #ifdef QUADRATIC_PROBE
965 d = 1;
966 #else
967 perturb = hash_value;
968 #endif
969 FOUND_BIN;
970 for (;;) {
971 bin = get_bin(tab->bins, get_size_ind(tab), ind);
972 if (EMPTY_OR_DELETED_BIN_P(bin))
973 return ind;
974 #ifdef QUADRATIC_PROBE
975 ind = hash_bin(ind + d, tab);
976 d++;
977 #else
978 ind = secondary_hash(ind, tab, &perturb);
979 #endif
980 COLLISION;
984 /* Return index of table TAB bin for HASH_VALUE and KEY through
985 BIN_IND and the pointed value as the function result. Reserve the
986 bin for inclusion of the corresponding entry into the table if it
987 is not there yet. We always find such bin as bins array length is
988 bigger entries array. Although we can reuse a deleted bin, the
989 result bin value is always empty if the table has no entry with
990 KEY. Return the entries array index of the found entry or
991 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
992 during the search, return REBUILT_TABLE_ENTRY_IND. */
993 static st_index_t
994 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
995 st_data_t key, st_index_t *bin_ind)
997 int eq_p, rebuilt_p;
998 st_index_t ind;
999 st_hash_t curr_hash_value = *hash_value;
1000 #ifdef QUADRATIC_PROBE
1001 st_index_t d;
1002 #else
1003 st_index_t perturb;
1004 #endif
1005 st_index_t entry_index;
1006 st_index_t first_deleted_bin_ind;
1007 st_table_entry *entries;
1009 ind = hash_bin(curr_hash_value, tab);
1010 #ifdef QUADRATIC_PROBE
1011 d = 1;
1012 #else
1013 perturb = curr_hash_value;
1014 #endif
1015 FOUND_BIN;
1016 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1017 entries = tab->entries;
1018 for (;;) {
1019 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1020 if (EMPTY_BIN_P(entry_index)) {
1021 tab->num_entries++;
1022 entry_index = UNDEFINED_ENTRY_IND;
1023 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1024 /* We can reuse bin of a deleted entry. */
1025 ind = first_deleted_bin_ind;
1026 MARK_BIN_EMPTY(tab, ind);
1028 break;
1030 else if (! DELETED_BIN_P(entry_index)) {
1031 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1032 if (EXPECT(rebuilt_p, 0))
1033 return REBUILT_TABLE_ENTRY_IND;
1034 if (eq_p)
1035 break;
1037 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1038 first_deleted_bin_ind = ind;
1039 #ifdef QUADRATIC_PROBE
1040 ind = hash_bin(ind + d, tab);
1041 d++;
1042 #else
1043 ind = secondary_hash(ind, tab, &perturb);
1044 #endif
1045 COLLISION;
1047 *bin_ind = ind;
1048 return entry_index;
1051 /* Find an entry with KEY in table TAB. Return non-zero if we found
1052 it. Set up *RECORD to the found entry record. */
1054 st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1056 st_index_t bin;
1057 st_hash_t hash = do_hash(key, tab);
1059 retry:
1060 if (tab->bins == NULL) {
1061 bin = find_entry(tab, hash, key);
1062 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1063 goto retry;
1064 if (bin == UNDEFINED_ENTRY_IND)
1065 return 0;
1067 else {
1068 bin = find_table_entry_ind(tab, hash, key);
1069 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1070 goto retry;
1071 if (bin == UNDEFINED_ENTRY_IND)
1072 return 0;
1073 bin -= ENTRY_BASE;
1075 if (value != 0)
1076 *value = tab->entries[bin].record;
1077 return 1;
1080 /* Find an entry with KEY in table TAB. Return non-zero if we found
1081 it. Set up *RESULT to the found table entry key. */
1083 st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1085 st_index_t bin;
1086 st_hash_t hash = do_hash(key, tab);
1088 retry:
1089 if (tab->bins == NULL) {
1090 bin = find_entry(tab, hash, key);
1091 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1092 goto retry;
1093 if (bin == UNDEFINED_ENTRY_IND)
1094 return 0;
1096 else {
1097 bin = find_table_entry_ind(tab, hash, key);
1098 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1099 goto retry;
1100 if (bin == UNDEFINED_ENTRY_IND)
1101 return 0;
1102 bin -= ENTRY_BASE;
1104 if (result != 0)
1105 *result = tab->entries[bin].key;
1106 return 1;
1109 /* Check the table and rebuild it if it is necessary. */
1110 static inline void
1111 rebuild_table_if_necessary (st_table *tab)
1113 st_index_t bound = tab->entries_bound;
1115 if (bound == get_allocated_entries(tab))
1116 rebuild_table(tab);
1119 /* Insert (KEY, VALUE) into table TAB and return zero. If there is
1120 already entry with KEY in the table, return nonzero and update
1121 the value of the found entry. */
1123 st_insert(st_table *tab, st_data_t key, st_data_t value)
1125 st_table_entry *entry;
1126 st_index_t bin;
1127 st_index_t ind;
1128 st_hash_t hash_value;
1129 st_index_t bin_ind;
1130 int new_p;
1132 hash_value = do_hash(key, tab);
1133 retry:
1134 rebuild_table_if_necessary(tab);
1135 if (tab->bins == NULL) {
1136 bin = find_entry(tab, hash_value, key);
1137 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1138 goto retry;
1139 new_p = bin == UNDEFINED_ENTRY_IND;
1140 if (new_p)
1141 tab->num_entries++;
1142 bin_ind = UNDEFINED_BIN_IND;
1144 else {
1145 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1146 key, &bin_ind);
1147 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1148 goto retry;
1149 new_p = bin == UNDEFINED_ENTRY_IND;
1150 bin -= ENTRY_BASE;
1152 if (new_p) {
1153 ind = tab->entries_bound++;
1154 entry = &tab->entries[ind];
1155 entry->hash = hash_value;
1156 entry->key = key;
1157 entry->record = value;
1158 if (bin_ind != UNDEFINED_BIN_IND)
1159 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1160 return 0;
1162 tab->entries[bin].record = value;
1163 return 1;
1166 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1167 entry with KEY before the insertion. */
1168 static inline void
1169 st_add_direct_with_hash(st_table *tab,
1170 st_data_t key, st_data_t value, st_hash_t hash)
1172 st_table_entry *entry;
1173 st_index_t ind;
1174 st_index_t bin_ind;
1176 rebuild_table_if_necessary(tab);
1177 ind = tab->entries_bound++;
1178 entry = &tab->entries[ind];
1179 entry->hash = hash;
1180 entry->key = key;
1181 entry->record = value;
1182 tab->num_entries++;
1183 if (tab->bins != NULL) {
1184 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1185 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1189 void
1190 rb_st_add_direct_with_hash(st_table *tab,
1191 st_data_t key, st_data_t value, st_hash_t hash)
1193 st_add_direct_with_hash(tab, key, value, hash);
1196 /* Insert (KEY, VALUE) into table TAB. The table should not have
1197 entry with KEY before the insertion. */
1198 void
1199 st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1201 st_hash_t hash_value;
1203 hash_value = do_hash(key, tab);
1204 st_add_direct_with_hash(tab, key, value, hash_value);
1207 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1208 there is already entry with KEY in the table, return nonzero and
1209 update the value of the found entry. */
1211 st_insert2(st_table *tab, st_data_t key, st_data_t value,
1212 st_data_t (*func)(st_data_t))
1214 st_table_entry *entry;
1215 st_index_t bin;
1216 st_index_t ind;
1217 st_hash_t hash_value;
1218 st_index_t bin_ind;
1219 int new_p;
1221 hash_value = do_hash(key, tab);
1222 retry:
1223 rebuild_table_if_necessary (tab);
1224 if (tab->bins == NULL) {
1225 bin = find_entry(tab, hash_value, key);
1226 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1227 goto retry;
1228 new_p = bin == UNDEFINED_ENTRY_IND;
1229 if (new_p)
1230 tab->num_entries++;
1231 bin_ind = UNDEFINED_BIN_IND;
1233 else {
1234 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1235 key, &bin_ind);
1236 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1237 goto retry;
1238 new_p = bin == UNDEFINED_ENTRY_IND;
1239 bin -= ENTRY_BASE;
1241 if (new_p) {
1242 key = (*func)(key);
1243 ind = tab->entries_bound++;
1244 entry = &tab->entries[ind];
1245 entry->hash = hash_value;
1246 entry->key = key;
1247 entry->record = value;
1248 if (bin_ind != UNDEFINED_BIN_IND)
1249 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1250 return 0;
1252 tab->entries[bin].record = value;
1253 return 1;
1256 /* Create a copy of old_tab into new_tab. */
1257 st_table *
1258 st_replace(st_table *new_tab, st_table *old_tab)
1260 *new_tab = *old_tab;
1261 if (old_tab->bins == NULL)
1262 new_tab->bins = NULL;
1263 else {
1264 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1265 #ifndef RUBY
1266 if (new_tab->bins == NULL) {
1267 return NULL;
1269 #endif
1271 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1272 * sizeof(st_table_entry));
1273 #ifndef RUBY
1274 if (new_tab->entries == NULL) {
1275 return NULL;
1277 #endif
1278 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1279 get_allocated_entries(old_tab));
1280 if (old_tab->bins != NULL)
1281 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1283 return new_tab;
1286 /* Create and return a copy of table OLD_TAB. */
1287 st_table *
1288 st_copy(st_table *old_tab)
1290 st_table *new_tab;
1292 new_tab = (st_table *) malloc(sizeof(st_table));
1293 #ifndef RUBY
1294 if (new_tab == NULL)
1295 return NULL;
1296 #endif
1298 if (st_replace(new_tab, old_tab) == NULL) {
1299 st_free_table(new_tab);
1300 return NULL;
1303 return new_tab;
1306 /* Update the entries start of table TAB after removing an entry
1307 with index N in the array entries. */
1308 static inline void
1309 update_range_for_deleted(st_table *tab, st_index_t n)
1311 /* Do not update entries_bound here. Otherwise, we can fill all
1312 bins by deleted entry value before rebuilding the table. */
1313 if (tab->entries_start == n) {
1314 st_index_t start = n + 1;
1315 st_index_t bound = tab->entries_bound;
1316 st_table_entry *entries = tab->entries;
1317 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1318 tab->entries_start = start;
1322 /* Delete entry with KEY from table TAB, set up *VALUE (unless
1323 VALUE is zero) from deleted table entry, and return non-zero. If
1324 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1325 is zero), and return zero. */
1326 static int
1327 st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1329 st_table_entry *entry;
1330 st_index_t bin;
1331 st_index_t bin_ind;
1332 st_hash_t hash;
1334 hash = do_hash(*key, tab);
1335 retry:
1336 if (tab->bins == NULL) {
1337 bin = find_entry(tab, hash, *key);
1338 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1339 goto retry;
1340 if (bin == UNDEFINED_ENTRY_IND) {
1341 if (value != 0) *value = 0;
1342 return 0;
1345 else {
1346 bin_ind = find_table_bin_ind(tab, hash, *key);
1347 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1348 goto retry;
1349 if (bin_ind == UNDEFINED_BIN_IND) {
1350 if (value != 0) *value = 0;
1351 return 0;
1353 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1354 MARK_BIN_DELETED(tab, bin_ind);
1356 entry = &tab->entries[bin];
1357 *key = entry->key;
1358 if (value != 0) *value = entry->record;
1359 MARK_ENTRY_DELETED(entry);
1360 tab->num_entries--;
1361 update_range_for_deleted(tab, bin);
1362 return 1;
1366 st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1368 return st_general_delete(tab, key, value);
1371 /* The function and other functions with suffix '_safe' or '_check'
1372 are originated from the previous implementation of the hash tables.
1373 It was necessary for correct deleting entries during traversing
1374 tables. The current implementation permits deletion during
1375 traversing without a specific way to do this. */
1377 st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1378 st_data_t never ATTRIBUTE_UNUSED)
1380 return st_general_delete(tab, key, value);
1383 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1384 return zero. Otherwise, remove the first entry in the table.
1385 Return its key through KEY and its record through VALUE (unless
1386 VALUE is zero). */
1388 st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1390 st_index_t i, bound;
1391 st_index_t bin;
1392 st_table_entry *entries, *curr_entry_ptr;
1393 st_index_t bin_ind;
1395 entries = tab->entries;
1396 bound = tab->entries_bound;
1397 for (i = tab->entries_start; i < bound; i++) {
1398 curr_entry_ptr = &entries[i];
1399 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1400 st_hash_t entry_hash = curr_entry_ptr->hash;
1401 st_data_t entry_key = curr_entry_ptr->key;
1403 if (value != 0) *value = curr_entry_ptr->record;
1404 *key = entry_key;
1405 retry:
1406 if (tab->bins == NULL) {
1407 bin = find_entry(tab, entry_hash, entry_key);
1408 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1409 entries = tab->entries;
1410 goto retry;
1412 curr_entry_ptr = &entries[bin];
1414 else {
1415 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1416 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1417 entries = tab->entries;
1418 goto retry;
1420 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1421 - ENTRY_BASE];
1422 MARK_BIN_DELETED(tab, bin_ind);
1424 MARK_ENTRY_DELETED(curr_entry_ptr);
1425 tab->num_entries--;
1426 update_range_for_deleted(tab, i);
1427 return 1;
1430 if (value != 0) *value = 0;
1431 return 0;
1434 /* See comments for function st_delete_safe. */
1435 void
1436 st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1437 st_data_t never ATTRIBUTE_UNUSED)
1441 /* Find entry with KEY in table TAB, call FUNC with pointers to copies
1442 of the key and the value of the found entry, and non-zero as the
1443 3rd argument. If the entry is not found, call FUNC with a pointer
1444 to KEY, a pointer to zero, and a zero argument. If the call
1445 returns ST_CONTINUE, the table will have an entry with key and
1446 value returned by FUNC through the 1st and 2nd parameters. If the
1447 call of FUNC returns ST_DELETE, the table will not have entry with
1448 KEY. The function returns flag of that the entry with KEY was in
1449 the table before the call. */
1451 st_update(st_table *tab, st_data_t key,
1452 st_update_callback_func *func, st_data_t arg)
1454 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1455 st_index_t bin = 0; /* Ditto */
1456 st_table_entry *entries;
1457 st_index_t bin_ind;
1458 st_data_t value = 0, old_key;
1459 int retval, existing;
1460 st_hash_t hash = do_hash(key, tab);
1462 retry:
1463 entries = tab->entries;
1464 if (tab->bins == NULL) {
1465 bin = find_entry(tab, hash, key);
1466 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1467 goto retry;
1468 existing = bin != UNDEFINED_ENTRY_IND;
1469 entry = &entries[bin];
1470 bin_ind = UNDEFINED_BIN_IND;
1472 else {
1473 bin_ind = find_table_bin_ind(tab, hash, key);
1474 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1475 goto retry;
1476 existing = bin_ind != UNDEFINED_BIN_IND;
1477 if (existing) {
1478 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1479 entry = &entries[bin];
1482 if (existing) {
1483 key = entry->key;
1484 value = entry->record;
1486 old_key = key;
1487 retval = (*func)(&key, &value, arg, existing);
1488 switch (retval) {
1489 case ST_CONTINUE:
1490 if (! existing) {
1491 st_add_direct_with_hash(tab, key, value, hash);
1492 break;
1494 if (old_key != key) {
1495 entry->key = key;
1497 entry->record = value;
1498 break;
1499 case ST_DELETE:
1500 if (existing) {
1501 if (bin_ind != UNDEFINED_BIN_IND)
1502 MARK_BIN_DELETED(tab, bin_ind);
1503 MARK_ENTRY_DELETED(entry);
1504 tab->num_entries--;
1505 update_range_for_deleted(tab, bin);
1507 break;
1509 return existing;
1512 /* Traverse all entries in table TAB calling FUNC with current entry
1513 key and value and zero. If the call returns ST_STOP, stop
1514 traversing. If the call returns ST_DELETE, delete the current
1515 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1516 traversing. The function returns zero unless an error is found.
1517 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1518 different for ST_CHECK and when the current element is removed
1519 during traversing. */
1520 static inline int
1521 st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1522 int check_p)
1524 st_index_t bin;
1525 st_index_t bin_ind;
1526 st_table_entry *entries, *curr_entry_ptr;
1527 enum st_retval retval;
1528 st_index_t i, rebuilds_num;
1529 st_hash_t hash;
1530 st_data_t key;
1531 int error_p, packed_p = tab->bins == NULL;
1533 entries = tab->entries;
1534 /* The bound can change inside the loop even without rebuilding
1535 the table, e.g. by an entry insertion. */
1536 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1537 curr_entry_ptr = &entries[i];
1538 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1539 continue;
1540 key = curr_entry_ptr->key;
1541 rebuilds_num = tab->rebuilds_num;
1542 hash = curr_entry_ptr->hash;
1543 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1545 if (retval == ST_REPLACE && replace) {
1546 st_data_t value;
1547 value = curr_entry_ptr->record;
1548 retval = (*replace)(&key, &value, arg, TRUE);
1549 curr_entry_ptr->key = key;
1550 curr_entry_ptr->record = value;
1553 if (rebuilds_num != tab->rebuilds_num) {
1554 retry:
1555 entries = tab->entries;
1556 packed_p = tab->bins == NULL;
1557 if (packed_p) {
1558 i = find_entry(tab, hash, key);
1559 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1560 goto retry;
1561 error_p = i == UNDEFINED_ENTRY_IND;
1563 else {
1564 i = find_table_entry_ind(tab, hash, key);
1565 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1566 goto retry;
1567 error_p = i == UNDEFINED_ENTRY_IND;
1568 i -= ENTRY_BASE;
1570 if (error_p && check_p) {
1571 /* call func with error notice */
1572 retval = (*func)(0, 0, arg, 1);
1573 return 1;
1575 curr_entry_ptr = &entries[i];
1577 switch (retval) {
1578 case ST_REPLACE:
1579 break;
1580 case ST_CONTINUE:
1581 break;
1582 case ST_CHECK:
1583 if (check_p)
1584 break;
1585 case ST_STOP:
1586 return 0;
1587 case ST_DELETE: {
1588 st_data_t key = curr_entry_ptr->key;
1590 again:
1591 if (packed_p) {
1592 bin = find_entry(tab, hash, key);
1593 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1594 goto again;
1595 if (bin == UNDEFINED_ENTRY_IND)
1596 break;
1598 else {
1599 bin_ind = find_table_bin_ind(tab, hash, key);
1600 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1601 goto again;
1602 if (bin_ind == UNDEFINED_BIN_IND)
1603 break;
1604 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1605 MARK_BIN_DELETED(tab, bin_ind);
1607 curr_entry_ptr = &entries[bin];
1608 MARK_ENTRY_DELETED(curr_entry_ptr);
1609 tab->num_entries--;
1610 update_range_for_deleted(tab, bin);
1611 break;
1615 return 0;
1619 st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1621 return st_general_foreach(tab, func, replace, arg, TRUE);
1624 struct functor {
1625 st_foreach_callback_func *func;
1626 st_data_t arg;
1629 static int
1630 apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1632 const struct functor *f = (void *)d;
1633 return f->func(k, v, f->arg);
1637 st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1639 const struct functor f = { func, arg };
1640 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1643 /* See comments for function st_delete_safe. */
1645 st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1646 st_data_t never ATTRIBUTE_UNUSED)
1648 return st_general_foreach(tab, func, 0, arg, TRUE);
1651 /* Set up array KEYS by at most SIZE keys of head table TAB entries.
1652 Return the number of keys set up in array KEYS. */
1653 static inline st_index_t
1654 st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1656 st_index_t i, bound;
1657 st_data_t key, *keys_start, *keys_end;
1658 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1660 bound = tab->entries_bound;
1661 keys_start = keys;
1662 keys_end = keys + size;
1663 for (i = tab->entries_start; i < bound; i++) {
1664 if (keys == keys_end)
1665 break;
1666 curr_entry_ptr = &entries[i];
1667 key = curr_entry_ptr->key;
1668 if (! DELETED_ENTRY_P(curr_entry_ptr))
1669 *keys++ = key;
1672 return keys - keys_start;
1675 st_index_t
1676 st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1678 return st_general_keys(tab, keys, size);
1681 /* See comments for function st_delete_safe. */
1682 st_index_t
1683 st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1684 st_data_t never ATTRIBUTE_UNUSED)
1686 return st_general_keys(tab, keys, size);
1689 /* Set up array VALUES by at most SIZE values of head table TAB
1690 entries. Return the number of values set up in array VALUES. */
1691 static inline st_index_t
1692 st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1694 st_index_t i, bound;
1695 st_data_t *values_start, *values_end;
1696 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1698 values_start = values;
1699 values_end = values + size;
1700 bound = tab->entries_bound;
1701 for (i = tab->entries_start; i < bound; i++) {
1702 if (values == values_end)
1703 break;
1704 curr_entry_ptr = &entries[i];
1705 if (! DELETED_ENTRY_P(curr_entry_ptr))
1706 *values++ = curr_entry_ptr->record;
1709 return values - values_start;
1712 st_index_t
1713 st_values(st_table *tab, st_data_t *values, st_index_t size)
1715 return st_general_values(tab, values, size);
1718 /* See comments for function st_delete_safe. */
1719 st_index_t
1720 st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1721 st_data_t never ATTRIBUTE_UNUSED)
1723 return st_general_values(tab, values, size);
1726 #define FNV1_32A_INIT 0x811c9dc5
1729 * 32 bit magic FNV-1a prime
1731 #define FNV_32_PRIME 0x01000193
1733 /* __POWERPC__ added to accommodate Darwin case. */
1734 #ifndef UNALIGNED_WORD_ACCESS
1735 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1736 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1737 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1738 defined(__mc68020__)
1739 # define UNALIGNED_WORD_ACCESS 1
1740 # endif
1741 #endif
1742 #ifndef UNALIGNED_WORD_ACCESS
1743 # define UNALIGNED_WORD_ACCESS 0
1744 #endif
1746 /* This hash function is quite simplified MurmurHash3
1747 * Simplification is legal, cause most of magic still happens in finalizator.
1748 * And finalizator is almost the same as in MurmurHash3 */
1749 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1750 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1752 #if ST_INDEX_BITS <= 32
1753 #define C1 (st_index_t)0xcc9e2d51
1754 #define C2 (st_index_t)0x1b873593
1755 #else
1756 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1757 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1758 #endif
1759 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1760 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1761 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1763 static inline st_index_t
1764 murmur_step(st_index_t h, st_index_t k)
1766 #if ST_INDEX_BITS <= 32
1767 #define r1 (17)
1768 #define r2 (11)
1769 #else
1770 #define r1 (33)
1771 #define r2 (24)
1772 #endif
1773 k *= C1;
1774 h ^= ROTL(k, r1);
1775 h *= C2;
1776 h = ROTL(h, r2);
1777 return h;
1779 #undef r1
1780 #undef r2
1782 static inline st_index_t
1783 murmur_finish(st_index_t h)
1785 #if ST_INDEX_BITS <= 32
1786 #define r1 (16)
1787 #define r2 (13)
1788 #define r3 (16)
1789 const st_index_t c1 = 0x85ebca6b;
1790 const st_index_t c2 = 0xc2b2ae35;
1791 #else
1792 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1793 #define r1 (30)
1794 #define r2 (27)
1795 #define r3 (31)
1796 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1797 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1798 #endif
1799 #if ST_INDEX_BITS > 64
1800 h ^= h >> 64;
1801 h *= c2;
1802 h ^= h >> 65;
1803 #endif
1804 h ^= h >> r1;
1805 h *= c1;
1806 h ^= h >> r2;
1807 h *= c2;
1808 h ^= h >> r3;
1809 return h;
1811 #undef r1
1812 #undef r2
1813 #undef r3
1815 st_index_t
1816 st_hash(const void *ptr, size_t len, st_index_t h)
1818 const char *data = ptr;
1819 st_index_t t = 0;
1820 size_t l = len;
1822 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1823 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1824 #if SIZEOF_ST_INDEX_T > 4
1825 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1826 #if SIZEOF_ST_INDEX_T > 8
1827 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1828 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1829 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1830 #endif
1831 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1832 #else
1833 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1834 #endif
1835 #undef SKIP_TAIL
1836 if (len >= sizeof(st_index_t)) {
1837 #if !UNALIGNED_WORD_ACCESS
1838 int align = (int)((st_data_t)data % sizeof(st_index_t));
1839 if (align) {
1840 st_index_t d = 0;
1841 int sl, sr, pack;
1843 switch (align) {
1844 #ifdef WORDS_BIGENDIAN
1845 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1846 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1847 #else
1848 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1849 t |= data_at(n) << CHAR_BIT*(n)
1850 #endif
1851 UNALIGNED_ADD_ALL;
1852 #undef UNALIGNED_ADD
1855 #ifdef WORDS_BIGENDIAN
1856 t >>= (CHAR_BIT * align) - CHAR_BIT;
1857 #else
1858 t <<= (CHAR_BIT * align);
1859 #endif
1861 data += sizeof(st_index_t)-align;
1862 len -= sizeof(st_index_t)-align;
1864 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1865 sr = CHAR_BIT * align;
1867 while (len >= sizeof(st_index_t)) {
1868 d = *(st_index_t *)data;
1869 #ifdef WORDS_BIGENDIAN
1870 t = (t << sr) | (d >> sl);
1871 #else
1872 t = (t >> sr) | (d << sl);
1873 #endif
1874 h = murmur_step(h, t);
1875 t = d;
1876 data += sizeof(st_index_t);
1877 len -= sizeof(st_index_t);
1880 pack = len < (size_t)align ? (int)len : align;
1881 d = 0;
1882 switch (pack) {
1883 #ifdef WORDS_BIGENDIAN
1884 # define UNALIGNED_ADD(n) case (n) + 1: \
1885 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1886 #else
1887 # define UNALIGNED_ADD(n) case (n) + 1: \
1888 d |= data_at(n) << CHAR_BIT*(n)
1889 #endif
1890 UNALIGNED_ADD_ALL;
1891 #undef UNALIGNED_ADD
1893 #ifdef WORDS_BIGENDIAN
1894 t = (t << sr) | (d >> sl);
1895 #else
1896 t = (t >> sr) | (d << sl);
1897 #endif
1899 if (len < (size_t)align) goto skip_tail;
1900 # define SKIP_TAIL 1
1901 h = murmur_step(h, t);
1902 data += pack;
1903 len -= pack;
1905 else
1906 #endif
1907 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1908 #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1909 #else
1910 #define aligned_data data
1911 #endif
1913 do {
1914 h = murmur_step(h, *(st_index_t *)aligned_data);
1915 data += sizeof(st_index_t);
1916 len -= sizeof(st_index_t);
1917 } while (len >= sizeof(st_index_t));
1921 t = 0;
1922 switch (len) {
1923 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1924 /* in this case byteorder doesn't really matter */
1925 #if SIZEOF_ST_INDEX_T > 4
1926 case 7: t |= data_at(6) << 48;
1927 case 6: t |= data_at(5) << 40;
1928 case 5: t |= data_at(4) << 32;
1929 case 4:
1930 t |= (st_index_t)*(uint32_t*)aligned_data;
1931 goto skip_tail;
1932 # define SKIP_TAIL 1
1933 #endif
1934 case 3: t |= data_at(2) << 16;
1935 case 2: t |= data_at(1) << 8;
1936 case 1: t |= data_at(0);
1937 #else
1938 #ifdef WORDS_BIGENDIAN
1939 # define UNALIGNED_ADD(n) case (n) + 1: \
1940 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1941 #else
1942 # define UNALIGNED_ADD(n) case (n) + 1: \
1943 t |= data_at(n) << CHAR_BIT*(n)
1944 #endif
1945 UNALIGNED_ADD_ALL;
1946 #undef UNALIGNED_ADD
1947 #endif
1948 #ifdef SKIP_TAIL
1949 skip_tail:
1950 #endif
1951 h ^= t; h -= ROTL(t, 7);
1952 h *= C2;
1954 h ^= l;
1955 #undef aligned_data
1957 return murmur_finish(h);
1960 st_index_t
1961 st_hash_uint32(st_index_t h, uint32_t i)
1963 return murmur_step(h, i);
1966 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1967 st_index_t
1968 st_hash_uint(st_index_t h, st_index_t i)
1970 i += h;
1971 /* no matter if it is BigEndian or LittleEndian,
1972 * we hash just integers */
1973 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1974 h = murmur_step(h, i >> 8*8);
1975 #endif
1976 h = murmur_step(h, i);
1977 return h;
1980 st_index_t
1981 st_hash_end(st_index_t h)
1983 h = murmur_finish(h);
1984 return h;
1987 #undef st_hash_start
1988 st_index_t
1989 rb_st_hash_start(st_index_t h)
1991 return h;
1994 static st_index_t
1995 strhash(st_data_t arg)
1997 register const char *string = (const char *)arg;
1998 return st_hash(string, strlen(string), FNV1_32A_INIT);
2002 st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2004 char c1, c2;
2006 while (1) {
2007 c1 = *s1++;
2008 c2 = *s2++;
2009 if (c1 == '\0' || c2 == '\0') {
2010 if (c1 != '\0') return 1;
2011 if (c2 != '\0') return -1;
2012 return 0;
2014 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2015 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2016 if (c1 != c2) {
2017 if (c1 > c2)
2018 return 1;
2019 else
2020 return -1;
2026 st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2028 char c1, c2;
2029 size_t i;
2031 for (i = 0; i < n; i++) {
2032 c1 = *s1++;
2033 c2 = *s2++;
2034 if (c1 == '\0' || c2 == '\0') {
2035 if (c1 != '\0') return 1;
2036 if (c2 != '\0') return -1;
2037 return 0;
2039 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2040 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2041 if (c1 != c2) {
2042 if (c1 > c2)
2043 return 1;
2044 else
2045 return -1;
2048 return 0;
2051 static int
2052 st_strcmp(st_data_t lhs, st_data_t rhs)
2054 const char *s1 = (char *)lhs;
2055 const char *s2 = (char *)rhs;
2056 return strcmp(s1, s2);
2059 static int
2060 st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2062 const char *s1 = (char *)lhs;
2063 const char *s2 = (char *)rhs;
2064 return st_locale_insensitive_strcasecmp(s1, s2);
2067 NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2068 static st_index_t
2069 strcasehash(st_data_t arg)
2071 register const char *string = (const char *)arg;
2072 register st_index_t hval = FNV1_32A_INIT;
2075 * FNV-1a hash each octet in the buffer
2077 while (*string) {
2078 unsigned int c = (unsigned char)*string++;
2079 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2080 hval ^= c;
2082 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2083 hval *= FNV_32_PRIME;
2085 return hval;
2089 st_numcmp(st_data_t x, st_data_t y)
2091 return x != y;
2094 st_index_t
2095 st_numhash(st_data_t n)
2097 enum {s1 = 11, s2 = 3};
2098 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2101 #ifdef RUBY
2102 /* Expand TAB to be suitable for holding SIZ entries in total.
2103 Pre-existing entries remain not deleted inside of TAB, but its bins
2104 are cleared to expect future reconstruction. See rehash below. */
2105 static void
2106 st_expand_table(st_table *tab, st_index_t siz)
2108 st_table *tmp;
2109 st_index_t n;
2111 if (siz <= get_allocated_entries(tab))
2112 return; /* enough room already */
2114 tmp = st_init_table_with_size(tab->type, siz);
2115 n = get_allocated_entries(tab);
2116 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2117 free(tab->entries);
2118 free(tab->bins);
2119 free(tmp->bins);
2120 tab->entry_power = tmp->entry_power;
2121 tab->bin_power = tmp->bin_power;
2122 tab->size_ind = tmp->size_ind;
2123 tab->entries = tmp->entries;
2124 tab->bins = NULL;
2125 tab->rebuilds_num++;
2126 free(tmp);
2129 /* Rehash using linear search. Return TRUE if we found that the table
2130 was rebuilt. */
2131 static int
2132 st_rehash_linear(st_table *tab)
2134 int eq_p, rebuilt_p;
2135 st_index_t i, j;
2136 st_table_entry *p, *q;
2138 free(tab->bins);
2139 tab->bins = NULL;
2141 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2142 p = &tab->entries[i];
2143 if (DELETED_ENTRY_P(p))
2144 continue;
2145 for (j = i + 1; j < tab->entries_bound; j++) {
2146 q = &tab->entries[j];
2147 if (DELETED_ENTRY_P(q))
2148 continue;
2149 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2150 if (EXPECT(rebuilt_p, 0))
2151 return TRUE;
2152 if (eq_p) {
2153 *p = *q;
2154 MARK_ENTRY_DELETED(q);
2155 tab->num_entries--;
2156 update_range_for_deleted(tab, j);
2160 return FALSE;
2163 /* Rehash using index. Return TRUE if we found that the table was
2164 rebuilt. */
2165 static int
2166 st_rehash_indexed(st_table *tab)
2168 int eq_p, rebuilt_p;
2169 st_index_t i;
2170 st_index_t const n = bins_size(tab);
2171 unsigned int const size_ind = get_size_ind(tab);
2172 st_index_t *bins = realloc(tab->bins, n);
2173 tab->bins = bins;
2174 initialize_bins(tab);
2175 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2176 st_table_entry *p = &tab->entries[i];
2177 st_index_t ind;
2178 #ifdef QUADRATIC_PROBE
2179 st_index_t d = 1;
2180 #else
2181 st_index_t perturb = p->hash;
2182 #endif
2184 if (DELETED_ENTRY_P(p))
2185 continue;
2187 ind = hash_bin(p->hash, tab);
2188 for (;;) {
2189 st_index_t bin = get_bin(bins, size_ind, ind);
2190 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2191 /* ok, new room */
2192 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2193 break;
2195 else {
2196 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2197 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2198 if (EXPECT(rebuilt_p, 0))
2199 return TRUE;
2200 if (eq_p) {
2201 /* duplicated key; delete it */
2202 q->record = p->record;
2203 MARK_ENTRY_DELETED(p);
2204 tab->num_entries--;
2205 update_range_for_deleted(tab, bin);
2206 break;
2208 else {
2209 /* hash collision; skip it */
2210 #ifdef QUADRATIC_PROBE
2211 ind = hash_bin(ind + d, tab);
2212 d++;
2213 #else
2214 ind = secondary_hash(ind, tab, &perturb);
2215 #endif
2220 return FALSE;
2223 /* Reconstruct TAB's bins according to TAB's entries. This function
2224 permits conflicting keys inside of entries. No errors are reported
2225 then. All but one of them are discarded silently. */
2226 static void
2227 st_rehash(st_table *tab)
2229 int rebuilt_p;
2231 do {
2232 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2233 rebuilt_p = st_rehash_linear(tab);
2234 else
2235 rebuilt_p = st_rehash_indexed(tab);
2236 } while (rebuilt_p);
2239 static st_data_t
2240 st_stringify(VALUE key)
2242 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2243 rb_hash_key_str(key) : key;
2246 static void
2247 st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2249 st_data_t k = st_stringify(key);
2250 st_table_entry e;
2251 e.hash = do_hash(k, tab);
2252 e.key = k;
2253 e.record = val;
2255 tab->entries[tab->entries_bound++] = e;
2256 tab->num_entries++;
2257 RB_OBJ_WRITTEN(hash, Qundef, k);
2258 RB_OBJ_WRITTEN(hash, Qundef, val);
2261 static void
2262 st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2264 long i;
2266 for (i = 0; i < argc; /* */) {
2267 st_data_t k = st_stringify(argv[i++]);
2268 st_data_t v = argv[i++];
2269 st_insert(tab, k, v);
2270 RB_OBJ_WRITTEN(hash, Qundef, k);
2271 RB_OBJ_WRITTEN(hash, Qundef, v);
2275 static void
2276 st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2278 long i;
2280 /* push elems */
2281 for (i = 0; i < argc; /* */) {
2282 VALUE key = argv[i++];
2283 VALUE val = argv[i++];
2284 st_insert_single(tab, hash, key, val);
2287 /* reindex */
2288 st_rehash(tab);
2291 /* Mimics ruby's { foo => bar } syntax. This function is subpart
2292 of rb_hash_bulk_insert. */
2293 void
2294 rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2296 st_index_t n, size = argc / 2;
2297 st_table *tab = RHASH_ST_TABLE(hash);
2299 tab = RHASH_TBL_RAW(hash);
2300 n = tab->entries_bound + size;
2301 st_expand_table(tab, n);
2302 if (UNLIKELY(tab->num_entries))
2303 st_insert_generic(tab, argc, argv, hash);
2304 else if (argc <= 2)
2305 st_insert_single(tab, hash, argv[0], argv[1]);
2306 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2307 st_insert_linear(tab, argc, argv, hash);
2308 else
2309 st_insert_generic(tab, argc, argv, hash);
2312 // to iterate iv_index_tbl
2313 st_data_t
2314 rb_st_nth_key(st_table *tab, st_index_t index)
2316 if (LIKELY(tab->entries_start == 0 &&
2317 tab->num_entries == tab->entries_bound &&
2318 index < tab->num_entries)) {
2319 return tab->entries[index].key;
2321 else {
2322 rb_bug("unreachable");
2326 void
2327 rb_st_compact_table(st_table *tab)
2329 st_index_t num = tab->num_entries;
2330 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2331 /* Compaction: */
2332 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2333 rebuild_table_with(new_tab, tab);
2334 rebuild_move_table(new_tab, tab);
2335 rebuild_cleanup(tab);
2339 #endif