1 /* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
7 /* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
15 The hash table has two arrays called *bins* and *entries*.
20 |-------| --------------------------------
21 | index | | | entry: | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
28 |-------| |_ entries start |_ entries bound
32 o The entry array contains table entries in the same order as they
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
106 #elif defined RUBY_EXPORT
107 #include "internal.h"
108 #include "internal/bits.h"
109 #include "internal/hash.h"
110 #include "internal/sanitizers.h"
111 #include "internal/st.h"
122 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
123 #define EXPECT(expr, val) __builtin_expect(expr, val)
124 #define ATTRIBUTE_UNUSED __attribute__((unused))
126 #define PREFETCH(addr, write_p)
127 #define EXPECT(expr, val) (expr)
128 #define ATTRIBUTE_UNUSED
131 /* The type of hashes. */
132 typedef st_index_t st_hash_t
;
134 struct st_table_entry
{
140 #define type_numhash st_hashtype_num
141 static const struct st_hash_type st_hashtype_num
= {
146 static int st_strcmp(st_data_t
, st_data_t
);
147 static st_index_t
strhash(st_data_t
);
148 static const struct st_hash_type type_strhash
= {
153 static int st_locale_insensitive_strcasecmp_i(st_data_t lhs
, st_data_t rhs
);
154 static st_index_t
strcasehash(st_data_t
);
155 static const struct st_hash_type type_strcasehash
= {
156 st_locale_insensitive_strcasecmp_i
,
160 /* Value used to catch uninitialized entries/bins during debugging.
161 There is a possibility for a false alarm, but its probability is
163 #define ST_INIT_VAL 0xafafafafafafafaf
164 #define ST_INIT_VAL_BYTE 0xafa
171 #define malloc ruby_xmalloc
172 #define calloc ruby_xcalloc
173 #define realloc ruby_xrealloc
174 #define free ruby_xfree
177 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
178 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
179 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
181 /* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
182 up to TRUE if the table is rebuilt during the comparison. */
183 #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
185 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
186 res = PTR_EQUAL(tab, ptr, hash_val, key); \
187 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
190 /* Features of a table. */
192 /* Power of 2 used for number of allocated entries. */
193 unsigned char entry_power
;
194 /* Power of 2 used for number of allocated bins. Depending on the
195 table size, the number of bins is 2-4 times more than the
196 number of entries. */
197 unsigned char bin_power
;
198 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
199 unsigned char size_ind
;
200 /* Bins are packed in words of type st_index_t. The following is
201 a size of bins counted by words. */
202 st_index_t bins_words
;
205 /* Features of all possible size tables. */
206 #if SIZEOF_ST_INDEX_T == 8
207 #define MAX_POWER2 62
208 static const struct st_features features
[] = {
225 {16, 17, 2, 0x10000},
226 {17, 18, 2, 0x20000},
227 {18, 19, 2, 0x40000},
228 {19, 20, 2, 0x80000},
229 {20, 21, 2, 0x100000},
230 {21, 22, 2, 0x200000},
231 {22, 23, 2, 0x400000},
232 {23, 24, 2, 0x800000},
233 {24, 25, 2, 0x1000000},
234 {25, 26, 2, 0x2000000},
235 {26, 27, 2, 0x4000000},
236 {27, 28, 2, 0x8000000},
237 {28, 29, 2, 0x10000000},
238 {29, 30, 2, 0x20000000},
239 {30, 31, 2, 0x40000000},
240 {31, 32, 2, 0x80000000},
241 {32, 33, 3, 0x200000000},
242 {33, 34, 3, 0x400000000},
243 {34, 35, 3, 0x800000000},
244 {35, 36, 3, 0x1000000000},
245 {36, 37, 3, 0x2000000000},
246 {37, 38, 3, 0x4000000000},
247 {38, 39, 3, 0x8000000000},
248 {39, 40, 3, 0x10000000000},
249 {40, 41, 3, 0x20000000000},
250 {41, 42, 3, 0x40000000000},
251 {42, 43, 3, 0x80000000000},
252 {43, 44, 3, 0x100000000000},
253 {44, 45, 3, 0x200000000000},
254 {45, 46, 3, 0x400000000000},
255 {46, 47, 3, 0x800000000000},
256 {47, 48, 3, 0x1000000000000},
257 {48, 49, 3, 0x2000000000000},
258 {49, 50, 3, 0x4000000000000},
259 {50, 51, 3, 0x8000000000000},
260 {51, 52, 3, 0x10000000000000},
261 {52, 53, 3, 0x20000000000000},
262 {53, 54, 3, 0x40000000000000},
263 {54, 55, 3, 0x80000000000000},
264 {55, 56, 3, 0x100000000000000},
265 {56, 57, 3, 0x200000000000000},
266 {57, 58, 3, 0x400000000000000},
267 {58, 59, 3, 0x800000000000000},
268 {59, 60, 3, 0x1000000000000000},
269 {60, 61, 3, 0x2000000000000000},
270 {61, 62, 3, 0x4000000000000000},
271 {62, 63, 3, 0x8000000000000000},
275 #define MAX_POWER2 30
277 static const struct st_features features
[] = {
294 {16, 17, 2, 0x20000},
295 {17, 18, 2, 0x40000},
296 {18, 19, 2, 0x80000},
297 {19, 20, 2, 0x100000},
298 {20, 21, 2, 0x200000},
299 {21, 22, 2, 0x400000},
300 {22, 23, 2, 0x800000},
301 {23, 24, 2, 0x1000000},
302 {24, 25, 2, 0x2000000},
303 {25, 26, 2, 0x4000000},
304 {26, 27, 2, 0x8000000},
305 {27, 28, 2, 0x10000000},
306 {28, 29, 2, 0x20000000},
307 {29, 30, 2, 0x40000000},
308 {30, 31, 2, 0x80000000},
313 /* The reserved hash value and its substitution. */
314 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
315 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
317 /* Return hash value of KEY for table TAB. */
318 static inline st_hash_t
319 do_hash(st_data_t key
, st_table
*tab
)
321 st_hash_t hash
= (st_hash_t
)(tab
->type
->hash
)(key
);
323 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
324 another value. Such mapping should be extremely rare. */
325 return hash
== RESERVED_HASH_VAL
? RESERVED_HASH_SUBSTITUTION_VAL
: hash
;
328 /* Power of 2 defining the minimal number of allocated entries. */
329 #define MINIMAL_POWER2 2
331 #if MINIMAL_POWER2 < 2
332 #error "MINIMAL_POWER2 should be >= 2"
335 /* If the power2 of the allocated `entries` is less than the following
336 value, don't allocate bins and use a linear search. */
337 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
339 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
341 get_power2(st_index_t size
)
343 unsigned int n
= ST_INDEX_BITS
- nlz_intptr(size
);
345 return n
< MINIMAL_POWER2
? MINIMAL_POWER2
: n
;
347 /* Ran out of the table entries */
348 rb_raise(rb_eRuntimeError
, "st_table too big");
350 /* should raise exception */
354 /* Return value of N-th bin in array BINS of table with bins size
356 static inline st_index_t
357 get_bin(st_index_t
*bins
, int s
, st_index_t n
)
359 return (s
== 0 ? ((unsigned char *) bins
)[n
]
360 : s
== 1 ? ((unsigned short *) bins
)[n
]
361 : s
== 2 ? ((unsigned int *) bins
)[n
]
362 : ((st_index_t
*) bins
)[n
]);
365 /* Set up N-th bin in array BINS of table with bins size index S to
368 set_bin(st_index_t
*bins
, int s
, st_index_t n
, st_index_t v
)
370 if (s
== 0) ((unsigned char *) bins
)[n
] = (unsigned char) v
;
371 else if (s
== 1) ((unsigned short *) bins
)[n
] = (unsigned short) v
;
372 else if (s
== 2) ((unsigned int *) bins
)[n
] = (unsigned int) v
;
373 else ((st_index_t
*) bins
)[n
] = v
;
376 /* These macros define reserved values for empty table bin and table
377 bin which contains a deleted entry. We will never use such values
378 for an entry index in bins. */
380 #define DELETED_BIN 1
381 /* Base of a real entry index in the bins. */
384 /* Mark I-th bin of table TAB as empty, in other words not
385 corresponding to any entry. */
386 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
388 /* Values used for not found entry and bin with given
390 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
391 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
393 /* Entry and bin values returned when we found a table rebuild during
395 #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
396 #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
398 /* Mark I-th bin of table TAB as corresponding to a deleted table
399 entry. Update number of entries in the table and number of bins
400 corresponding to deleted entries. */
401 #define MARK_BIN_DELETED(tab, i) \
403 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
406 /* Macros to check that value B is used empty bins and bins
407 corresponding deleted entries. */
408 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
409 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
410 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
412 /* Macros to check empty bins and bins corresponding to deleted
413 entries. Bins are given by their index I in table TAB. */
414 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
416 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
418 /* Macros for marking and checking deleted entries given by their
420 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
421 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
423 /* Return bin size index of table TAB. */
424 static inline unsigned int
425 get_size_ind(const st_table
*tab
)
427 return tab
->size_ind
;
430 /* Return the number of allocated bins of table TAB. */
431 static inline st_index_t
432 get_bins_num(const st_table
*tab
)
434 return ((st_index_t
) 1)<<tab
->bin_power
;
437 /* Return mask for a bin index in table TAB. */
438 static inline st_index_t
439 bins_mask(const st_table
*tab
)
441 return get_bins_num(tab
) - 1;
444 /* Return the index of table TAB bin corresponding to
446 static inline st_index_t
447 hash_bin(st_hash_t hash_value
, st_table
*tab
)
449 return hash_value
& bins_mask(tab
);
452 /* Return the number of allocated entries of table TAB. */
453 static inline st_index_t
454 get_allocated_entries(const st_table
*tab
)
456 return ((st_index_t
) 1)<<tab
->entry_power
;
459 /* Return size of the allocated bins of table TAB. */
460 static inline st_index_t
461 bins_size(const st_table
*tab
)
463 return features
[tab
->entry_power
].bins_words
* sizeof (st_index_t
);
466 /* Mark all bins of table TAB as empty. */
468 initialize_bins(st_table
*tab
)
470 memset(tab
->bins
, 0, bins_size(tab
));
473 /* Make table TAB empty. */
475 make_tab_empty(st_table
*tab
)
477 tab
->num_entries
= 0;
478 tab
->entries_start
= tab
->entries_bound
= 0;
479 if (tab
->bins
!= NULL
)
480 initialize_bins(tab
);
488 int all
, total
, num
, str
, strcase
;
491 /* Flag switching off output of package statistics at the end of
493 static int init_st
= 0;
495 /* Output overall number of table searches and collisions into a
500 char fname
[10+sizeof(long)*3];
502 if (!collision
.total
) return;
503 f
= fopen((snprintf(fname
, sizeof(fname
), "/tmp/col%ld", (long)getpid()), fname
), "w");
506 fprintf(f
, "collision: %d / %d (%6.2f)\n", collision
.all
, collision
.total
,
507 ((double)collision
.all
/ (collision
.total
)) * 100);
508 fprintf(f
, "num: %d, str: %d, strcase: %d\n", collision
.num
, collision
.str
, collision
.strcase
);
514 st_init_existing_table_with_size(st_table
*tab
, const struct st_hash_type
*type
, st_index_t size
)
521 const char *e
= getenv("ST_HASH_LOG");
522 if (!e
|| !*e
) init_st
= 1;
531 n
= get_power2(size
);
538 tab
->entry_power
= n
;
539 tab
->bin_power
= features
[n
].bin_power
;
540 tab
->size_ind
= features
[n
].size_ind
;
541 if (n
<= MAX_POWER2_FOR_TABLES_WITHOUT_BINS
)
544 tab
->bins
= (st_index_t
*) malloc(bins_size(tab
));
546 if (tab
->bins
== NULL
) {
552 tab
->entries
= (st_table_entry
*) malloc(get_allocated_entries(tab
)
553 * sizeof(st_table_entry
));
555 if (tab
->entries
== NULL
) {
561 tab
->rebuilds_num
= 0;
565 /* Create and return table with TYPE which can hold at least SIZE
566 entries. The real number of entries which the table can hold is
567 the nearest power of two for SIZE. */
569 st_init_table_with_size(const struct st_hash_type
*type
, st_index_t size
)
571 st_table
*tab
= malloc(sizeof(st_table
));
578 st_init_existing_table_with_size(tab
, type
, size
);
580 if (st_init_existing_table_with_size(tab
, type
, size
) == NULL
) {
590 st_table_size(const struct st_table
*tbl
)
592 return tbl
->num_entries
;
595 /* Create and return table with TYPE which can hold a minimal number
596 of entries (see comments for get_power2). */
598 st_init_table(const struct st_hash_type
*type
)
600 return st_init_table_with_size(type
, 0);
603 /* Create and return table which can hold a minimal number of
606 st_init_numtable(void)
608 return st_init_table(&type_numhash
);
611 /* Create and return table which can hold SIZE numbers. */
613 st_init_numtable_with_size(st_index_t size
)
615 return st_init_table_with_size(&type_numhash
, size
);
618 /* Create and return table which can hold a minimal number of
621 st_init_strtable(void)
623 return st_init_table(&type_strhash
);
626 /* Create and return table which can hold SIZE strings. */
628 st_init_strtable_with_size(st_index_t size
)
630 return st_init_table_with_size(&type_strhash
, size
);
633 /* Create and return table which can hold a minimal number of strings
634 whose character case is ignored. */
636 st_init_strcasetable(void)
638 return st_init_table(&type_strcasehash
);
641 /* Create and return table which can hold SIZE strings whose character
644 st_init_strcasetable_with_size(st_index_t size
)
646 return st_init_table_with_size(&type_strcasehash
, size
);
649 /* Make table TAB empty. */
651 st_clear(st_table
*tab
)
657 /* Free table TAB space. */
659 st_free_table(st_table
*tab
)
666 /* Return byte size of memory allocated for table TAB. */
668 st_memsize(const st_table
*tab
)
670 return(sizeof(st_table
)
671 + (tab
->bins
== NULL
? 0 : bins_size(tab
))
672 + get_allocated_entries(tab
) * sizeof(st_table_entry
));
676 find_table_entry_ind(st_table
*tab
, st_hash_t hash_value
, st_data_t key
);
679 find_table_bin_ind(st_table
*tab
, st_hash_t hash_value
, st_data_t key
);
682 find_table_bin_ind_direct(st_table
*table
, st_hash_t hash_value
, st_data_t key
);
685 find_table_bin_ptr_and_reserve(st_table
*tab
, st_hash_t
*hash_value
,
686 st_data_t key
, st_index_t
*bin_ind
);
690 count_collision(const struct st_hash_type
*type
)
693 if (type
== &type_numhash
) {
696 else if (type
== &type_strhash
) {
699 else if (type
== &type_strcasehash
) {
704 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
705 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
706 #define collision_check 0
712 /* If the number of entries in the table is at least REBUILD_THRESHOLD
713 times less than the entry array length, decrease the table
715 #define REBUILD_THRESHOLD 4
717 #if REBUILD_THRESHOLD < 2
718 #error "REBUILD_THRESHOLD should be >= 2"
721 static void rebuild_table_with(st_table
*const new_tab
, st_table
*const tab
);
722 static void rebuild_move_table(st_table
*const new_tab
, st_table
*const tab
);
723 static void rebuild_cleanup(st_table
*const tab
);
725 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
726 and can change size of the table entries and bins arrays.
727 Rebuilding is implemented by creation of a new table or by
728 compaction of the existing one. */
730 rebuild_table(st_table
*tab
)
732 if ((2 * tab
->num_entries
<= get_allocated_entries(tab
)
733 && REBUILD_THRESHOLD
* tab
->num_entries
> get_allocated_entries(tab
))
734 || tab
->num_entries
< (1 << MINIMAL_POWER2
)) {
736 tab
->num_entries
= 0;
737 if (tab
->bins
!= NULL
)
738 initialize_bins(tab
);
739 rebuild_table_with(tab
, tab
);
743 /* This allocation could trigger GC and compaction. If tab is the
744 * gen_iv_tbl, then tab could have changed in size due to objects being
745 * freed and/or moved. Do not store attributes of tab before this line. */
746 new_tab
= st_init_table_with_size(tab
->type
,
747 2 * tab
->num_entries
- 1);
748 rebuild_table_with(new_tab
, tab
);
749 rebuild_move_table(new_tab
, tab
);
751 rebuild_cleanup(tab
);
755 rebuild_table_with(st_table
*const new_tab
, st_table
*const tab
)
758 unsigned int size_ind
;
759 st_table_entry
*new_entries
;
760 st_table_entry
*curr_entry_ptr
;
764 new_entries
= new_tab
->entries
;
767 bins
= new_tab
->bins
;
768 size_ind
= get_size_ind(new_tab
);
769 st_index_t bound
= tab
->entries_bound
;
770 st_table_entry
*entries
= tab
->entries
;
772 for (i
= tab
->entries_start
; i
< bound
; i
++) {
773 curr_entry_ptr
= &entries
[i
];
774 PREFETCH(entries
+ i
+ 1, 0);
775 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr
), 0))
777 if (&new_entries
[ni
] != curr_entry_ptr
)
778 new_entries
[ni
] = *curr_entry_ptr
;
779 if (EXPECT(bins
!= NULL
, 1)) {
780 bin_ind
= find_table_bin_ind_direct(new_tab
, curr_entry_ptr
->hash
,
781 curr_entry_ptr
->key
);
782 set_bin(bins
, size_ind
, bin_ind
, ni
+ ENTRY_BASE
);
784 new_tab
->num_entries
++;
790 rebuild_move_table(st_table
*const new_tab
, st_table
*const tab
)
792 tab
->entry_power
= new_tab
->entry_power
;
793 tab
->bin_power
= new_tab
->bin_power
;
794 tab
->size_ind
= new_tab
->size_ind
;
796 tab
->bins
= new_tab
->bins
;
798 tab
->entries
= new_tab
->entries
;
803 rebuild_cleanup(st_table
*const tab
)
805 tab
->entries_start
= 0;
806 tab
->entries_bound
= tab
->num_entries
;
810 /* Return the next secondary hash index for table TAB using previous
811 index IND and PERTURB. Finally modulo of the function becomes a
812 full *cycle linear congruential generator*, in other words it
813 guarantees traversing all table bins in extreme case.
815 According the Hull-Dobell theorem a generator
816 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
817 o m and c are relatively prime
818 o a-1 is divisible by all prime factors of m
819 o a-1 is divisible by 4 if m is divisible by 4.
821 For our case a is 5, c is 1, and m is a power of two. */
822 static inline st_index_t
823 secondary_hash(st_index_t ind
, st_table
*tab
, st_index_t
*perturb
)
826 ind
= (ind
<< 2) + ind
+ *perturb
+ 1;
827 return hash_bin(ind
, tab
);
830 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
831 search. Return the index of the found entry in array `entries`.
832 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
833 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
834 static inline st_index_t
835 find_entry(st_table
*tab
, st_hash_t hash_value
, st_data_t key
)
839 st_table_entry
*entries
;
841 bound
= tab
->entries_bound
;
842 entries
= tab
->entries
;
843 for (i
= tab
->entries_start
; i
< bound
; i
++) {
844 DO_PTR_EQUAL_CHECK(tab
, &entries
[i
], hash_value
, key
, eq_p
, rebuilt_p
);
845 if (EXPECT(rebuilt_p
, 0))
846 return REBUILT_TABLE_ENTRY_IND
;
850 return UNDEFINED_ENTRY_IND
;
853 /* Use the quadratic probing. The method has a better data locality
854 but more collisions than the current approach. In average it
855 results in a bit slower search. */
856 /*#define QUADRATIC_PROBE*/
858 /* Return index of entry with HASH_VALUE and KEY in table TAB. If
859 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
860 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
862 find_table_entry_ind(st_table
*tab
, st_hash_t hash_value
, st_data_t key
)
866 #ifdef QUADRATIC_PROBE
872 st_table_entry
*entries
= tab
->entries
;
874 ind
= hash_bin(hash_value
, tab
);
875 #ifdef QUADRATIC_PROBE
878 perturb
= hash_value
;
882 bin
= get_bin(tab
->bins
, get_size_ind(tab
), ind
);
883 if (! EMPTY_OR_DELETED_BIN_P(bin
)) {
884 DO_PTR_EQUAL_CHECK(tab
, &entries
[bin
- ENTRY_BASE
], hash_value
, key
, eq_p
, rebuilt_p
);
885 if (EXPECT(rebuilt_p
, 0))
886 return REBUILT_TABLE_ENTRY_IND
;
890 else if (EMPTY_BIN_P(bin
))
891 return UNDEFINED_ENTRY_IND
;
892 #ifdef QUADRATIC_PROBE
893 ind
= hash_bin(ind
+ d
, tab
);
896 ind
= secondary_hash(ind
, tab
, &perturb
);
903 /* Find and return index of table TAB bin corresponding to an entry
904 with HASH_VALUE and KEY. If there is no such bin, return
905 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
906 return REBUILT_TABLE_BIN_IND. */
908 find_table_bin_ind(st_table
*tab
, st_hash_t hash_value
, st_data_t key
)
912 #ifdef QUADRATIC_PROBE
918 st_table_entry
*entries
= tab
->entries
;
920 ind
= hash_bin(hash_value
, tab
);
921 #ifdef QUADRATIC_PROBE
924 perturb
= hash_value
;
928 bin
= get_bin(tab
->bins
, get_size_ind(tab
), ind
);
929 if (! EMPTY_OR_DELETED_BIN_P(bin
)) {
930 DO_PTR_EQUAL_CHECK(tab
, &entries
[bin
- ENTRY_BASE
], hash_value
, key
, eq_p
, rebuilt_p
);
931 if (EXPECT(rebuilt_p
, 0))
932 return REBUILT_TABLE_BIN_IND
;
936 else if (EMPTY_BIN_P(bin
))
937 return UNDEFINED_BIN_IND
;
938 #ifdef QUADRATIC_PROBE
939 ind
= hash_bin(ind
+ d
, tab
);
942 ind
= secondary_hash(ind
, tab
, &perturb
);
949 /* Find and return index of table TAB bin corresponding to an entry
950 with HASH_VALUE and KEY. The entry should be in the table
953 find_table_bin_ind_direct(st_table
*tab
, st_hash_t hash_value
, st_data_t key
)
956 #ifdef QUADRATIC_PROBE
963 ind
= hash_bin(hash_value
, tab
);
964 #ifdef QUADRATIC_PROBE
967 perturb
= hash_value
;
971 bin
= get_bin(tab
->bins
, get_size_ind(tab
), ind
);
972 if (EMPTY_OR_DELETED_BIN_P(bin
))
974 #ifdef QUADRATIC_PROBE
975 ind
= hash_bin(ind
+ d
, tab
);
978 ind
= secondary_hash(ind
, tab
, &perturb
);
984 /* Return index of table TAB bin for HASH_VALUE and KEY through
985 BIN_IND and the pointed value as the function result. Reserve the
986 bin for inclusion of the corresponding entry into the table if it
987 is not there yet. We always find such bin as bins array length is
988 bigger entries array. Although we can reuse a deleted bin, the
989 result bin value is always empty if the table has no entry with
990 KEY. Return the entries array index of the found entry or
991 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
992 during the search, return REBUILT_TABLE_ENTRY_IND. */
994 find_table_bin_ptr_and_reserve(st_table
*tab
, st_hash_t
*hash_value
,
995 st_data_t key
, st_index_t
*bin_ind
)
999 st_hash_t curr_hash_value
= *hash_value
;
1000 #ifdef QUADRATIC_PROBE
1005 st_index_t entry_index
;
1006 st_index_t first_deleted_bin_ind
;
1007 st_table_entry
*entries
;
1009 ind
= hash_bin(curr_hash_value
, tab
);
1010 #ifdef QUADRATIC_PROBE
1013 perturb
= curr_hash_value
;
1016 first_deleted_bin_ind
= UNDEFINED_BIN_IND
;
1017 entries
= tab
->entries
;
1019 entry_index
= get_bin(tab
->bins
, get_size_ind(tab
), ind
);
1020 if (EMPTY_BIN_P(entry_index
)) {
1022 entry_index
= UNDEFINED_ENTRY_IND
;
1023 if (first_deleted_bin_ind
!= UNDEFINED_BIN_IND
) {
1024 /* We can reuse bin of a deleted entry. */
1025 ind
= first_deleted_bin_ind
;
1026 MARK_BIN_EMPTY(tab
, ind
);
1030 else if (! DELETED_BIN_P(entry_index
)) {
1031 DO_PTR_EQUAL_CHECK(tab
, &entries
[entry_index
- ENTRY_BASE
], curr_hash_value
, key
, eq_p
, rebuilt_p
);
1032 if (EXPECT(rebuilt_p
, 0))
1033 return REBUILT_TABLE_ENTRY_IND
;
1037 else if (first_deleted_bin_ind
== UNDEFINED_BIN_IND
)
1038 first_deleted_bin_ind
= ind
;
1039 #ifdef QUADRATIC_PROBE
1040 ind
= hash_bin(ind
+ d
, tab
);
1043 ind
= secondary_hash(ind
, tab
, &perturb
);
1051 /* Find an entry with KEY in table TAB. Return non-zero if we found
1052 it. Set up *RECORD to the found entry record. */
1054 st_lookup(st_table
*tab
, st_data_t key
, st_data_t
*value
)
1057 st_hash_t hash
= do_hash(key
, tab
);
1060 if (tab
->bins
== NULL
) {
1061 bin
= find_entry(tab
, hash
, key
);
1062 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1064 if (bin
== UNDEFINED_ENTRY_IND
)
1068 bin
= find_table_entry_ind(tab
, hash
, key
);
1069 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1071 if (bin
== UNDEFINED_ENTRY_IND
)
1076 *value
= tab
->entries
[bin
].record
;
1080 /* Find an entry with KEY in table TAB. Return non-zero if we found
1081 it. Set up *RESULT to the found table entry key. */
1083 st_get_key(st_table
*tab
, st_data_t key
, st_data_t
*result
)
1086 st_hash_t hash
= do_hash(key
, tab
);
1089 if (tab
->bins
== NULL
) {
1090 bin
= find_entry(tab
, hash
, key
);
1091 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1093 if (bin
== UNDEFINED_ENTRY_IND
)
1097 bin
= find_table_entry_ind(tab
, hash
, key
);
1098 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1100 if (bin
== UNDEFINED_ENTRY_IND
)
1105 *result
= tab
->entries
[bin
].key
;
1109 /* Check the table and rebuild it if it is necessary. */
1111 rebuild_table_if_necessary (st_table
*tab
)
1113 st_index_t bound
= tab
->entries_bound
;
1115 if (bound
== get_allocated_entries(tab
))
1119 /* Insert (KEY, VALUE) into table TAB and return zero. If there is
1120 already entry with KEY in the table, return nonzero and update
1121 the value of the found entry. */
1123 st_insert(st_table
*tab
, st_data_t key
, st_data_t value
)
1125 st_table_entry
*entry
;
1128 st_hash_t hash_value
;
1132 hash_value
= do_hash(key
, tab
);
1134 rebuild_table_if_necessary(tab
);
1135 if (tab
->bins
== NULL
) {
1136 bin
= find_entry(tab
, hash_value
, key
);
1137 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1139 new_p
= bin
== UNDEFINED_ENTRY_IND
;
1142 bin_ind
= UNDEFINED_BIN_IND
;
1145 bin
= find_table_bin_ptr_and_reserve(tab
, &hash_value
,
1147 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1149 new_p
= bin
== UNDEFINED_ENTRY_IND
;
1153 ind
= tab
->entries_bound
++;
1154 entry
= &tab
->entries
[ind
];
1155 entry
->hash
= hash_value
;
1157 entry
->record
= value
;
1158 if (bin_ind
!= UNDEFINED_BIN_IND
)
1159 set_bin(tab
->bins
, get_size_ind(tab
), bin_ind
, ind
+ ENTRY_BASE
);
1162 tab
->entries
[bin
].record
= value
;
1166 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1167 entry with KEY before the insertion. */
1169 st_add_direct_with_hash(st_table
*tab
,
1170 st_data_t key
, st_data_t value
, st_hash_t hash
)
1172 st_table_entry
*entry
;
1176 rebuild_table_if_necessary(tab
);
1177 ind
= tab
->entries_bound
++;
1178 entry
= &tab
->entries
[ind
];
1181 entry
->record
= value
;
1183 if (tab
->bins
!= NULL
) {
1184 bin_ind
= find_table_bin_ind_direct(tab
, hash
, key
);
1185 set_bin(tab
->bins
, get_size_ind(tab
), bin_ind
, ind
+ ENTRY_BASE
);
1190 rb_st_add_direct_with_hash(st_table
*tab
,
1191 st_data_t key
, st_data_t value
, st_hash_t hash
)
1193 st_add_direct_with_hash(tab
, key
, value
, hash
);
1196 /* Insert (KEY, VALUE) into table TAB. The table should not have
1197 entry with KEY before the insertion. */
1199 st_add_direct(st_table
*tab
, st_data_t key
, st_data_t value
)
1201 st_hash_t hash_value
;
1203 hash_value
= do_hash(key
, tab
);
1204 st_add_direct_with_hash(tab
, key
, value
, hash_value
);
1207 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1208 there is already entry with KEY in the table, return nonzero and
1209 update the value of the found entry. */
1211 st_insert2(st_table
*tab
, st_data_t key
, st_data_t value
,
1212 st_data_t (*func
)(st_data_t
))
1214 st_table_entry
*entry
;
1217 st_hash_t hash_value
;
1221 hash_value
= do_hash(key
, tab
);
1223 rebuild_table_if_necessary (tab
);
1224 if (tab
->bins
== NULL
) {
1225 bin
= find_entry(tab
, hash_value
, key
);
1226 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1228 new_p
= bin
== UNDEFINED_ENTRY_IND
;
1231 bin_ind
= UNDEFINED_BIN_IND
;
1234 bin
= find_table_bin_ptr_and_reserve(tab
, &hash_value
,
1236 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1238 new_p
= bin
== UNDEFINED_ENTRY_IND
;
1243 ind
= tab
->entries_bound
++;
1244 entry
= &tab
->entries
[ind
];
1245 entry
->hash
= hash_value
;
1247 entry
->record
= value
;
1248 if (bin_ind
!= UNDEFINED_BIN_IND
)
1249 set_bin(tab
->bins
, get_size_ind(tab
), bin_ind
, ind
+ ENTRY_BASE
);
1252 tab
->entries
[bin
].record
= value
;
1256 /* Create a copy of old_tab into new_tab. */
1258 st_replace(st_table
*new_tab
, st_table
*old_tab
)
1260 *new_tab
= *old_tab
;
1261 if (old_tab
->bins
== NULL
)
1262 new_tab
->bins
= NULL
;
1264 new_tab
->bins
= (st_index_t
*) malloc(bins_size(old_tab
));
1266 if (new_tab
->bins
== NULL
) {
1271 new_tab
->entries
= (st_table_entry
*) malloc(get_allocated_entries(old_tab
)
1272 * sizeof(st_table_entry
));
1274 if (new_tab
->entries
== NULL
) {
1278 MEMCPY(new_tab
->entries
, old_tab
->entries
, st_table_entry
,
1279 get_allocated_entries(old_tab
));
1280 if (old_tab
->bins
!= NULL
)
1281 MEMCPY(new_tab
->bins
, old_tab
->bins
, char, bins_size(old_tab
));
1286 /* Create and return a copy of table OLD_TAB. */
1288 st_copy(st_table
*old_tab
)
1292 new_tab
= (st_table
*) malloc(sizeof(st_table
));
1294 if (new_tab
== NULL
)
1298 if (st_replace(new_tab
, old_tab
) == NULL
) {
1299 st_free_table(new_tab
);
1306 /* Update the entries start of table TAB after removing an entry
1307 with index N in the array entries. */
1309 update_range_for_deleted(st_table
*tab
, st_index_t n
)
1311 /* Do not update entries_bound here. Otherwise, we can fill all
1312 bins by deleted entry value before rebuilding the table. */
1313 if (tab
->entries_start
== n
) {
1314 st_index_t start
= n
+ 1;
1315 st_index_t bound
= tab
->entries_bound
;
1316 st_table_entry
*entries
= tab
->entries
;
1317 while (start
< bound
&& DELETED_ENTRY_P(&entries
[start
])) start
++;
1318 tab
->entries_start
= start
;
1322 /* Delete entry with KEY from table TAB, set up *VALUE (unless
1323 VALUE is zero) from deleted table entry, and return non-zero. If
1324 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1325 is zero), and return zero. */
1327 st_general_delete(st_table
*tab
, st_data_t
*key
, st_data_t
*value
)
1329 st_table_entry
*entry
;
1334 hash
= do_hash(*key
, tab
);
1336 if (tab
->bins
== NULL
) {
1337 bin
= find_entry(tab
, hash
, *key
);
1338 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1340 if (bin
== UNDEFINED_ENTRY_IND
) {
1341 if (value
!= 0) *value
= 0;
1346 bin_ind
= find_table_bin_ind(tab
, hash
, *key
);
1347 if (EXPECT(bin_ind
== REBUILT_TABLE_BIN_IND
, 0))
1349 if (bin_ind
== UNDEFINED_BIN_IND
) {
1350 if (value
!= 0) *value
= 0;
1353 bin
= get_bin(tab
->bins
, get_size_ind(tab
), bin_ind
) - ENTRY_BASE
;
1354 MARK_BIN_DELETED(tab
, bin_ind
);
1356 entry
= &tab
->entries
[bin
];
1358 if (value
!= 0) *value
= entry
->record
;
1359 MARK_ENTRY_DELETED(entry
);
1361 update_range_for_deleted(tab
, bin
);
1366 st_delete(st_table
*tab
, st_data_t
*key
, st_data_t
*value
)
1368 return st_general_delete(tab
, key
, value
);
1371 /* The function and other functions with suffix '_safe' or '_check'
1372 are originated from the previous implementation of the hash tables.
1373 It was necessary for correct deleting entries during traversing
1374 tables. The current implementation permits deletion during
1375 traversing without a specific way to do this. */
1377 st_delete_safe(st_table
*tab
, st_data_t
*key
, st_data_t
*value
,
1378 st_data_t never ATTRIBUTE_UNUSED
)
1380 return st_general_delete(tab
, key
, value
);
1383 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1384 return zero. Otherwise, remove the first entry in the table.
1385 Return its key through KEY and its record through VALUE (unless
1388 st_shift(st_table
*tab
, st_data_t
*key
, st_data_t
*value
)
1390 st_index_t i
, bound
;
1392 st_table_entry
*entries
, *curr_entry_ptr
;
1395 entries
= tab
->entries
;
1396 bound
= tab
->entries_bound
;
1397 for (i
= tab
->entries_start
; i
< bound
; i
++) {
1398 curr_entry_ptr
= &entries
[i
];
1399 if (! DELETED_ENTRY_P(curr_entry_ptr
)) {
1400 st_hash_t entry_hash
= curr_entry_ptr
->hash
;
1401 st_data_t entry_key
= curr_entry_ptr
->key
;
1403 if (value
!= 0) *value
= curr_entry_ptr
->record
;
1406 if (tab
->bins
== NULL
) {
1407 bin
= find_entry(tab
, entry_hash
, entry_key
);
1408 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0)) {
1409 entries
= tab
->entries
;
1412 curr_entry_ptr
= &entries
[bin
];
1415 bin_ind
= find_table_bin_ind(tab
, entry_hash
, entry_key
);
1416 if (EXPECT(bin_ind
== REBUILT_TABLE_BIN_IND
, 0)) {
1417 entries
= tab
->entries
;
1420 curr_entry_ptr
= &entries
[get_bin(tab
->bins
, get_size_ind(tab
), bin_ind
)
1422 MARK_BIN_DELETED(tab
, bin_ind
);
1424 MARK_ENTRY_DELETED(curr_entry_ptr
);
1426 update_range_for_deleted(tab
, i
);
1430 if (value
!= 0) *value
= 0;
1434 /* See comments for function st_delete_safe. */
1436 st_cleanup_safe(st_table
*tab ATTRIBUTE_UNUSED
,
1437 st_data_t never ATTRIBUTE_UNUSED
)
1441 /* Find entry with KEY in table TAB, call FUNC with pointers to copies
1442 of the key and the value of the found entry, and non-zero as the
1443 3rd argument. If the entry is not found, call FUNC with a pointer
1444 to KEY, a pointer to zero, and a zero argument. If the call
1445 returns ST_CONTINUE, the table will have an entry with key and
1446 value returned by FUNC through the 1st and 2nd parameters. If the
1447 call of FUNC returns ST_DELETE, the table will not have entry with
1448 KEY. The function returns flag of that the entry with KEY was in
1449 the table before the call. */
1451 st_update(st_table
*tab
, st_data_t key
,
1452 st_update_callback_func
*func
, st_data_t arg
)
1454 st_table_entry
*entry
= NULL
; /* to avoid uninitialized value warning */
1455 st_index_t bin
= 0; /* Ditto */
1456 st_table_entry
*entries
;
1458 st_data_t value
= 0, old_key
;
1459 int retval
, existing
;
1460 st_hash_t hash
= do_hash(key
, tab
);
1463 entries
= tab
->entries
;
1464 if (tab
->bins
== NULL
) {
1465 bin
= find_entry(tab
, hash
, key
);
1466 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1468 existing
= bin
!= UNDEFINED_ENTRY_IND
;
1469 entry
= &entries
[bin
];
1470 bin_ind
= UNDEFINED_BIN_IND
;
1473 bin_ind
= find_table_bin_ind(tab
, hash
, key
);
1474 if (EXPECT(bin_ind
== REBUILT_TABLE_BIN_IND
, 0))
1476 existing
= bin_ind
!= UNDEFINED_BIN_IND
;
1478 bin
= get_bin(tab
->bins
, get_size_ind(tab
), bin_ind
) - ENTRY_BASE
;
1479 entry
= &entries
[bin
];
1484 value
= entry
->record
;
1487 retval
= (*func
)(&key
, &value
, arg
, existing
);
1491 st_add_direct_with_hash(tab
, key
, value
, hash
);
1494 if (old_key
!= key
) {
1497 entry
->record
= value
;
1501 if (bin_ind
!= UNDEFINED_BIN_IND
)
1502 MARK_BIN_DELETED(tab
, bin_ind
);
1503 MARK_ENTRY_DELETED(entry
);
1505 update_range_for_deleted(tab
, bin
);
1512 /* Traverse all entries in table TAB calling FUNC with current entry
1513 key and value and zero. If the call returns ST_STOP, stop
1514 traversing. If the call returns ST_DELETE, delete the current
1515 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1516 traversing. The function returns zero unless an error is found.
1517 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1518 different for ST_CHECK and when the current element is removed
1519 during traversing. */
1521 st_general_foreach(st_table
*tab
, st_foreach_check_callback_func
*func
, st_update_callback_func
*replace
, st_data_t arg
,
1526 st_table_entry
*entries
, *curr_entry_ptr
;
1527 enum st_retval retval
;
1528 st_index_t i
, rebuilds_num
;
1531 int error_p
, packed_p
= tab
->bins
== NULL
;
1533 entries
= tab
->entries
;
1534 /* The bound can change inside the loop even without rebuilding
1535 the table, e.g. by an entry insertion. */
1536 for (i
= tab
->entries_start
; i
< tab
->entries_bound
; i
++) {
1537 curr_entry_ptr
= &entries
[i
];
1538 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr
), 0))
1540 key
= curr_entry_ptr
->key
;
1541 rebuilds_num
= tab
->rebuilds_num
;
1542 hash
= curr_entry_ptr
->hash
;
1543 retval
= (*func
)(key
, curr_entry_ptr
->record
, arg
, 0);
1545 if (retval
== ST_REPLACE
&& replace
) {
1547 value
= curr_entry_ptr
->record
;
1548 retval
= (*replace
)(&key
, &value
, arg
, TRUE
);
1549 curr_entry_ptr
->key
= key
;
1550 curr_entry_ptr
->record
= value
;
1553 if (rebuilds_num
!= tab
->rebuilds_num
) {
1555 entries
= tab
->entries
;
1556 packed_p
= tab
->bins
== NULL
;
1558 i
= find_entry(tab
, hash
, key
);
1559 if (EXPECT(i
== REBUILT_TABLE_ENTRY_IND
, 0))
1561 error_p
= i
== UNDEFINED_ENTRY_IND
;
1564 i
= find_table_entry_ind(tab
, hash
, key
);
1565 if (EXPECT(i
== REBUILT_TABLE_ENTRY_IND
, 0))
1567 error_p
= i
== UNDEFINED_ENTRY_IND
;
1570 if (error_p
&& check_p
) {
1571 /* call func with error notice */
1572 retval
= (*func
)(0, 0, arg
, 1);
1575 curr_entry_ptr
= &entries
[i
];
1588 st_data_t key
= curr_entry_ptr
->key
;
1592 bin
= find_entry(tab
, hash
, key
);
1593 if (EXPECT(bin
== REBUILT_TABLE_ENTRY_IND
, 0))
1595 if (bin
== UNDEFINED_ENTRY_IND
)
1599 bin_ind
= find_table_bin_ind(tab
, hash
, key
);
1600 if (EXPECT(bin_ind
== REBUILT_TABLE_BIN_IND
, 0))
1602 if (bin_ind
== UNDEFINED_BIN_IND
)
1604 bin
= get_bin(tab
->bins
, get_size_ind(tab
), bin_ind
) - ENTRY_BASE
;
1605 MARK_BIN_DELETED(tab
, bin_ind
);
1607 curr_entry_ptr
= &entries
[bin
];
1608 MARK_ENTRY_DELETED(curr_entry_ptr
);
1610 update_range_for_deleted(tab
, bin
);
1619 st_foreach_with_replace(st_table
*tab
, st_foreach_check_callback_func
*func
, st_update_callback_func
*replace
, st_data_t arg
)
1621 return st_general_foreach(tab
, func
, replace
, arg
, TRUE
);
1625 st_foreach_callback_func
*func
;
1630 apply_functor(st_data_t k
, st_data_t v
, st_data_t d
, int _
)
1632 const struct functor
*f
= (void *)d
;
1633 return f
->func(k
, v
, f
->arg
);
1637 st_foreach(st_table
*tab
, st_foreach_callback_func
*func
, st_data_t arg
)
1639 const struct functor f
= { func
, arg
};
1640 return st_general_foreach(tab
, apply_functor
, 0, (st_data_t
)&f
, FALSE
);
1643 /* See comments for function st_delete_safe. */
1645 st_foreach_check(st_table
*tab
, st_foreach_check_callback_func
*func
, st_data_t arg
,
1646 st_data_t never ATTRIBUTE_UNUSED
)
1648 return st_general_foreach(tab
, func
, 0, arg
, TRUE
);
1651 /* Set up array KEYS by at most SIZE keys of head table TAB entries.
1652 Return the number of keys set up in array KEYS. */
1653 static inline st_index_t
1654 st_general_keys(st_table
*tab
, st_data_t
*keys
, st_index_t size
)
1656 st_index_t i
, bound
;
1657 st_data_t key
, *keys_start
, *keys_end
;
1658 st_table_entry
*curr_entry_ptr
, *entries
= tab
->entries
;
1660 bound
= tab
->entries_bound
;
1662 keys_end
= keys
+ size
;
1663 for (i
= tab
->entries_start
; i
< bound
; i
++) {
1664 if (keys
== keys_end
)
1666 curr_entry_ptr
= &entries
[i
];
1667 key
= curr_entry_ptr
->key
;
1668 if (! DELETED_ENTRY_P(curr_entry_ptr
))
1672 return keys
- keys_start
;
1676 st_keys(st_table
*tab
, st_data_t
*keys
, st_index_t size
)
1678 return st_general_keys(tab
, keys
, size
);
1681 /* See comments for function st_delete_safe. */
1683 st_keys_check(st_table
*tab
, st_data_t
*keys
, st_index_t size
,
1684 st_data_t never ATTRIBUTE_UNUSED
)
1686 return st_general_keys(tab
, keys
, size
);
1689 /* Set up array VALUES by at most SIZE values of head table TAB
1690 entries. Return the number of values set up in array VALUES. */
1691 static inline st_index_t
1692 st_general_values(st_table
*tab
, st_data_t
*values
, st_index_t size
)
1694 st_index_t i
, bound
;
1695 st_data_t
*values_start
, *values_end
;
1696 st_table_entry
*curr_entry_ptr
, *entries
= tab
->entries
;
1698 values_start
= values
;
1699 values_end
= values
+ size
;
1700 bound
= tab
->entries_bound
;
1701 for (i
= tab
->entries_start
; i
< bound
; i
++) {
1702 if (values
== values_end
)
1704 curr_entry_ptr
= &entries
[i
];
1705 if (! DELETED_ENTRY_P(curr_entry_ptr
))
1706 *values
++ = curr_entry_ptr
->record
;
1709 return values
- values_start
;
1713 st_values(st_table
*tab
, st_data_t
*values
, st_index_t size
)
1715 return st_general_values(tab
, values
, size
);
1718 /* See comments for function st_delete_safe. */
1720 st_values_check(st_table
*tab
, st_data_t
*values
, st_index_t size
,
1721 st_data_t never ATTRIBUTE_UNUSED
)
1723 return st_general_values(tab
, values
, size
);
1726 #define FNV1_32A_INIT 0x811c9dc5
1729 * 32 bit magic FNV-1a prime
1731 #define FNV_32_PRIME 0x01000193
1733 /* __POWERPC__ added to accommodate Darwin case. */
1734 #ifndef UNALIGNED_WORD_ACCESS
1735 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1736 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1737 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1738 defined(__mc68020__)
1739 # define UNALIGNED_WORD_ACCESS 1
1742 #ifndef UNALIGNED_WORD_ACCESS
1743 # define UNALIGNED_WORD_ACCESS 0
1746 /* This hash function is quite simplified MurmurHash3
1747 * Simplification is legal, cause most of magic still happens in finalizator.
1748 * And finalizator is almost the same as in MurmurHash3 */
1749 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1750 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1752 #if ST_INDEX_BITS <= 32
1753 #define C1 (st_index_t)0xcc9e2d51
1754 #define C2 (st_index_t)0x1b873593
1756 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1757 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1759 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t
murmur_step(st_index_t h
, st_index_t k
));
1760 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t
murmur_finish(st_index_t h
));
1761 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t
st_hash(const void *ptr
, size_t len
, st_index_t h
));
1763 static inline st_index_t
1764 murmur_step(st_index_t h
, st_index_t k
)
1766 #if ST_INDEX_BITS <= 32
1782 static inline st_index_t
1783 murmur_finish(st_index_t h
)
1785 #if ST_INDEX_BITS <= 32
1789 const st_index_t c1
= 0x85ebca6b;
1790 const st_index_t c2
= 0xc2b2ae35;
1792 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1796 const st_index_t c1
= BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1797 const st_index_t c2
= BIG_CONSTANT(0x94d049bb,0x133111eb);
1799 #if ST_INDEX_BITS > 64
1816 st_hash(const void *ptr
, size_t len
, st_index_t h
)
1818 const char *data
= ptr
;
1822 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1823 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1824 #if SIZEOF_ST_INDEX_T > 4
1825 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1826 #if SIZEOF_ST_INDEX_T > 8
1827 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1828 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1829 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1831 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1833 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1836 if (len
>= sizeof(st_index_t
)) {
1837 #if !UNALIGNED_WORD_ACCESS
1838 int align
= (int)((st_data_t
)data
% sizeof(st_index_t
));
1844 #ifdef WORDS_BIGENDIAN
1845 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1846 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1848 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1849 t |= data_at(n) << CHAR_BIT*(n)
1852 #undef UNALIGNED_ADD
1855 #ifdef WORDS_BIGENDIAN
1856 t
>>= (CHAR_BIT
* align
) - CHAR_BIT
;
1858 t
<<= (CHAR_BIT
* align
);
1861 data
+= sizeof(st_index_t
)-align
;
1862 len
-= sizeof(st_index_t
)-align
;
1864 sl
= CHAR_BIT
* (SIZEOF_ST_INDEX_T
-align
);
1865 sr
= CHAR_BIT
* align
;
1867 while (len
>= sizeof(st_index_t
)) {
1868 d
= *(st_index_t
*)data
;
1869 #ifdef WORDS_BIGENDIAN
1870 t
= (t
<< sr
) | (d
>> sl
);
1872 t
= (t
>> sr
) | (d
<< sl
);
1874 h
= murmur_step(h
, t
);
1876 data
+= sizeof(st_index_t
);
1877 len
-= sizeof(st_index_t
);
1880 pack
= len
< (size_t)align
? (int)len
: align
;
1883 #ifdef WORDS_BIGENDIAN
1884 # define UNALIGNED_ADD(n) case (n) + 1: \
1885 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1887 # define UNALIGNED_ADD(n) case (n) + 1: \
1888 d |= data_at(n) << CHAR_BIT*(n)
1891 #undef UNALIGNED_ADD
1893 #ifdef WORDS_BIGENDIAN
1894 t
= (t
<< sr
) | (d
>> sl
);
1896 t
= (t
>> sr
) | (d
<< sl
);
1899 if (len
< (size_t)align
) goto skip_tail
;
1900 # define SKIP_TAIL 1
1901 h
= murmur_step(h
, t
);
1907 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1908 #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1910 #define aligned_data data
1914 h
= murmur_step(h
, *(st_index_t
*)aligned_data
);
1915 data
+= sizeof(st_index_t
);
1916 len
-= sizeof(st_index_t
);
1917 } while (len
>= sizeof(st_index_t
));
1923 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1924 /* in this case byteorder doesn't really matter */
1925 #if SIZEOF_ST_INDEX_T > 4
1926 case 7: t
|= data_at(6) << 48;
1927 case 6: t
|= data_at(5) << 40;
1928 case 5: t
|= data_at(4) << 32;
1930 t
|= (st_index_t
)*(uint32_t*)aligned_data
;
1932 # define SKIP_TAIL 1
1934 case 3: t
|= data_at(2) << 16;
1935 case 2: t
|= data_at(1) << 8;
1936 case 1: t
|= data_at(0);
1938 #ifdef WORDS_BIGENDIAN
1939 # define UNALIGNED_ADD(n) case (n) + 1: \
1940 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1942 # define UNALIGNED_ADD(n) case (n) + 1: \
1943 t |= data_at(n) << CHAR_BIT*(n)
1946 #undef UNALIGNED_ADD
1951 h
^= t
; h
-= ROTL(t
, 7);
1957 return murmur_finish(h
);
1961 st_hash_uint32(st_index_t h
, uint32_t i
)
1963 return murmur_step(h
, i
);
1966 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t
st_hash_uint(st_index_t h
, st_index_t i
));
1968 st_hash_uint(st_index_t h
, st_index_t i
)
1971 /* no matter if it is BigEndian or LittleEndian,
1972 * we hash just integers */
1973 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1974 h
= murmur_step(h
, i
>> 8*8);
1976 h
= murmur_step(h
, i
);
1981 st_hash_end(st_index_t h
)
1983 h
= murmur_finish(h
);
1987 #undef st_hash_start
1989 rb_st_hash_start(st_index_t h
)
1995 strhash(st_data_t arg
)
1997 register const char *string
= (const char *)arg
;
1998 return st_hash(string
, strlen(string
), FNV1_32A_INIT
);
2002 st_locale_insensitive_strcasecmp(const char *s1
, const char *s2
)
2009 if (c1
== '\0' || c2
== '\0') {
2010 if (c1
!= '\0') return 1;
2011 if (c2
!= '\0') return -1;
2014 if (('A' <= c1
) && (c1
<= 'Z')) c1
+= 'a' - 'A';
2015 if (('A' <= c2
) && (c2
<= 'Z')) c2
+= 'a' - 'A';
2026 st_locale_insensitive_strncasecmp(const char *s1
, const char *s2
, size_t n
)
2031 for (i
= 0; i
< n
; i
++) {
2034 if (c1
== '\0' || c2
== '\0') {
2035 if (c1
!= '\0') return 1;
2036 if (c2
!= '\0') return -1;
2039 if (('A' <= c1
) && (c1
<= 'Z')) c1
+= 'a' - 'A';
2040 if (('A' <= c2
) && (c2
<= 'Z')) c2
+= 'a' - 'A';
2052 st_strcmp(st_data_t lhs
, st_data_t rhs
)
2054 const char *s1
= (char *)lhs
;
2055 const char *s2
= (char *)rhs
;
2056 return strcmp(s1
, s2
);
2060 st_locale_insensitive_strcasecmp_i(st_data_t lhs
, st_data_t rhs
)
2062 const char *s1
= (char *)lhs
;
2063 const char *s2
= (char *)rhs
;
2064 return st_locale_insensitive_strcasecmp(s1
, s2
);
2067 NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t
strcasehash(st_data_t
)));
2069 strcasehash(st_data_t arg
)
2071 register const char *string
= (const char *)arg
;
2072 register st_index_t hval
= FNV1_32A_INIT
;
2075 * FNV-1a hash each octet in the buffer
2078 unsigned int c
= (unsigned char)*string
++;
2079 if ((unsigned int)(c
- 'A') <= ('Z' - 'A')) c
+= 'a' - 'A';
2082 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2083 hval
*= FNV_32_PRIME
;
2089 st_numcmp(st_data_t x
, st_data_t y
)
2095 st_numhash(st_data_t n
)
2097 enum {s1
= 11, s2
= 3};
2098 return (st_index_t
)((n
>>s1
|(n
<<s2
)) ^ (n
>>s2
));
2102 /* Expand TAB to be suitable for holding SIZ entries in total.
2103 Pre-existing entries remain not deleted inside of TAB, but its bins
2104 are cleared to expect future reconstruction. See rehash below. */
2106 st_expand_table(st_table
*tab
, st_index_t siz
)
2111 if (siz
<= get_allocated_entries(tab
))
2112 return; /* enough room already */
2114 tmp
= st_init_table_with_size(tab
->type
, siz
);
2115 n
= get_allocated_entries(tab
);
2116 MEMCPY(tmp
->entries
, tab
->entries
, st_table_entry
, n
);
2120 tab
->entry_power
= tmp
->entry_power
;
2121 tab
->bin_power
= tmp
->bin_power
;
2122 tab
->size_ind
= tmp
->size_ind
;
2123 tab
->entries
= tmp
->entries
;
2125 tab
->rebuilds_num
++;
2129 /* Rehash using linear search. Return TRUE if we found that the table
2132 st_rehash_linear(st_table
*tab
)
2134 int eq_p
, rebuilt_p
;
2136 st_table_entry
*p
, *q
;
2141 for (i
= tab
->entries_start
; i
< tab
->entries_bound
; i
++) {
2142 p
= &tab
->entries
[i
];
2143 if (DELETED_ENTRY_P(p
))
2145 for (j
= i
+ 1; j
< tab
->entries_bound
; j
++) {
2146 q
= &tab
->entries
[j
];
2147 if (DELETED_ENTRY_P(q
))
2149 DO_PTR_EQUAL_CHECK(tab
, p
, q
->hash
, q
->key
, eq_p
, rebuilt_p
);
2150 if (EXPECT(rebuilt_p
, 0))
2154 MARK_ENTRY_DELETED(q
);
2156 update_range_for_deleted(tab
, j
);
2163 /* Rehash using index. Return TRUE if we found that the table was
2166 st_rehash_indexed(st_table
*tab
)
2168 int eq_p
, rebuilt_p
;
2170 st_index_t
const n
= bins_size(tab
);
2171 unsigned int const size_ind
= get_size_ind(tab
);
2172 st_index_t
*bins
= realloc(tab
->bins
, n
);
2174 initialize_bins(tab
);
2175 for (i
= tab
->entries_start
; i
< tab
->entries_bound
; i
++) {
2176 st_table_entry
*p
= &tab
->entries
[i
];
2178 #ifdef QUADRATIC_PROBE
2181 st_index_t perturb
= p
->hash
;
2184 if (DELETED_ENTRY_P(p
))
2187 ind
= hash_bin(p
->hash
, tab
);
2189 st_index_t bin
= get_bin(bins
, size_ind
, ind
);
2190 if (EMPTY_OR_DELETED_BIN_P(bin
)) {
2192 set_bin(bins
, size_ind
, ind
, i
+ ENTRY_BASE
);
2196 st_table_entry
*q
= &tab
->entries
[bin
- ENTRY_BASE
];
2197 DO_PTR_EQUAL_CHECK(tab
, q
, p
->hash
, p
->key
, eq_p
, rebuilt_p
);
2198 if (EXPECT(rebuilt_p
, 0))
2201 /* duplicated key; delete it */
2202 q
->record
= p
->record
;
2203 MARK_ENTRY_DELETED(p
);
2205 update_range_for_deleted(tab
, bin
);
2209 /* hash collision; skip it */
2210 #ifdef QUADRATIC_PROBE
2211 ind
= hash_bin(ind
+ d
, tab
);
2214 ind
= secondary_hash(ind
, tab
, &perturb
);
2223 /* Reconstruct TAB's bins according to TAB's entries. This function
2224 permits conflicting keys inside of entries. No errors are reported
2225 then. All but one of them are discarded silently. */
2227 st_rehash(st_table
*tab
)
2232 if (tab
->bin_power
<= MAX_POWER2_FOR_TABLES_WITHOUT_BINS
)
2233 rebuilt_p
= st_rehash_linear(tab
);
2235 rebuilt_p
= st_rehash_indexed(tab
);
2236 } while (rebuilt_p
);
2240 st_stringify(VALUE key
)
2242 return (rb_obj_class(key
) == rb_cString
&& !RB_OBJ_FROZEN(key
)) ?
2243 rb_hash_key_str(key
) : key
;
2247 st_insert_single(st_table
*tab
, VALUE hash
, VALUE key
, VALUE val
)
2249 st_data_t k
= st_stringify(key
);
2251 e
.hash
= do_hash(k
, tab
);
2255 tab
->entries
[tab
->entries_bound
++] = e
;
2257 RB_OBJ_WRITTEN(hash
, Qundef
, k
);
2258 RB_OBJ_WRITTEN(hash
, Qundef
, val
);
2262 st_insert_linear(st_table
*tab
, long argc
, const VALUE
*argv
, VALUE hash
)
2266 for (i
= 0; i
< argc
; /* */) {
2267 st_data_t k
= st_stringify(argv
[i
++]);
2268 st_data_t v
= argv
[i
++];
2269 st_insert(tab
, k
, v
);
2270 RB_OBJ_WRITTEN(hash
, Qundef
, k
);
2271 RB_OBJ_WRITTEN(hash
, Qundef
, v
);
2276 st_insert_generic(st_table
*tab
, long argc
, const VALUE
*argv
, VALUE hash
)
2281 for (i
= 0; i
< argc
; /* */) {
2282 VALUE key
= argv
[i
++];
2283 VALUE val
= argv
[i
++];
2284 st_insert_single(tab
, hash
, key
, val
);
2291 /* Mimics ruby's { foo => bar } syntax. This function is subpart
2292 of rb_hash_bulk_insert. */
2294 rb_hash_bulk_insert_into_st_table(long argc
, const VALUE
*argv
, VALUE hash
)
2296 st_index_t n
, size
= argc
/ 2;
2297 st_table
*tab
= RHASH_ST_TABLE(hash
);
2299 tab
= RHASH_TBL_RAW(hash
);
2300 n
= tab
->entries_bound
+ size
;
2301 st_expand_table(tab
, n
);
2302 if (UNLIKELY(tab
->num_entries
))
2303 st_insert_generic(tab
, argc
, argv
, hash
);
2305 st_insert_single(tab
, hash
, argv
[0], argv
[1]);
2306 else if (tab
->bin_power
<= MAX_POWER2_FOR_TABLES_WITHOUT_BINS
)
2307 st_insert_linear(tab
, argc
, argv
, hash
);
2309 st_insert_generic(tab
, argc
, argv
, hash
);
2312 // to iterate iv_index_tbl
2314 rb_st_nth_key(st_table
*tab
, st_index_t index
)
2316 if (LIKELY(tab
->entries_start
== 0 &&
2317 tab
->num_entries
== tab
->entries_bound
&&
2318 index
< tab
->num_entries
)) {
2319 return tab
->entries
[index
].key
;
2322 rb_bug("unreachable");
2327 rb_st_compact_table(st_table
*tab
)
2329 st_index_t num
= tab
->num_entries
;
2330 if (REBUILD_THRESHOLD
* num
<= get_allocated_entries(tab
)) {
2332 st_table
*new_tab
= st_init_table_with_size(tab
->type
, 2 * num
);
2333 rebuild_table_with(new_tab
, tab
);
2334 rebuild_move_table(new_tab
, tab
);
2335 rebuild_cleanup(tab
);