/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2024 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc-oversized.h"

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, and there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to 'hash_initialize'; see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  An entry indistinctly
   refers to both the internal entry and its associated user entry.  A user
   entry's contents may be hashed by a randomization function (the hashing
   function, or just "hasher" for short) into a number (or "slot") between 0
   and the current table size minus 1.  At each slot position in the hash
   table starts a linked chain of entries whose user data all hash to that
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a datum is usually
   done in constant time by the "hasher", and the later finding of a precise
   entry is linear in time with the size of the bucket.  Consequently, a
   larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the "hasher" function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in the hope of reducing the average length of buckets, but the cost
   can become inordinate, as unused slots in the hash table take up space.
   The best bet is to make sure you are using a good "hasher" function (beware
   that those are not that easy to write! :-), and to use a table size larger
   than the actual number of entries.  */

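/* For illustration only (a sketch, not part of this file's API): a typical
   caller supplies a hasher and comparator over its own entry type.  The
   'struct item' names below are hypothetical; only hash_string and the
   Hash_hasher/Hash_comparator signatures come from this package.

     struct item { char *name; int value; };     // hypothetical user entry

     static size_t
     item_hasher (const void *entry, size_t n_buckets)
     {
       const struct item *it = entry;
       return hash_string (it->name, n_buckets);  // always < n_buckets
     }

     static bool
     item_comparator (const void *a, const void *b)
     {
       const struct item *ia = a;
       const struct item *ib = b;
       return strcmp (ia->name, ib->name) == 0;   // needs <string.h>
     }

   A hasher must return a value strictly less than its N_BUCKETS argument
   for every entry; otherwise lookups abort (see safe_hasher below).  */
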
/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };

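/* For illustration only: a caller that wants a denser table and no automatic
   shrinking might pass its own tuning.  The values below are hypothetical,
   chosen merely to satisfy check_tuning further down; the field order matches
   the initializer of default_tuning above.

     static const Hash_tuning dense_tuning =
       {
         0.0f,    // shrink_threshold: never shrink
         1.0f,    // shrink_factor
         0.85f,   // growth_threshold: grow once 85% of buckets are used
         2.0f,    // growth_factor: double the candidate size when growing
         false    // is_n_buckets: CANDIDATE counts entries, not buckets
       };

   and then call hash_initialize (candidate, &dense_tuning, ...).  */
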
/* Information and lookup.  */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t n_buckets_used = 0;
  size_t n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;

          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow.  */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
           (unsigned long int) n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
           (unsigned long int) max_bucket_length);
}

/* Hash KEY and return a pointer to the selected bucket.
   If TABLE->hasher misbehaves, abort.  */
static struct hash_entry *
safe_hasher (const Hash_table *table, const void *key)
{
  size_t n = table->hasher (key, table->n_buckets);
  if (! (n < table->n_buckets))
    abort ();
  return table->bucket + n;
}

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (entry == cursor->data || table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}

/* Walking.  */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry const *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; ; bucket++)
    if (! (bucket < table->bucket_limit))
      abort ();
    else if (bucket->data)
      return bucket->data;
}

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  /* Find next entry in the same bucket.  */
  cursor = bucket;
  do
    {
      if (cursor->data == entry && cursor->next)
        return cursor->next->data;
      cursor = cursor->next;
    }
  while (cursor != NULL);

  /* Find first entry in any subsequent bucket.  */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found.  */
  return NULL;
}

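/* For illustration only: the usual way to walk a table with the two functions
   above (TABLE is assumed to be a valid Hash_table *, and 'process' is a
   hypothetical function):

     void *e;
     for (e = hash_get_first (table); e != NULL; e = hash_get_next (table, e))
       process (e);

   The walk visits each entry exactly once, in no particular order, and
   should not be mixed with insertions or removals, since those may rehash
   or unlink the entry the walk is positioned on.  */
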
size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}

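/* For illustration only: a processor that counts entries matching some
   predicate.  The names are hypothetical; only the Hash_processor calling
   convention comes from this package, and the example assumes the user
   entries are NUL-terminated strings and <string.h> is included.

     static bool
     count_if_long (void *data, void *processor_data)
     {
       size_t *count = processor_data;
       if (strlen ((char *) data) > 10)
         (*count)++;
       return true;    // returning false would stop the walk early
     }

     size_t count = 0;
     hash_do_for_each (table, count_if_long, &count);  */
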
/* Allocation and clean-up.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from 'recode', and performs a bit better than the above as
   per a few experiments.  It is inspired by a hashing routine found in the
   very old Cyber 'snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */

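/* A quick worked example of the default hash_string recurrence (assuming
   ASCII, for illustration only): with n_buckets == 11 and the string "ab",
   the loop computes

     value = (0 * 31 + 'a') % 11 = 97 % 11 = 9
     value = (9 * 31 + 'b') % 11 = 377 % 11 = 3

   so hash_string ("ab", 11) == 3, which is a valid bucket index (< 11).  */
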
/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool _GL_ATTRIBUTE_CONST
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t _GL_ATTRIBUTE_CONST
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}

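/* A short worked example (for illustration only): next_prime (24) first
   forces the candidate odd (24 | 1 == 25), then tests successive odd
   numbers with is_prime: 25 is divisible by 5, 27 by 3, and 29 has no odd
   divisor up to its square root, so next_prime (24) returns 29.  */
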
void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}

/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to modify the tuning
   arguments further), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should.  The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough.  */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries.  */

static size_t _GL_ATTRIBUTE_PURE
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if ((float) SIZE_MAX <= new_candidate)
        goto nomem;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    goto nomem;
  return candidate;

 nomem:
  errno = ENOMEM;
  return 0;
}

Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is created,
         if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further requests to change the tuning
         options.  */
      errno = EINVAL;
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

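/* For illustration only: creating a table that owns heap-allocated C strings.
   The string_* helpers are hypothetical; NULL tuning selects default_tuning,
   and free () is used as the data_freer so that hash_free releases the
   strings along with the table.

     static size_t
     string_hasher (const void *entry, size_t n_buckets)
     {
       return hash_string (entry, n_buckets);
     }

     static bool
     string_comparator (const void *a, const void *b)
     {
       return strcmp (a, b) == 0;     // needs <string.h>
     }

     Hash_table *t = hash_initialize (10, NULL, string_hasher,
                                      string_comparator, free);
     if (t == NULL)
       abort ();   // errno indicates the reason, e.g. ENOMEM or EINVAL

   When the table is no longer needed, hash_free (t) releases both the
   entries (through the data_freer) and the table itself.  */
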
void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be expected
                 that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  int err = errno;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);

  errno = err;
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket = safe_hasher (table, entry);
  struct hash_entry *cursor;

  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST.  Both tables must
   share the same free entry list.  If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur.  Return false (setting errno) if the free entry list is
   exhausted and an allocation fails.  */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure.  After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head.  */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = safe_hasher (dst, data);

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow.  */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header.  */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }

        /* Now move the bucket head.  Be sure that if we fail due to an
           allocation failure, the src table is left in a consistent
           state.  */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = safe_hasher (dst, data);

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow.  */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another.  */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table.  The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed).  So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer.  */

  /* Merely reuse the extra old space in the new table.  */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends.  */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value.  */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table.  We must undo the partial move before returning
     failure.  The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure.  */
  int err = errno;
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value.  */
  free (new_table->bucket);
  errno = err;
  return false;
}

int
hash_insert_if_absent (Hash_table *table, void const *entry,
                       void const **matched_ent)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
     to indicate an empty bucket.  */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    {
      if (matched_ent)
        *matched_ent = data;
      return 0;
    }

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if ((float) SIZE_MAX <= candidate)
            {
              errno = ENOMEM;
              return -1;
            }

          /* If the rehash fails, arrange to return -1.  */
          if (!hash_rehash (table, candidate))
            return -1;

          /* Update the bucket we are interested in.  */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched, it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return -1;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return 1;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return 1;
}

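/* For illustration only: the function above has three possible results.
   All names other than hash_insert_if_absent itself are hypothetical.

     void const *matched;
     int r = hash_insert_if_absent (table, item, &matched);
     if (r == -1)
       handle_oom ();            // allocation failed, errno is set
     else if (r == 0)
       use_existing (matched);   // an equal entry was already in the table
     else
       assert (r == 1);          // ITEM itself was inserted  */
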
void *
hash_insert (Hash_table *table, void const *entry)
{
  void const *matched_ent;
  int err = hash_insert_if_absent (table, entry, &matched_ent);
  return (err == -1
          ? NULL
          : (void *) (err == 0 ? matched_ent : entry));
}

void *
hash_remove (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal.  But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list.  */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

void *
hash_delete (Hash_table *table, const void *entry)
{
  return hash_remove (table, entry);
}

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */