/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2024 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc-oversized.h"

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, and there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to 'hash_initialize'; see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  An entry indistinctly
   refers to both the internal entry and its associated user entry.  A user
   entry's contents may be hashed by a randomization function (the hashing
   function, or just "hasher" for short) into a number (or "slot") between 0
   and the current table size minus 1.  At each slot position in the hash
   table starts a linked chain of entries whose user data all hash to that
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a datum is usually
   done in constant time by the "hasher", and the later finding of a precise
   entry is linear in time with the size of the bucket.  Consequently, a
   larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the "hasher" function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in the hope of reducing the average length of buckets, but the cost
   can become inordinate, as unused slots in the hash table take up space.
   The best bet is to make sure you are using a good "hasher" function (beware
   that those are not that easy to write! :-), and to use a table size larger
   than the actual number of entries.  */

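/* For illustration only (a sketch, not part of this file's API): a typical
   caller supplies a hasher and comparator over its own entry type.  The
   'struct item' names below are hypothetical; only hash_string and the
   Hash_hasher/Hash_comparator signatures come from this package.

     struct item { char *name; int value; };     // hypothetical user entry

     static size_t
     item_hasher (const void *entry, size_t n_buckets)
     {
       const struct item *it = entry;
       return hash_string (it->name, n_buckets);  // always < n_buckets
     }

     static bool
     item_comparator (const void *a, const void *b)
     {
       const struct item *ia = a;
       const struct item *ib = b;
       return strcmp (ia->name, ib->name) == 0;   // needs <string.h>
     }

   A hasher must return a value strictly less than its N_BUCKETS argument
   for every entry; otherwise lookups abort (see safe_hasher below).  */
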
/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };

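/* For illustration only: a caller that wants a denser table and no automatic
   shrinking might pass its own tuning.  The values below are hypothetical,
   chosen merely to satisfy check_tuning further down; the field order matches
   the initializer of default_tuning above.

     static const Hash_tuning dense_tuning =
       {
         0.0f,    // shrink_threshold: never shrink
         1.0f,    // shrink_factor
         0.85f,   // growth_threshold: grow once 85% of buckets are used
         2.0f,    // growth_factor: double the candidate size when growing
         false    // is_n_buckets: CANDIDATE counts entries, not buckets
       };

   and then call hash_initialize (candidate, &dense_tuning, ...).  */
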
/* Information and lookup.  */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t n_buckets_used = 0;
  size_t n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;

          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow.  */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
           (unsigned long int) n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
           (unsigned long int) max_bucket_length);
}

/* Hash KEY and return a pointer to the selected bucket.
   If TABLE->hasher misbehaves, abort.  */
static struct hash_entry *
safe_hasher (const Hash_table *table, const void *key)
{
  size_t n = table->hasher (key, table->n_buckets);
  if (! (n < table->n_buckets))
    abort ();
  return table->bucket + n;
}

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (entry == cursor->data || table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}

/* Walking.  */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry const *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; ; bucket++)
    if (! (bucket < table->bucket_limit))
      abort ();
    else if (bucket->data)
      return bucket->data;
}

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  /* Find next entry in the same bucket.  */
  cursor = bucket;
  do
    {
      if (cursor->data == entry && cursor->next)
        return cursor->next->data;
      cursor = cursor->next;
    }
  while (cursor != NULL);

  /* Find first entry in any subsequent bucket.  */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found.  */
  return NULL;
}

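/* For illustration only: the usual way to walk a table with the two functions
   above (TABLE is assumed to be a valid Hash_table *, and 'process' is a
   hypothetical function):

     void *e;
     for (e = hash_get_first (table); e != NULL; e = hash_get_next (table, e))
       process (e);

   The walk visits each entry exactly once, in no particular order, and
   should not be mixed with insertions or removals, since those may rehash
   or unlink the entry the walk is positioned on.  */
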
size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}

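/* For illustration only: a processor that counts entries matching some
   predicate.  The names are hypothetical; only the Hash_processor calling
   convention comes from this package, and the example assumes the user
   entries are NUL-terminated strings and <string.h> is included.

     static bool
     count_if_long (void *data, void *processor_data)
     {
       size_t *count = processor_data;
       if (strlen ((char *) data) > 10)
         (*count)++;
       return true;    // returning false would stop the walk early
     }

     size_t count = 0;
     hash_do_for_each (table, count_if_long, &count);  */
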
/* Allocation and clean-up.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from 'recode', and performs a bit better than the above as
   per a few experiments.  It is inspired by a hashing routine found in the
   very old Cyber 'snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */

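/* A quick worked example of the default hash_string recurrence (assuming
   ASCII, for illustration only): with n_buckets == 11 and the string "ab",
   the loop computes

     value = (0 * 31 + 'a') % 11 = 97 % 11 = 9
     value = (9 * 31 + 'b') % 11 = 377 % 11 = 3

   so hash_string ("ab", 11) == 3, which is a valid bucket index (< 11).  */
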
/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool _GL_ATTRIBUTE_CONST
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t _GL_ATTRIBUTE_CONST
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}

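/* A short worked example (for illustration only): next_prime (24) first
   forces the candidate odd (24 | 1 == 25), then tests successive odd
   numbers with is_prime: 25 is divisible by 5, 27 by 3, and 29 has no odd
   divisor up to its square root, so next_prime (24) returns 29.  */
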
void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}

/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to modify the tuning
   arguments further), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should.  The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough.  */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries.  */

static size_t _GL_ATTRIBUTE_PURE
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if ((float) SIZE_MAX <= new_candidate)
        goto nomem;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    goto nomem;
  return candidate;

 nomem:
  errno = ENOMEM;
  return 0;
}

Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is created,
         if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further requests to change the tuning
         options.  */
      errno = EINVAL;
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

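/* For illustration only: creating a table that owns heap-allocated C strings.
   The string_* helpers are hypothetical; NULL tuning selects default_tuning,
   and free () is used as the data_freer so that hash_free releases the
   strings along with the table.

     static size_t
     string_hasher (const void *entry, size_t n_buckets)
     {
       return hash_string (entry, n_buckets);
     }

     static bool
     string_comparator (const void *a, const void *b)
     {
       return strcmp (a, b) == 0;     // needs <string.h>
     }

     Hash_table *t = hash_initialize (10, NULL, string_hasher,
                                      string_comparator, free);
     if (t == NULL)
       abort ();   // errno indicates the reason, e.g. ENOMEM or EINVAL

   When the table is no longer needed, hash_free (t) releases both the
   entries (through the data_freer) and the table itself.  */
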
void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be expected
                 that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  int err = errno;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);

  errno = err;
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket = safe_hasher (table, entry);
  struct hash_entry *cursor;

  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST.  Both tables must
   share the same free entry list.  If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur.  Return false (setting errno) if the free entry list is
   exhausted and an allocation fails.  */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure.  After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head.  */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = safe_hasher (dst, data);

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow.  */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header.  */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }

        /* Now move the bucket head.  Be sure that if we fail due to an
           allocation failure, the src table is left in a consistent
           state.  */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = safe_hasher (dst, data);

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow.  */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another.  */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table.  The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed).  So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer.  */

  /* Merely reuse the extra old space in the new table.  */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends.  */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value.  */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table.  We must undo the partial move before returning
     failure.  The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure.  */
  int err = errno;
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value.  */
  free (new_table->bucket);
  errno = err;
  return false;
}

int
hash_insert_if_absent (Hash_table *table, void const *entry,
                       void const **matched_ent)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
     to indicate an empty bucket.  */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    {
      if (matched_ent)
        *matched_ent = data;
      return 0;
    }

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if ((float) SIZE_MAX <= candidate)
            {
              errno = ENOMEM;
              return -1;
            }

          /* If the rehash fails, arrange to return -1.  */
          if (!hash_rehash (table, candidate))
            return -1;

          /* Update the bucket we are interested in.  */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched, it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return -1;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return 1;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return 1;
}

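/* For illustration only: the function above has three possible results.
   All names other than hash_insert_if_absent itself are hypothetical.

     void const *matched;
     int r = hash_insert_if_absent (table, item, &matched);
     if (r == -1)
       handle_oom ();            // allocation failed, errno is set
     else if (r == 0)
       use_existing (matched);   // an equal entry was already in the table
     else
       assert (r == 1);          // ITEM itself was inserted  */
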
void *
hash_insert (Hash_table *table, void const *entry)
{
  void const *matched_ent;
  int err = hash_insert_if_absent (table, entry, &matched_ent);
  return (err == -1
          ? NULL
          : (void *) (err == 0 ? matched_ent : entry));
}

void *
hash_remove (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal.  But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list.  */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

void *
hash_delete (Hash_table *table, const void *entry)
{
  return hash_remove (table, entry);
}

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */