lib/hash.c

   1 /* hash - hashing table processing.
   2
   3    Copyright (C) 1998-2004, 2006-2007, 2009-2019 Free Software Foundation, Inc.
   4
   5    Written by Jim Meyering, 1992.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  19
  20 /* A generic hash table package.  */
  21
  22 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  23    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  24
  25 #include <config.h>
  26
  27 #include "hash.h"
  28
  29 #include "bitrotate.h"
  30 #include "xalloc-oversized.h"
  31
  32 #include <stdint.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35
  36 #if USE_OBSTACK
  37 # include "obstack.h"
  38 # ifndef obstack_chunk_alloc
  39 #  define obstack_chunk_alloc malloc
  40 # endif
  41 # ifndef obstack_chunk_free
  42 #  define obstack_chunk_free free
  43 # endif
  44 #endif
  45
  46 struct hash_entry
  47   {
  48     void *data;
  49     struct hash_entry *next;
  50   };
  51
  52 struct hash_table
  53   {
  54     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
  55        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
  56        are not empty, there are N_ENTRIES active entries in the table.  */
  57     struct hash_entry *bucket;
  58     struct hash_entry const *bucket_limit;
  59     size_t n_buckets;
  60     size_t n_buckets_used;
  61     size_t n_entries;
  62
  63     /* Tuning arguments, kept in a physically separate structure.  */
  64     const Hash_tuning *tuning;
  65
  66     /* Three functions are given to 'hash_initialize', see the documentation
  67        block for this function.  In a word, HASHER randomizes a user entry
  68        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
  69        true if two user entries compare equally; and DATA_FREER is the cleanup
  70        function for a user entry.  */
  71     Hash_hasher hasher;
  72     Hash_comparator comparator;
  73     Hash_data_freer data_freer;
  74
  75     /* A linked list of freed struct hash_entry structs.  */
  76     struct hash_entry *free_entry_list;
  77
  78 #if USE_OBSTACK
  79     /* Whenever obstacks are used, it is possible to allocate all overflowed
  80        entries into a single stack, so they all can be freed in a single
  81        operation.  It is not clear if the speedup is worth the trouble.  */
  82     struct obstack entry_stack;
  83 #endif
  84   };
  85
  86 /* A hash table contains many internal entries, each holding a pointer to
  87    some user-provided data (also called a user entry).  An entry indistinctly
  88    refers to both the internal entry and its associated user entry.  A user
  89    entry contents may be hashed by a randomization function (the hashing
  90    function, or just "hasher" for short) into a number (or "slot") between 0
  91    and the current table size.  At each slot position in the hash table,
  92    starts a linked chain of entries for which the user data all hash to this
  93    slot.  A bucket is the collection of all entries hashing to the same slot.
  94
  95    A good "hasher" function will distribute entries rather evenly in buckets.
  96    In the ideal case, the length of each bucket is roughly the number of
  97    entries divided by the table size.  Finding the slot for a data is usually
  98    done in constant time by the "hasher", and the later finding of a precise
  99    entry is linear in time with the size of the bucket.  Consequently, a
 100    larger hash table size (that is, a larger number of buckets) is prone to
 101    yielding shorter chains, *given* the "hasher" function behaves properly.
 102
 103    Long buckets slow down the lookup algorithm.  One might use big hash table
 104    sizes in hope to reduce the average length of buckets, but this might
 105    become inordinate, as unused slots in the hash table take some space.  The
 106    best bet is to make sure you are using a good "hasher" function (beware
 107    that those are not that easy to write! :-), and to use a table size
 108    larger than the actual number of entries.  */
 109
 110 /* If an insertion makes the ratio of nonempty buckets to table size larger
 111    than the growth threshold (a number between 0.0 and 1.0), then increase
 112    the table size by multiplying by the growth factor (a number greater than
 113    1.0).  The growth threshold defaults to 0.8, and the growth factor
 114    defaults to 1.414, meaning that the table will have doubled its size
 115    every second time 80% of the buckets get used.  */
 116 #define DEFAULT_GROWTH_THRESHOLD 0.8f
 117 #define DEFAULT_GROWTH_FACTOR 1.414f
 118
 119 /* If a deletion empties a bucket and causes the ratio of used buckets to
 120    table size to become smaller than the shrink threshold (a number between
 121    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
 122    number greater than the shrink threshold but smaller than 1.0).  The shrink
 123    threshold and factor default to 0.0 and 1.0, meaning that the table never
 124    shrinks.  */
 125 #define DEFAULT_SHRINK_THRESHOLD 0.0f
 126 #define DEFAULT_SHRINK_FACTOR 1.0f
 127
 128 /* Use this to initialize or reset a TUNING structure to
 129    some sensible values. */
 130 static const Hash_tuning default_tuning =
 131   {
 132     DEFAULT_SHRINK_THRESHOLD,
 133     DEFAULT_SHRINK_FACTOR,
 134     DEFAULT_GROWTH_THRESHOLD,
 135     DEFAULT_GROWTH_FACTOR,
 136     false
 137   };
 138
 139 /* Information and lookup.  */
 140
 141 /* The following few functions provide information about the overall hash
 142    table organization: the number of entries, number of buckets and maximum
 143    length of buckets.  */
 144
 145 /* Return the number of buckets in the hash table.  The table size, the total
 146    number of buckets (used plus unused), or the maximum number of slots, are
 147    the same quantity.  */
 148
 149 size_t
 150 hash_get_n_buckets (const Hash_table *table)
 151 {
 152   return table->n_buckets;
 153 }
 154
 155 /* Return the number of slots in use (non-empty buckets).  */
 156
 157 size_t
 158 hash_get_n_buckets_used (const Hash_table *table)
 159 {
 160   return table->n_buckets_used;
 161 }
 162
 163 /* Return the number of active entries.  */
 164
 165 size_t
 166 hash_get_n_entries (const Hash_table *table)
 167 {
 168   return table->n_entries;
 169 }
 170
 171 /* Return the length of the longest chain (bucket).  */
 172
 173 size_t
 174 hash_get_max_bucket_length (const Hash_table *table)
 175 {
 176   struct hash_entry const *bucket;
 177   size_t max_bucket_length = 0;
 178
 179   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 180     {
 181       if (bucket->data)
 182         {
 183           struct hash_entry const *cursor = bucket;
 184           size_t bucket_length = 1;
 185
 186           while (cursor = cursor->next, cursor)
 187             bucket_length++;
 188
 189           if (bucket_length > max_bucket_length)
 190             max_bucket_length = bucket_length;
 191         }
 192     }
 193
 194   return max_bucket_length;
 195 }
 196
 197 /* Do a mild validation of a hash table, by traversing it and checking two
 198    statistics.  */
 199
 200 bool
 201 hash_table_ok (const Hash_table *table)
 202 {
 203   struct hash_entry const *bucket;
 204   size_t n_buckets_used = 0;
 205   size_t n_entries = 0;
 206
 207   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 208     {
 209       if (bucket->data)
 210         {
 211           struct hash_entry const *cursor = bucket;
 212
 213           /* Count bucket head.  */
 214           n_buckets_used++;
 215           n_entries++;
 216
 217           /* Count bucket overflow.  */
 218           while (cursor = cursor->next, cursor)
 219             n_entries++;
 220         }
 221     }
 222
 223   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 224     return true;
 225
 226   return false;
 227 }
 228
 229 void
 230 hash_print_statistics (const Hash_table *table, FILE *stream)
 231 {
 232   size_t n_entries = hash_get_n_entries (table);
 233   size_t n_buckets = hash_get_n_buckets (table);
 234   size_t n_buckets_used = hash_get_n_buckets_used (table);
 235   size_t max_bucket_length = hash_get_max_bucket_length (table);
 236
 237   fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
 238   fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
 239   fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
 240            (unsigned long int) n_buckets_used,
 241            (100.0 * n_buckets_used) / n_buckets);
 242   fprintf (stream, "max bucket length: %lu\n",
 243            (unsigned long int) max_bucket_length);
 244 }
 245
 246 /* Hash KEY and return a pointer to the selected bucket.
 247    If TABLE->hasher misbehaves, abort.  */
 248 static struct hash_entry *
 249 safe_hasher (const Hash_table *table, const void *key)
 250 {
 251   size_t n = table->hasher (key, table->n_buckets);
 252   if (! (n < table->n_buckets))
 253     abort ();
 254   return table->bucket + n;
 255 }
 256
 257 /* If ENTRY matches an entry already in the hash table, return the
 258    entry from the table.  Otherwise, return NULL.  */
 259
 260 void *
 261 hash_lookup (const Hash_table *table, const void *entry)
 262 {
 263   struct hash_entry const *bucket = safe_hasher (table, entry);
 264   struct hash_entry const *cursor;
 265
 266   if (bucket->data == NULL)
 267     return NULL;
 268
 269   for (cursor = bucket; cursor; cursor = cursor->next)
 270     if (entry == cursor->data || table->comparator (entry, cursor->data))
 271       return cursor->data;
 272
 273   return NULL;
 274 }
 275
 276 /* Walking.  */
 277
 278 /* The functions in this page traverse the hash table and process the
 279    contained entries.  For the traversal to work properly, the hash table
 280    should not be resized nor modified while any particular entry is being
 281    processed.  In particular, entries should not be added, and an entry
 282    may be removed only if there is no shrink threshold and the entry being
 283    removed has already been passed to hash_get_next.  */
 284
 285 /* Return the first data in the table, or NULL if the table is empty.  */
 286
 287 void *
 288 hash_get_first (const Hash_table *table)
 289 {
 290   struct hash_entry const *bucket;
 291
 292   if (table->n_entries == 0)
 293     return NULL;
 294
 295   for (bucket = table->bucket; ; bucket++)
 296     if (! (bucket < table->bucket_limit))
 297       abort ();
 298     else if (bucket->data)
 299       return bucket->data;
 300 }
 301
 302 /* Return the user data for the entry following ENTRY, where ENTRY has been
 303    returned by a previous call to either 'hash_get_first' or 'hash_get_next'.
 304    Return NULL if there are no more entries.  */
 305
 306 void *
 307 hash_get_next (const Hash_table *table, const void *entry)
 308 {
 309   struct hash_entry const *bucket = safe_hasher (table, entry);
 310   struct hash_entry const *cursor;
 311
 312   /* Find next entry in the same bucket.  */
 313   cursor = bucket;
 314   do
 315     {
 316       if (cursor->data == entry && cursor->next)
 317         return cursor->next->data;
 318       cursor = cursor->next;
 319     }
 320   while (cursor != NULL);
 321
 322   /* Find first entry in any subsequent bucket.  */
 323   while (++bucket < table->bucket_limit)
 324     if (bucket->data)
 325       return bucket->data;
 326
 327   /* None found.  */
 328   return NULL;
 329 }
 330
 331 /* Fill BUFFER with pointers to active user entries in the hash table, then
 332    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 333    pointers.  */
 334
 335 size_t
 336 hash_get_entries (const Hash_table *table, void **buffer,
 337                   size_t buffer_size)
 338 {
 339   size_t counter = 0;
 340   struct hash_entry const *bucket;
 341   struct hash_entry const *cursor;
 342
 343   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 344     {
 345       if (bucket->data)
 346         {
 347           for (cursor = bucket; cursor; cursor = cursor->next)
 348             {
 349               if (counter >= buffer_size)
 350                 return counter;
 351               buffer[counter++] = cursor->data;
 352             }
 353         }
 354     }
 355
 356   return counter;
 357 }
 358
 359 /* Call a PROCESSOR function for each entry of a hash table, and return the
 360    number of entries for which the processor function returned success.  A
 361    pointer to some PROCESSOR_DATA which will be made available to each call to
 362    the processor function.  The PROCESSOR accepts two arguments: the first is
 363    the user entry being walked into, the second is the value of PROCESSOR_DATA
 364    as received.  The walking continue for as long as the PROCESSOR function
 365    returns nonzero.  When it returns zero, the walking is interrupted.  */
 366
 367 size_t
 368 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 369                   void *processor_data)
 370 {
 371   size_t counter = 0;
 372   struct hash_entry const *bucket;
 373   struct hash_entry const *cursor;
 374
 375   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 376     {
 377       if (bucket->data)
 378         {
 379           for (cursor = bucket; cursor; cursor = cursor->next)
 380             {
 381               if (! processor (cursor->data, processor_data))
 382                 return counter;
 383               counter++;
 384             }
 385         }
 386     }
 387
 388   return counter;
 389 }
 390
 391 /* Allocation and clean-up.  */
 392
 393 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 394    This is a convenience routine for constructing other hashing functions.  */
 395
 396 #if USE_DIFF_HASH
 397
 398 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 399    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 400    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 401    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 402    may not be good for your application."  */
 403
 404 size_t
 405 hash_string (const char *string, size_t n_buckets)
 406 {
 407 # define HASH_ONE_CHAR(Value, Byte) \
 408   ((Byte) + rotl_sz (Value, 7))
 409
 410   size_t value = 0;
 411   unsigned char ch;
 412
 413   for (; (ch = *string); string++)
 414     value = HASH_ONE_CHAR (value, ch);
 415   return value % n_buckets;
 416
 417 # undef HASH_ONE_CHAR
 418 }
 419
 420 #else /* not USE_DIFF_HASH */
 421
 422 /* This one comes from 'recode', and performs a bit better than the above as
 423    per a few experiments.  It is inspired from a hashing routine found in the
 424    very old Cyber 'snoop', itself written in typical Greg Mansfield style.
 425    (By the way, what happened to this excellent man?  Is he still alive?)  */
 426
 427 size_t
 428 hash_string (const char *string, size_t n_buckets)
 429 {
 430   size_t value = 0;
 431   unsigned char ch;
 432
 433   for (; (ch = *string); string++)
 434     value = (value * 31 + ch) % n_buckets;
 435   return value;
 436 }
 437
 438 #endif /* not USE_DIFF_HASH */
 439
 440 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 441    number at least equal to 11.  */
 442
 443 static bool _GL_ATTRIBUTE_CONST
 444 is_prime (size_t candidate)
 445 {
 446   size_t divisor = 3;
 447   size_t square = divisor * divisor;
 448
 449   while (square < candidate && (candidate % divisor))
 450     {
 451       divisor++;
 452       square += 4 * divisor;
 453       divisor++;
 454     }
 455
 456   return (candidate % divisor ? true : false);
 457 }
 458
 459 /* Round a given CANDIDATE number up to the nearest prime, and return that
 460    prime.  Primes lower than 10 are merely skipped.  */
 461
 462 static size_t _GL_ATTRIBUTE_CONST
 463 next_prime (size_t candidate)
 464 {
 465   /* Skip small primes.  */
 466   if (candidate < 10)
 467     candidate = 10;
 468
 469   /* Make it definitely odd.  */
 470   candidate |= 1;
 471
 472   while (SIZE_MAX != candidate && !is_prime (candidate))
 473     candidate += 2;
 474
 475   return candidate;
 476 }
 477
 478 void
 479 hash_reset_tuning (Hash_tuning *tuning)
 480 {
 481   *tuning = default_tuning;
 482 }
 483
 484 /* If the user passes a NULL hasher, we hash the raw pointer.  */
 485 static size_t
 486 raw_hasher (const void *data, size_t n)
 487 {
 488   /* When hashing unique pointers, it is often the case that they were
 489      generated by malloc and thus have the property that the low-order
 490      bits are 0.  As this tends to give poorer performance with small
 491      tables, we rotate the pointer value before performing division,
 492      in an attempt to improve hash quality.  */
 493   size_t val = rotr_sz ((size_t) data, 3);
 494   return val % n;
 495 }
 496
 497 /* If the user passes a NULL comparator, we use pointer comparison.  */
 498 static bool
 499 raw_comparator (const void *a, const void *b)
 500 {
 501   return a == b;
 502 }
 503
 504
 505 /* For the given hash TABLE, check the user supplied tuning structure for
 506    reasonable values, and return true if there is no gross error with it.
 507    Otherwise, definitively reset the TUNING field to some acceptable default
 508    in the hash table (that is, the user loses the right of further modifying
 509    tuning arguments), and return false.  */
 510
 511 static bool
 512 check_tuning (Hash_table *table)
 513 {
 514   const Hash_tuning *tuning = table->tuning;
 515   float epsilon;
 516   if (tuning == &default_tuning)
 517     return true;
 518
 519   /* Be a bit stricter than mathematics would require, so that
 520      rounding errors in size calculations do not cause allocations to
 521      fail to grow or shrink as they should.  The smallest allocation
 522      is 11 (due to next_prime's algorithm), so an epsilon of 0.1
 523      should be good enough.  */
 524   epsilon = 0.1f;
 525
 526   if (epsilon < tuning->growth_threshold
 527       && tuning->growth_threshold < 1 - epsilon
 528       && 1 + epsilon < tuning->growth_factor
 529       && 0 <= tuning->shrink_threshold
 530       && tuning->shrink_threshold + epsilon < tuning->shrink_factor
 531       && tuning->shrink_factor <= 1
 532       && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
 533     return true;
 534
 535   table->tuning = &default_tuning;
 536   return false;
 537 }
 538
 539 /* Compute the size of the bucket array for the given CANDIDATE and
 540    TUNING, or return 0 if there is no possible way to allocate that
 541    many entries.  */
 542
 543 static size_t _GL_ATTRIBUTE_PURE
 544 compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
 545 {
 546   if (!tuning->is_n_buckets)
 547     {
 548       float new_candidate = candidate / tuning->growth_threshold;
 549       if (SIZE_MAX <= new_candidate)
 550         return 0;
 551       candidate = new_candidate;
 552     }
 553   candidate = next_prime (candidate);
 554   if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
 555     return 0;
 556   return candidate;
 557 }
 558
 559 /* Allocate and return a new hash table, or NULL upon failure.  The initial
 560    number of buckets is automatically selected so as to _guarantee_ that you
 561    may insert at least CANDIDATE different user entries before any growth of
 562    the hash table size occurs.  So, if have a reasonably tight a-priori upper
 563    bound on the number of entries you intend to insert in the hash table, you
 564    may save some table memory and insertion time, by specifying it here.  If
 565    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
 566    argument has its meaning changed to the wanted number of buckets.
 567
 568    TUNING points to a structure of user-supplied values, in case some fine
 569    tuning is wanted over the default behavior of the hasher.  If TUNING is
 570    NULL, the default tuning parameters are used instead.  If TUNING is
 571    provided but the values requested are out of bounds or might cause
 572    rounding errors, return NULL.
 573
 574    The user-supplied HASHER function, when not NULL, accepts two
 575    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
 576    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 577    This slot number is then returned.
 578
 579    The user-supplied COMPARATOR function, when not NULL, accepts two
 580    arguments pointing to user data, it then returns true for a pair of entries
 581    that compare equal, or false otherwise.  This function is internally called
 582    on entries which are already known to hash to the same bucket index,
 583    but which are distinct pointers.
 584
 585    The user-supplied DATA_FREER function, when not NULL, may be later called
 586    with the user data as an argument, just before the entry containing the
 587    data gets freed.  This happens from within 'hash_free' or 'hash_clear'.
 588    You should specify this function only if you want these functions to free
 589    all of your 'data' data.  This is typically the case when your data is
 590    simply an auxiliary struct that you have malloc'd to aggregate several
 591    values.  */
 592
 593 Hash_table *
 594 hash_initialize (size_t candidate, const Hash_tuning *tuning,
 595                  Hash_hasher hasher, Hash_comparator comparator,
 596                  Hash_data_freer data_freer)
 597 {
 598   Hash_table *table;
 599
 600   if (hasher == NULL)
 601     hasher = raw_hasher;
 602   if (comparator == NULL)
 603     comparator = raw_comparator;
 604
 605   table = malloc (sizeof *table);
 606   if (table == NULL)
 607     return NULL;
 608
 609   if (!tuning)
 610     tuning = &default_tuning;
 611   table->tuning = tuning;
 612   if (!check_tuning (table))
 613     {
 614       /* Fail if the tuning options are invalid.  This is the only occasion
 615          when the user gets some feedback about it.  Once the table is created,
 616          if the user provides invalid tuning options, we silently revert to
 617          using the defaults, and ignore further request to change the tuning
 618          options.  */
 619       goto fail;
 620     }
 621
 622   table->n_buckets = compute_bucket_size (candidate, tuning);
 623   if (!table->n_buckets)
 624     goto fail;
 625
 626   table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
 627   if (table->bucket == NULL)
 628     goto fail;
 629   table->bucket_limit = table->bucket + table->n_buckets;
 630   table->n_buckets_used = 0;
 631   table->n_entries = 0;
 632
 633   table->hasher = hasher;
 634   table->comparator = comparator;
 635   table->data_freer = data_freer;
 636
 637   table->free_entry_list = NULL;
 638 #if USE_OBSTACK
 639   obstack_init (&table->entry_stack);
 640 #endif
 641   return table;
 642
 643  fail:
 644   free (table);
 645   return NULL;
 646 }
 647
 648 /* Make all buckets empty, placing any chained entries on the free list.
 649    Apply the user-specified function data_freer (if any) to the datas of any
 650    affected entries.  */
 651
 652 void
 653 hash_clear (Hash_table *table)
 654 {
 655   struct hash_entry *bucket;
 656
 657   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 658     {
 659       if (bucket->data)
 660         {
 661           struct hash_entry *cursor;
 662           struct hash_entry *next;
 663
 664           /* Free the bucket overflow.  */
 665           for (cursor = bucket->next; cursor; cursor = next)
 666             {
 667               if (table->data_freer)
 668                 table->data_freer (cursor->data);
 669               cursor->data = NULL;
 670
 671               next = cursor->next;
 672               /* Relinking is done one entry at a time, as it is to be expected
 673                  that overflows are either rare or short.  */
 674               cursor->next = table->free_entry_list;
 675               table->free_entry_list = cursor;
 676             }
 677
 678           /* Free the bucket head.  */
 679           if (table->data_freer)
 680             table->data_freer (bucket->data);
 681           bucket->data = NULL;
 682           bucket->next = NULL;
 683         }
 684     }
 685
 686   table->n_buckets_used = 0;
 687   table->n_entries = 0;
 688 }
 689
 690 /* Reclaim all storage associated with a hash table.  If a data_freer
 691    function has been supplied by the user when the hash table was created,
 692    this function applies it to the data of each entry before freeing that
 693    entry.  */
 694
 695 void
 696 hash_free (Hash_table *table)
 697 {
 698   struct hash_entry *bucket;
 699   struct hash_entry *cursor;
 700   struct hash_entry *next;
 701
 702   /* Call the user data_freer function.  */
 703   if (table->data_freer && table->n_entries)
 704     {
 705       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 706         {
 707           if (bucket->data)
 708             {
 709               for (cursor = bucket; cursor; cursor = cursor->next)
 710                 table->data_freer (cursor->data);
 711             }
 712         }
 713     }
 714
 715 #if USE_OBSTACK
 716
 717   obstack_free (&table->entry_stack, NULL);
 718
 719 #else
 720
 721   /* Free all bucket overflowed entries.  */
 722   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 723     {
 724       for (cursor = bucket->next; cursor; cursor = next)
 725         {
 726           next = cursor->next;
 727           free (cursor);
 728         }
 729     }
 730
 731   /* Also reclaim the internal list of previously freed entries.  */
 732   for (cursor = table->free_entry_list; cursor; cursor = next)
 733     {
 734       next = cursor->next;
 735       free (cursor);
 736     }
 737
 738 #endif
 739
 740   /* Free the remainder of the hash table structure.  */
 741   free (table->bucket);
 742   free (table);
 743 }
 744
 745 /* Insertion and deletion.  */
 746
 747 /* Get a new hash entry for a bucket overflow, possibly by recycling a
 748    previously freed one.  If this is not possible, allocate a new one.  */
 749
 750 static struct hash_entry *
 751 allocate_entry (Hash_table *table)
 752 {
 753   struct hash_entry *new;
 754
 755   if (table->free_entry_list)
 756     {
 757       new = table->free_entry_list;
 758       table->free_entry_list = new->next;
 759     }
 760   else
 761     {
 762 #if USE_OBSTACK
 763       new = obstack_alloc (&table->entry_stack, sizeof *new);
 764 #else
 765       new = malloc (sizeof *new);
 766 #endif
 767     }
 768
 769   return new;
 770 }
 771
 772 /* Free a hash entry which was part of some bucket overflow,
 773    saving it for later recycling.  */
 774
 775 static void
 776 free_entry (Hash_table *table, struct hash_entry *entry)
 777 {
 778   entry->data = NULL;
 779   entry->next = table->free_entry_list;
 780   table->free_entry_list = entry;
 781 }
 782
 783 /* This private function is used to help with insertion and deletion.  When
 784    ENTRY matches an entry in the table, return a pointer to the corresponding
 785    user data and set *BUCKET_HEAD to the head of the selected bucket.
 786    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 787    the table, unlink the matching entry.  */
 788
 789 static void *
 790 hash_find_entry (Hash_table *table, const void *entry,
 791                  struct hash_entry **bucket_head, bool delete)
 792 {
 793   struct hash_entry *bucket = safe_hasher (table, entry);
 794   struct hash_entry *cursor;
 795
 796   *bucket_head = bucket;
 797
 798   /* Test for empty bucket.  */
 799   if (bucket->data == NULL)
 800     return NULL;
 801
 802   /* See if the entry is the first in the bucket.  */
 803   if (entry == bucket->data || table->comparator (entry, bucket->data))
 804     {
 805       void *data = bucket->data;
 806
 807       if (delete)
 808         {
 809           if (bucket->next)
 810             {
 811               struct hash_entry *next = bucket->next;
 812
 813               /* Bump the first overflow entry into the bucket head, then save
 814                  the previous first overflow entry for later recycling.  */
 815               *bucket = *next;
 816               free_entry (table, next);
 817             }
 818           else
 819             {
 820               bucket->data = NULL;
 821             }
 822         }
 823
 824       return data;
 825     }
 826
 827   /* Scan the bucket overflow.  */
 828   for (cursor = bucket; cursor->next; cursor = cursor->next)
 829     {
 830       if (entry == cursor->next->data
 831           || table->comparator (entry, cursor->next->data))
 832         {
 833           void *data = cursor->next->data;
 834
 835           if (delete)
 836             {
 837               struct hash_entry *next = cursor->next;
 838
 839               /* Unlink the entry to delete, then save the freed entry for later
 840                  recycling.  */
 841               cursor->next = next->next;
 842               free_entry (table, next);
 843             }
 844
 845           return data;
 846         }
 847     }
 848
 849   /* No entry found.  */
 850   return NULL;
 851 }
 852
 853 /* Internal helper, to move entries from SRC to DST.  Both tables must
 854    share the same free entry list.  If SAFE, only move overflow
 855    entries, saving bucket heads for later, so that no allocations will
 856    occur.  Return false if the free entry list is exhausted and an
 857    allocation fails.  */
 858
 859 static bool
 860 transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
 861 {
 862   struct hash_entry *bucket;
 863   struct hash_entry *cursor;
 864   struct hash_entry *next;
 865   for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
 866     if (bucket->data)
 867       {
 868         void *data;
 869         struct hash_entry *new_bucket;
 870
 871         /* Within each bucket, transfer overflow entries first and
 872            then the bucket head, to minimize memory pressure.  After
 873            all, the only time we might allocate is when moving the
 874            bucket head, but moving overflow entries first may create
 875            free entries that can be recycled by the time we finally
 876            get to the bucket head.  */
 877         for (cursor = bucket->next; cursor; cursor = next)
 878           {
 879             data = cursor->data;
 880             new_bucket = safe_hasher (dst, data);
 881
 882             next = cursor->next;
 883
 884             if (new_bucket->data)
 885               {
 886                 /* Merely relink an existing entry, when moving from a
 887                    bucket overflow into a bucket overflow.  */
 888                 cursor->next = new_bucket->next;
 889                 new_bucket->next = cursor;
 890               }
 891             else
 892               {
 893                 /* Free an existing entry, when moving from a bucket
 894                    overflow into a bucket header.  */
 895                 new_bucket->data = data;
 896                 dst->n_buckets_used++;
 897                 free_entry (dst, cursor);
 898               }
 899           }
 900         /* Now move the bucket head.  Be sure that if we fail due to
 901            allocation failure that the src table is in a consistent
 902            state.  */
 903         data = bucket->data;
 904         bucket->next = NULL;
 905         if (safe)
 906           continue;
 907         new_bucket = safe_hasher (dst, data);
 908
 909         if (new_bucket->data)
 910           {
 911             /* Allocate or recycle an entry, when moving from a bucket
 912                header into a bucket overflow.  */
 913             struct hash_entry *new_entry = allocate_entry (dst);
 914
 915             if (new_entry == NULL)
 916               return false;
 917
 918             new_entry->data = data;
 919             new_entry->next = new_bucket->next;
 920             new_bucket->next = new_entry;
 921           }
 922         else
 923           {
 924             /* Move from one bucket header to another.  */
 925             new_bucket->data = data;
 926             dst->n_buckets_used++;
 927           }
 928         bucket->data = NULL;
 929         src->n_buckets_used--;
 930       }
 931   return true;
 932 }
 933
 934 /* For an already existing hash table, change the number of buckets through
 935    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 936    new number of buckets is automatically selected so as to _guarantee_ that
 937    the table may receive at least CANDIDATE different user entries, including
 938    those already in the table, before any other growth of the hash table size
 939    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
 940    exact number of buckets desired.  Return true iff the rehash succeeded.  */
 941
 942 bool
 943 hash_rehash (Hash_table *table, size_t candidate)
 944 {
 945   Hash_table storage;
 946   Hash_table *new_table;
 947   size_t new_size = compute_bucket_size (candidate, table->tuning);
 948
 949   if (!new_size)
 950     return false;
 951   if (new_size == table->n_buckets)
 952     return true;
 953   new_table = &storage;
 954   new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
 955   if (new_table->bucket == NULL)
 956     return false;
 957   new_table->n_buckets = new_size;
 958   new_table->bucket_limit = new_table->bucket + new_size;
 959   new_table->n_buckets_used = 0;
 960   new_table->n_entries = 0;
 961   new_table->tuning = table->tuning;
 962   new_table->hasher = table->hasher;
 963   new_table->comparator = table->comparator;
 964   new_table->data_freer = table->data_freer;
 965
 966   /* In order for the transfer to successfully complete, we need
 967      additional overflow entries when distinct buckets in the old
 968      table collide into a common bucket in the new table.  The worst
 969      case possible is a hasher that gives a good spread with the old
 970      size, but returns a constant with the new size; if we were to
 971      guarantee table->n_buckets_used-1 free entries in advance, then
 972      the transfer would be guaranteed to not allocate memory.
 973      However, for large tables, a guarantee of no further allocation
 974      introduces a lot of extra memory pressure, all for an unlikely
 975      corner case (most rehashes reduce, rather than increase, the
 976      number of overflow entries needed).  So, we instead ensure that
 977      the transfer process can be reversed if we hit a memory
 978      allocation failure mid-transfer.  */
 979
 980   /* Merely reuse the extra old space into the new table.  */
 981 #if USE_OBSTACK
 982   new_table->entry_stack = table->entry_stack;
 983 #endif
 984   new_table->free_entry_list = table->free_entry_list;
 985
 986   if (transfer_entries (new_table, table, false))
 987     {
 988       /* Entries transferred successfully; tie up the loose ends.  */
 989       free (table->bucket);
 990       table->bucket = new_table->bucket;
 991       table->bucket_limit = new_table->bucket_limit;
 992       table->n_buckets = new_table->n_buckets;
 993       table->n_buckets_used = new_table->n_buckets_used;
 994       table->free_entry_list = new_table->free_entry_list;
 995       /* table->n_entries and table->entry_stack already hold their value.  */
 996       return true;
 997     }
 998
 999   /* We've allocated new_table->bucket (and possibly some entries),
1000      exhausted the free list, and moved some but not all entries into
1001      new_table.  We must undo the partial move before returning
1002      failure.  The only way to get into this situation is if new_table
1003      uses fewer buckets than the old table, so we will reclaim some
1004      free entries as overflows in the new table are put back into
1005      distinct buckets in the old table.
1006
1007      There are some pathological cases where a single pass through the
1008      table requires more intermediate overflow entries than using two
1009      passes.  Two passes give worse cache performance and takes
1010      longer, but at this point, we're already out of memory, so slow
1011      and safe is better than failure.  */
1012   table->free_entry_list = new_table->free_entry_list;
1013   if (! (transfer_entries (table, new_table, true)
1014          && transfer_entries (table, new_table, false)))
1015     abort ();
1016   /* table->n_entries already holds its value.  */
1017   free (new_table->bucket);
1018   return false;
1019 }
1020
1021 /* Insert ENTRY into hash TABLE if there is not already a matching entry.
1022
1023    Return -1 upon memory allocation failure.
1024    Return 1 if insertion succeeded.
1025    Return 0 if there is already a matching entry in the table,
1026    and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT
1027    to that entry.
1028
1029    This interface is easier to use than hash_insert when you must
1030    distinguish between the latter two cases.  More importantly,
1031    hash_insert is unusable for some types of ENTRY values.  When using
1032    hash_insert, the only way to distinguish those cases is to compare
1033    the return value and ENTRY.  That works only when you can have two
1034    different ENTRY values that point to data that compares "equal".  Thus,
1035    when the ENTRY value is a simple scalar, you must use
1036    hash_insert_if_absent.  ENTRY must not be NULL.  */
1037 int
1038 hash_insert_if_absent (Hash_table *table, void const *entry,
1039                        void const **matched_ent)
1040 {
1041   void *data;
1042   struct hash_entry *bucket;
1043
1044   /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
1045      to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
1046      to indicate an empty bucket.  */
1047   if (! entry)
1048     abort ();
1049
1050   /* If there's a matching entry already in the table, return that.  */
1051   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
1052     {
1053       if (matched_ent)
1054         *matched_ent = data;
1055       return 0;
1056     }
1057
1058   /* If the growth threshold of the buckets in use has been reached, increase
1059      the table size and rehash.  There's no point in checking the number of
1060      entries:  if the hashing function is ill-conditioned, rehashing is not
1061      likely to improve it.  */
1062
1063   if (table->n_buckets_used
1064       > table->tuning->growth_threshold * table->n_buckets)
1065     {
1066       /* Check more fully, before starting real work.  If tuning arguments
1067          became invalid, the second check will rely on proper defaults.  */
1068       check_tuning (table);
1069       if (table->n_buckets_used
1070           > table->tuning->growth_threshold * table->n_buckets)
1071         {
1072           const Hash_tuning *tuning = table->tuning;
1073           float candidate =
1074             (tuning->is_n_buckets
1075              ? (table->n_buckets * tuning->growth_factor)
1076              : (table->n_buckets * tuning->growth_factor
1077                 * tuning->growth_threshold));
1078
1079           if (SIZE_MAX <= candidate)
1080             return -1;
1081
1082           /* If the rehash fails, arrange to return NULL.  */
1083           if (!hash_rehash (table, candidate))
1084             return -1;
1085
1086           /* Update the bucket we are interested in.  */
1087           if (hash_find_entry (table, entry, &bucket, false) != NULL)
1088             abort ();
1089         }
1090     }
1091
1092   /* ENTRY is not matched, it should be inserted.  */
1093
1094   if (bucket->data)
1095     {
1096       struct hash_entry *new_entry = allocate_entry (table);
1097
1098       if (new_entry == NULL)
1099         return -1;
1100
1101       /* Add ENTRY in the overflow of the bucket.  */
1102
1103       new_entry->data = (void *) entry;
1104       new_entry->next = bucket->next;
1105       bucket->next = new_entry;
1106       table->n_entries++;
1107       return 1;
1108     }
1109
1110   /* Add ENTRY right in the bucket head.  */
1111
1112   bucket->data = (void *) entry;
1113   table->n_entries++;
1114   table->n_buckets_used++;
1115
1116   return 1;
1117 }
1118
1119 /* If ENTRY matches an entry already in the hash table, return the pointer
1120    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
1121    Return NULL if the storage required for insertion cannot be allocated.
1122    This implementation does not support duplicate entries or insertion of
1123    NULL.  */
1124
1125 void *
1126 hash_insert (Hash_table *table, void const *entry)
1127 {
1128   void const *matched_ent;
1129   int err = hash_insert_if_absent (table, entry, &matched_ent);
1130   return (err == -1
1131           ? NULL
1132           : (void *) (err == 0 ? matched_ent : entry));
1133 }
1134
1135 /* If ENTRY is already in the table, remove it and return the just-deleted
1136    data (the user may want to deallocate its storage).  If ENTRY is not in the
1137    table, don't modify the table and return NULL.  */
1138
1139 void *
1140 hash_delete (Hash_table *table, const void *entry)
1141 {
1142   void *data;
1143   struct hash_entry *bucket;
1144
1145   data = hash_find_entry (table, entry, &bucket, true);
1146   if (!data)
1147     return NULL;
1148
1149   table->n_entries--;
1150   if (!bucket->data)
1151     {
1152       table->n_buckets_used--;
1153
1154       /* If the shrink threshold of the buckets in use has been reached,
1155          rehash into a smaller table.  */
1156
1157       if (table->n_buckets_used
1158           < table->tuning->shrink_threshold * table->n_buckets)
1159         {
1160           /* Check more fully, before starting real work.  If tuning arguments
1161              became invalid, the second check will rely on proper defaults.  */
1162           check_tuning (table);
1163           if (table->n_buckets_used
1164               < table->tuning->shrink_threshold * table->n_buckets)
1165             {
1166               const Hash_tuning *tuning = table->tuning;
1167               size_t candidate =
1168                 (tuning->is_n_buckets
1169                  ? table->n_buckets * tuning->shrink_factor
1170                  : (table->n_buckets * tuning->shrink_factor
1171                     * tuning->growth_threshold));
1172
1173               if (!hash_rehash (table, candidate))
1174                 {
1175                   /* Failure to allocate memory in an attempt to
1176                      shrink the table is not fatal.  But since memory
1177                      is low, we can at least be kind and free any
1178                      spare entries, rather than keeping them tied up
1179                      in the free entry list.  */
1180 #if ! USE_OBSTACK
1181                   struct hash_entry *cursor = table->free_entry_list;
1182                   struct hash_entry *next;
1183                   while (cursor)
1184                     {
1185                       next = cursor->next;
1186                       free (cursor);
1187                       cursor = next;
1188                     }
1189                   table->free_entry_list = NULL;
1190 #endif
1191                 }
1192             }
1193         }
1194     }
1195
1196   return data;
1197 }
1198
1199 /* Testing.  */
1200
1201 #if TESTING
1202
1203 void
1204 hash_print (const Hash_table *table)
1205 {
1206   struct hash_entry *bucket = (struct hash_entry *) table->bucket;
1207
1208   for ( ; bucket < table->bucket_limit; bucket++)
1209     {
1210       struct hash_entry *cursor;
1211
1212       if (bucket)
1213         printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1214
1215       for (cursor = bucket; cursor; cursor = cursor->next)
1216         {
1217           char const *s = cursor->data;
1218           /* FIXME */
1219           if (s)
1220             printf ("  %s\n", s);
1221         }
1222     }
1223 }
1224
1225 #endif /* TESTING */