/* GNU m4 -- A simple macro processor
   Copyright (C) 2001, 2006-2010, 2013-2014, 2017 Free Software
   Foundation, Inc.
   Written by Gary V. Vaughan <gary@gnu.org>

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* TODO:
   - Use an obstack to manage the node memory.
   - Implement the macroized magic values with the API.
*/

#include <config.h>

#include "hash.h"
#include "m4private.h"

#include "bitrotate.h"
#include <limits.h>

typedef struct hash_node hash_node;

struct m4_hash
{
  size_t size;                  /* number of buckets allocated */
  size_t length;                /* number of elements inserted */
  m4_hash_hash_func *hash_func;
  m4_hash_cmp_func *cmp_func;
  hash_node **buckets;
#ifndef NDEBUG
  m4_hash_iterator *iter;       /* current iterator */
#endif
};

struct hash_node
{
  hash_node *next;
  const void *key;
  void *value;
};

struct m4_hash_iterator
{
  const m4_hash *hash;          /* contains the buckets */
  hash_node *place;             /* the node we are about to return */
  hash_node *next;              /* the next node, in case PLACE is removed */
  size_t next_bucket;           /* the next bucket index following NEXT */
#ifndef NDEBUG
  m4_hash_iterator *chain;      /* multiple iterators visiting one hash */
#endif
};

#define HASH_SIZE(hash)         ((hash)->size)
#define HASH_LENGTH(hash)       ((hash)->length)
#define HASH_BUCKETS(hash)      ((hash)->buckets)
#define HASH_HASH_FUNC(hash)    ((hash)->hash_func)
#define HASH_CMP_FUNC(hash)     ((hash)->cmp_func)

#define NODE_NEXT(node)         ((node)->next)
#define NODE_KEY(node)          ((node)->key)
#define NODE_VALUE(node)        ((node)->value)

#define ITERATOR_HASH(i)        ((i)->hash)
#define ITERATOR_PLACE(i)       ((i)->place)
#define ITERATOR_NEXT(i)        ((i)->next)
#define ITERATOR_NEXT_BUCKET(i) ((i)->next_bucket)

/*#define ITERATOR_NEXT_NEXT(i)   NODE_NEXT (ITERATOR_PLACE (i))*/

/* Helper macros.  */
#define BUCKET_NTH(hash, n)     (HASH_BUCKETS (hash)[n])
#define BUCKET_COUNT(hash, key)                                         \
  ((*HASH_HASH_FUNC (hash)) (key) % HASH_SIZE (hash))
#define BUCKET_KEY(hash, key)                                           \
  (BUCKET_NTH ((hash), BUCKET_COUNT ((hash), (key))))

/* Debugging macros.  */
#ifdef NDEBUG
# define HASH_ITER(hash)        0
# define ITER_CHAIN(iter)       0
#else
# define HASH_ITER(hash)        (((m4_hash *) hash)->iter)
# define ITER_CHAIN(iter)       ((iter)->chain)
#endif

static void        bucket_insert (m4_hash *hash, hash_node *bucket);
static void        bucket_delete (m4_hash *hash, size_t i);
static hash_node * node_new      (const void *key, void *value);
static void        node_insert   (m4_hash *hash, hash_node *node);
static hash_node * node_lookup   (m4_hash *hash, const void *key);
static void        node_delete   (m4_hash *hash, hash_node *node);
static void        maybe_grow    (m4_hash *hash);


/* Deleted nodes are not released immediately: they are pushed onto
   this list and reused by later insertions, until m4_hash_exit frees
   them for good.  */
static hash_node *free_list = NULL;

/* Allocate and return a new, unpopulated but initialised m4_hash with
   SIZE buckets, where HASH_FUNC will be used to generate bucket numbers
   and CMP_FUNC will be called to compare keys.  */
m4_hash *
m4_hash_new (size_t size, m4_hash_hash_func *hash_func,
             m4_hash_cmp_func *cmp_func)
{
  m4_hash *hash;

  assert (hash_func);
  assert (cmp_func);

  if (size == 0)
    size = M4_HASH_DEFAULT_SIZE;

  hash                  = (m4_hash *) xmalloc (sizeof *hash);
  HASH_SIZE (hash)      = size;
  HASH_LENGTH (hash)    = 0;
  HASH_BUCKETS (hash)   = (hash_node **) xcalloc (size,
                                                  sizeof *HASH_BUCKETS (hash));
  HASH_HASH_FUNC (hash) = hash_func;
  HASH_CMP_FUNC (hash)  = cmp_func;
#ifndef NDEBUG
  HASH_ITER (hash)      = NULL;
#endif

  return hash;
}

/* Create a new, empty hash table with the same size and callback
   functions as SRC, then apply COPY to every entry of SRC with the new
   table as its data argument, so that COPY can insert each entry.  */
m4_hash *
m4_hash_dup (m4_hash *src, m4_hash_copy_func *copy)
{
  m4_hash *dest;

  assert (src);
  assert (copy);

  dest = m4_hash_new (HASH_SIZE (src), HASH_HASH_FUNC (src),
                      HASH_CMP_FUNC (src));

  m4_hash_apply (src, (m4_hash_apply_func *) copy, dest);

  return dest;
}

/* Recycle each of the nodes in HASH onto the free list, and release
   the rest of the memory used by the table.  Memory addressed by the
   recycled nodes is _NOT_ freed: this needs to be done manually to
   prevent memory leaks.  This is not safe to call while HASH is being
   iterated.  */
void
m4_hash_delete (m4_hash *hash)
{
  size_t i;

  assert (hash);
  assert (!HASH_ITER (hash));

  for (i = 0; i < HASH_SIZE (hash); ++i)
    if (BUCKET_NTH (hash, i))
      bucket_delete (hash, i);
  free (HASH_BUCKETS (hash));
  free (hash);
}

/* Check that the nodes in bucket I have been cleared, and recycle
   each of the nodes in the bucket to the free list.  Bucket I must
   not be empty when this function is called.  */
static void
bucket_delete (m4_hash *hash, size_t i)
{
  hash_node *node;

  assert (hash);
  assert (BUCKET_NTH (hash, i));
  assert (i < HASH_SIZE (hash));

  for (node = BUCKET_NTH (hash, i); node->next; node = NODE_NEXT (node))
    {
      assert (NODE_KEY (node) == NULL);
      --HASH_LENGTH (hash);
    }

  assert (NODE_KEY (node) == NULL);
  --HASH_LENGTH (hash);

  NODE_NEXT (node) = free_list;
  free_list = BUCKET_NTH (hash, i);
  BUCKET_NTH (hash, i) = NULL;
}

/* Create and initialise a new node with KEY and VALUE, by reusing a
   node from the free list if possible.  */
static hash_node *
node_new (const void *key, void *value)
{
  hash_node *node = NULL;

  if (free_list)
    {
      node = free_list;
      free_list = NODE_NEXT (free_list);
    }
  else
    node = (hash_node *) xmalloc (sizeof *node);

  assert (node);

  NODE_NEXT (node)  = NULL;
  NODE_KEY (node)   = key;
  NODE_VALUE (node) = value;

  return node;
}

/* Check that NODE has been cleared, and recycle it to the free list.  */
static void
node_delete (m4_hash *hash, hash_node *node)
{
  assert (node);
  assert (NODE_KEY (node) == NULL);

  NODE_NEXT (node) = free_list;
  free_list = node;

  --HASH_LENGTH (hash);
}

/* Create a new entry in HASH with KEY and VALUE, making use of nodes
   in the free list if possible, and potentially growing the size of
   the table if node density is too high.  This is not safe to call
   while HASH is being iterated.  Currently, it is not safe to call
   this if another entry already matches KEY.  */
const void *
m4_hash_insert (m4_hash *hash, const void *key, void *value)
{
  hash_node *node;

  assert (hash);
  assert (!HASH_ITER (hash));

  node = node_new (key, value);
  node_insert (hash, node);
  maybe_grow (hash);

  return key;
}

/* Push the unconnected NODE on to the front of the appropriate
   bucket, effectively preventing retrieval of other nodes with
   the same key (where "sameness" is determined by HASH's
   cmp_func).  */
static void
node_insert (m4_hash *hash, hash_node *node)
{
  size_t n;

  assert (hash);
  assert (node);
  assert (NODE_NEXT (node) == NULL);

  n = BUCKET_COUNT (hash, NODE_KEY (node));
  NODE_NEXT (node)     = BUCKET_NTH (hash, n);
  BUCKET_NTH (hash, n) = node;

  ++HASH_LENGTH (hash);
}

/* Remove from HASH the first node with key KEY, comparing keys with
   HASH's cmp_func.  Any nodes with the same KEY previously hidden by
   the removed node will become visible again.  The key field of the
   removed node is returned, or NULL if there was no match.  This is
   unsafe if multiple iterators are visiting HASH, or when a lone
   iterator is visiting on a different key.  */
void *
m4_hash_remove (m4_hash *hash, const void *key)
{
  size_t n;
  hash_node *node = NULL;

#ifndef NDEBUG
  m4_hash_iterator *iter = HASH_ITER (hash);

  assert (hash);
  if (HASH_ITER (hash))
    {
      assert (!ITER_CHAIN (iter));
      assert (ITERATOR_PLACE (iter));
    }
#endif

  n = BUCKET_COUNT (hash, key);

  do
    {
      hash_node *next = node ? NODE_NEXT (node) : BUCKET_NTH (hash, n);

      if (next && ((*HASH_CMP_FUNC (hash)) (NODE_KEY (next), key) == 0))
        {
          if (node)
            NODE_NEXT (node) = NODE_NEXT (next);
          else
            BUCKET_NTH (hash, n) = NODE_NEXT (next);

          key = NODE_KEY (next);
#ifndef NDEBUG
          if (iter)
            assert (ITERATOR_PLACE (iter) == next);
          NODE_KEY (next) = NULL;
#endif
          node_delete (hash, next);
          return (void *) key;  /* Cast away const.  */
        }
      node = next;
    }
  while (node);

  return NULL;
}

/* Return the address of the value field of the first node in HASH
   that has a matching KEY.  The address is returned so that an
   explicit NULL value can be distinguished from a failed lookup (also
   NULL).  Fortuitously for M4, this also means that the value field
   can be changed `in situ' to implement a value stack.  Safe to call
   even when an iterator is in force.  */
void **
m4_hash_lookup (m4_hash *hash, const void *key)
{
  hash_node *node;

  assert (hash);

  node = node_lookup (hash, key);

  return node ? &NODE_VALUE (node) : NULL;
}

/* Return the first node in HASH that has a matching KEY.  */
static hash_node *
node_lookup (m4_hash *hash, const void *key)
{
  hash_node *node;

  assert (hash);

  node = BUCKET_KEY (hash, key);

  while (node && (*HASH_CMP_FUNC (hash)) (NODE_KEY (node), key))
    node = NODE_NEXT (node);

  return node;
}

/* Return the number of entries currently contained by HASH.  Safe to
   call even during an iteration.  */
size_t M4_GNUC_PURE
m4_get_hash_length (m4_hash *hash)
{
  assert (hash);

  return HASH_LENGTH (hash);
}

#if 0
/* Force the number of buckets to be the given value.  You probably ought
   not to be using this function once the table has been in use, since
   the maximum density algorithm will grow the number of buckets back to
   what was there before if you try to shrink the table.  It is useful
   to set a smaller or larger initial size if you know in advance what
   order of magnitude of entries will be in the table.  Be aware that
   the efficiency of the lookup and grow features requires that the size
   always be 1 less than a power of 2.  Unsafe if HASH is being visited
   by an iterator.  */
void
m4_hash_resize (m4_hash *hash, size_t size)
{
  hash_node **original_buckets;
  size_t original_size;

  assert (hash);
  assert (!HASH_ITER (hash));

  original_size    = HASH_SIZE (hash);
  original_buckets = HASH_BUCKETS (hash);

  HASH_SIZE (hash)    = size;
  HASH_BUCKETS (hash) = (hash_node **) xcalloc (size,
                                                sizeof *HASH_BUCKETS (hash));

  {
    size_t i;
    for (i = 0; i < original_size; ++i)
      if (original_buckets[i])
        bucket_insert (hash, original_buckets[i]);
  }

  free (original_buckets);
}
#endif

/* If the node density breaks the threshold, increase the size of
   HASH and repopulate with the original nodes.  */
static void
maybe_grow (m4_hash *hash)
{
  float nodes_per_bucket;

  assert (hash);

  nodes_per_bucket = (float) HASH_LENGTH (hash) / (float) HASH_SIZE (hash);

  if (nodes_per_bucket > (float) M4_HASH_MAXIMUM_DENSITY)
    {
      size_t original_size = HASH_SIZE (hash);
      hash_node **original_buckets = HASH_BUCKETS (hash);

      /* HASH sizes are always 1 less than a power of 2.  */
      HASH_SIZE (hash) = (2 * (1 + original_size)) - 1;
      HASH_BUCKETS (hash) =
        (hash_node **) xcalloc (HASH_SIZE (hash), sizeof *HASH_BUCKETS (hash));

      {
        size_t i;
        for (i = 0; i < original_size; ++i)
          if (original_buckets[i])
            bucket_insert (hash, original_buckets[i]);
      }

      free (original_buckets);
    }
}

/* Insert each node in BUCKET into HASH.  Relative ordering of nodes
   is not preserved.  This would need to change if we were to
   guarantee relative ordering within a bucket for identical keys.  */
static void
bucket_insert (m4_hash *hash, hash_node *bucket)
{
  assert (hash);
  assert (bucket);

  do
    {
      hash_node *next = NODE_NEXT (bucket);

      /* Break link to rest of the bucket before reinserting.  */
      NODE_NEXT (bucket) = NULL;
      node_insert (hash, bucket);

      bucket = next;
    }
  while (bucket);
}

/* Reclaim all memory used by free nodes.  Safe to call at any time,
   although only worth calling at program shutdown to verify no
   leaks.  */
void
m4_hash_exit (void)
{
  while (free_list)
    {
      hash_node *stale = free_list;
      free_list = NODE_NEXT (stale);
      free (stale);
    }
}

/* Iterate over a given HASH.  Start with PLACE being NULL, then
   repeat with PLACE being the previous return value.  The return
   value is the current location of the iterator, or NULL when the
   walk is complete.  Call m4_free_hash_iterator to abort iteration.
   During the iteration, it is safe to search the list, and if no
   other iterator is active, it is safe to remove the key pointed to
   by this iterator.  All other actions that modify HASH are
   unsafe.  */
m4_hash_iterator *
m4_get_hash_iterator_next (const m4_hash *hash, m4_hash_iterator *place)
{
  assert (hash);
  assert (!place || (ITERATOR_HASH (place) == hash));

  /* On the first iteration, allocate an iterator.  */
  if (!place)
    {
      place = (m4_hash_iterator *) xzalloc (sizeof *place);
      ITERATOR_HASH (place) = hash;
#ifndef NDEBUG
      ITER_CHAIN (place) = HASH_ITER (hash);
      HASH_ITER (hash) = place;
#endif
    }

 next:
  ITERATOR_PLACE (place) = ITERATOR_NEXT (place);

  /* If there is another node in the current bucket, select it.  */
  if (ITERATOR_NEXT (place) && NODE_NEXT (ITERATOR_NEXT (place)))
    {
      ITERATOR_NEXT (place) = NODE_NEXT (ITERATOR_NEXT (place));
    }
  else
    {
      /* Find the next non-empty bucket.  */
      while ((ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash))
             && (BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place)) == NULL))
        {
          ++ITERATOR_NEXT_BUCKET (place);
        }

      /* Select the first node in the new bucket.  */
      if (ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash))
        {
          ITERATOR_NEXT (place)
            = BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place));
        }
      else
        ITERATOR_NEXT (place) = NULL;

      /* Advance the `next' reference.  */
      ++ITERATOR_NEXT_BUCKET (place);
    }

  /* If there are no more nodes to return, recycle the iterator memory.  */
  if (! (ITERATOR_PLACE (place) || ITERATOR_NEXT (place)))
    {
      m4_free_hash_iterator (hash, place);
      return NULL;
    }

  /* On the first call we need to put the 1st node in PLACE and
     the 2nd node in NEXT.  */
  if (ITERATOR_NEXT (place) && !ITERATOR_PLACE (place))
    goto next;

  assert (place && ITERATOR_PLACE (place));

  return place;
}

/* Clean up the iterator PLACE within HASH when aborting an iteration
   early.  */
void
m4_free_hash_iterator (const m4_hash *hash, m4_hash_iterator *place)
{
#ifndef NDEBUG
  m4_hash_iterator *iter = NULL;
  m4_hash_iterator *next;

  assert (hash);
  assert (place && (ITERATOR_HASH (place) == hash));

  do
    {
      next = iter ? ITER_CHAIN (iter) : HASH_ITER (hash);
      if (place == next)
        {
          if (iter)
            ITER_CHAIN (iter) = ITER_CHAIN (next);
          else
            HASH_ITER (hash) = ITER_CHAIN (next);
          break;
        }
      iter = next;
    }
  while (iter);
  assert (next);
#endif
  free (place);
}

/* Return the key being visited by the iterator PLACE.  */
const void * M4_GNUC_PURE
m4_get_hash_iterator_key (m4_hash_iterator *place)
{
  assert (place);

  return NODE_KEY (ITERATOR_PLACE (place));
}

/* Return the value being visited by the iterator PLACE.  */
void * M4_GNUC_PURE
m4_get_hash_iterator_value (m4_hash_iterator *place)
{
  assert (place);

  return NODE_VALUE (ITERATOR_PLACE (place));
}

/* Apply FUNC to each and every entry in HASH.  This function traverses
   the table and calls FUNC once per entry, passing it the table, the
   entry's key and value, and the USERDATA argument.  If FUNC returns
   non-NULL, abort the iteration and return that value; a return of
   NULL implies success on all entries.  */
void *
m4_hash_apply (m4_hash *hash, m4_hash_apply_func *func, void *userdata)
{
  m4_hash_iterator *place = NULL;
  void *result = NULL;

  assert (hash);
  assert (func);

  while ((place = m4_get_hash_iterator_next (hash, place)))
    {
      result = (*func) (hash, m4_get_hash_iterator_key (place),
                        m4_get_hash_iterator_value (place), userdata);

      if (result != NULL)
        {
          m4_free_hash_iterator (hash, place);
          break;
        }
    }

  return result;
}

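/* Illustrative sketch only, not part of the original file: the shape of
   a callback that m4_hash_apply accepts, inferred from the call above
   rather than from hash.h, so treat the exact signature as an
   assumption.  This hypothetical visitor counts entries through
   USERDATA and returns NULL so the walk always runs to completion.
   Kept inside "#if 0", like m4_hash_resize above, since nothing in this
   file references it.  */
#if 0
static void *
count_entries_visitor (m4_hash *hash, const void *key, void *value,
                       void *userdata)
{
  (void) hash;                  /* The table itself is not needed here.  */
  (void) key;
  (void) value;
  ++*(size_t *) userdata;       /* Bump the caller's counter.  */
  return NULL;                  /* NULL means "keep iterating".  */
}

/* Possible use:  size_t n = 0;
                  m4_hash_apply (table, count_entries_visitor, &n);  */
#endif
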
/* Using a string (char * and size_t pair) as the hash key is common
   enough that we provide implementations here for use in client hash
   table routines.  */

/* Return a hash value for a string, similar to gnulib's hash module,
   but with length factored in.  */
size_t M4_GNUC_PURE
m4_hash_string_hash (const void *ptr)
{
  const m4_string *key = (const m4_string *) ptr;
  const char *s = key->str;
  size_t len = key->len;
  size_t val = len;

  while (len--)
    val = rotl_sz (val, 7) + to_uchar (*s++);
  return val;
}

/* Comparison function for hash keys -- used by the underlying
   hash table ADT when searching for a key match during name lookup.  */
int M4_GNUC_PURE
m4_hash_string_cmp (const void *key, const void *try)
{
  const m4_string *a = (const m4_string *) key;
  const m4_string *b = (const m4_string *) try;
  if (a->len < b->len)
    return -1;
  if (b->len < a->len)
    return 1;
  return memcmp (a->str, b->str, a->len);
}
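
/* Illustrative sketch only, not part of the original file: one way a
   client might combine the string helpers above with the rest of this
   API.  The m4_string field names (str, len) are taken from
   m4_hash_string_hash above, and the insert/lookup/delete signatures
   from their definitions earlier in this file; the function and key
   names here are hypothetical, and <string.h> is assumed to be
   reachable through the headers above (memcmp is already used).  Kept
   inside "#if 0", like m4_hash_resize, since nothing in this file
   references it.  */
#if 0
static void
example_string_table (void)
{
  m4_string key;
  void **value;
  m4_hash *table = m4_hash_new (0, m4_hash_string_hash, m4_hash_string_cmp);

  key.str = (char *) "changequote";
  key.len = strlen (key.str);

  /* The table stores the key and value pointers verbatim; the caller
     keeps ownership of both.  */
  m4_hash_insert (table, &key, (void *) "example value");

  value = m4_hash_lookup (table, &key);
  assert (value && *value);

  m4_hash_delete (table);       /* Buckets freed, nodes recycled.  */
  m4_hash_exit ();              /* Recycled nodes freed as well.  */
}
#endif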