/* GNU m4 -- A simple macro processor
   Copyright (C) 2001, 2006-2010, 2013-2014, 2017 Free Software
   Foundation, Inc.
   Written by Gary V. Vaughan <gary@gnu.org>

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* TODO:
   - Use an obstack to manage the node memory.
   - Implement the macroized magic values with the API.
*/

#include <config.h>

#include "hash.h"
#include "m4private.h"

#include "bitrotate.h"
#include <limits.h>

typedef struct hash_node hash_node;

struct m4_hash
{
  size_t size;                  /* number of buckets allocated */
  size_t length;                /* number of elements inserted */
  m4_hash_hash_func *hash_func;
  m4_hash_cmp_func *cmp_func;
  hash_node **buckets;
#ifndef NDEBUG
  m4_hash_iterator *iter;       /* current iterator */
#endif
};

struct hash_node
{
  hash_node *next;
  const void *key;
  void *value;
};

struct m4_hash_iterator
{
  const m4_hash *hash;          /* contains the buckets */
  hash_node *place;             /* the node we are about to return */
  hash_node *next;              /* the next node, in case PLACE is removed */
  size_t next_bucket;           /* the next bucket index following NEXT */
#ifndef NDEBUG
  m4_hash_iterator *chain;      /* multiple iterators visiting one hash */
#endif
};

#define HASH_SIZE(hash)         ((hash)->size)
#define HASH_LENGTH(hash)       ((hash)->length)
#define HASH_BUCKETS(hash)      ((hash)->buckets)
#define HASH_HASH_FUNC(hash)    ((hash)->hash_func)
#define HASH_CMP_FUNC(hash)     ((hash)->cmp_func)

#define NODE_NEXT(node)         ((node)->next)
#define NODE_KEY(node)          ((node)->key)
#define NODE_VALUE(node)        ((node)->value)

#define ITERATOR_HASH(i)        ((i)->hash)
#define ITERATOR_PLACE(i)       ((i)->place)
#define ITERATOR_NEXT(i)        ((i)->next)
#define ITERATOR_NEXT_BUCKET(i) ((i)->next_bucket)

/*#define ITERATOR_NEXT_NEXT(i)   NODE_NEXT (ITERATOR_PLACE (i))*/

/* Helper macros.  */
#define BUCKET_NTH(hash, n)     (HASH_BUCKETS (hash)[n])
#define BUCKET_COUNT(hash, key)                                         \
  ((*HASH_HASH_FUNC (hash)) (key) % HASH_SIZE (hash))
#define BUCKET_KEY(hash, key)                                           \
  (BUCKET_NTH ((hash), BUCKET_COUNT ((hash), (key))))

/* Debugging macros.  */
#ifdef NDEBUG
# define HASH_ITER(hash)        0
# define ITER_CHAIN(iter)       0
#else
# define HASH_ITER(hash)        (((m4_hash *) hash)->iter)
# define ITER_CHAIN(iter)       ((iter)->chain)
#endif

static void        bucket_insert (m4_hash *hash, hash_node *bucket);
static void        bucket_delete (m4_hash *hash, size_t i);
static hash_node * node_new      (const void *key, void *value);
static void        node_insert   (m4_hash *hash, hash_node *node);
static hash_node * node_lookup   (m4_hash *hash, const void *key);
static void        node_delete   (m4_hash *hash, hash_node *node);
static void        maybe_grow    (m4_hash *hash);


/* Deleted nodes are not released immediately: they are pushed onto
   this list and reused by later insertions, until m4_hash_exit frees
   them for good.  */
static hash_node *free_list = NULL;

/* Allocate and return a new, unpopulated but initialised m4_hash with
   SIZE buckets, where HASH_FUNC will be used to generate bucket numbers
   and CMP_FUNC will be called to compare keys.  */
m4_hash *
m4_hash_new (size_t size, m4_hash_hash_func *hash_func,
             m4_hash_cmp_func *cmp_func)
{
  m4_hash *hash;

  assert (hash_func);
  assert (cmp_func);

  if (size == 0)
    size = M4_HASH_DEFAULT_SIZE;

  hash                  = (m4_hash *) xmalloc (sizeof *hash);
  HASH_SIZE (hash)      = size;
  HASH_LENGTH (hash)    = 0;
  HASH_BUCKETS (hash)   = (hash_node **) xcalloc (size,
                                                  sizeof *HASH_BUCKETS (hash));
  HASH_HASH_FUNC (hash) = hash_func;
  HASH_CMP_FUNC (hash)  = cmp_func;
#ifndef NDEBUG
  HASH_ITER (hash)      = NULL;
#endif

  return hash;
}

/* Create a new, empty hash table with the same size and callback
   functions as SRC, then apply COPY to every entry of SRC with the new
   table as its data argument, so that COPY can insert each entry.  */
m4_hash *
m4_hash_dup (m4_hash *src, m4_hash_copy_func *copy)
{
  m4_hash *dest;

  assert (src);
  assert (copy);

  dest = m4_hash_new (HASH_SIZE (src), HASH_HASH_FUNC (src),
                      HASH_CMP_FUNC (src));

  m4_hash_apply (src, (m4_hash_apply_func *) copy, dest);

  return dest;
}

/* Recycle each of the nodes in HASH onto the free list, and release
   the rest of the memory used by the table.  Memory addressed by the
   recycled nodes is _NOT_ freed: this needs to be done manually to
   prevent memory leaks.  This is not safe to call while HASH is being
   iterated.  */
void
m4_hash_delete (m4_hash *hash)
{
  size_t i;

  assert (hash);
  assert (!HASH_ITER (hash));

  for (i = 0; i < HASH_SIZE (hash); ++i)
    if (BUCKET_NTH (hash, i))
      bucket_delete (hash, i);
  free (HASH_BUCKETS (hash));
  free (hash);
}

/* Check that the nodes in bucket I have been cleared, and recycle
   each of the nodes in the bucket to the free list.  Bucket I must
   not be empty when this function is called.  */
static void
bucket_delete (m4_hash *hash, size_t i)
{
  hash_node *node;

  assert (hash);
  assert (BUCKET_NTH (hash, i));
  assert (i < HASH_SIZE (hash));

  for (node = BUCKET_NTH (hash, i); node->next; node = NODE_NEXT (node))
    {
      assert (NODE_KEY (node) == NULL);
      --HASH_LENGTH (hash);
    }

  assert (NODE_KEY (node) == NULL);
  --HASH_LENGTH (hash);

  NODE_NEXT (node) = free_list;
  free_list = BUCKET_NTH (hash, i);
  BUCKET_NTH (hash, i) = NULL;
}

/* Create and initialise a new node with KEY and VALUE, by reusing a
   node from the free list if possible.  */
static hash_node *
node_new (const void *key, void *value)
{
  hash_node *node = NULL;

  if (free_list)
    {
      node = free_list;
      free_list = NODE_NEXT (free_list);
    }
  else
    node = (hash_node *) xmalloc (sizeof *node);

  assert (node);

  NODE_NEXT (node)  = NULL;
  NODE_KEY (node)   = key;
  NODE_VALUE (node) = value;

  return node;
}

/* Check that NODE has been cleared, and recycle it to the free list.  */
static void
node_delete (m4_hash *hash, hash_node *node)
{
  assert (node);
  assert (NODE_KEY (node) == NULL);

  NODE_NEXT (node) = free_list;
  free_list = node;

  --HASH_LENGTH (hash);
}

/* Create a new entry in HASH with KEY and VALUE, making use of nodes
   in the free list if possible, and potentially growing the size of
   the table if node density is too high.  This is not safe to call
   while HASH is being iterated.  Currently, it is not safe to call
   this if another entry already matches KEY.  */
const void *
m4_hash_insert (m4_hash *hash, const void *key, void *value)
{
  hash_node *node;

  assert (hash);
  assert (!HASH_ITER (hash));

  node = node_new (key, value);
  node_insert (hash, node);
  maybe_grow (hash);

  return key;
}

/* Push the unconnected NODE on to the front of the appropriate
   bucket, effectively preventing retrieval of other nodes with
   the same key (where "sameness" is determined by HASH's
   cmp_func).  */
static void
node_insert (m4_hash *hash, hash_node *node)
{
  size_t n;

  assert (hash);
  assert (node);
  assert (NODE_NEXT (node) == NULL);

  n = BUCKET_COUNT (hash, NODE_KEY (node));
  NODE_NEXT (node)     = BUCKET_NTH (hash, n);
  BUCKET_NTH (hash, n) = node;

  ++HASH_LENGTH (hash);
}

/* Remove from HASH the first node with key KEY, comparing keys with
   HASH's cmp_func.  Any nodes with the same KEY previously hidden by
   the removed node will become visible again.  The key field of the
   removed node is returned, or NULL if there was no match.  This is
   unsafe if multiple iterators are visiting HASH, or when a lone
   iterator is visiting on a different key.  */
void *
m4_hash_remove (m4_hash *hash, const void *key)
{
  size_t n;
  hash_node *node = NULL;

#ifndef NDEBUG
  m4_hash_iterator *iter = HASH_ITER (hash);

  assert (hash);
  if (HASH_ITER (hash))
    {
      assert (!ITER_CHAIN (iter));
      assert (ITERATOR_PLACE (iter));
    }
#endif

  n = BUCKET_COUNT (hash, key);

  do
    {
      hash_node *next = node ? NODE_NEXT (node) : BUCKET_NTH (hash, n);

      if (next && ((*HASH_CMP_FUNC (hash)) (NODE_KEY (next), key) == 0))
        {
          if (node)
            NODE_NEXT (node) = NODE_NEXT (next);
          else
            BUCKET_NTH (hash, n) = NODE_NEXT (next);

          key = NODE_KEY (next);
#ifndef NDEBUG
          if (iter)
            assert (ITERATOR_PLACE (iter) == next);
          NODE_KEY (next) = NULL;
#endif
          node_delete (hash, next);
          return (void *) key;  /* Cast away const.  */
        }
      node = next;
    }
  while (node);

  return NULL;
}

/* Return the address of the value field of the first node in HASH
   that has a matching KEY.  The address is returned so that an
   explicit NULL value can be distinguished from a failed lookup (also
   NULL).  Fortuitously for M4, this also means that the value field
   can be changed `in situ' to implement a value stack.  Safe to call
   even when an iterator is in force.  */
void **
m4_hash_lookup (m4_hash *hash, const void *key)
{
  hash_node *node;

  assert (hash);

  node = node_lookup (hash, key);

  return node ? &NODE_VALUE (node) : NULL;
}

/* Return the first node in HASH that has a matching KEY.  */
static hash_node *
node_lookup (m4_hash *hash, const void *key)
{
  hash_node *node;

  assert (hash);

  node = BUCKET_KEY (hash, key);

  while (node && (*HASH_CMP_FUNC (hash)) (NODE_KEY (node), key))
    node = NODE_NEXT (node);

  return node;
}

/* Return the number of entries currently contained by HASH.  Safe to
   call even during an iteration.  */
size_t M4_GNUC_PURE
m4_get_hash_length (m4_hash *hash)
{
  assert (hash);

  return HASH_LENGTH (hash);
}

#if 0
/* Force the number of buckets to be the given value.  You probably ought
   not to be using this function once the table has been in use, since
   the maximum density algorithm will grow the number of buckets back to
   what was there before if you try to shrink the table.  It is useful
   to set a smaller or larger initial size if you know in advance what
   order of magnitude of entries will be in the table.  Be aware that
   the efficiency of the lookup and grow features requires that the size
   always be 1 less than a power of 2.  Unsafe if HASH is being visited
   by an iterator.  */
void
m4_hash_resize (m4_hash *hash, size_t size)
{
  hash_node **original_buckets;
  size_t original_size;

  assert (hash);
  assert (!HASH_ITER (hash));

  original_size    = HASH_SIZE (hash);
  original_buckets = HASH_BUCKETS (hash);

  HASH_SIZE (hash)    = size;
  HASH_BUCKETS (hash) = (hash_node **) xcalloc (size,
                                                sizeof *HASH_BUCKETS (hash));

  {
    size_t i;
    for (i = 0; i < original_size; ++i)
      if (original_buckets[i])
        bucket_insert (hash, original_buckets[i]);
  }

  free (original_buckets);
}
#endif

/* If the node density breaks the threshold, increase the size of
   HASH and repopulate with the original nodes.  */
static void
maybe_grow (m4_hash *hash)
{
  float nodes_per_bucket;

  assert (hash);

  nodes_per_bucket = (float) HASH_LENGTH (hash) / (float) HASH_SIZE (hash);

  if (nodes_per_bucket > (float) M4_HASH_MAXIMUM_DENSITY)
    {
      size_t original_size = HASH_SIZE (hash);
      hash_node **original_buckets = HASH_BUCKETS (hash);

      /* HASH sizes are always 1 less than a power of 2.  */
      HASH_SIZE (hash) = (2 * (1 + original_size)) - 1;
      HASH_BUCKETS (hash) =
        (hash_node **) xcalloc (HASH_SIZE (hash), sizeof *HASH_BUCKETS (hash));

      {
        size_t i;
        for (i = 0; i < original_size; ++i)
          if (original_buckets[i])
            bucket_insert (hash, original_buckets[i]);
      }

      free (original_buckets);
    }
}

/* Insert each node in BUCKET into HASH.  Relative ordering of nodes
   is not preserved.  This would need to change if we were to
   guarantee relative ordering within a bucket for identical keys.  */
static void
bucket_insert (m4_hash *hash, hash_node *bucket)
{
  assert (hash);
  assert (bucket);

  do
    {
      hash_node *next = NODE_NEXT (bucket);

      /* Break link to rest of the bucket before reinserting.  */
      NODE_NEXT (bucket) = NULL;
      node_insert (hash, bucket);

      bucket = next;
    }
  while (bucket);
}

/* Reclaim all memory used by free nodes.  Safe to call at any time,
   although only worth calling at program shutdown to verify no
   leaks.  */
void
m4_hash_exit (void)
{
  while (free_list)
    {
      hash_node *stale = free_list;
      free_list = NODE_NEXT (stale);
      free (stale);
    }
}

/* Iterate over a given HASH.  Start with PLACE being NULL, then
   repeat with PLACE being the previous return value.  The return
   value is the current location of the iterator, or NULL when the
   walk is complete.  Call m4_free_hash_iterator to abort iteration.
   During the iteration, it is safe to search the list, and if no
   other iterator is active, it is safe to remove the key pointed to
   by this iterator.  All other actions that modify HASH are
   unsafe.  */
m4_hash_iterator *
m4_get_hash_iterator_next (const m4_hash *hash, m4_hash_iterator *place)
{
  assert (hash);
  assert (!place || (ITERATOR_HASH (place) == hash));

  /* On the first iteration, allocate an iterator.  */
  if (!place)
    {
      place = (m4_hash_iterator *) xzalloc (sizeof *place);
      ITERATOR_HASH (place) = hash;
#ifndef NDEBUG
      ITER_CHAIN (place) = HASH_ITER (hash);
      HASH_ITER (hash) = place;
#endif
    }

 next:
  ITERATOR_PLACE (place) = ITERATOR_NEXT (place);

  /* If there is another node in the current bucket, select it.  */
  if (ITERATOR_NEXT (place) && NODE_NEXT (ITERATOR_NEXT (place)))
    {
      ITERATOR_NEXT (place) = NODE_NEXT (ITERATOR_NEXT (place));
    }
  else
    {
      /* Find the next non-empty bucket.  */
      while ((ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash))
             && (BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place)) == NULL))
        {
          ++ITERATOR_NEXT_BUCKET (place);
        }

      /* Select the first node in the new bucket.  */
      if (ITERATOR_NEXT_BUCKET (place) < HASH_SIZE (hash))
        {
          ITERATOR_NEXT (place)
            = BUCKET_NTH (hash, ITERATOR_NEXT_BUCKET (place));
        }
      else
        ITERATOR_NEXT (place) = NULL;

      /* Advance the `next' reference.  */
      ++ITERATOR_NEXT_BUCKET (place);
    }

  /* If there are no more nodes to return, recycle the iterator memory.  */
  if (! (ITERATOR_PLACE (place) || ITERATOR_NEXT (place)))
    {
      m4_free_hash_iterator (hash, place);
      return NULL;
    }

  /* On the first call we need to put the 1st node in PLACE and
     the 2nd node in NEXT.  */
  if (ITERATOR_NEXT (place) && !ITERATOR_PLACE (place))
    goto next;

  assert (place && ITERATOR_PLACE (place));

  return place;
}

/* Clean up the iterator PLACE within HASH when aborting an iteration
   early.  */
void
m4_free_hash_iterator (const m4_hash *hash, m4_hash_iterator *place)
{
#ifndef NDEBUG
  m4_hash_iterator *iter = NULL;
  m4_hash_iterator *next;

  assert (hash);
  assert (place && (ITERATOR_HASH (place) == hash));

  do
    {
      next = iter ? ITER_CHAIN (iter) : HASH_ITER (hash);
      if (place == next)
        {
          if (iter)
            ITER_CHAIN (iter) = ITER_CHAIN (next);
          else
            HASH_ITER (hash) = ITER_CHAIN (next);
          break;
        }
      iter = next;
    }
  while (iter);
  assert (next);
#endif
  free (place);
}

/* Return the key being visited by the iterator PLACE.  */
const void * M4_GNUC_PURE
m4_get_hash_iterator_key (m4_hash_iterator *place)
{
  assert (place);

  return NODE_KEY (ITERATOR_PLACE (place));
}

/* Return the value being visited by the iterator PLACE.  */
void * M4_GNUC_PURE
m4_get_hash_iterator_value (m4_hash_iterator *place)
{
  assert (place);

  return NODE_VALUE (ITERATOR_PLACE (place));
}

/* Apply FUNC to each and every entry in HASH.  This function traverses
   the table and calls FUNC once per entry, passing it the table, the
   entry's key and value, and the USERDATA argument.  If FUNC returns
   non-NULL, abort the iteration and return that value; a return of
   NULL implies success on all entries.  */
void *
m4_hash_apply (m4_hash *hash, m4_hash_apply_func *func, void *userdata)
{
  m4_hash_iterator *place = NULL;
  void *result = NULL;

  assert (hash);
  assert (func);

  while ((place = m4_get_hash_iterator_next (hash, place)))
    {
      result = (*func) (hash, m4_get_hash_iterator_key (place),
                        m4_get_hash_iterator_value (place), userdata);

      if (result != NULL)
        {
          m4_free_hash_iterator (hash, place);
          break;
        }
    }

  return result;
}

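/* Illustrative sketch only, not part of the original file: the shape of
   a callback that m4_hash_apply accepts, inferred from the call above
   rather than from hash.h, so treat the exact signature as an
   assumption.  This hypothetical visitor counts entries through
   USERDATA and returns NULL so the walk always runs to completion.
   Kept inside "#if 0", like m4_hash_resize above, since nothing in this
   file references it.  */
#if 0
static void *
count_entries_visitor (m4_hash *hash, const void *key, void *value,
                       void *userdata)
{
  (void) hash;                  /* The table itself is not needed here.  */
  (void) key;
  (void) value;
  ++*(size_t *) userdata;       /* Bump the caller's counter.  */
  return NULL;                  /* NULL means "keep iterating".  */
}

/* Possible use:  size_t n = 0;
                  m4_hash_apply (table, count_entries_visitor, &n);  */
#endif
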
/* Using a string (char * and size_t pair) as the hash key is common
   enough that we provide implementations here for use in client hash
   table routines.  */

/* Return a hash value for a string, similar to gnulib's hash module,
   but with length factored in.  */
size_t M4_GNUC_PURE
m4_hash_string_hash (const void *ptr)
{
  const m4_string *key = (const m4_string *) ptr;
  const char *s = key->str;
  size_t len = key->len;
  size_t val = len;

  while (len--)
    val = rotl_sz (val, 7) + to_uchar (*s++);
  return val;
}

/* Comparison function for hash keys -- used by the underlying
   hash table ADT when searching for a key match during name lookup.  */
int M4_GNUC_PURE
m4_hash_string_cmp (const void *key, const void *try)
{
  const m4_string *a = (const m4_string *) key;
  const m4_string *b = (const m4_string *) try;
  if (a->len < b->len)
    return -1;
  if (b->len < a->len)
    return 1;
  return memcmp (a->str, b->str, a->len);
}
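
/* Illustrative sketch only, not part of the original file: one way a
   client might combine the string helpers above with the rest of this
   API.  The m4_string field names (str, len) are taken from
   m4_hash_string_hash above, and the insert/lookup/delete signatures
   from their definitions earlier in this file; the function and key
   names here are hypothetical, and <string.h> is assumed to be
   reachable through the headers above (memcmp is already used).  Kept
   inside "#if 0", like m4_hash_resize, since nothing in this file
   references it.  */
#if 0
static void
example_string_table (void)
{
  m4_string key;
  void **value;
  m4_hash *table = m4_hash_new (0, m4_hash_string_hash, m4_hash_string_cmp);

  key.str = (char *) "changequote";
  key.len = strlen (key.str);

  /* The table stores the key and value pointers verbatim; the caller
     keeps ownership of both.  */
  m4_hash_insert (table, &key, (void *) "example value");

  value = m4_hash_lookup (table, &key);
  assert (value && *value);

  m4_hash_delete (table);       /* Buckets freed, nodes recycled.  */
  m4_hash_exit ();              /* Recycled nodes freed as well.  */
}
#endif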