1 /* GNU m4 -- A simple macro processor
2 Copyright (C) 2001, 2006-2010, 2013-2014, 2017 Free Software
4 Written by Gary V. Vaughan <gary@gnu.org>
6 This file is part of GNU M4.
8 GNU M4 is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU M4 is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 - Use an obstack to manage the node memory.
24 - Implement the macroized magic values with the API.
30 #include "m4private.h"
32 #include "bitrotate.h"
35 typedef struct hash_node hash_node
;
39 size_t size
; /* number of buckets allocated */
40 size_t length
; /* number of elements inserted */
41 m4_hash_hash_func
*hash_func
;
42 m4_hash_cmp_func
*cmp_func
;
45 m4_hash_iterator
*iter
; /* current iterator */
57 struct m4_hash_iterator
59 const m4_hash
*hash
; /* contains the buckets */
60 hash_node
* place
; /* the node we are about to return */
61 hash_node
* next
; /* the next node, incase PLACE is removed */
62 size_t next_bucket
; /* the next bucket index following NEXT */
64 m4_hash_iterator
*chain
; /* multiple iterators visiting one hash */
69 #define HASH_SIZE(hash) ((hash)->size)
70 #define HASH_LENGTH(hash) ((hash)->length)
71 #define HASH_BUCKETS(hash) ((hash)->buckets)
72 #define HASH_HASH_FUNC(hash) ((hash)->hash_func)
73 #define HASH_CMP_FUNC(hash) ((hash)->cmp_func)
75 #define NODE_NEXT(node) ((node)->next)
76 #define NODE_KEY(node) ((node)->key)
77 #define NODE_VALUE(node) ((node)->value)
79 #define ITERATOR_HASH(i) ((i)->hash)
80 #define ITERATOR_PLACE(i) ((i)->place)
81 #define ITERATOR_NEXT(i) ((i)->next)
82 #define ITERATOR_NEXT_BUCKET(i) ((i)->next_bucket)
84 /*#define ITERATOR_NEXT_NEXT(i) NODE_NEXT (ITERATOR_PLACE (i))*/
87 #define BUCKET_NTH(hash, n) (HASH_BUCKETS (hash)[n])
88 #define BUCKET_COUNT(hash, key) \
89 ((*HASH_HASH_FUNC (hash))(key) % HASH_SIZE (hash))
90 #define BUCKET_KEY(hash, key) \
91 (BUCKET_NTH ((hash), BUCKET_COUNT ((hash), (key))))
93 /* Debugging macros. */
95 # define HASH_ITER(hash) 0
96 # define ITER_CHAIN(iter) 0
98 # define HASH_ITER(hash) (((m4_hash *) hash)->iter)
99 # define ITER_CHAIN(iter) ((iter)->chain)
103 static void bucket_insert (m4_hash
*hash
, hash_node
*bucket
);
104 static void bucket_delete (m4_hash
*hash
, size_t i
);
105 static hash_node
* node_new (const void *key
, void *value
);
106 static void node_insert (m4_hash
*hash
, hash_node
*node
);
107 static hash_node
* node_lookup (m4_hash
*hash
, const void *key
);
108 static void node_delete (m4_hash
*hash
, hash_node
*node
);
109 static void maybe_grow (m4_hash
*hash
);
113 static hash_node
*free_list
= NULL
;
117 /* Allocate and return a new, unpopulated but initialised m4_hash with
118 SIZE buckets, where HASH_FUNC will be used to generate bucket numbers
119 and CMP_FUNC will be called to compare keys. */
121 m4_hash_new (size_t size
, m4_hash_hash_func
*hash_func
,
122 m4_hash_cmp_func
*cmp_func
)
130 size
= M4_HASH_DEFAULT_SIZE
;
132 hash
= (m4_hash
*) xmalloc (sizeof *hash
);
133 HASH_SIZE (hash
) = size
;
134 HASH_LENGTH (hash
) = 0;
135 HASH_BUCKETS (hash
) = (hash_node
**) xcalloc (size
,
136 sizeof *HASH_BUCKETS (hash
));
137 HASH_HASH_FUNC (hash
) = hash_func
;
138 HASH_CMP_FUNC (hash
) = cmp_func
;
140 HASH_ITER (hash
) = NULL
;
147 m4_hash_dup (m4_hash
*src
, m4_hash_copy_func
*copy
)
154 dest
= m4_hash_new (HASH_SIZE (src
), HASH_HASH_FUNC (src
),
155 HASH_CMP_FUNC (src
));
157 m4_hash_apply (src
, (m4_hash_apply_func
*) copy
, dest
);
162 /* Recycle each of the nodes in HASH onto the free list, and release
163 the rest of the memory used by the table. Memory addressed by the
164 recycled nodes is _NOT_ freed: this needs to be done manually to
165 prevent memory leaks. This is not safe to call while HASH is being
168 m4_hash_delete (m4_hash
*hash
)
173 assert (!HASH_ITER (hash
));
175 for (i
= 0; i
< HASH_SIZE (hash
); ++i
)
176 if (BUCKET_NTH (hash
, i
))
177 bucket_delete (hash
, i
);
178 free (HASH_BUCKETS (hash
));
182 /* Check that the nodes in bucket I have been cleared, and recycle
183 each of the nodes in the bucket to the free list. Bucket I must
184 not be empty when this function is called. */
186 bucket_delete (m4_hash
*hash
, size_t i
)
191 assert (BUCKET_NTH (hash
, i
));
192 assert (i
< HASH_SIZE (hash
));
194 for (node
= BUCKET_NTH (hash
, i
); node
->next
; node
= NODE_NEXT (node
))
196 assert (NODE_KEY (node
) == NULL
);
197 --HASH_LENGTH (hash
);
200 assert (NODE_KEY (node
) == NULL
);
201 --HASH_LENGTH (hash
);
203 NODE_NEXT (node
) = free_list
;
204 free_list
= BUCKET_NTH (hash
, i
);
205 BUCKET_NTH (hash
, i
) = NULL
;
208 /* Create and initialise a new node with KEY and VALUE, by reusing a
209 node from the free list if possible. */
211 node_new (const void *key
, void *value
)
213 hash_node
*node
= NULL
;
218 free_list
= NODE_NEXT (free_list
);
221 node
= (hash_node
*) xmalloc (sizeof *node
);
225 NODE_NEXT (node
) = NULL
;
226 NODE_KEY (node
) = key
;
227 NODE_VALUE (node
) = value
;
232 /* Check that NODE has been cleared, and recycle it to the free list. */
234 node_delete (m4_hash
*hash
, hash_node
*node
)
237 assert (NODE_KEY (node
) == NULL
);
239 NODE_NEXT (node
) = free_list
;
242 --HASH_LENGTH (hash
);
245 /* Create a new entry in HASH with KEY and VALUE, making use of nodes
246 in the free list if possible, and potentially growing the size of
247 the table if node density is too high. This is not safe to call
248 while HASH is being iterated. Currently, it is not safe to call
249 this if another entry already matches KEY. */
251 m4_hash_insert (m4_hash
*hash
, const void *key
, void *value
)
256 assert (!HASH_ITER (hash
));
258 node
= node_new (key
, value
);
259 node_insert (hash
, node
);
265 /* Push the unconnected NODE on to the front of the appropriate
266 bucket, effectively preventing retrieval of other nodes with
267 the same key (where "sameness" is determined by HASH's
270 node_insert (m4_hash
*hash
, hash_node
*node
)
276 assert (NODE_NEXT (node
) == NULL
);
278 n
= BUCKET_COUNT (hash
, NODE_KEY (node
));
279 NODE_NEXT (node
) = BUCKET_NTH (hash
, n
);
280 BUCKET_NTH (hash
, n
) = node
;
282 ++HASH_LENGTH (hash
);
285 /* Remove from HASH, the first node with key KEY; comparing keys with
286 HASH's cmp_func. Any nodes with the same KEY previously hidden by
287 the removed node will become visible again. The key field of the
288 removed node is returned, or NULL if there was no match. This is
289 unsafe if multiple iterators are visiting HASH, or when a lone
290 iterator is visiting on a different key. */
292 m4_hash_remove (m4_hash
*hash
, const void *key
)
295 hash_node
*node
= NULL
;
298 m4_hash_iterator
*iter
= HASH_ITER (hash
);
301 if (HASH_ITER (hash
))
303 assert (!ITER_CHAIN (iter
));
304 assert (ITERATOR_PLACE (iter
));
308 n
= BUCKET_COUNT (hash
, key
);
311 hash_node
*next
= node
? NODE_NEXT (node
) : BUCKET_NTH (hash
, n
);
313 if (next
&& ((*HASH_CMP_FUNC (hash
)) (NODE_KEY (next
), key
) == 0))
316 NODE_NEXT (node
) = NODE_NEXT (next
);
318 BUCKET_NTH (hash
, n
) = NODE_NEXT (next
);
320 key
= NODE_KEY (next
);
323 assert (ITERATOR_PLACE (iter
) == next
);
324 NODE_KEY (next
) = NULL
;
326 node_delete (hash
, next
);
327 return (void *) key
; /* Cast away const. */
336 /* Return the address of the value field of the first node in HASH
337 that has a matching KEY. The address is returned so that an
338 explicit NULL value can be distinguished from a failed lookup (also
339 NULL). Fortuitously for M4, this also means that the value field
340 can be changed `in situ' to implement a value stack. Safe to call
341 even when an iterator is in force. */
343 m4_hash_lookup (m4_hash
*hash
, const void *key
)
349 node
= node_lookup (hash
, key
);
351 return node
? &NODE_VALUE (node
) : NULL
;
354 /* Return the first node in HASH that has a matching KEY. */
356 node_lookup (m4_hash
*hash
, const void *key
)
362 node
= BUCKET_KEY (hash
, key
);
364 while (node
&& (*HASH_CMP_FUNC (hash
)) (NODE_KEY (node
), key
))
365 node
= NODE_NEXT (node
);
370 /* How many entries are currently contained by HASH. Safe to call
371 even during an interation. */
373 m4_get_hash_length (m4_hash
*hash
)
377 return HASH_LENGTH (hash
);
381 /* Force the number of buckets to be the given value. You probably ought
382 not to be using this function once the table has been in use, since
383 the maximum density algorithm will grow the number of buckets back to
384 what was there before if you try to shrink the table. It is useful
385 to set a smaller or larger initial size if you know in advance what
386 order of magnitude of entries will be in the table. Be aware that
387 the efficiency of the lookup and grow features require that the size
388 always be 1 less than a power of 2. Unsafe if HASH is being visited
391 m4_hash_resize (m4_hash
*hash
, size_t size
)
393 hash_node
**original_buckets
;
394 size_t original_size
;
397 assert (!HASH_ITER (hash
));
399 original_size
= HASH_SIZE (hash
);
400 original_buckets
= HASH_BUCKETS (hash
);
402 HASH_SIZE (hash
) = size
;
403 HASH_BUCKETS (hash
) = (hash_node
**) xcalloc (size
,
404 sizeof *HASH_BUCKETS (hash
));
408 for (i
= 0; i
< original_size
; ++i
)
409 if (original_buckets
[i
])
410 bucket_insert (hash
, original_buckets
[i
]);
413 free (original_buckets
);
417 /* If the node density breaks the threshold, increase the size of
418 HASH and repopulate with the original nodes. */
420 maybe_grow (m4_hash
*hash
)
422 float nodes_per_bucket
;
426 nodes_per_bucket
= (float) HASH_LENGTH (hash
) / (float) HASH_SIZE (hash
);
428 if (nodes_per_bucket
> (float) M4_HASH_MAXIMUM_DENSITY
)
430 size_t original_size
= HASH_SIZE (hash
);
431 hash_node
**original_buckets
= HASH_BUCKETS (hash
);
433 /* HASH sizes are always 1 less than a power of 2. */
434 HASH_SIZE (hash
) = (2 * (1 + original_size
)) -1;
435 HASH_BUCKETS (hash
) =
436 (hash_node
**) xcalloc (HASH_SIZE (hash
), sizeof *HASH_BUCKETS (hash
));
440 for (i
= 0; i
< original_size
; ++i
)
441 if (original_buckets
[i
])
442 bucket_insert (hash
, original_buckets
[i
]);
445 free (original_buckets
);
449 /* Insert each node in BUCKET into HASH. Relative ordering of nodes
450 is not preserved. This would need to change if we were to
451 guarantee relative ordering within a bucket for identical keys. */
453 bucket_insert (m4_hash
*hash
, hash_node
*bucket
)
460 hash_node
*next
= NODE_NEXT (bucket
);
462 /* Break link to rest of the bucket before reinserting. */
463 NODE_NEXT (bucket
) = NULL
;
464 node_insert (hash
, bucket
);
471 /* Reclaim all memory used by free nodes. Safe to call at any time,
472 although only worth calling at program shutdown to verify no
479 hash_node
*stale
= free_list
;
480 free_list
= NODE_NEXT (stale
);
487 /* Iterate over a given HASH. Start with PLACE being NULL, then
488 repeat with PLACE being the previous return value. The return
489 value is the current location of the iterator, or NULL when the
490 walk is complete. Call m4_free_hash_iterator to abort iteration.
491 During the iteration, it is safe to search the list, and if no
492 other iterator is active, it is safe to remove the key pointed to
493 by this iterator. All other actions that modify HASH are
496 m4_get_hash_iterator_next (const m4_hash
*hash
, m4_hash_iterator
*place
)
499 assert (!place
|| (ITERATOR_HASH (place
) == hash
));
501 /* On the first iteration, allocate an iterator. */
504 place
= (m4_hash_iterator
*) xzalloc (sizeof *place
);
505 ITERATOR_HASH (place
) = hash
;
507 ITER_CHAIN (place
) = HASH_ITER (hash
);
508 HASH_ITER (hash
) = place
;
513 ITERATOR_PLACE (place
) = ITERATOR_NEXT (place
);
515 /* If there is another node in the current bucket, select it. */
516 if (ITERATOR_NEXT (place
) && NODE_NEXT (ITERATOR_NEXT (place
)))
518 ITERATOR_NEXT (place
) = NODE_NEXT (ITERATOR_NEXT (place
));
522 /* Find the next non-empty bucket. */
523 while ((ITERATOR_NEXT_BUCKET (place
) < HASH_SIZE (hash
))
524 && (BUCKET_NTH (hash
, ITERATOR_NEXT_BUCKET (place
)) == NULL
))
526 ++ITERATOR_NEXT_BUCKET (place
);
529 /* Select the first node in the new bucket. */
530 if (ITERATOR_NEXT_BUCKET (place
) < HASH_SIZE (hash
))
532 ITERATOR_NEXT (place
)
533 = BUCKET_NTH (hash
, ITERATOR_NEXT_BUCKET (place
));
536 ITERATOR_NEXT (place
) = NULL
;
538 /* Advance the `next' reference. */
539 ++ITERATOR_NEXT_BUCKET (place
);
542 /* If there are no more nodes to return, recycle the iterator memory. */
543 if (! (ITERATOR_PLACE (place
) || ITERATOR_NEXT (place
)))
545 m4_free_hash_iterator (hash
, place
);
549 /* On the first call we need to put the 1st node in PLACE and
550 the 2nd node in NEXT. */
551 if (ITERATOR_NEXT (place
) && !ITERATOR_PLACE (place
))
554 assert (place
&& ITERATOR_PLACE (place
));
559 /* Clean up the iterator PLACE within HASH when aborting an iteration
562 m4_free_hash_iterator (const m4_hash
*hash
, m4_hash_iterator
*place
)
565 m4_hash_iterator
*iter
= NULL
;
566 m4_hash_iterator
*next
;
569 assert (place
&& (ITERATOR_HASH (place
) == hash
));
573 next
= iter
? ITER_CHAIN (iter
) : HASH_ITER (hash
);
577 ITER_CHAIN (iter
) = ITER_CHAIN (next
);
579 HASH_ITER (hash
) = ITER_CHAIN (next
);
590 /* Return the key being visited by the iterator PLACE. */
591 const void * M4_GNUC_PURE
592 m4_get_hash_iterator_key (m4_hash_iterator
*place
)
596 return NODE_KEY (ITERATOR_PLACE (place
));
599 /* Return the value being visited by the iterator PLACE. */
601 m4_get_hash_iterator_value (m4_hash_iterator
*place
)
605 return NODE_VALUE (ITERATOR_PLACE (place
));
608 /* The following function is used for the cases where we want to do
609 something to each and every entry in HASH. This function traverses
610 the hash table, and calls a specified function FUNC for each entry
611 in the table. FUNC is called with a pointer to the entry key,
612 value, and the passed DATA argument. If FUNC returns non-NULL,
613 abort the iteration and return that value; a return of NULL implies
614 success on all entries. */
616 m4_hash_apply (m4_hash
*hash
, m4_hash_apply_func
*func
, void *userdata
)
618 m4_hash_iterator
*place
= NULL
;
619 void * result
= NULL
;
624 while ((place
= m4_get_hash_iterator_next (hash
, place
)))
626 result
= (*func
) (hash
, m4_get_hash_iterator_key (place
),
627 m4_get_hash_iterator_value (place
), userdata
);
631 m4_free_hash_iterator (hash
, place
);
640 /* Using a string (char * and size_t pair) as the hash key is common
641 enough that we provide implementations here for use in client hash
644 /* Return a hash value for a string, similar to gnulib's hash module,
645 but with length factored in. */
647 m4_hash_string_hash (const void *ptr
)
649 const m4_string
*key
= (const m4_string
*) ptr
;
650 const char *s
= key
->str
;
651 size_t len
= key
->len
;
655 val
= rotl_sz (val
, 7) + to_uchar (*s
++);
659 /* Comparison function for hash keys -- used by the underlying
660 hash table ADT when searching for a key match during name lookup. */
662 m4_hash_string_cmp (const void *key
, const void *try)
664 const m4_string
*a
= (const m4_string
*) key
;
665 const m4_string
*b
= (const m4_string
*) try;
670 return memcmp (a
->str
, b
->str
, a
->len
);