3 * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
5 * Free Software License:
7 * All rights are reserved by the author, with the following exceptions:
8 * Permission is granted to freely reproduce and distribute this software,
9 * possibly in exchange for a fee, provided that this copyright notice appears
10 * intact. Permission is also granted to adapt this software to produce
11 * derivative works, as long as the modified versions carry this copyright
12 * notice and additional notices stating that the work has been modified.
13 * This source code may be translated into executable form and incorporated
14 * into proprietary software; there is no requirement for such software to
15 * contain a copyright notice related to this source.
17 * $Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $
18 * $Name: kazlib_1_20 $
26 #define HASH_IMPLEMENTATION
30 static const char rcsid
[] = "$Id: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $";
34 #define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */
35 #define INIT_MASK ((INIT_SIZE) - 1)
37 #define next hash_next
39 #define data hash_data
40 #define hkey hash_hkey
42 #define table hash_table
43 #define nchains hash_nchains
44 #define nodecount hash_nodecount
45 #define maxcount hash_maxcount
46 #define highmark hash_highmark
47 #define lowmark hash_lowmark
48 #define compare hash_compare
49 #define function hash_function
50 #define allocnode hash_allocnode
51 #define freenode hash_freenode
52 #define context hash_context
53 #define mask hash_mask
54 #define dynamic hash_dynamic
56 #define table hash_table
57 #define chain hash_chain
59 static hnode_t
*hnode_alloc(void *context
);
60 static void hnode_free(hnode_t
*node
, void *context
);
61 static hash_val_t
hash_fun_default(const void *key
);
62 static int hash_comp_default(const void *key1
, const void *key2
);
67 * Compute the number of bits in the hash_val_t type. We know that hash_val_t
68 * is an unsigned integral type. Thus the highest value it can hold is a
69 * Mersenne number (power of two, less one). We initialize a hash_val_t
70 * object with this value and then shift bits out one by one while counting.
72 * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power
73 * of two. This means that its binary representation consists of all one
74 * bits, and hence ``val'' is initialized to all one bits.
75 * 2. While bits remain in val, we increment the bit count and shift it to the
76 * right, replacing the topmost bit by zero.
79 static void compute_bits(void)
81 hash_val_t val
= HASH_VAL_T_MAX
; /* 1 */
89 hash_val_t_bit
= bits
;
93 * Verify whether the given argument is a power of two.
96 static int is_power_of_two(hash_val_t arg
)
100 while ((arg
& 1) == 0)
106 * Compute a shift amount from a given table size
109 static hash_val_t
compute_mask(hashcount_t size
)
111 assert (is_power_of_two(size
));
118 * Initialize the table of pointers to null.
121 static void clear_table(hash_t
*hash
)
125 for (i
= 0; i
< hash
->nchains
; i
++)
126 hash
->table
[i
] = NULL
;
130 * Double the size of a dynamic table. This works as follows. Each chain splits
131 * into two adjacent chains. The shift amount increases by one, exposing an
132 * additional bit of each hashed key. For each node in the original chain, the
133 * value of this newly exposed bit will decide which of the two new chains will
134 * receive the node: if the bit is 1, the chain with the higher index will have
135 * the node, otherwise the lower chain will receive the node. In this manner,
136 * the hash table will continue to function exactly as before without having to
137 * rehash any of the keys.
140 * 2. The new number of chains is twice the old number of chains.
141 * 3. The new mask is one bit wider than the previous, revealing a
142 * new bit in all hashed keys.
143 * 4. Allocate a new table of chain pointers that is twice as large as the
145 * 5. If the reallocation was successful, we perform the rest of the growth
146 * algorithm, otherwise we do nothing.
147 * 6. The exposed_bit variable holds a mask with which each hashed key can be
148 * AND-ed to test the value of its newly exposed bit.
149 * 7. Now loop over each chain in the table and sort its nodes into two
150 * chains based on the value of each node's newly exposed hash bit.
151 * 8. The low chain replaces the current chain. The high chain goes
152 * into the corresponding sister chain in the upper half of the table.
153 * 9. We have finished dealing with the chains and nodes. We now update
154 * the various bookeeping fields of the hash structure.
157 static void grow_table(hash_t
*hash
)
161 assert (2 * hash
->nchains
> hash
->nchains
); /* 1 */
163 newtable
= realloc(hash
->table
,
164 sizeof *newtable
* hash
->nchains
* 2); /* 4 */
166 if (newtable
) { /* 5 */
167 hash_val_t mask
= (hash
->mask
<< 1) | 1; /* 3 */
168 hash_val_t exposed_bit
= mask
^ hash
->mask
; /* 6 */
171 assert (mask
!= hash
->mask
);
173 for (chain
= 0; chain
< hash
->nchains
; chain
++) { /* 7 */
174 hnode_t
*low_chain
= 0, *high_chain
= 0, *hptr
, *next
;
176 for (hptr
= newtable
[chain
]; hptr
!= 0; hptr
= next
) {
179 if (hptr
->hkey
& exposed_bit
) {
180 hptr
->next
= high_chain
;
183 hptr
->next
= low_chain
;
188 newtable
[chain
] = low_chain
; /* 8 */
189 newtable
[chain
+ hash
->nchains
] = high_chain
;
192 hash
->table
= newtable
; /* 9 */
198 assert (hash_verify(hash
));
202 * Cut a table size in half. This is done by folding together adjacent chains
203 * and populating the lower half of the table with these chains. The chains are
204 * simply spliced together. Once this is done, the whole table is reallocated
205 * to a smaller object.
207 * 1. It is illegal to have a hash table with one slot. This would mean that
208 * hash->shift is equal to hash_val_t_bit, an illegal shift value.
209 * Also, other things could go wrong, such as hash->lowmark becoming zero.
210 * 2. Looping over each pair of sister chains, the low_chain is set to
211 * point to the head node of the chain in the lower half of the table,
212 * and high_chain points to the head node of the sister in the upper half.
213 * 3. The intent here is to compute a pointer to the last node of the
214 * lower chain into the low_tail variable. If this chain is empty,
215 * low_tail ends up with a null value.
216 * 4. If the lower chain is not empty, we simply tack the upper chain onto it.
217 * If the upper chain is a null pointer, nothing happens.
218 * 5. Otherwise if the lower chain is empty but the upper one is not,
219 * If the low chain is empty, but the high chain is not, then the
220 * high chain is simply transferred to the lower half of the table.
221 * 6. Otherwise if both chains are empty, there is nothing to do.
222 * 7. All the chain pointers are in the lower half of the table now, so
223 * we reallocate it to a smaller object. This, of course, invalidates
224 * all pointer-to-pointers which reference into the table from the
225 * first node of each chain.
226 * 8. Though it's unlikely, the reallocation may fail. In this case we
227 * pretend that the table _was_ reallocated to a smaller object.
228 * 9. Finally, update the various table parameters to reflect the new size.
231 static void shrink_table(hash_t
*hash
)
233 hash_val_t chain
, nchains
;
234 hnode_t
**newtable
, *low_tail
, *low_chain
, *high_chain
;
236 assert (hash
->nchains
>= 2); /* 1 */
237 nchains
= hash
->nchains
/ 2;
239 for (chain
= 0; chain
< nchains
; chain
++) {
240 low_chain
= hash
->table
[chain
]; /* 2 */
241 high_chain
= hash
->table
[chain
+ nchains
];
242 for (low_tail
= low_chain
; low_tail
&& low_tail
->next
; low_tail
= low_tail
->next
)
244 if (low_chain
!= 0) /* 4 */
245 low_tail
->next
= high_chain
;
246 else if (high_chain
!= 0) /* 5 */
247 hash
->table
[chain
] = high_chain
;
249 assert (hash
->table
[chain
] == NULL
); /* 6 */
251 newtable
= realloc(hash
->table
,
252 sizeof *newtable
* nchains
); /* 7 */
253 if (newtable
) /* 8 */
254 hash
->table
= newtable
;
255 hash
->mask
>>= 1; /* 9 */
256 hash
->nchains
= nchains
;
259 assert (hash_verify(hash
));
264 * Create a dynamic hash table. Both the hash table structure and the table
265 * itself are dynamically allocated. Furthermore, the table is extendible in
266 * that it will automatically grow as its load factor increases beyond a
269 * 1. If the number of bits in the hash_val_t type has not been computed yet,
270 * we do so here, because this is likely to be the first function that the
272 * 2. Allocate a hash table control structure.
273 * 3. If a hash table control structure is successfully allocated, we
274 * proceed to initialize it. Otherwise we return a null pointer.
275 * 4. We try to allocate the table of hash chains.
276 * 5. If we were able to allocate the hash chain table, we can finish
277 * initializing the hash structure and the table. Otherwise, we must
278 * backtrack by freeing the hash structure.
279 * 6. INIT_SIZE should be a power of two. The high and low marks are always set
280 * to be twice the table size and half the table size respectively. When the
281 * number of nodes in the table grows beyond the high size (beyond load
282 * factor 2), it will double in size to cut the load factor down to about
283 * about 1. If the table shrinks down to or beneath load factor 0.5,
284 * it will shrink, bringing the load up to about 1. However, the table
285 * will never shrink beneath INIT_SIZE even if it's emptied.
286 * 7. This indicates that the table is dynamically allocated and dynamically
287 * resized on the fly. A table that has this value set to zero is
288 * assumed to be statically allocated and will not be resized.
289 * 8. The table of chains must be properly reset to all null pointers.
292 hash_t
*hash_create(hashcount_t maxcount
, hash_comp_t compfun
,
297 if (hash_val_t_bit
== 0) /* 1 */
300 hash
= malloc(sizeof *hash
); /* 2 */
303 hash
->table
= malloc(sizeof *hash
->table
* INIT_SIZE
); /* 4 */
304 if (hash
->table
) { /* 5 */
305 hash
->nchains
= INIT_SIZE
; /* 6 */
306 hash
->highmark
= INIT_SIZE
* 2;
307 hash
->lowmark
= INIT_SIZE
/ 2;
309 hash
->maxcount
= maxcount
;
310 hash
->compare
= compfun
? compfun
: hash_comp_default
;
311 hash
->function
= hashfun
? hashfun
: hash_fun_default
;
312 hash
->allocnode
= hnode_alloc
;
313 hash
->freenode
= hnode_free
;
314 hash
->context
= NULL
;
315 hash
->mask
= INIT_MASK
;
316 hash
->dynamic
= 1; /* 7 */
317 clear_table(hash
); /* 8 */
318 assert (hash_verify(hash
));
328 * Select a different set of node allocator routines.
331 void hash_set_allocator(hash_t
*hash
, hnode_alloc_t al
,
332 hnode_free_t fr
, void *context
)
334 assert (hash_count(hash
) == 0);
335 assert ((al
== 0 && fr
== 0) || (al
!= 0 && fr
!= 0));
337 hash
->allocnode
= al
? al
: hnode_alloc
;
338 hash
->freenode
= fr
? fr
: hnode_free
;
339 hash
->context
= context
;
343 * Free every node in the hash using the hash->freenode() function pointer, and
344 * cause the hash to become empty.
347 void hash_free_nodes(hash_t
*hash
)
351 hash_scan_begin(&hs
, hash
);
352 while ((node
= hash_scan_next(&hs
))) {
353 hash_scan_delete(hash
, node
);
354 hash
->freenode(node
, hash
->context
);
361 * Obsolescent function for removing all nodes from a table,
362 * freeing them and then freeing the table all in one step.
365 void hash_free(hash_t
*hash
)
367 #ifdef KAZLIB_OBSOLESCENT_DEBUG
368 assert ("call to obsolescent function hash_free()" && 0);
370 hash_free_nodes(hash
);
375 * Free a dynamic hash table structure.
378 void hash_destroy(hash_t
*hash
)
380 assert (hash_val_t_bit
!= 0);
381 assert (hash_isempty(hash
));
387 * Initialize a user supplied hash structure. The user also supplies a table of
388 * chains which is assigned to the hash structure. The table is static---it
389 * will not grow or shrink.
390 * 1. See note 1. in hash_create().
391 * 2. The user supplied array of pointers hopefully contains nchains nodes.
392 * 3. See note 7. in hash_create().
393 * 4. We must dynamically compute the mask from the given power of two table
395 * 5. The user supplied table can't be assumed to contain null pointers,
396 * so we reset it here.
399 hash_t
*hash_init(hash_t
*hash
, hashcount_t maxcount
,
400 hash_comp_t compfun
, hash_fun_t hashfun
, hnode_t
**table
,
403 if (hash_val_t_bit
== 0) /* 1 */
406 assert (is_power_of_two(nchains
));
408 hash
->table
= table
; /* 2 */
409 hash
->nchains
= nchains
;
411 hash
->maxcount
= maxcount
;
412 hash
->compare
= compfun
? compfun
: hash_comp_default
;
413 hash
->function
= hashfun
? hashfun
: hash_fun_default
;
414 hash
->dynamic
= 0; /* 3 */
415 hash
->mask
= compute_mask(nchains
); /* 4 */
416 clear_table(hash
); /* 5 */
418 assert (hash_verify(hash
));
424 * Reset the hash scanner so that the next element retrieved by
425 * hash_scan_next() shall be the first element on the first non-empty chain.
427 * 1. Locate the first non empty chain.
428 * 2. If an empty chain is found, remember which one it is and set the next
429 * pointer to refer to its first element.
430 * 3. Otherwise if a chain is not found, set the next pointer to NULL
431 * so that hash_scan_next() shall indicate failure.
434 void hash_scan_begin(hscan_t
*scan
, hash_t
*hash
)
436 hash_val_t nchains
= hash
->nchains
;
443 for (chain
= 0; chain
< nchains
&& hash
->table
[chain
] == 0; chain
++)
446 if (chain
< nchains
) { /* 2 */
448 scan
->next
= hash
->table
[chain
];
455 * Retrieve the next node from the hash table, and update the pointer
456 * for the next invocation of hash_scan_next().
458 * 1. Remember the next pointer in a temporary value so that it can be
460 * 2. This assertion essentially checks whether the module has been properly
461 * initialized. The first point of interaction with the module should be
462 * either hash_create() or hash_init(), both of which set hash_val_t_bit to
464 * 3. If the next pointer we are returning is not NULL, then the user is
465 * allowed to call hash_scan_next() again. We prepare the new next pointer
466 * for that call right now. That way the user is allowed to delete the node
467 * we are about to return, since we will no longer be needing it to locate
469 * 4. If there is a next node in the chain (next->next), then that becomes the
470 * new next node, otherwise ...
471 * 5. We have exhausted the current chain, and must locate the next subsequent
472 * non-empty chain in the table.
473 * 6. If a non-empty chain is found, the first element of that chain becomes
474 * the new next node. Otherwise there is no new next node and we set the
475 * pointer to NULL so that the next time hash_scan_next() is called, a null
476 * pointer shall be immediately returned.
480 hnode_t
*hash_scan_next(hscan_t
*scan
)
482 hnode_t
*next
= scan
->next
; /* 1 */
483 hash_t
*hash
= scan
->table
;
484 hash_val_t chain
= scan
->chain
+ 1;
485 hash_val_t nchains
= hash
->nchains
;
487 assert (hash_val_t_bit
!= 0); /* 2 */
490 if (next
->next
) { /* 4 */
491 scan
->next
= next
->next
;
493 while (chain
< nchains
&& hash
->table
[chain
] == 0) /* 5 */
495 if (chain
< nchains
) { /* 6 */
497 scan
->next
= hash
->table
[chain
];
507 * Insert a node into the hash table.
509 * 1. It's illegal to insert more than the maximum number of nodes. The client
510 * should verify that the hash table is not full before attempting an
512 * 2. The same key may not be inserted into a table twice.
513 * 3. If the table is dynamic and the load factor is already at >= 2,
515 * 4. We take the bottom N bits of the hash value to derive the chain index,
516 * where N is the base 2 logarithm of the size of the hash table.
519 void hash_insert(hash_t
*hash
, hnode_t
*node
, const void *key
)
521 hash_val_t hkey
, chain
;
523 assert (hash_val_t_bit
!= 0);
524 assert (node
->next
== NULL
);
525 assert (hash
->nodecount
< hash
->maxcount
); /* 1 */
526 assert (hash_lookup(hash
, key
) == NULL
); /* 2 */
528 if (hash
->dynamic
&& hash
->nodecount
>= hash
->highmark
) /* 3 */
531 hkey
= hash
->function(key
);
532 chain
= hkey
& hash
->mask
; /* 4 */
536 node
->next
= hash
->table
[chain
];
537 hash
->table
[chain
] = node
;
540 assert (hash_verify(hash
));
544 * Find a node in the hash table and return a pointer to it.
546 * 1. We hash the key and keep the entire hash value. As an optimization, when
547 * we descend down the chain, we can compare hash values first and only if
548 * hash values match do we perform a full key comparison.
549 * 2. To locate the chain from among 2^N chains, we look at the lower N bits of
550 * the hash value by anding them with the current mask.
551 * 3. Looping through the chain, we compare the stored hash value inside each
552 * node against our computed hash. If they match, then we do a full
553 * comparison between the unhashed keys. If these match, we have located the
557 hnode_t
*hash_lookup(hash_t
*hash
, const void *key
)
559 hash_val_t hkey
, chain
;
562 hkey
= hash
->function(key
); /* 1 */
563 chain
= hkey
& hash
->mask
; /* 2 */
565 for (nptr
= hash
->table
[chain
]; nptr
; nptr
= nptr
->next
) { /* 3 */
566 if (nptr
->hkey
== hkey
&& hash
->compare(nptr
->key
, key
) == 0)
574 * Delete the given node from the hash table. Since the chains
575 * are singly linked, we must locate the start of the node's chain
578 * 1. The node must belong to this hash table, and its key must not have
579 * been tampered with.
580 * 2. If this deletion will take the node count below the low mark, we
581 * shrink the table now.
582 * 3. Determine which chain the node belongs to, and fetch the pointer
583 * to the first node in this chain.
584 * 4. If the node being deleted is the first node in the chain, then
585 * simply update the chain head pointer.
586 * 5. Otherwise advance to the node's predecessor, and splice out
587 * by updating the predecessor's next pointer.
588 * 6. Indicate that the node is no longer in a hash table.
591 hnode_t
*hash_delete(hash_t
*hash
, hnode_t
*node
)
596 assert (hash_lookup(hash
, node
->key
) == node
); /* 1 */
597 assert (hash_val_t_bit
!= 0);
599 if (hash
->dynamic
&& hash
->nodecount
<= hash
->lowmark
600 && hash
->nodecount
> INIT_SIZE
)
601 shrink_table(hash
); /* 2 */
603 chain
= node
->hkey
& hash
->mask
; /* 3 */
604 hptr
= hash
->table
[chain
];
606 if (hptr
== node
) { /* 4 */
607 hash
->table
[chain
] = node
->next
;
609 while (hptr
->next
!= node
) { /* 5 */
613 assert (hptr
->next
== node
);
614 hptr
->next
= node
->next
;
618 assert (hash_verify(hash
));
620 node
->next
= NULL
; /* 6 */
624 int hash_alloc_insert(hash_t
*hash
, const void *key
, void *data
)
626 hnode_t
*node
= hash
->allocnode(hash
->context
);
629 hnode_init(node
, data
);
630 hash_insert(hash
, node
, key
);
636 void hash_delete_free(hash_t
*hash
, hnode_t
*node
)
638 hash_delete(hash
, node
);
639 hash
->freenode(node
, hash
->context
);
643 * Exactly like hash_delete, except does not trigger table shrinkage. This is to be
644 * used from within a hash table scan operation. See notes for hash_delete.
647 hnode_t
*hash_scan_delete(hash_t
*hash
, hnode_t
*node
)
652 assert (hash_lookup(hash
, node
->key
) == node
);
653 assert (hash_val_t_bit
!= 0);
655 chain
= node
->hkey
& hash
->mask
;
656 hptr
= hash
->table
[chain
];
659 hash
->table
[chain
] = node
->next
;
661 while (hptr
->next
!= node
)
663 hptr
->next
= node
->next
;
667 assert (hash_verify(hash
));
674 * Like hash_delete_free but based on hash_scan_delete.
677 void hash_scan_delfree(hash_t
*hash
, hnode_t
*node
)
679 hash_scan_delete(hash
, node
);
680 hash
->freenode(node
, hash
->context
);
684 * Verify whether the given object is a valid hash table. This means
686 * 1. If the hash table is dynamic, verify whether the high and
687 * low expansion/shrinkage thresholds are powers of two.
688 * 2. Count all nodes in the table, and test each hash value
689 * to see whether it is correct for the node's chain.
692 int hash_verify(hash_t
*hash
)
694 hashcount_t count
= 0;
698 if (hash
->dynamic
) { /* 1 */
699 if (hash
->lowmark
>= hash
->highmark
)
701 if (!is_power_of_two(hash
->highmark
))
703 if (!is_power_of_two(hash
->lowmark
))
707 for (chain
= 0; chain
< hash
->nchains
; chain
++) { /* 2 */
708 for (hptr
= hash
->table
[chain
]; hptr
!= 0; hptr
= hptr
->next
) {
709 if ((hptr
->hkey
& hash
->mask
) != chain
)
715 if (count
!= hash
->nodecount
)
722 * Test whether the hash table is full and return 1 if this is true,
727 int hash_isfull(hash_t
*hash
)
729 return hash
->nodecount
== hash
->maxcount
;
733 * Test whether the hash table is empty and return 1 if this is true,
738 int hash_isempty(hash_t
*hash
)
740 return hash
->nodecount
== 0;
743 static hnode_t
*hnode_alloc(void *context
)
745 return malloc(sizeof *hnode_alloc(NULL
));
748 static void hnode_free(hnode_t
*node
, void *context
)
755 * Create a hash table node dynamically and assign it the given data.
758 hnode_t
*hnode_create(void *data
)
760 hnode_t
*node
= malloc(sizeof *node
);
769 * Initialize a client-supplied node
772 hnode_t
*hnode_init(hnode_t
*hnode
, void *data
)
780 * Destroy a dynamically allocated node.
783 void hnode_destroy(hnode_t
*hnode
)
789 void hnode_put(hnode_t
*node
, void *data
)
795 void *hnode_get(hnode_t
*node
)
801 const void *hnode_getkey(hnode_t
*node
)
807 hashcount_t
hash_count(hash_t
*hash
)
809 return hash
->nodecount
;
813 hashcount_t
hash_size(hash_t
*hash
)
815 return hash
->nchains
;
818 static hash_val_t
hash_fun_default(const void *key
)
820 static unsigned long randbox
[] = {
821 0x49848f1bU
, 0xe6255dbaU
, 0x36da5bdcU
, 0x47bf94e9U
,
822 0x8cbcce22U
, 0x559fc06aU
, 0xd268f536U
, 0xe10af79aU
,
823 0xc1af4d69U
, 0x1d2917b5U
, 0xec4c304dU
, 0x9ee5016cU
,
824 0x69232f74U
, 0xfead7bb3U
, 0xe9089ab6U
, 0xf012f6aeU
,
827 const unsigned char *str
= key
;
831 acc
^= randbox
[(*str
+ acc
) & 0xf];
832 acc
= (acc
<< 1) | (acc
>> 31);
834 acc
^= randbox
[((*str
++ >> 4) + acc
) & 0xf];
835 acc
= (acc
<< 2) | (acc
>> 30);
841 static int hash_comp_default(const void *key1
, const void *key2
)
843 return strcmp(key1
, key2
);