1 /* Copyright (c) 2002, Christopher Clark.
2 * Copyright (c) 2005-2006, Nick Mathewson.
3 * Copyright (c) 2007-2018, The Tor Project, Inc. */
4 /* See license at end. */
6 /* Based on ideas by Christopher Clark and interfaces from Niels Provos. */
9 These macros provide an intrustive implementation for a typesafe chaining
10 hash table, loosely based on the BSD tree.h and queue.h macros. Here's
13 First, pick a the structure that you'll be storing in the hashtable. Let's
14 say that's "struct dinosaur". To this structure, you add an HT_ENTRY()
18 HT_ENTRY(dinosaur) node; // The name inside the () must match the
21 // These are just fields from the dinosaur structure...
29 You can declare the hashtable itself as:
31 HT_HEAD(dinosaur_ht, dinosaur);
33 This declares a new 'struct dinosaur_ht' type.
35 Now you need to declare two functions to help implement the hashtable: one
36 compares two dinosaurs for equality, and one computes the hash of a
37 dinosaur. Let's say that two dinosaurs are equal if they have the same ID
41 dinosaurs_equal(const struct dinosaur *d1, const struct dinosaur *d2)
43 return d1->dinosaur_id == d2->dinosaur_id &&
44 0 == strcmp(d1->name, d2->name);
48 dinosaur_hash(const struct dinosaur *d)
50 // This is a very bad hash function. Use siphash24g instead.
51 return (d->dinosaur_id + d->name[0] ) * 1337 + d->name[1] * 1337;
54 Now you'll need to declare the functions that manipulate the hash table.
55 To do this, you put this declaration either in a header file, or inside
56 a regular module, depending on what visibility you want.
58 HT_PROTOTYPE(dinosaur_ht, // The name of the hashtable struct
59 dinosaur, // The name of the element struct,
60 node, // The name of HT_ENTRY member
61 dinosaur_hash, dinosaurs_equal);
63 Later, inside a C function, you use this macro to declare the hashtable
66 HT_GENERATE2(dinosaur_ht, dinosaur, node, dinosaur_hash, dinosaurs_equal,
67 0.6, tor_reallocarray, tor_free_);
69 Note the use of tor_free_, not tor_free. The 0.6 is magic.
71 Now you can use the hashtable! You can initialize one with
73 struct dinosaur_ht my_dinos = HT_INITIALIZER();
75 Or create one in core with
77 struct dinosaur_ht *dinos = tor_malloc(sizeof(dinosaur_ht));
78 HT_INIT(dinosaur_ht, dinos);
80 To the hashtable, you use the HT_FOO(dinosaur_ht, ...) macros. For
81 example, to put new_dino into dinos, you say:
83 HT_REPLACE(dinosaur_ht, dinos, new_dino);
85 If you're searching for an element, you need to use a dummy 'key' element in
86 the search. For example.
88 struct dinosaur dino_key;
89 dino_key.dinosaur_id = 12345;
90 dino_key.name = tor_strdup("Atrociraptor");
92 struct dinosaur *found = HT_FIND(dinosaurs_ht, dinos, &dino_key);
94 Have fun with your hash table!
98 #ifndef HT_H_INCLUDED_
99 #define HT_H_INCLUDED_
101 #define HT_HEAD(name, type) \
103 /* The hash table itself. */ \
104 struct type **hth_table; \
105 /* How long is the hash table? */ \
106 unsigned hth_table_length; \
107 /* How many elements does the table contain? */ \
108 unsigned hth_n_entries; \
109 /* How many elements will we allow in the table before resizing it? */ \
110 unsigned hth_load_limit; \
111 /* Position of hth_table_length in the primes table. */ \
115 #define HT_INITIALIZER() \
116 { NULL, 0, 0, 0, -1 }
118 #ifdef HT_NO_CACHE_HASH_VALUES
119 #define HT_ENTRY(type) \
121 struct type *hte_next; \
124 #define HT_ENTRY(type) \
126 struct type *hte_next; \
131 /* || 0 is for -Wparentheses-equality (-Wall?) appeasement under clang */
132 #define HT_EMPTY(head) \
133 (((head)->hth_n_entries == 0) || 0)
135 /* How many elements in 'head'? */
136 #define HT_SIZE(head) \
137 ((head)->hth_n_entries)
139 /* Return memory usage for a hashtable (not counting the entries themselves) */
140 #define HT_MEM_USAGE(head) \
141 (sizeof(*head) + (head)->hth_table_length * sizeof(void*))
143 #define HT_FIND(name, head, elm) name##_HT_FIND((head), (elm))
144 #define HT_INSERT(name, head, elm) name##_HT_INSERT((head), (elm))
145 #define HT_REPLACE(name, head, elm) name##_HT_REPLACE((head), (elm))
146 #define HT_REMOVE(name, head, elm) name##_HT_REMOVE((head), (elm))
147 #define HT_START(name, head) name##_HT_START(head)
148 #define HT_NEXT(name, head, elm) name##_HT_NEXT((head), (elm))
149 #define HT_NEXT_RMV(name, head, elm) name##_HT_NEXT_RMV((head), (elm))
150 #define HT_CLEAR(name, head) name##_HT_CLEAR(head)
151 #define HT_INIT(name, head) name##_HT_INIT(head)
152 #define HT_REP_IS_BAD_(name, head) name##_HT_REP_IS_BAD_(head)
153 #define HT_FOREACH_FN(name, head, fn, data) \
154 name##_HT_FOREACH_FN((head), (fn), (data))
156 static inline unsigned
157 ht_improve_hash(unsigned h
)
159 /* Aim to protect against poor hash functions by adding logic here
160 * - logic taken from java 1.4 hashtable source */
162 h
^= ((h
>> 14) | (h
<< 18)); /* >>> */
164 h
^= ((h
>> 10) | (h
<< 22)); /* >>> */
169 /** Basic string hash function, from Java standard String.hashCode(). */
170 static inline unsigned
171 ht_string_hash(const char *s
)
176 h
+= ((signed char)*s
++)*m
;
177 m
= (m
<<5)-1; /* m *= 31 */
184 /** Basic string hash function, from Python's str.__hash__() */
185 static inline unsigned
186 ht_string_hash(const char *s
)
189 const unsigned char *cp
= (const unsigned char *)s
;
192 h
= (1000003*h
) ^ *cp
++;
194 /* This conversion truncates the length of the string, but that's ok. */
195 h
^= (unsigned)(cp
-(const unsigned char*)s
);
200 #ifndef HT_NO_CACHE_HASH_VALUES
201 #define HT_SET_HASH_(elm, field, hashfn) \
202 do { (elm)->field.hte_hash = hashfn(elm); } while (0)
203 #define HT_SET_HASHVAL_(elm, field, val) \
204 do { (elm)->field.hte_hash = (val); } while (0)
205 #define HT_ELT_HASH_(elm, field, hashfn) \
206 ((elm)->field.hte_hash)
208 #define HT_SET_HASH_(elm, field, hashfn) \
210 #define HT_ELT_HASH_(elm, field, hashfn) \
212 #define HT_SET_HASHVAL_(elm, field, val) \
216 #define HT_BUCKET_NUM_(head, field, elm, hashfn) \
217 (HT_ELT_HASH_(elm,field,hashfn) % head->hth_table_length)
219 /* Helper: alias for the bucket containing 'elm'. */
220 #define HT_BUCKET_(head, field, elm, hashfn) \
221 ((head)->hth_table[HT_BUCKET_NUM_(head, field, elm, hashfn)])
223 #define HT_FOREACH(x, name, head) \
224 for ((x) = HT_START(name, head); \
226 (x) = HT_NEXT(name, head, x))
229 #define HT_ASSERT_(x) tor_assert(x)
231 #define HT_ASSERT_(x) (void)0
234 #define HT_PROTOTYPE(name, type, field, hashfn, eqfn) \
235 int name##_HT_GROW(struct name *ht, unsigned min_capacity); \
236 void name##_HT_CLEAR(struct name *ht); \
237 int name##_HT_REP_IS_BAD_(const struct name *ht); \
239 name##_HT_INIT(struct name *head) { \
240 head->hth_table_length = 0; \
241 head->hth_table = NULL; \
242 head->hth_n_entries = 0; \
243 head->hth_load_limit = 0; \
244 head->hth_prime_idx = -1; \
246 /* Helper: returns a pointer to the right location in the table \
247 * 'head' to find or insert the element 'elm'. */ \
248 static inline struct type ** \
249 name##_HT_FIND_P_(struct name *head, struct type *elm) \
252 if (!head->hth_table) \
254 p = &HT_BUCKET_(head, field, elm, hashfn); \
258 p = &(*p)->field.hte_next; \
262 /* Return a pointer to the element in the table 'head' matching 'elm', \
263 * or NULL if no such element exists */ \
264 ATTR_UNUSED static inline struct type * \
265 name##_HT_FIND(const struct name *head, struct type *elm) \
268 struct name *h = (struct name *) head; \
269 HT_SET_HASH_(elm, field, hashfn); \
270 p = name##_HT_FIND_P_(h, elm); \
271 return p ? *p : NULL; \
273 /* Insert the element 'elm' into the table 'head'. Do not call this \
274 * function if the table might already contain a matching element. */ \
275 ATTR_UNUSED static inline void \
276 name##_HT_INSERT(struct name *head, struct type *elm) \
279 if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
280 name##_HT_GROW(head, head->hth_n_entries+1); \
281 ++head->hth_n_entries; \
282 HT_SET_HASH_(elm, field, hashfn); \
283 p = &HT_BUCKET_(head, field, elm, hashfn); \
284 elm->field.hte_next = *p; \
287 /* Insert the element 'elm' into the table 'head'. If there already \
288 * a matching element in the table, replace that element and return \
290 ATTR_UNUSED static inline struct type * \
291 name##_HT_REPLACE(struct name *head, struct type *elm) \
293 struct type **p, *r; \
294 if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
295 name##_HT_GROW(head, head->hth_n_entries+1); \
296 HT_SET_HASH_(elm, field, hashfn); \
297 p = name##_HT_FIND_P_(head, elm); \
298 HT_ASSERT_(p != NULL); /* this holds because we called HT_GROW */ \
301 if (r && (r!=elm)) { \
302 elm->field.hte_next = r->field.hte_next; \
303 r->field.hte_next = NULL; \
306 ++head->hth_n_entries; \
310 /* Remove any element matching 'elm' from the table 'head'. If such \
311 * an element is found, return it; otherwise return NULL. */ \
312 ATTR_UNUSED static inline struct type * \
313 name##_HT_REMOVE(struct name *head, struct type *elm) \
315 struct type **p, *r; \
316 HT_SET_HASH_(elm, field, hashfn); \
317 p = name##_HT_FIND_P_(head,elm); \
321 *p = r->field.hte_next; \
322 r->field.hte_next = NULL; \
323 --head->hth_n_entries; \
326 /* Invoke the function 'fn' on every element of the table 'head', \
327 * using 'data' as its second argument. If the function returns \
328 * nonzero, remove the most recently examined element before invoking \
329 * the function again. */ \
330 ATTR_UNUSED static inline void \
331 name##_HT_FOREACH_FN(struct name *head, \
332 int (*fn)(struct type *, void *), \
336 struct type **p, **nextp, *next; \
337 if (!head->hth_table) \
339 for (idx=0; idx < head->hth_table_length; ++idx) { \
340 p = &head->hth_table[idx]; \
342 nextp = &(*p)->field.hte_next; \
344 if (fn(*p, data)) { \
345 --head->hth_n_entries; \
353 /* Return a pointer to the first element in the table 'head', under \
354 * an arbitrary order. This order is stable under remove operations, \
355 * but not under others. If the table is empty, return NULL. */ \
356 ATTR_UNUSED static inline struct type ** \
357 name##_HT_START(struct name *head) \
360 while (b < head->hth_table_length) { \
361 if (head->hth_table[b]) { \
363 HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
364 return &head->hth_table[b]; \
370 /* Return the next element in 'head' after 'elm', under the arbitrary \
371 * order used by HT_START. If there are no more elements, return \
372 * NULL. If 'elm' is to be removed from the table, you must call \
373 * this function for the next value before you remove it. \
375 ATTR_UNUSED static inline struct type ** \
376 name##_HT_NEXT(struct name *head, struct type **elm) \
378 if ((*elm)->field.hte_next) { \
379 HT_ASSERT_(HT_BUCKET_NUM_(head,field,*elm,hashfn) == \
380 HT_BUCKET_NUM_(head,field,(*elm)->field.hte_next,hashfn)); \
381 return &(*elm)->field.hte_next; \
383 unsigned b = HT_BUCKET_NUM_(head,field,*elm,hashfn)+1; \
384 while (b < head->hth_table_length) { \
385 if (head->hth_table[b]) { \
387 HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
388 return &head->hth_table[b]; \
395 ATTR_UNUSED static inline struct type ** \
396 name##_HT_NEXT_RMV(struct name *head, struct type **elm) \
398 unsigned h = HT_ELT_HASH_(*elm, field, hashfn); \
399 *elm = (*elm)->field.hte_next; \
400 --head->hth_n_entries; \
404 unsigned b = (h % head->hth_table_length)+1; \
405 while (b < head->hth_table_length) { \
406 if (head->hth_table[b]) \
407 return &head->hth_table[b]; \
414 #define HT_GENERATE2(name, type, field, hashfn, eqfn, load, reallocarrayfn, \
416 /* Primes that aren't too far from powers of two. We stop at */ \
417 /* P=402653189 because P*sizeof(void*) is less than SSIZE_MAX */ \
418 /* even on a 32-bit platform. */ \
419 static unsigned name##_PRIMES[] = { \
421 769, 1543, 3079, 6151, \
422 12289, 24593, 49157, 98317, \
423 196613, 393241, 786433, 1572869, \
424 3145739, 6291469, 12582917, 25165843, \
425 50331653, 100663319, 201326611, 402653189 \
427 static unsigned name##_N_PRIMES = \
428 (unsigned)(sizeof(name##_PRIMES)/sizeof(name##_PRIMES[0])); \
429 /* Expand the internal table of 'head' until it is large enough to \
430 * hold 'size' elements. Return 0 on success, -1 on allocation \
433 name##_HT_GROW(struct name *head, unsigned size) \
435 unsigned new_len, new_load_limit; \
437 struct type **new_table; \
438 if (head->hth_prime_idx == (int)name##_N_PRIMES - 1) \
440 if (head->hth_load_limit > size) \
442 prime_idx = head->hth_prime_idx; \
444 new_len = name##_PRIMES[++prime_idx]; \
445 new_load_limit = (unsigned)(load*new_len); \
446 } while (new_load_limit <= size && \
447 prime_idx < (int)name##_N_PRIMES); \
448 if ((new_table = reallocarrayfn(NULL, new_len, sizeof(struct type*)))) { \
450 memset(new_table, 0, new_len*sizeof(struct type*)); \
451 for (b = 0; b < head->hth_table_length; ++b) { \
452 struct type *elm, *next; \
454 elm = head->hth_table[b]; \
456 next = elm->field.hte_next; \
457 b2 = HT_ELT_HASH_(elm, field, hashfn) % new_len; \
458 elm->field.hte_next = new_table[b2]; \
459 new_table[b2] = elm; \
463 if (head->hth_table) \
464 freefn(head->hth_table); \
465 head->hth_table = new_table; \
468 new_table = reallocarrayfn(head->hth_table, new_len, sizeof(struct type*)); \
469 if (!new_table) return -1; \
470 memset(new_table + head->hth_table_length, 0, \
471 (new_len - head->hth_table_length)*sizeof(struct type*)); \
472 for (b=0; b < head->hth_table_length; ++b) { \
473 struct type *e, **pE; \
474 for (pE = &new_table[b], e = *pE; e != NULL; e = *pE) { \
475 b2 = HT_ELT_HASH_(e, field, hashfn) % new_len; \
477 pE = &e->field.hte_next; \
479 *pE = e->field.hte_next; \
480 e->field.hte_next = new_table[b2]; \
485 head->hth_table = new_table; \
487 head->hth_table_length = new_len; \
488 head->hth_prime_idx = prime_idx; \
489 head->hth_load_limit = new_load_limit; \
492 /* Free all storage held by 'head'. Does not free 'head' itself, or \
493 * individual elements. */ \
495 name##_HT_CLEAR(struct name *head) \
497 if (head->hth_table) \
498 freefn(head->hth_table); \
499 head->hth_table_length = 0; \
500 name##_HT_INIT(head); \
502 /* Debugging helper: return false iff the representation of 'head' is \
503 * internally consistent. */ \
505 name##_HT_REP_IS_BAD_(const struct name *head) \
509 if (!head->hth_table_length) { \
510 if (!head->hth_table && !head->hth_n_entries && \
511 !head->hth_load_limit && head->hth_prime_idx == -1) \
516 if (!head->hth_table || head->hth_prime_idx < 0 || \
517 !head->hth_load_limit) \
519 if (head->hth_n_entries > head->hth_load_limit) \
521 if (head->hth_table_length != name##_PRIMES[head->hth_prime_idx]) \
523 if (head->hth_load_limit != (unsigned)(load*head->hth_table_length)) \
525 for (n = i = 0; i < head->hth_table_length; ++i) { \
526 for (elm = head->hth_table[i]; elm; elm = elm->field.hte_next) { \
527 if (HT_ELT_HASH_(elm, field, hashfn) != hashfn(elm)) \
529 if (HT_BUCKET_NUM_(head,field,elm,hashfn) != i) \
534 if (n != head->hth_n_entries) \
539 #define HT_GENERATE(name, type, field, hashfn, eqfn, load, mallocfn, \
542 name##_reallocarray(void *arg, size_t a, size_t b) \
544 if ((b) && (a) > SIZE_MAX / (b)) \
547 return reallocfn((arg),(a)*(b)); \
549 return mallocfn((a)*(b)); \
551 HT_GENERATE2(name, type, field, hashfn, eqfn, load, \
552 name##_reallocarray, freefn)
554 /** Implements an over-optimized "find and insert if absent" block;
555 * not meant for direct usage by typical code, or usage outside the critical
557 #define HT_FIND_OR_INSERT_(name, field, hashfn, head, eltype, elm, var, y, n) \
559 struct name *var##_head_ = head; \
560 struct eltype **var; \
561 if (!var##_head_->hth_table || \
562 var##_head_->hth_n_entries >= var##_head_->hth_load_limit) \
563 name##_HT_GROW(var##_head_, var##_head_->hth_n_entries+1); \
564 HT_SET_HASH_((elm), field, hashfn); \
565 var = name##_HT_FIND_P_(var##_head_, (elm)); \
566 HT_ASSERT_(var); /* Holds because we called HT_GROW */ \
573 #define HT_FOI_INSERT_(field, head, elm, newent, var) \
575 HT_SET_HASHVAL_(newent, field, (elm)->field.hte_hash); \
576 newent->field.hte_next = NULL; \
578 ++((head)->hth_n_entries); \
582 * Copyright 2005, Nick Mathewson. Implementation logic is adapted from code
583 * by Christopher Clark, retrofit to allow drop-in memory management, and to
584 * use the same interface as Niels Provos's tree.h. This is probably still
585 * a derived work, so the original license below still applies.
587 * Copyright (c) 2002, Christopher Clark
588 * All rights reserved.
590 * Redistribution and use in source and binary forms, with or without
591 * modification, are permitted provided that the following conditions
594 * * Redistributions of source code must retain the above copyright
595 * notice, this list of conditions and the following disclaimer.
597 * * Redistributions in binary form must reproduce the above copyright
598 * notice, this list of conditions and the following disclaimer in the
599 * documentation and/or other materials provided with the distribution.
601 * * Neither the name of the original author; nor the names of any contributors
602 * may be used to endorse or promote products derived from this software
603 * without specific prior written permission.
606 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
607 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
608 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
609 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
610 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
611 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
612 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
613 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
614 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
615 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
616 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.