Merge branch 'tor-github/pr/487' into maint-0.3.5
[tor.git] / src / ext / ht.h
blobdf9f60ba1d42a3b615ba7e7dae691a436c86895d
1 /* Copyright (c) 2002, Christopher Clark.
2 * Copyright (c) 2005-2006, Nick Mathewson.
3 * Copyright (c) 2007-2018, The Tor Project, Inc. */
4 /* See license at end. */
6 /* Based on ideas by Christopher Clark and interfaces from Niels Provos. */
8 /*
9 These macros provide an intrustive implementation for a typesafe chaining
10 hash table, loosely based on the BSD tree.h and queue.h macros. Here's
11 how to use them.
13 First, pick a the structure that you'll be storing in the hashtable. Let's
14 say that's "struct dinosaur". To this structure, you add an HT_ENTRY()
15 member, as such:
17 struct dinosaur {
18 HT_ENTRY(dinosaur) node; // The name inside the () must match the
19 // struct.
21 // These are just fields from the dinosaur structure...
22 long dinosaur_id;
23 char *name;
24 long age;
25 int is_ornithischian;
26 int is_herbivorous;
29 You can declare the hashtable itself as:
31 HT_HEAD(dinosaur_ht, dinosaur);
33 This declares a new 'struct dinosaur_ht' type.
35 Now you need to declare two functions to help implement the hashtable: one
36 compares two dinosaurs for equality, and one computes the hash of a
37 dinosaur. Let's say that two dinosaurs are equal if they have the same ID
38 and name.
40 int
41 dinosaurs_equal(const struct dinosaur *d1, const struct dinosaur *d2)
43 return d1->dinosaur_id == d2->dinosaur_id &&
44 0 == strcmp(d1->name, d2->name);
47 unsigned
48 dinosaur_hash(const struct dinosaur *d)
50 // This is a very bad hash function. Use siphash24g instead.
51 return (d->dinosaur_id + d->name[0] ) * 1337 + d->name[1] * 1337;
54 Now you'll need to declare the functions that manipulate the hash table.
55 To do this, you put this declaration either in a header file, or inside
56 a regular module, depending on what visibility you want.
58 HT_PROTOTYPE(dinosaur_ht, // The name of the hashtable struct
59 dinosaur, // The name of the element struct,
60 node, // The name of HT_ENTRY member
61 dinosaur_hash, dinosaurs_equal);
63 Later, inside a C function, you use this macro to declare the hashtable
64 functions.
66 HT_GENERATE2(dinosaur_ht, dinosaur, node, dinosaur_hash, dinosaurs_equal,
67 0.6, tor_reallocarray, tor_free_);
69 Note the use of tor_free_, not tor_free. The 0.6 is magic.
71 Now you can use the hashtable! You can initialize one with
73 struct dinosaur_ht my_dinos = HT_INITIALIZER();
75 Or create one in core with
77 struct dinosaur_ht *dinos = tor_malloc(sizeof(dinosaur_ht));
78 HT_INIT(dinosaur_ht, dinos);
80 To the hashtable, you use the HT_FOO(dinosaur_ht, ...) macros. For
81 example, to put new_dino into dinos, you say:
83 HT_REPLACE(dinosaur_ht, dinos, new_dino);
85 If you're searching for an element, you need to use a dummy 'key' element in
86 the search. For example.
88 struct dinosaur dino_key;
89 dino_key.dinosaur_id = 12345;
90 dino_key.name = tor_strdup("Atrociraptor");
92 struct dinosaur *found = HT_FIND(dinosaurs_ht, dinos, &dino_key);
94 Have fun with your hash table!
98 #ifndef HT_H_INCLUDED_
99 #define HT_H_INCLUDED_
101 #define HT_HEAD(name, type) \
102 struct name { \
103 /* The hash table itself. */ \
104 struct type **hth_table; \
105 /* How long is the hash table? */ \
106 unsigned hth_table_length; \
107 /* How many elements does the table contain? */ \
108 unsigned hth_n_entries; \
109 /* How many elements will we allow in the table before resizing it? */ \
110 unsigned hth_load_limit; \
111 /* Position of hth_table_length in the primes table. */ \
112 int hth_prime_idx; \
115 #define HT_INITIALIZER() \
116 { NULL, 0, 0, 0, -1 }
118 #ifdef HT_NO_CACHE_HASH_VALUES
119 #define HT_ENTRY(type) \
120 struct { \
121 struct type *hte_next; \
123 #else
124 #define HT_ENTRY(type) \
125 struct { \
126 struct type *hte_next; \
127 unsigned hte_hash; \
129 #endif
131 /* || 0 is for -Wparentheses-equality (-Wall?) appeasement under clang */
132 #define HT_EMPTY(head) \
133 (((head)->hth_n_entries == 0) || 0)
135 /* How many elements in 'head'? */
136 #define HT_SIZE(head) \
137 ((head)->hth_n_entries)
139 /* Return memory usage for a hashtable (not counting the entries themselves) */
140 #define HT_MEM_USAGE(head) \
141 (sizeof(*head) + (head)->hth_table_length * sizeof(void*))
143 #define HT_FIND(name, head, elm) name##_HT_FIND((head), (elm))
144 #define HT_INSERT(name, head, elm) name##_HT_INSERT((head), (elm))
145 #define HT_REPLACE(name, head, elm) name##_HT_REPLACE((head), (elm))
146 #define HT_REMOVE(name, head, elm) name##_HT_REMOVE((head), (elm))
147 #define HT_START(name, head) name##_HT_START(head)
148 #define HT_NEXT(name, head, elm) name##_HT_NEXT((head), (elm))
149 #define HT_NEXT_RMV(name, head, elm) name##_HT_NEXT_RMV((head), (elm))
150 #define HT_CLEAR(name, head) name##_HT_CLEAR(head)
151 #define HT_INIT(name, head) name##_HT_INIT(head)
152 #define HT_REP_IS_BAD_(name, head) name##_HT_REP_IS_BAD_(head)
153 #define HT_FOREACH_FN(name, head, fn, data) \
154 name##_HT_FOREACH_FN((head), (fn), (data))
155 /* Helper: */
156 static inline unsigned
157 ht_improve_hash(unsigned h)
159 /* Aim to protect against poor hash functions by adding logic here
160 * - logic taken from java 1.4 hashtable source */
161 h += ~(h << 9);
162 h ^= ((h >> 14) | (h << 18)); /* >>> */
163 h += (h << 4);
164 h ^= ((h >> 10) | (h << 22)); /* >>> */
165 return h;
168 #if 0
169 /** Basic string hash function, from Java standard String.hashCode(). */
170 static inline unsigned
171 ht_string_hash(const char *s)
173 unsigned h = 0;
174 int m = 1;
175 while (*s) {
176 h += ((signed char)*s++)*m;
177 m = (m<<5)-1; /* m *= 31 */
179 return h;
181 #endif
183 #if 0
184 /** Basic string hash function, from Python's str.__hash__() */
185 static inline unsigned
186 ht_string_hash(const char *s)
188 unsigned h;
189 const unsigned char *cp = (const unsigned char *)s;
190 h = *cp << 7;
191 while (*cp) {
192 h = (1000003*h) ^ *cp++;
194 /* This conversion truncates the length of the string, but that's ok. */
195 h ^= (unsigned)(cp-(const unsigned char*)s);
196 return h;
198 #endif
200 #ifndef HT_NO_CACHE_HASH_VALUES
201 #define HT_SET_HASH_(elm, field, hashfn) \
202 do { (elm)->field.hte_hash = hashfn(elm); } while (0)
203 #define HT_SET_HASHVAL_(elm, field, val) \
204 do { (elm)->field.hte_hash = (val); } while (0)
205 #define HT_ELT_HASH_(elm, field, hashfn) \
206 ((elm)->field.hte_hash)
207 #else
208 #define HT_SET_HASH_(elm, field, hashfn) \
209 ((void)0)
210 #define HT_ELT_HASH_(elm, field, hashfn) \
211 (hashfn(elm))
212 #define HT_SET_HASHVAL_(elm, field, val) \
213 ((void)0)
214 #endif
216 #define HT_BUCKET_NUM_(head, field, elm, hashfn) \
217 (HT_ELT_HASH_(elm,field,hashfn) % head->hth_table_length)
219 /* Helper: alias for the bucket containing 'elm'. */
220 #define HT_BUCKET_(head, field, elm, hashfn) \
221 ((head)->hth_table[HT_BUCKET_NUM_(head, field, elm, hashfn)])
223 #define HT_FOREACH(x, name, head) \
224 for ((x) = HT_START(name, head); \
225 (x) != NULL; \
226 (x) = HT_NEXT(name, head, x))
228 #ifndef HT_NDEBUG
229 #define HT_ASSERT_(x) tor_assert(x)
230 #else
231 #define HT_ASSERT_(x) (void)0
232 #endif
234 #define HT_PROTOTYPE(name, type, field, hashfn, eqfn) \
235 int name##_HT_GROW(struct name *ht, unsigned min_capacity); \
236 void name##_HT_CLEAR(struct name *ht); \
237 int name##_HT_REP_IS_BAD_(const struct name *ht); \
238 static inline void \
239 name##_HT_INIT(struct name *head) { \
240 head->hth_table_length = 0; \
241 head->hth_table = NULL; \
242 head->hth_n_entries = 0; \
243 head->hth_load_limit = 0; \
244 head->hth_prime_idx = -1; \
246 /* Helper: returns a pointer to the right location in the table \
247 * 'head' to find or insert the element 'elm'. */ \
248 static inline struct type ** \
249 name##_HT_FIND_P_(struct name *head, struct type *elm) \
251 struct type **p; \
252 if (!head->hth_table) \
253 return NULL; \
254 p = &HT_BUCKET_(head, field, elm, hashfn); \
255 while (*p) { \
256 if (eqfn(*p, elm)) \
257 return p; \
258 p = &(*p)->field.hte_next; \
260 return p; \
262 /* Return a pointer to the element in the table 'head' matching 'elm', \
263 * or NULL if no such element exists */ \
264 ATTR_UNUSED static inline struct type * \
265 name##_HT_FIND(const struct name *head, struct type *elm) \
267 struct type **p; \
268 struct name *h = (struct name *) head; \
269 HT_SET_HASH_(elm, field, hashfn); \
270 p = name##_HT_FIND_P_(h, elm); \
271 return p ? *p : NULL; \
273 /* Insert the element 'elm' into the table 'head'. Do not call this \
274 * function if the table might already contain a matching element. */ \
275 ATTR_UNUSED static inline void \
276 name##_HT_INSERT(struct name *head, struct type *elm) \
278 struct type **p; \
279 if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
280 name##_HT_GROW(head, head->hth_n_entries+1); \
281 ++head->hth_n_entries; \
282 HT_SET_HASH_(elm, field, hashfn); \
283 p = &HT_BUCKET_(head, field, elm, hashfn); \
284 elm->field.hte_next = *p; \
285 *p = elm; \
287 /* Insert the element 'elm' into the table 'head'. If there already \
288 * a matching element in the table, replace that element and return \
289 * it. */ \
290 ATTR_UNUSED static inline struct type * \
291 name##_HT_REPLACE(struct name *head, struct type *elm) \
293 struct type **p, *r; \
294 if (!head->hth_table || head->hth_n_entries >= head->hth_load_limit) \
295 name##_HT_GROW(head, head->hth_n_entries+1); \
296 HT_SET_HASH_(elm, field, hashfn); \
297 p = name##_HT_FIND_P_(head, elm); \
298 HT_ASSERT_(p != NULL); /* this holds because we called HT_GROW */ \
299 r = *p; \
300 *p = elm; \
301 if (r && (r!=elm)) { \
302 elm->field.hte_next = r->field.hte_next; \
303 r->field.hte_next = NULL; \
304 return r; \
305 } else { \
306 ++head->hth_n_entries; \
307 return NULL; \
310 /* Remove any element matching 'elm' from the table 'head'. If such \
311 * an element is found, return it; otherwise return NULL. */ \
312 ATTR_UNUSED static inline struct type * \
313 name##_HT_REMOVE(struct name *head, struct type *elm) \
315 struct type **p, *r; \
316 HT_SET_HASH_(elm, field, hashfn); \
317 p = name##_HT_FIND_P_(head,elm); \
318 if (!p || !*p) \
319 return NULL; \
320 r = *p; \
321 *p = r->field.hte_next; \
322 r->field.hte_next = NULL; \
323 --head->hth_n_entries; \
324 return r; \
326 /* Invoke the function 'fn' on every element of the table 'head', \
327 * using 'data' as its second argument. If the function returns \
328 * nonzero, remove the most recently examined element before invoking \
329 * the function again. */ \
330 ATTR_UNUSED static inline void \
331 name##_HT_FOREACH_FN(struct name *head, \
332 int (*fn)(struct type *, void *), \
333 void *data) \
335 unsigned idx; \
336 struct type **p, **nextp, *next; \
337 if (!head->hth_table) \
338 return; \
339 for (idx=0; idx < head->hth_table_length; ++idx) { \
340 p = &head->hth_table[idx]; \
341 while (*p) { \
342 nextp = &(*p)->field.hte_next; \
343 next = *nextp; \
344 if (fn(*p, data)) { \
345 --head->hth_n_entries; \
346 *p = next; \
347 } else { \
348 p = nextp; \
353 /* Return a pointer to the first element in the table 'head', under \
354 * an arbitrary order. This order is stable under remove operations, \
355 * but not under others. If the table is empty, return NULL. */ \
356 ATTR_UNUSED static inline struct type ** \
357 name##_HT_START(struct name *head) \
359 unsigned b = 0; \
360 while (b < head->hth_table_length) { \
361 if (head->hth_table[b]) { \
362 HT_ASSERT_(b == \
363 HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
364 return &head->hth_table[b]; \
366 ++b; \
368 return NULL; \
370 /* Return the next element in 'head' after 'elm', under the arbitrary \
371 * order used by HT_START. If there are no more elements, return \
372 * NULL. If 'elm' is to be removed from the table, you must call \
373 * this function for the next value before you remove it. \
374 */ \
375 ATTR_UNUSED static inline struct type ** \
376 name##_HT_NEXT(struct name *head, struct type **elm) \
378 if ((*elm)->field.hte_next) { \
379 HT_ASSERT_(HT_BUCKET_NUM_(head,field,*elm,hashfn) == \
380 HT_BUCKET_NUM_(head,field,(*elm)->field.hte_next,hashfn)); \
381 return &(*elm)->field.hte_next; \
382 } else { \
383 unsigned b = HT_BUCKET_NUM_(head,field,*elm,hashfn)+1; \
384 while (b < head->hth_table_length) { \
385 if (head->hth_table[b]) { \
386 HT_ASSERT_(b == \
387 HT_BUCKET_NUM_(head,field,head->hth_table[b],hashfn)); \
388 return &head->hth_table[b]; \
390 ++b; \
392 return NULL; \
395 ATTR_UNUSED static inline struct type ** \
396 name##_HT_NEXT_RMV(struct name *head, struct type **elm) \
398 unsigned h = HT_ELT_HASH_(*elm, field, hashfn); \
399 *elm = (*elm)->field.hte_next; \
400 --head->hth_n_entries; \
401 if (*elm) { \
402 return elm; \
403 } else { \
404 unsigned b = (h % head->hth_table_length)+1; \
405 while (b < head->hth_table_length) { \
406 if (head->hth_table[b]) \
407 return &head->hth_table[b]; \
408 ++b; \
410 return NULL; \
414 #define HT_GENERATE2(name, type, field, hashfn, eqfn, load, reallocarrayfn, \
415 freefn) \
416 /* Primes that aren't too far from powers of two. We stop at */ \
417 /* P=402653189 because P*sizeof(void*) is less than SSIZE_MAX */ \
418 /* even on a 32-bit platform. */ \
419 static unsigned name##_PRIMES[] = { \
420 53, 97, 193, 389, \
421 769, 1543, 3079, 6151, \
422 12289, 24593, 49157, 98317, \
423 196613, 393241, 786433, 1572869, \
424 3145739, 6291469, 12582917, 25165843, \
425 50331653, 100663319, 201326611, 402653189 \
426 }; \
427 static unsigned name##_N_PRIMES = \
428 (unsigned)(sizeof(name##_PRIMES)/sizeof(name##_PRIMES[0])); \
429 /* Expand the internal table of 'head' until it is large enough to \
430 * hold 'size' elements. Return 0 on success, -1 on allocation \
431 * failure. */ \
432 int \
433 name##_HT_GROW(struct name *head, unsigned size) \
435 unsigned new_len, new_load_limit; \
436 int prime_idx; \
437 struct type **new_table; \
438 if (head->hth_prime_idx == (int)name##_N_PRIMES - 1) \
439 return 0; \
440 if (head->hth_load_limit > size) \
441 return 0; \
442 prime_idx = head->hth_prime_idx; \
443 do { \
444 new_len = name##_PRIMES[++prime_idx]; \
445 new_load_limit = (unsigned)(load*new_len); \
446 } while (new_load_limit <= size && \
447 prime_idx < (int)name##_N_PRIMES); \
448 if ((new_table = reallocarrayfn(NULL, new_len, sizeof(struct type*)))) { \
449 unsigned b; \
450 memset(new_table, 0, new_len*sizeof(struct type*)); \
451 for (b = 0; b < head->hth_table_length; ++b) { \
452 struct type *elm, *next; \
453 unsigned b2; \
454 elm = head->hth_table[b]; \
455 while (elm) { \
456 next = elm->field.hte_next; \
457 b2 = HT_ELT_HASH_(elm, field, hashfn) % new_len; \
458 elm->field.hte_next = new_table[b2]; \
459 new_table[b2] = elm; \
460 elm = next; \
463 if (head->hth_table) \
464 freefn(head->hth_table); \
465 head->hth_table = new_table; \
466 } else { \
467 unsigned b, b2; \
468 new_table = reallocarrayfn(head->hth_table, new_len, sizeof(struct type*)); \
469 if (!new_table) return -1; \
470 memset(new_table + head->hth_table_length, 0, \
471 (new_len - head->hth_table_length)*sizeof(struct type*)); \
472 for (b=0; b < head->hth_table_length; ++b) { \
473 struct type *e, **pE; \
474 for (pE = &new_table[b], e = *pE; e != NULL; e = *pE) { \
475 b2 = HT_ELT_HASH_(e, field, hashfn) % new_len; \
476 if (b2 == b) { \
477 pE = &e->field.hte_next; \
478 } else { \
479 *pE = e->field.hte_next; \
480 e->field.hte_next = new_table[b2]; \
481 new_table[b2] = e; \
485 head->hth_table = new_table; \
487 head->hth_table_length = new_len; \
488 head->hth_prime_idx = prime_idx; \
489 head->hth_load_limit = new_load_limit; \
490 return 0; \
492 /* Free all storage held by 'head'. Does not free 'head' itself, or \
493 * individual elements. */ \
494 void \
495 name##_HT_CLEAR(struct name *head) \
497 if (head->hth_table) \
498 freefn(head->hth_table); \
499 head->hth_table_length = 0; \
500 name##_HT_INIT(head); \
502 /* Debugging helper: return false iff the representation of 'head' is \
503 * internally consistent. */ \
504 int \
505 name##_HT_REP_IS_BAD_(const struct name *head) \
507 unsigned n, i; \
508 struct type *elm; \
509 if (!head->hth_table_length) { \
510 if (!head->hth_table && !head->hth_n_entries && \
511 !head->hth_load_limit && head->hth_prime_idx == -1) \
512 return 0; \
513 else \
514 return 1; \
516 if (!head->hth_table || head->hth_prime_idx < 0 || \
517 !head->hth_load_limit) \
518 return 2; \
519 if (head->hth_n_entries > head->hth_load_limit) \
520 return 3; \
521 if (head->hth_table_length != name##_PRIMES[head->hth_prime_idx]) \
522 return 4; \
523 if (head->hth_load_limit != (unsigned)(load*head->hth_table_length)) \
524 return 5; \
525 for (n = i = 0; i < head->hth_table_length; ++i) { \
526 for (elm = head->hth_table[i]; elm; elm = elm->field.hte_next) { \
527 if (HT_ELT_HASH_(elm, field, hashfn) != hashfn(elm)) \
528 return 1000 + i; \
529 if (HT_BUCKET_NUM_(head,field,elm,hashfn) != i) \
530 return 10000 + i; \
531 ++n; \
534 if (n != head->hth_n_entries) \
535 return 6; \
536 return 0; \
539 #define HT_GENERATE(name, type, field, hashfn, eqfn, load, mallocfn, \
540 reallocfn, freefn) \
541 static void * \
542 name##_reallocarray(void *arg, size_t a, size_t b) \
544 if ((b) && (a) > SIZE_MAX / (b)) \
545 return NULL; \
546 if (arg) \
547 return reallocfn((arg),(a)*(b)); \
548 else \
549 return mallocfn((a)*(b)); \
551 HT_GENERATE2(name, type, field, hashfn, eqfn, load, \
552 name##_reallocarray, freefn)
554 /** Implements an over-optimized "find and insert if absent" block;
555 * not meant for direct usage by typical code, or usage outside the critical
556 * path.*/
557 #define HT_FIND_OR_INSERT_(name, field, hashfn, head, eltype, elm, var, y, n) \
559 struct name *var##_head_ = head; \
560 struct eltype **var; \
561 if (!var##_head_->hth_table || \
562 var##_head_->hth_n_entries >= var##_head_->hth_load_limit) \
563 name##_HT_GROW(var##_head_, var##_head_->hth_n_entries+1); \
564 HT_SET_HASH_((elm), field, hashfn); \
565 var = name##_HT_FIND_P_(var##_head_, (elm)); \
566 HT_ASSERT_(var); /* Holds because we called HT_GROW */ \
567 if (*var) { \
568 y; \
569 } else { \
570 n; \
573 #define HT_FOI_INSERT_(field, head, elm, newent, var) \
575 HT_SET_HASHVAL_(newent, field, (elm)->field.hte_hash); \
576 newent->field.hte_next = NULL; \
577 *var = newent; \
578 ++((head)->hth_n_entries); \
582 * Copyright 2005, Nick Mathewson. Implementation logic is adapted from code
583 * by Christopher Clark, retrofit to allow drop-in memory management, and to
584 * use the same interface as Niels Provos's tree.h. This is probably still
585 * a derived work, so the original license below still applies.
587 * Copyright (c) 2002, Christopher Clark
588 * All rights reserved.
590 * Redistribution and use in source and binary forms, with or without
591 * modification, are permitted provided that the following conditions
592 * are met:
594 * * Redistributions of source code must retain the above copyright
595 * notice, this list of conditions and the following disclaimer.
597 * * Redistributions in binary form must reproduce the above copyright
598 * notice, this list of conditions and the following disclaimer in the
599 * documentation and/or other materials provided with the distribution.
601 * * Neither the name of the original author; nor the names of any contributors
602 * may be used to endorse or promote products derived from this software
603 * without specific prior written permission.
606 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
607 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
608 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
609 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
610 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
611 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
612 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
613 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
614 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
615 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
616 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
619 #endif