tommyDS_hashlin/tommyhashlin.h

   1 /*
   2  * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  *
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  *
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25  * POSSIBILITY OF SUCH DAMAGE.
  26  */
  27
  28 /** \file
  29  * Linear chained hashtable.
  30  *
  31  * This hashtable resizes dynamically and progressively using a variation of the
  32  * linear hashing algorithm described in http://en.wikipedia.org/wiki/Linear_hashing
  33  *
   34  * It starts with the minimal size of 64 buckets (2^TOMMY_HASHLIN_BIT), it doubles
   35  * the size when it reaches a load factor greater than 0.5, and it halves the size
   36  * when the load factor gets lower than 0.125.
  37  *
  38  * The progressive resize is good for real-time and interactive applications
  39  * as it makes insert and delete operations taking always the same time.
  40  *
   41  * The resize uses a dynamic array that supports access to non-contiguous
   42  * segments.
  43  * In this way we only allocate additional table segments on the heap, without
  44  * freeing the previous table, and then not increasing the heap fragmentation.
  45  *
  46  * The resize takes place inside tommy_hashlin_insert() and tommy_hashlin_remove().
  47  * No resize is done in the tommy_hashlin_search() operation.
  48  *
  49  * To initialize the hashtable you have to call tommy_hashlin_init().
  50  *
  51  * \code
   52  * tommy_hashlin hashlin;
  53  *
  54  * tommy_hashlin_init(&hashlin);
  55  * \endcode
  56  *
  57  * To insert elements in the hashtable you have to call tommy_hashlin_insert() for
  58  * each element.
  59  * In the insertion call you have to specify the address of the node, the
  60  * address of the object, and the hash value of the key to use.
   61  * The address of the object is used to initialize the tommy_node::data field
   62  * of the node, and the hash to initialize the tommy_node::index field.
  63  *
  64  * \code
  65  * struct object {
  66  *     int value;
  67  *     // other fields
  68  *     tommy_node node;
  69  * };
  70  *
  71  * struct object* obj = malloc(sizeof(struct object)); // creates the object
  72  *
  73  * obj->value = ...; // initializes the object
  74  *
  75  * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object
  76  * \endcode
  77  *
   78  * To find an element in the hashtable you have to call tommy_hashlin_search()
   79  * providing a comparison function, its argument, and the hash of the key to search.
  80  *
  81  * \code
  82  * int compare(const void* arg, const void* obj)
  83  * {
  84  *     return *(const int*)arg != ((const struct object*)obj)->value;
  85  * }
  86  *
  87  * int value_to_find = 1;
  88  * struct object* obj = tommy_hashlin_search(&hashlin, compare, &value_to_find, tommy_inthash_u32(value_to_find));
  89  * if (!obj) {
  90  *     // not found
  91  * } else {
  92  *     // found
  93  * }
  94  * \endcode
  95  *
   96  * To iterate over all the elements in the hashtable with the same key, you have to
   97  * use tommy_hashlin_bucket() and follow the tommy_node::next pointer until NULL.
   98  * You also have to check explicitly for the key, as the bucket may contain
   99  * different keys.
 100  *
 101  * \code
 102  * int value_to_find = 1;
 103  * tommy_node* i = tommy_hashlin_bucket(&hashlin, tommy_inthash_u32(value_to_find));
 104  * while (i) {
 105  *     struct object* obj = i->data; // gets the object pointer
 106  *
 107  *     if (obj->value == value_to_find) {
 108  *         printf("%d\n", obj->value); // process the object
 109  *     }
 110  *
 111  *     i = i->next; // goes to the next element
 112  * }
 113  * \endcode
 114  *
 115  * To remove an element from the hashtable you have to call tommy_hashlin_remove()
 116  * providing a comparison function, its argument, and the hash of the key to search
 117  * and remove.
 118  *
 119  * \code
 120  * struct object* obj = tommy_hashlin_remove(&hashlin, compare, &value_to_remove, tommy_inthash_u32(value_to_remove));
 121  * if (obj) {
 122  *     free(obj); // frees the object allocated memory
 123  * }
 124  * \endcode
 125  *
 126  * To destroy the hashtable you have to remove all the elements, and deinitialize
 127  * the hashtable calling tommy_hashlin_done().
 128  *
 129  * \code
 130  * tommy_hashlin_done(&hashlin);
 131  * \endcode
 132  *
 133  * If you need to iterate over all the elements in the hashtable, you can use
 134  * tommy_hashlin_foreach() or tommy_hashlin_foreach_arg().
 135  * If you need a more precise control with a real iteration, you have to insert
 136  * all the elements also in a ::tommy_list, and use the list to iterate.
 137  * See the \ref multiindex example for more detail.
 138  */
 139
 140 #ifndef __TOMMYHASHLIN_H
 141 #define __TOMMYHASHLIN_H
 142
 143 #include "tommyhash.h"
 144
 145 /******************************************************************************/
 146 /* hashlin */
 147
  148 /** \internal
  149  * Initial and minimal size of the hashtable expressed as a power of 2.
  150  * The initial size is 2^TOMMY_HASHLIN_BIT buckets, which is 64 with the value defined here.
  151  */
  152 #define TOMMY_HASHLIN_BIT 6
 153
  154 /**
  155  * Hashtable node.
  156  * This is the node that you have to include inside your objects.
       * It is a plain alias of ::tommy_node, so the node fields are reached with the
       * ::tommy_node member names.
  157  */
  158 typedef tommy_node tommy_hashlin_node;
 159
  160 /**
  161  * Hashtable container type.
       * The buckets are kept in a set of separate segments instead of a single array;
       * see tommy_hashlin_pos() for how a position is mapped to its segment.
  162  * \note Don't use internal fields directly, but access the container only using functions.
  163  */
  164 typedef struct tommy_hashlin_struct {
  165         tommy_hashlin_node** bucket[TOMMY_SIZE_BIT]; /**< Dynamic array of hash buckets, split in segments. A position is accessed as bucket[ilog2(pos | 1)][pos]. One list for each hash modulus. */
  166         tommy_size_t bucket_max; /**< Number of buckets. */
  167         tommy_size_t bucket_mask; /**< Bit mask applied to the hash to get the position, used when the low order position is below ::split. */
  168         tommy_size_t low_max; /**< Low order max value. */
  169         tommy_size_t low_mask; /**< Low order mask value, applied to the hash to get the low order position. */
  170         tommy_size_t split; /**< Split position. Low order positions below it are addressed using ::bucket_mask. */
  171         tommy_size_t count; /**< Number of elements, as reported by tommy_hashlin_count(). */
  172         tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */
  173         tommy_uint_t state; /**< Reallocation state. */
  174 } tommy_hashlin;
 175
  176 /**
  177  * Initializes the hashtable.
       * You have to call it before using the container with the other functions.
       * \param hashlin Hashtable to initialize.
  178  */
  179 void tommy_hashlin_init(tommy_hashlin* hashlin);
 180
  181 /**
  182  * Deinitializes the hashtable.
  183  *
  184  * You can call this function with elements still contained,
  185  * but such elements are not going to be freed by this call.
       * If the elements have to be deallocated, do it before this call,
       * for example with tommy_hashlin_foreach().
       * \param hashlin Hashtable to deinitialize.
  186  */
  187 void tommy_hashlin_done(tommy_hashlin* hashlin);
 188
  189 /**
  190  * Inserts an element in the hashtable.
       * A step of the progressive resize may take place inside this call.
       * \param hashlin Hashtable receiving the element.
       * \param node Node to insert. It's the node included inside your object.
       * \param data Address of the object. It's used to initialize the data field of the node.
       * \param hash Hash of the key of the object. The same value has to be used later
       * to search or remove the element.
  191  */
  192 void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash);
 193
  194 /**
  195  * Searches and removes an element from the hashtable.
  196  * You have to provide a compare function and the hash of the element you want to remove.
  197  * If the element is not found, 0 is returned.
  198  * If more equal elements are present, the first one is removed.
       * A step of the progressive resize may take place inside this call.
       * \param hashlin Hashtable to remove the element from.
  199  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
  200  * The function should return 0 for equal elements, anything else for different elements.
  201  * \param cmp_arg Compare argument passed as first argument of the compare function.
  202  * \param hash Hash of the element to find and remove.
  203  * \return The removed element, or 0 if not found. The element itself is not deallocated.
  204  */
  205 void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash);
 206
 207 /** \internal
 208  * Returns the bucket at the specified position.
 209  */
 210 tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos)
 211 {
 212         tommy_uint_t bsr;
 213
 214         /* get the highest bit set, in case of all 0, return 0 */
 215         bsr = tommy_ilog2(pos | 1);
 216
 217         return &hashlin->bucket[bsr][pos];
 218 }
 219
 220 /** \internal
 221  * Returns a pointer to the bucket of the specified hash.
 222  */
 223 tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash)
 224 {
 225         tommy_size_t pos;
 226         tommy_size_t high_pos;
 227
 228         pos = hash & hashlin->low_mask;
 229         high_pos = hash & hashlin->bucket_mask;
 230
 231         /* if this position is already allocated in the high half */
 232         if (pos < hashlin->split) {
 233                 /* The following assigment is expected to be implemented */
 234                 /* with a conditional move instruction */
 235                 /* that results in a little better and constant performance */
 236                 /* regardless of the split position. */
 237                 /* This affects mostly the worst case, when the split value */
 238                 /* is near at its half, resulting in a totally unpredictable */
 239                 /* condition by the CPU. */
 240                 /* In such case the use of the conditional move is generally faster. */
 241
 242                 /* use also the high bit */
 243                 pos = high_pos;
 244         }
 245
 246         return tommy_hashlin_pos(hashlin, pos);
 247 }
 248
 249 /**
 250  * Gets the bucket of the specified hash.
 251  * The bucket is guaranteed to contain ALL the elements with the specified hash,
 252  * but it can contain also others.
 253  * You can access elements in the bucket following the ::next pointer until 0.
 254  * \param hash Hash of the element to find.
 255  * \return The head of the bucket, or 0 if empty.
 256  */
 257 tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash)
 258 {
 259         return *tommy_hashlin_bucket_ref(hashlin, hash);
 260 }
 261
 262 /**
 263  * Searches an element in the hashtable.
 264  * You have to provide a compare function and the hash of the element you want to find.
 265  * If more equal elements are present, the first one is returned.
 266  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
 267  * The function should return 0 for equal elements, anything other for different elements.
 268  * \param cmp_arg Compare argument passed as first argument of the compare function.
 269  * \param hash Hash of the element to find.
 270  * \return The first element found, or 0 if none.
 271  */
 272 tommy_inline void* tommy_hashlin_search(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
 273 {
 274         tommy_hashlin_node* i = tommy_hashlin_bucket(hashlin, hash);
 275
 276         while (i) {
 277                 /* we first check if the hash matches, as in the same bucket we may have multiples hash values */
 278                 if (i->index == hash && cmp(cmp_arg, i->data) == 0)
 279                         return i->data;
 280                 i = i->next;
 281         }
 282         return 0;
 283 }
 284
  285 /**
  286  * Removes an element from the hashtable.
  287  * You must already have the address of the element to remove.
       * \param hashlin Hashtable to remove the element from.
       * \param node Node of the element to remove.
  288  */
  289 void tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node);
 290
  291 /**
  292  * Calls the specified function for each element in the hashtable.
  293  *
  294  * You cannot add or remove elements from the inside of the callback,
  295  * but can use it to deallocate them.
  296  *
  297  * \code
  298  * tommy_hashlin hashlin;
  299  *
  300  * // initializes the hashtable
  301  * tommy_hashlin_init(&hashlin);
  302  *
  303  * ...
  304  *
  305  * // creates an object
  306  * struct object* obj = malloc(sizeof(struct object));
  307  *
  308  * ...
  309  *
  310  * // inserts it in the hashtable
  311  * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value));
  312  *
  313  * ...
  314  *
  315  * // deallocates all the objects iterating the hashtable
  316  * tommy_hashlin_foreach(&hashlin, free);
  317  *
  318  * // deallocates the hashtable
  319  * tommy_hashlin_done(&hashlin);
  320  * \endcode
       *
       * \param hashlin Hashtable to iterate.
       * \param func Function to call for each element. In the example above it's free(),
       * used on objects whose address was given as data at insertion.
  321  */
  322 void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func);
 323
  324 /**
  325  * Calls the specified function with an argument for each element in the hashtable.
       * \param hashlin Hashtable to iterate.
       * \param func Function to call for each element. See ::tommy_foreach_arg_func for its signature.
       * \param arg Argument handed to func at every call.
  326  */
  327 void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg);
 328
 329 /**
 330  * Gets the number of elements.
 331  */
 332 tommy_inline tommy_size_t tommy_hashlin_count(tommy_hashlin* hashlin)
 333 {
 334         return hashlin->count;
 335 }
 336
  337 /**
  338  * Gets the size of allocated memory.
  339  * It includes the size of the ::tommy_hashlin_node of the stored elements.
       * \param hashlin Hashtable to query.
       * \return Size of the allocated memory.
  340  */
  341 tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin);
 342
 343 #endif
 344