tommyDS_hashlin/tommyhashlin.c

   1 /*
   2  * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  *
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  *
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25  * POSSIBILITY OF SUCH DAMAGE.
  26  */
  27
  28 #include "tommyhashlin.h"
  29 #include "tommylist.h"
  30
  31 #include <assert.h> /* for assert */
  32
  33 /******************************************************************************/
  34 /* hashlin */
  35
  36 /**
  37  * Reallocation states.
  38  */
  39 #define TOMMY_HASHLIN_STATE_STABLE 0
  40 #define TOMMY_HASHLIN_STATE_GROW 1
  41 #define TOMMY_HASHLIN_STATE_SHRINK 2
  42
  43 /**
  44  * Set the hashtable in stable state.
  45  */
  46 tommy_inline void tommy_hashlin_stable(tommy_hashlin* hashlin)
  47 {
  48         hashlin->state = TOMMY_HASHLIN_STATE_STABLE;
  49
  50         /* setup low_mask/max/split to allow tommy_hashlin_bucket_ref() */
  51         /* and tommy_hashlin_foreach() to work regardless we are in stable state */
  52         hashlin->low_max = hashlin->bucket_max;
  53         hashlin->low_mask = hashlin->bucket_mask;
  54         hashlin->split = 0;
  55 }
  56
  57 void tommy_hashlin_init(tommy_hashlin* hashlin)
  58 {
  59         tommy_uint_t i;
  60
  61         /* fixed initial size */
  62         hashlin->bucket_bit = TOMMY_HASHLIN_BIT;
  63         hashlin->bucket_max = 1 << hashlin->bucket_bit;
  64         hashlin->bucket_mask = hashlin->bucket_max - 1;
  65         hashlin->bucket[0] = tommy_cast(tommy_hashlin_node**, tommy_calloc(hashlin->bucket_max, sizeof(tommy_hashlin_node*)));
  66         for (i = 1; i < TOMMY_HASHLIN_BIT; ++i)
  67                 hashlin->bucket[i] = hashlin->bucket[0];
  68
  69         /* stable state */
  70         tommy_hashlin_stable(hashlin);
  71
  72         hashlin->count = 0;
  73 }
  74
  75 void tommy_hashlin_done(tommy_hashlin* hashlin)
  76 {
  77         tommy_uint_t i;
  78
  79         tommy_free(hashlin->bucket[0]);
  80         for (i = TOMMY_HASHLIN_BIT; i < hashlin->bucket_bit; ++i) {
  81                 tommy_hashlin_node** segment = hashlin->bucket[i];
  82                 tommy_free(&segment[((tommy_ptrdiff_t)1) << i]);
  83         }
  84 }
  85
  86 /**
  87  * Grow one step.
  88  */
  89 tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin)
  90 {
  91         /* grow if more than 50% full */
  92         if (hashlin->state != TOMMY_HASHLIN_STATE_GROW
  93                 && hashlin->count > hashlin->bucket_max / 2
  94         ) {
  95                 /* if we are stable, setup a new grow state */
  96                 /* otherwise continue with the already setup shrink one */
  97                 /* but in backward direction */
  98                 if (hashlin->state == TOMMY_HASHLIN_STATE_STABLE) {
  99                         tommy_hashlin_node** segment;
 100
 101                         /* set the lower size */
 102                         hashlin->low_max = hashlin->bucket_max;
 103                         hashlin->low_mask = hashlin->bucket_mask;
 104
 105                         /* allocate the new vector using malloc() and not calloc() */
 106                         /* because data is fully initialized in the split process */
 107                         segment = tommy_cast(tommy_hashlin_node**, tommy_malloc(hashlin->low_max * sizeof(tommy_hashlin_node*)));
 108
 109                         /* store it adjusting the offset */
 110                         /* cast to ptrdiff_t to ensure to get a negative value */
 111                         hashlin->bucket[hashlin->bucket_bit] = &segment[-(tommy_ptrdiff_t)hashlin->low_max];
 112
 113                         /* grow the hash size */
 114                         ++hashlin->bucket_bit;
 115                         hashlin->bucket_max = 1 << hashlin->bucket_bit;
 116                         hashlin->bucket_mask = hashlin->bucket_max - 1;
 117
 118                         /* start from the beginning going forward */
 119                         hashlin->split = 0;
 120                 }
 121
 122                 /* grow state */
 123                 hashlin->state = TOMMY_HASHLIN_STATE_GROW;
 124         }
 125
 126         /* if we are growing */
 127         if (hashlin->state == TOMMY_HASHLIN_STATE_GROW) {
 128                 /* compute the split target required to finish the reallocation before the next resize */
 129                 tommy_size_t split_target = 2 * hashlin->count;
 130
 131                 /* reallocate buckets until the split target */
 132                 while (hashlin->split + hashlin->low_max < split_target) {
 133                         tommy_hashlin_node** split[2];
 134                         tommy_hashlin_node* j;
 135                         tommy_size_t mask;
 136
 137                         /* get the low bucket */
 138                         split[0] = tommy_hashlin_pos(hashlin, hashlin->split);
 139
 140                         /* get the high bucket */
 141                         split[1] = tommy_hashlin_pos(hashlin, hashlin->split + hashlin->low_max);
 142
 143                         /* save the low bucket */
 144                         j = *split[0];
 145
 146                         /* reinitialize the buckets */
 147                         *split[0] = 0;
 148                         *split[1] = 0;
 149
 150                         /* the bit used to identify the bucket */
 151                         mask = hashlin->low_max;
 152
 153                         /* flush the bucket */
 154                         while (j) {
 155                                 tommy_hashlin_node* j_next = j->next;
 156                                 tommy_size_t pos = (j->index & mask) != 0;
 157                                 if (*split[pos])
 158                                         tommy_list_insert_tail_not_empty(*split[pos], j);
 159                                 else
 160                                         tommy_list_insert_first(split[pos], j);
 161                                 j = j_next;
 162                         }
 163
 164                         /* go forward */
 165                         ++hashlin->split;
 166
 167                         /* if we have finished, change the state */
 168                         if (hashlin->split == hashlin->low_max) {
 169                                 /* go in stable mode */
 170                                 tommy_hashlin_stable(hashlin);
 171                                 break;
 172                         }
 173                 }
 174         }
 175 }
 176
 177 /**
 178  * Shrink one step.
 179  */
 180 tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin)
 181 {
 182         /* shrink if less than 12.5% full */
 183         if (hashlin->state != TOMMY_HASHLIN_STATE_SHRINK
 184                 && hashlin->count < hashlin->bucket_max / 8
 185         ) {
 186                 /* avoid to shrink the first bucket */
 187                 if (hashlin->bucket_bit > TOMMY_HASHLIN_BIT) {
 188                         /* if we are stable, setup a new shrink state */
 189                         /* otherwise continue with the already setup grow one */
 190                         /* but in backward direction */
 191                         if (hashlin->state == TOMMY_HASHLIN_STATE_STABLE) {
 192                                 /* set the lower size */
 193                                 hashlin->low_max = hashlin->bucket_max / 2;
 194                                 hashlin->low_mask = hashlin->bucket_mask / 2;
 195
 196                                 /* start from the half going backward */
 197                                 hashlin->split = hashlin->low_max;
 198                         }
 199
 200                         /* start reallocation */
 201                         hashlin->state = TOMMY_HASHLIN_STATE_SHRINK;
 202                 }
 203         }
 204
 205         /* if we are shrinking */
 206         if (hashlin->state == TOMMY_HASHLIN_STATE_SHRINK) {
 207                 /* compute the split target required to finish the reallocation before the next resize */
 208                 tommy_size_t split_target = 8 * hashlin->count;
 209
 210                 /* reallocate buckets until the split target */
 211                 while (hashlin->split + hashlin->low_max > split_target) {
 212                         tommy_hashlin_node** split[2];
 213
 214                         /* go backward position */
 215                         --hashlin->split;
 216
 217                         /* get the low bucket */
 218                         split[0] = tommy_hashlin_pos(hashlin, hashlin->split);
 219
 220                         /* get the high bucket */
 221                         split[1] = tommy_hashlin_pos(hashlin, hashlin->split + hashlin->low_max);
 222
 223                         /* concat the high bucket into the low one */
 224                         tommy_list_concat(split[0], split[1]);
 225
 226                         /* if we have finished, clean up and change the state */
 227                         if (hashlin->split == 0) {
 228                                 tommy_hashlin_node** segment;
 229
 230                                 /* shrink the hash size */
 231                                 --hashlin->bucket_bit;
 232                                 hashlin->bucket_max = 1 << hashlin->bucket_bit;
 233                                 hashlin->bucket_mask = hashlin->bucket_max - 1;
 234
 235                                 /* free the last segment */
 236                                 segment = hashlin->bucket[hashlin->bucket_bit];
 237                                 tommy_free(&segment[((tommy_ptrdiff_t)1) << hashlin->bucket_bit]);
 238
 239                                 /* go in stable mode */
 240                                 tommy_hashlin_stable(hashlin);
 241                                 break;
 242                         }
 243                 }
 244         }
 245 }
 246
 247 void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash)
 248 {
 249         tommy_list_insert_tail(tommy_hashlin_bucket_ref(hashlin, hash), node, data);
 250
 251         node->index = hash;
 252
 253         ++hashlin->count;
 254
 255         hashlin_grow_step(hashlin);
 256 }
 257
 258 void tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node)
 259 {
 260         tommy_list_remove_existing(tommy_hashlin_bucket_ref(hashlin, node->index), node);
 261
 262         --hashlin->count;
 263
 264         hashlin_shrink_step(hashlin);
 265 }
 266
 267 void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
 268 {
 269         tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ref(hashlin, hash);
 270         tommy_hashlin_node* node = *let_ptr;
 271
 272         while (node) {
 273                 /* we first check if the hash matches, as in the same bucket we may have multiples hash values */
 274                 if (node->index == hash && cmp(cmp_arg, node->data) == 0) {
 275                         tommy_list_remove_existing(let_ptr, node);
 276
 277                         --hashlin->count;
 278
 279                         hashlin_shrink_step(hashlin);
 280
 281                         return node->data;
 282                 }
 283                 node = node->next;
 284         }
 285
 286         return 0;
 287 }
 288
 289 void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func)
 290 {
 291         tommy_size_t bucket_max;
 292         tommy_size_t pos;
 293
 294         /* number of valid buckets */
 295         bucket_max = hashlin->low_max + hashlin->split;
 296
 297         for (pos = 0; pos < bucket_max; ++pos) {
 298                 tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos);
 299
 300                 while (node) {
 301                         void* data = node->data;
 302                         node = node->next;
 303                         func(data);
 304                 }
 305         }
 306 }
 307
 308 void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg)
 309 {
 310         tommy_size_t bucket_max;
 311         tommy_size_t pos;
 312
 313         /* number of valid buckets */
 314         bucket_max = hashlin->low_max + hashlin->split;
 315
 316         for (pos = 0; pos < bucket_max; ++pos) {
 317                 tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos);
 318
 319                 while (node) {
 320                         void* data = node->data;
 321                         node = node->next;
 322                         func(arg, data);
 323                 }
 324         }
 325 }
 326
 327 tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin)
 328 {
 329         return hashlin->bucket_max * (tommy_size_t)sizeof(hashlin->bucket[0][0])
 330                + hashlin->count * (tommy_size_t)sizeof(tommy_hashlin_node);
 331 }
 332