nasmlib/hashtbl.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * hashtbl.c
  36  *
  37  * Efficient dictionary hash table class.
  38  */
  39
  40 #include "compiler.h"
  41
  42 #include "nasm.h"
  43 #include "hashtbl.h"
  44
  45 #define HASH_MAX_LOAD   2       /* Higher = more memory-efficient, slower */
  46 #define HASH_INIT_SIZE  16      /* Initial size (power of 2, min 4) */
  47
  48 #define hash_calc(key,keylen)   crc64b(CRC64_INIT, (key), (keylen))
  49 #define hash_calci(key,keylen)  crc64ib(CRC64_INIT, (key), (keylen))
  50 #define hash_max_load(size)     ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD)
  51 #define hash_expand(size)       ((size) << 1)
  52 #define hash_mask(size)         ((size) - 1)
  53 #define hash_pos(hash, mask)    ((hash) & (mask))
  54 #define hash_inc(hash, mask)    ((((hash) >> 32) & (mask)) | 1) /* always odd */
  55 #define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask))
  56
  57 static void hash_init(struct hash_table *head)
  58 {
  59     head->size     = HASH_INIT_SIZE;
  60     head->load     = 0;
  61     head->max_load = hash_max_load(head->size);
  62     nasm_newn(head->table, head->size);
  63 }
  64
  65 /*
  66  * Find an entry in a hash table.  The key can be any binary object.
  67  *
  68  * On failure, if "insert" is non-NULL, store data in that structure
  69  * which can be used to insert that node using hash_add().
  70  * See hash_add() for constraints on the uses of the insert object.
  71  *
  72  * On success, return a pointer to the "data" element of the hash
  73  * structure.
  74  */
  75 void **hash_findb(struct hash_table *head, const void *key,
  76                   size_t keylen, struct hash_insert *insert)
  77 {
  78     struct hash_node *np = NULL;
  79     struct hash_node *tbl = head->table;
  80     uint64_t hash = hash_calc(key, keylen);
  81     size_t mask = hash_mask(head->size);
  82     size_t pos = hash_pos(hash, mask);
  83     size_t inc = hash_inc(hash, mask);
  84
  85     if (likely(tbl)) {
  86         while ((np = &tbl[pos])->key) {
  87             if (hash == np->hash &&
  88                 keylen == np->keylen &&
  89                 !memcmp(key, np->key, keylen))
  90                 return &np->data;
  91             pos = hash_pos_next(pos, inc, mask);
  92         }
  93     }
  94
  95     /* Not found.  Store info for insert if requested. */
  96     if (insert) {
  97         insert->node.hash = hash;
  98         insert->node.key = key;
  99         insert->node.keylen = keylen;
 100         insert->node.data = NULL;
 101         insert->head  = head;
 102         insert->where = np;
 103     }
 104     return NULL;
 105 }
 106
 107 /*
 108  * Same as hash_findb(), but for a C string.
 109  */
 110 void **hash_find(struct hash_table *head, const char *key,
 111                  struct hash_insert *insert)
 112 {
 113     return hash_findb(head, key, strlen(key)+1, insert);
 114 }
 115
 116 /*
 117  * Same as hash_findb(), but for case-insensitive hashing.
 118  */
 119 void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
 120                    struct hash_insert *insert)
 121 {
 122     struct hash_node *np = NULL;
 123     struct hash_node *tbl = head->table;
 124     uint64_t hash = hash_calci(key, keylen);
 125     size_t mask = hash_mask(head->size);
 126     size_t pos = hash_pos(hash, mask);
 127     size_t inc = hash_inc(hash, mask);
 128
 129     if (likely(tbl)) {
 130         while ((np = &tbl[pos])->key) {
 131             if (hash == np->hash &&
 132                 keylen == np->keylen &&
 133                 !nasm_memicmp(key, np->key, keylen))
 134                 return &np->data;
 135             pos = hash_pos_next(pos, inc, mask);
 136         }
 137     }
 138
 139     /* Not found.  Store info for insert if requested. */
 140     if (insert) {
 141         insert->node.hash = hash;
 142         insert->node.key = key;
 143         insert->node.keylen = keylen;
 144         insert->node.data = NULL;
 145         insert->head  = head;
 146         insert->where = np;
 147     }
 148     return NULL;
 149 }
 150
 151 /*
 152  * Same as hash_find(), but for case-insensitive hashing.
 153  */
 154 void **hash_findi(struct hash_table *head, const char *key,
 155                   struct hash_insert *insert)
 156 {
 157     return hash_findib(head, key, strlen(key)+1, insert);
 158 }
 159
 160 /*
 161  * Insert node.  Return a pointer to the "data" element of the newly
 162  * created hash node.
 163  *
 164  * The following constraints apply:
 165  * 1. A call to hash_add() invalidates all other outstanding hash_insert
 166  *    objects; attempting to use them causes a wild pointer reference.
 167  * 2. The key provided must exactly match the key passed to hash_find*(),
 168  *    but it does not have to point to the same storage address. The key
 169  *    buffer provided to this function must not be freed for the lifespan
 170  *    of the hash. NULL will use the same pointer that was passed to
 171  *    hash_find*().
 172  */
 173 void **hash_add(struct hash_insert *insert, const void *key, void *data)
 174 {
 175     struct hash_table *head  = insert->head;
 176     struct hash_node *np = insert->where;
 177
 178     if (unlikely(!np)) {
 179         hash_init(head);
 180         /* The hash table is empty, so we don't need to iterate here */
 181         np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))];
 182     }
 183
 184     /*
 185      * Insert node.  We can always do this, even if we need to
 186      * rebalance immediately after.
 187      */
 188     *np = insert->node;
 189     np->data = data;
 190     if (key)
 191         np->key = key;
 192
 193     if (unlikely(++head->load > head->max_load)) {
 194         /* Need to expand the table */
 195         size_t newsize           = hash_expand(head->size);
 196         struct hash_node *newtbl;
 197         size_t mask              = hash_mask(newsize);
 198         struct hash_node *op, *xp;
 199         size_t i;
 200
 201         nasm_newn(newtbl, newsize);
 202
 203         /* Rebalance all the entries */
 204         for (i = 0, op = head->table; i < head->size; i++, op++) {
 205             if (op->key) {
 206                 size_t pos = hash_pos(op->hash, mask);
 207                 size_t inc = hash_inc(op->hash, mask);
 208
 209                 while ((xp = &newtbl[pos])->key)
 210                     pos = hash_pos_next(pos, inc, mask);
 211
 212                 *xp = *op;
 213                 if (op == np)
 214                     np = xp;
 215             }
 216         }
 217         nasm_free(head->table);
 218
 219         head->table    = newtbl;
 220         head->size     = newsize;
 221         head->max_load = hash_max_load(newsize);
 222     }
 223
 224     return &np->data;
 225 }
 226
 227 /*
 228  * Iterate over all members of a hash set. For the first call, iter
 229  * should be as initialized by hash_iterator_init(). Returns a struct
 230  * hash_node representing the current object, or NULL if we have
 231  * reached the end of the hash table.
 232  *
 233  * Calling hash_add() will invalidate the iterator.
 234  */
 235 const struct hash_node *hash_iterate(struct hash_iterator *iter)
 236 {
 237     const struct hash_table *head = iter->head;
 238     const struct hash_node *cp = iter->next;
 239     const struct hash_node *ep = head->table + head->size;
 240
 241     /* For an empty table, cp == ep == NULL */
 242     while (cp < ep) {
 243         if (cp->key) {
 244             iter->next = cp+1;
 245             return cp;
 246         }
 247         cp++;
 248     }
 249
 250     iter->next = head->table;
 251     return NULL;
 252 }
 253
 254 /*
 255  * Free the hash itself.  Doesn't free the data elements; use
 256  * hash_iterate() to do that first, if needed.  This function is normally
 257  * used when the hash data entries are either freed separately, or
 258  * compound objects which can't be freed in a single operation.
 259  */
 260 void hash_free(struct hash_table *head)
 261 {
 262     void *p = head->table;
 263     memset(head, 0, sizeof *head);
 264     nasm_free(p);
 265 }
 266
 267 /*
 268  * Frees the hash *and* all data elements.  This is applicable only in
 269  * the case where the data element is a single allocation.  If the
 270  * second argument is false, the key string is part of the data
 271  * allocation or belongs to an allocation which will be freed
 272  * separately, if it is true the keys are also freed.
 273  */
 274 void hash_free_all(struct hash_table *head, bool free_keys)
 275 {
 276     struct hash_iterator it;
 277     const struct hash_node *np;
 278
 279     hash_for_each(head, it, np) {
 280         if (np->data)
 281             nasm_free(np->data);
 282         if (free_keys && np->key)
 283             nasm_free((void *)np->key);
 284     }
 285
 286     hash_free(head);
 287 }