src/include/lib/simplehash.h

   1 /*
   2  * simplehash.h
   3  *
   4  *        When included this file generates a "templated" (by way of macros)
   5  *        open-addressing hash table implementation specialized to user-defined
   6  *        types.
   7  *
   8  *        It's probably not worthwhile to generate such a specialized implementation
   9  *        for hash tables that aren't performance or space sensitive.
  10  *
  11  *        Compared to dynahash, simplehash has the following benefits:
  12  *
  13  *        - Due to the "templated" code generation has known structure sizes and no
  14  *          indirect function calls (which show up substantially in dynahash
  15  *          profiles). These features considerably increase speed for small
  16  *          entries.
  17  *        - Open addressing has better CPU cache behavior than dynahash's chained
  18  *          hashtables.
  19  *        - The generated interface is type-safe and easier to use than dynahash,
  20  *          though at the cost of more complex setup.
  21  *        - Allocates memory in a MemoryContext or another allocator with a
  22  *          malloc/free style interface (which isn't easily usable in a shared
  23  *          memory context)
  24  *        - Does not require the overhead of a separate memory context.
  25  *
  26  * Usage notes:
  27  *
  28  *        To generate a hash-table and associated functions for a use case several
  29  *        macros have to be #define'ed before this file is included.  Including
  30  *        the file #undef's all those, so a new hash table can be generated
  31  *        afterwards.
  32  *        The relevant parameters are:
  33  *        - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
  34  *              will result in hash table type 'foo_hash' and functions like
  35  *              'foo_insert'/'foo_lookup' and so forth.
  36  *        - SH_ELEMENT_TYPE - type of the contained elements
  37  *        - SH_KEY_TYPE - type of the hashtable's key
  38  *        - SH_DECLARE - if defined function prototypes and type declarations are
  39  *              generated
  40  *        - SH_DEFINE - if defined function definitions are generated
  41  *        - SH_SCOPE - in which scope (e.g. extern, static inline) do function
  42  *              declarations reside
  43  *        - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
  44  *          use this to allocate bytes. The allocator must zero the returned space.
  45  *        - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
  46  *              are defined, so you can supply your own
  47  *        The following parameters are only relevant when SH_DEFINE is defined:
  48  *        - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
  49  *        - SH_EQUAL(table, a, b) - compare two table keys
  50  *        - SH_HASH_KEY(table, key) - generate hash for the key
  51  *        - SH_STORE_HASH - if defined the hash is stored in the elements
  52  *        - SH_GET_HASH(tb, a) - return the field to store the hash in
  53  *
  54  *        The element type is required to contain a "status" member that can store
  55  *        the range of values defined in the SH_STATUS enum.
  56  *
  57  *        While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
  58  *        the hash table implementation needs to compare hashes to move elements
  59  *        (particularly when growing the hash), it's preferable, if possible, to
  60  *        store the element's hash in the element's data type. If the hash is so
  61  *        stored, the hash table will also compare hashes before calling SH_EQUAL
  62  *        when comparing two keys.
  63  *
  64  *        For convenience the hash table create functions accept a void pointer
  65  *        that will be stored in the hash table type's member private_data. This
  66  *        allows callbacks to reference caller provided data.
  67  *
  68  *        For examples of usage look at tidbitmap.c (file local definition) and
  69  *        execnodes.h/execGrouping.c (exposed declaration, file local
  70  *        implementation).
  71  *
  72  * Hash table design:
  73  *
  74  *        The hash table design chosen is a variant of linear open-addressing. The
  75  *        reason for doing so is that linear addressing is CPU cache & pipeline
  76  *        friendly. The biggest disadvantage of simple linear addressing schemes
  77  *        are highly variable lookup times due to clustering, and deletions
  78  *        leaving a lot of tombstones around.  To address these issues a variant
  79  *        of "robin hood" hashing is employed.  Robin hood hashing optimizes
  80  *        chaining lengths by moving elements close to their optimal bucket
  81  *        ("rich" elements), out of the way if a to-be-inserted element is further
  82  *        away from its optimal position (i.e. it's "poor").  While that can make
  83  *        insertions slower, the average lookup performance is a lot better, and
  84  *        higher fill factors can be used in a still performant manner.  To avoid
  85  *        tombstones - which normally solve the issue that a deleted node's
  86  *        presence is relevant to determine whether a lookup needs to continue
  87  *        looking or is done - buckets following a deleted element are shifted
  88  *        backwards, unless they're empty or already at their optimal position.
  89  *
  90  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  91  * Portions Copyright (c) 1994, Regents of the University of California
  92  *
  93  * src/include/lib/simplehash.h
  94  */
  95
  96 #include "port/pg_bitutils.h"
  97
  98 /* helpers */
  99 #define SH_MAKE_PREFIX(a) CppConcat(a,_)
 100 #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
 101 #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
 102
 103 /* name macros for: */
 104
 105 /* type declarations */
 106 #define SH_TYPE SH_MAKE_NAME(hash)
 107 #define SH_STATUS SH_MAKE_NAME(status)
 108 #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
 109 #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
 110 #define SH_ITERATOR SH_MAKE_NAME(iterator)
 111
 112 /* function declarations */
 113 #define SH_CREATE SH_MAKE_NAME(create)
 114 #define SH_DESTROY SH_MAKE_NAME(destroy)
 115 #define SH_RESET SH_MAKE_NAME(reset)
 116 #define SH_INSERT SH_MAKE_NAME(insert)
 117 #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
 118 #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
 119 #define SH_DELETE SH_MAKE_NAME(delete)
 120 #define SH_LOOKUP SH_MAKE_NAME(lookup)
 121 #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
 122 #define SH_GROW SH_MAKE_NAME(grow)
 123 #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
 124 #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
 125 #define SH_ITERATE SH_MAKE_NAME(iterate)
 126 #define SH_ALLOCATE SH_MAKE_NAME(allocate)
 127 #define SH_FREE SH_MAKE_NAME(free)
 128 #define SH_STAT SH_MAKE_NAME(stat)
 129
 130 /* internal helper functions (no externally visible prototypes) */
 131 #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
 132 #define SH_NEXT SH_MAKE_NAME(next)
 133 #define SH_PREV SH_MAKE_NAME(prev)
 134 #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
 135 #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
 136 #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
 137 #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
 138 #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
 139
 140 /* generate forward declarations necessary to use the hash table */
 141 #ifdef SH_DECLARE
 142
 143 /* type definitions */
 144 typedef struct SH_TYPE
 145 {
 146         /*
 147          * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
 148          * tables.  Note that the maximum number of elements is lower
 149          * (SH_MAX_FILLFACTOR)
 150          */
 151         uint64          size;
 152
 153         /* how many elements have valid contents */
 154         uint32          members;
 155
 156         /* mask for bucket and size calculations, based on size */
 157         uint32          sizemask;
 158
 159         /* boundary after which to grow hashtable */
 160         uint32          grow_threshold;
 161
 162         /* hash buckets */
 163         SH_ELEMENT_TYPE *data;
 164
 165 #ifndef SH_RAW_ALLOCATOR
 166         /* memory context to use for allocations */
 167         MemoryContext ctx;
 168 #endif
 169
 170         /* user defined data, useful for callbacks */
 171         void       *private_data;
 172 }                       SH_TYPE;
 173
 174 typedef enum SH_STATUS
 175 {
 176         SH_STATUS_EMPTY = 0x00,
 177         SH_STATUS_IN_USE = 0x01
 178 } SH_STATUS;
 179
 180 typedef struct SH_ITERATOR
 181 {
 182         uint32          cur;                    /* current element */
 183         uint32          end;
 184         bool            done;                   /* iterator exhausted? */
 185 }                       SH_ITERATOR;
 186
 187 /* externally visible function prototypes */
 188 #ifdef SH_RAW_ALLOCATOR
 189 /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
 190 SH_SCOPE        SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
 191 #else
 192 /*
 193  * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
 194  *                                                               void *private_data)
 195  */
 196 SH_SCOPE        SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
 197                                                            void *private_data);
 198 #endif
 199
 200 /* void <prefix>_destroy(<prefix>_hash *tb) */
 201 SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
 202
 203 /* void <prefix>_reset(<prefix>_hash *tb) */
 204 SH_SCOPE void SH_RESET(SH_TYPE * tb);
 205
 206 /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
 207 SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
 208
 209 /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
 210 SH_SCOPE        SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
 211
 212 /*
 213  * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
 214  *                                                                bool *found)
 215  */
 216 SH_SCOPE        SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
 217                                                                                         uint32 hash, bool *found);
 218
 219 /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
 220 SH_SCOPE        SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
 221
 222 /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
 223 SH_SCOPE        SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
 224                                                                                         uint32 hash);
 225
 226 /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
 227 SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
 228
 229 /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
 230 SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
 231
 232 /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
 233 SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
 234
 235 /*
 236  * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
 237  *                                                                uint32 at)
 238  */
 239 SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
 240
 241 /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
 242 SH_SCOPE        SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
 243
 244 /* void <prefix>_stat(<prefix>_hash *tb */
 245 SH_SCOPE void SH_STAT(SH_TYPE * tb);
 246
 247 #endif                                                  /* SH_DECLARE */
 248
 249
 250 /* generate implementation of the hash table */
 251 #ifdef SH_DEFINE
 252
 253 #ifndef SH_RAW_ALLOCATOR
 254 #include "utils/memutils.h"
 255 #endif
 256
 257 /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
 258 #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
 259
 260 /* normal fillfactor, unless already close to maximum */
 261 #ifndef SH_FILLFACTOR
 262 #define SH_FILLFACTOR (0.9)
 263 #endif
 264 /* increase fillfactor if we otherwise would error out */
 265 #define SH_MAX_FILLFACTOR (0.98)
 266 /* grow if actual and optimal location bigger than */
 267 #ifndef SH_GROW_MAX_DIB
 268 #define SH_GROW_MAX_DIB 25
 269 #endif
 270 /* grow if more than elements to move when inserting */
 271 #ifndef SH_GROW_MAX_MOVE
 272 #define SH_GROW_MAX_MOVE 150
 273 #endif
 274 #ifndef SH_GROW_MIN_FILLFACTOR
 275 /* but do not grow due to SH_GROW_MAX_* if below */
 276 #define SH_GROW_MIN_FILLFACTOR 0.1
 277 #endif
 278
 279 #ifdef SH_STORE_HASH
 280 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
 281 #else
 282 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
 283 #endif
 284
 285 /*
 286  * Wrap the following definitions in include guards, to avoid multiple
 287  * definition errors if this header is included more than once.  The rest of
 288  * the file deliberately has no include guards, because it can be included
 289  * with different parameters to define functions and types with non-colliding
 290  * names.
 291  */
 292 #ifndef SIMPLEHASH_H
 293 #define SIMPLEHASH_H
 294
 295 #ifdef FRONTEND
 296 #define sh_error(...) \
 297         do { pg_log_fatal(__VA_ARGS__); exit(1); } while(0)
 298 #define sh_log(...) pg_log_info(__VA_ARGS__)
 299 #else
 300 #define sh_error(...) elog(ERROR, __VA_ARGS__)
 301 #define sh_log(...) elog(LOG, __VA_ARGS__)
 302 #endif
 303
 304 #endif
 305
 306 /*
 307  * Compute sizing parameters for hashtable. Called when creating and growing
 308  * the hashtable.
 309  */
 310 static inline void
 311 SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
 312 {
 313         uint64          size;
 314
 315         /* supporting zero sized hashes would complicate matters */
 316         size = Max(newsize, 2);
 317
 318         /* round up size to the next power of 2, that's how bucketing works */
 319         size = pg_nextpower2_64(size);
 320         Assert(size <= SH_MAX_SIZE);
 321
 322         /*
 323          * Verify that allocation of ->data is possible on this platform, without
 324          * overflowing Size.
 325          */
 326         if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
 327                 sh_error("hash table too large");
 328
 329         /* now set size */
 330         tb->size = size;
 331         tb->sizemask = (uint32) (size - 1);
 332
 333         /*
 334          * Compute the next threshold at which we need to grow the hash table
 335          * again.
 336          */
 337         if (tb->size == SH_MAX_SIZE)
 338                 tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
 339         else
 340                 tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
 341 }
 342
 343 /* return the optimal bucket for the hash */
 344 static inline uint32
 345 SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
 346 {
 347         return hash & tb->sizemask;
 348 }
 349
 350 /* return next bucket after the current, handling wraparound */
 351 static inline uint32
 352 SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
 353 {
 354         curelem = (curelem + 1) & tb->sizemask;
 355
 356         Assert(curelem != startelem);
 357
 358         return curelem;
 359 }
 360
 361 /* return bucket before the current, handling wraparound */
 362 static inline uint32
 363 SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
 364 {
 365         curelem = (curelem - 1) & tb->sizemask;
 366
 367         Assert(curelem != startelem);
 368
 369         return curelem;
 370 }
 371
 372 /* return distance between bucket and its optimal position */
 373 static inline uint32
 374 SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
 375 {
 376         if (optimal <= bucket)
 377                 return bucket - optimal;
 378         else
 379                 return (tb->size + bucket) - optimal;
 380 }
 381
 382 static inline uint32
 383 SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 384 {
 385 #ifdef SH_STORE_HASH
 386         return SH_GET_HASH(tb, entry);
 387 #else
 388         return SH_HASH_KEY(tb, entry->SH_KEY);
 389 #endif
 390 }
 391
 392 /* default memory allocator function */
 393 static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
 394 static inline void SH_FREE(SH_TYPE * type, void *pointer);
 395
 396 #ifndef SH_USE_NONDEFAULT_ALLOCATOR
 397
 398 /* default memory allocator function */
 399 static inline void *
 400 SH_ALLOCATE(SH_TYPE * type, Size size)
 401 {
 402 #ifdef SH_RAW_ALLOCATOR
 403         return SH_RAW_ALLOCATOR(size);
 404 #else
 405         return MemoryContextAllocExtended(type->ctx, size,
 406                                                                           MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
 407 #endif
 408 }
 409
 410 /* default memory free function */
 411 static inline void
 412 SH_FREE(SH_TYPE * type, void *pointer)
 413 {
 414         pfree(pointer);
 415 }
 416
 417 #endif
 418
 419 /*
 420  * Create a hash table with enough space for `nelements` distinct members.
 421  * Memory for the hash table is allocated from the passed-in context.  If
 422  * desired, the array of elements can be allocated using a passed-in allocator;
 423  * this could be useful in order to place the array of elements in a shared
 424  * memory, or in a context that will outlive the rest of the hash table.
 425  * Memory other than for the array of elements will still be allocated from
 426  * the passed-in context.
 427  */
 428 #ifdef SH_RAW_ALLOCATOR
 429 SH_SCOPE        SH_TYPE *
 430 SH_CREATE(uint32 nelements, void *private_data)
 431 #else
 432 SH_SCOPE        SH_TYPE *
 433 SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
 434 #endif
 435 {
 436         SH_TYPE    *tb;
 437         uint64          size;
 438
 439 #ifdef SH_RAW_ALLOCATOR
 440         tb = SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
 441 #else
 442         tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
 443         tb->ctx = ctx;
 444 #endif
 445         tb->private_data = private_data;
 446
 447         /* increase nelements by fillfactor, want to store nelements elements */
 448         size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
 449
 450         SH_COMPUTE_PARAMETERS(tb, size);
 451
 452         tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
 453
 454         return tb;
 455 }
 456
 457 /* destroy a previously created hash table */
 458 SH_SCOPE void
 459 SH_DESTROY(SH_TYPE * tb)
 460 {
 461         SH_FREE(tb, tb->data);
 462         pfree(tb);
 463 }
 464
 465 /* reset the contents of a previously created hash table */
 466 SH_SCOPE void
 467 SH_RESET(SH_TYPE * tb)
 468 {
 469         memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
 470         tb->members = 0;
 471 }
 472
 473 /*
 474  * Grow a hash table to at least `newsize` buckets.
 475  *
 476  * Usually this will automatically be called by insertions/deletions, when
 477  * necessary. But resizing to the exact input size can be advantageous
 478  * performance-wise, when known at some point.
 479  */
 480 SH_SCOPE void
 481 SH_GROW(SH_TYPE * tb, uint64 newsize)
 482 {
 483         uint64          oldsize = tb->size;
 484         SH_ELEMENT_TYPE *olddata = tb->data;
 485         SH_ELEMENT_TYPE *newdata;
 486         uint32          i;
 487         uint32          startelem = 0;
 488         uint32          copyelem;
 489
 490         Assert(oldsize == pg_nextpower2_64(oldsize));
 491         Assert(oldsize != SH_MAX_SIZE);
 492         Assert(oldsize < newsize);
 493
 494         /* compute parameters for new table */
 495         SH_COMPUTE_PARAMETERS(tb, newsize);
 496
 497         tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
 498
 499         newdata = tb->data;
 500
 501         /*
 502          * Copy entries from the old data to newdata. We theoretically could use
 503          * SH_INSERT here, to avoid code duplication, but that's more general than
 504          * we need. We neither want tb->members increased, nor do we need to do
 505          * deal with deleted elements, nor do we need to compare keys. So a
 506          * special-cased implementation is lot faster. As resizing can be time
 507          * consuming and frequent, that's worthwhile to optimize.
 508          *
 509          * To be able to simply move entries over, we have to start not at the
 510          * first bucket (i.e olddata[0]), but find the first bucket that's either
 511          * empty, or is occupied by an entry at its optimal position. Such a
 512          * bucket has to exist in any table with a load factor under 1, as not all
 513          * buckets are occupied, i.e. there always has to be an empty bucket.  By
 514          * starting at such a bucket we can move the entries to the larger table,
 515          * without having to deal with conflicts.
 516          */
 517
 518         /* search for the first element in the hash that's not wrapped around */
 519         for (i = 0; i < oldsize; i++)
 520         {
 521                 SH_ELEMENT_TYPE *oldentry = &olddata[i];
 522                 uint32          hash;
 523                 uint32          optimal;
 524
 525                 if (oldentry->status != SH_STATUS_IN_USE)
 526                 {
 527                         startelem = i;
 528                         break;
 529                 }
 530
 531                 hash = SH_ENTRY_HASH(tb, oldentry);
 532                 optimal = SH_INITIAL_BUCKET(tb, hash);
 533
 534                 if (optimal == i)
 535                 {
 536                         startelem = i;
 537                         break;
 538                 }
 539         }
 540
 541         /* and copy all elements in the old table */
 542         copyelem = startelem;
 543         for (i = 0; i < oldsize; i++)
 544         {
 545                 SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
 546
 547                 if (oldentry->status == SH_STATUS_IN_USE)
 548                 {
 549                         uint32          hash;
 550                         uint32          startelem;
 551                         uint32          curelem;
 552                         SH_ELEMENT_TYPE *newentry;
 553
 554                         hash = SH_ENTRY_HASH(tb, oldentry);
 555                         startelem = SH_INITIAL_BUCKET(tb, hash);
 556                         curelem = startelem;
 557
 558                         /* find empty element to put data into */
 559                         while (true)
 560                         {
 561                                 newentry = &newdata[curelem];
 562
 563                                 if (newentry->status == SH_STATUS_EMPTY)
 564                                 {
 565                                         break;
 566                                 }
 567
 568                                 curelem = SH_NEXT(tb, curelem, startelem);
 569                         }
 570
 571                         /* copy entry to new slot */
 572                         memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
 573                 }
 574
 575                 /* can't use SH_NEXT here, would use new size */
 576                 copyelem++;
 577                 if (copyelem >= oldsize)
 578                 {
 579                         copyelem = 0;
 580                 }
 581         }
 582
 583         SH_FREE(tb, olddata);
 584 }
 585
 586 /*
 587  * This is a separate static inline function, so it can be reliably be inlined
 588  * into its wrapper functions even if SH_SCOPE is extern.
 589  */
 590 static inline SH_ELEMENT_TYPE *
 591 SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
 592 {
 593         uint32          startelem;
 594         uint32          curelem;
 595         SH_ELEMENT_TYPE *data;
 596         uint32          insertdist;
 597
 598 restart:
 599         insertdist = 0;
 600
 601         /*
 602          * We do the grow check even if the key is actually present, to avoid
 603          * doing the check inside the loop. This also lets us avoid having to
 604          * re-find our position in the hashtable after resizing.
 605          *
 606          * Note that this also reached when resizing the table due to
 607          * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
 608          */
 609         if (unlikely(tb->members >= tb->grow_threshold))
 610         {
 611                 if (unlikely(tb->size == SH_MAX_SIZE))
 612                         sh_error("hash table size exceeded");
 613
 614                 /*
 615                  * When optimizing, it can be very useful to print these out.
 616                  */
 617                 /* SH_STAT(tb); */
 618                 SH_GROW(tb, tb->size * 2);
 619                 /* SH_STAT(tb); */
 620         }
 621
 622         /* perform insert, start bucket search at optimal location */
 623         data = tb->data;
 624         startelem = SH_INITIAL_BUCKET(tb, hash);
 625         curelem = startelem;
 626         while (true)
 627         {
 628                 uint32          curdist;
 629                 uint32          curhash;
 630                 uint32          curoptimal;
 631                 SH_ELEMENT_TYPE *entry = &data[curelem];
 632
 633                 /* any empty bucket can directly be used */
 634                 if (entry->status == SH_STATUS_EMPTY)
 635                 {
 636                         tb->members++;
 637                         entry->SH_KEY = key;
 638 #ifdef SH_STORE_HASH
 639                         SH_GET_HASH(tb, entry) = hash;
 640 #endif
 641                         entry->status = SH_STATUS_IN_USE;
 642                         *found = false;
 643                         return entry;
 644                 }
 645
 646                 /*
 647                  * If the bucket is not empty, we either found a match (in which case
 648                  * we're done), or we have to decide whether to skip over or move the
 649                  * colliding entry. When the colliding element's distance to its
 650                  * optimal position is smaller than the to-be-inserted entry's, we
 651                  * shift the colliding entry (and its followers) forward by one.
 652                  */
 653
 654                 if (SH_COMPARE_KEYS(tb, hash, key, entry))
 655                 {
 656                         Assert(entry->status == SH_STATUS_IN_USE);
 657                         *found = true;
 658                         return entry;
 659                 }
 660
 661                 curhash = SH_ENTRY_HASH(tb, entry);
 662                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
 663                 curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
 664
 665                 if (insertdist > curdist)
 666                 {
 667                         SH_ELEMENT_TYPE *lastentry = entry;
 668                         uint32          emptyelem = curelem;
 669                         uint32          moveelem;
 670                         int32           emptydist = 0;
 671
 672                         /* find next empty bucket */
 673                         while (true)
 674                         {
 675                                 SH_ELEMENT_TYPE *emptyentry;
 676
 677                                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
 678                                 emptyentry = &data[emptyelem];
 679
 680                                 if (emptyentry->status == SH_STATUS_EMPTY)
 681                                 {
 682                                         lastentry = emptyentry;
 683                                         break;
 684                                 }
 685
 686                                 /*
 687                                  * To avoid negative consequences from overly imbalanced
 688                                  * hashtables, grow the hashtable if collisions would require
 689                                  * us to move a lot of entries.  The most likely cause of such
 690                                  * imbalance is filling a (currently) small table, from a
 691                                  * currently big one, in hash-table order.  Don't grow if the
 692                                  * hashtable would be too empty, to prevent quick space
 693                                  * explosion for some weird edge cases.
 694                                  */
 695                                 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
 696                                         ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
 697                                 {
 698                                         tb->grow_threshold = 0;
 699                                         goto restart;
 700                                 }
 701                         }
 702
 703                         /* shift forward, starting at last occupied element */
 704
 705                         /*
 706                          * TODO: This could be optimized to be one memcpy in many cases,
 707                          * excepting wrapping around at the end of ->data. Hasn't shown up
 708                          * in profiles so far though.
 709                          */
 710                         moveelem = emptyelem;
 711                         while (moveelem != curelem)
 712                         {
 713                                 SH_ELEMENT_TYPE *moveentry;
 714
 715                                 moveelem = SH_PREV(tb, moveelem, startelem);
 716                                 moveentry = &data[moveelem];
 717
 718                                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
 719                                 lastentry = moveentry;
 720                         }
 721
 722                         /* and fill the now empty spot */
 723                         tb->members++;
 724
 725                         entry->SH_KEY = key;
 726 #ifdef SH_STORE_HASH
 727                         SH_GET_HASH(tb, entry) = hash;
 728 #endif
 729                         entry->status = SH_STATUS_IN_USE;
 730                         *found = false;
 731                         return entry;
 732                 }
 733
 734                 curelem = SH_NEXT(tb, curelem, startelem);
 735                 insertdist++;
 736
 737                 /*
 738                  * To avoid negative consequences from overly imbalanced hashtables,
 739                  * grow the hashtable if collisions lead to large runs. The most
 740                  * likely cause of such imbalance is filling a (currently) small
 741                  * table, from a currently big one, in hash-table order.  Don't grow
 742                  * if the hashtable would be too empty, to prevent quick space
 743                  * explosion for some weird edge cases.
 744                  */
 745                 if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
 746                         ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
 747                 {
 748                         tb->grow_threshold = 0;
 749                         goto restart;
 750                 }
 751         }
 752 }
 753
 754 /*
 755  * Insert the key key into the hash-table, set *found to true if the key
 756  * already exists, false otherwise. Returns the hash-table entry in either
 757  * case.
 758  */
 759 SH_SCOPE        SH_ELEMENT_TYPE *
 760 SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
 761 {
 762         uint32          hash = SH_HASH_KEY(tb, key);
 763
 764         return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
 765 }
 766
 767 /*
 768  * Insert the key key into the hash-table using an already-calculated
 769  * hash. Set *found to true if the key already exists, false
 770  * otherwise. Returns the hash-table entry in either case.
 771  */
 772 SH_SCOPE        SH_ELEMENT_TYPE *
 773 SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
 774 {
 775         return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
 776 }
 777
 778 /*
 779  * This is a separate static inline function, so it can be reliably be inlined
 780  * into its wrapper functions even if SH_SCOPE is extern.
 781  */
 782 static inline SH_ELEMENT_TYPE *
 783 SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
 784 {
 785         const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
 786         uint32          curelem = startelem;
 787
 788         while (true)
 789         {
 790                 SH_ELEMENT_TYPE *entry = &tb->data[curelem];
 791
 792                 if (entry->status == SH_STATUS_EMPTY)
 793                 {
 794                         return NULL;
 795                 }
 796
 797                 Assert(entry->status == SH_STATUS_IN_USE);
 798
 799                 if (SH_COMPARE_KEYS(tb, hash, key, entry))
 800                         return entry;
 801
 802                 /*
 803                  * TODO: we could stop search based on distance. If the current
 804                  * buckets's distance-from-optimal is smaller than what we've skipped
 805                  * already, the entry doesn't exist. Probably only do so if
 806                  * SH_STORE_HASH is defined, to avoid re-computing hashes?
 807                  */
 808
 809                 curelem = SH_NEXT(tb, curelem, startelem);
 810         }
 811 }
 812
 813 /*
 814  * Lookup up entry in hash table.  Returns NULL if key not present.
 815  */
 816 SH_SCOPE        SH_ELEMENT_TYPE *
 817 SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
 818 {
 819         uint32          hash = SH_HASH_KEY(tb, key);
 820
 821         return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
 822 }
 823
 824 /*
 825  * Lookup up entry in hash table using an already-calculated hash.
 826  *
 827  * Returns NULL if key not present.
 828  */
 829 SH_SCOPE        SH_ELEMENT_TYPE *
 830 SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
 831 {
 832         return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
 833 }
 834
 835 /*
 836  * Delete entry from hash table by key.  Returns whether to-be-deleted key was
 837  * present.
 838  */
 839 SH_SCOPE bool
 840 SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 841 {
 842         uint32          hash = SH_HASH_KEY(tb, key);
 843         uint32          startelem = SH_INITIAL_BUCKET(tb, hash);
 844         uint32          curelem = startelem;
 845
 846         while (true)
 847         {
 848                 SH_ELEMENT_TYPE *entry = &tb->data[curelem];
 849
 850                 if (entry->status == SH_STATUS_EMPTY)
 851                         return false;
 852
 853                 if (entry->status == SH_STATUS_IN_USE &&
 854                         SH_COMPARE_KEYS(tb, hash, key, entry))
 855                 {
 856                         SH_ELEMENT_TYPE *lastentry = entry;
 857
 858                         tb->members--;
 859
 860                         /*
 861                          * Backward shift following elements till either an empty element
 862                          * or an element at its optimal position is encountered.
 863                          *
 864                          * While that sounds expensive, the average chain length is short,
 865                          * and deletions would otherwise require tombstones.
 866                          */
 867                         while (true)
 868                         {
 869                                 SH_ELEMENT_TYPE *curentry;
 870                                 uint32          curhash;
 871                                 uint32          curoptimal;
 872
 873                                 curelem = SH_NEXT(tb, curelem, startelem);
 874                                 curentry = &tb->data[curelem];
 875
 876                                 if (curentry->status != SH_STATUS_IN_USE)
 877                                 {
 878                                         lastentry->status = SH_STATUS_EMPTY;
 879                                         break;
 880                                 }
 881
 882                                 curhash = SH_ENTRY_HASH(tb, curentry);
 883                                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
 884
 885                                 /* current is at optimal position, done */
 886                                 if (curoptimal == curelem)
 887                                 {
 888                                         lastentry->status = SH_STATUS_EMPTY;
 889                                         break;
 890                                 }
 891
 892                                 /* shift */
 893                                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
 894
 895                                 lastentry = curentry;
 896                         }
 897
 898                         return true;
 899                 }
 900
 901                 /* TODO: return false; if distance too big */
 902
 903                 curelem = SH_NEXT(tb, curelem, startelem);
 904         }
 905 }
 906
 907 /*
 908  * Delete entry from hash table by entry pointer
 909  */
 910 SH_SCOPE void
 911 SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 912 {
 913         SH_ELEMENT_TYPE *lastentry = entry;
 914         uint32          hash = SH_ENTRY_HASH(tb, entry);
 915         uint32          startelem = SH_INITIAL_BUCKET(tb, hash);
 916         uint32          curelem;
 917
 918         /* Calculate the index of 'entry' */
 919         curelem = entry - &tb->data[0];
 920
 921         tb->members--;
 922
 923         /*
 924          * Backward shift following elements till either an empty element or an
 925          * element at its optimal position is encountered.
 926          *
 927          * While that sounds expensive, the average chain length is short, and
 928          * deletions would otherwise require tombstones.
 929          */
 930         while (true)
 931         {
 932                 SH_ELEMENT_TYPE *curentry;
 933                 uint32          curhash;
 934                 uint32          curoptimal;
 935
 936                 curelem = SH_NEXT(tb, curelem, startelem);
 937                 curentry = &tb->data[curelem];
 938
 939                 if (curentry->status != SH_STATUS_IN_USE)
 940                 {
 941                         lastentry->status = SH_STATUS_EMPTY;
 942                         break;
 943                 }
 944
 945                 curhash = SH_ENTRY_HASH(tb, curentry);
 946                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
 947
 948                 /* current is at optimal position, done */
 949                 if (curoptimal == curelem)
 950                 {
 951                         lastentry->status = SH_STATUS_EMPTY;
 952                         break;
 953                 }
 954
 955                 /* shift */
 956                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
 957
 958                 lastentry = curentry;
 959         }
 960 }
 961
 962 /*
 963  * Initialize iterator.
 964  */
 965 SH_SCOPE void
 966 SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
 967 {
 968         int                     i;
 969         uint64          startelem = PG_UINT64_MAX;
 970
 971         /*
 972          * Search for the first empty element. As deletions during iterations are
 973          * supported, we want to start/end at an element that cannot be affected
 974          * by elements being shifted.
 975          */
 976         for (i = 0; i < tb->size; i++)
 977         {
 978                 SH_ELEMENT_TYPE *entry = &tb->data[i];
 979
 980                 if (entry->status != SH_STATUS_IN_USE)
 981                 {
 982                         startelem = i;
 983                         break;
 984                 }
 985         }
 986
 987         Assert(startelem < SH_MAX_SIZE);
 988
 989         /*
 990          * Iterate backwards, that allows the current element to be deleted, even
 991          * if there are backward shifts
 992          */
 993         iter->cur = startelem;
 994         iter->end = iter->cur;
 995         iter->done = false;
 996 }
 997
 998 /*
 999  * Initialize iterator to a specific bucket. That's really only useful for
1000  * cases where callers are partially iterating over the hashspace, and that
1001  * iteration deletes and inserts elements based on visited entries. Doing that
1002  * repeatedly could lead to an unbalanced keyspace when always starting at the
1003  * same position.
1004  */
1005 SH_SCOPE void
1006 SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
1007 {
1008         /*
1009          * Iterate backwards, that allows the current element to be deleted, even
1010          * if there are backward shifts.
1011          */
1012         iter->cur = at & tb->sizemask;  /* ensure at is within a valid range */
1013         iter->end = iter->cur;
1014         iter->done = false;
1015 }
1016
1017 /*
1018  * Iterate over all entries in the hash-table. Return the next occupied entry,
1019  * or NULL if done.
1020  *
1021  * During iteration the current entry in the hash table may be deleted,
1022  * without leading to elements being skipped or returned twice.  Additionally
1023  * the rest of the table may be modified (i.e. there can be insertions or
1024  * deletions), but if so, there's neither a guarantee that all nodes are
1025  * visited at least once, nor a guarantee that a node is visited at most once.
1026  */
1027 SH_SCOPE        SH_ELEMENT_TYPE *
1028 SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
1029 {
1030         while (!iter->done)
1031         {
1032                 SH_ELEMENT_TYPE *elem;
1033
1034                 elem = &tb->data[iter->cur];
1035
1036                 /* next element in backward direction */
1037                 iter->cur = (iter->cur - 1) & tb->sizemask;
1038
1039                 if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
1040                         iter->done = true;
1041                 if (elem->status == SH_STATUS_IN_USE)
1042                 {
1043                         return elem;
1044                 }
1045         }
1046
1047         return NULL;
1048 }
1049
1050 /*
1051  * Report some statistics about the state of the hashtable. For
1052  * debugging/profiling purposes only.
1053  */
1054 SH_SCOPE void
1055 SH_STAT(SH_TYPE * tb)
1056 {
1057         uint32          max_chain_length = 0;
1058         uint32          total_chain_length = 0;
1059         double          avg_chain_length;
1060         double          fillfactor;
1061         uint32          i;
1062
1063         uint32     *collisions = palloc0(tb->size * sizeof(uint32));
1064         uint32          total_collisions = 0;
1065         uint32          max_collisions = 0;
1066         double          avg_collisions;
1067
1068         for (i = 0; i < tb->size; i++)
1069         {
1070                 uint32          hash;
1071                 uint32          optimal;
1072                 uint32          dist;
1073                 SH_ELEMENT_TYPE *elem;
1074
1075                 elem = &tb->data[i];
1076
1077                 if (elem->status != SH_STATUS_IN_USE)
1078                         continue;
1079
1080                 hash = SH_ENTRY_HASH(tb, elem);
1081                 optimal = SH_INITIAL_BUCKET(tb, hash);
1082                 dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
1083
1084                 if (dist > max_chain_length)
1085                         max_chain_length = dist;
1086                 total_chain_length += dist;
1087
1088                 collisions[optimal]++;
1089         }
1090
1091         for (i = 0; i < tb->size; i++)
1092         {
1093                 uint32          curcoll = collisions[i];
1094
1095                 if (curcoll == 0)
1096                         continue;
1097
1098                 /* single contained element is not a collision */
1099                 curcoll--;
1100                 total_collisions += curcoll;
1101                 if (curcoll > max_collisions)
1102                         max_collisions = curcoll;
1103         }
1104
1105         if (tb->members > 0)
1106         {
1107                 fillfactor = tb->members / ((double) tb->size);
1108                 avg_chain_length = ((double) total_chain_length) / tb->members;
1109                 avg_collisions = ((double) total_collisions) / tb->members;
1110         }
1111         else
1112         {
1113                 fillfactor = 0;
1114                 avg_chain_length = 0;
1115                 avg_collisions = 0;
1116         }
1117
1118         sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
1119                    tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
1120                    total_collisions, max_collisions, avg_collisions);
1121 }
1122
1123 #endif                                                  /* SH_DEFINE */
1124
1125
1126 /* undefine external parameters, so next hash table can be defined */
1127 #undef SH_PREFIX
1128 #undef SH_KEY_TYPE
1129 #undef SH_KEY
1130 #undef SH_ELEMENT_TYPE
1131 #undef SH_HASH_KEY
1132 #undef SH_SCOPE
1133 #undef SH_DECLARE
1134 #undef SH_DEFINE
1135 #undef SH_GET_HASH
1136 #undef SH_STORE_HASH
1137 #undef SH_USE_NONDEFAULT_ALLOCATOR
1138 #undef SH_EQUAL
1139
1140 /* undefine locally declared macros */
1141 #undef SH_MAKE_PREFIX
1142 #undef SH_MAKE_NAME
1143 #undef SH_MAKE_NAME_
1144 #undef SH_FILLFACTOR
1145 #undef SH_MAX_FILLFACTOR
1146 #undef SH_GROW_MAX_DIB
1147 #undef SH_GROW_MAX_MOVE
1148 #undef SH_GROW_MIN_FILLFACTOR
1149 #undef SH_MAX_SIZE
1150
1151 /* types */
1152 #undef SH_TYPE
1153 #undef SH_STATUS
1154 #undef SH_STATUS_EMPTY
1155 #undef SH_STATUS_IN_USE
1156 #undef SH_ITERATOR
1157
1158 /* external function names */
1159 #undef SH_CREATE
1160 #undef SH_DESTROY
1161 #undef SH_RESET
1162 #undef SH_INSERT
1163 #undef SH_INSERT_HASH
1164 #undef SH_DELETE_ITEM
1165 #undef SH_DELETE
1166 #undef SH_LOOKUP
1167 #undef SH_LOOKUP_HASH
1168 #undef SH_GROW
1169 #undef SH_START_ITERATE
1170 #undef SH_START_ITERATE_AT
1171 #undef SH_ITERATE
1172 #undef SH_ALLOCATE
1173 #undef SH_FREE
1174 #undef SH_STAT
1175
1176 /* internal function names */
1177 #undef SH_COMPUTE_PARAMETERS
1178 #undef SH_COMPARE_KEYS
1179 #undef SH_INITIAL_BUCKET
1180 #undef SH_NEXT
1181 #undef SH_PREV
1182 #undef SH_DISTANCE_FROM_OPTIMAL
1183 #undef SH_ENTRY_HASH
1184 #undef SH_INSERT_HASH_INTERNAL
1185 #undef SH_LOOKUP_HASH_INTERNAL