/*
 * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
 *
 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * Assumptions:
 * - NULL cannot be inserted/removed as a pointer value.
 * - Trying to insert an already-existing hash-pointer pair is OK. However,
 *   it is not OK to insert into the same hash table different hash-pointer
 *   pairs that have the same pointer value, but not the hashes.
 * - Lookups are performed under an RCU read-critical section; removals
 *   must wait for a grace period to elapse before freeing removed objects.
 *
 * Features:
 * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
 *   Lookups that are concurrent with writes to the same bucket will retry
 *   via a seqlock; iterators acquire all bucket locks and therefore can be
 *   concurrent with lookups and are serialized wrt writers.
 * - Writes (i.e. insertions/removals) can be concurrent with writes to
 *   different buckets; writes to the same bucket are serialized through a lock.
 * - Optional auto-resizing: the hash table resizes up if the load surpasses
 *   a certain threshold. Resizing is done concurrently with readers; writes
 *   are serialized with the resize operation.
 *
 * The key structure is the bucket, which is cacheline-sized. Buckets
 * contain a few hash values and pointers; the u32 hash values are stored in
 * full so that resizing is fast. Having this structure instead of directly
 * chaining items has two advantages:
 * - Failed lookups fail fast, and touch a minimum number of cache lines.
 * - Resizing the hash table with concurrent lookups is easy.
 *
 * There are two types of buckets:
 * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
 * 2. all "non-head" buckets (i.e. all others) are members of a chain that
 *    starts from a head bucket.
 * Note that the seqlock and spinlock of a head bucket apply to all buckets
 * chained to it; these two fields are unused in non-head buckets.
 *
 * On removals, we move the last valid item in the chain to the position of the
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * invalid entry is found, the (failed) lookup is over.
 *
 * Resizing is done by taking all bucket spinlocks (so that no other writers can
 * race with us) and then copying all entries into a new hash map. Then, the
 * ht->map pointer is set, and the old map is freed once no RCU readers can see
 * it anymore.
 *
 * Writers check for concurrent resizes by comparing ht->map before and after
 * acquiring their bucket lock. If they don't match, a resize has occurred
 * while the bucket spinlock was being acquired.
 *
 * Related Work:
 * - Idea of cacheline-sized buckets with full hashes taken from:
 *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
 *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
 * - Why not RCU-based hash tables? They would allow us to get rid of the
 *   seqlock, but resizing would take forever since RCU read critical
 *   sections in QEMU take quite a long time.
 *   More info on relativistic hash tables:
 *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
 *     Tables via Relativistic Programming", USENIX ATC'11.
 *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
 *     https://lwn.net/Articles/612021/
 */
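
/*
 * Example usage (an illustrative sketch, not part of this file's API): the
 * entry type, the comparison callback and the way @hash is obtained are
 * hypothetical; only the qht_* calls below reflect the interface implemented
 * here.
 *
 *     struct entry {
 *         uint64_t key;
 *     };
 *
 *     static bool entry_cmp(const void *obj, const void *userp)
 *     {
 *         const struct entry *e = obj;
 *         const uint64_t *key = userp;
 *
 *         return e->key == *key;
 *     }
 *
 *     static void example(struct qht *ht, struct entry *e, uint32_t hash)
 *     {
 *         void *found;
 *
 *         qht_init(ht, 1 << 16, QHT_MODE_AUTO_RESIZE);
 *         qht_insert(ht, e, hash);
 *
 *         rcu_read_lock();
 *         found = qht_lookup(ht, entry_cmp, &e->key, hash);
 *         rcu_read_unlock();
 *         assert(found == e);
 *
 *         qht_remove(ht, e, hash);
 *     }
 */
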
#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"

/*
 * We want to avoid false sharing of cache lines. Most systems have 64-byte
 * cache lines so we go with it for simplicity.
 *
 * Note that systems with smaller cache lines will be fine (the struct is
 * almost 64-bytes); systems with larger cache lines might suffer from
 * false sharing.
 */
#define QHT_BUCKET_ALIGN 64

/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
#if HOST_LONG_BITS == 32
#define QHT_BUCKET_ENTRIES 6
#else
#define QHT_BUCKET_ENTRIES 4
#endif
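
/*
 * Rough arithmetic behind the entry counts (assuming 4-byte QemuSpin and
 * QemuSeqLock fields): on 64-bit hosts, 4 entries give
 * 4 + 4 + 4 * 4 + 4 * 8 + 8 = 64 bytes; on 32-bit hosts, 6 entries give
 * 4 + 4 + 6 * 4 + 6 * 4 + 4 = 60 bytes. Either way sizeof(struct qht_bucket)
 * stays within QHT_BUCKET_ALIGN.
 */
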
/*
 * Note: reading partially-updated pointers in @pointers could lead to
 * segfaults. We thus access them with atomic_read/set; this guarantees
 * that the compiler makes all those accesses atomic. We also need the
 * volatile-like behavior in atomic_read, since otherwise the compiler
 * might refetch the pointer.
 * atomic_read's are of course not necessary when the bucket lock is held.
 *
 * If both ht->lock and b->lock are grabbed, ht->lock should always
 * be grabbed first.
 */
struct qht_bucket {
    QemuSpin lock;
    QemuSeqLock sequence;
    uint32_t hashes[QHT_BUCKET_ENTRIES];
    void *pointers[QHT_BUCKET_ENTRIES];
    struct qht_bucket *next;
} QEMU_ALIGNED(QHT_BUCKET_ALIGN);

QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);

/**
 * struct qht_map - structure to track an array of buckets
 * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
 *       find the whole struct.
 * @buckets: array of head buckets. It is constant once the map is created.
 * @n_buckets: number of head buckets. It is constant once the map is created.
 * @n_added_buckets: number of added (i.e. "non-head") buckets
 * @n_added_buckets_threshold: threshold to trigger an upward resize once the
 *                             number of added buckets surpasses it.
 *
 * Buckets are tracked in what we call a "map", i.e. this structure.
 */
struct qht_map {
    struct rcu_head rcu;
    struct qht_bucket *buckets;
    size_t n_buckets;
    size_t n_added_buckets;
    size_t n_added_buckets_threshold;
};

/* trigger a resize when n_added_buckets > n_buckets / div */
#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8
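
/*
 * For example, with QHT_MODE_AUTO_RESIZE set, a map with 1024 head buckets
 * grows once more than 1024 / 8 = 128 non-head buckets have been chained.
 */
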
static void qht_do_resize(struct qht *ht, struct qht_map *new);
static void qht_grow_maybe(struct qht *ht);

#ifdef QHT_DEBUG

#define qht_debug_assert(X) do { assert(X); } while (0)

static void qht_bucket_debug__locked(struct qht_bucket *b)
{
    bool seen_empty = false;
    bool corrupt = false;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                seen_empty = true;
                continue;
            }
            if (seen_empty) {
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
                corrupt = true;
            }
        }
        b = b->next;
    } while (b);
    qht_debug_assert(!corrupt);
}

static void qht_map_debug__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_debug__locked(&map->buckets[i]);
    }
}

#else

#define qht_debug_assert(X) do { (void)(X); } while (0)

static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{
}

static inline void qht_map_debug__all_locked(struct qht_map *map)
{
}
#endif /* QHT_DEBUG */

static inline size_t qht_elems_to_buckets(size_t n_elems)
{
    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
}

static inline void qht_head_init(struct qht_bucket *b)
{
    memset(b, 0, sizeof(*b));
    qemu_spin_init(&b->lock);
    seqlock_init(&b->sequence);
}

static inline
struct qht_bucket *qht_map_to_bucket(struct qht_map *map, uint32_t hash)
{
    return &map->buckets[hash & (map->n_buckets - 1)];
}

/* acquire all bucket locks from a map */
static void qht_map_lock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_lock(&b->lock);
    }
}

static void qht_map_unlock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_unlock(&b->lock);
    }
}

/*
 * Call with at least a bucket lock held.
 * @map should be the value read before acquiring the lock (or locks).
 */
static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map)
{
    return map != ht->map;
}

/*
 * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
 *
 * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return;
    }
    qht_map_unlock_buckets(map);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qemu_mutex_lock(&ht->lock);
    map = ht->map;
    qht_map_lock_buckets(map);
    qemu_mutex_unlock(&ht->lock);
    *pmap = map;
}

/*
 * Get a head bucket and lock it, making sure its parent map is not stale.
 * @pmap is filled with a pointer to the bucket's parent map.
 *
 * Unlock with qemu_spin_unlock(&b->lock).
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
                                             struct qht_map **pmap)
{
    struct qht_bucket *b;
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    qemu_spin_lock(&b->lock);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return b;
    }
    qemu_spin_unlock(&b->lock);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qemu_mutex_lock(&ht->lock);
    map = ht->map;
    b = qht_map_to_bucket(map, hash);
    qemu_spin_lock(&b->lock);
    qemu_mutex_unlock(&ht->lock);
    *pmap = map;
    return b;
}

static inline bool qht_map_needs_resize(struct qht_map *map)
{
    return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold;
}

static inline void qht_chain_destroy(struct qht_bucket *head)
{
    struct qht_bucket *curr = head->next;
    struct qht_bucket *prev;

    while (curr) {
        prev = curr;
        curr = curr->next;
        qemu_vfree(prev);
    }
}

/* pass only an orphan map */
static void qht_map_destroy(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_chain_destroy(&map->buckets[i]);
    }
    qemu_vfree(map->buckets);
    g_free(map);
}

static struct qht_map *qht_map_create(size_t n_buckets)
{
    struct qht_map *map;
    size_t i;

    map = g_malloc(sizeof(*map));
    map->n_buckets = n_buckets;

    map->n_added_buckets = 0;
    map->n_added_buckets_threshold = n_buckets /
        QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;

    /* let tiny hash tables add at least one non-head bucket */
    if (unlikely(map->n_added_buckets_threshold == 0)) {
        map->n_added_buckets_threshold = 1;
    }

    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                 sizeof(*map->buckets) * n_buckets);
    for (i = 0; i < n_buckets; i++) {
        qht_head_init(&map->buckets[i]);
    }
    return map;
}

void qht_init(struct qht *ht, size_t n_elems, unsigned int mode)
{
    struct qht_map *map;
    size_t n_buckets = qht_elems_to_buckets(n_elems);

    ht->mode = mode;
    qemu_mutex_init(&ht->lock);
    map = qht_map_create(n_buckets);
    atomic_rcu_set(&ht->map, map);
}

/* call only when there are no readers/writers left */
void qht_destroy(struct qht *ht)
{
    qht_map_destroy(ht->map);
    memset(ht, 0, sizeof(*ht));
}

static void qht_bucket_reset__locked(struct qht_bucket *head)
{
    struct qht_bucket *b = head;
    int i;

    seqlock_write_begin(&head->sequence);
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                goto done;
            }
            b->hashes[i] = 0;
            atomic_set(&b->pointers[i], NULL);
        }
        b = b->next;
    } while (b);
 done:
    seqlock_write_end(&head->sequence);
}

/* call with all bucket locks held */
static void qht_map_reset__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_reset__locked(&map->buckets[i]);
    }
    qht_map_debug__all_locked(map);
}

void qht_reset(struct qht *ht)
{
    struct qht_map *map;

    qht_map_lock_buckets__no_stale(ht, &map);
    qht_map_reset__all_locked(map);
    qht_map_unlock_buckets(map);
}

bool qht_reset_size(struct qht *ht, size_t n_elems)
{
    struct qht_map *new = NULL;
    struct qht_map *map;
    size_t n_buckets;
    bool resize = false;

    n_buckets = qht_elems_to_buckets(n_elems);

    qemu_mutex_lock(&ht->lock);
    map = ht->map;
    if (n_buckets != map->n_buckets) {
        new = qht_map_create(n_buckets);
        resize = true;
    }

    qht_map_lock_buckets(map);
    qht_map_reset__all_locked(map);
    if (resize) {
        qht_do_resize(ht, new);
    }
    qht_map_unlock_buckets(map);
    qemu_mutex_unlock(&ht->lock);

    return resize;
}

static
void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
                    const void *userp, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->hashes[i] == hash) {
                void *p = atomic_read(&b->pointers[i]);

                if (likely(p) && likely(func(p, userp))) {
                    return p;
                }
            }
        }
        b = atomic_rcu_read(&b->next);
    } while (b);

    return NULL;
}

static __attribute__((noinline))
void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
                           const void *userp, uint32_t hash)
{
    unsigned int version;
    void *ret;

    do {
        version = seqlock_read_begin(&b->sequence);
        ret = qht_do_lookup(b, func, userp, hash);
    } while (seqlock_read_retry(&b->sequence, version));
    return ret;
}

void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
                 uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    unsigned int version;
    void *ret;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    version = seqlock_read_begin(&b->sequence);
    ret = qht_do_lookup(b, func, userp, hash);
    if (likely(!seqlock_read_retry(&b->sequence, version))) {
        return ret;
    }
    /*
     * Removing the do/while from the fastpath gives a 4% perf. increase when
     * running a 100%-lookup microbenchmark.
     */
    return qht_lookup__slowpath(b, func, userp, hash);
}

/* call with head->lock held */
static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
                               struct qht_bucket *head, void *p, uint32_t hash,
                               bool *needs_resize)
{
    struct qht_bucket *b = head;
    struct qht_bucket *prev = NULL;
    struct qht_bucket *new = NULL;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                if (unlikely(b->pointers[i] == p)) {
                    return false;
                }
            } else {
                goto found;
            }
        }
        prev = b;
        b = b->next;
    } while (b);

    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
    memset(b, 0, sizeof(*b));
    new = b;
    i = 0;
    atomic_inc(&map->n_added_buckets);
    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
        *needs_resize = true;
    }

 found:
    /* found an empty key: acquire the seqlock and write */
    seqlock_write_begin(&head->sequence);
    if (new) {
        atomic_rcu_set(&prev->next, b);
    }
    b->hashes[i] = hash;
    atomic_set(&b->pointers[i], p);
    seqlock_write_end(&head->sequence);
    return true;
}

static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
{
    struct qht_map *map;

    /*
     * If the lock is taken it probably means there's an ongoing resize,
     * so bail out.
     */
    if (qemu_mutex_trylock(&ht->lock)) {
        return;
    }
    map = ht->map;
    /* another thread might have just performed the resize we were after */
    if (qht_map_needs_resize(map)) {
        struct qht_map *new = qht_map_create(map->n_buckets * 2);

        qht_map_lock_buckets(map);
        qht_do_resize(ht, new);
        qht_map_unlock_buckets(map);
    }
    qemu_mutex_unlock(&ht->lock);
}

bool qht_insert(struct qht *ht, void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool needs_resize = false;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);

    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
        qht_grow_maybe(ht);
    }
    return ret;
}

static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
{
    if (pos == QHT_BUCKET_ENTRIES - 1) {
        if (b->next == NULL) {
            return true;
        }
        return b->next->pointers[0] == NULL;
    }
    return b->pointers[pos + 1] == NULL;
}

static void
qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
{
    qht_debug_assert(!(to == from && i == j));
    qht_debug_assert(to->pointers[i]);
    qht_debug_assert(from->pointers[j]);

    to->hashes[i] = from->hashes[j];
    atomic_set(&to->pointers[i], from->pointers[j]);

    from->hashes[j] = 0;
    atomic_set(&from->pointers[j], NULL);
}

/*
 * Find the last valid entry in @head, and swap it with @orig[pos], which has
 * just been invalidated.
 */
static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
{
    struct qht_bucket *b = orig;
    struct qht_bucket *prev = NULL;
    int i;

    if (qht_entry_is_last(orig, pos)) {
        orig->hashes[pos] = 0;
        atomic_set(&orig->pointers[pos], NULL);
        return;
    }
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                continue;
            }
            if (i > 0) {
                return qht_entry_move(orig, pos, b, i - 1);
            }
            qht_debug_assert(prev);
            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
        }
        prev = b;
        b = b->next;
    } while (b);
    /* no free entries other than orig[pos], so swap it with the last one */
    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
}

/* call with b->lock held */
static inline
bool qht_remove__locked(struct qht_map *map, struct qht_bucket *head,
                        const void *p, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            void *q = b->pointers[i];

            if (unlikely(q == NULL)) {
                return false;
            }
            if (q == p) {
                qht_debug_assert(b->hashes[i] == hash);
                seqlock_write_begin(&head->sequence);
                qht_bucket_remove_entry(b, i);
                seqlock_write_end(&head->sequence);
                return true;
            }
        }
        b = b->next;
    } while (b);
    return false;
}

bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_remove__locked(map, b, p, hash);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);
    return ret;
}

static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *b,
                                   qht_iter_func_t func, void *userp)
{
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                return;
            }
            func(ht, b->pointers[i], b->hashes[i], userp);
        }
        b = b->next;
    } while (b);
}

/* call with all of the map's locks held */
static inline void qht_map_iter__all_locked(struct qht *ht, struct qht_map *map,
                                            qht_iter_func_t func, void *userp)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_iter(ht, &map->buckets[i], func, userp);
    }
}

void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    /* Note: ht here is merely for carrying ht->mode; ht->map won't be read */
    qht_map_iter__all_locked(ht, map, func, userp);
    qht_map_unlock_buckets(map);
}

static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp)
{
    struct qht_map *new = userp;
    struct qht_bucket *b = qht_map_to_bucket(new, hash);

    /* no need to acquire b->lock because no thread has seen this map yet */
    qht_insert__locked(ht, new, b, p, hash, NULL);
}

/*
 * Call with ht->lock and all bucket locks held.
 *
 * Creating the @new map here would add unnecessary delay while all the locks
 * are held--holding up the bucket locks is particularly bad, since no writes
 * can occur while these are held. Thus, we let callers create the new map,
 * hopefully without the bucket locks held.
 */
static void qht_do_resize(struct qht *ht, struct qht_map *new)
{
    struct qht_map *old;

    old = ht->map;
    g_assert_cmpuint(new->n_buckets, !=, old->n_buckets);

    qht_map_iter__all_locked(ht, old, qht_map_copy, new);
    qht_map_debug__all_locked(new);

    atomic_rcu_set(&ht->map, new);
    call_rcu(old, qht_map_destroy, rcu);
}

bool qht_resize(struct qht *ht, size_t n_elems)
{
    size_t n_buckets = qht_elems_to_buckets(n_elems);
    bool ret = false;

    qemu_mutex_lock(&ht->lock);
    if (n_buckets != ht->map->n_buckets) {
        struct qht_map *new;
        struct qht_map *old = ht->map;

        new = qht_map_create(n_buckets);
        qht_map_lock_buckets(old);
        qht_do_resize(ht, new);
        qht_map_unlock_buckets(old);
        ret = true;
    }
    qemu_mutex_unlock(&ht->lock);

    return ret;
}

/* pass @stats to qht_statistics_destroy() when done */
void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
{
    struct qht_map *map;
    size_t i;

    map = atomic_rcu_read(&ht->map);

    stats->head_buckets = map->n_buckets;
    stats->used_head_buckets = 0;
    stats->entries = 0;
    qdist_init(&stats->chain);
    qdist_init(&stats->occupancy);

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *head = &map->buckets[i];
        struct qht_bucket *b;
        unsigned int version;
        size_t buckets;
        size_t entries;
        int j;

        do {
            version = seqlock_read_begin(&head->sequence);
            buckets = 0;
            entries = 0;
            b = head;
            do {
                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
                    if (atomic_read(&b->pointers[j]) == NULL) {
                        break;
                    }
                    entries++;
                }
                buckets++;
                b = atomic_rcu_read(&b->next);
            } while (b);
        } while (seqlock_read_retry(&head->sequence, version));

        if (entries) {
            qdist_inc(&stats->chain, buckets);
            qdist_inc(&stats->occupancy,
                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
            stats->used_head_buckets++;
            stats->entries += entries;
        } else {
            qdist_inc(&stats->occupancy, 0);
        }
    }
}

void qht_statistics_destroy(struct qht_stats *stats)
{
    qdist_destroy(&stats->occupancy);
    qdist_destroy(&stats->chain);
}