/*
 * Copyright (c) 2014 Tim Starling
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <atomic>
#include <mutex>
#include <new>
#include <thread>
#include <vector>

#include <tbb/concurrent_hash_map.h>

namespace HPHP {

/**
 * ConcurrentLRUCache is a thread-safe hashtable with a limited size. When
 * it is full, insert() evicts the least recently used item from the cache.
 *
 * The find() operation fills a ConstAccessor object, which is a smart pointer
 * similar to TBB's const_accessor. After eviction, destruction of the value is
 * deferred until all ConstAccessor objects are destroyed.
 *
 * The implementation is generally conservative, relying on the documented
 * behaviour of tbb::concurrent_hash_map. LRU list transactions are protected
 * with a single mutex. Having our own doubly-linked list implementation helps
 * to ensure that list transactions are sufficiently brief, consisting of only
 * a few loads and stores. User code is not executed while the lock is held.
 *
 * The acquisition of the list mutex during find() is non-blocking (try_lock),
 * so under heavy lookup load the container will not stall; instead, some LRU
 * update operations will be omitted.
 *
 * Insert performance was observed to degrade rapidly when there is a heavy
 * concurrent insert/evict load, mostly due to locks in the underlying
 * TBB::CHM. If that is a possibility for your workload,
 * ConcurrentScalableCache is recommended instead.
 */
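
/*
 * A minimal usage sketch (illustrative only; the key/value types and the
 * capacity of 1024 are arbitrary choices, not part of the API):
 *
 *   ConcurrentLRUCache<std::string, int> cache(1024);
 *   cache.insert("answer", 42);
 *
 *   ConcurrentLRUCache<std::string, int>::ConstAccessor ac;
 *   if (cache.find(ac, "answer")) {
 *     int v = *ac;  // the accessor dereferences to the cached value
 *   }
 */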

template <class TKey, class TValue, class THash = tbb::tbb_hash_compare<TKey>>
struct ConcurrentLRUCache {
private:
  /**
   * The LRU list node.
   *
   * We make a copy of the key in the list node, allowing us to find the
   * TBB::CHM element from the list node. TBB::CHM invalidates iterators
   * on most operations, even find(), ruling out more efficient
   * implementations.
   */
  struct ListNode {
    ListNode()
      : m_prev(OutOfListMarker), m_next(nullptr)
    {}

    explicit ListNode(const TKey& key)
      : m_key(key), m_prev(OutOfListMarker), m_next(nullptr)
    {}

    TKey m_key;
    ListNode* m_prev;
    ListNode* m_next;

    bool isInList() const {
      return m_prev != OutOfListMarker;
    }
  };

  static ListNode* const OutOfListMarker;

  /**
   * The value that we store in the hashtable. The list node is separately
   * allocated with new. The ListNode* is owned by the list.
   */
  struct HashMapValue {
    HashMapValue()
      : m_listNode(nullptr)
    {}

    HashMapValue(const TValue& value, ListNode* node)
      : m_value(value), m_listNode(node)
    {}

    TValue m_value;
    ListNode* m_listNode;
  };

  typedef tbb::concurrent_hash_map<TKey, HashMapValue, THash> HashMap;
  typedef typename HashMap::const_accessor HashMapConstAccessor;
  typedef typename HashMap::accessor HashMapAccessor;
  typedef typename HashMap::value_type HashMapValuePair;
  typedef std::pair<const TKey, TValue> SnapshotValue;

public:
  /**
   * The proxy object for TBB::CHM::const_accessor. Provides direct access to
   * the user's value by dereferencing, thus hiding our implementation
   * details.
   */
  struct ConstAccessor {
    ConstAccessor() {}

    const TValue& operator*() const {
      return *get();
    }

    const TValue* operator->() const {
      return get();
    }

    const TValue* get() const {
      return &m_hashAccessor->second.m_value;
    }

    bool empty() const {
      return m_hashAccessor.empty();
    }

  private:
    friend struct ConcurrentLRUCache;
    HashMapConstAccessor m_hashAccessor;
  };

  /**
   * Create a container with a given maximum size.
   */
  explicit ConcurrentLRUCache(size_t maxSize);

  ConcurrentLRUCache(const ConcurrentLRUCache& other) = delete;
  ConcurrentLRUCache& operator=(const ConcurrentLRUCache&) = delete;

  ~ConcurrentLRUCache() {
    clear();
  }

  /**
   * Find a value by key, and return it by filling the ConstAccessor, which
   * can be default-constructed. Returns true if the element was found, false
   * otherwise. Updates the eviction list, making the element the
   * most-recently used.
   */
  bool find(ConstAccessor& ac, const TKey& key);
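
  /*
   * For example (a sketch; `cache` here is assumed to be a
   * ConcurrentLRUCache<int, std::string>). Keep the ConstAccessor alive for
   * as long as the reference is in use, since the accessor is what defers
   * destruction of an evicted value:
   *
   *   ConcurrentLRUCache<int, std::string>::ConstAccessor ac;
   *   if (cache.find(ac, 7)) {
   *     size_t len = ac->size();  // operator-> reaches the cached value
   *   }
   */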

  /**
   * Insert a value into the container. Both the key and the value will be
   * copied. The new element will be put into the eviction list as the
   * most-recently used.
   *
   * If there was already an element in the container with the same key, it
   * will not be updated, and false will be returned. Otherwise, true will be
   * returned.
   */
  bool insert(const TKey& key, const TValue& value);
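
  /*
   * For example (a sketch, with the same assumed `cache` as above):
   *
   *   cache.insert(1, std::string("a"));            // true: new element
   *   bool ok = cache.insert(1, std::string("b"));  // false: key 1 keeps "a"
   */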

  /**
   * Clear the container. NOT THREAD SAFE -- do not use while other threads
   * are accessing the container.
   */
  void clear();

  /**
   * Get a snapshot of the keys in the container by copying them into the
   * supplied vector. This will block inserts and prevent LRU updates while it
   * completes. The keys will be inserted in order from most-recently used to
   * least-recently used.
   */
  void snapshotKeys(std::vector<TKey>& keys);
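
  /*
   * For example (a sketch, assuming integer keys; keys.front() will be the
   * most-recently used key):
   *
   *   std::vector<int> keys;
   *   cache.snapshotKeys(keys);
   */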

  /**
   * Get the approximate size of the container. May be slightly too low when
   * insertion is in progress.
   */
  size_t size() const {
    return m_size.load();
  }

private:
  /**
   * Unlink a node from the list. The caller must lock the list mutex while
   * this is called.
   */
  void delink(ListNode* node);

  /**
   * Add a new node to the list in the most-recently used position. The caller
   * must lock the list mutex while this is called.
   */
  void pushFront(ListNode* node);

  /**
   * Evict the least-recently used item from the container. This function does
   * its own locking.
   */
  void evict();

  /**
   * The maximum number of elements in the container.
   */
  size_t m_maxSize;

  /**
   * This atomic variable is used to signal to all threads whether or not
   * eviction should be done on insert. It is approximately equal to the
   * number of elements in the container.
   */
  std::atomic<size_t> m_size;

  /**
   * The underlying TBB hash map.
   */
  HashMap m_map;

  /**
   * The linked list. The "head" is the most-recently used node, and the
   * "tail" is the least-recently used node. The list mutex must be held
   * during both read and write.
   */
  ListNode m_head;
  ListNode m_tail;
  typedef std::mutex ListMutex;
  ListMutex m_listMutex;
};
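
// OutOfListMarker is a sentinel address that is never dereferenced; a node
// whose m_prev equals it is not currently linked into the LRU list.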
template <class TKey, class TValue, class THash>
typename ConcurrentLRUCache<TKey, TValue, THash>::ListNode* const
ConcurrentLRUCache<TKey, TValue, THash>::OutOfListMarker = (ListNode*)-1;

template <class TKey, class TValue, class THash>
ConcurrentLRUCache<TKey, TValue, THash>::
ConcurrentLRUCache(size_t maxSize)
  : m_maxSize(maxSize), m_size(0),
  m_map(std::thread::hardware_concurrency() * 4) // it will automatically grow
{
  m_head.m_prev = nullptr;
  m_head.m_next = &m_tail;
  m_tail.m_prev = &m_head;
}

template <class TKey, class TValue, class THash>
bool ConcurrentLRUCache<TKey, TValue, THash>::
find(ConstAccessor& ac, const TKey& key) {
  HashMapConstAccessor& hashAccessor = ac.m_hashAccessor;
  if (!m_map.find(hashAccessor, key)) {
    return false;
  }

  // Acquire the lock, but don't block if it is already held
  std::unique_lock<ListMutex> lock(m_listMutex, std::try_to_lock);
  if (lock) {
    ListNode* node = hashAccessor->second.m_listNode;
    // The list node may be out of the list if it is in the process of being
    // inserted or evicted. Doing this check allows us to lock the list for
    // shorter periods of time.
    if (node->isInList()) {
      delink(node);
      pushFront(node);
    }
    lock.unlock();
  }
  return true;
}

template <class TKey, class TValue, class THash>
bool ConcurrentLRUCache<TKey, TValue, THash>::
insert(const TKey& key, const TValue& value) {
  // Insert into the CHM
  ListNode* node = new ListNode(key);
  HashMapAccessor hashAccessor;
  HashMapValuePair hashMapValue(key, HashMapValue(value, node));
  if (!m_map.insert(hashAccessor, hashMapValue)) {
    delete node;
    return false;
  }

  // Evict if necessary, now that we know the hashmap insertion was successful.
  size_t size = m_size.load();
  bool evictionDone = false;
  if (size >= m_maxSize) {
    // The container is at (or over) capacity, so eviction needs to be done.
    // Do not decrement m_size, since that would cause other threads to
    // inappropriately omit eviction during their own inserts.
    evict();
    evictionDone = true;
  }

  // Note that we have to update the LRU list before we increment m_size, so
  // that other threads don't attempt to evict list items before they even
  // exist.
  std::unique_lock<ListMutex> lock(m_listMutex);
  pushFront(node);
  lock.unlock();
  if (!evictionDone) {
    size = m_size++;
  }
  if (size > m_maxSize) {
    // It is possible for the size to temporarily exceed the maximum if there
    // is a heavy insert() load, once only as the cache fills. In this
    // situation, we have to be careful not to have every thread
    // simultaneously attempt to evict the extra entries, since we could end
    // up underfilled. Instead we do a compare-and-exchange to acquire an
    // exclusive right to reduce the size to a particular value.
    //
    // We could continue to evict in a loop, but if there are a lot of threads
    // here at the same time, that could lead to spinning. So we will just
    // evict one extra element per insert() until the overfill is rectified.
    if (m_size.compare_exchange_strong(size, size - 1)) {
      evict();
    }
  }
  return true;
}

template <class TKey, class TValue, class THash>
void ConcurrentLRUCache<TKey, TValue, THash>::
clear() {
  m_map.clear();
  ListNode* node = m_head.m_next;
  ListNode* next;
  while (node != &m_tail) {
    next = node->m_next;
    delete node;
    node = next;
  }
  m_head.m_next = &m_tail;
  m_tail.m_prev = &m_head;
  m_size = 0;
}

template <class TKey, class TValue, class THash>
void ConcurrentLRUCache<TKey, TValue, THash>::
snapshotKeys(std::vector<TKey>& keys) {
  keys.reserve(keys.size() + m_size.load());
  std::lock_guard<ListMutex> lock(m_listMutex);
  for (ListNode* node = m_head.m_next; node != &m_tail; node = node->m_next) {
    keys.push_back(node->m_key);
  }
}

template <class TKey, class TValue, class THash>
inline void ConcurrentLRUCache<TKey, TValue, THash>::
delink(ListNode* node) {
  ListNode* prev = node->m_prev;
  ListNode* next = node->m_next;
  prev->m_next = next;
  next->m_prev = prev;
  node->m_prev = OutOfListMarker;
}

template <class TKey, class TValue, class THash>
inline void ConcurrentLRUCache<TKey, TValue, THash>::
pushFront(ListNode* node) {
  ListNode* oldRealHead = m_head.m_next;
  node->m_prev = &m_head;
  node->m_next = oldRealHead;
  oldRealHead->m_prev = node;
  m_head.m_next = node;
}

template <class TKey, class TValue, class THash>
void ConcurrentLRUCache<TKey, TValue, THash>::
evict() {
  std::unique_lock<ListMutex> lock(m_listMutex);
  ListNode* moribund = m_tail.m_prev;
  if (moribund == &m_head) {
    // List is empty, can't evict
    return;
  }
  delink(moribund);
  lock.unlock();

  HashMapAccessor hashAccessor;
  if (!m_map.find(hashAccessor, moribund->m_key)) {
    // Presumably unreachable
    return;
  }
  m_map.erase(hashAccessor);
  delete moribund;
}

} // namespace HPHP