/*
 * Copyright (c) 2014 Tim Starling
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #ifndef incl_HPHP_UTIL_LRU_CACHE_H
18 #define incl_HPHP_UTIL_LRU_CACHE_H
#include <memory>

#include <tbb/concurrent_hash_map.h>
/**
 * ConcurrentLRUCache is a thread-safe hashtable with a limited size. When
 * it is full, insert() evicts the least recently used item from the cache.
 *
 * The find() operation fills a ConstAccessor object, which is a smart pointer
 * similar to TBB's const_accessor. After eviction, destruction of the value is
 * deferred until all ConstAccessor objects are destroyed.
 *
 * The implementation is generally conservative, relying on the documented
 * behaviour of tbb::concurrent_hash_map. LRU list transactions are protected
 * with a single mutex. Having our own doubly-linked list implementation helps
 * to ensure that list transactions are sufficiently brief, consisting of only
 * a few loads and stores. User code is not executed while the lock is held.
 *
 * The acquisition of the list mutex during find() is non-blocking (try_lock),
 * so under heavy lookup load, the container will not stall, instead some LRU
 * update operations will be omitted.
 *
 * Insert performance was observed to degrade rapidly when there is a heavy
 * concurrent insert/evict load, mostly due to locks in the underlying
 * TBB::CHM. So if that is a possibility for your workload,
 * ConcurrentScalableCache is recommended instead.
 */
52 template <class TKey
, class TValue
, class THash
= tbb::tbb_hash_compare
<TKey
>>
53 struct ConcurrentLRUCache
{
58 * We make a copy of the key in the list node, allowing us to find the
59 * TBB::CHM element from the list node. TBB::CHM invalidates iterators
60 * on most operations, even find(), ruling out more efficient
65 : m_prev(OutOfListMarker
), m_next(nullptr)
68 explicit ListNode(const TKey
& key
)
69 : m_key(key
), m_prev(OutOfListMarker
), m_next(nullptr)
76 bool isInList() const {
77 return m_prev
!= OutOfListMarker
;
81 static ListNode
* const OutOfListMarker
;
84 * The value that we store in the hashtable. The list node is allocated from
85 * an internal object_pool. The ListNode* is owned by the list.
92 HashMapValue(const TValue
& value
, ListNode
* node
)
93 : m_value(value
), m_listNode(node
)
100 typedef tbb::concurrent_hash_map
<TKey
, HashMapValue
, THash
> HashMap
;
101 typedef typename
HashMap::const_accessor HashMapConstAccessor
;
102 typedef typename
HashMap::accessor HashMapAccessor
;
103 typedef typename
HashMap::value_type HashMapValuePair
;
104 typedef std::pair
<const TKey
, TValue
> SnapshotValue
;
108 * The proxy object for TBB::CHM::const_accessor. Provides direct access to
109 * the user's value by dereferencing, thus hiding our implementation
112 struct ConstAccessor
{
115 const TValue
& operator*() const {
119 const TValue
* operator->() const {
123 const TValue
* get() const {
124 return &m_hashAccessor
->second
.m_value
;
128 return m_hashAccessor
.empty();
132 friend struct ConcurrentLRUCache
;
133 HashMapConstAccessor m_hashAccessor
;
137 * Create a container with a given maximum size
139 explicit ConcurrentLRUCache(size_t maxSize
);
141 ConcurrentLRUCache(const ConcurrentLRUCache
& other
) = delete;
142 ConcurrentLRUCache
& operator=(const ConcurrentLRUCache
&) = delete;
144 ~ConcurrentLRUCache() {
149 * Find a value by key, and return it by filling the ConstAccessor, which
150 * can be default-constructed. Returns true if the element was found, false
151 * otherwise. Updates the eviction list, making the element the
152 * most-recently used.
154 bool find(ConstAccessor
& ac
, const TKey
& key
);
157 * Insert a value into the container. Both the key and value will be copied.
158 * The new element will put into the eviction list as the most-recently
161 * If there was already an element in the container with the same key, it
162 * will not be updated, and false will be returned. Otherwise, true will be
165 bool insert(const TKey
& key
, const TValue
& value
);
168 * Clear the container. NOT THREAD SAFE -- do not use while other threads
169 * are accessing the container.
174 * Get a snapshot of the keys in the container by copying them into the
175 * supplied vector. This will block inserts and prevent LRU updates while it
176 * completes. The keys will be inserted in order from most-recently used to
177 * least-recently used.
179 void snapshotKeys(std::vector
<TKey
>& keys
);
182 * Get the approximate size of the container. May be slightly too low when
183 * insertion is in progress.
185 size_t size() const {
186 return m_size
.load();
191 * Unlink a node from the list. The caller must lock the list mutex while
194 void delink(ListNode
* node
);
197 * Add a new node to the list in the most-recently used position. The caller
198 * must lock the list mutex while this is called.
200 void pushFront(ListNode
* node
);
203 * Evict the least-recently used item from the container. This function does
209 * The maximum number of elements in the container.
214 * This atomic variable is used to signal to all threads whether or not
215 * eviction should be done on insert. It is approximately equal to the
216 * number of elements in the container.
218 std::atomic
<size_t> m_size
;
221 * The underlying TBB hash map.
226 * The linked list. The "head" is the most-recently used node, and the
227 * "tail" is the least-recently used node. The list mutex must be held
228 * during both read and write.
232 typedef std::mutex ListMutex
;
233 ListMutex m_listMutex
;
236 template <class TKey
, class TValue
, class THash
>
237 typename ConcurrentLRUCache
<TKey
, TValue
, THash
>::ListNode
* const
238 ConcurrentLRUCache
<TKey
, TValue
, THash
>::OutOfListMarker
= (ListNode
*)-1;
240 template <class TKey
, class TValue
, class THash
>
241 ConcurrentLRUCache
<TKey
, TValue
, THash
>::
242 ConcurrentLRUCache(size_t maxSize
)
243 : m_maxSize(maxSize
), m_size(0),
244 m_map(std::thread::hardware_concurrency() * 4) // it will automatically grow
246 m_head
.m_prev
= nullptr;
247 m_head
.m_next
= &m_tail
;
248 m_tail
.m_prev
= &m_head
;
251 template <class TKey
, class TValue
, class THash
>
252 bool ConcurrentLRUCache
<TKey
, TValue
, THash
>::
253 find(ConstAccessor
& ac
, const TKey
& key
) {
254 HashMapConstAccessor
& hashAccessor
= ac
.m_hashAccessor
;
255 if (!m_map
.find(hashAccessor
, key
)) {
259 // Acquire the lock, but don't block if it is already held
260 std::unique_lock
<ListMutex
> lock(m_listMutex
, std::try_to_lock
);
262 ListNode
* node
= hashAccessor
->second
.m_listNode
;
263 // The list node may be out of the list if it is in the process of being
264 // inserted or evicted. Doing this check allows us to lock the list for
265 // shorter periods of time.
266 if (node
->isInList()) {
275 template <class TKey
, class TValue
, class THash
>
276 bool ConcurrentLRUCache
<TKey
, TValue
, THash
>::
277 insert(const TKey
& key
, const TValue
& value
) {
278 // Insert into the CHM
279 ListNode
* node
= new ListNode(key
);
280 HashMapAccessor hashAccessor
;
281 HashMapValuePair
hashMapValue(key
, HashMapValue(value
, node
));
282 if (!m_map
.insert(hashAccessor
, hashMapValue
)) {
287 // Evict if necessary, now that we know the hashmap insertion was successful.
288 size_t size
= m_size
.load();
289 bool evictionDone
= false;
290 if (size
>= m_maxSize
) {
291 // The container is at (or over) capacity, so eviction needs to be done.
292 // Do not decrement m_size, since that would cause other threads to
293 // inappropriately omit eviction during their own inserts.
298 // Note that we have to update the LRU list before we increment m_size, so
299 // that other threads don't attempt to evict list items before they even
301 std::unique_lock
<ListMutex
> lock(m_listMutex
);
307 if (size
> m_maxSize
) {
308 // It is possible for the size to temporarily exceed the maximum if there is
309 // a heavy insert() load, once only as the cache fills. In this situation,
310 // we have to be careful not to have every thread simultaneously attempt to
311 // evict the extra entries, since we could end up underfilled. Instead we do
312 // a compare-and-exchange to acquire an exclusive right to reduce the size
313 // to a particular value.
315 // We could continue to evict in a loop, but if there are a lot of threads
316 // here at the same time, that could lead to spinning. So we will just evict
317 // one extra element per insert() until the overfill is rectified.
318 if (m_size
.compare_exchange_strong(size
, size
- 1)) {
325 template <class TKey
, class TValue
, class THash
>
326 void ConcurrentLRUCache
<TKey
, TValue
, THash
>::
329 ListNode
* node
= m_head
.m_next
;
331 while (node
!= &m_tail
) {
336 m_head
.m_next
= &m_tail
;
337 m_tail
.m_prev
= &m_head
;
341 template <class TKey
, class TValue
, class THash
>
342 void ConcurrentLRUCache
<TKey
, TValue
, THash
>::
343 snapshotKeys(std::vector
<TKey
>& keys
) {
344 keys
.reserve(keys
.size() + m_size
.load());
345 std::lock_guard
<ListMutex
> lock(m_listMutex
);
346 for (ListNode
* node
= m_head
.m_next
; node
!= &m_tail
; node
= node
->m_next
) {
347 keys
.push_back(node
->m_key
);
351 template <class TKey
, class TValue
, class THash
>
352 inline void ConcurrentLRUCache
<TKey
, TValue
, THash
>::
353 delink(ListNode
* node
) {
354 ListNode
* prev
= node
->m_prev
;
355 ListNode
* next
= node
->m_next
;
358 node
->m_prev
= OutOfListMarker
;
361 template <class TKey
, class TValue
, class THash
>
362 inline void ConcurrentLRUCache
<TKey
, TValue
, THash
>::
363 pushFront(ListNode
* node
) {
364 ListNode
* oldRealHead
= m_head
.m_next
;
365 node
->m_prev
= &m_head
;
366 node
->m_next
= oldRealHead
;
367 oldRealHead
->m_prev
= node
;
368 m_head
.m_next
= node
;
371 template <class TKey
, class TValue
, class THash
>
372 void ConcurrentLRUCache
<TKey
, TValue
, THash
>::
374 std::unique_lock
<ListMutex
> lock(m_listMutex
);
375 ListNode
* moribund
= m_tail
.m_prev
;
376 if (moribund
== &m_head
) {
377 // List is empty, can't evict
383 HashMapAccessor hashAccessor
;
384 if (!m_map
.find(hashAccessor
, moribund
->m_key
)) {
385 // Presumably unreachable
388 m_map
.erase(hashAccessor
);