Delete atomic_add
[hiphop-php.git] / src / util / lfu_table.h
blob368ae259ac46c87463656182fc19a07e49aefd3c
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef __HPHP_UTIL_LFU_TABLE_H__
18 #define __HPHP_UTIL_LFU_TABLE_H__
20 #include <util/base.h>
21 #include <util/lock.h>
23 namespace HPHP {
24 ///////////////////////////////////////////////////////////////////////////////
26 template<class K, class V>
27 class LFUNullDestructor {
28 public:
29 void operator()(const K &k, V &v) {}
32 /**
33 * A lookup table with a maximum size. Once it reaches maximum size,
34 * inserts will evict the least frequently used item.
36 * It consists of 3 parts: a queue, a heap, and a map.
37 * The map is used for lookup by key. The queue is a staging area for
38 * values that are too new to have an accurate frequency, and the heap
39 * serves as a priority queue for looking up the element that has
40 * the lowest frequency.
42 * Frequencies are updated only when the hit count hits a multiple of the
43 * period option. When a frequency is updated, the heap is also updated
44 * to maintain the heap property. Frequency should be locked by the queue
45 * lock since it must not change during heap operations.
47 * There are two locks that make it thread safe. There is read/write lock
48 * for the map, and a lock for the queue/heap. The acquisition order
49 * is map then queue/heap.
51 * An element is in the map if it is in the queue xor the heap.
52 * If max capacity is 0, then all queue operations are disabled.
53 * Elements can be added to the map but not the queue.
55 template<class K, class V, class H, class E,
56 class D = LFUNullDestructor<K, V> >
57 class LFUTable {
58 class Node {
59 public:
60 Node(const K &k) : key(k), prev(NULL), next(NULL), hits(0), heapIndex(0),
61 immortal(false), m_freq(0), m_timestamp(time(NULL)) {}
62 Node(const K &k, const V &v)
63 : key(k), val(v), prev(NULL), next(NULL), hits(0), heapIndex(0),
64 immortal(false), m_freq(0), m_timestamp(time(NULL)) {}
65 ~Node() {
66 D d;
67 d(key, val);
69 const K &key;
70 V val;
71 Node *prev;
72 Node *next;
73 std::atomic<uint64> hits;
74 uint heapIndex;
75 bool immortal;
78 // Frequency updates must be accompanied by heap updates since otherwise
79 // it may break the heap property. It should probably be under lock too.
80 bool updateFrequency() {
81 double lifetime = time(NULL) - m_timestamp;
82 double oldFreq = m_freq;
83 if (lifetime > 0) {
84 m_freq = hits / lifetime;
86 return m_freq > oldFreq;
88 double frequency() const {
89 return m_freq;
91 time_t timestamp() {
92 return m_timestamp;
94 private:
95 double m_freq;
96 time_t m_timestamp;
98 // The map
99 class Map : public hphp_hash_map<K, Node*, H, E> {};
100 //typedef std::map<K, Node*> Map;
102 public:
103 class LFUTableConstIterator;
104 class LFUTableIterator {
105 public:
106 LFUTableIterator(typename LFUTable::Map::iterator &it) : m_it(it) {}
107 LFUTableIterator(const LFUTableIterator &it) : m_it(it.m_it) {}
108 const K &first() const {
109 return m_it->first;
111 V &second() const {
112 return m_it->second->val;
114 LFUTableIterator &operator++() {
115 ++m_it;
116 return *this;
118 bool operator==(const LFUTableIterator &it) const {
119 return m_it == it.m_it;
121 bool operator!=(const LFUTableIterator &it) const {
122 return m_it != it.m_it;
124 private:
125 friend class LFUTable::LFUTableConstIterator;
126 friend class LFUTable;
127 typename LFUTable::Map::iterator m_it;
129 class LFUTableConstIterator {
130 public:
131 LFUTableConstIterator(typename LFUTable::Map::const_iterator &it)
132 : m_it(it) {}
133 LFUTableConstIterator(const LFUTableConstIterator &it) : m_it(it.m_it) {
135 LFUTableConstIterator(const LFUTableIterator &it) : m_it(it.m_it) {
137 const K &first() const {
138 return m_it->first;
140 const V &second() const {
141 return m_it->second->val;
143 LFUTableConstIterator &operator++() {
144 ++m_it;
145 return *this;
147 bool operator==(const LFUTableConstIterator &it) const {
148 return m_it == it.m_it;
150 bool operator!=(const LFUTableConstIterator &it) const {
151 return m_it != it.m_it;
153 private:
154 friend class LFUTable;
155 typename LFUTable::Map::const_iterator m_it;
158 * AtomicReader is used to perform a lookup under the internal
159 * lock of the table. It takes the read lock.
161 class AtomicReader {
162 public:
163 virtual ~AtomicReader() {}
164 virtual void read(const K &key, const V &val) = 0;
167 * AtomicUpdater is used to perform a update/insert under the internal
168 * lock of the table. It takes the write lock.
169 * The update method is given a reference to the value in the table and
170 * a bool that specifies if the element was newly added.
171 * It the method returns false, the element is deleted.
173 class AtomicUpdater {
174 public:
175 virtual ~AtomicUpdater() {}
176 virtual bool update(const K &key, V &val, bool newlyCreated) = 0;
179 public:
180 LFUTable(time_t maturity, size_t maxCap, int updatePeriod)
181 : m_head(NULL), m_tail(NULL), m_immortalCount(0),
182 m_maturityThreshold(maturity), m_maximumCapacity(maxCap),
183 m_updatePeriod(updatePeriod) {
184 //Lfu table is currently buggy and at the moment not worth fixing
185 assert(false);
187 ~LFUTable() {
188 clear();
191 typedef LFUTableConstIterator const_iterator;
192 typedef LFUTableIterator iterator;
194 const_iterator begin() const {
195 return const_iterator(m_map.begin());
197 const_iterator end() const {
198 return const_iterator(m_map.end());
200 iterator begin() {
201 typename Map::iterator it = m_map.begin();
202 return iterator(it);
204 iterator end() {
205 typename Map::iterator it = m_map.end();
206 return iterator(it);
209 void insert(const K &k, const V &v) {
210 WriteLock lock(m_mapLock);
211 if (_contains(k)) {
212 _erase(k);
214 _makeRoom();
215 // Add to the map
216 typename Map::iterator ins = m_map.insert(std::pair<const K, Node*>(k,NULL))
217 .first;
218 Node *n = new Node(ins->first, v);
219 ins->second = n;
220 // Add to queue
221 insertQueue(n);
224 bool atomicRead(const K &k, AtomicReader &reader) {
225 ReadLock lock(m_mapLock);
226 Node *n = _getNode(k, false);
227 if (n) {
228 reader.read(n->key, n->val);
229 return true;
231 return false;
234 void atomicForeach(AtomicReader &reader) {
235 ReadLock lock(m_mapLock);
236 for (typename Map::const_iterator it = m_map.begin();
237 it != m_map.end(); ++it) {
238 reader.read(it->first, it->second->val);
242 void atomicUpdate(const K &k, AtomicUpdater &updater, bool createNew,
243 bool immortal = false) {
244 WriteLock lock(m_mapLock);
245 bool erase = false;
246 Node *n = _getNode(k, false);
247 bool created = false;
248 if (!n && createNew) {
249 n = _createNode(k, immortal);
250 created = true;
252 if (n) {
253 erase = updater.update(n->key, n->val, created);
255 if (erase) {
256 _erase(k);
260 void erase(const K &k) {
261 WriteLock lock(m_mapLock);
262 _erase(k);
264 void erase(const iterator &it) {
265 WriteLock lock(m_mapLock);
266 _erase(it);
269 bool lookup(const K &k, V &result) {
270 ReadLock lock(m_mapLock);
271 Node *n = _getNode(k, false);
272 if (n) {
273 result = n->val;
274 return true;
276 return false;
278 iterator find(const K &k) {
279 ReadLock lock(m_mapLock);
280 typename Map::iterator it = m_map.find(k);
281 if (it != m_map.end()) {
282 Node *n = it->second;
283 _bumpNode(n);
285 return iterator(it);
287 V &operator[](const K &k) {
288 WriteLock lock(m_mapLock);
289 return _getNode(k, true)->val;
292 size_t size() const {
293 return m_map.size();
295 size_t maximumCapacity() const {
296 return m_maximumCapacity;
298 size_t immortalCount() const {
299 return m_immortalCount;
302 void clear() {
303 WriteLock lock(m_mapLock);
304 typename Map::iterator it = m_map.begin();
305 while (it != m_map.end()) {
306 typename Map::iterator cit = it++;
307 Node *n = cit->second;
308 if (n->immortal) {
309 // Trade immortal for mortal
310 ++m_maximumCapacity;
311 --m_immortalCount;
312 } else {
313 removeQueue(cit->second);
315 m_map.erase(cit);
316 delete n;
318 assert(m_head == NULL);
319 assert(m_tail == NULL);
320 assert(m_heap.size() == 0);
323 bool check() {
324 WriteLock lock(m_mapLock);
325 Lock qlock(m_queueLock);
327 bool fail = false;
328 Node *prev = NULL;
329 Node *n = m_head;
330 while (n) {
331 if (m_map.find(n->key) == m_map.end()) {
332 fail = true;
333 Logger::Error("Value in queue not in map");
334 assert(!fail);
336 if (n->prev != prev) {
337 fail = true;
338 Logger::Error("Queue list corrupted");
339 assert(!fail);
341 prev = n;
342 n = n->next;
343 if (!n && prev != m_tail) {
344 fail = true;
345 Logger::Error("Queue tail incorrect");
346 assert(!fail);
349 uint hsize = m_heap.size();
350 for (uint i = 0; i < hsize; i++) {
351 Node *n = m_heap[i];
352 if (m_map.find(n->key) == m_map.end()) {
353 fail = true;
354 Logger::Error("Value in queue not in heap");
355 assert(!fail);
357 size_t child = (i+1) * 2;
358 if (child <= hsize && n->frequency() > m_heap[child-1]->frequency()) {
359 fail = true;
360 Logger::Error("Heap property violated");
361 assert(!fail);
363 child++;
364 if (child <= hsize && n->frequency() > m_heap[child-1]->frequency()) {
365 fail = true;
366 Logger::Error("Heap property violated");
367 assert(!fail);
370 return !fail;
373 private:
375 private:
376 //////////////////////////////////////////////////////////////////////////////
377 // These methods are to be used when the map lock is already acquired.
378 bool _contains(const K &k) {
379 return m_map.find(k) != m_map.end();
381 void _erase(const K &k) {
382 typename Map::iterator it = m_map.find(k);
383 if (it != m_map.end()) {
384 _erase(it);
387 void _erase(const iterator &it) {
388 Node *n = it.m_it->second;
389 m_map.erase(it.m_it);
390 if (n->immortal) {
391 // We trade an immortal for a mortal value
392 --m_immortalCount;
393 ++m_maximumCapacity;
394 } else {
395 removeQueue(n);
397 delete n;
400 void _bumpNode(Node *n) {
401 if (!m_maximumCapacity) return;
402 if (n->hits.fetch_add(1, std::memory_order_relaxed) % m_updatePeriod == 0) {
403 Lock lock(m_queueLock);
404 // hits could have increased between incrementing and taking the lock,
405 // but it does not matter.
406 // It is also possible (if unlikely) that two threads could be trying to
407 // do this update, but that doesn't matter either.
408 bool increase = n->updateFrequency();
409 _updateQueue(n, increase);
412 void _makeRoom() {
413 if (!m_maximumCapacity) return;
414 while (size() - m_immortalCount >= m_maximumCapacity) {
415 Node *m = popQueue();
416 assert(m);
417 m_map.erase(m->key);
418 delete m;
421 Node *_getNode(const K &k, bool force, bool immortal = false) {
422 typename Map::iterator it = m_map.find(k);
423 if (it != m_map.end()) {
424 _bumpNode(it->second);
425 return it->second;
427 if (force) {
428 return _createNode(k, immortal);
429 } else {
430 return NULL;
434 Node *_createNode(const K &k, bool immortal = false) {
435 if (immortal) {
436 ++m_immortalCount;
437 } else {
438 _makeRoom();
440 // Add to the map
441 typename Map::iterator ins = m_map.insert(std::pair<const K, Node*>(k,NULL))
442 .first;
443 Node *n = new Node(ins->first);
444 ins->second = n;
445 n->immortal = immortal;
446 if (!immortal) {
447 // Add to queue
448 insertQueue(n);
450 return n;
453 void _dumpStatus() {
454 std::cout << "in map:\n";
455 for (typename Map::const_iterator it = m_map.begin(); it != m_map.end();
456 ++it) {
457 std::cout << it->first << " : " << it->second->frequency() << "\n";
459 if (!m_maximumCapacity) return;
460 std::cout << "in queue:\n";
461 Node *n = m_head;
462 while (n) {
463 std::cout << n->key << " : " << n->frequency() << "\n";
464 n = n->next;
466 std::cout << "in heap\n";
467 for (uint i = 0; i < m_heap.size(); i++) {
468 std::cout << m_heap[i]->key << " : " << m_heap[i]->frequency() << "\n";
472 private:
473 //////////////////////////////////////////////////////////////////////////////
474 // Queue interface methods
476 // Add item to queue
477 void insertQueue(Node *item) {
478 if (!m_maximumCapacity) return;
479 Lock lock(m_queueLock);
480 // Add to head of list
481 item->heapIndex = 0;
482 item->prev = NULL;
483 item->next = m_head;
484 if (m_head) m_head->prev = item;
485 m_head = item;
486 if (!m_tail) m_tail = item;
487 _shiftMature();
490 void removeQueue(Node *item) {
491 if (!m_maximumCapacity) return;
492 Lock lock(m_queueLock);
493 if (item == m_head) {
494 m_head = item->next;
496 if (item == m_tail) {
497 m_tail = item->prev;
499 if (item->prev) {
500 item->prev->next = item->next;
502 if (item->next) {
503 item->next->prev = item->prev;
505 item->prev = item->next = NULL;
506 if (item->heapIndex != 0) {
507 int pos = item->heapIndex;
508 int last = m_heap.size();
509 item->heapIndex = 0;
510 m_heap[pos-1] = m_heap[last-1];
511 m_heap[pos-1]->heapIndex = pos;
512 m_heap.pop_back();
513 _heapifyDown(pos);
516 // Pop the minimum
517 Node *popQueue() {
518 if (!m_maximumCapacity) return NULL;
519 Lock lock(m_queueLock);
520 _shiftMature();
521 if (m_heap.size() == 0) {
522 Node *res = m_tail;
523 if (!res) return NULL;
524 if (res->prev) {
525 res->prev->next = NULL;
527 m_tail = res->prev;
528 res->prev = NULL;
529 if (res == m_head) m_head = NULL;
530 return res;
532 Node *res = m_heap[0];
533 res->heapIndex = 0;
534 m_heap[0] = m_heap[m_heap.size() - 1];
535 m_heap[0]->heapIndex = 1;
536 m_heap.pop_back();
537 _heapifyDown(1);
538 return res;
541 //////////////////////////////////////////////////////////////////////////////
542 // Queue helper methods
544 void _shiftMature() {
545 // Move mature items to heap
546 time_t now = time(NULL);
547 while (m_tail && ((now - m_tail->timestamp()) >= m_maturityThreshold)) {
548 Node *last = m_tail;
549 if (last->prev) {
550 last->prev->next = NULL;
552 m_tail = last->prev;
553 if (m_head == last) m_head = NULL;
554 last->prev = NULL;
555 last->updateFrequency();
556 _heapInsert(last);
560 void _updateQueue(const Node *item, bool increase) {
561 if (!m_maximumCapacity) return;
562 if (item->heapIndex != 0) {
563 if (increase) {
564 _heapifyDown(item->heapIndex);
565 } else {
566 _heapifyUp(item->heapIndex);
571 void _heapInsert(Node *item) {
572 m_heap.push_back(item);
573 item->heapIndex = m_heap.size();
574 int pos = m_heap.size();
575 _heapifyUp(pos);
578 void _heapifyUp(int pos) {
579 while (pos != 1) {
580 Node *&child = m_heap[pos - 1];
581 Node *&parent = m_heap[pos/2 - 1];
582 if (parent->frequency() > child->frequency()) {
583 std::swap(child, parent);
584 child->heapIndex = pos;
585 parent->heapIndex = pos/2;
586 pos = pos/2;
587 } else {
588 break;
593 void _heapifyDown(int pos) {
594 int size = m_heap.size();
595 for (;;) {
596 int child;
597 if (pos * 2 > size) break;
598 if (pos * 2 + 1 > size || (m_heap[pos*2 - 1]->frequency() <
599 m_heap[pos*2]->frequency())) {
600 child = pos * 2;
601 } else {
602 child = pos * 2 + 1;
604 if (m_heap[pos - 1]->frequency() > m_heap[child - 1]->frequency()) {
605 std::swap(m_heap[pos - 1], m_heap[child - 1]);
606 m_heap[pos - 1]->heapIndex = pos;
607 m_heap[child - 1]->heapIndex = child;
608 pos = child;
609 } else {
610 break;
615 private:
616 // The queue. A doubly linked list since I need to remove elements at will.
617 Node *m_head;
618 Node *m_tail;
619 // The heap
620 std::vector<Node*> m_heap;
621 Map m_map;
622 size_t m_immortalCount;
624 // Locks in acquisition order
625 ReadWriteMutex m_mapLock;
626 Mutex m_queueLock;
628 // Options
629 time_t m_maturityThreshold;
630 size_t m_maximumCapacity;
631 int m_updatePeriod;
634 ///////////////////////////////////////////////////////////////////////////////
637 #endif // __HPHP_UTIL_LFU_TABLE_H__