2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef __HPHP_UTIL_LFU_TABLE_H__
18 #define __HPHP_UTIL_LFU_TABLE_H__
20 #include <util/base.h>
21 #include <util/lock.h>
24 ///////////////////////////////////////////////////////////////////////////////
26 template<class K
, class V
>
27 class LFUNullDestructor
{
29 void operator()(const K
&k
, V
&v
) {}
33 * A lookup table with a maximum size. Once it reaches maximum size,
34 * inserts will evict the least frequently used item.
36 * It consists of 3 parts: a queue, a heap, and a map.
37 * The map is used for lookup by key. The queue is a staging area for
38 * values that are too new to have an accurate frequency, and the heap
39 * serves as a priority queue for looking up the element that has
40 * the lowest frequency.
42 * Frequencies are updated only when the hit count hits a multiple of the
43 * period option. When a frequency is updated, the heap is also updated
44 * to maintain the heap property. Frequency should be locked by the queue
45 * lock since it must not change during heap operations.
47 * There are two locks that make it thread safe. There is read/write lock
48 * for the map, and a lock for the queue/heap. The acquisition order
49 * is map then queue/heap.
51 * An element is in the map if it is in the queue xor the heap.
52 * If max capacity is 0, then all queue operations are disabled.
53 * Elements can be added to the map but not the queue.
55 template<class K
, class V
, class H
, class E
,
56 class D
= LFUNullDestructor
<K
, V
> >
60 Node(const K
&k
) : key(k
), prev(NULL
), next(NULL
), hits(0), heapIndex(0),
61 immortal(false), m_freq(0), m_timestamp(time(NULL
)) {}
62 Node(const K
&k
, const V
&v
)
63 : key(k
), val(v
), prev(NULL
), next(NULL
), hits(0), heapIndex(0),
64 immortal(false), m_freq(0), m_timestamp(time(NULL
)) {}
73 std::atomic
<uint64
> hits
;
78 // Frequency updates must be accompanied by heap updates since otherwise
79 // it may break the heap property. It should probably be under lock too.
80 bool updateFrequency() {
81 double lifetime
= time(NULL
) - m_timestamp
;
82 double oldFreq
= m_freq
;
84 m_freq
= hits
/ lifetime
;
86 return m_freq
> oldFreq
;
88 double frequency() const {
99 class Map
: public hphp_hash_map
<K
, Node
*, H
, E
> {};
100 //typedef std::map<K, Node*> Map;
103 class LFUTableConstIterator
;
104 class LFUTableIterator
{
106 LFUTableIterator(typename
LFUTable::Map::iterator
&it
) : m_it(it
) {}
107 LFUTableIterator(const LFUTableIterator
&it
) : m_it(it
.m_it
) {}
108 const K
&first() const {
112 return m_it
->second
->val
;
114 LFUTableIterator
&operator++() {
118 bool operator==(const LFUTableIterator
&it
) const {
119 return m_it
== it
.m_it
;
121 bool operator!=(const LFUTableIterator
&it
) const {
122 return m_it
!= it
.m_it
;
125 friend class LFUTable::LFUTableConstIterator
;
126 friend class LFUTable
;
127 typename
LFUTable::Map::iterator m_it
;
129 class LFUTableConstIterator
{
131 LFUTableConstIterator(typename
LFUTable::Map::const_iterator
&it
)
133 LFUTableConstIterator(const LFUTableConstIterator
&it
) : m_it(it
.m_it
) {
135 LFUTableConstIterator(const LFUTableIterator
&it
) : m_it(it
.m_it
) {
137 const K
&first() const {
140 const V
&second() const {
141 return m_it
->second
->val
;
143 LFUTableConstIterator
&operator++() {
147 bool operator==(const LFUTableConstIterator
&it
) const {
148 return m_it
== it
.m_it
;
150 bool operator!=(const LFUTableConstIterator
&it
) const {
151 return m_it
!= it
.m_it
;
154 friend class LFUTable
;
155 typename
LFUTable::Map::const_iterator m_it
;
158 * AtomicReader is used to perform a lookup under the internal
159 * lock of the table. It takes the read lock.
163 virtual ~AtomicReader() {}
164 virtual void read(const K
&key
, const V
&val
) = 0;
167 * AtomicUpdater is used to perform a update/insert under the internal
168 * lock of the table. It takes the write lock.
169 * The update method is given a reference to the value in the table and
170 * a bool that specifies if the element was newly added.
171 * It the method returns false, the element is deleted.
173 class AtomicUpdater
{
175 virtual ~AtomicUpdater() {}
176 virtual bool update(const K
&key
, V
&val
, bool newlyCreated
) = 0;
180 LFUTable(time_t maturity
, size_t maxCap
, int updatePeriod
)
181 : m_head(NULL
), m_tail(NULL
), m_immortalCount(0),
182 m_maturityThreshold(maturity
), m_maximumCapacity(maxCap
),
183 m_updatePeriod(updatePeriod
) {
184 //Lfu table is currently buggy and at the moment not worth fixing
191 typedef LFUTableConstIterator const_iterator
;
192 typedef LFUTableIterator iterator
;
194 const_iterator
begin() const {
195 return const_iterator(m_map
.begin());
197 const_iterator
end() const {
198 return const_iterator(m_map
.end());
201 typename
Map::iterator it
= m_map
.begin();
205 typename
Map::iterator it
= m_map
.end();
209 void insert(const K
&k
, const V
&v
) {
210 WriteLock
lock(m_mapLock
);
216 typename
Map::iterator ins
= m_map
.insert(std::pair
<const K
, Node
*>(k
,NULL
))
218 Node
*n
= new Node(ins
->first
, v
);
224 bool atomicRead(const K
&k
, AtomicReader
&reader
) {
225 ReadLock
lock(m_mapLock
);
226 Node
*n
= _getNode(k
, false);
228 reader
.read(n
->key
, n
->val
);
234 void atomicForeach(AtomicReader
&reader
) {
235 ReadLock
lock(m_mapLock
);
236 for (typename
Map::const_iterator it
= m_map
.begin();
237 it
!= m_map
.end(); ++it
) {
238 reader
.read(it
->first
, it
->second
->val
);
242 void atomicUpdate(const K
&k
, AtomicUpdater
&updater
, bool createNew
,
243 bool immortal
= false) {
244 WriteLock
lock(m_mapLock
);
246 Node
*n
= _getNode(k
, false);
247 bool created
= false;
248 if (!n
&& createNew
) {
249 n
= _createNode(k
, immortal
);
253 erase
= updater
.update(n
->key
, n
->val
, created
);
260 void erase(const K
&k
) {
261 WriteLock
lock(m_mapLock
);
264 void erase(const iterator
&it
) {
265 WriteLock
lock(m_mapLock
);
269 bool lookup(const K
&k
, V
&result
) {
270 ReadLock
lock(m_mapLock
);
271 Node
*n
= _getNode(k
, false);
278 iterator
find(const K
&k
) {
279 ReadLock
lock(m_mapLock
);
280 typename
Map::iterator it
= m_map
.find(k
);
281 if (it
!= m_map
.end()) {
282 Node
*n
= it
->second
;
287 V
&operator[](const K
&k
) {
288 WriteLock
lock(m_mapLock
);
289 return _getNode(k
, true)->val
;
292 size_t size() const {
295 size_t maximumCapacity() const {
296 return m_maximumCapacity
;
298 size_t immortalCount() const {
299 return m_immortalCount
;
303 WriteLock
lock(m_mapLock
);
304 typename
Map::iterator it
= m_map
.begin();
305 while (it
!= m_map
.end()) {
306 typename
Map::iterator cit
= it
++;
307 Node
*n
= cit
->second
;
309 // Trade immortal for mortal
313 removeQueue(cit
->second
);
318 assert(m_head
== NULL
);
319 assert(m_tail
== NULL
);
320 assert(m_heap
.size() == 0);
324 WriteLock
lock(m_mapLock
);
325 Lock
qlock(m_queueLock
);
331 if (m_map
.find(n
->key
) == m_map
.end()) {
333 Logger::Error("Value in queue not in map");
336 if (n
->prev
!= prev
) {
338 Logger::Error("Queue list corrupted");
343 if (!n
&& prev
!= m_tail
) {
345 Logger::Error("Queue tail incorrect");
349 uint hsize
= m_heap
.size();
350 for (uint i
= 0; i
< hsize
; i
++) {
352 if (m_map
.find(n
->key
) == m_map
.end()) {
354 Logger::Error("Value in queue not in heap");
357 size_t child
= (i
+1) * 2;
358 if (child
<= hsize
&& n
->frequency() > m_heap
[child
-1]->frequency()) {
360 Logger::Error("Heap property violated");
364 if (child
<= hsize
&& n
->frequency() > m_heap
[child
-1]->frequency()) {
366 Logger::Error("Heap property violated");
376 //////////////////////////////////////////////////////////////////////////////
377 // These methods are to be used when the map lock is already acquired.
378 bool _contains(const K
&k
) {
379 return m_map
.find(k
) != m_map
.end();
381 void _erase(const K
&k
) {
382 typename
Map::iterator it
= m_map
.find(k
);
383 if (it
!= m_map
.end()) {
387 void _erase(const iterator
&it
) {
388 Node
*n
= it
.m_it
->second
;
389 m_map
.erase(it
.m_it
);
391 // We trade an immortal for a mortal value
400 void _bumpNode(Node
*n
) {
401 if (!m_maximumCapacity
) return;
402 if (n
->hits
.fetch_add(1, std::memory_order_relaxed
) % m_updatePeriod
== 0) {
403 Lock
lock(m_queueLock
);
404 // hits could have increased between incrementing and taking the lock,
405 // but it does not matter.
406 // It is also possible (if unlikely) that two threads could be trying to
407 // do this update, but that doesn't matter either.
408 bool increase
= n
->updateFrequency();
409 _updateQueue(n
, increase
);
413 if (!m_maximumCapacity
) return;
414 while (size() - m_immortalCount
>= m_maximumCapacity
) {
415 Node
*m
= popQueue();
421 Node
*_getNode(const K
&k
, bool force
, bool immortal
= false) {
422 typename
Map::iterator it
= m_map
.find(k
);
423 if (it
!= m_map
.end()) {
424 _bumpNode(it
->second
);
428 return _createNode(k
, immortal
);
434 Node
*_createNode(const K
&k
, bool immortal
= false) {
441 typename
Map::iterator ins
= m_map
.insert(std::pair
<const K
, Node
*>(k
,NULL
))
443 Node
*n
= new Node(ins
->first
);
445 n
->immortal
= immortal
;
454 std::cout
<< "in map:\n";
455 for (typename
Map::const_iterator it
= m_map
.begin(); it
!= m_map
.end();
457 std::cout
<< it
->first
<< " : " << it
->second
->frequency() << "\n";
459 if (!m_maximumCapacity
) return;
460 std::cout
<< "in queue:\n";
463 std::cout
<< n
->key
<< " : " << n
->frequency() << "\n";
466 std::cout
<< "in heap\n";
467 for (uint i
= 0; i
< m_heap
.size(); i
++) {
468 std::cout
<< m_heap
[i
]->key
<< " : " << m_heap
[i
]->frequency() << "\n";
473 //////////////////////////////////////////////////////////////////////////////
474 // Queue interface methods
477 void insertQueue(Node
*item
) {
478 if (!m_maximumCapacity
) return;
479 Lock
lock(m_queueLock
);
480 // Add to head of list
484 if (m_head
) m_head
->prev
= item
;
486 if (!m_tail
) m_tail
= item
;
490 void removeQueue(Node
*item
) {
491 if (!m_maximumCapacity
) return;
492 Lock
lock(m_queueLock
);
493 if (item
== m_head
) {
496 if (item
== m_tail
) {
500 item
->prev
->next
= item
->next
;
503 item
->next
->prev
= item
->prev
;
505 item
->prev
= item
->next
= NULL
;
506 if (item
->heapIndex
!= 0) {
507 int pos
= item
->heapIndex
;
508 int last
= m_heap
.size();
510 m_heap
[pos
-1] = m_heap
[last
-1];
511 m_heap
[pos
-1]->heapIndex
= pos
;
518 if (!m_maximumCapacity
) return NULL
;
519 Lock
lock(m_queueLock
);
521 if (m_heap
.size() == 0) {
523 if (!res
) return NULL
;
525 res
->prev
->next
= NULL
;
529 if (res
== m_head
) m_head
= NULL
;
532 Node
*res
= m_heap
[0];
534 m_heap
[0] = m_heap
[m_heap
.size() - 1];
535 m_heap
[0]->heapIndex
= 1;
541 //////////////////////////////////////////////////////////////////////////////
542 // Queue helper methods
544 void _shiftMature() {
545 // Move mature items to heap
546 time_t now
= time(NULL
);
547 while (m_tail
&& ((now
- m_tail
->timestamp()) >= m_maturityThreshold
)) {
550 last
->prev
->next
= NULL
;
553 if (m_head
== last
) m_head
= NULL
;
555 last
->updateFrequency();
560 void _updateQueue(const Node
*item
, bool increase
) {
561 if (!m_maximumCapacity
) return;
562 if (item
->heapIndex
!= 0) {
564 _heapifyDown(item
->heapIndex
);
566 _heapifyUp(item
->heapIndex
);
571 void _heapInsert(Node
*item
) {
572 m_heap
.push_back(item
);
573 item
->heapIndex
= m_heap
.size();
574 int pos
= m_heap
.size();
578 void _heapifyUp(int pos
) {
580 Node
*&child
= m_heap
[pos
- 1];
581 Node
*&parent
= m_heap
[pos
/2 - 1];
582 if (parent
->frequency() > child
->frequency()) {
583 std::swap(child
, parent
);
584 child
->heapIndex
= pos
;
585 parent
->heapIndex
= pos
/2;
593 void _heapifyDown(int pos
) {
594 int size
= m_heap
.size();
597 if (pos
* 2 > size
) break;
598 if (pos
* 2 + 1 > size
|| (m_heap
[pos
*2 - 1]->frequency() <
599 m_heap
[pos
*2]->frequency())) {
604 if (m_heap
[pos
- 1]->frequency() > m_heap
[child
- 1]->frequency()) {
605 std::swap(m_heap
[pos
- 1], m_heap
[child
- 1]);
606 m_heap
[pos
- 1]->heapIndex
= pos
;
607 m_heap
[child
- 1]->heapIndex
= child
;
616 // The queue. A doubly linked list since I need to remove elements at will.
620 std::vector
<Node
*> m_heap
;
622 size_t m_immortalCount
;
624 // Locks in acquisition order
625 ReadWriteMutex m_mapLock
;
629 time_t m_maturityThreshold
;
630 size_t m_maximumCapacity
;
634 ///////////////////////////////////////////////////////////////////////////////
637 #endif // __HPHP_UTIL_LFU_TABLE_H__