mbcache2: Use referenced bit instead of LRU

From: Jan Kara <jack@suse.cz>
Currently we maintain a perfect LRU list by moving an entry to the tail of
the list whenever it gets used. However, these operations on the cache-global
list are relatively expensive.
In this patch we switch to lazy updates of the LRU list. Whenever an entry
gets used, we set a referenced bit on it. When reclaiming entries, we give
referenced entries another round in the list instead of evicting them. Since
the list is not a real LRU anymore, rename it to just 'list'.
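The reclaim pass is thus essentially a second-chance (clock-style) scan. Below
is a minimal standalone sketch of that policy; it uses hypothetical names, a
plain singly linked list and no locking, so it only illustrates the idea and
is not the mbcache2 code itself:

/*
 * Sketch of a referenced-bit ("second chance") reclaim policy.
 * Hypothetical names; plain pointers instead of list_head; no locking.
 */
#include <stdbool.h>
#include <stdio.h>

struct entry {
	int id;
	bool referenced;	/* set on use instead of touching the list */
	struct entry *next;
};

/* Using an entry is O(1) and needs no list lock: just set the bit. */
static void touch(struct entry *e)
{
	e->referenced = true;
}

/*
 * Reclaim scan: entries with the referenced bit set get the bit cleared
 * and survive this pass; everything else is dropped from the list.
 */
static struct entry *shrink(struct entry *head)
{
	struct entry **pp = &head;

	while (*pp) {
		struct entry *e = *pp;

		if (e->referenced) {
			e->referenced = false;	/* second chance */
			pp = &e->next;
		} else {
			printf("reclaiming entry %d\n", e->id);
			*pp = e->next;
		}
	}
	return head;
}

int main(void)
{
	struct entry c = { .id = 3, .next = NULL };
	struct entry b = { .id = 2, .next = &c };
	struct entry a = { .id = 1, .next = &b };

	touch(&b);		/* only entry 2 was used recently */
	shrink(&a);		/* reclaims entries 1 and 3, keeps entry 2 */
	return 0;
}

In the patch below, surviving entries are additionally moved to the tail of
c_list so they are scanned last on the next pass, and the referenced bit is
packed into the entry's hash list head pointer rather than stored as a
separate field.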
In my testing this logic gives about a 30% boost to workloads with mostly
unique xattr blocks (e.g. xattr-bench with 10 files and 10000 unique values).
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/mbcache2.c            | 87 +++++++++++++++++++++++++++++++-----------------
 include/linux/mbcache2.h | 11 +++---
 2 files changed, 63 insertions(+), 35 deletions(-)
diff --git a/fs/mbcache2.c b/fs/mbcache2.c
index 3e3198d6b9d6..49f7a6feaa83 100644
--- a/fs/mbcache2.c
+++ b/fs/mbcache2.c
@@ -30,9 +30,9 @@ struct mb2_cache {
 	/* Maximum entries in cache to avoid degrading hash too much */
-	/* Protects c_lru_list, c_entry_count */
-	spinlock_t		c_lru_list_lock;
-	struct list_head	c_lru_list;
+	/* Protects c_list, c_entry_count */
+	spinlock_t		c_list_lock;
+	struct list_head	c_list;
 	/* Number of entries in cache */
 	unsigned long		c_entry_count;
 	struct shrinker		c_shrink;
@@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache;
 static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
				       unsigned int nr_to_scan);

+static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry)
+{
+	return entry->_e_hash_list_head & 1;
+}
+
+static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry)
+{
+	entry->_e_hash_list_head |= 1;
+}
+
+static inline void mb2_cache_entry_clear_referenced(
+					struct mb2_cache_entry *entry)
+{
+	entry->_e_hash_list_head &= ~1;
+}
+
+static inline struct hlist_bl_head *mb2_cache_entry_head(
+					struct mb2_cache_entry *entry)
+{
+	return (struct hlist_bl_head *)
+			(entry->_e_hash_list_head & ~1);
+}
+
 /*
  * Number of entries to reclaim synchronously when there are too many entries
@@ -80,13 +103,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
-	INIT_LIST_HEAD(&entry->e_lru_list);
+	INIT_LIST_HEAD(&entry->e_list);
 	/* One ref for hash, one ref returned */
 	atomic_set(&entry->e_refcnt, 1);
 	entry->e_key = key;
 	entry->e_block = block;
 	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
-	entry->e_hash_list_head = head;
+	entry->_e_hash_list_head = (unsigned long)head;
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
 		if (dup->e_key == key && dup->e_block == block) {
@@ -98,12 +121,12 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
 	hlist_bl_add_head(&entry->e_hash_list, head);
 	hlist_bl_unlock(head);

-	spin_lock(&cache->c_lru_list_lock);
-	list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
+	spin_lock(&cache->c_list_lock);
+	list_add_tail(&entry->e_list, &cache->c_list);
 	/* Grab ref for LRU list */
 	atomic_inc(&entry->e_refcnt);
 	cache->c_entry_count++;
-	spin_unlock(&cache->c_lru_list_lock);
+	spin_unlock(&cache->c_list_lock);
@@ -124,7 +147,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
 	struct hlist_bl_head *head;

 	if (entry)
-		head = entry->e_hash_list_head;
+		head = mb2_cache_entry_head(entry);
 	else
 		head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
@@ -203,13 +226,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
 			/* We keep hash list reference to keep entry alive */
 			hlist_bl_del_init(&entry->e_hash_list);
 			hlist_bl_unlock(head);
-			spin_lock(&cache->c_lru_list_lock);
-			if (!list_empty(&entry->e_lru_list)) {
-				list_del_init(&entry->e_lru_list);
+			spin_lock(&cache->c_list_lock);
+			if (!list_empty(&entry->e_list)) {
+				list_del_init(&entry->e_list);
 				cache->c_entry_count--;
 				atomic_dec(&entry->e_refcnt);
 			}
-			spin_unlock(&cache->c_lru_list_lock);
+			spin_unlock(&cache->c_list_lock);
 			mb2_cache_entry_put(cache, entry);
@@ -222,15 +245,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block);
  * @cache - cache the entry belongs to
  * @entry - entry that got used
  *
- * Move entry in lru list to reflect the fact that it was used.
+ * Marks entry as used to give it higher chances of surviving in cache.
  */
 void mb2_cache_entry_touch(struct mb2_cache *cache,
			   struct mb2_cache_entry *entry)
 {
-	spin_lock(&cache->c_lru_list_lock);
-	if (!list_empty(&entry->e_lru_list))
-		list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
-	spin_unlock(&cache->c_lru_list_lock);
+	mb2_cache_entry_set_referenced(entry);
 }

 EXPORT_SYMBOL(mb2_cache_entry_touch);
@@ -251,18 +271,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
 	struct hlist_bl_head *head;
 	unsigned int shrunk = 0;

-	spin_lock(&cache->c_lru_list_lock);
-	while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
-		entry = list_first_entry(&cache->c_lru_list,
-					 struct mb2_cache_entry, e_lru_list);
-		list_del_init(&entry->e_lru_list);
+	spin_lock(&cache->c_list_lock);
+	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+		entry = list_first_entry(&cache->c_list,
+					 struct mb2_cache_entry, e_list);
+		if (mb2_cache_entry_referenced(entry)) {
+			mb2_cache_entry_clear_referenced(entry);
+			list_move_tail(&entry->e_list, &cache->c_list);
+			continue;
+		}
+		list_del_init(&entry->e_list);
 		cache->c_entry_count--;
 		/*
 		 * We keep LRU list reference so that entry doesn't go away
 		 * from under us.
 		 */
-		spin_unlock(&cache->c_lru_list_lock);
-		head = entry->e_hash_list_head;
+		spin_unlock(&cache->c_list_lock);
+		head = mb2_cache_entry_head(entry);
 		hlist_bl_lock(head);
 		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
 			hlist_bl_del_init(&entry->e_hash_list);
@@ -272,9 +297,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
 		if (mb2_cache_entry_put(cache, entry))
 			shrunk++;
-		spin_lock(&cache->c_lru_list_lock);
+		spin_lock(&cache->c_list_lock);
 	}
-	spin_unlock(&cache->c_lru_list_lock);
+	spin_unlock(&cache->c_list_lock);
@@ -318,8 +343,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits)
 	cache->c_bucket_bits = bucket_bits;
 	cache->c_max_entries = bucket_count << 4;
-	INIT_LIST_HEAD(&cache->c_lru_list);
-	spin_lock_init(&cache->c_lru_list_lock);
+	INIT_LIST_HEAD(&cache->c_list);
+	spin_lock_init(&cache->c_list_lock);
 	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
				GFP_KERNEL);
 	if (!cache->c_hash) {
@@ -361,13 +386,13 @@ void mb2_cache_destroy(struct mb2_cache *cache)
 	 * We don't bother with any locking. Cache must not be used at this
 	 * point.
 	 */
-	list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
+	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
 		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
 			hlist_bl_del_init(&entry->e_hash_list);
 			atomic_dec(&entry->e_refcnt);
-		list_del(&entry->e_lru_list);
+		list_del(&entry->e_list);
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb2_cache_entry_put(cache, entry);
diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h
index b6f160ff2533..c934843a6a31 100644
--- a/include/linux/mbcache2.h
+++ b/include/linux/mbcache2.h
 struct mb2_cache_entry {
-	/* LRU list - protected by cache->c_lru_list_lock */
-	struct list_head	e_lru_list;
+	/* List of entries in cache - protected by cache->c_list_lock */
+	struct list_head	e_list;
 	/* Hash table list - protected by bitlock in e_hash_list_head */
 	struct hlist_bl_node	e_hash_list;
@@ -19,8 +19,11 @@ struct mb2_cache_entry {
 	/* Block number of hashed block - stable during lifetime of the entry */
 	sector_t		e_block;
-	/* Head of hash list (for list bit lock) - stable */
-	struct hlist_bl_head	*e_hash_list_head;
+	/*
+	 * Head of hash list (for list bit lock) - stable. Combined with
+	 * referenced bit of entry
+	 */
+	unsigned long		_e_hash_list_head;
 };

 struct mb2_cache *mb2_cache_create(int bucket_bits);
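
A note on the _e_hash_list_head encoding above: a struct hlist_bl_head is at
least pointer-aligned, so the low bit of its address is always zero and can
carry the referenced flag in the same word. Below is a minimal standalone
illustration of this pointer-tagging trick with hypothetical names; it is not
kernel code:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct hlist_bl_head; any type with alignment > 1 works. */
struct bucket {
	int dummy;
};

/* Fold a boolean flag into the (always zero) low bit of the pointer. */
static uintptr_t pack(struct bucket *b, bool referenced)
{
	assert(((uintptr_t)b & 1) == 0);	/* pointer must be aligned */
	return (uintptr_t)b | (referenced ? 1 : 0);
}

/* Mask the flag bit off to recover the original pointer. */
static struct bucket *unpack_ptr(uintptr_t v)
{
	return (struct bucket *)(v & ~(uintptr_t)1);
}

static bool unpack_referenced(uintptr_t v)
{
	return v & 1;
}

int main(void)
{
	struct bucket b = { .dummy = 42 };
	uintptr_t packed = pack(&b, true);

	printf("referenced=%d value=%d\n",
	       unpack_referenced(packed), unpack_ptr(packed)->dummy);
	return 0;
}

The kernel side stores the packed value in an unsigned long, which is large
enough to hold a pointer, and the helpers mb2_cache_entry_head() and
mb2_cache_entry_*_referenced() in the patch above do the corresponding
unpacking and flag manipulation.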