// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>
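/*
 * erofs_allocpage - grab one page for internal use
 *
 * Reuse a spare page from the caller-provided @pool if one is available,
 * otherwise fall back to alloc_page() with the given gfp mask.
 */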
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}
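/*
 * Optional per-CPU bounce buffers: each CPU gets EROFS_PCPUBUF_NR_PAGES
 * pages of cacheline-aligned scratch space.  erofs_get_pcpubuf() disables
 * preemption before handing out the current CPU's buffer; the caller is
 * presumably expected to re-enable preemption once it is done with it.
 */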
#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif
#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
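/*
 * Take a reference on a workgroup that may concurrently be frozen, e.g.
 * by the shrink path: wait until it is unfrozen, then bump the refcount
 * with cmpxchg so the increment cannot race with a freeze.  Going from a
 * refcount of 1 to 2 means the workgroup is no longer idle, so the
 * global shrink count is decreased.
 */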
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the global shrink count, paired with erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}
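/*
 * Look up a workgroup by index under rcu_read_lock().  If the reference
 * cannot be taken because the workgroup is being frozen or freed, leave
 * the RCU read side and retry the lookup from scratch.
 */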
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}
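/*
 * Publish a freshly initialised workgroup (refcount must still be 1) in
 * the per-superblock radix tree.  An extra reference is taken before the
 * workgroup becomes visible so a racing lookup cannot free it; that
 * reference is dropped again if the insertion fails.
 */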
int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (atomic_read(&grp->refcount) != 1) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	xa_lock(&sbi->workstn_tree);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users, to avoid a potential use-after-free
	 * for lookups that are not serialized by workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
	if (err)
		/*
		 * It's safe to decrease here since the workgroup isn't
		 * visible and its refcount is >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	xa_unlock(&sbi->workstn_tree);
	radix_tree_preload_end();
	return err;
}
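/*
 * __erofs_workgroup_free() performs the final teardown once a workgroup
 * is unreachable: it drops the global shrink accounting and defers the
 * actual free to erofs_workgroup_free_rcu().  erofs_workgroup_put()
 * below releases one reference and, when only the workstation reference
 * remains, re-adds the workgroup to the shrinkable count.
 */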
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}
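/*
 * Drop the last (frozen) reference: unfreeze to a refcount of 0 and hand
 * the workgroup over to __erofs_workgroup_free().
 */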
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}
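/*
 * Try to reclaim a single workgroup: freeze it so no new references can
 * be taken, detach its cached pages, then delete it from the radix tree
 * and free it.  Returns false, leaving the workgroup untouched, if it is
 * still in use or its cached pages cannot be released.
 */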
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If the managed cache is on, the refcount of a workgroup
	 * itself can be negative (frozen). In other words, there is
	 * no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the radix tree. Otherwise some
	 * cached pages could still be attached to the orphaned old
	 * workgroup when the new one becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * Deleting the workgroup cannot fail after it has been frozen;
	 * add a DBG_BUGON anyway to catch unexpected races early.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * If the managed cache is on, the last refcount should indicate
	 * the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}
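/*
 * Scan the workgroup radix tree in batches of PAGEVEC_SIZE and try to
 * release every workgroup found, stopping once nr_shrink workgroups
 * have been freed or the tree has been fully walked.
 */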
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;
	unsigned int i, found;

repeat:
	xa_lock(&sbi->workstn_tree);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	xa_unlock(&sbi->workstn_tree);

	if (i && nr_shrink)
		goto repeat;

	return freed;
}
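/*
 * Shrinker glue: every mounted EROFS instance is kept on a global list,
 * protected by erofs_sb_list_lock, so that one memory shrinker can walk
 * all of them.
 */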
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
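/* Hook a freshly mounted superblock into the global shrink list. */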
void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}
void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}
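/*
 * Shrinker callbacks: erofs_shrink_count() reports the number of idle
 * workgroups tracked in erofs_global_shrink_cnt, and erofs_shrink_scan()
 * walks the mounted superblocks round-robin, shrinking each one until
 * roughly sc->nr_to_scan workgroups have been released.
 */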
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}
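/* Wire the callbacks above into the kernel's shrinker framework. */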
static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};
int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}
void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */