1 ext4: use a list to track all reclaimable objects for extent status tree
3 From: Zheng Liu <wenqing.lz@taobao.com>
5 Currently the shrinker needs to take a long time to reclaim some objects
6 from an extent status tree because there are a lot of objects that are
7 delayed. These objects could not be reclaimed because ext4 uses them to
8 finish seeking data/hole, finding delayed ranges, and other work. If an
9 rb-tree has a large number of delayed objects, shrinker should scan more
10 objects and the latency will be high. This commit uses a list to track
11 all reclaimable objects in order to reduce the latency when the shrinker
12 tries to reclaim some objects from an extent status tree.
14 Cc: Andreas Dilger <adilger.kernel@dilger.ca>
15 Cc: Jan Kara <jack@suse.cz>
16 Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
17 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
19 fs/ext4/extents_status.c | 74 ++++++++++++++++++++++++++++----------------------
20 fs/ext4/extents_status.h | 2 ++
21 2 files changed, 44 insertions(+), 32 deletions(-)
23 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
24 index 4768f7f..e845c4f 100644
25 --- a/fs/ext4/extents_status.c
26 +++ b/fs/ext4/extents_status.c
27 @@ -148,7 +148,8 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
28 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
30 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
32 + struct list_head *freeable,
34 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
35 struct ext4_inode_info *locked_ei);
37 @@ -171,6 +172,7 @@ void ext4_exit_es(void)
38 void ext4_es_init_tree(struct ext4_es_tree *tree)
41 + INIT_LIST_HEAD(&tree->list);
42 tree->cache_es = NULL;
45 @@ -302,6 +304,7 @@ static struct extent_status *
46 ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
49 + struct ext4_inode_info *ei = EXT4_I(inode);
50 struct extent_status *es;
51 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
53 @@ -314,12 +317,13 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
54 * We don't count delayed extent because we never try to reclaim them
56 if (!ext4_es_is_delayed(es)) {
57 - EXT4_I(inode)->i_es_shk_nr++;
58 + list_add_tail(&es->list, &ei->i_es_tree.list);
60 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
61 s_es_stats.es_stats_shk_cnt);
64 - EXT4_I(inode)->i_es_all_nr++;
66 percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
69 @@ -327,13 +331,15 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
71 static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
73 - EXT4_I(inode)->i_es_all_nr--;
74 + struct ext4_inode_info *ei = EXT4_I(inode);
77 percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
79 /* Decrease the shrink counter when this es is not delayed */
80 if (!ext4_es_is_delayed(es)) {
81 - BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
82 - EXT4_I(inode)->i_es_shk_nr--;
83 + list_del(&es->list);
84 + BUG_ON(ei->i_es_shk_nr-- == 0);
85 percpu_counter_dec(&EXT4_SB(inode->i_sb)->
86 s_es_stats.es_stats_shk_cnt);
88 @@ -956,11 +962,24 @@ void ext4_es_list_del(struct inode *inode)
89 spin_unlock(&sbi->s_es_lock);
92 +static void dispose_list(struct inode *inode, struct list_head *head)
94 + while (!list_empty(head)) {
95 + struct extent_status *es;
97 + es = list_first_entry(head, struct extent_status, list);
98 + list_del_init(&es->list);
100 + ext4_es_free_extent(inode, es);
104 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
105 struct ext4_inode_info *locked_ei)
107 struct ext4_inode_info *ei;
108 struct ext4_es_stats *es_stats;
109 + LIST_HEAD(freeable);
113 @@ -974,8 +993,6 @@ retry:
114 spin_lock(&sbi->s_es_lock);
115 nr_to_walk = sbi->s_es_nr_inode;
116 while (!list_empty(&sbi->s_es_list) && nr_to_walk-- > 0) {
119 ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
122 @@ -1002,11 +1019,10 @@ retry:
124 /* we only release s_es_lock once we have i_es_lock */
125 spin_unlock(&sbi->s_es_lock);
126 - shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
127 + nr_shrunk += __es_try_to_reclaim_extents(ei, &freeable,
129 write_unlock(&ei->i_es_lock);
131 - nr_shrunk += shrunk;
132 - nr_to_scan -= shrunk;
133 + dispose_list(&ei->vfs_inode, &freeable);
137 @@ -1023,8 +1039,11 @@ retry:
141 - if (locked_ei && nr_shrunk == 0)
142 - nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
143 + if (locked_ei && nr_shrunk == 0) {
144 + nr_shrunk = __es_try_to_reclaim_extents(locked_ei, &freeable,
146 + dispose_list(&locked_ei->vfs_inode, &freeable);
150 scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
151 @@ -1220,12 +1239,12 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
154 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
156 + struct list_head *freeable,
159 struct inode *inode = &ei->vfs_inode;
160 struct ext4_es_tree *tree = &ei->i_es_tree;
161 - struct rb_node *node;
162 - struct extent_status *es;
163 + struct extent_status *es, *tmp;
164 unsigned long nr_shrunk = 0;
165 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
166 DEFAULT_RATELIMIT_BURST);
167 @@ -1237,21 +1256,12 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
169 ext4_warning(inode->i_sb, "forced shrink of precached extents");
171 - node = rb_first(&tree->root);
172 - while (node != NULL) {
173 - es = rb_entry(node, struct extent_status, rb_node);
174 - node = rb_next(&es->rb_node);
176 - * We can't reclaim delayed extent from status tree because
177 - * fiemap, bigallic, and seek_data/hole need to use it.
179 - if (!ext4_es_is_delayed(es)) {
180 - rb_erase(&es->rb_node, &tree->root);
181 - ext4_es_free_extent(inode, es);
183 - if (--nr_to_scan == 0)
186 + list_for_each_entry_safe(es, tmp, &tree->list, list) {
187 + rb_erase(&es->rb_node, &tree->root);
188 + list_move_tail(&es->list, freeable);
190 + if (--*nr_to_scan == 0)
193 tree->cache_es = NULL;
195 diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
196 index 0e6a33e..a45c7fe 100644
197 --- a/fs/ext4/extents_status.h
198 +++ b/fs/ext4/extents_status.h
199 @@ -54,6 +54,7 @@ struct ext4_extent;
201 struct extent_status {
202 struct rb_node rb_node;
203 + struct list_head list;
204 ext4_lblk_t es_lblk; /* first logical block extent covers */
205 ext4_lblk_t es_len; /* length of extent in block */
206 ext4_fsblk_t es_pblk; /* first physical block */
207 @@ -61,6 +62,7 @@ struct extent_status {
209 struct ext4_es_tree {
211 + struct list_head list;
212 struct extent_status *cache_es; /* recently accessed extent */