1 ext4: use a garbage collection algorithm to manage object
3 From: Zheng Liu <wenqing.lz@taobao.com>
5 For keeping useful extent cache in the tree, this commit uses a gc-like
6 algorithm to manage object. A new flag called '_ACCESSED' is defined to
7 track whether an extent cache is touched or not. When the shrinker tries
8 to reclaim some ojbects, an extent cache will be moved to the tail of
9 active list from inactive list if this flag is marked. The object in
10 active list will be reclaimed when we are under a high memory pressure.
11 After that, the aged extent cache should be kept as many as possible.
13 Cc: Andreas Dilger <adilger.kernel@dilger.ca>
14 Cc: Jan Kara <jack@suse.cz>
15 Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
16 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
18 fs/ext4/extents_status.c | 42 +++++++++++++++++++++++++++++++++---------
19 fs/ext4/extents_status.h | 31 ++++++++++++++++++++++++-------
20 2 files changed, 57 insertions(+), 16 deletions(-)
22 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
23 index 2f81d1e..2f6bb538 100644
24 --- a/fs/ext4/extents_status.c
25 +++ b/fs/ext4/extents_status.c
26 @@ -149,7 +149,7 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
28 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
29 struct list_head *freeable,
31 + int *nr_to_scan, int force);
32 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
33 struct ext4_inode_info *locked_ei);
35 @@ -172,7 +172,8 @@ void ext4_exit_es(void)
36 void ext4_es_init_tree(struct ext4_es_tree *tree)
39 - INIT_LIST_HEAD(&tree->list);
40 + INIT_LIST_HEAD(&tree->active_list);
41 + INIT_LIST_HEAD(&tree->inactive_list);
42 tree->cache_es = NULL;
45 @@ -317,7 +318,7 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
46 * We don't count delayed extent because we never try to reclaim them
48 if (!ext4_es_is_delayed(es)) {
49 - list_add_tail(&es->list, &ei->i_es_tree.list);
50 + list_add_tail(&es->list, &ei->i_es_tree.inactive_list);
52 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
53 s_es_stats.es_stats_shk_cnt);
54 @@ -787,6 +788,7 @@ out:
55 stats = &EXT4_SB(inode->i_sb)->s_es_stats;
58 + ext4_es_mark_accessed(es1);
59 es->es_lblk = es1->es_lblk;
60 es->es_len = es1->es_len;
61 es->es_pblk = es1->es_pblk;
62 @@ -1027,7 +1029,7 @@ retry:
65 nr_shrunk += __es_try_to_reclaim_extents(ei, &freeable,
67 + &nr_to_scan, retried);
68 write_unlock(&ei->i_es_lock);
69 dispose_list(&ei->vfs_inode, &freeable);
71 @@ -1048,7 +1050,7 @@ retry:
73 if (locked_ei && nr_shrunk == 0) {
74 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, &freeable,
77 dispose_list(&locked_ei->vfs_inode, &freeable);
80 @@ -1247,7 +1249,7 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
82 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
83 struct list_head *freeable,
85 + int *nr_to_scan, int force)
87 struct inode *inode = &ei->vfs_inode;
88 struct ext4_es_tree *tree = &ei->i_es_tree;
89 @@ -1263,13 +1265,35 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
91 ext4_warning(inode->i_sb, "forced shrink of precached extents");
93 - list_for_each_entry_safe(es, tmp, &tree->list, list) {
94 + list_for_each_entry_safe(es, tmp, &tree->inactive_list, list) {
99 + if (ext4_es_is_accessed(es)) {
100 + list_move_tail(&es->list, &tree->active_list);
103 + rb_erase(&es->rb_node, &tree->root);
104 + list_move_tail(&es->list, freeable);
112 + list_for_each_entry_safe(es, tmp, &tree->active_list, list) {
117 rb_erase(&es->rb_node, &tree->root);
118 list_move_tail(&es->list, freeable);
120 - if (--*nr_to_scan == 0)
125 tree->cache_es = NULL;
128 diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
129 index a45c7fe..213e056 100644
130 --- a/fs/ext4/extents_status.h
131 +++ b/fs/ext4/extents_status.h
134 * These flags live in the high bits of extent_status.es_pblk
139 -#define EXTENT_STATUS_WRITTEN (1 << 3)
140 -#define EXTENT_STATUS_UNWRITTEN (1 << 2)
141 -#define EXTENT_STATUS_DELAYED (1 << 1)
142 -#define EXTENT_STATUS_HOLE (1 << 0)
143 +#define EXTENT_STATUS_WRITTEN (1 << 4)
144 +#define EXTENT_STATUS_UNWRITTEN (1 << 3)
145 +#define EXTENT_STATUS_DELAYED (1 << 2)
146 +#define EXTENT_STATUS_HOLE (1 << 1)
148 #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
149 EXTENT_STATUS_UNWRITTEN | \
151 #define ES_UNWRITTEN (1ULL << 62)
152 #define ES_DELAYED (1ULL << 61)
153 #define ES_HOLE (1ULL << 60)
154 +#define ES_ACCESSED (1ULL << 59)
156 #define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \
157 - ES_DELAYED | ES_HOLE)
158 + ES_DELAYED | ES_HOLE | ES_ACCESSED)
162 @@ -62,7 +63,8 @@ struct extent_status {
164 struct ext4_es_tree {
166 - struct list_head list;
167 + struct list_head active_list;
168 + struct list_head inactive_list;
169 struct extent_status *cache_es; /* recently accessed extent */
172 @@ -114,6 +116,21 @@ static inline int ext4_es_is_hole(struct extent_status *es)
173 return (es->es_pblk & ES_HOLE) != 0;
176 +static inline int ext4_es_is_accessed(struct extent_status *es)
178 + return (es->es_pblk & ES_ACCESSED) != 0;
181 +static inline void ext4_es_mark_accessed(struct extent_status *es)
183 + es->es_pblk |= ES_ACCESSED;
186 +static inline void ext4_es_clear_accessed(struct extent_status *es)
188 + es->es_pblk &= ~ES_ACCESSED;
191 static inline unsigned int ext4_es_status(struct extent_status *es)
193 return es->es_pblk >> ES_SHIFT;
198 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
199 the body of a message to majordomo@vger.kernel.org
200 More majordomo info at http://vger.kernel.org/majordomo-info.html