edit commit description
[ext4-patch-queue.git] / use-a-garbage-collection-algorithm-to-manage-object
blobf05155db3d26107e1b2099a6f091ea5c25a21bc8
1 ext4: use a garbage collection algorithm to manage object
3 From: Zheng Liu <wenqing.lz@taobao.com>
5 For keeping useful extent cache in the tree, this commit uses a gc-like
6 algorithm to manage object.  A new flag called '_ACCESSED' is defined to
7 track whether an extent cache is touched or not.  When the shrinker tries
8 to reclaim some ojbects, an extent cache will be moved to the tail of
9 active list from inactive list if this flag is marked.  The object in
10 active list will be reclaimed when we are under a high memory pressure.
11 After that, the aged extent cache should be kept as many as possible.
13 Cc: Andreas Dilger <adilger.kernel@dilger.ca>
14 Cc: Jan Kara <jack@suse.cz>
15 Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
16 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
17 ---
18  fs/ext4/extents_status.c |   42 +++++++++++++++++++++++++++++++++---------
19  fs/ext4/extents_status.h |   31 ++++++++++++++++++++++++-------
20  2 files changed, 57 insertions(+), 16 deletions(-)
22 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
23 index 2f81d1e..2f6bb538 100644
24 --- a/fs/ext4/extents_status.c
25 +++ b/fs/ext4/extents_status.c
26 @@ -149,7 +149,7 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
27                               ext4_lblk_t end);
28  static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
29                                        struct list_head *freeable,
30 -                                      int *nr_to_scan);
31 +                                      int *nr_to_scan, int force);
32  static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
33                        struct ext4_inode_info *locked_ei);
35 @@ -172,7 +172,8 @@ void ext4_exit_es(void)
36  void ext4_es_init_tree(struct ext4_es_tree *tree)
37  {
38         tree->root = RB_ROOT;
39 -       INIT_LIST_HEAD(&tree->list);
40 +       INIT_LIST_HEAD(&tree->active_list);
41 +       INIT_LIST_HEAD(&tree->inactive_list);
42         tree->cache_es = NULL;
43  }
45 @@ -317,7 +318,7 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
46          * We don't count delayed extent because we never try to reclaim them
47          */
48         if (!ext4_es_is_delayed(es)) {
49 -               list_add_tail(&es->list, &ei->i_es_tree.list);
50 +               list_add_tail(&es->list, &ei->i_es_tree.inactive_list);
51                 ei->i_es_shk_nr++;
52                 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
53                                         s_es_stats.es_stats_shk_cnt);
54 @@ -787,6 +788,7 @@ out:
55         stats = &EXT4_SB(inode->i_sb)->s_es_stats;
56         if (found) {
57                 BUG_ON(!es1);
58 +               ext4_es_mark_accessed(es1);
59                 es->es_lblk = es1->es_lblk;
60                 es->es_len = es1->es_len;
61                 es->es_pblk = es1->es_pblk;
62 @@ -1027,7 +1029,7 @@ retry:
63                 }
65                 nr_shrunk += __es_try_to_reclaim_extents(ei, &freeable,
66 -                                                        &nr_to_scan);
67 +                                                        &nr_to_scan, retried);
68                 write_unlock(&ei->i_es_lock);
69                 dispose_list(&ei->vfs_inode, &freeable);
71 @@ -1048,7 +1050,7 @@ retry:
73         if (locked_ei && nr_shrunk == 0) {
74                 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, &freeable,
75 -                                                       &nr_to_scan);
76 +                                                       &nr_to_scan, 1);
77                 dispose_list(&locked_ei->vfs_inode, &freeable);
78         }
80 @@ -1247,7 +1249,7 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
82  static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
83                                        struct list_head *freeable,
84 -                                      int *nr_to_scan)
85 +                                      int *nr_to_scan, int force)
86  {
87         struct inode *inode = &ei->vfs_inode;
88         struct ext4_es_tree *tree = &ei->i_es_tree;
89 @@ -1263,13 +1265,35 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
90             __ratelimit(&_rs))
91                 ext4_warning(inode->i_sb, "forced shrink of precached extents");
93 -       list_for_each_entry_safe(es, tmp, &tree->list, list) {
94 +       list_for_each_entry_safe(es, tmp, &tree->inactive_list, list) {
95 +               if (!*nr_to_scan)
96 +                       goto done;
97 +               --*nr_to_scan;
99 +               if (ext4_es_is_accessed(es)) {
100 +                       list_move_tail(&es->list, &tree->active_list);
101 +                       continue;
102 +               } else {
103 +                       rb_erase(&es->rb_node, &tree->root);
104 +                       list_move_tail(&es->list, freeable);
105 +                       nr_shrunk++;
106 +               }
107 +       }
109 +       if (!force)
110 +               goto done;
112 +       list_for_each_entry_safe(es, tmp, &tree->active_list, list) {
113 +               if (!*nr_to_scan)
114 +                       goto done;
115 +               --*nr_to_scan;
117                 rb_erase(&es->rb_node, &tree->root);
118                 list_move_tail(&es->list, freeable);
119                 nr_shrunk++;
120 -               if (--*nr_to_scan == 0)
121 -                       break;
122         }
124 +done:
125         tree->cache_es = NULL;
126         return nr_shrunk;
128 diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
129 index a45c7fe..213e056 100644
130 --- a/fs/ext4/extents_status.h
131 +++ b/fs/ext4/extents_status.h
132 @@ -29,12 +29,12 @@
133  /*
134   * These flags live in the high bits of extent_status.es_pblk
135   */
136 -#define ES_SHIFT       60
137 +#define ES_SHIFT       59
139 -#define EXTENT_STATUS_WRITTEN  (1 << 3)
140 -#define EXTENT_STATUS_UNWRITTEN (1 << 2)
141 -#define EXTENT_STATUS_DELAYED  (1 << 1)
142 -#define EXTENT_STATUS_HOLE     (1 << 0)
143 +#define EXTENT_STATUS_WRITTEN  (1 << 4)
144 +#define EXTENT_STATUS_UNWRITTEN (1 << 3)
145 +#define EXTENT_STATUS_DELAYED  (1 << 2)
146 +#define EXTENT_STATUS_HOLE     (1 << 1)
148  #define EXTENT_STATUS_FLAGS    (EXTENT_STATUS_WRITTEN | \
149                                  EXTENT_STATUS_UNWRITTEN | \
150 @@ -45,9 +45,10 @@
151  #define ES_UNWRITTEN           (1ULL << 62)
152  #define ES_DELAYED             (1ULL << 61)
153  #define ES_HOLE                        (1ULL << 60)
154 +#define ES_ACCESSED            (1ULL << 59)
156  #define ES_MASK                        (ES_WRITTEN | ES_UNWRITTEN | \
157 -                                ES_DELAYED | ES_HOLE)
158 +                                ES_DELAYED | ES_HOLE | ES_ACCESSED)
160  struct ext4_sb_info;
161  struct ext4_extent;
162 @@ -62,7 +63,8 @@ struct extent_status {
164  struct ext4_es_tree {
165         struct rb_root root;
166 -       struct list_head list;
167 +       struct list_head active_list;
168 +       struct list_head inactive_list;
169         struct extent_status *cache_es; /* recently accessed extent */
170  };
172 @@ -114,6 +116,21 @@ static inline int ext4_es_is_hole(struct extent_status *es)
173         return (es->es_pblk & ES_HOLE) != 0;
176 +static inline int ext4_es_is_accessed(struct extent_status *es)
178 +       return (es->es_pblk & ES_ACCESSED) != 0;
181 +static inline void ext4_es_mark_accessed(struct extent_status *es)
183 +       es->es_pblk |= ES_ACCESSED;
186 +static inline void ext4_es_clear_accessed(struct extent_status *es)
188 +       es->es_pblk &= ~ES_ACCESSED;
191  static inline unsigned int ext4_es_status(struct extent_status *es)
193         return es->es_pblk >> ES_SHIFT;
194 -- 
195 1.7.9.7
198 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
199 the body of a message to majordomo@vger.kernel.org
200 More majordomo info at  http://vger.kernel.org/majordomo-info.html