Add patch SR-ext4-resize-mark-new-group-EXT_BG_INODE_ZEROED.patch
[ext4-patch-queue/an.git] / aneesh-7-dont-use-blocks-freed-but-not-yet-committed-in-buddy-cache-init
blob b13a477cb16e2047e5e68b40719884f169340ace
1 ext4: don't use blocks freed but not yet committed in buddy cache init
3 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
5 When we generate the buddy cache (especially during resize) we need to
6 make sure we don't use the blocks freed but not yet committed.  This
7 makes sure we have the right value of free blocks count in the group
8 info and also in the bitmap.  This also ensures the ordered mode
9 consistency.
11 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
12 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
13 ---
14  fs/ext4/mballoc.c |   83 ++++++++++++++++++++++++++++++++++++++--------------
15  1 files changed, 60 insertions(+), 23 deletions(-)
17 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
18 index f4753bd..7293209 100644
19 --- a/fs/ext4/mballoc.c
20 +++ b/fs/ext4/mballoc.c
21 @@ -335,6 +335,8 @@
22  static struct kmem_cache *ext4_free_ext_cachep;
23  static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
24                                         ext4_group_t group);
25 +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
26 +                                               ext4_group_t group);
27  static int ext4_mb_init_per_dev_proc(struct super_block *sb);
28  static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
29  static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
30 @@ -858,7 +860,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
31                         /*
32                          * incore got set to the group block bitmap below
33                          */
34 +                       ext4_lock_group(sb, group);
35                         ext4_mb_generate_buddy(sb, data, incore, group);
36 +                       ext4_unlock_group(sb, group);
37                         incore = NULL;
38                 } else {
39                         /* this is block of bitmap */
40 @@ -872,6 +876,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
42                         /* mark all preallocated blks used in in-core bitmap */
43                         ext4_mb_generate_from_pa(sb, data, group);
44 +                       ext4_mb_generate_from_freelist(sb, data, group);
45                         ext4_unlock_group(sb, group);
47                         /* set incore so that the buddy information can be
48 @@ -3428,6 +3433,32 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
49  }
51  /*
52 + * the function goes through all blocks freed in the group
53 + * but not yet committed and marks them used in in-core bitmap.
54 + * buddy must be generated from this bitmap
55 + * Need to be called with ext4 group lock (ext4_lock_group)
56 + */
57 +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
58 +                                               ext4_group_t group)
60 +       struct rb_node *n;
61 +       struct ext4_group_info *grp;
62 +       struct ext4_free_data *entry;
64 +       grp = ext4_get_group_info(sb, group);
65 +       n = rb_first(&(grp->bb_free_root));
67 +       while (n) {
68 +               entry = rb_entry(n, struct ext4_free_data, node);
69 +               mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
70 +                               bitmap, entry->start_blk,
71 +                               entry->count);
72 +               n = rb_next(n);
73 +       }
74 +       return;
77 +/*
78   * the function goes through all preallocation in this group and marks them
79   * used in in-core bitmap. buddy must be generated from this bitmap
80   * Need to be called with ext4 group lock (ext4_lock_group)
81 @@ -4527,27 +4558,22 @@ static int can_merge(struct ext4_free_data *entry1,
83  static noinline_for_stack int
84  ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
85 -                         ext4_group_t group, ext4_grpblk_t block, int count)
86 +                       struct ext4_free_data *new_entry)
87  {
88 +       ext4_grpblk_t block;
89 +       struct ext4_free_data *entry;
90         struct ext4_group_info *db = e4b->bd_info;
91         struct super_block *sb = e4b->bd_sb;
92         struct ext4_sb_info *sbi = EXT4_SB(sb);
93 -       struct ext4_free_data *entry, *new_entry;
94         struct rb_node **n = &db->bb_free_root.rb_node, *node;
95         struct rb_node *parent = NULL, *new_node;
98         BUG_ON(e4b->bd_bitmap_page == NULL);
99         BUG_ON(e4b->bd_buddy_page == NULL);
101 -       new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
102 -       new_entry->start_blk = block;
103 -       new_entry->group  = group;
104 -       new_entry->count = count;
105 -       new_entry->t_tid = handle->h_transaction->t_tid;
106         new_node = &new_entry->node;
107 +       block = new_entry->start_blk;
109 -       ext4_lock_group(sb, group);
110         if (!*n) {
111                 /* first free block exent. We need to
112                    protect buddy cache from being freed,
113 @@ -4565,7 +4591,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
114                 else if (block >= (entry->start_blk + entry->count))
115                         n = &(*n)->rb_right;
116                 else {
117 -                       ext4_unlock_group(sb, group);
118                         ext4_error(sb, __func__,
119                             "Double free of blocks %d (%d %d)\n",
120                             block, entry->start_blk, entry->count);
121 @@ -4607,7 +4632,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
122         spin_lock(&sbi->s_md_lock);
123         list_add(&new_entry->list, &handle->h_transaction->t_private_list);
124         spin_unlock(&sbi->s_md_lock);
125 -       ext4_unlock_group(sb, group);
126         return 0;
129 @@ -4712,15 +4736,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
130                         BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
131         }
132  #endif
133 -       mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
134 -                       bit, count);
136 -       /* We dirtied the bitmap block */
137 -       BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
138 -       err = ext4_journal_dirty_metadata(handle, bitmap_bh);
139 -       if (err)
140 -               goto error_return;
142         if (ac) {
143                 ac->ac_b_ex.fe_group = block_group;
144                 ac->ac_b_ex.fe_start = bit;
145 @@ -4734,11 +4749,29 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
146                 goto error_return;
147         }
148         if (metadata) {
149 -               /* blocks being freed are metadata. these blocks shouldn't
150 -                * be used until this transaction is committed */
151 -               ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
152 +               struct ext4_free_data *new_entry;
153 +               /*
154 +                * blocks being freed are metadata. these blocks shouldn't
155 +                * be used until this transaction is committed
156 +                */
157 +               new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
158 +               new_entry->start_blk = bit;
159 +               new_entry->group  = block_group;
160 +               new_entry->count = count;
161 +               new_entry->t_tid = handle->h_transaction->t_tid;
162 +               ext4_lock_group(sb, block_group);
163 +               mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
164 +                               bit, count);
165 +               ext4_mb_free_metadata(handle, &e4b, new_entry);
166 +               ext4_unlock_group(sb, block_group);
167         } else {
168                 ext4_lock_group(sb, block_group);
169 +               /* need to update group_info->bb_free and bitmap
170 +                * with group lock held. generate_buddy looks at
171 +                * them with the group lock held
172 +                */
173 +               mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
174 +                               bit, count);
175                 mb_free_blocks(inode, &e4b, bit, count);
176                 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
177                 ext4_unlock_group(sb, block_group);
178 @@ -4761,6 +4794,10 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
180         *freed += count;
182 +       /* We dirtied the bitmap block */
183 +       BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
184 +       err = ext4_journal_dirty_metadata(handle, bitmap_bh);
186         /* And the group descriptor block */
187         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
188         ret = ext4_journal_dirty_metadata(handle, gd_bh);
189 -- 
190 1.6.0.3.514.g2f91b
193 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
194 the body of a message to majordomo@vger.kernel.org
195 More majordomo info at  http://vger.kernel.org/majordomo-info.html