Add patch SR-ext4-resize-mark-new-group-EXT_BG_INODE_ZEROED.patch
[ext4-patch-queue/an.git] / aneesh-2-add-blocks-added-during-resize-to-bitmap
blobd40945acf539f034f94c25abe8a03fb955e51ea1
1 ext4: Add blocks added during resize to bitmap
3 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
5 With this change, new blocks added during resize
6 are marked as free in the block bitmap and the
7 group is flagged with EXT4_GROUP_INFO_NEED_INIT_BIT.
8 This makes sure that when mballoc tries to allocate
9 blocks from the new group, it reloads the buddy
10 information using the bitmap present on disk.
12 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
13 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
14 ---
15  fs/ext4/balloc.c |  136 ++++++++++++------------------------------------------
16  fs/ext4/ext4.h   |    5 +-
17  fs/ext4/resize.c |   11 +----
18  3 files changed, 34 insertions(+), 118 deletions(-)
20 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
21 index 2295f3e..fa69cb3 100644
22 --- a/fs/ext4/balloc.c
23 +++ b/fs/ext4/balloc.c
24 @@ -20,6 +20,7 @@
25  #include "ext4.h"
26  #include "ext4_jbd2.h"
27  #include "group.h"
28 +#include "mballoc.h"
30  /*
31   * balloc.c contains the blocks allocation and deallocation routines
32 @@ -350,62 +351,43 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
33  }
35  /**
36 - * ext4_free_blocks_sb() -- Free given blocks and update quota
37 + * ext4_add_groupblocks() -- Add given blocks to an existing group
38   * @handle:                    handle to this transaction
39   * @sb:                                super block
40 - * @block:                     start physcial block to free
41 + * @block:                     start physcial block to add to the block group
42   * @count:                     number of blocks to free
43 - * @pdquot_freed_blocks:       pointer to quota
44   *
45 - * XXX This function is only used by the on-line resizing code, which
46 - * should probably be fixed up to call the mballoc variant.  There
47 - * this needs to be cleaned up later; in fact, I'm not convinced this
48 - * is 100% correct in the face of the mballoc code.  The online resizing
49 - * code needs to be fixed up to more tightly (and correctly) interlock
50 - * with the mballoc code.
51 + * This marks the blocks as free in the bitmap. We ask the
52 + * mballoc to reload the buddy after this by setting group
53 + * EXT4_GROUP_INFO_NEED_INIT_BIT flag
54   */
55 -void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
56 -                        ext4_fsblk_t block, unsigned long count,
57 -                        unsigned long *pdquot_freed_blocks)
58 +void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
59 +                        ext4_fsblk_t block, unsigned long count)
60  {
61         struct buffer_head *bitmap_bh = NULL;
62         struct buffer_head *gd_bh;
63         ext4_group_t block_group;
64         ext4_grpblk_t bit;
65         unsigned int i;
66 -       unsigned int overflow;
67         struct ext4_group_desc *desc;
68         struct ext4_super_block *es;
69         struct ext4_sb_info *sbi;
70         int err = 0, ret;
71 -       ext4_grpblk_t group_freed;
72 +       ext4_grpblk_t blocks_freed;
73 +       struct ext4_group_info *grp;
75 -       *pdquot_freed_blocks = 0;
76         sbi = EXT4_SB(sb);
77         es = sbi->s_es;
78 -       if (block < le32_to_cpu(es->s_first_data_block) ||
79 -           block + count < block ||
80 -           block + count > ext4_blocks_count(es)) {
81 -               ext4_error(sb, "ext4_free_blocks",
82 -                          "Freeing blocks not in datazone - "
83 -                          "block = %llu, count = %lu", block, count);
84 -               goto error_return;
85 -       }
87 -       ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
88 +       ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
90 -do_more:
91 -       overflow = 0;
92         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
93         /*
94          * Check to see if we are freeing blocks across a group
95          * boundary.
96          */
97         if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
98 -               overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
99 -               count -= overflow;
100 +               goto error_return;
101         }
102 -       brelse(bitmap_bh);
103         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
104         if (!bitmap_bh)
105                 goto error_return;
106 @@ -418,18 +400,17 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
107             in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
108             in_range(block + count - 1, ext4_inode_table(sb, desc),
109                      sbi->s_itb_per_group)) {
110 -               ext4_error(sb, "ext4_free_blocks",
111 -                          "Freeing blocks in system zones - "
112 +               ext4_error(sb, __func__,
113 +                          "Adding blocks in system zones - "
114                            "Block = %llu, count = %lu",
115                            block, count);
116                 goto error_return;
117         }
119         /*
120 -        * We are about to start releasing blocks in the bitmap,
121 +        * We are about to add blocks to the bitmap,
122          * so we need undo access.
123          */
124 -       /* @@@ check errors */
125         BUFFER_TRACE(bitmap_bh, "getting undo access");
126         err = ext4_journal_get_undo_access(handle, bitmap_bh);
127         if (err)
128 @@ -445,87 +426,28 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
129         if (err)
130                 goto error_return;
132 -       jbd_lock_bh_state(bitmap_bh);
134 -       for (i = 0, group_freed = 0; i < count; i++) {
135 -               /*
136 -                * An HJ special.  This is expensive...
137 -                */
138 -#ifdef CONFIG_JBD2_DEBUG
139 -               jbd_unlock_bh_state(bitmap_bh);
140 -               {
141 -                       struct buffer_head *debug_bh;
142 -                       debug_bh = sb_find_get_block(sb, block + i);
143 -                       if (debug_bh) {
144 -                               BUFFER_TRACE(debug_bh, "Deleted!");
145 -                               if (!bh2jh(bitmap_bh)->b_committed_data)
146 -                                       BUFFER_TRACE(debug_bh,
147 -                                               "No commited data in bitmap");
148 -                               BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
149 -                               __brelse(debug_bh);
150 -                       }
151 -               }
152 -               jbd_lock_bh_state(bitmap_bh);
153 -#endif
154 -               if (need_resched()) {
155 -                       jbd_unlock_bh_state(bitmap_bh);
156 -                       cond_resched();
157 -                       jbd_lock_bh_state(bitmap_bh);
158 -               }
159 -               /* @@@ This prevents newly-allocated data from being
160 -                * freed and then reallocated within the same
161 -                * transaction.
162 -                *
163 -                * Ideally we would want to allow that to happen, but to
164 -                * do so requires making jbd2_journal_forget() capable of
165 -                * revoking the queued write of a data block, which
166 -                * implies blocking on the journal lock.  *forget()
167 -                * cannot block due to truncate races.
168 -                *
169 -                * Eventually we can fix this by making jbd2_journal_forget()
170 -                * return a status indicating whether or not it was able
171 -                * to revoke the buffer.  On successful revoke, it is
172 -                * safe not to set the allocation bit in the committed
173 -                * bitmap, because we know that there is no outstanding
174 -                * activity on the buffer any more and so it is safe to
175 -                * reallocate it.
176 -                */
177 -               BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
178 -               J_ASSERT_BH(bitmap_bh,
179 -                               bh2jh(bitmap_bh)->b_committed_data != NULL);
180 -               ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
181 -                               bh2jh(bitmap_bh)->b_committed_data);
183 -               /*
184 -                * We clear the bit in the bitmap after setting the committed
185 -                * data bit, because this is the reverse order to that which
186 -                * the allocator uses.
187 -                */
188 +       for (i = 0, blocks_freed = 0; i < count; i++) {
189                 BUFFER_TRACE(bitmap_bh, "clear bit");
190                 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
191                                                 bit + i, bitmap_bh->b_data)) {
192 -                       jbd_unlock_bh_state(bitmap_bh);
193                         ext4_error(sb, __func__,
194                                    "bit already cleared for block %llu",
195                                    (ext4_fsblk_t)(block + i));
196 -                       jbd_lock_bh_state(bitmap_bh);
197                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
198                 } else {
199 -                       group_freed++;
200 +                       blocks_freed++;
201                 }
202         }
203 -       jbd_unlock_bh_state(bitmap_bh);
205         spin_lock(sb_bgl_lock(sbi, block_group));
206 -       le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
207 +       le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
208         desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
209         spin_unlock(sb_bgl_lock(sbi, block_group));
210 -       percpu_counter_add(&sbi->s_freeblocks_counter, count);
211 +       percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
213         if (sbi->s_log_groups_per_flex) {
214                 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
215                 spin_lock(sb_bgl_lock(sbi, flex_group));
216 -               sbi->s_flex_groups[flex_group].free_blocks += count;
217 +               sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
218                 spin_unlock(sb_bgl_lock(sbi, flex_group));
219         }
221 @@ -536,15 +458,17 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
222         /* And the group descriptor block */
223         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
224         ret = ext4_journal_dirty_metadata(handle, gd_bh);
225 -       if (!err) err = ret;
226 -       *pdquot_freed_blocks += group_freed;
228 -       if (overflow && !err) {
229 -               block += count;
230 -               count = overflow;
231 -               goto do_more;
232 -       }
233 +       if (!err)
234 +               err = ret;
235         sb->s_dirt = 1;
236 +       /*
237 +        * request to reload the buddy with the
238 +        * new bitmap information
239 +        */
240 +       grp = ext4_get_group_info(sb, block_group);
241 +       set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
242 +       ext4_mb_update_group_info(grp, blocks_freed);
244  error_return:
245         brelse(bitmap_bh);
246         ext4_std_error(sb, err);
247 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
248 index c05d639..4514fa5 100644
249 --- a/fs/ext4/ext4.h
250 +++ b/fs/ext4/ext4.h
251 @@ -1012,9 +1012,8 @@ extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
252  extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
253  extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
254                         ext4_fsblk_t block, unsigned long count, int metadata);
255 -extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
256 -                               ext4_fsblk_t block, unsigned long count,
257 -                               unsigned long *pdquot_freed_blocks);
258 +extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
259 +                               ext4_fsblk_t block, unsigned long count);
260  extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
261  extern void ext4_check_blocks_bitmap(struct super_block *);
262  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
263 diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
264 index 498ccbc..b6a0609 100644
265 --- a/fs/ext4/resize.c
266 +++ b/fs/ext4/resize.c
267 @@ -975,9 +975,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
268         struct buffer_head *bh;
269         handle_t *handle;
270         int err;
271 -       unsigned long freed_blocks;
272         ext4_group_t group;
273 -       struct ext4_group_info *grp;
275         /* We don't need to worry about locking wrt other resizers just
276          * yet: we're going to revalidate es->s_blocks_count after
277 @@ -1076,7 +1074,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
278         unlock_super(sb);
279         ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
280                    o_blocks_count + add);
281 -       ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
282 +       /* We add the blocks to the bitmap and set the group need init bit */
283 +       ext4_add_groupblocks(handle, sb, o_blocks_count, add);
284         ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
285                    o_blocks_count + add);
286         if ((err = ext4_journal_stop(handle)))
287 @@ -1119,12 +1118,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
288                         ClearPageUptodate(page);
289                         page_cache_release(page);
290                 }
292 -               /* Get the info on the last group */
293 -               grp = ext4_get_group_info(sb, group);
295 -               /* Update free blocks in group info */
296 -               ext4_mb_update_group_info(grp, add);
297         }
299         if (test_opt(sb, DEBUG))
300 -- 
301 1.6.0.3.514.g2f91b
304 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
305 the body of a message to majordomo@vger.kernel.org
306 More majordomo info at  http://vger.kernel.org/majordomo-info.html