1 ext4: don't use blocks freed but not yet committed in buddy cache init
3 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
5 When we generate buddy cache (especially during resize) we need to
6 make sure we don't use the blocks freed but not yet committed. This
7 makes sure we have the right value of free blocks count in the group
8 info and also in the bitmap. This also ensures the ordered mode
11 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
12 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
14 fs/ext4/mballoc.c | 83 ++++++++++++++++++++++++++++++++++++++--------------
15 1 files changed, 60 insertions(+), 23 deletions(-)
17 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
18 index f4753bd..7293209 100644
19 --- a/fs/ext4/mballoc.c
20 +++ b/fs/ext4/mballoc.c
22 static struct kmem_cache *ext4_free_ext_cachep;
23 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
25 +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
26 + ext4_group_t group);
27 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
28 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
29 static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
30 @@ -858,7 +860,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
32 * incore got set to the group block bitmap below
34 + ext4_lock_group(sb, group);
35 ext4_mb_generate_buddy(sb, data, incore, group);
36 + ext4_unlock_group(sb, group);
39 /* this is block of bitmap */
40 @@ -872,6 +876,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
42 /* mark all preallocated blks used in in-core bitmap */
43 ext4_mb_generate_from_pa(sb, data, group);
44 + ext4_mb_generate_from_freelist(sb, data, group);
45 ext4_unlock_group(sb, group);
47 /* set incore so that the buddy information can be
48 @@ -3428,6 +3433,32 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
52 + * the function goes through all blocks freed in the group
53 + * but not yet committed and marks them used in in-core bitmap.
54 + * buddy must be generated from this bitmap
55 + * Need to be called with ext4 group lock (ext4_lock_group)
57 +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
61 + struct ext4_group_info *grp;
62 + struct ext4_free_data *entry;
64 + grp = ext4_get_group_info(sb, group);
65 + n = rb_first(&(grp->bb_free_root));
68 + entry = rb_entry(n, struct ext4_free_data, node);
69 + mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
70 + bitmap, entry->start_blk,
78 * the function goes through all preallocation in this group and marks them
79 * used in in-core bitmap. buddy must be generated from this bitmap
80 * Need to be called with ext4 group lock (ext4_lock_group)
81 @@ -4527,27 +4558,22 @@ static int can_merge(struct ext4_free_data *entry1,
83 static noinline_for_stack int
84 ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
85 - ext4_group_t group, ext4_grpblk_t block, int count)
86 + struct ext4_free_data *new_entry)
88 + ext4_grpblk_t block;
89 + struct ext4_free_data *entry;
90 struct ext4_group_info *db = e4b->bd_info;
91 struct super_block *sb = e4b->bd_sb;
92 struct ext4_sb_info *sbi = EXT4_SB(sb);
93 - struct ext4_free_data *entry, *new_entry;
94 struct rb_node **n = &db->bb_free_root.rb_node, *node;
95 struct rb_node *parent = NULL, *new_node;
98 BUG_ON(e4b->bd_bitmap_page == NULL);
99 BUG_ON(e4b->bd_buddy_page == NULL);
101 - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
102 - new_entry->start_blk = block;
103 - new_entry->group = group;
104 - new_entry->count = count;
105 - new_entry->t_tid = handle->h_transaction->t_tid;
106 new_node = &new_entry->node;
107 + block = new_entry->start_blk;
109 - ext4_lock_group(sb, group);
111 /* first free block exent. We need to
112 protect buddy cache from being freed,
113 @@ -4565,7 +4591,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
114 else if (block >= (entry->start_blk + entry->count))
117 - ext4_unlock_group(sb, group);
118 ext4_error(sb, __func__,
119 "Double free of blocks %d (%d %d)\n",
120 block, entry->start_blk, entry->count);
121 @@ -4607,7 +4632,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
122 spin_lock(&sbi->s_md_lock);
123 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
124 spin_unlock(&sbi->s_md_lock);
125 - ext4_unlock_group(sb, group);
129 @@ -4712,15 +4736,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
130 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
133 - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
136 - /* We dirtied the bitmap block */
137 - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
138 - err = ext4_journal_dirty_metadata(handle, bitmap_bh);
143 ac->ac_b_ex.fe_group = block_group;
144 ac->ac_b_ex.fe_start = bit;
145 @@ -4734,11 +4749,29 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
149 - /* blocks being freed are metadata. these blocks shouldn't
150 - * be used until this transaction is committed */
151 - ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
152 + struct ext4_free_data *new_entry;
154 + * blocks being freed are metadata. these blocks shouldn't
155 + * be used until this transaction is committed
157 + new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
158 + new_entry->start_blk = bit;
159 + new_entry->group = block_group;
160 + new_entry->count = count;
161 + new_entry->t_tid = handle->h_transaction->t_tid;
162 + ext4_lock_group(sb, block_group);
163 + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
165 + ext4_mb_free_metadata(handle, &e4b, new_entry);
166 + ext4_unlock_group(sb, block_group);
168 ext4_lock_group(sb, block_group);
169 + /* need to update group_info->bb_free and bitmap
170 + * with group lock held. generate_buddy looks at
171 + * them with the group lock held
173 + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
175 mb_free_blocks(inode, &e4b, bit, count);
176 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
177 ext4_unlock_group(sb, block_group);
178 @@ -4761,6 +4794,10 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
182 + /* We dirtied the bitmap block */
183 + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
184 + err = ext4_journal_dirty_metadata(handle, bitmap_bh);
186 /* And the group descriptor block */
187 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
188 ret = ext4_journal_dirty_metadata(handle, gd_bh);
193 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
194 the body of a message to majordomo@vger.kernel.org
195 More majordomo info at http://vger.kernel.org/majordomo-info.html