Add patch SR-ext4-resize-mark-new-group-EXT_BG_INODE_ZEROED.patch
[ext4-patch-queue/an.git] / aneesh-8-fix-double-free-of-blocks
blob540d4e7eb4fef68e518f9d814f5fdfe96feea61c
1 ext4: Fix double free of blocks
3 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
5 Blocks freed but not yet committed will be marked free in disk bitmap.
6 We need to consider them as used when releasing inode prealloc
7 space. Otherwise we would double free them via mb_free_blocks
8 ext4_mb_release_inode_pa looks at the block bitmap and mark the
9 block found free in the bitmap withing the inode prealloc space
10 as unused. If we have blocks allocated out of a prealloc space
11 and later freed we would have called ext4_mb_free_blocks on them
12 That would mark blocks as free in bitmap and later on journal commit
13 we would call mb_free_blocks on them, thus resuling in double free
15 ext4_lock_group is used to make sure nothing gets added to the free
16 list when we are doing a release_inode_pa
18 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
19 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
20 ---
21  fs/ext4/mballoc.c |   75 ++++++++++++++++++++++++++++++++++-------------------
22  1 files changed, 48 insertions(+), 27 deletions(-)
24 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
25 index 7293209..f58de20 100644
26 --- a/fs/ext4/mballoc.c
27 +++ b/fs/ext4/mballoc.c
28 @@ -3725,19 +3725,19 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
29   * TODO: optimize the case when there are no in-core structures yet
30   */
31  static noinline_for_stack int
32 -ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
33 -                       struct ext4_prealloc_space *pa,
34 -                       struct ext4_allocation_context *ac)
35 +ext4_mb_release_inode_pa(struct ext4_buddy *e4b, void *bitmap,
36 +                        struct ext4_prealloc_space *pa,
37 +                        struct ext4_allocation_context *ac)
38  {
39 -       struct super_block *sb = e4b->bd_sb;
40 -       struct ext4_sb_info *sbi = EXT4_SB(sb);
41 +       int err = 0;
42 +       int free = 0;
43 +       sector_t start;
44         unsigned int end;
45         unsigned int next;
46         ext4_group_t group;
47         ext4_grpblk_t bit;
48 -       sector_t start;
49 -       int err = 0;
50 -       int free = 0;
51 +       struct super_block *sb = e4b->bd_sb;
52 +       struct ext4_sb_info *sbi = EXT4_SB(sb);
54         BUG_ON(pa->pa_deleted == 0);
55         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
56 @@ -3749,12 +3749,11 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
57                 ac->ac_inode = pa->pa_inode;
58                 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
59         }
61         while (bit < end) {
62 -               bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
63 +               bit = mb_find_next_zero_bit(bitmap, end, bit);
64                 if (bit >= end)
65                         break;
66 -               next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
67 +               next = mb_find_next_bit(bitmap, end, bit);
68                 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
69                                 le32_to_cpu(sbi->s_es->s_first_data_block);
70                 mb_debug("    free preallocated %u/%u in group %u\n",
71 @@ -3773,18 +3772,12 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
72                 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
73                 bit = next + 1;
74         }
75 -       if (free != pa->pa_free) {
76 -               printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
77 -                       pa, (unsigned long) pa->pa_lstart,
78 -                       (unsigned long) pa->pa_pstart,
79 -                       (unsigned long) pa->pa_len);
80 -               ext4_error(sb, __func__, "free %u, pa_free %u\n",
81 -                                               free, pa->pa_free);
82 -               /*
83 -                * pa is already deleted so we use the value obtained
84 -                * from the bitmap and continue.
85 -                */
86 -       }
87 +       /*
88 +        * The blocks allocated and later freed from this pa
89 +        * can result in pa_free being different from the
90 +        * bitmap free block count. This is because we don't
91 +        * update pa_len on releasing blocks.
92 +        */
93         atomic_add(free, &sbi->s_mb_discarded);
95         return err;
96 @@ -3840,6 +3833,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
97         struct ext4_allocation_context *ac;
98         struct list_head list;
99         struct ext4_buddy e4b;
100 +       void *bitmap;
101         int err;
102         int busy = 0;
103         int free = 0;
104 @@ -3855,12 +3849,21 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
105                                 "bitmap for %u\n", group);
106                 return 0;
107         }
109 +       /*
110 +        * Blocks freed but not yet committed will be marked free in
111 +        * disk bitmap.  We need to consider them as used when
112 +        * releasing inode pa.  Otherwise we would double free them via
113 +        * mb_free_blocks
114 +        */
115 +       bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
116 +       if (!bitmap)
117 +               return 0;
118         err = ext4_mb_load_buddy(sb, group, &e4b);
119         if (err) {
120                 __release(e4b->alloc_semp);
121                 ext4_error(sb, __func__, "Error in loading buddy "
122                                 "information for %u\n", group);
123 +               kfree(bitmap);
124                 put_bh(bitmap_bh);
125                 return 0;
126         }
127 @@ -3915,6 +3918,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
128                 goto out;
129         }
131 +       memcpy(bitmap, bitmap_bh->b_data, sb->s_blocksize);
132 +       ext4_mb_generate_from_freelist(sb, bitmap, group);
133         /* now free all selected PAs */
134         list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
136 @@ -3926,7 +3931,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
137                 if (pa->pa_linear)
138                         ext4_mb_release_group_pa(&e4b, pa, ac);
139                 else
140 -                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
141 +                       ext4_mb_release_inode_pa(&e4b, bitmap, pa, ac);
143                 list_del(&pa->u.pa_tmp_list);
144                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
145 @@ -3937,6 +3942,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
146         if (ac)
147                 kmem_cache_free(ext4_ac_cachep, ac);
148         ext4_mb_release_desc(&e4b);
149 +       kfree(bitmap);
150         put_bh(bitmap_bh);
151         return free;
153 @@ -3961,6 +3967,7 @@ void ext4_discard_preallocations(struct inode *inode)
154         struct list_head list;
155         struct ext4_buddy e4b;
156         int err;
157 +       void *bitmap;
159         if (!S_ISREG(inode->i_mode)) {
160                 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
161 @@ -4039,14 +4046,28 @@ void ext4_discard_preallocations(struct inode *inode)
162                         ext4_mb_release_desc(&e4b);
163                         continue;
164                 }
166 +               /* blocks freed but not yet committed will
167 +                * be marked free in disk bitmap. We need to
168 +                * consider them as used when releasing inode
169 +                * pa. Otherwise we would double free them
170 +                * via mb_free_blocks
171 +                */
172 +               bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
173 +               if (!bitmap) {
174 +                       ext4_mb_release_desc(&e4b);
175 +                       put_bh(bitmap_bh);
176 +                       continue;
177 +               }
178 +               memcpy(bitmap, bitmap_bh->b_data, sb->s_blocksize);
179                 ext4_lock_group(sb, group);
180 +               ext4_mb_generate_from_freelist(sb, bitmap, group);
181                 list_del(&pa->pa_group_list);
182 -               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
183 +               ext4_mb_release_inode_pa(&e4b, bitmap, pa, ac);
184                 ext4_unlock_group(sb, group);
186                 ext4_mb_release_desc(&e4b);
187                 put_bh(bitmap_bh);
188 +               kfree(bitmap);
190                 list_del(&pa->u.pa_tmp_list);
191                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
192 -- 
193 1.6.0.3.514.g2f91b
196 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
197 the body of a message to majordomo@vger.kernel.org
198 More majordomo info at  http://vger.kernel.org/majordomo-info.html