add jbd2 speedup patches
[ext4-patch-queue.git] / fix-resize-large-itable
blobe410f03d779344bf583e178e7175c312da3efd76
1 ext4: fix online resize with very large inode tables
3 If a file system has a large number of inodes per block group, all of
4 the metadata blocks in a flex_bg may be larger than what can fit in a
5 single block group.  Unfortunately, ext4_alloc_group_tables() in
6 resize.c was never tested to see if it would handle this case
7 correctly, and there were a large number of bugs which caused the
8 following sequence to result in a BUG_ON:
10 kernel bug at fs/ext4/resize.c:409!
11    ...
12 call trace:
13  [<ffffffff81256768>] ext4_flex_group_add+0x1448/0x1830
14  [<ffffffff81257de2>] ext4_resize_fs+0x7b2/0xe80
15  [<ffffffff8123ac50>] ext4_ioctl+0xbf0/0xf00
16  [<ffffffff811c111d>] do_vfs_ioctl+0x2dd/0x4b0
17  [<ffffffff811b9df2>] ? final_putname+0x22/0x50
18  [<ffffffff811c1371>] sys_ioctl+0x81/0xa0
19  [<ffffffff81676aa9>] system_call_fastpath+0x16/0x1b
20 code: c8 4c 89 df e8 41 96 f8 ff 44 89 e8 49 01 c4 44 29 6d d4 0
21 rip  [<ffffffff81254fa1>] set_flexbg_block_bitmap+0x171/0x180
24 This can be reproduced with the following command sequence:
26    mke2fs -t ext4 -i 4096 /dev/vdd 1G
27    mount -t ext4 /dev/vdd /vdd
28    resize2fs /dev/vdd 8G
30 To fix this, we need to make sure the right thing happens when a block
31 group's inode table straddles two block groups, which means the
32 following bugs had to be fixed:
34 1) Not clearing the BLOCK_UNINIT flag in the second block group in
35    ext4_alloc_group_tables --- this was the proximate cause of the BUG_ON.
37 2) Incorrectly determining how many block groups contained contiguous
38    free blocks in ext4_alloc_group_tables().
40 3) Incorrectly setting the start of the next block range to be marked
41    in use after a discontinuity in setup_new_flex_group_blocks().
43 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
44 Cc: stable@vger.kernel.org
45 ---
46  fs/ext4/resize.c | 32 ++++++++++++++++++++------------
47  1 file changed, 20 insertions(+), 12 deletions(-)
49 diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
50 index c5adbb3..69a6261 100644
51 --- a/fs/ext4/resize.c
52 +++ b/fs/ext4/resize.c
53 @@ -243,6 +243,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
54         ext4_group_t group;
55         ext4_group_t last_group;
56         unsigned overhead;
57 +       __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
59         BUG_ON(flex_gd->count == 0 || group_data == NULL);
61 @@ -266,7 +267,7 @@ next_group:
62         src_group++;
63         for (; src_group <= last_group; src_group++) {
64                 overhead = ext4_group_overhead_blocks(sb, src_group);
65 -               if (overhead != 0)
66 +               if (overhead == 0)
67                         last_blk += group_data[src_group - group].blocks_count;
68                 else
69                         break;
70 @@ -280,8 +281,7 @@ next_group:
71                 group = ext4_get_group_number(sb, start_blk - 1);
72                 group -= group_data[0].group;
73                 group_data[group].free_blocks_count--;
74 -               if (flexbg_size > 1)
75 -                       flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
76 +               flex_gd->bg_flags[group] &= uninit_mask;
77         }
79         /* Allocate inode bitmaps */
80 @@ -292,22 +292,30 @@ next_group:
81                 group = ext4_get_group_number(sb, start_blk - 1);
82                 group -= group_data[0].group;
83                 group_data[group].free_blocks_count--;
84 -               if (flexbg_size > 1)
85 -                       flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
86 +               flex_gd->bg_flags[group] &= uninit_mask;
87         }
89         /* Allocate inode tables */
90         for (; it_index < flex_gd->count; it_index++) {
91 -               if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
92 +               unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
93 +               ext4_fsblk_t next_group_start;
95 +               if (start_blk + itb > last_blk)
96                         goto next_group;
97                 group_data[it_index].inode_table = start_blk;
98 -               group = ext4_get_group_number(sb, start_blk - 1);
99 +               group = ext4_get_group_number(sb, start_blk);
100 +               next_group_start = ext4_group_first_block_no(sb, group + 1);
101                 group -= group_data[0].group;
102 -               group_data[group].free_blocks_count -=
103 -                                       EXT4_SB(sb)->s_itb_per_group;
104 -               if (flexbg_size > 1)
105 -                       flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
107 +               if (start_blk + itb > next_group_start) {
108 +                       flex_gd->bg_flags[group + 1] &= uninit_mask;
109 +                       overhead = start_blk + itb - next_group_start;
110 +                       group_data[group + 1].free_blocks_count -= overhead;
111 +                       itb -= overhead;
112 +               }
114 +               group_data[group].free_blocks_count -= itb;
115 +               flex_gd->bg_flags[group] &= uninit_mask;
116                 start_blk += EXT4_SB(sb)->s_itb_per_group;
117         }
119 @@ -620,7 +628,7 @@ handle_ib:
120                         if (err)
121                                 goto out;
122                         count = group_table_count[j];
123 -                       start = group_data[i].block_bitmap;
124 +                       start = (&group_data[i].block_bitmap)[j];
125                         block = start;
126                 }