From 254fd2bfde77059f86ea43061e069a12dbdbf982 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 15 Feb 2014 22:38:32 -0500 Subject: [PATCH] add commit fix-resize-large-itable --- fix-resize-large-itable | 127 ++++++++++++++++++++++++++++++++++++++++++++++++ series | 1 + 2 files changed, 128 insertions(+) create mode 100644 fix-resize-large-itable diff --git a/fix-resize-large-itable b/fix-resize-large-itable new file mode 100644 index 00000000..e410f03d --- /dev/null +++ b/fix-resize-large-itable @@ -0,0 +1,127 @@ +ext4: fix online resize with very large inode tables + +If a file system has a large number of inodes per block group, all of +the metadata blocks in a flex_bg may be larger than what can fit in a +single block group. Unfortunately, ext4_alloc_group_tables() in +resize.c was never tested to see if it would handle this case +correctly, and there were a large number of bugs which caused the +following sequence to result in a BUG_ON: + +kernel bug at fs/ext4/resize.c:409! + ... +call trace: + [] ext4_flex_group_add+0x1448/0x1830 + [] ext4_resize_fs+0x7b2/0xe80 + [] ext4_ioctl+0xbf0/0xf00 + [] do_vfs_ioctl+0x2dd/0x4b0 + [] ? final_putname+0x22/0x50 + [] sys_ioctl+0x81/0xa0 + [] system_call_fastpath+0x16/0x1b +code: c8 4c 89 df e8 41 96 f8 ff 44 89 e8 49 01 c4 44 29 6d d4 0 +rip [] set_flexbg_block_bitmap+0x171/0x180 + + +This can be reproduced with the following command sequence: + + mke2fs -t ext4 -i 4096 /dev/vdd 1G + mount -t ext4 /dev/vdd /vdd + resize2fs /dev/vdd 8G + +To fix this, we need to make sure the right thing happens when a block +group's inode table straddles two block groups, which means the +following bugs had to be fixed: + +1) Not clearing the BLOCK_UNINIT flag in the second block group in + ext4_alloc_group_tables --- this was the proximate cause of the BUG_ON. + +2) Incorrectly determining how many block groups contained contiguous + free blocks in ext4_alloc_group_tables(). 
+ +3) Incorrectly setting the start of the next block range to be marked + in use after a discontinuity in setup_new_flex_group_blocks(). + +Signed-off-by: "Theodore Ts'o" +Cc: stable@vger.kernel.org +--- + fs/ext4/resize.c | 32 ++++++++++++++++++++------------ + 1 file changed, 20 insertions(+), 12 deletions(-) + +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index c5adbb3..69a6261 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -243,6 +243,7 @@ static int ext4_alloc_group_tables(struct super_block *sb, + ext4_group_t group; + ext4_group_t last_group; + unsigned overhead; ++ __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; + + BUG_ON(flex_gd->count == 0 || group_data == NULL); + +@@ -266,7 +267,7 @@ next_group: + src_group++; + for (; src_group <= last_group; src_group++) { + overhead = ext4_group_overhead_blocks(sb, src_group); +- if (overhead != 0) ++ if (overhead == 0) + last_blk += group_data[src_group - group].blocks_count; + else + break; +@@ -280,8 +281,7 @@ next_group: + group = ext4_get_group_number(sb, start_blk - 1); + group -= group_data[0].group; + group_data[group].free_blocks_count--; +- if (flexbg_size > 1) +- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; ++ flex_gd->bg_flags[group] &= uninit_mask; + } + + /* Allocate inode bitmaps */ +@@ -292,22 +292,30 @@ next_group: + group = ext4_get_group_number(sb, start_blk - 1); + group -= group_data[0].group; + group_data[group].free_blocks_count--; +- if (flexbg_size > 1) +- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; ++ flex_gd->bg_flags[group] &= uninit_mask; + } + + /* Allocate inode tables */ + for (; it_index < flex_gd->count; it_index++) { +- if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) ++ unsigned int itb = EXT4_SB(sb)->s_itb_per_group; ++ ext4_fsblk_t next_group_start; ++ ++ if (start_blk + itb > last_blk) + goto next_group; + group_data[it_index].inode_table = start_blk; +- group = ext4_get_group_number(sb, start_blk - 1); ++ group = 
ext4_get_group_number(sb, start_blk); ++ next_group_start = ext4_group_first_block_no(sb, group + 1); + group -= group_data[0].group; +- group_data[group].free_blocks_count -= +- EXT4_SB(sb)->s_itb_per_group; +- if (flexbg_size > 1) +- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + ++ if (start_blk + itb > next_group_start) { ++ flex_gd->bg_flags[group + 1] &= uninit_mask; ++ overhead = start_blk + itb - next_group_start; ++ group_data[group + 1].free_blocks_count -= overhead; ++ itb -= overhead; ++ } ++ ++ group_data[group].free_blocks_count -= itb; ++ flex_gd->bg_flags[group] &= uninit_mask; + start_blk += EXT4_SB(sb)->s_itb_per_group; + } + +@@ -620,7 +628,7 @@ handle_ib: + if (err) + goto out; + count = group_table_count[j]; +- start = group_data[i].block_bitmap; ++ start = (&group_data[i].block_bitmap)[j]; + block = start; + } + diff --git a/series b/series index 038a4d0f..77a5d3e2 100644 --- a/series +++ b/series @@ -8,6 +8,7 @@ cleanup-error-handling-in-swap_inode_boot_loader dont-try-to-set-hash-signedness-flag-if-fs-is-ro remove-an-unneeded-check-in-mext_page_mkuptodate jbd2-mark-file-local-functions-as-static +fix-resize-large-itable ########################################## # unstable patches -- 2.11.4.GIT