From a0e46d328558732026b6dc8d48549a00c2657c31 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 30 Oct 2017 08:33:18 -0400 Subject: [PATCH] add patch add-support-for-online-resizing-with-bigalloc --- add-support-for-online-resizing-with-bigalloc | 427 ++++++++++++++++++++++++++ series | 2 + timestamps | 7 +- 3 files changed, 433 insertions(+), 3 deletions(-) create mode 100644 add-support-for-online-resizing-with-bigalloc diff --git a/add-support-for-online-resizing-with-bigalloc b/add-support-for-online-resizing-with-bigalloc new file mode 100644 index 00000000..29b1e1b1 --- /dev/null +++ b/add-support-for-online-resizing-with-bigalloc @@ -0,0 +1,427 @@ +ext4: add support for online resizing with bigalloc + +From: harshads + +This patch adds support for online resizing on bigalloc file system by +implementing EXT4_IOC_RESIZE_FS ioctl. Old resize interfaces (add +block groups and extend last block group) are left untouched. Tests +performed with cluster sizes of 1, 2, 4 and 8 blocks (of size 4k) per +cluster. I will add these tests to xfstests. + +Signed-off-by: Harshad Shirwadkar +Signed-off-by: Theodore Ts'o +--- + fs/ext4/ext4.h | 4 +-- + fs/ext4/ioctl.c | 6 ---- + fs/ext4/mballoc.c | 28 ++++++++------- + fs/ext4/resize.c | 104 ++++++++++++++++++++++++++++++++++-------------------- + 4 files changed, 84 insertions(+), 58 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index b84aa1ca480a..53dcfd808567 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -534,8 +534,8 @@ struct ext4_new_group_data { + __u64 inode_table; + __u32 blocks_count; + __u16 reserved_blocks; +- __u16 unused; +- __u32 free_blocks_count; ++ __u16 mdata_blocks; ++ __u32 free_clusters_count; + }; + + /* Indexes used to index group tables in ext4_new_group_data */ +diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c +index 28cc412852af..e165b06767ff 100644 +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -702,12 +702,6 @@ group_add_out: + int err = 0, err2 = 0; + ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; + +- if (ext4_has_feature_bigalloc(sb)) { +- ext4_msg(sb, KERN_ERR, +- "Online resizing not (yet) supported with bigalloc"); +- return -EOPNOTSUPP; +- } +- + if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, + sizeof(__u64))) { + return -EFAULT; +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index c1ab3ec30423..b3922e1c7eec 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4925,8 +4925,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, + struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_buddy e4b; +- int err = 0, ret, blk_free_count; +- ext4_grpblk_t blocks_freed; ++ int err = 0, ret, free_clusters_count; ++ ext4_grpblk_t clusters_freed; ++ ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block); ++ ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1); ++ unsigned long cluster_count = last_cluster - first_cluster + 1; + + ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); + +@@ -4938,8 +4941,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, + * Check to see if we are freeing blocks across a group + * boundary. + */ +- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { +- ext4_warning(sb, "too much blocks added to group %u", ++ if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) { ++ ext4_warning(sb, "too many blocks added to group %u", + block_group); + err = -EINVAL; + goto error_return; +@@ -4985,14 +4988,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, + if (err) + goto error_return; + +- for (i = 0, blocks_freed = 0; i < count; i++) { ++ for (i = 0, clusters_freed = 0; i < cluster_count; i++) { + BUFFER_TRACE(bitmap_bh, "clear bit"); + if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { + ext4_error(sb, "bit already cleared for block %llu", + (ext4_fsblk_t)(block + i)); + BUFFER_TRACE(bitmap_bh, "bit already cleared"); + } else { +- blocks_freed++; ++ clusters_freed++; + } + } + +@@ -5006,19 +5009,20 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, + * them with group lock_held + */ + ext4_lock_group(sb, block_group); +- mb_clear_bits(bitmap_bh->b_data, bit, count); +- mb_free_blocks(NULL, &e4b, bit, count); +- blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); +- ext4_free_group_clusters_set(sb, desc, blk_free_count); ++ mb_clear_bits(bitmap_bh->b_data, bit, cluster_count); ++ mb_free_blocks(NULL, &e4b, bit, cluster_count); ++ free_clusters_count = clusters_freed + ++ ext4_free_group_clusters(sb, desc); ++ ext4_free_group_clusters_set(sb, desc, free_clusters_count); + ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh); + ext4_group_desc_csum_set(sb, block_group, desc); + ext4_unlock_group(sb, block_group); + percpu_counter_add(&sbi->s_freeclusters_counter, +- EXT4_NUM_B2C(sbi, blocks_freed)); ++ clusters_freed); + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, block_group); +- atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), ++ atomic64_add(clusters_freed, + &sbi->s_flex_groups[flex_group].free_clusters); + } + +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index cf681004b196..b3c8c7be8838 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -105,7 +105,7 @@ static int verify_group_input(struct super_block *sb, + + overhead = ext4_group_overhead_blocks(sb, group); + metaend = start + overhead; +- input->free_blocks_count = free_blocks_count = ++ input->free_clusters_count = free_blocks_count = + input->blocks_count - 2 - overhead - sbi->s_itb_per_group; + + if (test_opt(sb, DEBUG)) +@@ -256,6 +256,7 @@ static int ext4_alloc_group_tables(struct super_block *sb, + ext4_group_t last_group; + unsigned overhead; + __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; ++ int i; + + BUG_ON(flex_gd->count == 0 || group_data == NULL); + +@@ -292,7 +293,7 @@ next_group: + group_data[bb_index].block_bitmap = start_blk++; + group = ext4_get_group_number(sb, start_blk - 1); + group -= group_data[0].group; +- group_data[group].free_blocks_count--; ++ group_data[group].mdata_blocks++; + flex_gd->bg_flags[group] &= uninit_mask; + } + +@@ -303,7 +304,7 @@ next_group: + group_data[ib_index].inode_bitmap = start_blk++; + group = ext4_get_group_number(sb, start_blk - 1); + group -= group_data[0].group; +- group_data[group].free_blocks_count--; ++ group_data[group].mdata_blocks++; + flex_gd->bg_flags[group] &= uninit_mask; + } + +@@ -322,15 +323,22 @@ next_group: + if (start_blk + itb > next_group_start) { + flex_gd->bg_flags[group + 1] &= uninit_mask; + overhead = start_blk + itb - next_group_start; +- group_data[group + 1].free_blocks_count -= overhead; ++ group_data[group + 1].mdata_blocks += overhead; + itb -= overhead; + } + +- group_data[group].free_blocks_count -= itb; ++ group_data[group].mdata_blocks += itb; + flex_gd->bg_flags[group] &= uninit_mask; + start_blk += EXT4_SB(sb)->s_itb_per_group; + } + ++ /* Update free clusters count to exclude metadata blocks */ ++ for (i = 0; i < flex_gd->count; i++) { ++ group_data[i].free_clusters_count -= ++ EXT4_NUM_B2C(EXT4_SB(sb), ++ group_data[i].mdata_blocks); ++ } ++ + if (test_opt(sb, DEBUG)) { + int i; + group = group_data[0].group; +@@ -340,12 +348,13 @@ next_group: + flexbg_size); + + for (i = 0; i < flex_gd->count; i++) { +- printk(KERN_DEBUG "adding %s group %u: %u " +- "blocks (%d free)\n", ++ ext4_debug( ++ "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n", + ext4_bg_has_super(sb, group + i) ? "normal" : + "no-super", group + i, + group_data[i].blocks_count, +- group_data[i].free_blocks_count); ++ group_data[i].free_clusters_count, ++ group_data[i].mdata_blocks); + } + } + return 0; +@@ -397,7 +406,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) + } + + /* +- * set_flexbg_block_bitmap() mark @count blocks starting from @block used. ++ * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used. + * + * Helper function for ext4_setup_new_group_blocks() which set . + * +@@ -407,22 +416,26 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) + */ + static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, + struct ext4_new_flex_group_data *flex_gd, +- ext4_fsblk_t block, ext4_group_t count) ++ ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster) + { ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ ext4_group_t count = last_cluster - first_cluster + 1; + ext4_group_t count2; + +- ext4_debug("mark blocks [%llu/%u] used\n", block, count); +- for (count2 = count; count > 0; count -= count2, block += count2) { ++ ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster, ++ last_cluster); ++ for (count2 = count; count > 0; ++ count -= count2, first_cluster += count2) { + ext4_fsblk_t start; + struct buffer_head *bh; + ext4_group_t group; + int err; + +- group = ext4_get_group_number(sb, block); +- start = ext4_group_first_block_no(sb, group); ++ group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster)); ++ start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group)); + group -= flex_gd->groups[0].group; + +- count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start); ++ count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start); + if (count2 > count) + count2 = count; + +@@ -443,9 +456,9 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, + err = ext4_journal_get_write_access(handle, bh); + if (err) + return err; +- ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, +- block - start, count2); +- ext4_set_bits(bh->b_data, block - start, count2); ++ ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", ++ first_cluster, first_cluster - start, count2); ++ ext4_set_bits(bh->b_data, first_cluster - start, count2); + + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (unlikely(err)) +@@ -594,9 +607,10 @@ handle_bb: + if (overhead != 0) { + ext4_debug("mark backup superblock %#04llx (+0)\n", + start); +- ext4_set_bits(bh->b_data, 0, overhead); ++ ext4_set_bits(bh->b_data, 0, ++ EXT4_NUM_B2C(sbi, overhead)); + } +- ext4_mark_bitmap_end(group_data[i].blocks_count, ++ ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), + sb->s_blocksize * 8, bh->b_data); + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) +@@ -641,7 +655,11 @@ handle_ib: + continue; + } + err = set_flexbg_block_bitmap(sb, handle, +- flex_gd, start, count); ++ flex_gd, ++ EXT4_B2C(sbi, start), ++ EXT4_B2C(sbi, ++ start + count ++ - 1)); + if (err) + goto out; + count = group_table_count[j]; +@@ -651,7 +669,11 @@ handle_ib: + + if (count) { + err = set_flexbg_block_bitmap(sb, handle, +- flex_gd, start, count); ++ flex_gd, ++ EXT4_B2C(sbi, start), ++ EXT4_B2C(sbi, ++ start + count ++ - 1)); + if (err) + goto out; + } +@@ -839,7 +861,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, + ext4_std_error(sb, err); + goto exit_inode; + } +- inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ++ inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> ++ (9 - EXT4_SB(sb)->s_cluster_bits); + ext4_mark_iloc_dirty(handle, inode, &iloc); + memset(gdb_bh->b_data, 0, sb->s_blocksize); + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); +@@ -934,6 +957,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, + { + struct super_block *sb = inode->i_sb; + int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); ++ int cluster_bits = EXT4_SB(sb)->s_cluster_bits; + struct buffer_head **primary; + struct buffer_head *dind; + struct ext4_iloc iloc; +@@ -1009,7 +1033,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, + if (!err) + err = err2; + } +- inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; ++ ++ inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits); + ext4_mark_iloc_dirty(handle, inode, &iloc); + + exit_bh: +@@ -1243,7 +1268,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, + ext4_group_t group; + __u16 *bg_flags = flex_gd->bg_flags; + int i, gdb_off, gdb_num, err = 0; +- ++ + + for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { + group = group_data->group; +@@ -1270,7 +1295,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, + + ext4_inode_table_set(sb, gdp, group_data->inode_table); + ext4_free_group_clusters_set(sb, gdp, +- EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); ++ group_data->free_clusters_count); + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); + if (ext4_has_group_desc_csum(sb)) + ext4_itable_unused_set(sb, gdp, +@@ -1326,7 +1351,7 @@ static void ext4_update_super(struct super_block *sb, + */ + for (i = 0; i < flex_gd->count; i++) { + blocks_count += group_data[i].blocks_count; +- free_blocks += group_data[i].free_blocks_count; ++ free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count); + } + + reserved_blocks = ext4_r_blocks_count(es) * 100; +@@ -1498,17 +1523,18 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, + ext4_fsblk_t n_blocks_count, + unsigned long flexbg_size) + { +- struct ext4_super_block *es = EXT4_SB(sb)->s_es; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct ext4_super_block *es = sbi->s_es; + struct ext4_new_group_data *group_data = flex_gd->groups; + ext4_fsblk_t o_blocks_count; + ext4_group_t n_group; + ext4_group_t group; + ext4_group_t last_group; + ext4_grpblk_t last; +- ext4_grpblk_t blocks_per_group; ++ ext4_grpblk_t clusters_per_group; + unsigned long i; + +- blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); ++ clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb); + + o_blocks_count = ext4_blocks_count(es); + +@@ -1529,9 +1555,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, + int overhead; + + group_data[i].group = group + i; +- group_data[i].blocks_count = blocks_per_group; ++ group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb); + overhead = ext4_group_overhead_blocks(sb, group + i); +- group_data[i].free_blocks_count = blocks_per_group - overhead; ++ group_data[i].mdata_blocks = overhead; ++ group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb); + if (ext4_has_group_desc_csum(sb)) { + flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | + EXT4_BG_INODE_UNINIT; +@@ -1545,10 +1572,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, + /* We need to initialize block bitmap of last group. */ + flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; + +- if ((last_group == n_group) && (last != blocks_per_group - 1)) { +- group_data[i - 1].blocks_count = last + 1; +- group_data[i - 1].free_blocks_count -= blocks_per_group- +- last - 1; ++ if ((last_group == n_group) && (last != clusters_per_group - 1)) { ++ group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1); ++ group_data[i - 1].free_clusters_count -= clusters_per_group - ++ last - 1; + } + + return 1; +@@ -1795,7 +1822,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) + } + + /* Do a quick sanity check of the resize inode */ +- if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) ++ if (inode->i_blocks != 1 << (inode->i_blkbits - ++ (9 - sbi->s_cluster_bits))) + goto invalid_resize_inode; + for (i = 0; i < EXT4_N_BLOCKS; i++) { + if (i == EXT4_DIND_BLOCK) { +@@ -1957,7 +1985,7 @@ retry: + if (n_group == o_group) + add = n_blocks_count - o_blocks_count; + else +- add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); ++ add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1)); + if (add > 0) { + err = ext4_group_extend_no_check(sb, o_blocks_count, add); + if (err) +-- +2.15.0.rc0.271.g36b669edcc-goog + + diff --git a/series b/series index d03b395d..5413b937 100644 --- a/series +++ b/series @@ -18,6 +18,8 @@ ext4-convert-timers-to-use-timer-setup fix-little-inconsistencies mention-noload-when-recovering-on-ro-device +add-support-for-online-resizing-with-bigalloc + #################################################### # unstable patches #################################################### diff --git a/timestamps b/timestamps index 51d27c39..bc3e8785 100755 --- a/timestamps +++ b/timestamps @@ -48,6 +48,7 @@ touch -d @1508344828 jbd2-convert-timers-to-use-timer_setup touch -d @1508345117 ext4-convert-timers-to-use-timer-setup touch -d @1508345435 fix-little-inconsistencies touch -d @1508346397 mention-noload-when-recovering-on-ro-device -touch -d @1508346487 series -touch -d @1508346495 status -touch -d @1508346499 timestamps +touch -d @1509284326 add-support-for-online-resizing-with-bigalloc +touch -d @1509285516 series +touch -d @1509289773 status +touch -d @1509366790 timestamps -- 2.11.4.GIT