From 351d79916ce9d05daeb79751764dea914b9530f7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 10 Apr 2014 22:59:05 -0400 Subject: [PATCH] Add fix-collapse-range-patches-in-data-journalling-mode Also add missing fix-jbd2-warning-under-heavy-xattr-load patch and update timestamps --- ...collapse-range-patches-in-data-journalling-mode | 47 ++++++++ fix-jbd2-warning-under-heavy-xattr-load | 131 +++++++++++++++++++++ series | 1 + 3 files changed, 179 insertions(+) create mode 100644 fix-collapse-range-patches-in-data-journalling-mode create mode 100644 fix-jbd2-warning-under-heavy-xattr-load diff --git a/fix-collapse-range-patches-in-data-journalling-mode b/fix-collapse-range-patches-in-data-journalling-mode new file mode 100644 index 00000000..4772b861 --- /dev/null +++ b/fix-collapse-range-patches-in-data-journalling-mode @@ -0,0 +1,47 @@ +ext4: fix COLLAPSE_RANGE test failure in data journalling mode + +From: Namjae Jeon + +When mounting ext4 with data=journal option, xfstest shared/002 and +shared/004 are currently failing as checksum computed for testfile +does not match with the checksum computed in other journal modes. +In case of data=journal mode, a call to filemap_write_and_wait_range +will not flush anything to disk as buffers are not marked dirty in +write_end. In collapse range this call is followed by a call to +truncate_pagecache_range. Due to this, when checksum is computed, +a portion of file is re-read from disk which replaces valid data with +NULL bytes and hence the reason for the difference in checksum. + +Calling ext4_force_commit before filemap_write_and_wait_range solves +the issue as it will mark the buffers dirty during commit transaction +which can be later synced by a call to filemap_write_and_wait_range. 
+ +Signed-off-by: Namjae Jeon +Signed-off-by: Ashish Sangwan +Signed-off-by: "Theodore Ts'o" +--- + fs/ext4/extents.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 243a02e..dc9333f 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -5375,6 +5375,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) + punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); + punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); + ++ /* Call ext4_force_commit to flush all data in case of data=journal. */ ++ if (ext4_should_journal_data(inode)) { ++ ret = ext4_force_commit(inode->i_sb); ++ if (ret) ++ return ret; ++ } ++ + /* Write out all dirty pages */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); + if (ret) +-- +1.7.9.5 + + diff --git a/fix-jbd2-warning-under-heavy-xattr-load b/fix-jbd2-warning-under-heavy-xattr-load new file mode 100644 index 00000000..928d971c --- /dev/null +++ b/fix-jbd2-warning-under-heavy-xattr-load @@ -0,0 +1,131 @@ +ext4: fix jbd2 warning under heavy xattr load + +From: Jan Kara + +When heavily exercising xattr code the assertion that +jbd2_journal_dirty_metadata() shouldn't return error was triggered: + +WARNING: at /srv/autobuild-ceph/gitbuilder.git/build/fs/jbd2/transaction.c:1237 +jbd2_journal_dirty_metadata+0x1ba/0x260() + +CPU: 0 PID: 8877 Comm: ceph-osd Tainted: G W 3.10.0-ceph-00049-g68d04c9 #1 +Hardware name: Dell Inc. 
PowerEdge R410/01V648, BIOS 1.6.3 02/07/2011 + ffffffff81a1d3c8 ffff880214469928 ffffffff816311b0 ffff880214469968 + ffffffff8103fae0 ffff880214469958 ffff880170a9dc30 ffff8802240fbe80 + 0000000000000000 ffff88020b366000 ffff8802256e7510 ffff880214469978 +Call Trace: + [] dump_stack+0x19/0x1b + [] warn_slowpath_common+0x70/0xa0 + [] warn_slowpath_null+0x1a/0x20 + [] jbd2_journal_dirty_metadata+0x1ba/0x260 + [] __ext4_handle_dirty_metadata+0xa3/0x140 + [] ext4_xattr_release_block+0x103/0x1f0 + [] ext4_xattr_block_set+0x1e0/0x910 + [] ext4_xattr_set_handle+0x38b/0x4a0 + [] ? trace_hardirqs_on+0xd/0x10 + [] ext4_xattr_set+0xc2/0x140 + [] ext4_xattr_user_set+0x47/0x50 + [] generic_setxattr+0x6e/0x90 + [] __vfs_setxattr_noperm+0x7b/0x1c0 + [] vfs_setxattr+0xc4/0xd0 + [] setxattr+0x13e/0x1e0 + [] ? __sb_start_write+0xe7/0x1b0 + [] ? mnt_want_write_file+0x28/0x60 + [] ? fget_light+0x3c/0x130 + [] ? mnt_want_write_file+0x28/0x60 + [] ? __mnt_want_write+0x58/0x70 + [] SyS_fsetxattr+0xbe/0x100 + [] system_call_fastpath+0x16/0x1b + +The reason for the warning is that buffer_head passed into +jbd2_journal_dirty_metadata() didn't have journal_head attached. This is +caused by the following race of two ext4_xattr_release_block() calls: + +CPU1 CPU2 +ext4_xattr_release_block() ext4_xattr_release_block() +lock_buffer(bh); +/* False */ +if (BHDR(bh)->h_refcount == cpu_to_le32(1)) +} else { + le32_add_cpu(&BHDR(bh)->h_refcount, -1); + unlock_buffer(bh); + lock_buffer(bh); + /* True */ + if (BHDR(bh)->h_refcount == cpu_to_le32(1)) + get_bh(bh); + ext4_free_blocks() + ... + jbd2_journal_forget() + jbd2_journal_unfile_buffer() + -> JH is gone + error = ext4_handle_dirty_xattr_block(handle, inode, bh); + -> triggers the warning + +We fix the problem by moving ext4_handle_dirty_xattr_block() under the +buffer lock. Sadly this cannot be done in nojournal mode as that +function can call sync_dirty_buffer() which would deadlock. 
Luckily in +nojournal mode the race is harmless (we only dirty an already freed buffer) +and thus for nojournal mode we leave the dirtying outside of the buffer +lock. + +Reported-by: Sage Weil +Signed-off-by: Jan Kara +Signed-off-by: "Theodore Ts'o" +Cc: stable@vger.kernel.org +--- + fs/ext4/xattr.c | 23 +++++++++++++++++++---- + 1 file changed, 19 insertions(+), 4 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index e175e94116ac..55e611c1513c 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -517,8 +517,8 @@ static void ext4_xattr_update_super_block(handle_t *handle, + } + + /* +- * Release the xattr block BH: If the reference count is > 1, decrement +- * it; otherwise free the block. ++ * Release the xattr block BH: If the reference count is > 1, decrement it; ++ * otherwise free the block. + */ + static void + ext4_xattr_release_block(handle_t *handle, struct inode *inode, +@@ -538,16 +538,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + if (ce) + mb_cache_entry_free(ce); + get_bh(bh); ++ unlock_buffer(bh); + ext4_free_blocks(handle, inode, bh, 0, 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); +- unlock_buffer(bh); + } else { + le32_add_cpu(&BHDR(bh)->h_refcount, -1); + if (ce) + mb_cache_entry_release(ce); ++ /* ++ * Beware of this ugliness: Releasing of xattr block references ++ * from different inodes can race and so we have to protect ++ * from a race where someone else frees the block (and releases ++ * its journal_head) before we are done dirtying the buffer. In ++ * nojournal mode this race is harmless and we actually cannot ++ * call ext4_handle_dirty_xattr_block() with locked buffer as ++ * that function can call sync_dirty_buffer() so for that case ++ * we handle the dirtying after unlocking the buffer. 
++ */ ++ if (ext4_handle_valid(handle)) ++ error = ext4_handle_dirty_xattr_block(handle, inode, ++ bh); + unlock_buffer(bh); +- error = ext4_handle_dirty_xattr_block(handle, inode, bh); ++ if (!ext4_handle_valid(handle)) ++ error = ext4_handle_dirty_xattr_block(handle, inode, ++ bh); + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); +-- +1.8.1.4 + + diff --git a/series b/series index fcdfe28d..0e3d3724 100644 --- a/series +++ b/series @@ -6,6 +6,7 @@ initialize-multi-block-allocator-before-checking-block-descriptors note-the-error-in-ext4_end_bio fix-jbd2-warning-under-heavy-xattr-load update-PF_MEMALLOC-handling-in-ext4_write_inode +fix-collapse-range-patches-in-data-journalling-mode ########################################## # unstable patches -- 2.11.4.GIT