From 351d79916ce9d05daeb79751764dea914b9530f7 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 10 Apr 2014 22:59:05 -0400
Subject: [PATCH] Add fix-collapse-range-patches-in-data-journalling-mode

Also add missing fix-jbd2-warning-under-heavy-xattr-load patch and
update timestamps
---
 ...collapse-range-patches-in-data-journalling-mode |  47 ++++++++
 fix-jbd2-warning-under-heavy-xattr-load            | 131 +++++++++++++++++++++
 series                                             |   1 +
 3 files changed, 179 insertions(+)
 create mode 100644 fix-collapse-range-patches-in-data-journalling-mode
 create mode 100644 fix-jbd2-warning-under-heavy-xattr-load
diff --git a/fix-collapse-range-patches-in-data-journalling-mode b/fix-collapse-range-patches-in-data-journalling-mode
new file mode 100644
index 00000000..4772b861
--- /dev/null
+++ b/fix-collapse-range-patches-in-data-journalling-mode
@@ -0,0 +1,47 @@
+ext4: fix COLLAPSE_RANGE test failure in data journalling mode
+
+From: Namjae Jeon <namjae.jeon@samsung.com>
+
+When mounting ext4 with the data=journal option, xfstests shared/002
+and shared/004 currently fail because the checksum computed for the
+test file does not match the checksum computed in other journal modes.
+In data=journal mode, a call to filemap_write_and_wait_range() does
+not flush anything to disk, as buffers are not marked dirty in
+write_end. In collapse range this call is followed by a call to
+truncate_pagecache_range(). As a result, when the checksum is computed,
+a portion of the file is re-read from disk, which replaces valid data
+with NULL bytes; this is the reason for the difference in checksum.
+
+Calling ext4_force_commit() before filemap_write_and_wait_range() solves
+the issue, as it marks the buffers dirty during the transaction commit;
+they can then be synced by the call to filemap_write_and_wait_range().
+
+Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
+Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+---
+ fs/ext4/extents.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 243a02e..dc9333f 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -5375,6 +5375,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ 	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+ 	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+ 
++	/* Call ext4_force_commit to flush all data in case of data=journal. */
++	if (ext4_should_journal_data(inode)) {
++		ret = ext4_force_commit(inode->i_sb);
++		if (ret)
++			return ret;
++	}
++
+ 	/* Write out all dirty pages */
+ 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
+ 	if (ret)
+-- 
+1.7.9.5
+
+
diff --git a/fix-jbd2-warning-under-heavy-xattr-load b/fix-jbd2-warning-under-heavy-xattr-load
new file mode 100644
index 00000000..928d971c
--- /dev/null
+++ b/fix-jbd2-warning-under-heavy-xattr-load
@@ -0,0 +1,131 @@
+ext4: fix jbd2 warning under heavy xattr load
+
+From: Jan Kara <jack@suse.cz>
+
+When heavily exercising the xattr code, the assertion that
+jbd2_journal_dirty_metadata() shouldn't return an error was triggered:
+
+WARNING: at /srv/autobuild-ceph/gitbuilder.git/build/fs/jbd2/transaction.c:1237
+jbd2_journal_dirty_metadata+0x1ba/0x260()
+
+CPU: 0 PID: 8877 Comm: ceph-osd Tainted: G    W 3.10.0-ceph-00049-g68d04c9 #1
+Hardware name: Dell Inc. PowerEdge R410/01V648, BIOS 1.6.3 02/07/2011
+ ffffffff81a1d3c8 ffff880214469928 ffffffff816311b0 ffff880214469968
+ ffffffff8103fae0 ffff880214469958 ffff880170a9dc30 ffff8802240fbe80
+ 0000000000000000 ffff88020b366000 ffff8802256e7510 ffff880214469978
+Call Trace:
+ [<ffffffff816311b0>] dump_stack+0x19/0x1b
+ [<ffffffff8103fae0>] warn_slowpath_common+0x70/0xa0
+ [<ffffffff8103fb2a>] warn_slowpath_null+0x1a/0x20
+ [<ffffffff81267c2a>] jbd2_journal_dirty_metadata+0x1ba/0x260
+ [<ffffffff81245093>] __ext4_handle_dirty_metadata+0xa3/0x140
+ [<ffffffff812561f3>] ext4_xattr_release_block+0x103/0x1f0
+ [<ffffffff81256680>] ext4_xattr_block_set+0x1e0/0x910
+ [<ffffffff8125795b>] ext4_xattr_set_handle+0x38b/0x4a0
+ [<ffffffff810a319d>] ? trace_hardirqs_on+0xd/0x10
+ [<ffffffff81257b32>] ext4_xattr_set+0xc2/0x140
+ [<ffffffff81258547>] ext4_xattr_user_set+0x47/0x50
+ [<ffffffff811935ce>] generic_setxattr+0x6e/0x90
+ [<ffffffff81193ecb>] __vfs_setxattr_noperm+0x7b/0x1c0
+ [<ffffffff811940d4>] vfs_setxattr+0xc4/0xd0
+ [<ffffffff8119421e>] setxattr+0x13e/0x1e0
+ [<ffffffff811719c7>] ? __sb_start_write+0xe7/0x1b0
+ [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
+ [<ffffffff8118c65c>] ? fget_light+0x3c/0x130
+ [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
+ [<ffffffff8118f1f8>] ? __mnt_want_write+0x58/0x70
+ [<ffffffff811946be>] SyS_fsetxattr+0xbe/0x100
+ [<ffffffff816407c2>] system_call_fastpath+0x16/0x1b
+
+The reason for the warning is that the buffer_head passed into
+jbd2_journal_dirty_metadata() didn't have a journal_head attached. This
+is caused by the following race of two ext4_xattr_release_block() calls:
+
+CPU1                                CPU2
+ext4_xattr_release_block()          ext4_xattr_release_block()
+lock_buffer(bh);
+/* False */
+if (BHDR(bh)->h_refcount == cpu_to_le32(1))
+} else {
+  le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+  unlock_buffer(bh);
+                                    lock_buffer(bh);
+                                    /* True */
+                                    if (BHDR(bh)->h_refcount == cpu_to_le32(1))
+                                      get_bh(bh);
+                                      ext4_free_blocks()
+                                        ...
+                                        jbd2_journal_forget()
+                                          jbd2_journal_unfile_buffer()
+                                          -> JH is gone
+  error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+  -> triggers the warning
+
+We fix the problem by moving ext4_handle_dirty_xattr_block() under the
+buffer lock. Sadly, this cannot be done in nojournal mode, as that
+function can call sync_dirty_buffer(), which would deadlock. Luckily, in
+nojournal mode the race is harmless (we only dirty an already freed
+buffer), and thus for nojournal mode we leave the dirtying outside of
+the buffer lock.
+
+Reported-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: stable@vger.kernel.org
+---
+ fs/ext4/xattr.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index e175e94116ac..55e611c1513c 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -517,8 +517,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
+ }
+ 
+ /*
+- * Release the xattr block BH: If the reference count is > 1, decrement
+- * it; otherwise free the block.
++ * Release the xattr block BH: If the reference count is > 1, decrement it;
++ * otherwise free the block.
+  */
+ static void
+ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+@@ -538,16 +538,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ 		if (ce)
+ 			mb_cache_entry_free(ce);
+ 		get_bh(bh);
++		unlock_buffer(bh);
+ 		ext4_free_blocks(handle, inode, bh, 0, 1,
+ 				 EXT4_FREE_BLOCKS_METADATA |
+ 				 EXT4_FREE_BLOCKS_FORGET);
+-		unlock_buffer(bh);
+ 	} else {
+ 		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+ 		if (ce)
+ 			mb_cache_entry_release(ce);
++		/*
++		 * Beware of this ugliness: Releasing of xattr block references
++		 * from different inodes can race and so we have to protect
++		 * from a race where someone else frees the block (and releases
++		 * its journal_head) before we are done dirtying the buffer. In
++		 * nojournal mode this race is harmless and we actually cannot
++		 * call ext4_handle_dirty_xattr_block() with locked buffer as
++		 * that function can call sync_dirty_buffer() so for that case
++		 * we handle the dirtying after unlocking the buffer.
++		 */
++		if (ext4_handle_valid(handle))
++			error = ext4_handle_dirty_xattr_block(handle, inode,
++							      bh);
+ 		unlock_buffer(bh);
+-		error = ext4_handle_dirty_xattr_block(handle, inode, bh);
++		if (!ext4_handle_valid(handle))
++			error = ext4_handle_dirty_xattr_block(handle, inode,
++							      bh);
+ 		if (IS_SYNC(inode))
+ 			ext4_handle_sync(handle);
+ 		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
+-- 
+1.8.1.4
+
+
diff --git a/series b/series
index fcdfe28d..0e3d3724 100644
--- a/series
+++ b/series
@@ -6,6 +6,7 @@ initialize-multi-block-allocator-before-checking-block-descriptors
 note-the-error-in-ext4_end_bio
 fix-jbd2-warning-under-heavy-xattr-load
 update-PF_MEMALLOC-handling-in-ext4_write_inode
+fix-collapse-range-patches-in-data-journalling-mode
 
 ##########################################
 # unstable patches
-- 
2.11.4.GIT