Add new patches for the next merge window
[ext4-patch-queue.git] / fix-jbd2-warning-under-heavy-xattr-load
blob928d971c1c4474c4801d42aa855287a8dc8d0ee6
1 ext4: fix jbd2 warning under heavy xattr load
3 From: Jan Kara <jack@suse.cz>
5 When heavily exercising xattr code the assertion that
6 jbd2_journal_dirty_metadata() shouldn't return error was triggered:
8 WARNING: at /srv/autobuild-ceph/gitbuilder.git/build/fs/jbd2/transaction.c:1237
9 jbd2_journal_dirty_metadata+0x1ba/0x260()
11 CPU: 0 PID: 8877 Comm: ceph-osd Tainted: G    W 3.10.0-ceph-00049-g68d04c9 #1
12 Hardware name: Dell Inc. PowerEdge R410/01V648, BIOS 1.6.3 02/07/2011
13  ffffffff81a1d3c8 ffff880214469928 ffffffff816311b0 ffff880214469968
14  ffffffff8103fae0 ffff880214469958 ffff880170a9dc30 ffff8802240fbe80
15  0000000000000000 ffff88020b366000 ffff8802256e7510 ffff880214469978
16 Call Trace:
17  [<ffffffff816311b0>] dump_stack+0x19/0x1b
18  [<ffffffff8103fae0>] warn_slowpath_common+0x70/0xa0
19  [<ffffffff8103fb2a>] warn_slowpath_null+0x1a/0x20
20  [<ffffffff81267c2a>] jbd2_journal_dirty_metadata+0x1ba/0x260
21  [<ffffffff81245093>] __ext4_handle_dirty_metadata+0xa3/0x140
22  [<ffffffff812561f3>] ext4_xattr_release_block+0x103/0x1f0
23  [<ffffffff81256680>] ext4_xattr_block_set+0x1e0/0x910
24  [<ffffffff8125795b>] ext4_xattr_set_handle+0x38b/0x4a0
25  [<ffffffff810a319d>] ? trace_hardirqs_on+0xd/0x10
26  [<ffffffff81257b32>] ext4_xattr_set+0xc2/0x140
27  [<ffffffff81258547>] ext4_xattr_user_set+0x47/0x50
28  [<ffffffff811935ce>] generic_setxattr+0x6e/0x90
29  [<ffffffff81193ecb>] __vfs_setxattr_noperm+0x7b/0x1c0
30  [<ffffffff811940d4>] vfs_setxattr+0xc4/0xd0
31  [<ffffffff8119421e>] setxattr+0x13e/0x1e0
32  [<ffffffff811719c7>] ? __sb_start_write+0xe7/0x1b0
33  [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
34  [<ffffffff8118c65c>] ? fget_light+0x3c/0x130
35  [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
36  [<ffffffff8118f1f8>] ? __mnt_want_write+0x58/0x70
37  [<ffffffff811946be>] SyS_fsetxattr+0xbe/0x100
38  [<ffffffff816407c2>] system_call_fastpath+0x16/0x1b
40 The reason for the warning is that buffer_head passed into
41 jbd2_journal_dirty_metadata() didn't have journal_head attached. This is
42 caused by the following race of two ext4_xattr_release_block() calls:
44 CPU1                                CPU2
45 ext4_xattr_release_block()          ext4_xattr_release_block()
46 lock_buffer(bh);
47 /* False */
48 if (BHDR(bh)->h_refcount == cpu_to_le32(1))
49 } else {
50   le32_add_cpu(&BHDR(bh)->h_refcount, -1);
51   unlock_buffer(bh);
52                                     lock_buffer(bh);
53                                     /* True */
54                                     if (BHDR(bh)->h_refcount == cpu_to_le32(1))
55                                       get_bh(bh);
56                                       ext4_free_blocks()
57                                         ...
58                                         jbd2_journal_forget()
59                                           jbd2_journal_unfile_buffer()
60                                           -> JH is gone
61   error = ext4_handle_dirty_xattr_block(handle, inode, bh);
62   -> triggers the warning
64 We fix the problem by moving ext4_handle_dirty_xattr_block() under the
65 buffer lock. Sadly this cannot be done in nojournal mode as that
66 function can call sync_dirty_buffer() which would deadlock. Luckily in
67 nojournal mode the race is harmless (we only dirty already freed buffer)
68 and thus for nojournal mode we leave the dirtying outside of the buffer
69 lock.
71 Reported-by: Sage Weil <sage@inktank.com>
72 Signed-off-by: Jan Kara <jack@suse.cz>
73 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
74 Cc: stable@vger.kernel.org
75 ---
76  fs/ext4/xattr.c | 23 +++++++++++++++++++----
77  1 file changed, 19 insertions(+), 4 deletions(-)
79 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
80 index e175e94116ac..55e611c1513c 100644
81 --- a/fs/ext4/xattr.c
82 +++ b/fs/ext4/xattr.c
83 @@ -517,8 +517,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
84  }
86  /*
87 - * Release the xattr block BH: If the reference count is > 1, decrement
88 - * it; otherwise free the block.
89 + * Release the xattr block BH: If the reference count is > 1, decrement it;
90 + * otherwise free the block.
91   */
92  static void
93  ext4_xattr_release_block(handle_t *handle, struct inode *inode,
94 @@ -538,16 +538,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
95                 if (ce)
96                         mb_cache_entry_free(ce);
97                 get_bh(bh);
98 +               unlock_buffer(bh);
99                 ext4_free_blocks(handle, inode, bh, 0, 1,
100                                  EXT4_FREE_BLOCKS_METADATA |
101                                  EXT4_FREE_BLOCKS_FORGET);
102 -               unlock_buffer(bh);
103         } else {
104                 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
105                 if (ce)
106                         mb_cache_entry_release(ce);
107 +               /*
108 +                * Beware of this ugliness: Releasing of xattr block references
109 +                * from different inodes can race and so we have to protect
110 +                * from a race where someone else frees the block (and releases
111 +                * its journal_head) before we are done dirtying the buffer. In
112 +                * nojournal mode this race is harmless and we actually cannot
113 +                * call ext4_handle_dirty_xattr_block() with locked buffer as
114 +                * that function can call sync_dirty_buffer() so for that case
115 +                * we handle the dirtying after unlocking the buffer.
116 +                */
117 +               if (ext4_handle_valid(handle))
118 +                       error = ext4_handle_dirty_xattr_block(handle, inode,
119 +                                                             bh);
120                 unlock_buffer(bh);
121 -               error = ext4_handle_dirty_xattr_block(handle, inode, bh);
122 +               if (!ext4_handle_valid(handle))
123 +                       error = ext4_handle_dirty_xattr_block(handle, inode,
124 +                                                             bh);
125                 if (IS_SYNC(inode))
126                         ext4_handle_sync(handle);
127                 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
128 -- 
129 1.8.1.4