update and add new version of speedup-jbd2_journal_dirty_metadata
[ext4-patch-queue.git] / fix-race-between-truncate-and-ext4_journalled_writepage
blob46e8145522594e818b7c304a2d262fc5889d0135
1 ext4: fix race between truncate and __ext4_journalled_writepage()
3 The commit cf108bca465d: "ext4: Invert the locking order of page_lock
4 and transaction start" caused __ext4_journalled_writepage() to drop
5 the page lock before the page was written back, as part of changing
6 the locking order to jbd2_journal_start -> page_lock.  However, this
7 introduced a potential race if there was a truncate racing with the
8 data=journal writeback mode.
10 Fix this by grabbing the page lock after starting the journal handle,
11 and then checking to see if the page had gotten truncated out from
12 under us.
14 This fixes a number of different warnings or BUG_ON's when running
15 xfstests generic/086 in data=journal mode, including:
17 jbd2_journal_dirty_metadata: vdc-8: bad jh for block 115643: transaction (ee3fe7c0, 164),
18 jh->b_transaction (  (null), 0), jh->b_next_transaction (  (null), 0), jlist 0
20                           - and -
22 kernel BUG at /usr/projects/linux/ext4/fs/jbd2/transaction.c:2200!
23     ...
24 Call Trace:
25  [<c02b2ded>] ? __ext4_journalled_invalidatepage+0x117/0x117
26  [<c02b2de5>] __ext4_journalled_invalidatepage+0x10f/0x117
27  [<c02b2ded>] ? __ext4_journalled_invalidatepage+0x117/0x117
28  [<c027d883>] ? lock_buffer+0x36/0x36
29  [<c02b2dfa>] ext4_journalled_invalidatepage+0xd/0x22
30  [<c0229139>] do_invalidatepage+0x22/0x26
31  [<c0229198>] truncate_inode_page+0x5b/0x85
32  [<c022934b>] truncate_inode_pages_range+0x156/0x38c
33  [<c0229592>] truncate_inode_pages+0x11/0x15
34  [<c022962d>] truncate_pagecache+0x55/0x71
35  [<c02b913b>] ext4_setattr+0x4a9/0x560
36  [<c01ca542>] ? current_kernel_time+0x10/0x44
37  [<c026c4d8>] notify_change+0x1c7/0x2be
38  [<c0256a00>] do_truncate+0x65/0x85
39  [<c0226f31>] ? file_ra_state_init+0x12/0x29
41                           - and -
43 WARNING: CPU: 1 PID: 1331 at /usr/projects/linux/ext4/fs/jbd2/transaction.c:1396
44 jbd2_journal_dirty_metadata+0x14a/0x1ae()
45     ...
46 Call Trace:
47  [<c01b879f>] ? console_unlock+0x3a1/0x3ce
48  [<c082cbb4>] dump_stack+0x48/0x60
49  [<c0178b65>] warn_slowpath_common+0x89/0xa0
50  [<c02ef2cf>] ? jbd2_journal_dirty_metadata+0x14a/0x1ae
51  [<c0178bef>] warn_slowpath_null+0x14/0x18
52  [<c02ef2cf>] jbd2_journal_dirty_metadata+0x14a/0x1ae
53  [<c02d8615>] __ext4_handle_dirty_metadata+0xd4/0x19d
54  [<c02b2f44>] write_end_fn+0x40/0x53
55  [<c02b4a16>] ext4_walk_page_buffers+0x4e/0x6a
56  [<c02b59e7>] ext4_writepage+0x354/0x3b8
57  [<c02b2f04>] ? mpage_release_unused_pages+0xd4/0xd4
58  [<c02b1b21>] ? wait_on_buffer+0x2c/0x2c
59  [<c02b5a4b>] ? ext4_writepage+0x3b8/0x3b8
60  [<c02b5a5b>] __writepage+0x10/0x2e
61  [<c0225956>] write_cache_pages+0x22d/0x32c
62  [<c02b5a4b>] ? ext4_writepage+0x3b8/0x3b8
63  [<c02b6ee8>] ext4_writepages+0x102/0x607
64  [<c019adfe>] ? sched_clock_local+0x10/0x10e
65  [<c01a8a7c>] ? __lock_is_held+0x2e/0x44
66  [<c01a8ad5>] ? lock_is_held+0x43/0x51
67  [<c0226dff>] do_writepages+0x1c/0x29
68  [<c0276bed>] __writeback_single_inode+0xc3/0x545
69  [<c0277c07>] writeback_sb_inodes+0x21f/0x36d
70     ...
72 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
73 Cc: stable@vger.kernel.org
74 ---
75  fs/ext4/inode.c | 23 +++++++++++++++++++----
76  1 file changed, 19 insertions(+), 4 deletions(-)
78 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
79 index 0554b0b..263a46c 100644
80 --- a/fs/ext4/inode.c
81 +++ b/fs/ext4/inode.c
82 @@ -1701,19 +1701,32 @@ static int __ext4_journalled_writepage(struct page *page,
83                 ext4_walk_page_buffers(handle, page_bufs, 0, len,
84                                        NULL, bget_one);
85         }
86 -       /* As soon as we unlock the page, it can go away, but we have
87 -        * references to buffers so we are safe */
88 +       /*
89 +        * We need to release the page lock before we start the
90 +        * journal, so grab a reference so the page won't disappear
91 +        * out from under us.
92 +        */
93 +       get_page(page);
94         unlock_page(page);
96         handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
97                                     ext4_writepage_trans_blocks(inode));
98         if (IS_ERR(handle)) {
99                 ret = PTR_ERR(handle);
100 -               goto out;
101 +               put_page(page);
102 +               goto out_no_pagelock;
103         }
105         BUG_ON(!ext4_handle_valid(handle));
107 +       lock_page(page);
108 +       put_page(page);
109 +       if (page->mapping != mapping) {
110 +               /* The page got truncated from under us */
111 +               ext4_journal_stop(handle);
112 +               ret = 0;
113 +               goto out;
114 +       }
116         if (inline_data) {
117                 BUFFER_TRACE(inode_bh, "get write access");
118                 ret = ext4_journal_get_write_access(handle, inode_bh);
119 @@ -1739,6 +1752,8 @@ static int __ext4_journalled_writepage(struct page *page,
120                                        NULL, bput_one);
121         ext4_set_inode_state(inode, EXT4_STATE_JDATA);
122  out:
123 +       unlock_page(page);
124 +out_no_pagelock:
125         brelse(inode_bh);
126         return ret;