1 ext4: fix race between truncate and __ext4_journalled_writepage()
3 The commit cf108bca465d: "ext4: Invert the locking order of page_lock
4 and transaction start" caused __ext4_journalled_writepage() to drop
5 the page lock before the page was written back, as part of changing
6 the locking order to jbd2_journal_start -> page_lock. However, this
7 introduced a potential race when a truncate runs concurrently with
8 writeback in data=journalled mode.
10 Fix this by grabbing the page lock after starting the journal handle,
11 and then checking to see if the page had gotten truncated out from under us.
14 This fixes a number of different warnings or BUG_ON's when running
15 xfstests generic/086 in data=journalled mode, including:
17 jbd2_journal_dirty_metadata: vdc-8: bad jh for block 115643: transaction
18 (ee3fe7c0, 164), jh->b_transaction ( (null), 0), jh->b_next_transaction ( (null), 0), jlist 0
22 kernel BUG at /usr/projects/linux/ext4/fs/jbd2/transaction.c:2200!
25 [<c02b2ded>] ? __ext4_journalled_invalidatepage+0x117/0x117
26 [<c02b2de5>] __ext4_journalled_invalidatepage+0x10f/0x117
27 [<c02b2ded>] ? __ext4_journalled_invalidatepage+0x117/0x117
28 [<c027d883>] ? lock_buffer+0x36/0x36
29 [<c02b2dfa>] ext4_journalled_invalidatepage+0xd/0x22
30 [<c0229139>] do_invalidatepage+0x22/0x26
31 [<c0229198>] truncate_inode_page+0x5b/0x85
32 [<c022934b>] truncate_inode_pages_range+0x156/0x38c
33 [<c0229592>] truncate_inode_pages+0x11/0x15
34 [<c022962d>] truncate_pagecache+0x55/0x71
35 [<c02b913b>] ext4_setattr+0x4a9/0x560
36 [<c01ca542>] ? current_kernel_time+0x10/0x44
37 [<c026c4d8>] notify_change+0x1c7/0x2be
38 [<c0256a00>] do_truncate+0x65/0x85
39 [<c0226f31>] ? file_ra_state_init+0x12/0x29
43 WARNING: CPU: 1 PID: 1331 at /usr/projects/linux/ext4/fs/jbd2/transaction.c:1396
44 jbd2_journal_dirty_metadata+0x14a/0x1ae()
47 [<c01b879f>] ? console_unlock+0x3a1/0x3ce
48 [<c082cbb4>] dump_stack+0x48/0x60
49 [<c0178b65>] warn_slowpath_common+0x89/0xa0
50 [<c02ef2cf>] ? jbd2_journal_dirty_metadata+0x14a/0x1ae
51 [<c0178bef>] warn_slowpath_null+0x14/0x18
52 [<c02ef2cf>] jbd2_journal_dirty_metadata+0x14a/0x1ae
53 [<c02d8615>] __ext4_handle_dirty_metadata+0xd4/0x19d
54 [<c02b2f44>] write_end_fn+0x40/0x53
55 [<c02b4a16>] ext4_walk_page_buffers+0x4e/0x6a
56 [<c02b59e7>] ext4_writepage+0x354/0x3b8
57 [<c02b2f04>] ? mpage_release_unused_pages+0xd4/0xd4
58 [<c02b1b21>] ? wait_on_buffer+0x2c/0x2c
59 [<c02b5a4b>] ? ext4_writepage+0x3b8/0x3b8
60 [<c02b5a5b>] __writepage+0x10/0x2e
61 [<c0225956>] write_cache_pages+0x22d/0x32c
62 [<c02b5a4b>] ? ext4_writepage+0x3b8/0x3b8
63 [<c02b6ee8>] ext4_writepages+0x102/0x607
64 [<c019adfe>] ? sched_clock_local+0x10/0x10e
65 [<c01a8a7c>] ? __lock_is_held+0x2e/0x44
66 [<c01a8ad5>] ? lock_is_held+0x43/0x51
67 [<c0226dff>] do_writepages+0x1c/0x29
68 [<c0276bed>] __writeback_single_inode+0xc3/0x545
69 [<c0277c07>] writeback_sb_inodes+0x21f/0x36d
72 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
73 Cc: stable@vger.kernel.org
75 fs/ext4/inode.c | 23 +++++++++++++++++++----
76 1 file changed, 19 insertions(+), 4 deletions(-)
78 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
79 index 0554b0b..263a46c 100644
82 @@ -1701,19 +1701,32 @@ static int __ext4_journalled_writepage(struct page *page,
83 ext4_walk_page_buffers(handle, page_bufs, 0, len,
86 - /* As soon as we unlock the page, it can go away, but we have
87 - * references to buffers so we are safe */
89 + * We need to release the page lock before we start the
90 + * journal, so grab a reference so the page won't disappear
91 + * out from under us.
96 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
97 ext4_writepage_trans_blocks(inode));
99 ret = PTR_ERR(handle);
102 + goto out_no_pagelock;
105 BUG_ON(!ext4_handle_valid(handle));
109 + if (page->mapping != mapping) {
110 + /* The page got truncated from under us */
111 + ext4_journal_stop(handle);
117 BUFFER_TRACE(inode_bh, "get write access");
118 ret = ext4_journal_get_write_access(handle, inode_bh);
119 @@ -1739,6 +1752,8 @@ static int __ext4_journalled_writepage(struct page *page,
121 ext4_set_inode_state(inode, EXT4_STATE_JDATA);