1 ext4: fix races between buffered IO and collapse / insert range
3 From: Jan Kara <jack@suse.com>
5 Current code implementing FALLOC_FL_COLLAPSE_RANGE and
6 FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page
7 faults. If buffered write or write via mmap manages to squeeze between
8 filemap_write_and_wait_range() and truncate_pagecache() in the
9 fallocate implementations, the written data is simply discarded by
10 truncate_pagecache() although it should have been shifted.
12 Fix the problem by moving filemap_write_and_wait_range() call inside
13 i_mutex and i_mmap_sem. That way we are protected against races with
14 both buffered writes and page faults.
16 Signed-off-by: Jan Kara <jack@suse.com>
17 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
19 fs/ext4/extents.c | 62 +++++++++++++++++++++++++++++--------------------------
20 1 file changed, 33 insertions(+), 29 deletions(-)
22 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
23 index 66ab89b58c1f..892245a55c53 100644
24 --- a/fs/ext4/extents.c
25 +++ b/fs/ext4/extents.c
26 @@ -5483,21 +5483,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
31 - * Need to round down offset to be aligned with page size boundary
32 - * for page size > block size.
34 - ioffset = round_down(offset, PAGE_SIZE);
36 - /* Write out all dirty pages */
37 - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
42 - /* Take mutex lock */
43 mutex_lock(&inode->i_mutex);
46 * There is no need to overlap collapse range with EOF, in which case
47 * it is effectively a truncate operation
48 @@ -5518,10 +5504,32 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
49 inode_dio_wait(inode);
52 - * Prevent page faults from reinstantiating pages we have released from
53 + * Prevent page faults from reinstantiating pages we have released from
56 down_write(&EXT4_I(inode)->i_mmap_sem);
58 + * Need to round down offset to be aligned with page size boundary
59 + * for page size > block size.
61 + ioffset = round_down(offset, PAGE_SIZE);
63 + * Write tail of last page before removed range since it will get
64 + * removed from page cache below.
66 + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
71 + * Write data that will be shifted to preserve them when discarding
72 + * page cache below. We are also protected from pages becoming dirty
75 + ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
79 truncate_pagecache(inode, ioffset);
81 credits = ext4_writepage_trans_blocks(inode);
82 @@ -5622,21 +5630,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
87 - * Need to round down to align start offset to page size boundary
88 - * for page size > block size.
90 - ioffset = round_down(offset, PAGE_SIZE);
92 - /* Write out all dirty pages */
93 - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
98 - /* Take mutex lock */
99 mutex_lock(&inode->i_mutex);
101 /* Currently just for extent based files */
102 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
104 @@ -5664,6 +5658,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
107 down_write(&EXT4_I(inode)->i_mmap_sem);
109 + * Need to round down to align start offset to page size boundary
110 + * for page size > block size.
112 + ioffset = round_down(offset, PAGE_SIZE);
113 + /* Write out all dirty pages */
114 + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
118 truncate_pagecache(inode, ioffset);
120 credits = ext4_writepage_trans_blocks(inode);