Final changes before pull request to Linus
[ext4-patch-queue.git] / fix-races-between-buffered-IO-and-collapse-insert-range
blobd5c7b94fed94215937ad7abc99a5d5f52f186963
1 ext4: fix races between buffered IO and collapse / insert range
3 From: Jan Kara <jack@suse.com>
5 The current code implementing FALLOC_FL_COLLAPSE_RANGE and
6 FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page
7 faults. If a buffered write or a write via mmap manages to squeeze between
8 filemap_write_and_wait_range() and truncate_pagecache() in the
9 fallocate implementations, the written data is simply discarded by
10 truncate_pagecache() although it should have been shifted.
12 Fix the problem by moving the filemap_write_and_wait_range() call into the
13 section protected by i_mutex and i_mmap_sem. That way we are protected
14 against races with both buffered writes and page faults.
16 Signed-off-by: Jan Kara <jack@suse.com>
17 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
18 ---
19  fs/ext4/extents.c | 62 +++++++++++++++++++++++++++++--------------------------
20  1 file changed, 33 insertions(+), 29 deletions(-)
22 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
23 index 66ab89b58c1f..892245a55c53 100644
24 --- a/fs/ext4/extents.c
25 +++ b/fs/ext4/extents.c
26 @@ -5483,21 +5483,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
27                         return ret;
28         }
30 -       /*
31 -        * Need to round down offset to be aligned with page size boundary
32 -        * for page size > block size.
33 -        */
34 -       ioffset = round_down(offset, PAGE_SIZE);
36 -       /* Write out all dirty pages */
37 -       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
38 -                                          LLONG_MAX);
39 -       if (ret)
40 -               return ret;
42 -       /* Take mutex lock */
43         mutex_lock(&inode->i_mutex);
45         /*
46          * There is no need to overlap collapse range with EOF, in which case
47          * it is effectively a truncate operation
48 @@ -5518,10 +5504,32 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
49         inode_dio_wait(inode);
51         /*
52 -        * Prevent page faults from reinstantiating pages we have released from
53 +        * Prevent page faults from reinstantiating pages we have released from
54          * page cache.
55          */
56         down_write(&EXT4_I(inode)->i_mmap_sem);
57 +       /*
58 +        * Need to round down offset to be aligned with page size boundary
59 +        * for page size > block size.
60 +        */
61 +       ioffset = round_down(offset, PAGE_SIZE);
62 +       /*
63 +        * Write tail of last page before removed range since it will get
64 +        * removed from page cache below. The end argument is a byte offset.
65 +        */
66 +       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
67 +                                          offset);
68 +       if (ret)
69 +               goto out_mmap;
70 +       /*
71 +        * Write data that will be shifted to preserve them when discarding
72 +        * page cache below. We are also protected from pages becoming dirty
73 +        * by i_mmap_sem.
74 +        */
75 +       ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
76 +                                          LLONG_MAX);
77 +       if (ret)
78 +               goto out_mmap;
79         truncate_pagecache(inode, ioffset);
81         credits = ext4_writepage_trans_blocks(inode);
82 @@ -5622,21 +5630,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
83                         return ret;
84         }
86 -       /*
87 -        * Need to round down to align start offset to page size boundary
88 -        * for page size > block size.
89 -        */
90 -       ioffset = round_down(offset, PAGE_SIZE);
92 -       /* Write out all dirty pages */
93 -       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
94 -                       LLONG_MAX);
95 -       if (ret)
96 -               return ret;
98 -       /* Take mutex lock */
99         mutex_lock(&inode->i_mutex);
101         /* Currently just for extent based files */
102         if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
103                 ret = -EOPNOTSUPP;
104 @@ -5664,6 +5658,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
105          * page cache.
106          */
107         down_write(&EXT4_I(inode)->i_mmap_sem);
108 +       /*
109 +        * Need to round down to align start offset to page size boundary
110 +        * for page size > block size.
111 +        */
112 +       ioffset = round_down(offset, PAGE_SIZE);
113 +       /* Write out all dirty pages */
114 +       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
115 +                       LLONG_MAX);
116 +       if (ret)
117 +               goto out_mmap;
118         truncate_pagecache(inode, ioffset);
120         credits = ext4_writepage_trans_blocks(inode);
121 -- 
122 2.1.4