1 ext4: fix ext4_writepages() in presence of truncate
3 From: Jan Kara <jack@suse.cz>
5 Inode size can arbitrarily change while writeback is in progress. When
6 ext4_writepages() has prepared a long extent for mapping and truncate
7 then reduces i_size, mpage_map_and_submit_buffers() will always map just
8 one buffer in a page instead of all of them due to the lblk < blocks check.
9 So we end up not using all the blocks we've allocated (thus leaking them),
10 and delalloc accounting also goes wrong, manifesting as a warning like:
12 ext4_da_release_space:1333: ext4_da_release_space: ino 12, to_free 1
13 with only 0 reserved data blocks
15 Note that the problem can happen only when blocksize < pagesize because
16 otherwise we have only a single buffer in the page.
18 Fix the problem by removing the size check from the mapping loop. We
19 have an extent allocated so we have to use it all before checking for
20 i_size. We also rename add_page_bufs_to_extent() to
21 mpage_process_page_bufs() and make that function submit the page for IO
22 if all buffers (up to EOF) in it are mapped.
24 Reported-by: Dave Jones <davej@redhat.com>
25 Reported-by: Zheng Liu <gnehzuil.liu@gmail.com>
26 Signed-off-by: Jan Kara <jack@suse.cz>
27 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
28 Cc: stable@vger.kernel.org
30 fs/ext4/inode.c | 107 +++++++++++++++++++++++++++++++++++++++-------------------------
31 1 file changed, 66 insertions(+), 41 deletions(-)
33 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
34 index 787497d..19fa2e0 100644
37 @@ -1890,6 +1890,26 @@ static int ext4_writepage(struct page *page,
41 +static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
44 + loff_t size = i_size_read(mpd->inode);
47 + BUG_ON(page->index != mpd->first_page);
48 + if (page->index == size >> PAGE_CACHE_SHIFT)
49 + len = size & ~PAGE_CACHE_MASK;
51 + len = PAGE_CACHE_SIZE;
52 + clear_page_dirty_for_io(page);
53 + err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
55 + mpd->wbc->nr_to_write--;
61 #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
64 @@ -1948,12 +1968,29 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
68 -static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
69 - struct buffer_head *head,
70 - struct buffer_head *bh,
73 + * mpage_process_page_bufs - submit page buffers for IO or add them to extent
75 + * @mpd - extent of blocks for mapping
76 + * @head - the first buffer in the page
77 + * @bh - buffer we should start processing from
78 + * @lblk - logical number of the block in the file corresponding to @bh
80 + * Walk through page buffers from @bh up to @head (exclusive) and either submit
81 + * the page for IO if all buffers in this page were mapped and there's no
82 + * accumulated extent of buffers to map or add buffers in the page to the
83 + * extent of buffers to map. The function returns 1 if the caller can continue
84 + * by processing the next page, 0 if it should stop adding buffers to the
85 + * extent to map because we cannot extend it anymore. It can also return value
86 + * < 0 in case of error during IO submission.
88 +static int mpage_process_page_bufs(struct mpage_da_data *mpd,
89 + struct buffer_head *head,
90 + struct buffer_head *bh,
93 struct inode *inode = mpd->inode;
95 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
98 @@ -1963,32 +2000,18 @@ static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
99 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
100 /* Found extent to map? */
104 /* Everything mapped so far and we hit EOF */
108 } while (lblk++, (bh = bh->b_this_page) != head);
112 -static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
115 - loff_t size = i_size_read(mpd->inode);
118 - BUG_ON(page->index != mpd->first_page);
119 - if (page->index == size >> PAGE_CACHE_SHIFT)
120 - len = size & ~PAGE_CACHE_MASK;
122 - len = PAGE_CACHE_SIZE;
123 - clear_page_dirty_for_io(page);
124 - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
126 - mpd->wbc->nr_to_write--;
130 + /* So far everything mapped? Submit the page for IO. */
131 + if (mpd->map.m_len == 0) {
132 + err = mpage_submit_page(mpd, head->b_page);
136 + return lblk < blocks;
140 @@ -2012,8 +2035,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
141 struct inode *inode = mpd->inode;
142 struct buffer_head *head, *bh;
143 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
144 - ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
145 - >> inode->i_blkbits;
149 @@ -2048,18 +2069,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
152 mpd->map.m_flags = 0;
153 - add_page_bufs_to_extent(mpd, head, bh,
156 + * FIXME: If dioread_nolock supports
157 + * blocksize < pagesize, we need to make
158 + * sure we add size mapped so far to
159 + * io_end->size as the following call
160 + * can submit the page for IO.
162 + err = mpage_process_page_bufs(mpd, head,
164 pagevec_release(&pvec);
170 if (buffer_delay(bh)) {
171 clear_buffer_delay(bh);
172 bh->b_blocknr = pblock++;
174 clear_buffer_unwritten(bh);
175 - } while (++lblk < blocks &&
176 - (bh = bh->b_this_page) != head);
177 + } while (lblk++, (bh = bh->b_this_page) != head);
180 * FIXME: This is going to break if dioread_nolock
181 @@ -2328,14 +2357,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
182 lblk = ((ext4_lblk_t)page->index) <<
183 (PAGE_CACHE_SHIFT - blkbits);
184 head = page_buffers(page);
185 - if (!add_page_bufs_to_extent(mpd, head, head, lblk))
186 + err = mpage_process_page_bufs(mpd, head, head, lblk);
189 - /* So far everything mapped? Submit the page for IO. */
190 - if (mpd->map.m_len == 0) {
191 - err = mpage_submit_page(mpd, page);
198 * Accumulated enough dirty pages? This doesn't apply