Tag 1k block bug fixes for stable
[ext4-patch-queue.git] / fix-ext4_writepages-in-presence-of-truncate
blobaed94eb87d4e8ce62f076676ac9d4afb497d8e35
1 ext4: fix ext4_writepages() in presence of truncate
3 From: Jan Kara <jack@suse.cz>
5 Inode size can arbitrarily change while writeback is in progress. When
6 ext4_writepages() has prepared a long extent for mapping and truncate
7 then reduces i_size, mpage_map_and_submit_buffers() will always map just
8 one buffer in a page instead of all of them due to lblk < blocks check.
9 So we end up not using all blocks we've allocated (thus leaking them)
10 and also delalloc accounting goes wrong manifesting as a warning like:
12 ext4_da_release_space:1333: ext4_da_release_space: ino 12, to_free 1
13 with only 0 reserved data blocks
15 Note that the problem can happen only when blocksize < pagesize because
16 otherwise we have only a single buffer in the page.
18 Fix the problem by removing the size check from the mapping loop. We
19 have an extent allocated so we have to use it all before checking for
20 i_size. We also rename add_page_bufs_to_extent() to
21 mpage_process_page_bufs() and make that function submit the page for IO
22 if all buffers (up to EOF) in it are mapped.
24 Reported-by: Dave Jones <davej@redhat.com>
25 Reported-by: Zheng Liu <gnehzuil.liu@gmail.com>
26 Signed-off-by: Jan Kara <jack@suse.cz>
27 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
28 Cc: stable@vger.kernel.org
29 ---
30  fs/ext4/inode.c | 107 +++++++++++++++++++++++++++++++++++++++-------------------------
31  1 file changed, 66 insertions(+), 41 deletions(-)
33 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
34 index 787497d..19fa2e0 100644
35 --- a/fs/ext4/inode.c
36 +++ b/fs/ext4/inode.c
37 @@ -1890,6 +1890,26 @@ static int ext4_writepage(struct page *page,
38         return ret;
39  }
41 +static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
43 +       int len;
44 +       loff_t size = i_size_read(mpd->inode);
45 +       int err;
47 +       BUG_ON(page->index != mpd->first_page);
48 +       if (page->index == size >> PAGE_CACHE_SHIFT)
49 +               len = size & ~PAGE_CACHE_MASK;
50 +       else
51 +               len = PAGE_CACHE_SIZE;
52 +       clear_page_dirty_for_io(page);
53 +       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
54 +       if (!err)
55 +               mpd->wbc->nr_to_write--;
56 +       mpd->first_page++;
58 +       return err;
61  #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
63  /*
64 @@ -1948,12 +1968,29 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
65         return false;
66  }
68 -static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
69 -                                   struct buffer_head *head,
70 -                                   struct buffer_head *bh,
71 -                                   ext4_lblk_t lblk)
72 +/*
73 + * mpage_process_page_bufs - submit page buffers for IO or add them to extent
74 + *
75 + * @mpd - extent of blocks for mapping
76 + * @head - the first buffer in the page
77 + * @bh - buffer we should start processing from
78 + * @lblk - logical number of the block in the file corresponding to @bh
79 + *
80 + * Walk through page buffers from @bh upto @head (exclusive) and either submit
81 + * the page for IO if all buffers in this page were mapped and there's no
82 + * accumulated extent of buffers to map or add buffers in the page to the
83 + * extent of buffers to map. The function returns 1 if the caller can continue
84 + * by processing the next page, 0 if it should stop adding buffers to the
85 + * extent to map because we cannot extend it anymore. It can also return value
86 + * < 0 in case of error during IO submission.
87 + */
88 +static int mpage_process_page_bufs(struct mpage_da_data *mpd,
89 +                                  struct buffer_head *head,
90 +                                  struct buffer_head *bh,
91 +                                  ext4_lblk_t lblk)
92  {
93         struct inode *inode = mpd->inode;
94 +       int err;
95         ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
96                                                         >> inode->i_blkbits;
98 @@ -1963,32 +2000,18 @@ static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
99                 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
100                         /* Found extent to map? */
101                         if (mpd->map.m_len)
102 -                               return false;
103 +                               return 0;
104                         /* Everything mapped so far and we hit EOF */
105 -                       return true;
106 +                       break;
107                 }
108         } while (lblk++, (bh = bh->b_this_page) != head);
109 -       return true;
112 -static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
114 -       int len;
115 -       loff_t size = i_size_read(mpd->inode);
116 -       int err;
118 -       BUG_ON(page->index != mpd->first_page);
119 -       if (page->index == size >> PAGE_CACHE_SHIFT)
120 -               len = size & ~PAGE_CACHE_MASK;
121 -       else
122 -               len = PAGE_CACHE_SIZE;
123 -       clear_page_dirty_for_io(page);
124 -       err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
125 -       if (!err)
126 -               mpd->wbc->nr_to_write--;
127 -       mpd->first_page++;
129 -       return err;
130 +       /* So far everything mapped? Submit the page for IO. */
131 +       if (mpd->map.m_len == 0) {
132 +               err = mpage_submit_page(mpd, head->b_page);
133 +               if (err < 0)
134 +                       return err;
135 +       }
136 +       return lblk < blocks;
139  /*
140 @@ -2012,8 +2035,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
141         struct inode *inode = mpd->inode;
142         struct buffer_head *head, *bh;
143         int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
144 -       ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
145 -                                                       >> inode->i_blkbits;
146         pgoff_t start, end;
147         ext4_lblk_t lblk;
148         sector_t pblock;
149 @@ -2048,18 +2069,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
150                                          */
151                                         mpd->map.m_len = 0;
152                                         mpd->map.m_flags = 0;
153 -                                       add_page_bufs_to_extent(mpd, head, bh,
154 -                                                               lblk);
155 +                                       /*
156 +                                        * FIXME: If dioread_nolock supports
157 +                                        * blocksize < pagesize, we need to make
158 +                                        * sure we add size mapped so far to
159 +                                        * io_end->size as the following call
160 +                                        * can submit the page for IO.
161 +                                        */
162 +                                       err = mpage_process_page_bufs(mpd, head,
163 +                                                                     bh, lblk);
164                                         pagevec_release(&pvec);
165 -                                       return 0;
166 +                                       if (err > 0)
167 +                                               err = 0;
168 +                                       return err;
169                                 }
170                                 if (buffer_delay(bh)) {
171                                         clear_buffer_delay(bh);
172                                         bh->b_blocknr = pblock++;
173                                 }
174                                 clear_buffer_unwritten(bh);
175 -                       } while (++lblk < blocks &&
176 -                                (bh = bh->b_this_page) != head);
177 +                       } while (lblk++, (bh = bh->b_this_page) != head);
179                         /*
180                          * FIXME: This is going to break if dioread_nolock
181 @@ -2328,14 +2357,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
182                         lblk = ((ext4_lblk_t)page->index) <<
183                                 (PAGE_CACHE_SHIFT - blkbits);
184                         head = page_buffers(page);
185 -                       if (!add_page_bufs_to_extent(mpd, head, head, lblk))
186 +                       err = mpage_process_page_bufs(mpd, head, head, lblk);
187 +                       if (err <= 0)
188                                 goto out;
189 -                       /* So far everything mapped? Submit the page for IO. */
190 -                       if (mpd->map.m_len == 0) {
191 -                               err = mpage_submit_page(mpd, page);
192 -                               if (err < 0)
193 -                                       goto out;
194 -                       }
195 +                       err = 0;
197                         /*
198                          * Accumulated enough dirty pages? This doesn't apply