ext4: copy mpage_readpage() and mpage_readpages() into fs/ext4/readpage.c

Move the functions which we need from fs/mpage.c into
fs/ext4/readpage.c. This will allow us to proceed with the
refactoring of these functions and their eventual merger with the
functions in fs/ext4/page_io.c.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/readpage.c | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 320 insertions(+), 6 deletions(-)
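
For reference, these two entry points are wired up through ext4's
address_space_operations, which this patch does not change; schematically
(a sketch of the existing hookup in fs/ext4/inode.c):

	static const struct address_space_operations ext4_aops = {
		.readpage	= ext4_readpage,
		.readpages	= ext4_readpages,
		/* ... */
	};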

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index b5249db..3b29da1 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -23,6 +23,7 @@
 #include <linux/ratelimit.h>
 #include <linux/aio.h>
 #include <linux/bitops.h>
+#include <linux/cleancache.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -30,31 +31,344 @@
 
 #include <trace/events/ext4.h>
 
-int ext4_readpage(struct file *file, struct page *page)
+/*
+ * I/O completion handler for multipage BIOs.
+ *
+ * The mpage code never puts partial pages into a BIO (except for end-of-file).
+ * If a page does not map to a contiguous run of blocks then it simply falls
+ * back to block_read_full_page().
+ *
+ * Why is this? If a page's completion depends on a number of different BIOs
+ * which can complete in any order (or at the same time) then determining the
+ * status of that page is hard. See end_buffer_async_read() for the details.
+ * There is no point in duplicating all that complexity.
+ */
+static void mpage_end_io(struct bio *bio, int err)
+{
+	struct bio_vec *bv;
+	int i;
+
+	bio_for_each_segment_all(bv, bio, i) {
+		struct page *page = bv->bv_page;
+		page_endio(page, bio_data_dir(bio), err);
+	}
+
+	bio_put(bio);
+}
+
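+/*
+ * Set the I/O completion handler and send the bio on its way.  Always
+ * returns NULL so that the caller can simply reset its bio pointer.
+ */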
+static struct bio *mpage_bio_submit(int rw, struct bio *bio)
+{
+	bio->bi_end_io = mpage_end_io;
+	submit_bio(rw, bio);
+	return NULL;
+}
+
+static struct bio *
+mpage_alloc(struct block_device *bdev,
+		sector_t first_sector, int nr_vecs,
+		gfp_t gfp_flags)
+{
+	struct bio *bio;
+
+	bio = bio_alloc(gfp_flags, nr_vecs);
+
+	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+		while (!bio && (nr_vecs /= 2))
+			bio = bio_alloc(gfp_flags, nr_vecs);
+	}
+
+	if (bio) {
+		bio->bi_bdev = bdev;
+		bio->bi_iter.bi_sector = first_sector;
+	}
+	return bio;
+}
+
+/*
+ * support function for mpage_readpages. The fs supplied get_block might
+ * return an up to date buffer. This is used to map that buffer into
+ * the page, which allows readpage to avoid triggering a duplicate call
+ * to get_block.
+ *
+ * The idea is to avoid adding buffers to pages that don't already have
+ * them. So when the buffer is up to date and the page size == block size,
+ * this marks the page up to date instead of adding new buffers.
+ */
+static void
+map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
+{
+	struct inode *inode = page->mapping->host;
+	struct buffer_head *page_bh, *head;
+	int block = 0;
+
+	if (!page_has_buffers(page)) {
+		/*
+		 * don't make any buffers if there is only one buffer on
+		 * the page and the page just needs to be set up to date
+		 */
+		if (inode->i_blkbits == PAGE_CACHE_SHIFT &&
+		    buffer_uptodate(bh)) {
+			SetPageUptodate(page);
+			return;
+		}
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+	}
+	head = page_buffers(page);
+	page_bh = head;
+	do {
+		if (block == page_block) {
+			page_bh->b_state = bh->b_state;
+			page_bh->b_bdev = bh->b_bdev;
+			page_bh->b_blocknr = bh->b_blocknr;
+			break;
+		}
+		page_bh = page_bh->b_this_page;
+		block++;
+	} while (page_bh != head);
+}
+
+/*
+ * This is the worker routine which does all the work of mapping the disk
+ * blocks and constructs largest possible bios, submits them for IO if the
+ * blocks are not contiguous on the disk.
+ *
+ * We pass a buffer_head back and forth and use its buffer_mapped() flag to
+ * represent the validity of its disk mapping and to decide when to do the next
+ * get_block() call.
+ */
+static struct bio *
+do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
+		sector_t *last_block_in_bio, struct buffer_head *map_bh,
+		unsigned long *first_logical_block, get_block_t get_block)
 {
-	int ret = -EAGAIN;
 	struct inode *inode = page->mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
+	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocksize = 1 << blkbits;
+	sector_t block_in_file;
+	sector_t last_block;
+	sector_t last_block_in_file;
+	sector_t blocks[MAX_BUF_PER_PAGE];
+	unsigned page_block;
+	unsigned first_hole = blocks_per_page;
+	struct block_device *bdev = NULL;
+	int length;
+	int fully_mapped = 1;
+	unsigned nblocks;
+	unsigned relative_block;
+
+	if (page_has_buffers(page))
+		goto confused;
+
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	last_block = block_in_file + nr_pages * blocks_per_page;
+	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	if (last_block > last_block_in_file)
+		last_block = last_block_in_file;
+	page_block = 0;
+
+	/*
+	 * Map blocks using the result from the previous get_blocks call first.
+	 */
+	nblocks = map_bh->b_size >> blkbits;
+	if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
+			block_in_file < (*first_logical_block + nblocks)) {
+		unsigned map_offset = block_in_file - *first_logical_block;
+		unsigned last = nblocks - map_offset;
+
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == last) {
+				clear_buffer_mapped(map_bh);
+				break;
+			}
+			if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr + map_offset +
+						relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	/*
+	 * Then do more get_blocks calls until we are done with this page.
+	 */
+	map_bh->b_page = page;
+	while (page_block < blocks_per_page) {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
+
+		if (block_in_file < last_block) {
+			map_bh->b_size = (last_block-block_in_file) << blkbits;
+			if (get_block(inode, block_in_file, map_bh, 0))
+				goto confused;
+			*first_logical_block = block_in_file;
+		}
+
+		if (!buffer_mapped(map_bh)) {
+			fully_mapped = 0;
+			if (first_hole == blocks_per_page)
+				first_hole = page_block;
+			page_block++;
+			block_in_file++;
+			continue;
+		}
+
+		/* some filesystems will copy data into the page during
+		 * the get_block call, in which case we don't want to
+		 * read it again.  map_buffer_to_page copies the data
+		 * we just collected from get_block into the page's buffers
+		 * so readpage doesn't have to repeat the get_block call
+		 */
+		if (buffer_uptodate(map_bh)) {
+			map_buffer_to_page(page, map_bh, page_block);
+			goto confused;
+		}
+
+		if (first_hole != blocks_per_page)
+			goto confused;		/* hole -> non-hole */
+
+		/* Contiguous blocks? */
+		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
+			goto confused;
+		nblocks = map_bh->b_size >> blkbits;
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == nblocks) {
+				clear_buffer_mapped(map_bh);
+				break;
+			} else if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr+relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	if (first_hole != blocks_per_page) {
+		zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
+		if (first_hole == 0) {
+			SetPageUptodate(page);
+			unlock_page(page);
+			goto out;
+		}
+	} else if (fully_mapped) {
+		SetPageMappedToDisk(page);
+	}
+
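+	/*
+	 * If the page is fully mapped to disk and consists of a single
+	 * block, check cleancache before issuing real I/O; a hit fills
+	 * the page, so mark it uptodate and take the "confused" exit,
+	 * which will just unlock it.
+	 */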
+	if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
+	    cleancache_get_page(page) == 0) {
+		SetPageUptodate(page);
+		goto confused;
+	}
+
+	/*
+	 * This page will go to BIO. Do we need to send this BIO off first?
+	 */
+	if (bio && (*last_block_in_bio != blocks[0] - 1))
+		bio = mpage_bio_submit(READ, bio);
+
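+	/*
+	 * No usable bio to append to: for a fully mapped page try the
+	 * block device's ->rw_page fast path first, otherwise allocate a
+	 * new bio sized for the remaining pages.
+	 */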
+alloc_new:
+	if (bio == NULL) {
+		if (first_hole == blocks_per_page) {
+			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
+								page))
+				goto out;
+		}
+		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+				min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
+				GFP_KERNEL);
+		if (bio == NULL)
+			goto confused;
+	}
+
+	length = first_hole << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
+		bio = mpage_bio_submit(READ, bio);
+		goto alloc_new;
+	}
+
+	relative_block = block_in_file - *first_logical_block;
+	nblocks = map_bh->b_size >> blkbits;
+	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
+	    (first_hole != blocks_per_page))
+		bio = mpage_bio_submit(READ, bio);
+	else
+		*last_block_in_bio = blocks[blocks_per_page - 1];
+out:
+	return bio;
+
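+	/*
+	 * Fallback: flush any bio we have built up, then either read the
+	 * page a block at a time via block_read_full_page() or, if it is
+	 * already uptodate (e.g. filled from cleancache), just unlock it.
+	 */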
+confused:
+	if (bio)
+		bio = mpage_bio_submit(READ, bio);
+	if (!PageUptodate(page))
+		block_read_full_page(page, get_block);
+	else
+		unlock_page(page);
+	goto out;
+}
+
+int ext4_readpage(struct file *file, struct page *page)
+{
+	unsigned long first_logical_block = 0;
+	struct buffer_head map_bh;
+	struct inode *inode = page->mapping->host;
+	struct bio *bio = NULL;
+	sector_t last_block_in_bio = 0;
+	int ret = -EAGAIN;
+
 	trace_ext4_readpage(page);
 
 	if (ext4_has_inline_data(inode))
 		ret = ext4_readpage_inline(inode, page);
 
-	if (ret == -EAGAIN)
-		return mpage_readpage(page, ext4_get_block);
+	if (ret != -EAGAIN)
+		return ret;
 
-	return ret;
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
+	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
+				&map_bh, &first_logical_block, ext4_get_block);
+	if (bio)
+		mpage_bio_submit(READ, bio);
+	return 0;
 }
 
 int ext4_readpages(struct file *file, struct address_space *mapping,
 		   struct list_head *pages, unsigned nr_pages)
 {
 	struct inode *inode = mapping->host;
+	struct bio *bio = NULL;
+	unsigned page_idx;
+	sector_t last_block_in_bio = 0;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
 
 	/* If the file has inline data, no need to do readpages. */
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
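+	/*
+	 * The page at pages->prev has the lowest file offset, so issue
+	 * the reads in pages->prev to pages->next order.
+	 */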
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page = list_entry(pages->prev, struct page, lru);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, mapping,
+					page->index, GFP_KERNEL)) {
+			bio = do_mpage_readpage(bio, page,
+					nr_pages - page_idx,
+					&last_block_in_bio, &map_bh,
+					&first_logical_block,
+					ext4_get_block);
+		}
+		page_cache_release(page);
+	}
+	BUG_ON(!list_empty(pages));
+	if (bio)
+		mpage_bio_submit(READ, bio);
+	return 0;
 }