ext4: copy mpage_readpage() and mpage_readpages() into fs/ext4/readpage.c

Move the functions which we need from fs/mpage.c into
fs/ext4/readpage.c.  This will allow us to proceed with the
refactoring of these functions and their eventual merger with the
functions in fs/ext4/page_io.c.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/readpage.c | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 320 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index b5249db..3b29da1 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -23,6 +23,7 @@
 #include <linux/ratelimit.h>
 #include <linux/aio.h>
 #include <linux/bitops.h>
+#include <linux/cleancache.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -30,31 +31,344 @@
 
 #include <trace/events/ext4.h>
 
-int ext4_readpage(struct file *file, struct page *page)
+/*
+ * I/O completion handler for multipage BIOs.
+ *
+ * The mpage code never puts partial pages into a BIO (except for end-of-file).
+ * If a page does not map to a contiguous run of blocks then it simply falls
+ * back to block_read_full_page().
+ *
+ * Why is this?  If a page's completion depends on a number of different BIOs
+ * which can complete in any order (or at the same time) then determining the
+ * status of that page is hard.  See end_buffer_async_read() for the details.
+ * There is no point in duplicating all that complexity.
+ */
+static void mpage_end_io(struct bio *bio, int err)
+{
+	struct bio_vec *bv;
+	int i;
+
+	bio_for_each_segment_all(bv, bio, i) {
+		struct page *page = bv->bv_page;
+		page_endio(page, bio_data_dir(bio), err);
+	}
+
+	bio_put(bio);
+}
+
+static struct bio *mpage_bio_submit(int rw, struct bio *bio)
+{
+	bio->bi_end_io = mpage_end_io;
+	submit_bio(rw, bio);
+	return NULL;
+}
+
+static struct bio *
+mpage_alloc(struct block_device *bdev,
+		sector_t first_sector, int nr_vecs,
+		gfp_t gfp_flags)
+{
+	struct bio *bio;
+
+	bio = bio_alloc(gfp_flags, nr_vecs);
+
+	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+		while (!bio && (nr_vecs /= 2))
+			bio = bio_alloc(gfp_flags, nr_vecs);
+	}
+
+	if (bio) {
+		bio->bi_bdev = bdev;
+		bio->bi_iter.bi_sector = first_sector;
+	}
+	return bio;
+}
+
+/*
+ * support function for mpage_readpages.  The fs supplied get_block might
+ * return an up to date buffer.  This is used to map that buffer into
+ * the page, which allows readpage to avoid triggering a duplicate call
+ * to get_block.
+ *
+ * The idea is to avoid adding buffers to pages that don't already have
+ * them.  So when the buffer is up to date and the page size == block size,
+ * this marks the page up to date instead of adding new buffers.
+ */
+static void
+map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
+{
+	struct inode *inode = page->mapping->host;
+	struct buffer_head *page_bh, *head;
+	int block = 0;
+
+	if (!page_has_buffers(page)) {
+		/*
+		 * don't make any buffers if there is only one buffer on
+		 * the page and the page just needs to be set up to date
+		 */
+		if (inode->i_blkbits == PAGE_CACHE_SHIFT &&
+		    buffer_uptodate(bh)) {
+			SetPageUptodate(page);
+			return;
+		}
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+	}
+	head = page_buffers(page);
+	page_bh = head;
+	do {
+		if (block == page_block) {
+			page_bh->b_state = bh->b_state;
+			page_bh->b_bdev = bh->b_bdev;
+			page_bh->b_blocknr = bh->b_blocknr;
+			break;
+		}
+		page_bh = page_bh->b_this_page;
+		block++;
+	} while (page_bh != head);
+}
+
+/*
+ * This is the worker routine which does all the work of mapping the disk
+ * blocks and constructs largest possible bios, submits them for IO if the
+ * blocks are not contiguous on the disk.
+ *
+ * We pass a buffer_head back and forth and use its buffer_mapped() flag to
+ * represent the validity of its disk mapping and to decide when to do the next
+ * get_block() call.
+ */
+static struct bio *
+do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
+		sector_t *last_block_in_bio, struct buffer_head *map_bh,
+		unsigned long *first_logical_block, get_block_t get_block)
 {
-	int ret = -EAGAIN;
 	struct inode *inode = page->mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
+	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocksize = 1 << blkbits;
+	sector_t block_in_file;
+	sector_t last_block;
+	sector_t last_block_in_file;
+	sector_t blocks[MAX_BUF_PER_PAGE];
+	unsigned page_block;
+	unsigned first_hole = blocks_per_page;
+	struct block_device *bdev = NULL;
+	int length;
+	int fully_mapped = 1;
+	unsigned nblocks;
+	unsigned relative_block;
+
+	if (page_has_buffers(page))
+		goto confused;
+
+	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	last_block = block_in_file + nr_pages * blocks_per_page;
+	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	if (last_block > last_block_in_file)
+		last_block = last_block_in_file;
+	page_block = 0;
+
+	/*
+	 * Map blocks using the result from the previous get_blocks call first.
+	 */
+	nblocks = map_bh->b_size >> blkbits;
+	if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
+			block_in_file < (*first_logical_block + nblocks)) {
+		unsigned map_offset = block_in_file - *first_logical_block;
+		unsigned last = nblocks - map_offset;
+
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == last) {
+				clear_buffer_mapped(map_bh);
+				break;
+			}
+			if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr + map_offset +
+						relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	/*
+	 * Then do more get_blocks calls until we are done with this page.
+	 */
+	map_bh->b_page = page;
+	while (page_block < blocks_per_page) {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
+
+		if (block_in_file < last_block) {
+			map_bh->b_size = (last_block-block_in_file) << blkbits;
+			if (get_block(inode, block_in_file, map_bh, 0))
+				goto confused;
+			*first_logical_block = block_in_file;
+		}
+
+		if (!buffer_mapped(map_bh)) {
+			fully_mapped = 0;
+			if (first_hole == blocks_per_page)
+				first_hole = page_block;
+			page_block++;
+			block_in_file++;
+			continue;
+		}
+
+		/* some filesystems will copy data into the page during
+		 * the get_block call, in which case we don't want to
+		 * read it again.  map_buffer_to_page copies the data
+		 * we just collected from get_block into the page's buffers
+		 * so readpage doesn't have to repeat the get_block call
+		 */
+		if (buffer_uptodate(map_bh)) {
+			map_buffer_to_page(page, map_bh, page_block);
+			goto confused;
+		}
+
+		if (first_hole != blocks_per_page)
+			goto confused;		/* hole -> non-hole */
+
+		/* Contiguous blocks? */
+		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
+			goto confused;
+		nblocks = map_bh->b_size >> blkbits;
+		for (relative_block = 0; ; relative_block++) {
+			if (relative_block == nblocks) {
+				clear_buffer_mapped(map_bh);
+				break;
+			} else if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr+relative_block;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	if (first_hole != blocks_per_page) {
+		zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
+		if (first_hole == 0) {
+			SetPageUptodate(page);
+			unlock_page(page);
+			goto out;
+		}
+	} else if (fully_mapped) {
+		SetPageMappedToDisk(page);
+	}
+
+	if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
+	    cleancache_get_page(page) == 0) {
+		SetPageUptodate(page);
+		goto confused;
+	}
+
+	/*
+	 * This page will go to BIO.  Do we need to send this BIO off first?
+	 */
+	if (bio && (*last_block_in_bio != blocks[0] - 1))
+		bio = mpage_bio_submit(READ, bio);
+
+alloc_new:
+	if (bio == NULL) {
+		if (first_hole == blocks_per_page) {
+			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
+								page))
+				goto out;
+		}
+		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+				min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
+				GFP_KERNEL);
+		if (bio == NULL)
+			goto confused;
+	}
+
+	length = first_hole << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
+		bio = mpage_bio_submit(READ, bio);
+		goto alloc_new;
+	}
+
+	relative_block = block_in_file - *first_logical_block;
+	nblocks = map_bh->b_size >> blkbits;
+	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
+	    (first_hole != blocks_per_page))
+		bio = mpage_bio_submit(READ, bio);
+	else
+		*last_block_in_bio = blocks[blocks_per_page - 1];
+out:
+	return bio;
+
+confused:
+	if (bio)
+		bio = mpage_bio_submit(READ, bio);
+	if (!PageUptodate(page))
+		block_read_full_page(page, get_block);
+	else
+		unlock_page(page);
+	goto out;
+}
+
+int ext4_readpage(struct file *file, struct page *page)
+{
+	unsigned long first_logical_block = 0;
+	struct buffer_head map_bh;
+	struct inode *inode = page->mapping->host;
+	struct bio *bio = NULL;
+	sector_t last_block_in_bio = 0;
+	int ret = -EAGAIN;
 
 	trace_ext4_readpage(page);
 
 	if (ext4_has_inline_data(inode))
 		ret = ext4_readpage_inline(inode, page);
 
-	if (ret == -EAGAIN)
-		return mpage_readpage(page, ext4_get_block);
+	if (ret != -EAGAIN)
+		return ret;
 
-	return ret;
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
+	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
+				&map_bh, &first_logical_block, ext4_get_block);
+	if (bio)
+		mpage_bio_submit(READ, bio);
+	return 0;
 }
 
 int ext4_readpages(struct file *file, struct address_space *mapping,
 		   struct list_head *pages, unsigned nr_pages)
 {
 	struct inode *inode = mapping->host;
+	struct bio *bio = NULL;
+	unsigned page_idx;
+	sector_t last_block_in_bio = 0;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
 
 	/* If the file has inline data, no need to do readpages. */
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page = list_entry(pages->prev, struct page, lru);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, mapping,
+					page->index, GFP_KERNEL)) {
+			bio = do_mpage_readpage(bio, page,
+					nr_pages - page_idx,
+					&last_block_in_bio, &map_bh,
+					&first_logical_block,
+					ext4_get_block);
+		}
+		page_cache_release(page);
+	}
+	BUG_ON(!list_empty(pages));
+	if (bio)
+		mpage_bio_submit(READ, bio);
+	return 0;
 }