ext4: invalidate pages if delalloc block allocation fails

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

We are a bit aggressive in invalidating all the pages, but that is OK
because we really don't know why the block allocation failed, and it
is better to come off the writeback path so that the user can look
for more info.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---

 fs/ext4/inode.c |   85 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 73 insertions(+), 12 deletions(-)
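
[Note, not part of the patch: a sketch of the block-to-page index math
used by the new ext4_da_block_invalidatepages() helper. The values are
assumed for illustration only: 4K pages (PAGE_CACHE_SHIFT == 12) and
1K blocks (i_blkbits == 10), i.e. four blocks per page.

	/* illustration only -- example values, not kernel code */
	unsigned int shift = 12 - 10;		/* PAGE_CACHE_SHIFT - i_blkbits */
	pgoff_t index = 8 >> shift;		/* first block 8  -> page index 2 */
	pgoff_t end = (8 + 8 - 1) >> shift;	/* blocks 8..15   -> page index 3 */

So a failed 8-block extent starting at logical block 8 walks page
indexes 2 through 3.]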
Index: linux-2.6.27-rc3/fs/ext4/inode.c
===================================================================
--- linux-2.6.27-rc3.orig/fs/ext4/inode.c	2008-08-18 11:55:43.000000000 -0700
+++ linux-2.6.27-rc3/fs/ext4/inode.c	2008-08-18 11:56:07.000000000 -0700
@@ -1783,6 +1783,39 @@ static inline void __unmap_underlying_bl
 		unmap_underlying_metadata(bdev, bh->b_blocknr + i);
 }
 
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
+					sector_t logical, long blk_cnt)
+{
+	int nr_pages, i;
+	pgoff_t index, end;
+	struct pagevec pvec;
+	struct inode *inode = mpd->inode;
+	struct address_space *mapping = inode->i_mapping;
+
+	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	end   = (logical + blk_cnt - 1) >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+	while (index <= end) {
+		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+			index = page->index;
+			if (index > end)
+				break;
+			index++;
+
+			BUG_ON(!PageLocked(page));
+			BUG_ON(PageWriteback(page));
+			block_invalidatepage(page, 0);
+			ClearPageUptodate(page);
+			unlock_page(page);
+		}
+	}
+	return;
+}
+
 /*
  * mpage_da_map_blocks - go through given space
 *
@@ -1792,7 +1825,7 @@ static inline void __unmap_underlying_bl
  * The function skips space we know is already mapped to disk blocks.
  *
  */
-static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
 	int err = 0;
 	struct buffer_head *lbh = &mpd->lbh;
@@ -1803,7 +1836,7 @@ static void mpage_da_map_blocks(struct m
 	 * We consider only non-mapped and non-allocated blocks
 	 */
 	if (buffer_mapped(lbh) && !buffer_delay(lbh))
-		return;
+		return 0;
 
 	new.b_state = lbh->b_state;
 	new.b_blocknr = 0;
@@ -1814,10 +1847,38 @@ static void mpage_da_map_blocks(struct m
 	 * to write simply return
 	 */
 	if (!new.b_size)
-		return;
+		return 0;
 	err = mpd->get_block(mpd->inode, next, &new, 1);
-	if (err)
-		return;
+	if (err) {
+
+		/* If get_block returns with an error,
+		 * we simply return. Later, writepage
+		 * will redirty the page and writepages
+		 * will find the dirty page again.
+		 */
+		if (err == -EAGAIN)
+			return 0;
+		/*
+		 * A get_block failure will cause us
+		 * to loop in writepages, because
+		 * a_ops->writepage won't be able to
+		 * make progress. The page will be redirtied
+		 * by writepage and writepages will again
+		 * try to write the same page.
+		 */
+		printk(KERN_EMERG "%s block allocation failed for inode %lu "
+				"at logical offset %llu with max blocks "
+				"%zd with error %d\n",
+				__func__, mpd->inode->i_ino,
+				(unsigned long long)next,
+				lbh->b_size >> mpd->inode->i_blkbits, err);
+		printk(KERN_EMERG "This should not happen!! "
+				"Data will be lost\n");
+		/* invalidate all the pages */
+		ext4_da_block_invalidatepages(mpd, next,
+				lbh->b_size >> mpd->inode->i_blkbits);
+		return err;
+	}
 	BUG_ON(new.b_size == 0);
 
 	if (buffer_new(&new))
@@ -1830,7 +1891,7 @@ static void mpage_da_map_blocks(struct m
 	if (buffer_delay(lbh) || buffer_unwritten(lbh))
 		mpage_put_bnr_to_bhs(mpd, next, &new);
 
-	return;
+	return 0;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1887,8 +1948,8 @@ flush_it:
 	 * We couldn't merge the block to our extent, so we
 	 * need to flush current extent and start new one
 	 */
-	mpage_da_map_blocks(mpd);
-	mpage_da_submit_io(mpd);
+	if (mpage_da_map_blocks(mpd) == 0)
+		mpage_da_submit_io(mpd);
 	mpd->io_done = 1;
 	return;
 }
@@ -1930,8 +1991,8 @@ static int __mpage_da_writepage(struct p
 	 * and start IO on them using writepage()
 	 */
 	if (mpd->next_page != mpd->first_page) {
-		mpage_da_map_blocks(mpd);
-		mpage_da_submit_io(mpd);
+		if (mpage_da_map_blocks(mpd) == 0)
+			mpage_da_submit_io(mpd);
 		/*
 		 * skip rest of the page in the page_vec
 		 */
@@ -2034,8 +2095,8 @@ static int mpage_da_writepages(struct ad
 	 * Handle last extent of pages
 	 */
 	if (!mpd.io_done && mpd.next_page != mpd.first_page) {
-		mpage_da_map_blocks(&mpd);
-		mpage_da_submit_io(&mpd);
+		if (mpage_da_map_blocks(&mpd) == 0)
+			mpage_da_submit_io(&mpd);
 	}
 
 	wbc->nr_to_write = to_write - mpd.pages_written;
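
[Note, not part of the patch: the error contract this diff establishes,
in outline. mpage_da_map_blocks() now returns 0 when writeback can
proceed (including the -EAGAIN retry case) and a negative errno when
block allocation failed, in which case the affected pages have already
been invalidated and unlocked. A minimal sketch of the pattern repeated
at the three call sites, with a hypothetical flush_extent() wrapper:

	/* flush_extent() is hypothetical, for illustration only */
	static void flush_extent(struct mpage_da_data *mpd)
	{
		/* submit IO only if the extent was successfully mapped */
		if (mpage_da_map_blocks(mpd) == 0)
			mpage_da_submit_io(mpd);
		/* on error: pages already invalidated, writeback bails out */
	}
]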