ext4-Retry-block-allocation-if-we-have-free-blocks.patch

   1 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   2
   3 ext4: Retry block allocation if we have free blocks left
   4
   5 When we truncate files, the meta-data blocks released are not reused
   6 until we commit the truncate transaction.  That means a delayed get_block
   7 request will return ENOSPC even if we have free blocks left.  Force a
   8 journal commit and retry block allocation if we get ENOSPC with free
   9 blocks left.
  10
  11 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
  12 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
  13 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
  14 ---
  15  fs/ext4/inode.c |   81 +++++++++++++++++++++++++++++++++++++++-----------------
  16  1 file changed, 57 insertions(+), 24 deletions(-)
  17
  18 Index: linux-2.6.27-rc3/fs/ext4/inode.c
  19 ===================================================================
  20 --- linux-2.6.27-rc3.orig/fs/ext4/inode.c       2008-08-28 13:08:06.000000000 -0700
  21 +++ linux-2.6.27-rc3/fs/ext4/inode.c    2008-08-28 13:30:15.000000000 -0700
  22 @@ -1634,6 +1634,7 @@
  23         struct writeback_control *wbc;
  24         int io_done;
  25         long pages_written;
  26 +       int retval;
  27  };
  28
  29  /*
  30 @@ -1820,6 +1821,24 @@
  31         return;
  32  }
  33
  34 +static void ext4_print_free_blocks(struct inode *inode)
  35 +{
  36 +       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  37 +       printk(KERN_EMERG "Total free blocks count %lld\n",
  38 +                       ext4_count_free_blocks(inode->i_sb));
  39 +       printk(KERN_EMERG "Free/Dirty block details\n");
  40 +       printk(KERN_EMERG "free_blocks=%lld\n",
  41 +                       percpu_counter_sum(&sbi->s_freeblocks_counter));
  42 +       printk(KERN_EMERG "dirty_blocks=%lld\n",
  43 +                       percpu_counter_sum(&sbi->s_dirtyblocks_counter));
  44 +       printk(KERN_EMERG "Block reservation details\n");
  45 +       printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
  46 +                       EXT4_I(inode)->i_reserved_data_blocks);
  47 +       printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
  48 +                       EXT4_I(inode)->i_reserved_meta_blocks);
  49 +       return;
  50 +}
  51 +
  52  /*
  53   * mpage_da_map_blocks - go through given space
  54   *
  55 @@ -1834,7 +1853,7 @@
  56         int err = 0;
  57         struct buffer_head new;
  58         struct buffer_head *lbh = &mpd->lbh;
  59 -       sector_t next = lbh->b_blocknr;
  60 +       sector_t next;
  61
  62         /*
  63          * We consider only non-mapped and non-allocated blocks
  64 @@ -1844,6 +1863,7 @@
  65         new.b_state = lbh->b_state;
  66         new.b_blocknr = 0;
  67         new.b_size = lbh->b_size;
  68 +       next = lbh->b_blocknr;
  69         /*
  70          * If we didn't accumulate anything
  71          * to write simply return
  72 @@ -1860,6 +1880,13 @@
  73                  */
  74                 if (err == -EAGAIN)
  75                         return 0;
  76 +
  77 +               if (err == -ENOSPC &&
  78 +                               ext4_count_free_blocks(mpd->inode->i_sb)) {
  79 +                       mpd->retval = err;
  80 +                       return 0;
  81 +               }
  82 +
  83                 /*
  84                  * get block failure will cause us
  85                  * to loop in writepages. Because
  86 @@ -1877,8 +1904,7 @@
  87                 printk(KERN_EMERG "This should not happen.!! "
  88                                         "Data will be lost\n");
  89                 if (err == -ENOSPC) {
  90 -                       printk(KERN_CRIT "Total free blocks count %lld\n",
  91 -                               ext4_count_free_blocks(mpd->inode->i_sb));
  92 +                       ext4_print_free_blocks(mpd->inode);
  93                 }
  94                 /* invlaidate all the pages */
  95                 ext4_da_block_invalidatepages(mpd, next,
  96 @@ -2085,39 +2111,36 @@
  97   */
  98  static int mpage_da_writepages(struct address_space *mapping,
  99                                struct writeback_control *wbc,
 100 -                              get_block_t get_block)
 101 +                              struct mpage_da_data *mpd)
 102  {
 103 -       struct mpage_da_data mpd;
 104         long to_write;
 105         int ret;
 106
 107 -       if (!get_block)
 108 +       if (!mpd->get_block)
 109                 return generic_writepages(mapping, wbc);
 110
 111 -       mpd.wbc = wbc;
 112 -       mpd.inode = mapping->host;
 113 -       mpd.lbh.b_size = 0;
 114 -       mpd.lbh.b_state = 0;
 115 -       mpd.lbh.b_blocknr = 0;
 116 -       mpd.first_page = 0;
 117 -       mpd.next_page = 0;
 118 -       mpd.get_block = get_block;
 119 -       mpd.io_done = 0;
 120 -       mpd.pages_written = 0;
 121 +       mpd->lbh.b_size = 0;
 122 +       mpd->lbh.b_state = 0;
 123 +       mpd->lbh.b_blocknr = 0;
 124 +       mpd->first_page = 0;
 125 +       mpd->next_page = 0;
 126 +       mpd->io_done = 0;
 127 +       mpd->pages_written = 0;
 128 +       mpd->retval = 0;
 129
 130         to_write = wbc->nr_to_write;
 131
 132 -       ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
 133 +       ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
 134
 135         /*
 136          * Handle last extent of pages
 137          */
 138 -       if (!mpd.io_done && mpd.next_page != mpd.first_page) {
 139 -               if (mpage_da_map_blocks(&mpd) == 0)
 140 -                       mpage_da_submit_io(&mpd);
 141 +       if (!mpd->io_done && mpd->next_page != mpd->first_page) {
 142 +               if (mpage_da_map_blocks(mpd) == 0)
 143 +                       mpage_da_submit_io(mpd);
 144         }
 145
 146 -       wbc->nr_to_write = to_write - mpd.pages_written;
 147 +       wbc->nr_to_write = to_write - mpd->pages_written;
 148         return ret;
 149  }
 150
 151 @@ -2357,6 +2380,7 @@
 152  {
 153         handle_t *handle = NULL;
 154         loff_t range_start = 0;
 155 +       struct mpage_da_data mpd;
 156         struct inode *inode = mapping->host;
 157         int needed_blocks, ret = 0, nr_to_writebump = 0;
 158         long to_write, pages_skipped = 0;
 159 @@ -2390,6 +2414,9 @@
 160         range_start =  wbc->range_start;
 161         pages_skipped = wbc->pages_skipped;
 162
 163 +       mpd.wbc = wbc;
 164 +       mpd.inode = mapping->host;
 165 +
 166  restart_loop:
 167         to_write = wbc->nr_to_write;
 168         while (!ret && to_write > 0) {
 169 @@ -2413,11 +2440,17 @@
 170                         dump_stack();
 171                         goto out_writepages;
 172                 }
 173 -
 174                 to_write -= wbc->nr_to_write;
 175 -               ret = mpage_da_writepages(mapping, wbc,
 176 -                                         ext4_da_get_block_write);
 177 +
 178 +               mpd.get_block = ext4_da_get_block_write;
 179 +               ret = mpage_da_writepages(mapping, wbc, &mpd);
 180 +
 181                 ext4_journal_stop(handle);
 182 +
 183 +               if (mpd.retval == -ENOSPC)
 184 +                       jbd2_journal_force_commit_nested(sbi->s_journal);
 185 +
 186 +               /* reset the retry count */
 187                 if (ret == MPAGE_DA_EXTENT_TAIL) {
 188                         /*
 189                          * got one extent now try with