More commit description fixups
[ext4-patch-queue.git] / JC5-da_writepages_credit_fix.patch
blob9682350e2e6d85a32f7e5ae4aaebacf0e323a8c0
1 ext4: journal credit fix for the delayed allocation's writepages() function
3 From: Mingming Cao <cmm@us.ibm.com>
5 The previous delalloc writepages implementation started a new transaction
6 outside of a loop which called get_block() to do the block allocation.
7 Since we didn't know exactly how many blocks would need to be allocated,
8 the estimate of the journal credits required was very conservative and
9 caused many issues.
11 With the reworked delayed allocation, a new transaction is created for
12 each get_block(), so we no longer need to guess how many credits the
13 multiple chunks of allocation require. We start every transaction with
14 enough credits for inserting a single extent. When estimating the credits
15 for indirect blocks to allocate a chunk of blocks, we need to know the
16 number of data blocks to allocate. We use the total number of reserved
17 delalloc data blocks; if that is too big, for non-extent files, we need
18 to limit the number of blocks to EXT4_MAX_TRANS_DATA.
20 Code cleanup from Aneesh.
22 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
23 Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
24 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
25 ---
26 fs/ext4/extents.c | 8 ++---
27 fs/ext4/inode.c | 74 +++++++++++++++++++++++++++++++++++++++---------------
28 2 files changed, 58 insertions(+), 24 deletions(-)
30 Index: linux-2.6.27-rc3/fs/ext4/inode.c
31 ===================================================================
32 --- linux-2.6.27-rc3.orig/fs/ext4/inode.c 2008-08-19 16:03:30.000000000 -0700
33 +++ linux-2.6.27-rc3/fs/ext4/inode.c 2008-08-19 16:16:33.000000000 -0700
34 @@ -1848,29 +1848,53 @@
35 static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
36 sector_t logical, struct buffer_head *bh)
38 - struct buffer_head *lbh = &mpd->lbh;
39 sector_t next;
40 + size_t b_size = bh->b_size;
41 + struct buffer_head *lbh = &mpd->lbh;
42 + int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
44 - next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
46 + /* check if the reserved journal credits might overflow */
47 + if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
48 + if (nrblocks >= EXT4_MAX_TRANS_DATA) {
49 + /*
50 + * With non-extent format we are limited by the journal
51 + * credit available. Total credit needed to insert
52 + * nrblocks contiguous blocks is dependent on the
53 + * nrblocks. So limit nrblocks.
54 + */
55 + goto flush_it;
56 + } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
57 + EXT4_MAX_TRANS_DATA) {
58 + /*
59 + * Adding the new buffer_head would make it cross the
60 + * allowed limit for which we have journal credit
61 + * reserved. So limit the new bh->b_size
62 + */
63 + b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
64 + mpd->inode->i_blkbits;
65 + /* we will do mpage_da_submit_io in the next loop */
66 + }
67 + }
69 * First block in the extent
71 if (lbh->b_size == 0) {
72 lbh->b_blocknr = logical;
73 - lbh->b_size = bh->b_size;
74 + lbh->b_size = b_size;
75 lbh->b_state = bh->b_state & BH_FLAGS;
76 return;
79 + next = lbh->b_blocknr + nrblocks;
81 * Can we merge the block to our big extent?
83 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
84 - lbh->b_size += bh->b_size;
85 + lbh->b_size += b_size;
86 return;
89 +flush_it:
91 * We couldn't merge the block to our extent, so we
92 * need to flush current extent and start new one
93 @@ -2231,17 +2255,29 @@
97 - * For now just follow the DIO way to estimate the max credits
98 - * needed to write out EXT4_MAX_WRITEBACK_PAGES.
99 - * todo: need to calculate the max credits need for
100 - * extent based files, currently the DIO credits is based on
101 - * indirect-blocks mapping way.
103 - * Probably should have a generic way to calculate credits
104 - * for DIO, writepages, and truncate
105 + * This is called via ext4_da_writepages() to
106 + * calculate the total number of credits to reserve to fit
107 + * a single extent allocation into a single transaction.
108 + * ext4_da_writepages() will loop calling this before
109 + * the block allocation.
111 -#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS
112 -#define EXT4_MAX_WRITEBACK_CREDITS 25
114 +static int ext4_da_writepages_trans_blocks(struct inode *inode)
116 + int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
118 + /*
119 + * With non-extent format the journal credit needed to
120 + * insert nrblocks contiguous block is dependent on
121 + * number of contiguous block. So we will limit
122 + * number of contiguous block to a sane value
123 + */
124 + if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
125 + (max_blocks > EXT4_MAX_TRANS_DATA))
126 + max_blocks = EXT4_MAX_TRANS_DATA;
128 + return ext4_chunk_trans_blocks(inode, max_blocks);
131 static int ext4_da_writepages(struct address_space *mapping,
132 struct writeback_control *wbc)
133 @@ -2283,7 +2319,7 @@
134 * by delalloc
136 BUG_ON(ext4_should_journal_data(inode));
137 - needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
138 + needed_blocks = ext4_da_writepages_trans_blocks(inode);
140 /* start a new transaction*/
141 handle = ext4_journal_start(inode, needed_blocks);
142 @@ -4461,11 +4497,9 @@
143 * the modification of a single pages into a single transaction,
144 * which may include multile chunk of block allocations.
146 - * This could be called via ext4_write_begin() or later
147 - * ext4_da_writepages() in delalyed allocation case.
148 + * This could be called via ext4_write_begin()
150 - * In both case it's possible that we could allocating multiple
151 - * chunks of blocks. We need to consider the worse case, when
152 + * We need to consider the worse case, when
153 * one new block per extent.
155 int ext4_writepage_trans_blocks(struct inode *inode)
156 Index: linux-2.6.27-rc3/fs/ext4/extents.c
157 ===================================================================
158 --- linux-2.6.27-rc3.orig/fs/ext4/extents.c 2008-08-19 16:15:15.000000000 -0700
159 +++ linux-2.6.27-rc3/fs/ext4/extents.c 2008-08-19 16:16:33.000000000 -0700
160 @@ -1753,7 +1753,7 @@
161 * When pass the actual path, the caller should calculate credits
162 * under i_data_sem.
164 -int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
165 +int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
166 struct ext4_ext_path *path)
168 if (path) {
169 @@ -1772,12 +1772,12 @@
170 * and other metadat blocks still need to be
171 * accounted.
173 - /* 1 one bitmap, 1 block group descriptor */
174 + /* 1 bitmap, 1 block group descriptor */
175 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
179 - return ext4_chunk_trans_blocks(inode, num);
180 + return ext4_chunk_trans_blocks(inode, nrblocks);
184 @@ -1791,7 +1791,7 @@
185 * If the nrblocks are discontiguous, they could cause
186 * the whole tree split more than once, but this is really rare.
188 -int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk)
189 +int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
191 int index;
192 int depth = ext_depth(inode);