1 ext4: write support for preallocated blocks
3 This patch adds write support to the uninitialized extents that get
4 created when a preallocation is done using fallocate(). It takes care of
5 splitting the extents into multiple (up to three) extents and merging the
6 new split extents with neighbouring ones, if possible.
8 Signed-off-by: Amit Arora <aarora@in.ibm.com>
11 Changes from Take3 to Take4:
13 Changes from Take2 to Take3:
14 1) Patch now rebased to 2.6.22-rc1 kernel.
15 Changes from Take1 to Take2:
16 1) Replaced BUG_ON with WARN_ON & ext4_error.
17 2) Added variable names to the function declaration of
18 ext4_ext_try_to_merge().
19 3) Updated variable declarations to use multiple-definitions-per-line.
20 4) "if((a=foo())).." was broken into "a=foo(); if(a).."
21 5) Removed extra spaces.
24 fs/ext4/extents.c | 234 +++++++++++++++++++++++++++++++++++-----
25 include/linux/ext4_fs_extents.h | 3
26 2 files changed, 210 insertions(+), 27 deletions(-)
28 Index: linux-2.6.22-rc1/fs/ext4/extents.c
29 ===================================================================
30 --- linux-2.6.22-rc1.orig/fs/ext4/extents.c
31 +++ linux-2.6.22-rc1/fs/ext4/extents.c
32 @@ -1140,6 +1140,54 @@ ext4_can_extents_be_merged(struct inode
36 + * This function tries to merge the "ex" extent to the next extent in the tree.
37 + * It always tries to merge towards right. If you want to merge towards
38 + * left, pass "ex - 1" as argument instead of "ex".
39 + * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
40 + * 1 if they got merged.
42 +int ext4_ext_try_to_merge(struct inode *inode,
43 + struct ext4_ext_path *path,
44 + struct ext4_extent *ex)
46 + struct ext4_extent_header *eh;
47 + unsigned int depth, len;
49 + int uninitialized = 0;
51 + depth = ext_depth(inode);
52 + BUG_ON(path[depth].p_hdr == NULL);
53 + eh = path[depth].p_hdr;
55 + while (ex < EXT_LAST_EXTENT(eh))
57 + if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
59 + /* merge with next extent! */
60 + if (ext4_ext_is_uninitialized(ex))
62 + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
63 + + ext4_ext_get_actual_len(ex + 1));
65 + ext4_ext_mark_uninitialized(ex);
67 + if (ex + 1 < EXT_LAST_EXTENT(eh)) {
68 + len = (EXT_LAST_EXTENT(eh) - ex - 1)
69 + * sizeof(struct ext4_extent);
70 + memmove(ex + 1, ex + 2, len);
72 + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
74 + WARN_ON(eh->eh_entries == 0);
75 + if (!eh->eh_entries)
76 + ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
77 + "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
84 * check if a portion of the "newext" extent overlaps with an
87 @@ -1327,25 +1375,7 @@ has_space:
90 /* try to merge extents to the right */
91 - while (nearex < EXT_LAST_EXTENT(eh)) {
92 - if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
94 - /* merge with next extent! */
95 - if (ext4_ext_is_uninitialized(nearex))
97 - nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex)
98 - + ext4_ext_get_actual_len(nearex + 1));
100 - ext4_ext_mark_uninitialized(nearex);
102 - if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
103 - len = (EXT_LAST_EXTENT(eh) - nearex - 1)
104 - * sizeof(struct ext4_extent);
105 - memmove(nearex + 1, nearex + 2, len);
107 - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
108 - BUG_ON(eh->eh_entries == 0);
110 + ext4_ext_try_to_merge(inode, path, nearex);
112 /* try to merge extents to the left */
114 @@ -2011,15 +2041,152 @@ void ext4_ext_release(struct super_block
119 + * This function is called by ext4_ext_get_blocks() if someone tries to write
120 + * to an uninitialized extent. It may result in splitting the uninitialized
121 + * extent into multiple extents (up to three - one initialized and two
123 + * There are three possibilities:
124 + * a> There is no split required: Entire extent should be initialized
125 + * b> Splits in two extents: Write is happening at either end of the extent
126 + * c> Splits in three extents: Someone is writing in the middle of the extent
128 +int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
129 + struct ext4_ext_path *path,
130 + ext4_fsblk_t iblock,
131 + unsigned long max_blocks)
133 + struct ext4_extent *ex, newex;
134 + struct ext4_extent *ex1 = NULL;
135 + struct ext4_extent *ex2 = NULL;
136 + struct ext4_extent *ex3 = NULL;
137 + struct ext4_extent_header *eh;
138 + unsigned int allocated, ee_block, ee_len, depth;
139 + ext4_fsblk_t newblock;
143 + depth = ext_depth(inode);
144 + eh = path[depth].p_hdr;
145 + ex = path[depth].p_ext;
146 + ee_block = le32_to_cpu(ex->ee_block);
147 + ee_len = ext4_ext_get_actual_len(ex);
148 + allocated = ee_len - (iblock - ee_block);
149 + newblock = iblock - ee_block + ext_pblock(ex);
152 + /* ex1: ee_block to iblock - 1 : uninitialized */
153 + if (iblock > ee_block) {
155 + ex1->ee_len = cpu_to_le16(iblock - ee_block);
156 + ext4_ext_mark_uninitialized(ex1);
159 + /* for sanity, update the length of the ex2 extent before
160 + * we insert ex3, if ex1 is NULL. This is to avoid temporary
161 + * overlap of blocks.
163 + if (!ex1 && allocated > max_blocks)
164 + ex2->ee_len = cpu_to_le16(max_blocks);
165 + /* ex3: iblock + max_blocks to ee_block + ee_len - 1 : uninitialized */
166 + if (allocated > max_blocks) {
167 + unsigned int newdepth;
169 + ex3->ee_block = cpu_to_le32(iblock + max_blocks);
170 + ext4_ext_store_pblock(ex3, newblock + max_blocks);
171 + ex3->ee_len = cpu_to_le16(allocated - max_blocks);
172 + ext4_ext_mark_uninitialized(ex3);
173 + err = ext4_ext_insert_extent(handle, inode, path, ex3);
176 + /* The depth, and hence eh & ex might change
177 + * as part of the insert above.
179 + newdepth = ext_depth(inode);
180 + if (newdepth != depth) {
182 + path = ext4_ext_find_extent(inode, iblock, NULL);
183 + if (IS_ERR(path)) {
184 + err = PTR_ERR(path);
188 + eh = path[depth].p_hdr;
189 + ex = path[depth].p_ext;
193 + allocated = max_blocks;
195 + /* If there was a change of depth as part of the
196 + * insertion of ex3 above, we need to update the length
197 + * of the ex1 extent again here
199 + if (ex1 && ex1 != ex) {
201 + ex1->ee_len = cpu_to_le16(iblock - ee_block);
202 + ext4_ext_mark_uninitialized(ex1);
205 + /* ex2: iblock to iblock + max_blocks - 1 : initialized */
206 + ex2->ee_block = cpu_to_le32(iblock);
207 + ex2->ee_start = cpu_to_le32(newblock);
208 + ext4_ext_store_pblock(ex2, newblock);
209 + ex2->ee_len = cpu_to_le16(allocated);
212 + err = ext4_ext_get_access(handle, inode, path + depth);
215 + /* New (initialized) extent starts from the first block
216 + * in the current extent. i.e., ex2 == ex
217 + * We have to see if it can be merged with the extent
220 + if (ex2 > EXT_FIRST_EXTENT(eh)) {
221 + /* To merge left, pass "ex2 - 1" to try_to_merge(),
222 + * since it merges towards right _only_.
224 + ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
226 + err = ext4_ext_correct_indexes(handle, inode, path);
229 + depth = ext_depth(inode);
233 + /* Try to Merge towards right. This might be required
234 + * only when the whole extent is being written to.
235 + * i.e. ex2 == ex and ex3 == NULL.
238 + ret = ext4_ext_try_to_merge(inode, path, ex2);
240 + err = ext4_ext_correct_indexes(handle, inode, path);
245 + /* Mark modified extent as dirty */
246 + err = ext4_ext_dirty(handle, inode, path + depth);
249 + err = ext4_ext_insert_extent(handle, inode, path, &newex);
251 + return err ? err : allocated;
254 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
256 unsigned long max_blocks, struct buffer_head *bh_result,
257 int create, int extend_disksize)
259 struct ext4_ext_path *path = NULL;
260 + struct ext4_extent_header *eh;
261 struct ext4_extent newex, *ex;
262 ext4_fsblk_t goal, newblock;
263 - int err = 0, depth;
264 + int err = 0, depth, ret;
265 unsigned long allocated = 0;
267 __clear_bit(BH_New, &bh_result->b_state);
268 @@ -2067,6 +2234,7 @@ int ext4_ext_get_blocks(handle_t *handle
269 * this is why assert can't be put in ext4_ext_find_extent()
271 BUG_ON(path[depth].p_ext == NULL && depth != 0);
272 + eh = path[depth].p_hdr;
274 ex = path[depth].p_ext;
276 @@ -2075,13 +2243,9 @@ int ext4_ext_get_blocks(handle_t *handle
277 unsigned short ee_len;
280 - * Allow future support for preallocated extents to be added
281 - * as an RO_COMPAT feature:
282 * Uninitialized extents are treated as holes, except that
283 - * we avoid (fail) allocating new blocks during a write.
284 + * we split out initialized portions during a write.
286 - if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN)
288 ee_len = ext4_ext_get_actual_len(ex);
289 /* if found extent covers block, simply return it */
290 if (iblock >= ee_block && iblock < ee_block + ee_len) {
291 @@ -2090,12 +2254,27 @@ int ext4_ext_get_blocks(handle_t *handle
292 allocated = ee_len - (iblock - ee_block);
293 ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
294 ee_block, ee_len, newblock);
296 /* Do not put uninitialized extent in the cache */
297 - if (!ext4_ext_is_uninitialized(ex))
298 + if (!ext4_ext_is_uninitialized(ex)) {
299 ext4_ext_put_in_cache(inode, ee_block,
301 EXT4_EXT_CACHE_EXTENT);
305 + if (create == EXT4_CREATE_UNINITIALIZED_EXT)
310 + ret = ext4_ext_convert_to_initialized(handle, inode,
321 @@ -2147,6 +2326,7 @@ int ext4_ext_get_blocks(handle_t *handle
323 /* previous routine could use block we allocated */
324 newblock = ext_pblock(&newex);
326 __set_bit(BH_New, &bh_result->b_state);
328 /* Cache only when it is _not_ an uninitialized extent */
329 Index: linux-2.6.22-rc1/include/linux/ext4_fs_extents.h
330 ===================================================================
331 --- linux-2.6.22-rc1.orig/include/linux/ext4_fs_extents.h
332 +++ linux-2.6.22-rc1/include/linux/ext4_fs_extents.h
333 @@ -202,6 +202,9 @@ static inline int ext4_ext_get_actual_le
335 extern int ext4_extent_tree_init(handle_t *, struct inode *);
336 extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
337 +extern int ext4_ext_try_to_merge(struct inode *inode,
338 + struct ext4_ext_path *path,
339 + struct ext4_extent *);
340 extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
341 extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
342 extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
344 To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
345 the body of a message to majordomo@vger.kernel.org
346 More majordomo info at http://vger.kernel.org/majordomo-info.html