Fix ext3-4-migrate.patch and mballoc-core.patches
[ext4-patch-queue.git] / ext4-online-defrag-free-space-fragmentation.patch
blob4def2debf15f480f513f84d25a4df7c8c4548cd9
1 From: Akira Fujita <a-fujita@rs.jp.nec.com>
2 Free space fragmentation functions
4 Defrag tries to move other files to make sufficient space
5 and reallocates the contiguous blocks for the target file.
8 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
9 Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
10 Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
11 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
13 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
14 index aed3456..3539b3b 100644
15 --- a/fs/ext4/balloc.c
16 +++ b/fs/ext4/balloc.c
17 @@ -383,7 +383,7 @@ restart:
18 * If the goal block is within the reservation window, return 1;
19 * otherwise, return 0;
21 -static int
22 +int
23 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
24 ext4_group_t group, struct super_block *sb)
26 @@ -488,7 +488,7 @@ void ext4_rsv_window_add(struct super_block *sb,
27 * from the filesystem reservation window rb tree. Must be called with
28 * rsv_lock hold.
30 -static void rsv_window_remove(struct super_block *sb,
31 +void rsv_window_remove(struct super_block *sb,
32 struct ext4_reserve_window_node *rsv)
34 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
35 @@ -503,7 +503,7 @@ static void rsv_window_remove(struct super_block *sb,
37 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
39 -static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
40 +inline int rsv_is_empty(struct ext4_reserve_window *rsv)
42 /* a valid reservation end block could not be 0 */
43 return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
44 @@ -871,7 +871,7 @@ static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
45 * bitmap on disk and the last-committed copy in journal, until we find a
46 * bit free in both bitmaps.
48 -static ext4_grpblk_t
49 +ext4_grpblk_t
50 bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
51 ext4_grpblk_t maxblocks)
53 @@ -1241,7 +1241,7 @@ static int find_next_reservable_window(
54 * @bitmap_bh: the block group block bitmap
57 -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
58 +int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
59 ext4_grpblk_t grp_goal, struct super_block *sb,
60 ext4_group_t group, struct buffer_head *bitmap_bh)
62 @@ -1385,7 +1385,7 @@ retry:
63 * expand the reservation window size if necessary on a best-effort
64 * basis before ext4_new_blocks() tries to allocate blocks,
66 -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
67 +void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
68 struct super_block *sb, int size)
70 struct ext4_reserve_window_node *next_rsv;
71 diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
72 index 7807ca4..79a77be 100644
73 --- a/fs/ext4/defrag.c
74 +++ b/fs/ext4/defrag.c
75 @@ -13,6 +13,13 @@
76 #include <asm/uaccess.h>
77 #include "group.h"
79 +#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
80 +#define EXT_SET_EXTENT_DATA(src, dest) do { \
81 + dest.block = le32_to_cpu(src->ee_block); \
82 + dest.start = ext_pblock(src); \
83 + dest.len = le16_to_cpu(src->ee_len); \
84 + } while (0)
86 /* Will go away */
87 ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
89 @@ -260,6 +267,470 @@ ext4_ext_next_extent(struct inode *inode,
90 return 1;
93 +/**
94 + * ext4_ext_extents_info() - get extents information
95 + *
96 + * @ext_info: pointer to ext4_extents_info
97 + * @ext_info->ino: inode number of the file whose extent
98 + * information is wanted
99 + * @ext_info->max_entries: defined by DEFRAG_MAX_ENT
100 + * @ext_info->entries: number of extents returned (output)
101 + * @ext_info->ext[]: array of extents (output)
102 + * @ext_info->f_offset: starting block offset of targeted extent
103 + * (file relative)
105 + * @sb: for iget()
107 + * This function returns 0 if further extent(s) exist,
108 + * or returns 1 if no next extent exists, otherwise returns an error value.
109 + */
110 +static int ext4_ext_extents_info(struct ext4_extents_info *ext_info,
111 + struct super_block *sb)
113 + struct ext4_ext_path *path = NULL;
114 + struct ext4_extent *ext = NULL;
115 + struct inode *inode = NULL;
116 + ext4_lblk_t offset = ext_info->f_offset;
117 + int max_entries = ext_info->max_entries;
118 + int is_last_extent = 0;
119 + int depth = 0;
120 + int entries = 0;
121 + int err = 0;
123 + inode = iget(sb, ext_info->ino);
124 + if (!inode)
125 + return -EACCES;
127 + down_write(&EXT4_I(inode)->i_data_sem);
129 + /* if a file doesn't exist*/
130 + if ((!inode->i_nlink) || (inode->i_ino < 11) ||
131 + !S_ISREG(inode->i_mode)) {
132 + ext_info->entries = 0;
133 + err = -ENOENT;
134 + goto out;
137 + path = ext4_ext_find_extent(inode, offset, NULL);
138 + if (IS_ERR(path)) {
139 + err = PTR_ERR(path);
140 + path = NULL;
141 + goto out;
143 + depth = ext_depth(inode);
145 + /* if file size is 0, skip this one. */
146 + if (path[depth].p_ext == NULL) {
147 + ext_info->entries = 0;
148 + goto out;
150 + ext = path[depth].p_ext;
151 + EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
152 + entries = 1;
154 + /*
155 + * This loop collects at most 'max_entries' extents per call.
156 + * So this function has to be called again if @inode has
157 + * more extents than 'max_entries'.
158 + */
159 + while (entries < max_entries) {
160 + is_last_extent = ext4_ext_next_extent(inode, path, &ext);
161 + /* found next extent (not the last one)*/
162 + if (is_last_extent == 0) {
163 + EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
164 + entries++;
166 + /*
167 + * In case @inode has more than 'max_entries' extents,
168 + * this function must be called again, restarting from
169 + * the 'max_entries * n + 1'th extent, where
170 + * 'n' is the number of times this function has already
171 + * been called on the same @inode.
172 + */
173 + if (entries == max_entries) {
174 + ext_info->f_offset =
175 + le32_to_cpu(ext->ee_block) +
176 + le32_to_cpu(ext->ee_len);
177 + /* check the extent is the last one or not*/
178 + is_last_extent =
179 + ext4_ext_next_extent(inode, path, &ext);
180 + if (is_last_extent == 1) {
181 + err = is_last_extent;
182 + } else if (is_last_extent < 0) {
183 + /*ERR*/
184 + err = is_last_extent;
185 + goto out;
187 + break;
190 + /* the extent is the last one */
191 + } else if (is_last_extent == 1) {
192 + ext_info->f_offset = 0;
193 + err = is_last_extent;
194 + break;
195 + } else {
196 + /* ERR */
197 + err = is_last_extent;
198 + goto out;
202 + ext_info->entries = entries;
204 +out:
205 + if (path) {
206 + ext4_ext_drop_refs(path);
207 + kfree(path);
209 + up_write(&EXT4_I(inode)->i_data_sem);
210 + iput(inode);
211 + return err;
214 +/**
215 + * ext4_ext_defrag_reserve - reserve blocks for defrag
216 + * @inode target inode
217 + * @goal block reservation goal
218 + * @len blocks count to reserve
220 + * This function returns 0 if succeeded, otherwise
221 + * returns error value
222 + */
224 +static int
225 +ext4_ext_defrag_reserve(struct inode *inode, ext4_fsblk_t goal, int len)
227 + struct super_block *sb = NULL;
228 + handle_t *handle = NULL;
229 + struct buffer_head *bitmap_bh = NULL;
230 + struct ext4_block_alloc_info *block_i;
231 + struct ext4_reserve_window_node *my_rsv = NULL;
232 + unsigned short windowsz = 0;
233 + ext4_group_t group_no;
234 + ext4_grpblk_t grp_target_blk;
235 + int err = 0;
237 + down_write(&EXT4_I(inode)->i_data_sem);
239 + handle = ext4_journal_start(inode, EXT4_RESERVE_TRANS_BLOCKS);
240 + if (IS_ERR(handle)) {
241 + err = PTR_ERR(handle);
242 + handle = NULL;
243 + goto out;
246 + if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) {
247 + ext4_init_block_alloc_info(inode);
248 + } else if (!S_ISREG(inode->i_mode)) {
249 + printk(KERN_ERR "ext4_ext_defrag_reserve:"
250 + " incorrect file type\n");
251 + err = -1;
252 + goto out;
255 + sb = inode->i_sb;
256 + if (!sb) {
257 + printk(KERN_ERR "ext4_ext_defrag_reserve: "
258 + "nonexistent device\n");
259 + err = -ENXIO;
260 + goto out;
262 + ext4_get_group_no_and_offset(sb, goal, &group_no,
263 + &grp_target_blk);
265 + block_i = EXT4_I(inode)->i_block_alloc_info;
267 + if (!block_i || ((windowsz =
268 + block_i->rsv_window_node.rsv_goal_size) == 0)) {
269 + printk(KERN_ERR "ex4_ext_defrag_reserve: unable to reserve\n");
270 + err = -1;
271 + goto out;
274 + my_rsv = &block_i->rsv_window_node;
276 + bitmap_bh = read_block_bitmap(sb, group_no);
277 + if (!bitmap_bh) {
278 + err = -ENOSPC;
279 + goto out;
282 + BUFFER_TRACE(bitmap_bh, "get undo access for new block");
283 + err = ext4_journal_get_undo_access(handle, bitmap_bh);
284 + if (err)
285 + goto out;
287 + err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
288 + group_no, bitmap_bh);
289 + if (err < 0) {
290 + printk(KERN_ERR "defrag: reservation faild\n");
291 + ext4_discard_reservation(inode);
292 + goto out;
293 + } else {
294 + if (len > EXT4_DEFAULT_RESERVE_BLOCKS)
295 + try_to_extend_reservation(my_rsv, sb,
296 + len - EXT4_DEFAULT_RESERVE_BLOCKS);
300 +out:
301 + up_write(&EXT4_I(inode)->i_data_sem);
302 + ext4_journal_release_buffer(handle, bitmap_bh);
303 + brelse(bitmap_bh);
305 + if (handle)
306 + ext4_journal_stop(handle);
308 + return err;
311 +/**
312 + * ext4_ext_block_within_rsv - Is target extent reserved ?
313 + * @ inode inode of target file
314 + * @ ex_start start physical block number of the extent
315 + * which already moved
316 + * @ ex_len block length of the extent which already moved
318 + * This function returns 0 if succeeded, otherwise
319 + * returns error value
320 + */
321 +static int ext4_ext_block_within_rsv(struct inode *inode,
322 + ext4_fsblk_t ex_start, int ex_len)
324 + struct super_block *sb = inode->i_sb;
325 + struct ext4_block_alloc_info *block_i;
326 + ext4_group_t group_no;
327 + ext4_grpblk_t grp_blk;
328 + struct ext4_reserve_window_node *rsv;
330 + block_i = EXT4_I(inode)->i_block_alloc_info;
331 + if (block_i && block_i->rsv_window_node.rsv_goal_size > 0) {
332 + rsv = &block_i->rsv_window_node;
333 + if (rsv_is_empty(&rsv->rsv_window)) {
334 + printk(KERN_ERR "defrag: Can't defrag due to"
335 + " the empty reservation\n");
336 + return -ENOSPC;
338 + } else {
339 + printk(KERN_ERR "defrag: No i_block_alloc_info\n");
340 + return -ENOSPC;
343 + ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
345 + if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
346 + || !goal_in_my_reservation(&rsv->rsv_window, grp_blk + ex_len - 1,
347 + group_no, sb)){
348 + printk(KERN_ERR "defrag: %d or %d in bg %lu is "
349 + "not in rsv_window\n", grp_blk,
350 + grp_blk + ex_len - 1, group_no);
351 + return -ENOSPC;
353 + return 0;
357 + * ext4_ext_fblocks_reserve() -
358 + * reserve free blocks by ext4_ext_defrag_reserve()
359 + * @inode: To get a block group number
360 + * @ext_info: free block distribution, stored in an extent-like style
361 + * @ext_info->ext[] an array of struct ext4_extents_data
362 + */
363 +static int ext4_ext_fblocks_reserve(struct inode *inode,
364 + struct ext4_extents_info *ext_info)
366 + ext4_fsblk_t ex_start = 0;
367 + int i;
368 + int ret = 0;
369 + int len = 0;
371 + for (i = 0; i < ext_info->entries; i++) {
372 + ex_start = ext_info->ext[i].start;
373 + len = ext_info->ext[i].len;
375 + ret = ext4_ext_defrag_reserve(inode, ex_start, len);
376 + if (ret < 0) {
377 + printk(KERN_ERR "defrag: failed "
378 + "ext4_ext_defrag_reserve\n");
379 + goto ERR;
381 + ret = ext4_ext_block_within_rsv(inode, ex_start, len);
382 + if (ret < 0) {
383 + printk(KERN_ERR "defrag: failed "
384 + "ext4_ext_block_within_rsv\n");
385 + goto ERR;
388 + return ret;
390 +ERR:
391 + down_write(&EXT4_I(inode)->i_data_sem);
392 + ext4_discard_reservation(inode);
393 + up_write(&EXT4_I(inode)->i_data_sem);
394 + return ret;
397 +/**
398 + * ext4_ext_defrag_victim - Create free space for defrag
399 + * @target_filp target file
400 + * @ex_info target extents array to move
402 + * This function returns 0 if succeeded, otherwise
403 + * returns error value
404 + */
405 +static int ext4_ext_defrag_victim(struct file *target_filp,
406 + struct ext4_extents_info *ex_info)
408 + struct inode *target_inode = target_filp->f_dentry->d_inode;
409 + struct super_block *sb = target_inode->i_sb;
410 + struct file victim_file;
411 + struct dentry victim_dent;
412 + struct inode *victim_inode;
413 + ext4_fsblk_t goal = ex_info->goal;
414 + int ret = 0;
415 + int i = 0;
416 + struct ext4_extent_data ext;
417 + ext4_group_t group;
418 + ext4_grpblk_t grp_off;
420 + /* Setup dummy extent data */
421 + ext.len = 0;
423 + /* Get the inode of the victim file */
424 + victim_inode = iget(sb, ex_info->ino);
425 + if (!victim_inode)
426 + return -EACCES;
428 + /* Setup file for the victim file */
429 + victim_dent.d_inode = victim_inode;
430 + victim_file.f_dentry = &victim_dent;
431 + victim_file.f_mapping = victim_inode->i_mapping;
433 + /* Set the goal appropriate offset */
434 + if (goal == -1) {
435 + ext4_get_group_no_and_offset(victim_inode->i_sb,
436 + ex_info->ext[0].start, &group, &grp_off);
437 + goal = ext4_group_first_block_no(sb, group + 1);
440 + for (i = 0; i < ex_info->entries; i++) {
441 + /* Move original blocks to another block group */
442 + ret = ext4_ext_defrag(&victim_file, ex_info->ext[i].block,
443 + ex_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
444 + if (ret < 0) {
445 + printk(KERN_ERR "defrag: failed ext4_ext_defrag\n");
446 + goto ERR;
449 + /* Sync journal blocks before reservation */
450 + ret = ext4_force_commit(sb);
451 + if (ret) {
452 + printk(KERN_ERR "defrag: failed ext4_force_commit (%d)\n", ret);
453 + goto ERR;
457 + iput(victim_inode);
458 + return 0;
459 +ERR:
460 + down_write(&EXT4_I(target_inode)->i_data_sem);
461 + ext4_discard_reservation(target_inode);
462 + up_write(&EXT4_I(target_inode)->i_data_sem);
463 + iput(victim_inode);
464 + return ret;
467 +/**
468 + * ext4_ext_fblocks_distribution - Search free block distribution
469 + * @inode inode used to select the block group to scan
470 + * @ext_info ext4_extents_info (free extents are stored here)
472 + * This function returns 0 if succeeded, otherwise
473 + * returns error value
474 + */
475 +static int ext4_ext_fblocks_distribution(struct inode *inode,
476 + struct ext4_extents_info *ext_info)
478 + struct buffer_head *bitmap_bh = NULL;
479 + struct super_block *sb = inode->i_sb;
480 + struct ext4_super_block *es;
481 + handle_t *handle;
482 + ext4_group_t group_no;
483 + ext4_grpblk_t start, end;
484 + ext4_fsblk_t start_block = 0;
485 + int num = 0;
486 + int len = 0;
487 + int i = 0;
488 + int err = 0;
489 + int block_set = 0;
491 + if (!sb) {
492 + printk(KERN_ERR "ext4_ext_fblock_distribution: "
493 + "nonexitent device\n");
494 + return -ENOSPC;
496 + es = EXT4_SB(sb)->s_es;
498 + group_no = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
499 + start = ext_info->g_offset;
500 + end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
502 + handle = ext4_journal_start(inode, 1);
503 + if (IS_ERR(handle)) {
504 + err = PTR_ERR(handle);
505 + return err;
508 + bitmap_bh = read_block_bitmap(sb, group_no);
509 + if (!bitmap_bh) {
510 + err = -EIO;
511 + goto out;
514 + BUFFER_TRACE(bitmap_bh, "get undo access for new block");
515 + err = ext4_journal_get_undo_access(handle, bitmap_bh);
516 + if (err)
517 + goto out;
519 + for (i = start; i <= end ; i++) {
520 + if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) {
521 + len++;
522 + /* if the free block is the first one in a region */
523 + if (!block_set) {
524 + start_block =
525 + i + group_no * EXT4_BLOCKS_PER_GROUP(sb);
526 + block_set = 1;
528 + } else if (len) {
529 + ext_info->ext[num].start = start_block;
530 + ext_info->ext[num].len = len;
531 + num++;
532 + len = 0;
533 + block_set = 0;
534 + if (num == ext_info->max_entries) {
535 + ext_info->g_offset = i + 1;
536 + break;
539 + if ((i == end) && len) {
540 + ext_info->ext[num].start = start_block;
541 + ext_info->ext[num].len = len;
542 + num++;
546 + ext_info->entries = num;
547 +out:
548 + ext4_journal_release_buffer(handle, bitmap_bh);
549 + brelse(bitmap_bh);
551 + if (handle)
552 + ext4_journal_stop(handle);
554 + return err;
557 int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
558 unsigned long arg)
560 @@ -312,6 +783,74 @@ int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
561 unlock_kernel();
563 return put_user(block, p);
564 + } else if (cmd == EXT4_IOC_GROUP_INFO) {
565 + struct ext4_group_data_info grp_data;
567 + if (copy_from_user(&grp_data,
568 + (struct ext4_group_data_info __user *)arg,
569 + sizeof(grp_data)))
570 + return -EFAULT;
572 + grp_data.s_blocks_per_group =
573 + EXT4_BLOCKS_PER_GROUP(inode->i_sb);
574 + grp_data.s_inodes_per_group =
575 + EXT4_INODES_PER_GROUP(inode->i_sb);
577 + if (copy_to_user((struct ext4_group_data_info *)arg,
578 + &grp_data, sizeof(grp_data)))
579 + return -EFAULT;
580 + } else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) {
581 + struct ext4_extents_info ext_info;
583 + if (copy_from_user(&ext_info,
584 + (struct ext4_extents_info __user *)arg,
585 + sizeof(ext_info)))
586 + return -EFAULT;
588 + BUG_ON(ext_info.ino != inode->i_ino);
590 + err = ext4_ext_fblocks_distribution(inode, &ext_info);
592 + if (!err)
593 + err = copy_to_user((struct ext4_extents_info *)arg,
594 + &ext_info, sizeof(ext_info));
595 + } else if (cmd == EXT4_IOC_EXTENTS_INFO) {
596 + struct ext4_extents_info ext_info;
598 + if (copy_from_user(&ext_info,
599 + (struct ext4_extents_info __user *)arg,
600 + sizeof(ext_info)))
601 + return -EFAULT;
603 + err = ext4_ext_extents_info(&ext_info, inode->i_sb);
604 + if (err >= 0) {
605 + if (copy_to_user((struct ext4_extents_info __user *)arg,
606 + &ext_info, sizeof(ext_info)))
607 + return -EFAULT;
609 + } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
610 + struct ext4_extents_info ext_info;
612 + if (copy_from_user(&ext_info,
613 + (struct ext4_extents_info __user *)arg,
614 + sizeof(ext_info)))
615 + return -EFAULT;
617 + err = ext4_ext_fblocks_reserve(inode, &ext_info);
618 + } else if (cmd == EXT4_IOC_MOVE_VICTIM) {
619 + struct ext4_extents_info ext_info;
621 + if (copy_from_user(&ext_info,
622 + (struct ext4_extents_info __user *)arg,
623 + sizeof(ext_info)))
624 + return -EFAULT;
626 + err = ext4_ext_defrag_victim(filp, &ext_info);
628 + } else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
629 + down_write(&EXT4_I(inode)->i_data_sem);
630 + ext4_discard_reservation(inode);
631 + up_write(&EXT4_I(inode)->i_data_sem);
632 } else if (cmd == EXT4_IOC_DEFRAG) {
633 struct ext4_ext_defrag_data defrag;
635 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
636 index 240a4fb..c2caf97 100644
637 --- a/fs/ext4/extents.c
638 +++ b/fs/ext4/extents.c
639 @@ -177,11 +177,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
640 static ext4_fsblk_t
641 ext4_ext_new_block(handle_t *handle, struct inode *inode,
642 struct ext4_ext_path *path,
643 - struct ext4_extent *ex, int *err)
644 + struct ext4_extent *ex, int *err,
645 + ext4_fsblk_t defrag_goal)
647 ext4_fsblk_t goal, newblock;
649 - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
650 + if (defrag_goal) {
651 + goal = defrag_goal;
652 + } else {
653 + goal = ext4_ext_find_goal(inode, path,
654 + le32_to_cpu(ex->ee_block));
656 newblock = ext4_new_block(handle, inode, goal, err);
657 return newblock;
659 @@ -632,7 +638,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
661 static int ext4_ext_split(handle_t *handle, struct inode *inode,
662 struct ext4_ext_path *path,
663 - struct ext4_extent *newext, int at)
664 + struct ext4_extent *newext, int at,
665 + ext4_fsblk_t defrag_goal)
667 struct buffer_head *bh = NULL;
668 int depth = ext_depth(inode);
669 @@ -682,7 +689,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
670 /* allocate all needed blocks */
671 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
672 for (a = 0; a < depth - at; a++) {
673 - newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
674 + newblock = ext4_ext_new_block(handle, inode, path,
675 + newext, &err, defrag_goal);
676 if (newblock == 0)
677 goto cleanup;
678 ablocks[a] = newblock;
679 @@ -871,7 +879,8 @@ cleanup:
681 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
682 struct ext4_ext_path *path,
683 - struct ext4_extent *newext)
684 + struct ext4_extent *newext,
685 + ext4_fsblk_t defrag_goal)
687 struct ext4_ext_path *curp = path;
688 struct ext4_extent_header *neh;
689 @@ -880,7 +889,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
690 ext4_fsblk_t newblock;
691 int err = 0;
693 - newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
694 + newblock = ext4_ext_new_block(handle, inode, path,
695 + newext, &err, defrag_goal);
696 if (newblock == 0)
697 return err;
699 @@ -956,7 +966,8 @@ out:
701 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
702 struct ext4_ext_path *path,
703 - struct ext4_extent *newext)
704 + struct ext4_extent *newext,
705 + ext4_fsblk_t defrag_goal)
707 struct ext4_ext_path *curp;
708 int depth, i, err = 0;
709 @@ -976,7 +987,8 @@ repeat:
710 if (EXT_HAS_FREE_INDEX(curp)) {
711 /* if we found index with free entry, then use that
712 * entry: create all needed subtree and add new leaf */
713 - err = ext4_ext_split(handle, inode, path, newext, i);
714 + err = ext4_ext_split(handle, inode, path,
715 + newext, i, defrag_goal);
717 /* refill path */
718 ext4_ext_drop_refs(path);
719 @@ -987,7 +999,8 @@ repeat:
720 err = PTR_ERR(path);
721 } else {
722 /* tree is full, time to grow in depth */
723 - err = ext4_ext_grow_indepth(handle, inode, path, newext);
724 + err = ext4_ext_grow_indepth(handle, inode, path,
725 + newext, defrag_goal);
726 if (err)
727 goto out;
729 @@ -1433,6 +1446,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
730 struct ext4_ext_path *path,
731 struct ext4_extent *newext)
733 + return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
737 + * ext4_ext_insert_extent_defrag:
738 + * The difference from ext4_ext_insert_extent is to use the first block
739 + * in newext as the goal of the new index block.
740 + */
741 +int
742 +ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
743 + struct ext4_ext_path *path,
744 + struct ext4_extent *newext, int defrag)
746 struct ext4_extent_header * eh;
747 struct ext4_extent *ex, *fex;
748 struct ext4_extent *nearex; /* nearest extent */
749 @@ -1440,6 +1466,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
750 int depth, len, err;
751 ext4_lblk_t next;
752 unsigned uninitialized = 0;
753 + ext4_fsblk_t defrag_goal;
755 BUG_ON(ext4_ext_get_actual_len(newext) == 0);
756 depth = ext_depth(inode);
757 @@ -1500,11 +1527,17 @@ repeat:
758 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
761 + if (defrag) {
762 + defrag_goal = ext_pblock(newext);
763 + } else {
764 + defrag_goal = 0;
767 * There is no free space in the found leaf.
768 * We're gonna add a new leaf in the tree.
770 - err = ext4_ext_create_new_leaf(handle, inode, path, newext);
771 + err = ext4_ext_create_new_leaf(handle, inode, path,
772 + newext, defrag_goal);
773 if (err)
774 goto cleanup;
775 depth = ext_depth(inode);