Update to 2.6.24-git12; also replace all uses of iget() with iget_locked()
[ext4-patch-queue.git] / ext4-online-defrag-free-space-fragmentation.patch
blobd2fd71df50745c7b9d8a19ddd557eff43f395699
1 From: Akira Fujita <a-fujita@rs.jp.nec.com>
2 Free space fragmentation functions
4 Defrag tries to move other files to make sufficient space
5 and reallocates the contiguous blocks for the target file.
8 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
9 Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
10 Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
11 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
13 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
14 index 0dae175..608152e 100644
15 --- a/fs/ext4/balloc.c
16 +++ b/fs/ext4/balloc.c
17 @@ -383,7 +383,7 @@ restart:
18 * If the goal block is within the reservation window, return 1;
19 * otherwise, return 0;
21 -static int
22 +int
23 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
24 ext4_group_t group, struct super_block *sb)
26 @@ -488,7 +488,7 @@ void ext4_rsv_window_add(struct super_block *sb,
27 * from the filesystem reservation window rb tree. Must be called with
28 * rsv_lock hold.
30 -static void rsv_window_remove(struct super_block *sb,
31 +void rsv_window_remove(struct super_block *sb,
32 struct ext4_reserve_window_node *rsv)
34 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
35 @@ -503,7 +503,7 @@ static void rsv_window_remove(struct super_block *sb,
37 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
39 -static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
40 +inline int rsv_is_empty(struct ext4_reserve_window *rsv)
42 /* a valid reservation end block could not be 0 */
43 return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
44 @@ -871,7 +871,7 @@ static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
45 * bitmap on disk and the last-committed copy in journal, until we find a
46 * bit free in both bitmaps.
48 -static ext4_grpblk_t
49 +ext4_grpblk_t
50 bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
51 ext4_grpblk_t maxblocks)
53 @@ -1241,7 +1241,7 @@ static int find_next_reservable_window(
54 * @bitmap_bh: the block group block bitmap
57 -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
58 +int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
59 ext4_grpblk_t grp_goal, struct super_block *sb,
60 ext4_group_t group, struct buffer_head *bitmap_bh)
62 @@ -1385,7 +1385,7 @@ retry:
63 * expand the reservation window size if necessary on a best-effort
64 * basis before ext4_new_blocks() tries to allocate blocks,
66 -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
67 +void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
68 struct super_block *sb, int size)
70 struct ext4_reserve_window_node *next_rsv;
71 diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
72 index 3d0d3e5..6370fb8 100644
73 --- a/fs/ext4/defrag.c
74 +++ b/fs/ext4/defrag.c
75 @@ -13,6 +13,13 @@
76 #include <asm/uaccess.h>
77 #include "group.h"
79 +#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
80 +#define EXT_SET_EXTENT_DATA(src, dest) do { \
81 + dest.block = le32_to_cpu(src->ee_block); \
82 + dest.start = ext_pblock(src); \
83 + dest.len = le16_to_cpu(src->ee_len); \
84 + } while (0)
87 * this structure is used to gather extents from the tree via ioctl
89 @@ -250,6 +257,478 @@ ext4_ext_next_extent(struct inode *inode,
90 return 1;
93 +/**
94 + * ext4_ext_extents_info() - get extents information
95 + *
96 + * @ext_info: pointer to ext4_extents_info
97 + * @ext_info->ino: inode number of the file whose extent
98 + * information is returned
99 + * @ext_info->max_entries: defined by DEFRAG_MAX_ENT
100 + * @ext_info->entries: amount of extents (output)
101 + * @ext_info->ext[]: array of extent (output)
102 + * @ext_info->f_offset: starting block offset of targeted extent
103 + * (file relative)
105 + * @sb: for iget_locked()
107 + * This function returns 0 if next extent(s) exists,
108 + * or returns 1 if next extent doesn't exist, otherwise returns error value.
109 + */
110 +static int ext4_ext_extents_info(struct ext4_extents_info *ext_info,
111 + struct super_block *sb)
113 + struct ext4_ext_path *path = NULL;
114 + struct ext4_extent *ext = NULL;
115 + struct inode *inode = NULL;
116 + ext4_lblk_t offset = ext_info->f_offset;
117 + int max_entries = ext_info->max_entries;
118 + int is_last_extent = 0;
119 + int depth = 0;
120 + int entries = 0;
121 + int err = 0;
123 + inode = iget_locked(sb, ext_info->ino);
124 + if (!inode)
125 + return -EACCES;
126 + if (inode->i_state & I_NEW) {
127 + sb->s_op->read_inode(inode);
128 + unlock_new_inode(inode);
131 + down_write(&EXT4_I(inode)->i_data_sem);
133 + /* if the file doesn't exist or isn't a regular file */
134 + if ((!inode->i_nlink) || (inode->i_ino < 11) ||
135 + !S_ISREG(inode->i_mode)) {
136 + ext_info->entries = 0;
137 + err = -ENOENT;
138 + goto out;
141 + path = ext4_ext_find_extent(inode, offset, NULL);
142 + if (IS_ERR(path)) {
143 + err = PTR_ERR(path);
144 + path = NULL;
145 + goto out;
147 + depth = ext_depth(inode);
149 + /* if file size is 0, skip this one. */
150 + if (path[depth].p_ext == NULL) {
151 + ext_info->entries = 0;
152 + goto out;
154 + ext = path[depth].p_ext;
155 + EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
156 + entries = 1;
158 + /*
159 + * The ioctl repeats this loop 'max_entries' times.
160 + * So we have to call this function again if @inode has
161 + * more extents than 'max_entries'.
162 + */
163 + while (entries < max_entries) {
164 + is_last_extent = ext4_ext_next_extent(inode, path, &ext);
165 + /* found next extent (not the last one)*/
166 + if (is_last_extent == 0) {
167 + EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
168 + entries++;
170 + /*
171 + * In case @inode has > 'max_entries' extents,
172 + * we must call this function again and restart from
173 + * 'max_entries * n + 1'th extent.
174 + * 'n' is the number of times this function has been
175 + * called for the same @inode.
176 + */
177 + if (entries == max_entries) {
178 + ext_info->f_offset =
179 + le32_to_cpu(ext->ee_block) +
180 + le32_to_cpu(ext->ee_len);
181 + /* check whether the extent is the last one or not */
182 + is_last_extent =
183 + ext4_ext_next_extent(inode, path, &ext);
184 + if (is_last_extent == 1) {
185 + err = is_last_extent;
186 + } else if (is_last_extent < 0) {
187 + /*ERR*/
188 + err = is_last_extent;
189 + goto out;
191 + break;
194 + /* the extent is the last one */
195 + } else if (is_last_extent == 1) {
196 + ext_info->f_offset = 0;
197 + err = is_last_extent;
198 + break;
199 + } else {
200 + /* ERR */
201 + err = is_last_extent;
202 + goto out;
206 + ext_info->entries = entries;
208 +out:
209 + if (path) {
210 + ext4_ext_drop_refs(path);
211 + kfree(path);
213 + up_write(&EXT4_I(inode)->i_data_sem);
214 + iput(inode);
215 + return err;
218 +/**
219 + * ext4_ext_defrag_reserve - reserve blocks for defrag
220 + * @inode: target inode
221 + * @goal: block reservation goal
222 + * @len: blocks count to reserve
224 + * This function returns 0 if succeeded, otherwise
225 + * returns error value
226 + */
228 +static int
229 +ext4_ext_defrag_reserve(struct inode *inode, ext4_fsblk_t goal, int len)
231 + struct super_block *sb = NULL;
232 + handle_t *handle = NULL;
233 + struct buffer_head *bitmap_bh = NULL;
234 + struct ext4_block_alloc_info *block_i;
235 + struct ext4_reserve_window_node *my_rsv = NULL;
236 + unsigned short windowsz = 0;
237 + ext4_group_t group_no;
238 + ext4_grpblk_t grp_target_blk;
239 + int err = 0;
241 + down_write(&EXT4_I(inode)->i_data_sem);
243 + handle = ext4_journal_start(inode, EXT4_RESERVE_TRANS_BLOCKS);
244 + if (IS_ERR(handle)) {
245 + err = PTR_ERR(handle);
246 + handle = NULL;
247 + goto out;
250 + if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) {
251 + ext4_init_block_alloc_info(inode);
252 + } else if (!S_ISREG(inode->i_mode)) {
253 + printk(KERN_ERR "ext4_ext_defrag_reserve:"
254 + " incorrect file type\n");
255 + err = -1;
256 + goto out;
259 + sb = inode->i_sb;
260 + if (!sb) {
261 + printk(KERN_ERR "ext4_ext_defrag_reserve: "
262 + "nonexistent device\n");
263 + err = -ENXIO;
264 + goto out;
266 + ext4_get_group_no_and_offset(sb, goal, &group_no,
267 + &grp_target_blk);
269 + block_i = EXT4_I(inode)->i_block_alloc_info;
271 + if (!block_i || ((windowsz =
272 + block_i->rsv_window_node.rsv_goal_size) == 0)) {
273 + printk(KERN_ERR "ex4_ext_defrag_reserve: unable to reserve\n");
274 + err = -1;
275 + goto out;
278 + my_rsv = &block_i->rsv_window_node;
280 + bitmap_bh = read_block_bitmap(sb, group_no);
281 + if (!bitmap_bh) {
282 + err = -ENOSPC;
283 + goto out;
286 + BUFFER_TRACE(bitmap_bh, "get undo access for new block");
287 + err = ext4_journal_get_undo_access(handle, bitmap_bh);
288 + if (err)
289 + goto out;
291 + err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
292 + group_no, bitmap_bh);
293 + if (err < 0) {
294 + printk(KERN_ERR "defrag: reservation faild\n");
295 + ext4_discard_reservation(inode);
296 + goto out;
297 + } else {
298 + if (len > EXT4_DEFAULT_RESERVE_BLOCKS)
299 + try_to_extend_reservation(my_rsv, sb,
300 + len - EXT4_DEFAULT_RESERVE_BLOCKS);
304 +out:
305 + up_write(&EXT4_I(inode)->i_data_sem);
306 + ext4_journal_release_buffer(handle, bitmap_bh);
307 + brelse(bitmap_bh);
309 + if (handle)
310 + ext4_journal_stop(handle);
312 + return err;
315 +/**
316 + * ext4_ext_block_within_rsv - Is target extent reserved ?
317 + * @inode: inode of target file
318 + * @ex_start: start physical block number of the extent
319 + * which has already been moved
320 + * @ex_len: block length of the extent which has already been moved
322 + * This function returns 0 if the extent lies within the inode's
323 + * reservation window, otherwise returns -ENOSPC
324 + */
325 +static int ext4_ext_block_within_rsv(struct inode *inode,
326 + ext4_fsblk_t ex_start, int ex_len)
328 + struct super_block *sb = inode->i_sb;
329 + struct ext4_block_alloc_info *block_i;
330 + ext4_group_t group_no;
331 + ext4_grpblk_t grp_blk;
332 + struct ext4_reserve_window_node *rsv;
334 + block_i = EXT4_I(inode)->i_block_alloc_info;
335 + if (block_i && block_i->rsv_window_node.rsv_goal_size > 0) {
336 + rsv = &block_i->rsv_window_node;
337 + if (rsv_is_empty(&rsv->rsv_window)) {
338 + printk(KERN_ERR "defrag: Can't defrag due to"
339 + " the empty reservation\n");
340 + return -ENOSPC;
342 + } else {
343 + printk(KERN_ERR "defrag: No i_block_alloc_info\n");
344 + return -ENOSPC;
347 + ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
349 + if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
350 + || !goal_in_my_reservation(&rsv->rsv_window, grp_blk + ex_len - 1,
351 + group_no, sb)){
352 + printk(KERN_ERR "defrag: %d or %d in bg %lu is "
353 + "not in rsv_window\n", grp_blk,
354 + grp_blk + ex_len - 1, group_no);
355 + return -ENOSPC;
357 + return 0;
361 + * ext4_ext_fblocks_reserve() -
362 + * reserve free blocks by ext4_ext_defrag_reserve()
363 + * @inode: To get a block group number
364 + * @ext_info: free blocks distribution stored in extent-like style
365 + * @ext_info->ext[] an array of struct ext4_extents_data
366 + */
367 +static int ext4_ext_fblocks_reserve(struct inode *inode,
368 + struct ext4_extents_info *ext_info)
370 + ext4_fsblk_t ex_start = 0;
371 + int i;
372 + int ret = 0;
373 + int len = 0;
375 + for (i = 0; i < ext_info->entries; i++) {
376 + ex_start = ext_info->ext[i].start;
377 + len = ext_info->ext[i].len;
379 + ret = ext4_ext_defrag_reserve(inode, ex_start, len);
380 + if (ret < 0) {
381 + printk(KERN_ERR "defrag: failed "
382 + "ext4_ext_defrag_reserve\n");
383 + goto ERR;
385 + ret = ext4_ext_block_within_rsv(inode, ex_start, len);
386 + if (ret < 0) {
387 + printk(KERN_ERR "defrag: failed "
388 + "ext4_ext_block_within_rsv\n");
389 + goto ERR;
392 + return ret;
394 +ERR:
395 + down_write(&EXT4_I(inode)->i_data_sem);
396 + ext4_discard_reservation(inode);
397 + up_write(&EXT4_I(inode)->i_data_sem);
398 + return ret;
401 +/**
402 + * ext4_ext_defrag_victim - Create free space for defrag
403 + * @target_filp: target file
404 + * @ex_info: extents of the victim file (ex_info->ino) to move
406 + * This function returns 0 if succeeded, otherwise
407 + * returns error value
408 + */
409 +static int ext4_ext_defrag_victim(struct file *target_filp,
410 + struct ext4_extents_info *ex_info)
412 + struct inode *target_inode = target_filp->f_dentry->d_inode;
413 + struct super_block *sb = target_inode->i_sb;
414 + struct file victim_file;
415 + struct dentry victim_dent;
416 + struct inode *victim_inode;
417 + ext4_fsblk_t goal = ex_info->goal;
418 + int ret = 0;
419 + int i = 0;
420 + struct ext4_extent_data ext;
421 + ext4_group_t group;
422 + ext4_grpblk_t grp_off;
424 + /* Setup dummy extent data */
425 + ext.len = 0;
427 + /* Get the inode of the victim file */
428 + victim_inode = iget_locked(sb, ex_info->ino);
429 + if (!victim_inode)
430 + return -EACCES;
431 + if (victim_inode->i_state & I_NEW) {
432 + sb->s_op->read_inode(victim_inode);
433 + unlock_new_inode(victim_inode);
436 + /* Setup file for the victim file */
437 + victim_dent.d_inode = victim_inode;
438 + victim_file.f_dentry = &victim_dent;
439 + victim_file.f_mapping = victim_inode->i_mapping;
441 + /* If goal is -1, use the first block of the next block group */
442 + if (goal == -1) {
443 + ext4_get_group_no_and_offset(victim_inode->i_sb,
444 + ex_info->ext[0].start, &group, &grp_off);
445 + goal = ext4_group_first_block_no(sb, group + 1);
448 + for (i = 0; i < ex_info->entries; i++) {
449 + /* Move original blocks to another block group */
450 + ret = ext4_ext_defrag(&victim_file, ex_info->ext[i].block,
451 + ex_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
452 + if (ret < 0) {
453 + printk(KERN_ERR "defrag: failed ext4_ext_defrag\n");
454 + goto ERR;
457 + /* Sync journal blocks before reservation */
458 + ret = ext4_force_commit(sb);
459 + if (ret) {
460 + printk(KERN_ERR "defrag: failed ext4_force_commit (%d)\n", ret);
461 + goto ERR;
465 + iput(victim_inode);
466 + return 0;
467 +ERR:
468 + down_write(&EXT4_I(target_inode)->i_data_sem);
469 + ext4_discard_reservation(target_inode);
470 + up_write(&EXT4_I(target_inode)->i_data_sem);
471 + iput(victim_inode);
472 + return ret;
475 +/**
476 + * ext4_ext_fblocks_distribution - Search free block distribution
477 + * @inode: inode whose block group is searched
478 + * @ext_info: ext4_extents_info filled with the free extents found
480 + * This function returns 0 if succeeded, otherwise
481 + * returns error value
482 + */
483 +static int ext4_ext_fblocks_distribution(struct inode *inode,
484 + struct ext4_extents_info *ext_info)
486 + struct buffer_head *bitmap_bh = NULL;
487 + struct super_block *sb = inode->i_sb;
488 + struct ext4_super_block *es;
489 + handle_t *handle;
490 + ext4_group_t group_no;
491 + ext4_grpblk_t start, end;
492 + ext4_fsblk_t start_block = 0;
493 + int num = 0;
494 + int len = 0;
495 + int i = 0;
496 + int err = 0;
497 + int block_set = 0;
499 + if (!sb) {
500 + printk(KERN_ERR "ext4_ext_fblock_distribution: "
501 + "nonexitent device\n");
502 + return -ENOSPC;
504 + es = EXT4_SB(sb)->s_es;
506 + group_no = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
507 + start = ext_info->g_offset;
508 + end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
510 + handle = ext4_journal_start(inode, 1);
511 + if (IS_ERR(handle)) {
512 + err = PTR_ERR(handle);
513 + return err;
516 + bitmap_bh = read_block_bitmap(sb, group_no);
517 + if (!bitmap_bh) {
518 + err = -EIO;
519 + goto out;
522 + BUFFER_TRACE(bitmap_bh, "get undo access for new block");
523 + err = ext4_journal_get_undo_access(handle, bitmap_bh);
524 + if (err)
525 + goto out;
527 + for (i = start; i <= end ; i++) {
528 + if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) {
529 + len++;
530 + /* if the free block is the first one in a region */
531 + if (!block_set) {
532 + start_block =
533 + i + group_no * EXT4_BLOCKS_PER_GROUP(sb);
534 + block_set = 1;
536 + } else if (len) {
537 + ext_info->ext[num].start = start_block;
538 + ext_info->ext[num].len = len;
539 + num++;
540 + len = 0;
541 + block_set = 0;
542 + if (num == ext_info->max_entries) {
543 + ext_info->g_offset = i + 1;
544 + break;
547 + if ((i == end) && len) {
548 + ext_info->ext[num].start = start_block;
549 + ext_info->ext[num].len = len;
550 + num++;
554 + ext_info->entries = num;
555 +out:
556 + ext4_journal_release_buffer(handle, bitmap_bh);
557 + brelse(bitmap_bh);
559 + if (handle)
560 + ext4_journal_stop(handle);
562 + return err;
565 int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
566 unsigned long arg)
568 @@ -302,6 +781,74 @@ int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
569 unlock_kernel();
571 return put_user(block, p);
572 + } else if (cmd == EXT4_IOC_GROUP_INFO) {
573 + struct ext4_group_data_info grp_data;
575 + if (copy_from_user(&grp_data,
576 + (struct ext4_group_data_info __user *)arg,
577 + sizeof(grp_data)))
578 + return -EFAULT;
580 + grp_data.s_blocks_per_group =
581 + EXT4_BLOCKS_PER_GROUP(inode->i_sb);
582 + grp_data.s_inodes_per_group =
583 + EXT4_INODES_PER_GROUP(inode->i_sb);
585 + if (copy_to_user((struct ext4_group_data_info *)arg,
586 + &grp_data, sizeof(grp_data)))
587 + return -EFAULT;
588 + } else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) {
589 + struct ext4_extents_info ext_info;
591 + if (copy_from_user(&ext_info,
592 + (struct ext4_extents_info __user *)arg,
593 + sizeof(ext_info)))
594 + return -EFAULT;
596 + BUG_ON(ext_info.ino != inode->i_ino);
598 + err = ext4_ext_fblocks_distribution(inode, &ext_info);
600 + if (!err)
601 + err = copy_to_user((struct ext4_extents_info *)arg,
602 + &ext_info, sizeof(ext_info));
603 + } else if (cmd == EXT4_IOC_EXTENTS_INFO) {
604 + struct ext4_extents_info ext_info;
606 + if (copy_from_user(&ext_info,
607 + (struct ext4_extents_info __user *)arg,
608 + sizeof(ext_info)))
609 + return -EFAULT;
611 + err = ext4_ext_extents_info(&ext_info, inode->i_sb);
612 + if (err >= 0) {
613 + if (copy_to_user((struct ext4_extents_info __user *)arg,
614 + &ext_info, sizeof(ext_info)))
615 + return -EFAULT;
617 + } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
618 + struct ext4_extents_info ext_info;
620 + if (copy_from_user(&ext_info,
621 + (struct ext4_extents_info __user *)arg,
622 + sizeof(ext_info)))
623 + return -EFAULT;
625 + err = ext4_ext_fblocks_reserve(inode, &ext_info);
626 + } else if (cmd == EXT4_IOC_MOVE_VICTIM) {
627 + struct ext4_extents_info ext_info;
629 + if (copy_from_user(&ext_info,
630 + (struct ext4_extents_info __user *)arg,
631 + sizeof(ext_info)))
632 + return -EFAULT;
634 + err = ext4_ext_defrag_victim(filp, &ext_info);
636 + } else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
637 + down_write(&EXT4_I(inode)->i_data_sem);
638 + ext4_discard_reservation(inode);
639 + up_write(&EXT4_I(inode)->i_data_sem);
640 } else if (cmd == EXT4_IOC_DEFRAG) {
641 struct ext4_ext_defrag_data defrag;
643 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
644 index 1fb733d..5c4af51 100644
645 --- a/fs/ext4/extents.c
646 +++ b/fs/ext4/extents.c
647 @@ -177,11 +177,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
648 static ext4_fsblk_t
649 ext4_ext_new_block(handle_t *handle, struct inode *inode,
650 struct ext4_ext_path *path,
651 - struct ext4_extent *ex, int *err)
652 + struct ext4_extent *ex, int *err,
653 + ext4_fsblk_t defrag_goal)
655 ext4_fsblk_t goal, newblock;
657 - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
658 + if (defrag_goal) {
659 + goal = defrag_goal;
660 + } else {
661 + goal = ext4_ext_find_goal(inode, path,
662 + le32_to_cpu(ex->ee_block));
664 newblock = ext4_new_block(handle, inode, goal, err);
665 return newblock;
667 @@ -632,7 +638,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
669 static int ext4_ext_split(handle_t *handle, struct inode *inode,
670 struct ext4_ext_path *path,
671 - struct ext4_extent *newext, int at)
672 + struct ext4_extent *newext, int at,
673 + ext4_fsblk_t defrag_goal)
675 struct buffer_head *bh = NULL;
676 int depth = ext_depth(inode);
677 @@ -682,7 +689,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
678 /* allocate all needed blocks */
679 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
680 for (a = 0; a < depth - at; a++) {
681 - newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
682 + newblock = ext4_ext_new_block(handle, inode, path,
683 + newext, &err, defrag_goal);
684 if (newblock == 0)
685 goto cleanup;
686 ablocks[a] = newblock;
687 @@ -871,7 +879,8 @@ cleanup:
689 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
690 struct ext4_ext_path *path,
691 - struct ext4_extent *newext)
692 + struct ext4_extent *newext,
693 + ext4_fsblk_t defrag_goal)
695 struct ext4_ext_path *curp = path;
696 struct ext4_extent_header *neh;
697 @@ -880,7 +889,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
698 ext4_fsblk_t newblock;
699 int err = 0;
701 - newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
702 + newblock = ext4_ext_new_block(handle, inode, path,
703 + newext, &err, defrag_goal);
704 if (newblock == 0)
705 return err;
707 @@ -956,7 +966,8 @@ out:
709 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
710 struct ext4_ext_path *path,
711 - struct ext4_extent *newext)
712 + struct ext4_extent *newext,
713 + ext4_fsblk_t defrag_goal)
715 struct ext4_ext_path *curp;
716 int depth, i, err = 0;
717 @@ -976,7 +987,8 @@ repeat:
718 if (EXT_HAS_FREE_INDEX(curp)) {
719 /* if we found index with free entry, then use that
720 * entry: create all needed subtree and add new leaf */
721 - err = ext4_ext_split(handle, inode, path, newext, i);
722 + err = ext4_ext_split(handle, inode, path,
723 + newext, i, defrag_goal);
725 /* refill path */
726 ext4_ext_drop_refs(path);
727 @@ -987,7 +999,8 @@ repeat:
728 err = PTR_ERR(path);
729 } else {
730 /* tree is full, time to grow in depth */
731 - err = ext4_ext_grow_indepth(handle, inode, path, newext);
732 + err = ext4_ext_grow_indepth(handle, inode, path,
733 + newext, defrag_goal);
734 if (err)
735 goto out;
737 @@ -1433,6 +1446,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
738 struct ext4_ext_path *path,
739 struct ext4_extent *newext)
741 + return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
745 + * ext4_ext_insert_extent_defrag:
746 + * The difference from ext4_ext_insert_extent is to use the first block
747 + * in newext as the goal of the new index block.
748 + */
749 +int
750 +ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
751 + struct ext4_ext_path *path,
752 + struct ext4_extent *newext, int defrag)
754 struct ext4_extent_header * eh;
755 struct ext4_extent *ex, *fex;
756 struct ext4_extent *nearex; /* nearest extent */
757 @@ -1440,6 +1466,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
758 int depth, len, err;
759 ext4_lblk_t next;
760 unsigned uninitialized = 0;
761 + ext4_fsblk_t defrag_goal;
763 BUG_ON(ext4_ext_get_actual_len(newext) == 0);
764 depth = ext_depth(inode);
765 @@ -1500,11 +1527,17 @@ repeat:
766 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
769 + if (defrag) {
770 + defrag_goal = ext_pblock(newext);
771 + } else {
772 + defrag_goal = 0;
775 * There is no free space in the found leaf.
776 * We're gonna add a new leaf in the tree.
778 - err = ext4_ext_create_new_leaf(handle, inode, path, newext);
779 + err = ext4_ext_create_new_leaf(handle, inode, path,
780 + newext, defrag_goal);
781 if (err)
782 goto cleanup;
783 depth = ext_depth(inode);