Further updates of Documentation/filesystem/ext4.txt
[ext4-patch-queue.git] / ext4-online-defrag-move-victim-files.patch
blobb0061bdf06bb49a7a106bb514fa4a3a4cf3b42ba
1 ext4: online defrag-- Move victim files for the target file (-f mode)
3 From: Akira Fujita <a-fujita@rs.jp.nec.com>
5 Move victim files to make sufficient space and reallocate
6 contiguous blocks for the target file.
8 Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
9 Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
10 ---
11 fs/ext4/balloc.c | 10 -
12 fs/ext4/defrag.c | 460 +++++++++++++++++++++++++++++++++++++++++++++----
13 fs/ext4/ext4.h | 29 ++-
14 fs/ext4/ext4_extents.h | 5
15 fs/ext4/extents.c | 53 ++++-
16 fs/ext4/ioctl.c | 5
17 fs/ext4/mballoc.c | 5
18 fs/ext4/mballoc.h | 1
19 8 files changed, 521 insertions(+), 47 deletions(-)
21 Index: linux-2.6.26-rc6/fs/ext4/balloc.c
22 ===================================================================
23 --- linux-2.6.26-rc6.orig/fs/ext4/balloc.c 2008-06-17 10:43:43.000000000 -0700
24 +++ linux-2.6.26-rc6/fs/ext4/balloc.c 2008-06-17 10:43:44.000000000 -0700
25 @@ -428,7 +428,7 @@ restart:
26 * If the goal block is within the reservation window, return 1;
27 * otherwise, return 0;
29 -static int
30 +int
31 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
32 ext4_group_t group, struct super_block *sb)
34 @@ -533,7 +533,7 @@ void ext4_rsv_window_add(struct super_bl
35 * from the filesystem reservation window rb tree. Must be called with
36 * rsv_lock hold.
38 -static void rsv_window_remove(struct super_block *sb,
39 +void rsv_window_remove(struct super_block *sb,
40 struct ext4_reserve_window_node *rsv)
42 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
43 @@ -548,7 +548,7 @@ static void rsv_window_remove(struct sup
45 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
47 -static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
48 +inline int rsv_is_empty(struct ext4_reserve_window *rsv)
50 /* a valid reservation end block could not be 0 */
51 return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
52 @@ -1284,7 +1284,7 @@ static int find_next_reservable_window(
53 * @bitmap_bh: the block group block bitmap
56 -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
57 +int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
58 ext4_grpblk_t grp_goal, struct super_block *sb,
59 ext4_group_t group, struct buffer_head *bitmap_bh)
61 @@ -1428,7 +1428,7 @@ retry:
62 * expand the reservation window size if necessary on a best-effort
63 * basis before ext4_new_blocks() tries to allocate blocks,
65 -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
66 +void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
67 struct super_block *sb, int size)
69 struct ext4_reserve_window_node *next_rsv;
70 Index: linux-2.6.26-rc6/fs/ext4/defrag.c
71 ===================================================================
72 --- linux-2.6.26-rc6.orig/fs/ext4/defrag.c 2008-06-17 10:43:43.000000000 -0700
73 +++ linux-2.6.26-rc6/fs/ext4/defrag.c 2008-06-17 10:43:44.000000000 -0700
74 @@ -218,6 +218,267 @@ out:
77 /**
78 + * ext4_defrag_reserve_blocks - Reserve blocks for defrag
79 + *
80 + * @org_inode: original inode
81 + * @goal: the goal offset of the block reservation
82 + * @len: blocks count we need to reserve
83 + *
84 + * This function returns 0 if succeed, otherwise returns error value.
85 + */
87 +static int
88 +ext4_defrag_reserve_blocks(struct inode *org_inode, ext4_fsblk_t goal, int len)
90 + struct super_block *sb = NULL;
91 + handle_t *handle;
92 + struct buffer_head *bitmap_bh = NULL;
93 + struct ext4_block_alloc_info *block_i;
94 + struct ext4_reserve_window_node *my_rsv = NULL;
95 + unsigned short windowsz = 0;
96 + ext4_group_t group_no;
97 + ext4_grpblk_t grp_target_blk;
98 + int err = 0;
100 + down_write(&EXT4_I(org_inode)->i_data_sem);
102 + handle = ext4_journal_start(org_inode, EXT4_RESERVE_TRANS_BLOCKS);
103 + if (IS_ERR(handle)) {
104 + err = PTR_ERR(handle);
105 + handle = NULL;
106 + goto out;
109 + if (S_ISREG(org_inode->i_mode) &&
110 + !EXT4_I(org_inode)->i_block_alloc_info) {
111 + ext4_init_block_alloc_info(org_inode);
112 + } else if (!S_ISREG(org_inode->i_mode)) {
113 + printk(KERN_ERR "ext4 defrag: Invalid file type\n");
114 + err = -EINVAL;
115 + goto out;
118 + sb = org_inode->i_sb;
119 + if (!sb) {
120 + printk(KERN_ERR "ext4 defrag: Non-existent device\n");
121 + err = -ENXIO;
122 + goto out;
124 + ext4_get_group_no_and_offset(sb, goal, &group_no,
125 + &grp_target_blk);
127 + block_i = EXT4_I(org_inode)->i_block_alloc_info;
128 + /* Block reservation should be enabled */
129 + BUG_ON(!block_i);
131 + windowsz = block_i->rsv_window_node.rsv_goal_size;
132 + /* Goal size should be set */
133 + BUG_ON(!windowsz);
135 + my_rsv = &block_i->rsv_window_node;
137 + bitmap_bh = ext4_read_block_bitmap(sb, group_no);
138 + if (!bitmap_bh) {
139 + err = -ENOSPC;
140 + goto out;
143 + BUFFER_TRACE(bitmap_bh, "get undo access for new block");
144 + err = ext4_journal_get_undo_access(handle, bitmap_bh);
145 + if (err)
146 + goto out;
148 + err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
149 + group_no, bitmap_bh);
150 + if (err < 0) {
151 + printk(KERN_ERR "ext4 defrag: Block reservation failed."
152 + "offset [%d], bg[%lu]\n", grp_target_blk, group_no);
153 + ext4_discard_reservation(org_inode);
154 + goto out;
155 + } else if (len > EXT4_DEFAULT_RESERVE_BLOCKS) {
156 + try_to_extend_reservation(my_rsv, sb,
157 + len - EXT4_DEFAULT_RESERVE_BLOCKS);
160 +out:
161 + up_write(&EXT4_I(org_inode)->i_data_sem);
162 + ext4_journal_release_buffer(handle, bitmap_bh);
163 + brelse(bitmap_bh);
165 + if (handle)
166 + ext4_journal_stop(handle);
168 + return err;
171 +/**
172 + * ext4_defrag_block_within_rsv - Is target extent reserved ?
174 + * @org_inode: original inode
175 + * @ex_start: physical block offset of the extent which has already been moved
176 + * @ex_len: block length of the extent
178 + * Returns 0 if the extent is inside the reservation window, else -ENOSPC.
179 + */
180 +static int
181 +ext4_defrag_block_within_rsv(struct inode *org_inode, ext4_fsblk_t ex_start,
182 + int ex_len)
184 + struct super_block *sb = org_inode->i_sb;
185 + struct ext4_block_alloc_info *block_i;
186 + ext4_group_t group_no;
187 + ext4_grpblk_t grp_blk;
188 + struct ext4_reserve_window_node *rsv;
190 + block_i = EXT4_I(org_inode)->i_block_alloc_info;
191 + /* Block reservation should be enabled */
192 + BUG_ON(!block_i);
194 + /* Goal size should be set */
195 + BUG_ON(!block_i->rsv_window_node.rsv_goal_size);
197 + rsv = &block_i->rsv_window_node;
198 + if (rsv_is_empty(&rsv->rsv_window)) {
199 + printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
200 + return -ENOSPC;
203 + ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
205 + if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
206 + || !goal_in_my_reservation(&rsv->rsv_window,
207 + grp_blk + ex_len - 1, group_no, sb)){
208 + /* Goal blocks are not in the reservation window */
209 + printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
210 + "not in rsv_window\n", grp_blk,
211 + grp_blk + ex_len - 1, group_no);
212 + return -ENOSPC;
214 + return 0;
218 + * ext4_defrag_reserve_fblocks -
219 + * Reserve free blocks with ext4_defrag_reserve_blocks
221 + * @org_inode: original inode to get a block group number
222 + * @ext_info: free blocks distribution stored in an extent-like style
223 + * @ext_info->ext[]: an array of struct ext4_extent_data
225 + * This function returns 0 if succeed, otherwise returns error value.
226 + */
227 +static int
228 +ext4_defrag_reserve_fblocks(struct inode *org_inode,
229 + struct ext4_extents_info *ext_info)
231 + ext4_fsblk_t ex_start = 0;
232 + int i, len, ret;
234 + for (i = 0; i < ext_info->entries; i++) {
235 + ex_start = ext_info->ext[i].start;
236 + len = ext_info->ext[i].len;
238 + ret = ext4_defrag_reserve_blocks(org_inode, ex_start, len);
239 + if (ret < 0) {
240 + printk(KERN_ERR "ext4 defrag: "
241 + "Block reservation failed. offset [%llu], "
242 + "length [%d]\n", ex_start, len);
243 + goto err;
246 + /* Confirm that blocks are in the reservation window */
247 + ret = ext4_defrag_block_within_rsv(org_inode, ex_start, len);
248 + if (ret < 0) {
249 + printk(KERN_ERR "ext4 defrag: "
250 + "Reservation window is not set. "
251 + "offset [%llu], length [%d]\n", ex_start, len);
252 + goto err;
255 + return ret;
257 +err:
258 + down_write(&EXT4_I(org_inode)->i_data_sem);
259 + ext4_discard_reservation(org_inode);
260 + up_write(&EXT4_I(org_inode)->i_data_sem);
261 + return ret;
264 +/**
265 + * ext4_defrag_move_victim - Create free space for defrag
267 + * @target_filp: target file
268 + * @ext_info: target extents array to move
270 + * This function returns 0 if succeed, otherwise
271 + * returns error value.
272 + */
273 +static int
274 +ext4_defrag_move_victim(struct file *target_filp,
275 + struct ext4_extents_info *ext_info)
277 + struct inode *org_inode = target_filp->f_dentry->d_inode;
278 + struct super_block *sb = org_inode->i_sb;
279 + struct file victim_file;
280 + struct dentry victim_dent;
281 + struct inode *victim_inode;
282 + struct ext4_extent_data ext;
283 + ext4_fsblk_t goal = ext_info->goal;
284 + ext4_group_t group;
285 + ext4_grpblk_t grp_off;
286 + int ret, i;
288 + /* Setup dummy extent data */
289 + ext.len = 0;
291 + /* Get the inode of the victim file */
292 + victim_inode = ext4_iget(sb, ext_info->ino);
293 + if (IS_ERR(victim_inode))
294 + return PTR_ERR(victim_inode);
296 + /* Setup file for the victim file */
297 + victim_dent.d_inode = victim_inode;
298 + victim_file.f_dentry = &victim_dent;
299 + victim_file.f_mapping = victim_inode->i_mapping;
301 + /* Set the goal to an appropriate offset */
302 + if (goal == -1) {
303 + ext4_get_group_no_and_offset(victim_inode->i_sb,
304 + ext_info->ext[0].start, &group, &grp_off);
305 + goal = ext4_group_first_block_no(sb, group + 1);
308 + for (i = 0; i < ext_info->entries; i++) {
309 + /* Move original blocks to another block group */
310 + ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
311 + ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
312 + if (ret < 0) {
313 + printk(KERN_ERR "ext4 defrag: "
314 + "Moving victim file failed. ino [%llu]\n",
315 + ext_info->ino);
316 + goto err;
319 + /* Sync journal blocks before reservation */
320 + ret = ext4_force_commit(sb);
321 + if (ret) {
322 + printk(KERN_ERR "ext4 defrag: "
323 + "ext4_force_commit failed(%d)\n", ret);
324 + goto err;
328 + iput(victim_inode);
329 + return 0;
330 +err:
331 + down_write(&EXT4_I(org_inode)->i_data_sem);
332 + ext4_discard_reservation(org_inode);
333 + up_write(&EXT4_I(org_inode)->i_data_sem);
334 + iput(victim_inode);
335 + return ret;
338 +/**
339 * ext4_defrag_fblocks_distribution - Search free blocks distribution
341 * @org_inode: original inode
342 @@ -383,6 +644,29 @@ int ext4_defrag_ioctl(struct inode *inod
343 &ext_info, sizeof(ext_info)))
344 return -EFAULT;
346 + } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
347 + struct ext4_extents_info ext_info;
349 + if (copy_from_user(&ext_info,
350 + (struct ext4_extents_info __user *)arg,
351 + sizeof(ext_info)))
352 + return -EFAULT;
354 + err = ext4_defrag_reserve_fblocks(inode, &ext_info);
355 + } else if (cmd == EXT4_IOC_MOVE_VICTIM) {
356 + struct ext4_extents_info ext_info;
358 + if (copy_from_user(&ext_info,
359 + (struct ext4_extents_info __user *)arg,
360 + sizeof(ext_info)))
361 + return -EFAULT;
363 + err = ext4_defrag_move_victim(filp, &ext_info);
365 + } else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
366 + down_write(&EXT4_I(inode)->i_data_sem);
367 + ext4_discard_reservation(inode);
368 + up_write(&EXT4_I(inode)->i_data_sem);
369 } else if (cmd == EXT4_IOC_DEFRAG) {
370 struct ext4_ext_defrag_data defrag;
371 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
372 @@ -409,7 +693,8 @@ int ext4_defrag_ioctl(struct inode *inod
375 err = ext4_defrag(filp, defrag.start_offset,
376 - defrag.defrag_size, defrag.goal);
377 + defrag.defrag_size, defrag.goal, defrag.flag,
378 + &defrag.ext);
381 return err;
382 @@ -425,6 +710,7 @@ int ext4_defrag_ioctl(struct inode *inod
383 * @start_ext: first new extent to be merged
384 * @new_ext: middle of new extent to be merged
385 * @end_ext: last new extent to be merged
386 + * @phase: phase of the force defrag mode
388 * This function returns 0 if succeed, otherwise returns error value.
390 @@ -432,14 +718,20 @@ static int
391 ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
392 struct ext4_extent *o_start, struct ext4_extent *o_end,
393 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
394 - struct ext4_extent *end_ext)
395 + struct ext4_extent *end_ext, int phase)
397 struct ext4_ext_path *org_path = NULL;
398 ext4_lblk_t eblock = 0;
399 int new_flag = 0;
400 int end_flag = 0;
401 + int defrag_flag;
402 int err;
404 + if (phase == DEFRAG_FORCE_VICTIM)
405 + defrag_flag = 1;
406 + else
407 + defrag_flag = 0;
409 if (le16_to_cpu(start_ext->ee_len) &&
410 le16_to_cpu(new_ext->ee_len) &&
411 le16_to_cpu(end_ext->ee_len)) {
412 @@ -516,8 +808,8 @@ ext4_defrag_merge_across_blocks(handle_t
413 org_path = NULL;
414 goto out;
416 - err = ext4_ext_insert_extent(handle, org_inode,
417 - org_path, new_ext);
418 + err = ext4_ext_insert_extent_defrag(handle, org_inode,
419 + org_path, new_ext, defrag_flag);
420 if (err)
421 goto out;
423 @@ -530,8 +822,8 @@ ext4_defrag_merge_across_blocks(handle_t
424 org_path = NULL;
425 goto out;
427 - err = ext4_ext_insert_extent(handle, org_inode,
428 - org_path, end_ext);
429 + err = ext4_ext_insert_extent_defrag(handle, org_inode,
430 + org_path, end_ext, defrag_flag);
431 if (err)
432 goto out;
434 @@ -609,6 +901,7 @@ ext4_defrag_merge_inside_block(struct ex
435 * @new_ext: middle of new extent to be merged
436 * @end_ext: last new extent to be merged
437 * @replaced: the number of blocks which will be replaced with new_ext
438 + * @phase: phase of the force defrag mode
440 * This function returns 0 if succeed, otherwise returns error value.
442 @@ -617,7 +910,7 @@ ext4_defrag_merge_extents(handle_t *hand
443 struct ext4_ext_path *org_path,
444 struct ext4_extent *o_start, struct ext4_extent *o_end,
445 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
446 - struct ext4_extent *end_ext, ext4_fsblk_t replaced)
447 + struct ext4_extent *end_ext, ext4_fsblk_t replaced, int phase)
449 struct ext4_extent_header *eh;
450 unsigned need_slots, slots_range;
451 @@ -655,7 +948,7 @@ ext4_defrag_merge_extents(handle_t *hand
453 ret = ext4_defrag_merge_across_blocks(handle, org_inode,
454 o_start, o_end, start_ext, new_ext,
455 - end_ext);
456 + end_ext, phase);
457 if (ret < 0)
458 return ret;
459 } else {
460 @@ -688,13 +981,14 @@ ext4_defrag_merge_extents(handle_t *hand
461 * @org_path: path indicates first extent to be defraged
462 * @dext: destination extent
463 * @from: start offset on the target file
464 + * @phase: phase of the force defrag mode
466 * This function returns 0 if succeed, otherwise returns error value.
468 static int
469 ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
470 struct ext4_ext_path *org_path, struct ext4_extent *dext,
471 - ext4_lblk_t *from)
472 + ext4_lblk_t *from, int phase)
474 struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext;
475 struct ext4_extent new_ext, start_ext, end_ext;
476 @@ -795,7 +1089,7 @@ ext4_defrag_leaf_block(handle_t *handle,
477 + le16_to_cpu(oext->ee_len) - 1) {
478 ret = ext4_defrag_merge_extents(handle, org_inode,
479 org_path, o_start, o_end, &start_ext,
480 - &new_ext, &end_ext, replaced);
481 + &new_ext, &end_ext, replaced, phase);
482 if (ret < 0)
483 return ret;
485 @@ -847,6 +1141,7 @@ ext4_defrag_leaf_block(handle_t *handle,
486 * @from_page: page offset of org_inode
487 * @dest_from_page: page offset of dest_inode
488 * @count_page: page count to be replaced
489 + * @phase: phase of the force defrag mode
491 * This function returns 0 if succeed, otherwise returns error value.
492 * Replace extents for blocks from "from" to "from + count - 1".
493 @@ -854,7 +1149,7 @@ ext4_defrag_leaf_block(handle_t *handle,
494 static int
495 ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
496 struct inode *dest_inode, pgoff_t from_page,
497 - pgoff_t dest_from_page, pgoff_t count_page)
498 + pgoff_t dest_from_page, pgoff_t count_page, int phase)
500 struct ext4_ext_path *org_path = NULL;
501 struct ext4_ext_path *dest_path = NULL;
502 @@ -922,7 +1217,7 @@ ext4_defrag_replace_branches(handle_t *h
504 /* Loop for the original extent blocks */
505 err = ext4_defrag_leaf_block(handle, org_inode,
506 - org_path, dext, &from);
507 + org_path, dext, &from, phase);
508 if (err < 0)
509 goto out;
511 @@ -932,7 +1227,7 @@ ext4_defrag_replace_branches(handle_t *h
512 * e.g. ext4_defrag_merge_extents()
514 err = ext4_defrag_leaf_block(handle, dest_inode,
515 - dest_path, swap_ext, &dest_off);
516 + dest_path, swap_ext, &dest_off, -1);
517 if (err < 0)
518 goto out;
520 @@ -1028,6 +1323,7 @@ out:
521 * @req_blocks: contiguous blocks count we need
522 * @iblock: target file offset
523 * @goal: goal offset
524 + * @phase: phase of the force defrag mode
527 static void
528 @@ -1036,8 +1332,22 @@ ext4_defrag_fill_ar(struct inode *org_in
529 struct ext4_ext_path *org_path,
530 struct ext4_ext_path *dest_path,
531 ext4_fsblk_t req_blocks, ext4_lblk_t iblock,
532 - ext4_fsblk_t goal)
533 + ext4_fsblk_t goal, int phase)
535 + ext4_group_t org_grp_no;
536 + ext4_grpblk_t org_blk_off;
537 + int org_depth = ext_depth(org_inode);
539 + if (phase == DEFRAG_FORCE_VICTIM) {
540 + ext4_get_group_no_and_offset(org_inode->i_sb,
541 + ext_pblock(org_path[org_depth].p_ext),
542 + &org_grp_no, &org_blk_off);
543 + ar->excepted_group = org_grp_no;
544 + } else {
545 + /* Allocate contiguous blocks to any block group */
546 + ar->excepted_group = -1;
549 ar->inode = dest_inode;
550 ar->len = req_blocks;
551 ar->logical = iblock;
552 @@ -1101,19 +1411,70 @@ ext4_defrag_alloc_blocks(handle_t *handl
556 + * ext4_defrag_check_phase
557 + * - Check condition of the allocated blocks (only force defrag mode)
559 + * @ar: allocation request for multiple block allocation
560 + * @dest_grp_no: block group num of the allocated blocks
561 + * @goal_grp_no: block group num of the destination of block allocation
562 + * @alloc_total: sum total of the allocated blocks
563 + * @req_blocks: contiguous blocks count we need
564 + * @phase: phase of the force defrag mode
566 + * This function returns 0 if succeed, otherwise returns error value.
567 + */
568 +static int
569 +ext4_defrag_check_phase(struct ext4_allocation_request *ar,
570 + ext4_group_t dest_grp_no, ext4_group_t goal_grp_no,
571 + ext4_fsblk_t alloc_total, ext4_lblk_t req_blocks,
572 + int phase)
574 + int err = 0;
576 + switch (phase) {
577 + case DEFRAG_FORCE_TRY:
578 + /* If there is not enough space, return -ENOSPC. */
579 + if (ar->len != req_blocks)
580 + /* -ENOSPC triggers DEFRAG_FORCE_VICTIM phase. */
581 + err = -ENOSPC;
582 + break;
583 + case DEFRAG_FORCE_VICTIM:
584 + /* We can't allocate new blocks in the same block group. */
585 + if (dest_grp_no == ar->excepted_group) {
586 + printk(KERN_ERR "ext4 defrag: Failed to allocate"
587 + " victim file to other block group\n");
588 + err = -ENOSPC;
590 + break;
591 + case DEFRAG_FORCE_GATHER:
592 + /* Maybe reserved blocks are already used by other process. */
593 + if (dest_grp_no != goal_grp_no
594 + || alloc_total != req_blocks) {
595 + printk(KERN_ERR "ext4 defrag: Reserved blocks are"
596 + " already used by other process\n");
597 + err = -EIO;
599 + break;
602 + return err;
605 +/**
606 * ext4_defrag_partial - Defrag a file per page
608 * @tmp_inode: temporary inode
609 * @filp: pointer to file
610 * @org_offset: page index on original file
611 * @dest_offset: page index on temporary file
612 + * @phase: phase of the force defrag mode
615 * This function returns 0 if succeed, otherwise returns error value.
617 static int
618 ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
619 - pgoff_t org_offset, pgoff_t dest_offset)
620 + pgoff_t org_offset, pgoff_t dest_offset, int phase)
622 struct inode *org_inode = filp->f_dentry->d_inode;
623 struct address_space *mapping = org_inode->i_mapping;
624 @@ -1180,7 +1541,7 @@ ext4_defrag_partial(struct inode *tmp_in
625 /* Release old bh and drop refs */
626 try_to_release_page(page, 0);
627 ret = ext4_defrag_replace_branches(handle, org_inode, tmp_inode,
628 - org_offset, dest_offset, 1);
629 + org_offset, dest_offset, 1, phase);
631 if (ret < 0)
632 goto out;
633 @@ -1227,6 +1588,7 @@ out:
634 * @tar_end: the last block number of the allocated blocks
635 * @sum_tmp: the extents count in the allocated blocks
636 * @goal: block offset for allocaton
637 + * @phase: phase of the force defrag mode
640 * This function returns the values as below.
641 @@ -1237,7 +1599,7 @@ out:
642 static int
643 ext4_defrag_comp_ext_count(struct inode *org_inode,
644 struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
645 - int sum_tmp, ext4_fsblk_t goal)
646 + int sum_tmp, ext4_fsblk_t goal, int phase)
648 struct ext4_extent *ext = NULL;
649 int depth = ext_depth(org_inode);
650 @@ -1264,7 +1626,8 @@ ext4_defrag_comp_ext_count(struct inode
651 if (sum_org == sum_tmp && !goal) {
652 /* Not improved */
653 ret = 1;
654 - } else if (sum_org < sum_tmp) {
655 + } else if (sum_org < sum_tmp &&
656 + phase != DEFRAG_FORCE_VICTIM) {
657 /* Fragment increased */
658 ret = -ENOSPC;
659 printk(KERN_ERR "ext4 defrag: "
660 @@ -1293,6 +1656,7 @@ ext4_defrag_comp_ext_count(struct inode
661 * @tar_blocks: the number of blocks to allocate
662 * @iblock: file related offset
663 * @goal: block offset for allocaton
664 + * @phase: phase of the force defrag mode
667 * This function returns the value as below:
668 @@ -1304,7 +1668,7 @@ static int
669 ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
670 struct ext4_ext_path *org_path, ext4_lblk_t tar_start,
671 ext4_lblk_t tar_blocks, ext4_lblk_t iblock,
672 - ext4_fsblk_t goal)
673 + ext4_fsblk_t goal, int phase)
675 handle_t *handle;
676 struct ext4_extent_header *eh = NULL;
677 @@ -1314,6 +1678,8 @@ ext4_defrag_new_extent_tree(struct inode
678 ext4_fsblk_t alloc_total = 0;
679 ext4_fsblk_t newblock = 0;
680 ext4_lblk_t tar_end = tar_start + tar_blocks - 1;
681 + ext4_group_t dest_group_no, goal_group_no;
682 + ext4_grpblk_t dest_blk_off, goal_blk_off;
683 int sum_tmp = 0;
684 int metadata = 1;
685 int ret, ret2;
686 @@ -1330,7 +1696,7 @@ ext4_defrag_new_extent_tree(struct inode
688 /* Fill struct ext4_allocation_request with necessary info */
689 ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
690 - dest_path, tar_blocks, iblock, goal);
691 + dest_path, tar_blocks, iblock, goal, phase);
693 handle = ext4_journal_start(tmp_inode, 0);
694 if (IS_ERR(handle)) {
695 @@ -1338,6 +1704,9 @@ ext4_defrag_new_extent_tree(struct inode
696 goto out2;
699 + ext4_get_group_no_and_offset(tmp_inode->i_sb, goal,
700 + &goal_group_no, &goal_blk_off);
702 while (alloc_total != tar_blocks) {
703 /* Allocate blocks */
704 ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode,
705 @@ -1345,8 +1714,20 @@ ext4_defrag_new_extent_tree(struct inode
706 if (ret < 0)
707 goto out;
709 + ext4_get_group_no_and_offset(tmp_inode->i_sb, newblock,
710 + &dest_group_no, &dest_blk_off);
712 alloc_total += ar.len;
714 + /* Checks that are done only in force mode */
715 + if (phase) {
716 + ret = ext4_defrag_check_phase(&ar, dest_group_no,
717 + goal_group_no, alloc_total,
718 + tar_blocks, phase);
719 + if (ret < 0)
720 + goto out;
723 newex.ee_block = cpu_to_le32(alloc_total - ar.len);
724 ext4_ext_store_pblock(&newex, newblock);
725 newex.ee_len = cpu_to_le16(ar.len);
726 @@ -1356,13 +1737,14 @@ ext4_defrag_new_extent_tree(struct inode
727 if (ret < 0)
728 goto out;
730 - ar.goal = newblock + ar.len;
731 + if (!phase)
732 + ar.goal = newblock + ar.len;
733 ar.len = tar_blocks - alloc_total;
734 sum_tmp++;
737 ret = ext4_defrag_comp_ext_count(org_inode, org_path, tar_end,
738 - sum_tmp, goal);
739 + sum_tmp, goal, phase);
741 out:
742 if (ret < 0 || ret == 1) {
743 @@ -1393,14 +1775,16 @@ out2:
744 * ext4_defrag_check - Check the enviroment whether a defrag can be done
746 * @org_inode: original inode
747 + * @ext: extent to be moved (only defrag force mode)
748 * @defrag_size: size of defrag in blocks
749 * @goal: poiter to block offset for allocation
750 + * @phase: phase of the force defrag mode
752 * This function returns 0 if succeed, otherwise returns error value.
754 static int
755 -ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
756 - ext4_fsblk_t *goal)
757 +ext4_defrag_check(struct inode *org_inode, struct ext4_extent_data *ext,
758 + ext4_lblk_t defrag_size, ext4_fsblk_t *goal, int *phase)
761 /* ext4 online defrag supports only 4KB block size */
762 @@ -1417,6 +1801,17 @@ ext4_defrag_check(struct inode *org_inod
763 return -EOPNOTSUPP;
766 + if (ext->len) {
767 + /* Setup for the force defrag mode */
768 + if (ext->len < defrag_size) {
769 + printk(KERN_ERR "ext4 defrag: "
770 + "Invalid length of extent\n");
771 + return -EINVAL;
773 + *phase = DEFRAG_FORCE_GATHER;
774 + *goal = ext->start;
777 return 0;
780 @@ -1495,13 +1890,16 @@ out:
781 * @block_start: starting offset to defrag in blocks
782 * @defrag_size: size of defrag in blocks
783 * @goal: block offset for allocation
784 + * @phase: phase of the force defrag mode
785 + * @ext: extent to be moved (only defrag force mode)
787 * This function returns the number of blocks if succeed, otherwise
788 * returns error value.
791 ext4_defrag(struct file *filp, ext4_lblk_t block_start,
792 - ext4_lblk_t defrag_size, ext4_fsblk_t goal)
793 + ext4_lblk_t defrag_size, ext4_fsblk_t goal, int phase,
794 + struct ext4_extent_data *ext)
796 struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
797 struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL;
798 @@ -1511,7 +1909,7 @@ ext4_defrag(struct file *filp, ext4_lblk
799 int ret, depth, seq_extents, last_extent = 0;
801 /* Check the filesystem enviroment whether defrag can be done */
802 - ret = ext4_defrag_check(org_inode, defrag_size, &goal);
803 + ret = ext4_defrag_check(org_inode, ext, defrag_size, &goal, &phase);
804 if (ret < 0)
805 return ret;
807 @@ -1627,11 +2025,11 @@ ext4_defrag(struct file *filp, ext4_lblk
809 ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode,
810 org_path, seq_start, seq_blocks,
811 - block_start, goal);
812 + block_start, goal, phase);
814 if (ret < 0) {
815 break;
816 - } else if (ret == 1) {
817 + } else if (ret == 1 && (!goal || (goal && !phase))) {
818 ret = 0;
819 seq_start = le32_to_cpu(ext_cur->ee_block);
820 goto CLEANUP;
821 @@ -1655,7 +2053,7 @@ ext4_defrag(struct file *filp, ext4_lblk
822 while (page_offset <= seq_end_page) {
823 /* Swap original branches with new branches */
824 ret = ext4_defrag_partial(tmp_inode, filp,
825 - page_offset, dest_offset);
826 + page_offset, dest_offset, phase);
827 if (ret < 0)
828 goto out;
830 @@ -1708,6 +2106,10 @@ out:
831 kfree(holecheck_path);
834 + if (phase == DEFRAG_FORCE_GATHER)
835 + /* Release reserved block in force mode */
836 + ext4_discard_reservation(org_inode);
838 up_write(&EXT4_I(org_inode)->i_data_sem);
839 mutex_unlock(&org_inode->i_mutex);
841 Index: linux-2.6.26-rc6/fs/ext4/ext4.h
842 ===================================================================
843 --- linux-2.6.26-rc6.orig/fs/ext4/ext4.h 2008-06-17 10:43:43.000000000 -0700
844 +++ linux-2.6.26-rc6/fs/ext4/ext4.h 2008-06-17 10:43:44.000000000 -0700
845 @@ -97,6 +97,11 @@ struct ext4_allocation_request {
846 unsigned long len;
847 /* flags. see above EXT4_MB_HINT_* */
848 unsigned long flags;
849 + /*
850 + * for ext4 online defrag:
851 + * the block group which is excepted from allocation target
852 + */
853 + long long excepted_group;
857 @@ -306,6 +311,9 @@ struct ext4_new_group_data {
858 #define EXT4_IOC_GROUP_INFO _IOW('f', 11, struct ext4_group_data_info)
859 #define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 12, struct ext4_extents_info)
860 #define EXT4_IOC_EXTENTS_INFO _IOW('f', 13, struct ext4_extents_info)
861 +#define EXT4_IOC_RESERVE_BLOCK _IOW('f', 14, struct ext4_extents_info)
862 +#define EXT4_IOC_MOVE_VICTIM _IOW('f', 15, struct ext4_extents_info)
863 +#define EXT4_IOC_BLOCK_RELEASE _IO('f', 8)
866 * ioctl commands in 32 bit emulation
867 @@ -334,8 +342,15 @@ struct ext4_new_group_data {
869 * DEFRAG_MAX_ENT: the maximum number of extents for exchanging between
870 * kernel-space and user-space per an ioctl
871 + * DEFRAG_FORCE_TRY: check whether we have free space fragmentation or not
872 + * DEFRAG_FORCE_VICTIM: move victim extents to make sufficient space
873 + * DEFRAG_FORCE_GATHER: move the target file into the free space made in the
874 + * DEFRAG_FORCE_VICTIM phase
876 #define DEFRAG_MAX_ENT 32
877 +#define DEFRAG_FORCE_TRY 1
878 +#define DEFRAG_FORCE_VICTIM 2
879 +#define DEFRAG_FORCE_GATHER 3
881 struct ext4_extent_data {
882 ext4_lblk_t block; /* start logical block number */
883 @@ -347,6 +362,8 @@ struct ext4_ext_defrag_data {
884 ext4_lblk_t start_offset; /* start offset to defrag in blocks */
885 ext4_lblk_t defrag_size; /* size of defrag in blocks */
886 ext4_fsblk_t goal; /* block offset for allocation */
887 + int flag; /* free space mode flag */
888 + struct ext4_extent_data ext;
891 struct ext4_group_data_info {
892 @@ -1045,8 +1062,17 @@ extern struct ext4_group_desc * ext4_get
893 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
894 extern void ext4_init_block_alloc_info(struct inode *);
895 extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
896 +extern void try_to_extend_reservation(struct ext4_reserve_window_node *,
897 + struct super_block *, int);
898 +extern int alloc_new_reservation(struct ext4_reserve_window_node *,
899 + ext4_grpblk_t, struct super_block *,
900 + ext4_group_t, struct buffer_head *);
901 extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
902 struct buffer_head *, ext4_grpblk_t);
903 +extern int rsv_is_empty(struct ext4_reserve_window *rsv);
904 +extern int goal_in_my_reservation(struct ext4_reserve_window *rsv,
905 + ext4_grpblk_t grp_goal, ext4_group_t group,
906 + struct super_block *sb);
908 /* dir.c */
909 extern int ext4_check_dir_entry(const char *, struct inode *,
910 @@ -1173,7 +1199,8 @@ extern void ext4_inode_table_set(struct
911 extern int ext4_ext_journal_restart(handle_t *handle, int needed);
912 /* defrag.c */
913 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
914 - ext4_lblk_t defrag_size, ext4_fsblk_t goal);
915 + ext4_lblk_t defrag_size, ext4_fsblk_t goal,
916 + int flag, struct ext4_extent_data *ext);
917 extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
918 unsigned long);
920 Index: linux-2.6.26-rc6/fs/ext4/ext4_extents.h
921 ===================================================================
922 --- linux-2.6.26-rc6.orig/fs/ext4/ext4_extents.h 2008-06-17 10:43:42.000000000 -0700
923 +++ linux-2.6.26-rc6/fs/ext4/ext4_extents.h 2008-06-17 10:43:44.000000000 -0700
924 @@ -234,5 +234,10 @@ extern void ext4_ext_drop_refs(struct ex
925 extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
926 struct ext4_ext_path *path,
927 ext4_lblk_t block);
928 +extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
929 + struct ext4_ext_path *path,
930 + struct ext4_extent *newext, int defrag);
931 +extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
933 #endif /* _EXT4_EXTENTS */
935 Index: linux-2.6.26-rc6/fs/ext4/extents.c
936 ===================================================================
937 --- linux-2.6.26-rc6.orig/fs/ext4/extents.c 2008-06-17 10:43:42.000000000 -0700
938 +++ linux-2.6.26-rc6/fs/ext4/extents.c 2008-06-17 10:43:44.000000000 -0700
939 @@ -185,11 +185,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct i
940 static ext4_fsblk_t
941 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
942 struct ext4_ext_path *path,
943 - struct ext4_extent *ex, int *err)
944 + struct ext4_extent *ex, int *err,
945 + ext4_fsblk_t defrag_goal)
947 ext4_fsblk_t goal, newblock;
949 - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
950 + if (defrag_goal)
951 + goal = defrag_goal;
952 + else
953 + goal = ext4_ext_find_goal(inode, path,
954 + le32_to_cpu(ex->ee_block));
956 newblock = ext4_new_meta_block(handle, inode, goal, err);
957 return newblock;
959 @@ -673,7 +679,8 @@ static int ext4_ext_insert_index(handle_
961 static int ext4_ext_split(handle_t *handle, struct inode *inode,
962 struct ext4_ext_path *path,
963 - struct ext4_extent *newext, int at)
964 + struct ext4_extent *newext, int at,
965 + ext4_fsblk_t defrag_goal)
967 struct buffer_head *bh = NULL;
968 int depth = ext_depth(inode);
969 @@ -724,7 +731,7 @@ static int ext4_ext_split(handle_t *hand
970 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
971 for (a = 0; a < depth - at; a++) {
972 newblock = ext4_ext_new_meta_block(handle, inode, path,
973 - newext, &err);
974 + newext, &err, defrag_goal);
975 if (newblock == 0)
976 goto cleanup;
977 ablocks[a] = newblock;
978 @@ -911,7 +918,8 @@ cleanup:
980 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
981 struct ext4_ext_path *path,
982 - struct ext4_extent *newext)
983 + struct ext4_extent *newext,
984 + ext4_fsblk_t defrag_goal)
986 struct ext4_ext_path *curp = path;
987 struct ext4_extent_header *neh;
988 @@ -920,7 +928,8 @@ static int ext4_ext_grow_indepth(handle_
989 ext4_fsblk_t newblock;
990 int err = 0;
992 - newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
993 + newblock = ext4_ext_new_meta_block(handle, inode, path,
994 + newext, &err, defrag_goal);
995 if (newblock == 0)
996 return err;
998 @@ -996,7 +1005,8 @@ out:
1000 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1001 struct ext4_ext_path *path,
1002 - struct ext4_extent *newext)
1003 + struct ext4_extent *newext,
1004 + ext4_fsblk_t defrag_goal)
1006 struct ext4_ext_path *curp;
1007 int depth, i, err = 0;
1008 @@ -1016,7 +1026,8 @@ repeat:
1009 if (EXT_HAS_FREE_INDEX(curp)) {
1010 /* if we found index with free entry, then use that
1011 * entry: create all needed subtree and add new leaf */
1012 - err = ext4_ext_split(handle, inode, path, newext, i);
1013 + err = ext4_ext_split(handle, inode, path, newext, i,
1014 + defrag_goal);
1015 if (err)
1016 goto out;
1018 @@ -1029,7 +1040,8 @@ repeat:
1019 err = PTR_ERR(path);
1020 } else {
1021 /* tree is full, time to grow in depth */
1022 - err = ext4_ext_grow_indepth(handle, inode, path, newext);
1023 + err = ext4_ext_grow_indepth(handle, inode, path,
1024 + newext, defrag_goal);
1025 if (err)
1026 goto out;
1028 @@ -1209,7 +1221,7 @@ ext4_ext_search_right(struct inode *inod
1029 * allocated block. Thus, index entries have to be consistent
1030 * with leaves.
1032 -static ext4_lblk_t
1033 +ext4_lblk_t
1034 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1036 int depth;
1037 @@ -1475,6 +1487,19 @@ int ext4_ext_insert_extent(handle_t *han
1038 struct ext4_ext_path *path,
1039 struct ext4_extent *newext)
1041 + return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
1045 + * ext4_ext_insert_extent_defrag:
1046 + * Same as ext4_ext_insert_extent, except that when defrag is non-zero the
1047 + * first physical block of newext is the goal for new index/leaf blocks.
1048 + */
1049 +int
1050 +ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
1051 + struct ext4_ext_path *path,
1052 + struct ext4_extent *newext, int defrag)
1054 struct ext4_extent_header * eh;
1055 struct ext4_extent *ex, *fex;
1056 struct ext4_extent *nearex; /* nearest extent */
1057 @@ -1482,6 +1507,7 @@ int ext4_ext_insert_extent(handle_t *han
1058 int depth, len, err;
1059 ext4_lblk_t next;
1060 unsigned uninitialized = 0;
1061 + ext4_fsblk_t defrag_goal;
1063 BUG_ON(ext4_ext_get_actual_len(newext) == 0);
1064 depth = ext_depth(inode);
1065 @@ -1542,11 +1568,16 @@ repeat:
1066 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
1069 + if (defrag)
1070 + defrag_goal = ext_pblock(newext);
1071 + else
1072 + defrag_goal = 0;
1074 * There is no free space in the found leaf.
1075 * We're gonna add a new leaf in the tree.
1077 - err = ext4_ext_create_new_leaf(handle, inode, path, newext);
1078 + err = ext4_ext_create_new_leaf(handle, inode, path,
1079 + newext, defrag_goal);
1080 if (err)
1081 goto cleanup;
1082 depth = ext_depth(inode);
1083 Index: linux-2.6.26-rc6/fs/ext4/ioctl.c
1084 ===================================================================
1085 --- linux-2.6.26-rc6.orig/fs/ext4/ioctl.c 2008-06-17 10:43:43.000000000 -0700
1086 +++ linux-2.6.26-rc6/fs/ext4/ioctl.c 2008-06-17 10:43:44.000000000 -0700
1087 @@ -245,7 +245,10 @@ setversion_out:
1088 case EXT4_IOC_DEFRAG:
1089 case EXT4_IOC_GROUP_INFO:
1090 case EXT4_IOC_FREE_BLOCKS_INFO:
1091 - case EXT4_IOC_EXTENTS_INFO: {
1092 + case EXT4_IOC_EXTENTS_INFO:
1093 + case EXT4_IOC_RESERVE_BLOCK:
1094 + case EXT4_IOC_MOVE_VICTIM:
1095 + case EXT4_IOC_BLOCK_RELEASE: {
1096 return ext4_defrag_ioctl(inode, filp, cmd, arg);
1098 case EXT4_IOC_GROUP_ADD: {
1099 Index: linux-2.6.26-rc6/fs/ext4/mballoc.c
1100 ===================================================================
1101 --- linux-2.6.26-rc6.orig/fs/ext4/mballoc.c 2008-06-17 10:43:39.000000000 -0700
1102 +++ linux-2.6.26-rc6/fs/ext4/mballoc.c 2008-06-17 10:43:44.000000000 -0700
1103 @@ -1766,6 +1766,10 @@ repeat:
1104 if (group == EXT4_SB(sb)->s_groups_count)
1105 group = 0;
1107 + if (ac->ac_excepted_group != -1 &&
1108 + group == ac->ac_excepted_group)
1109 + continue;
1111 /* quick check to skip empty groups */
1112 grp = ext4_get_group_info(ac->ac_sb, group);
1113 if (grp->bb_free == 0)
1114 @@ -3966,6 +3970,7 @@ ext4_mb_initialize_context(struct ext4_a
1115 ac->ac_bitmap_page = NULL;
1116 ac->ac_buddy_page = NULL;
1117 ac->ac_lg = NULL;
1118 + ac->ac_excepted_group = ar->excepted_group;
1120 /* we have to define context: we'll we work with a file or
1121 * locality group. this is a policy, actually */
1122 Index: linux-2.6.26-rc6/fs/ext4/mballoc.h
1123 ===================================================================
1124 --- linux-2.6.26-rc6.orig/fs/ext4/mballoc.h 2008-06-17 10:21:23.000000000 -0700
1125 +++ linux-2.6.26-rc6/fs/ext4/mballoc.h 2008-06-17 10:43:44.000000000 -0700
1126 @@ -205,6 +205,7 @@ struct ext4_allocation_context {
1127 struct page *ac_buddy_page;
1128 struct ext4_prealloc_space *ac_pa;
1129 struct ext4_locality_group *ac_lg;
1130 + long long ac_excepted_group;
1133 #define AC_STATUS_CONTINUE 1