Add ext4-printk-throttling patch
[ext4-patch-queue.git] / ext4-online-defrag-move-victim-files.patch
blob71f4e604844b3c46c8b23d4ab8ee993a0edefa07
1 ext4: online defrag-- Move victim files for the target file (-f mode)
3 From: Akira Fujita <a-fujita@rs.jp.nec.com>
5 Move victim files to make sufficient space and reallocate
6 the contiguous blocks for the target file.
8 Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
9 Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
10 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
11 index 64ec04c..2344a96 100644
12 --- a/fs/ext4/balloc.c
13 +++ b/fs/ext4/balloc.c
14 @@ -433,7 +433,7 @@ restart:
15 * If the goal block is within the reservation window, return 1;
16 * otherwise, return 0;
18 -static int
19 +int
20 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
21 ext4_group_t group, struct super_block *sb)
23 @@ -538,7 +538,7 @@ void ext4_rsv_window_add(struct super_block *sb,
24 * from the filesystem reservation window rb tree. Must be called with
25 * rsv_lock hold.
27 -static void rsv_window_remove(struct super_block *sb,
28 +void rsv_window_remove(struct super_block *sb,
29 struct ext4_reserve_window_node *rsv)
31 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
32 @@ -553,7 +553,7 @@ static void rsv_window_remove(struct super_block *sb,
34 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
36 -static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
37 +inline int rsv_is_empty(struct ext4_reserve_window *rsv)
39 /* a valid reservation end block could not be 0 */
40 return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
41 @@ -1289,7 +1289,7 @@ static int find_next_reservable_window(
42 * @bitmap_bh: the block group block bitmap
45 -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
46 +int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
47 ext4_grpblk_t grp_goal, struct super_block *sb,
48 ext4_group_t group, struct buffer_head *bitmap_bh)
50 @@ -1433,7 +1433,7 @@ retry:
51 * expand the reservation window size if necessary on a best-effort
52 * basis before ext4_new_blocks() tries to allocate blocks,
54 -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
55 +void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
56 struct super_block *sb, int size)
58 struct ext4_reserve_window_node *next_rsv;
59 diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
60 index eee4b6a..2b78c33 100644
61 --- a/fs/ext4/defrag.c
62 +++ b/fs/ext4/defrag.c
63 @@ -218,6 +218,267 @@ out:
66 /**
67 + * ext4_defrag_reserve_blocks - Reserve blocks for defrag
68 + *
69 + * @org_inode: original inode
70 + * @goal: the goal offset of the block reservation
71 + * @len: blocks count we need to reserve
72 + *
73 + * This function returns 0 if succeed, otherwise returns error value.
74 + */
76 +static int
77 +ext4_defrag_reserve_blocks(struct inode *org_inode, ext4_fsblk_t goal, int len)
79 + struct super_block *sb = NULL;
80 + handle_t *handle;
81 + struct buffer_head *bitmap_bh = NULL;
82 + struct ext4_block_alloc_info *block_i;
83 + struct ext4_reserve_window_node *my_rsv = NULL;
84 + unsigned short windowsz = 0;
85 + ext4_group_t group_no;
86 + ext4_grpblk_t grp_target_blk;
87 + int err = 0;
89 + down_write(&EXT4_I(org_inode)->i_data_sem);
91 + handle = ext4_journal_start(org_inode, EXT4_RESERVE_TRANS_BLOCKS);
92 + if (IS_ERR(handle)) {
93 + err = PTR_ERR(handle);
94 + handle = NULL;
95 + goto out;
96 + }
98 + if (S_ISREG(org_inode->i_mode) &&
99 + !EXT4_I(org_inode)->i_block_alloc_info) {
100 + ext4_init_block_alloc_info(org_inode);
101 + } else if (!S_ISREG(org_inode->i_mode)) {
102 + printk(KERN_ERR "ext4 defrag: Invalid file type\n");
103 + err = -EINVAL;
104 + goto out;
107 + sb = org_inode->i_sb;
108 + if (!sb) {
109 + printk(KERN_ERR "ext4 defrag: Non-existent device\n");
110 + err = -ENXIO;
111 + goto out;
113 + ext4_get_group_no_and_offset(sb, goal, &group_no,
114 + &grp_target_blk);
116 + block_i = EXT4_I(org_inode)->i_block_alloc_info;
117 + /* Block reservation should be enabled */
118 + BUG_ON(!block_i);
120 + windowsz = block_i->rsv_window_node.rsv_goal_size;
121 + /* Goal size should be set */
122 + BUG_ON(!windowsz);
124 + my_rsv = &block_i->rsv_window_node;
126 + bitmap_bh = ext4_read_block_bitmap(sb, group_no);
127 + if (!bitmap_bh) {
128 + err = -ENOSPC;
129 + goto out;
132 + BUFFER_TRACE(bitmap_bh, "get undo access for new block");
133 + err = ext4_journal_get_undo_access(handle, bitmap_bh);
134 + if (err)
135 + goto out;
137 + err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
138 + group_no, bitmap_bh);
139 + if (err < 0) {
140 + printk(KERN_ERR "ext4 defrag: Block reservation failed."
141 + "offset [%d], bg[%lu]\n", grp_target_blk, group_no);
142 + ext4_discard_reservation(org_inode);
143 + goto out;
144 + } else if (len > EXT4_DEFAULT_RESERVE_BLOCKS) {
145 + try_to_extend_reservation(my_rsv, sb,
146 + len - EXT4_DEFAULT_RESERVE_BLOCKS);
149 +out:
150 + up_write(&EXT4_I(org_inode)->i_data_sem);
151 + ext4_journal_release_buffer(handle, bitmap_bh);
152 + brelse(bitmap_bh);
154 + if (handle)
155 + ext4_journal_stop(handle);
157 + return err;
160 +/**
161 + * ext4_defrag_block_within_rsv - Is target extent reserved ?
163 + * @org_inode: original inode
164 + * @ex_start: physical block offset of the extent that was already moved
165 + * @ex_len: block length of the extent
167 + * Returns 0 if the extent is within the reservation window, else -ENOSPC.
168 + */
169 +static int
170 +ext4_defrag_block_within_rsv(struct inode *org_inode, ext4_fsblk_t ex_start,
171 + int ex_len)
173 + struct super_block *sb = org_inode->i_sb;
174 + struct ext4_block_alloc_info *block_i;
175 + ext4_group_t group_no;
176 + ext4_grpblk_t grp_blk;
177 + struct ext4_reserve_window_node *rsv;
179 + block_i = EXT4_I(org_inode)->i_block_alloc_info;
180 + /* Block reservation should be enabled */
181 + BUG_ON(!block_i);
183 + /* Goal size should be set */
184 + BUG_ON(!block_i->rsv_window_node.rsv_goal_size);
186 + rsv = &block_i->rsv_window_node;
187 + if (rsv_is_empty(&rsv->rsv_window)) {
188 + printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
189 + return -ENOSPC;
192 + ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
194 + if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
195 + || !goal_in_my_reservation(&rsv->rsv_window,
196 + grp_blk + ex_len - 1, group_no, sb)){
197 + /* Goal blocks are not in the reservation window */
198 + printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
199 + "not in rsv_window\n", grp_blk,
200 + grp_blk + ex_len - 1, group_no);
201 + return -ENOSPC;
203 + return 0;
207 + * ext4_defrag_reserve_fblocks -
208 + * Reserve free blocks with ext4_defrag_reserve_blocks
210 + * @org_inode: original inode to get a block group number
211 + * @ext_info: free blocks distribution, stored in an extent-like style
212 + * @ext_info->ext[]: an array of struct ext4_extent_data
214 + * This function returns 0 if succeed, otherwise returns error value.
215 + */
216 +static int
217 +ext4_defrag_reserve_fblocks(struct inode *org_inode,
218 + struct ext4_extents_info *ext_info)
220 + ext4_fsblk_t ex_start = 0;
221 + int i, len, ret;
223 + for (i = 0; i < ext_info->entries; i++) {
224 + ex_start = ext_info->ext[i].start;
225 + len = ext_info->ext[i].len;
227 + ret = ext4_defrag_reserve_blocks(org_inode, ex_start, len);
228 + if (ret < 0) {
229 + printk(KERN_ERR "ext4 defrag: "
230 + "Block reservation failed. offset [%llu], "
231 + "length [%d]\n", ex_start, len);
232 + goto err;
235 + /* Confirm that blocks are in the reservation window */
236 + ret = ext4_defrag_block_within_rsv(org_inode, ex_start, len);
237 + if (ret < 0) {
238 + printk(KERN_ERR "ext4 defrag: "
239 + "Reservation window is not set. "
240 + "offset [%llu], length [%d]\n", ex_start, len);
241 + goto err;
244 + return ret;
246 +err:
247 + down_write(&EXT4_I(org_inode)->i_data_sem);
248 + ext4_discard_reservation(org_inode);
249 + up_write(&EXT4_I(org_inode)->i_data_sem);
250 + return ret;
253 +/**
254 + * ext4_defrag_move_victim - Create free space for defrag
256 + * @target_filp: target file
257 + * @ext_info: target extents array to move
259 + * This function returns 0 if succeed, otherwise
260 + * returns error value.
261 + */
262 +static int
263 +ext4_defrag_move_victim(struct file *target_filp,
264 + struct ext4_extents_info *ext_info)
266 + struct inode *org_inode = target_filp->f_dentry->d_inode;
267 + struct super_block *sb = org_inode->i_sb;
268 + struct file victim_file;
269 + struct dentry victim_dent;
270 + struct inode *victim_inode;
271 + struct ext4_extent_data ext;
272 + ext4_fsblk_t goal = ext_info->goal;
273 + ext4_group_t group;
274 + ext4_grpblk_t grp_off;
275 + int ret, i;
277 + /* Setup dummy extent data */
278 + ext.len = 0;
280 + /* Get the inode of the victim file */
281 + victim_inode = ext4_iget(sb, ext_info->ino);
282 + if (IS_ERR(victim_inode))
283 + return PTR_ERR(victim_inode);
285 + /* Setup file for the victim file */
286 + victim_dent.d_inode = victim_inode;
287 + victim_file.f_dentry = &victim_dent;
288 + victim_file.f_mapping = victim_inode->i_mapping;
290 + /* Set the goal to an appropriate offset */
291 + if (goal == -1) {
292 + ext4_get_group_no_and_offset(victim_inode->i_sb,
293 + ext_info->ext[0].start, &group, &grp_off);
294 + goal = ext4_group_first_block_no(sb, group + 1);
297 + for (i = 0; i < ext_info->entries; i++) {
298 + /* Move original blocks to another block group */
299 + ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
300 + ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
301 + if (ret < 0) {
302 + printk(KERN_ERR "ext4 defrag: "
303 + "Moving victim file failed. ino [%llu]\n",
304 + ext_info->ino);
305 + goto err;
308 + /* Sync journal blocks before reservation */
309 + ret = ext4_force_commit(sb);
310 + if (ret) {
311 + printk(KERN_ERR "ext4 defrag: "
312 + "ext4_force_commit failed(%d)\n", ret);
313 + goto err;
317 + iput(victim_inode);
318 + return 0;
319 +err:
320 + down_write(&EXT4_I(org_inode)->i_data_sem);
321 + ext4_discard_reservation(org_inode);
322 + up_write(&EXT4_I(org_inode)->i_data_sem);
323 + iput(victim_inode);
324 + return ret;
327 +/**
328 * ext4_defrag_fblocks_distribution - Search free blocks distribution
330 * @org_inode: original inode
331 @@ -383,6 +644,29 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
332 &ext_info, sizeof(ext_info)))
333 return -EFAULT;
335 + } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
336 + struct ext4_extents_info ext_info;
338 + if (copy_from_user(&ext_info,
339 + (struct ext4_extents_info __user *)arg,
340 + sizeof(ext_info)))
341 + return -EFAULT;
343 + err = ext4_defrag_reserve_fblocks(inode, &ext_info);
344 + } else if (cmd == EXT4_IOC_MOVE_VICTIM) {
345 + struct ext4_extents_info ext_info;
347 + if (copy_from_user(&ext_info,
348 + (struct ext4_extents_info __user *)arg,
349 + sizeof(ext_info)))
350 + return -EFAULT;
352 + err = ext4_defrag_move_victim(filp, &ext_info);
354 + } else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
355 + down_write(&EXT4_I(inode)->i_data_sem);
356 + ext4_discard_reservation(inode);
357 + up_write(&EXT4_I(inode)->i_data_sem);
358 } else if (cmd == EXT4_IOC_DEFRAG) {
359 struct ext4_ext_defrag_data defrag;
360 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
361 @@ -409,7 +693,8 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
364 err = ext4_defrag(filp, defrag.start_offset,
365 - defrag.defrag_size, defrag.goal);
366 + defrag.defrag_size, defrag.goal, defrag.flag,
367 + &defrag.ext);
370 return err;
371 @@ -425,6 +710,7 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
372 * @start_ext: first new extent to be merged
373 * @new_ext: middle of new extent to be merged
374 * @end_ext: last new extent to be merged
375 + * @phase: phase of the force defrag mode
377 * This function returns 0 if succeed, otherwise returns error value.
379 @@ -432,14 +718,20 @@ static int
380 ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
381 struct ext4_extent *o_start, struct ext4_extent *o_end,
382 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
383 - struct ext4_extent *end_ext)
384 + struct ext4_extent *end_ext, int phase)
386 struct ext4_ext_path *org_path = NULL;
387 ext4_lblk_t eblock = 0;
388 int new_flag = 0;
389 int end_flag = 0;
390 + int defrag_flag;
391 int err;
393 + if (phase == DEFRAG_FORCE_VICTIM)
394 + defrag_flag = 1;
395 + else
396 + defrag_flag = 0;
398 if (le16_to_cpu(start_ext->ee_len) &&
399 le16_to_cpu(new_ext->ee_len) &&
400 le16_to_cpu(end_ext->ee_len)) {
401 @@ -516,8 +808,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
402 org_path = NULL;
403 goto out;
405 - err = ext4_ext_insert_extent(handle, org_inode,
406 - org_path, new_ext);
407 + err = ext4_ext_insert_extent_defrag(handle, org_inode,
408 + org_path, new_ext, defrag_flag);
409 if (err)
410 goto out;
412 @@ -530,8 +822,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
413 org_path = NULL;
414 goto out;
416 - err = ext4_ext_insert_extent(handle, org_inode,
417 - org_path, end_ext);
418 + err = ext4_ext_insert_extent_defrag(handle, org_inode,
419 + org_path, end_ext, defrag_flag);
420 if (err)
421 goto out;
423 @@ -609,6 +901,7 @@ ext4_defrag_merge_inside_block(struct ext4_extent *o_start,
424 * @new_ext: middle of new extent to be merged
425 * @end_ext: last new extent to be merged
426 * @replaced: the number of blocks which will be replaced with new_ext
427 + * @phase: phase of the force defrag mode
429 * This function returns 0 if succeed, otherwise returns error value.
431 @@ -617,7 +910,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
432 struct ext4_ext_path *org_path,
433 struct ext4_extent *o_start, struct ext4_extent *o_end,
434 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
435 - struct ext4_extent *end_ext, ext4_fsblk_t replaced)
436 + struct ext4_extent *end_ext, ext4_fsblk_t replaced, int phase)
438 struct ext4_extent_header *eh;
439 unsigned need_slots, slots_range;
440 @@ -655,7 +948,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
442 ret = ext4_defrag_merge_across_blocks(handle, org_inode,
443 o_start, o_end, start_ext, new_ext,
444 - end_ext);
445 + end_ext, phase);
446 if (ret < 0)
447 return ret;
448 } else {
449 @@ -688,13 +981,14 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
450 * @org_path: path indicates first extent to be defraged
451 * @dext: destination extent
452 * @from: start offset on the target file
453 + * @phase: phase of the force defrag mode
455 * This function returns 0 if succeed, otherwise returns error value.
457 static int
458 ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
459 struct ext4_ext_path *org_path, struct ext4_extent *dext,
460 - ext4_lblk_t *from)
461 + ext4_lblk_t *from, int phase)
463 struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext;
464 struct ext4_extent new_ext, start_ext, end_ext;
465 @@ -795,7 +1089,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
466 + le16_to_cpu(oext->ee_len) - 1) {
467 ret = ext4_defrag_merge_extents(handle, org_inode,
468 org_path, o_start, o_end, &start_ext,
469 - &new_ext, &end_ext, replaced);
470 + &new_ext, &end_ext, replaced, phase);
471 if (ret < 0)
472 return ret;
474 @@ -847,6 +1141,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
475 * @from_page: page offset of org_inode
476 * @dest_from_page: page offset of dest_inode
477 * @count_page: page count to be replaced
478 + * @phase: phase of the force defrag mode
480 * This function returns 0 if succeed, otherwise returns error value.
481 * Replace extents for blocks from "from" to "from + count - 1".
482 @@ -854,7 +1149,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
483 static int
484 ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
485 struct inode *dest_inode, pgoff_t from_page,
486 - pgoff_t dest_from_page, pgoff_t count_page)
487 + pgoff_t dest_from_page, pgoff_t count_page, int phase)
489 struct ext4_ext_path *org_path = NULL;
490 struct ext4_ext_path *dest_path = NULL;
491 @@ -922,7 +1217,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
493 /* Loop for the original extent blocks */
494 err = ext4_defrag_leaf_block(handle, org_inode,
495 - org_path, dext, &from);
496 + org_path, dext, &from, phase);
497 if (err < 0)
498 goto out;
500 @@ -932,7 +1227,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
501 * e.g. ext4_defrag_merge_extents()
503 err = ext4_defrag_leaf_block(handle, dest_inode,
504 - dest_path, swap_ext, &dest_off);
505 + dest_path, swap_ext, &dest_off, -1);
506 if (err < 0)
507 goto out;
509 @@ -1028,6 +1323,7 @@ out:
510 * @req_blocks: contiguous blocks count we need
511 * @iblock: target file offset
512 * @goal: goal offset
513 + * @phase: phase of the force defrag mode
516 static void
517 @@ -1036,8 +1332,22 @@ ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
518 struct ext4_ext_path *org_path,
519 struct ext4_ext_path *dest_path,
520 ext4_fsblk_t req_blocks, ext4_lblk_t iblock,
521 - ext4_fsblk_t goal)
522 + ext4_fsblk_t goal, int phase)
524 + ext4_group_t org_grp_no;
525 + ext4_grpblk_t org_blk_off;
526 + int org_depth = ext_depth(org_inode);
528 + if (phase == DEFRAG_FORCE_VICTIM) {
529 + ext4_get_group_no_and_offset(org_inode->i_sb,
530 + ext_pblock(org_path[org_depth].p_ext),
531 + &org_grp_no, &org_blk_off);
532 + ar->excepted_group = org_grp_no;
533 + } else {
534 + /* Allocate contiguous blocks to any block group */
535 + ar->excepted_group = -1;
538 ar->inode = dest_inode;
539 ar->len = req_blocks;
540 ar->logical = iblock;
541 @@ -1101,19 +1411,70 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
545 + * ext4_defrag_check_phase
546 + * - Check condition of the allocated blocks (only force defrag mode)
548 + * @ar: allocation request for multiple block allocation
549 + * @dest_grp_no: block group num of the allocated blocks
550 + * @goal_grp_no: block group num of the destination of block allocation
551 + * @alloc_total: sum total of the allocated blocks
552 + * @req_blocks: contiguous blocks count we need
553 + * @phase: phase of the force defrag mode
555 + * This function returns 0 if succeed, otherwise returns error value.
556 + */
557 +static int
558 +ext4_defrag_check_phase(struct ext4_allocation_request *ar,
559 + ext4_group_t dest_grp_no, ext4_group_t goal_grp_no,
560 + ext4_fsblk_t alloc_total, ext4_lblk_t req_blocks,
561 + int phase)
563 + int err = 0;
565 + switch (phase) {
566 + case DEFRAG_FORCE_TRY:
567 + /* If there is not enough space, return -ENOSPC. */
568 + if (ar->len != req_blocks)
569 + /* -ENOSPC triggers DEFRAG_FORCE_VICTIM phase. */
570 + err = -ENOSPC;
571 + break;
572 + case DEFRAG_FORCE_VICTIM:
573 + /* We can't allocate new blocks in the same block group. */
574 + if (dest_grp_no == ar->excepted_group) {
575 + printk(KERN_ERR "ext4 defrag: Failed to allocate"
576 + " victim file to other block group\n");
577 + err = -ENOSPC;
579 + break;
580 + case DEFRAG_FORCE_GATHER:
581 + /* Maybe reserved blocks are already used by another process. */
582 + if (dest_grp_no != goal_grp_no
583 + || alloc_total != req_blocks) {
584 + printk(KERN_ERR "ext4 defrag: Reserved blocks are"
585 + " already used by other process\n");
586 + err = -EIO;
588 + break;
591 + return err;
594 +/**
595 * ext4_defrag_partial - Defrag a file per page
597 * @tmp_inode: temporary inode
598 * @filp: pointer to file
599 * @org_offset: page index on original file
600 * @dest_offset: page index on temporary file
601 + * @phase: phase of the force defrag mode
604 * This function returns 0 if succeed, otherwise returns error value.
606 static int
607 ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
608 - pgoff_t org_offset, pgoff_t dest_offset)
609 + pgoff_t org_offset, pgoff_t dest_offset, int phase)
611 struct inode *org_inode = filp->f_dentry->d_inode;
612 struct address_space *mapping = org_inode->i_mapping;
613 @@ -1180,7 +1541,7 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
614 /* Release old bh and drop refs */
615 try_to_release_page(page, 0);
616 ret = ext4_defrag_replace_branches(handle, org_inode, tmp_inode,
617 - org_offset, dest_offset, 1);
618 + org_offset, dest_offset, 1, phase);
620 if (ret < 0)
621 goto out;
622 @@ -1227,6 +1588,7 @@ out:
623 * @tar_end: the last block number of the allocated blocks
624 * @sum_tmp: the extents count in the allocated blocks
625 * @goal: block offset for allocaton
626 + * @phase: phase of the force defrag mode
629 * This function returns the values as below.
630 @@ -1237,7 +1599,7 @@ out:
631 static int
632 ext4_defrag_comp_ext_count(struct inode *org_inode,
633 struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
634 - int sum_tmp, ext4_fsblk_t goal)
635 + int sum_tmp, ext4_fsblk_t goal, int phase)
637 struct ext4_extent *ext = NULL;
638 int depth = ext_depth(org_inode);
639 @@ -1264,7 +1626,8 @@ ext4_defrag_comp_ext_count(struct inode *org_inode,
640 if (sum_org == sum_tmp && !goal) {
641 /* Not improved */
642 ret = 1;
643 - } else if (sum_org < sum_tmp) {
644 + } else if (sum_org < sum_tmp &&
645 + phase != DEFRAG_FORCE_VICTIM) {
646 /* Fragment increased */
647 ret = -ENOSPC;
648 printk(KERN_ERR "ext4 defrag: "
649 @@ -1293,6 +1656,7 @@ ext4_defrag_comp_ext_count(struct inode *org_inode,
650 * @tar_blocks: the number of blocks to allocate
651 * @iblock: file related offset
652 * @goal: block offset for allocaton
653 + * @phase: phase of the force defrag mode
656 * This function returns the value as below:
657 @@ -1304,7 +1668,7 @@ static int
658 ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
659 struct ext4_ext_path *org_path, ext4_lblk_t tar_start,
660 ext4_lblk_t tar_blocks, ext4_lblk_t iblock,
661 - ext4_fsblk_t goal)
662 + ext4_fsblk_t goal, int phase)
664 handle_t *handle;
665 struct ext4_extent_header *eh = NULL;
666 @@ -1314,6 +1678,8 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
667 ext4_fsblk_t alloc_total = 0;
668 ext4_fsblk_t newblock = 0;
669 ext4_lblk_t tar_end = tar_start + tar_blocks - 1;
670 + ext4_group_t dest_group_no, goal_group_no;
671 + ext4_grpblk_t dest_blk_off, goal_blk_off;
672 int sum_tmp = 0;
673 int metadata = 1;
674 int ret, ret2;
675 @@ -1330,7 +1696,7 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
677 /* Fill struct ext4_allocation_request with necessary info */
678 ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
679 - dest_path, tar_blocks, iblock, goal);
680 + dest_path, tar_blocks, iblock, goal, phase);
682 handle = ext4_journal_start(tmp_inode, 0);
683 if (IS_ERR(handle)) {
684 @@ -1338,6 +1704,9 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
685 goto out2;
688 + ext4_get_group_no_and_offset(tmp_inode->i_sb, goal,
689 + &goal_group_no, &goal_blk_off);
691 while (alloc_total != tar_blocks) {
692 /* Allocate blocks */
693 ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode,
694 @@ -1345,8 +1714,20 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
695 if (ret < 0)
696 goto out;
698 + ext4_get_group_no_and_offset(tmp_inode->i_sb, newblock,
699 + &dest_group_no, &dest_blk_off);
701 alloc_total += ar.len;
703 + /* Checks that are done in force mode */
704 + if (phase) {
705 + ret = ext4_defrag_check_phase(&ar, dest_group_no,
706 + goal_group_no, alloc_total,
707 + tar_blocks, phase);
708 + if (ret < 0)
709 + goto out;
712 newex.ee_block = cpu_to_le32(alloc_total - ar.len);
713 ext4_ext_store_pblock(&newex, newblock);
714 newex.ee_len = cpu_to_le16(ar.len);
715 @@ -1356,13 +1737,14 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
716 if (ret < 0)
717 goto out;
719 - ar.goal = newblock + ar.len;
720 + if (!phase)
721 + ar.goal = newblock + ar.len;
722 ar.len = tar_blocks - alloc_total;
723 sum_tmp++;
726 ret = ext4_defrag_comp_ext_count(org_inode, org_path, tar_end,
727 - sum_tmp, goal);
728 + sum_tmp, goal, phase);
730 out:
731 if (ret < 0 || ret == 1) {
732 @@ -1393,14 +1775,16 @@ out2:
733 * ext4_defrag_check - Check the enviroment whether a defrag can be done
735 * @org_inode: original inode
736 + * @ext: extent to be moved (only defrag force mode)
737 * @defrag_size: size of defrag in blocks
738 * @goal: poiter to block offset for allocation
739 + * @phase: phase of the force defrag mode
741 * This function returns 0 if succeed, otherwise returns error value.
743 static int
744 -ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
745 - ext4_fsblk_t *goal)
746 +ext4_defrag_check(struct inode *org_inode, struct ext4_extent_data *ext,
747 + ext4_lblk_t defrag_size, ext4_fsblk_t *goal, int *phase)
750 /* ext4 online defrag supports only 4KB block size */
751 @@ -1417,6 +1801,17 @@ ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
752 return -EOPNOTSUPP;
755 + if (ext->len) {
756 + /* Setup for the force defrag mode */
757 + if (ext->len < defrag_size) {
758 + printk(KERN_ERR "ext4 defrag: "
759 + "Invalid length of extent\n");
760 + return -EINVAL;
762 + *phase = DEFRAG_FORCE_GATHER;
763 + *goal = ext->start;
766 return 0;
769 @@ -1495,13 +1890,16 @@ out:
770 * @block_start: starting offset to defrag in blocks
771 * @defrag_size: size of defrag in blocks
772 * @goal: block offset for allocation
773 + * @phase: phase of the force defrag mode
774 + * @ext: extent to be moved (only defrag force mode)
776 * This function returns the number of blocks if succeed, otherwise
777 * returns error value.
780 ext4_defrag(struct file *filp, ext4_lblk_t block_start,
781 - ext4_lblk_t defrag_size, ext4_fsblk_t goal)
782 + ext4_lblk_t defrag_size, ext4_fsblk_t goal, int phase,
783 + struct ext4_extent_data *ext)
785 struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
786 struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL;
787 @@ -1511,7 +1909,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
788 int ret, depth, seq_extents, last_extent = 0;
790 /* Check the filesystem enviroment whether defrag can be done */
791 - ret = ext4_defrag_check(org_inode, defrag_size, &goal);
792 + ret = ext4_defrag_check(org_inode, ext, defrag_size, &goal, &phase);
793 if (ret < 0)
794 return ret;
796 @@ -1627,11 +2025,11 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
798 ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode,
799 org_path, seq_start, seq_blocks,
800 - block_start, goal);
801 + block_start, goal, phase);
803 if (ret < 0) {
804 break;
805 - } else if (ret == 1) {
806 + } else if (ret == 1 && (!goal || (goal && !phase))) {
807 ret = 0;
808 seq_start = le32_to_cpu(ext_cur->ee_block);
809 goto CLEANUP;
810 @@ -1655,7 +2053,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
811 while (page_offset <= seq_end_page) {
812 /* Swap original branches with new branches */
813 ret = ext4_defrag_partial(tmp_inode, filp,
814 - page_offset, dest_offset);
815 + page_offset, dest_offset, phase);
816 if (ret < 0)
817 goto out;
819 @@ -1708,6 +2106,10 @@ out:
820 kfree(holecheck_path);
823 + if (phase == DEFRAG_FORCE_GATHER)
824 + /* Release reserved block in force mode */
825 + ext4_discard_reservation(org_inode);
827 up_write(&EXT4_I(org_inode)->i_data_sem);
828 mutex_unlock(&org_inode->i_mutex);
830 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
831 index 5b8c24b..c17249b 100644
832 --- a/fs/ext4/ext4.h
833 +++ b/fs/ext4/ext4.h
834 @@ -97,6 +97,11 @@ struct ext4_allocation_request {
835 unsigned long len;
836 /* flags. see above EXT4_MB_HINT_* */
837 unsigned long flags;
838 + /*
839 + * for ext4 online defrag:
840 + * the block group which is excepted from allocation target
841 + */
842 + long long excepted_group;
846 @@ -306,6 +311,9 @@ struct ext4_new_group_data {
847 #define EXT4_IOC_GROUP_INFO _IOW('f', 11, struct ext4_group_data_info)
848 #define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 12, struct ext4_extents_info)
849 #define EXT4_IOC_EXTENTS_INFO _IOW('f', 13, struct ext4_extents_info)
850 +#define EXT4_IOC_RESERVE_BLOCK _IOW('f', 14, struct ext4_extents_info)
851 +#define EXT4_IOC_MOVE_VICTIM _IOW('f', 15, struct ext4_extents_info)
852 +#define EXT4_IOC_BLOCK_RELEASE _IO('f', 8)
855 * ioctl commands in 32 bit emulation
856 @@ -334,8 +342,15 @@ struct ext4_new_group_data {
858 * DEFRAG_MAX_ENT: the maximum number of extents for exchanging between
859 * kernel-space and user-space per an ioctl
860 + * DEFRAG_FORCE_TRY: check whether we have free space fragmentation or not
861 + * DEFRAG_FORCE_VICTIM: move victim extents to make sufficient space
862 + * DEFRAG_FORCE_GATHER: move the target file into the free space made in the
863 + * DEFRAG_FORCE_VICTIM phase
865 #define DEFRAG_MAX_ENT 32
866 +#define DEFRAG_FORCE_TRY 1
867 +#define DEFRAG_FORCE_VICTIM 2
868 +#define DEFRAG_FORCE_GATHER 3
870 struct ext4_extent_data {
871 ext4_lblk_t block; /* start logical block number */
872 @@ -347,6 +362,8 @@ struct ext4_ext_defrag_data {
873 ext4_lblk_t start_offset; /* start offset to defrag in blocks */
874 ext4_lblk_t defrag_size; /* size of defrag in blocks */
875 ext4_fsblk_t goal; /* block offset for allocation */
876 + int flag; /* free space mode flag */
877 + struct ext4_extent_data ext;
880 struct ext4_group_data_info {
881 @@ -1046,8 +1063,17 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
882 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
883 extern void ext4_init_block_alloc_info(struct inode *);
884 extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
885 +extern void try_to_extend_reservation(struct ext4_reserve_window_node *,
886 + struct super_block *, int);
887 +extern int alloc_new_reservation(struct ext4_reserve_window_node *,
888 + ext4_grpblk_t, struct super_block *,
889 + ext4_group_t, struct buffer_head *);
890 extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
891 struct buffer_head *, ext4_grpblk_t);
892 +extern int rsv_is_empty(struct ext4_reserve_window *rsv);
893 +extern int goal_in_my_reservation(struct ext4_reserve_window *rsv,
894 + ext4_grpblk_t grp_goal, ext4_group_t group,
895 + struct super_block *sb);
897 /* dir.c */
898 extern int ext4_check_dir_entry(const char *, struct inode *,
899 @@ -1182,7 +1208,8 @@ extern void ext4_inode_table_set(struct super_block *sb,
900 extern int ext4_ext_journal_restart(handle_t *handle, int needed);
901 /* defrag.c */
902 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
903 - ext4_lblk_t defrag_size, ext4_fsblk_t goal);
904 + ext4_lblk_t defrag_size, ext4_fsblk_t goal,
905 + int flag, struct ext4_extent_data *ext);
906 extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
907 unsigned long);
909 diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
910 index 2fc0469..0f2c744 100644
911 --- a/fs/ext4/ext4_extents.h
912 +++ b/fs/ext4/ext4_extents.h
913 @@ -236,5 +236,10 @@ extern void ext4_ext_drop_refs(struct ext4_ext_path *path);
914 extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
915 struct ext4_ext_path *path,
916 ext4_lblk_t block);
917 +extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
918 + struct ext4_ext_path *path,
919 + struct ext4_extent *newext, int defrag);
920 +extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
922 #endif /* _EXT4_EXTENTS */
924 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
925 index 4e6dba1..3f06029 100644
926 --- a/fs/ext4/extents.c
927 +++ b/fs/ext4/extents.c
928 @@ -185,11 +185,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
929 static ext4_fsblk_t
930 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
931 struct ext4_ext_path *path,
932 - struct ext4_extent *ex, int *err)
933 + struct ext4_extent *ex, int *err,
934 + ext4_fsblk_t defrag_goal)
936 ext4_fsblk_t goal, newblock;
938 - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
939 + if (defrag_goal)
940 + goal = defrag_goal;
941 + else
942 + goal = ext4_ext_find_goal(inode, path,
943 + le32_to_cpu(ex->ee_block));
945 newblock = ext4_new_meta_block(handle, inode, goal, err);
946 return newblock;
948 @@ -674,7 +680,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
950 static int ext4_ext_split(handle_t *handle, struct inode *inode,
951 struct ext4_ext_path *path,
952 - struct ext4_extent *newext, int at)
953 + struct ext4_extent *newext, int at,
954 + ext4_fsblk_t defrag_goal)
956 struct buffer_head *bh = NULL;
957 int depth = ext_depth(inode);
958 @@ -725,7 +732,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
959 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
960 for (a = 0; a < depth - at; a++) {
961 newblock = ext4_ext_new_meta_block(handle, inode, path,
962 - newext, &err);
963 + newext, &err, defrag_goal);
964 if (newblock == 0)
965 goto cleanup;
966 ablocks[a] = newblock;
967 @@ -912,7 +919,8 @@ cleanup:
969 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
970 struct ext4_ext_path *path,
971 - struct ext4_extent *newext)
972 + struct ext4_extent *newext,
973 + ext4_fsblk_t defrag_goal)
975 struct ext4_ext_path *curp = path;
976 struct ext4_extent_header *neh;
977 @@ -921,7 +929,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
978 ext4_fsblk_t newblock;
979 int err = 0;
981 - newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
982 + newblock = ext4_ext_new_meta_block(handle, inode, path,
983 + newext, &err, defrag_goal);
984 if (newblock == 0)
985 return err;
987 @@ -997,7 +1006,8 @@ out:
989 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
990 struct ext4_ext_path *path,
991 - struct ext4_extent *newext)
992 + struct ext4_extent *newext,
993 + ext4_fsblk_t defrag_goal)
995 struct ext4_ext_path *curp;
996 int depth, i, err = 0;
997 @@ -1017,7 +1027,8 @@ repeat:
998 if (EXT_HAS_FREE_INDEX(curp)) {
999 /* if we found index with free entry, then use that
1000 * entry: create all needed subtree and add new leaf */
1001 - err = ext4_ext_split(handle, inode, path, newext, i);
1002 + err = ext4_ext_split(handle, inode, path, newext, i,
1003 + defrag_goal);
1004 if (err)
1005 goto out;
1007 @@ -1030,7 +1041,8 @@ repeat:
1008 err = PTR_ERR(path);
1009 } else {
1010 /* tree is full, time to grow in depth */
1011 - err = ext4_ext_grow_indepth(handle, inode, path, newext);
1012 + err = ext4_ext_grow_indepth(handle, inode, path,
1013 + newext, defrag_goal);
1014 if (err)
1015 goto out;
1017 @@ -1210,7 +1222,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1018 * allocated block. Thus, index entries have to be consistent
1019 * with leaves.
1021 -static ext4_lblk_t
1022 +ext4_lblk_t
1023 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1025 int depth;
1026 @@ -1476,6 +1488,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1027 struct ext4_ext_path *path,
1028 struct ext4_extent *newext)
1030 + return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
1034 + * ext4_ext_insert_extent_defrag:
1035 + * Unlike ext4_ext_insert_extent, a nonzero defrag makes the first block
1036 + * of newext (its ext_pblock) the goal for any new index/leaf block.
1037 + */
1038 +int
1039 +ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
1040 + struct ext4_ext_path *path,
1041 + struct ext4_extent *newext, int defrag)
1043 struct ext4_extent_header *eh;
1044 struct ext4_extent *ex, *fex;
1045 struct ext4_extent *nearex; /* nearest extent */
1046 @@ -1483,6 +1508,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1047 int depth, len, err;
1048 ext4_lblk_t next;
1049 unsigned uninitialized = 0;
1050 + ext4_fsblk_t defrag_goal;
1052 BUG_ON(ext4_ext_get_actual_len(newext) == 0);
1053 depth = ext_depth(inode);
1054 @@ -1543,11 +1569,16 @@ repeat:
1055 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
1058 + if (defrag)
1059 + defrag_goal = ext_pblock(newext);
1060 + else
1061 + defrag_goal = 0;
1063 * There is no free space in the found leaf.
1064 * We're gonna add a new leaf in the tree.
1066 - err = ext4_ext_create_new_leaf(handle, inode, path, newext);
1067 + err = ext4_ext_create_new_leaf(handle, inode, path,
1068 + newext, defrag_goal);
1069 if (err)
1070 goto cleanup;
1071 depth = ext_depth(inode);
1072 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
1073 index b548c2a..e1fb56c 100644
1074 --- a/fs/ext4/ioctl.c
1075 +++ b/fs/ext4/ioctl.c
1076 @@ -245,7 +245,10 @@ setversion_out:
1077 case EXT4_IOC_DEFRAG:
1078 case EXT4_IOC_GROUP_INFO:
1079 case EXT4_IOC_FREE_BLOCKS_INFO:
1080 - case EXT4_IOC_EXTENTS_INFO: {
1081 + case EXT4_IOC_EXTENTS_INFO:
1082 + case EXT4_IOC_RESERVE_BLOCK:
1083 + case EXT4_IOC_MOVE_VICTIM:
1084 + case EXT4_IOC_BLOCK_RELEASE: {
1085 return ext4_defrag_ioctl(inode, filp, cmd, arg);
1087 case EXT4_IOC_GROUP_ADD: {
1088 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
1089 index a46bd0c..2eda913 100644
1090 --- a/fs/ext4/mballoc.c
1091 +++ b/fs/ext4/mballoc.c
1092 @@ -1775,6 +1775,10 @@ repeat:
1093 if (group == EXT4_SB(sb)->s_groups_count)
1094 group = 0;
1096 + if (ac->ac_excepted_group != -1 &&
1097 + group == ac->ac_excepted_group)
1098 + continue;
1100 /* quick check to skip empty groups */
1101 grp = ext4_get_group_info(ac->ac_sb, group);
1102 if (grp->bb_free == 0)
1103 @@ -4157,6 +4161,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
1104 ac->ac_bitmap_page = NULL;
1105 ac->ac_buddy_page = NULL;
1106 ac->ac_lg = NULL;
1107 + ac->ac_excepted_group = ar->excepted_group;
1109 /* we have to define context: we'll we work with a file or
1110 * locality group. this is a policy, actually */
1111 diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
1112 index c7c9906..6b46c86 100644
1113 --- a/fs/ext4/mballoc.h
1114 +++ b/fs/ext4/mballoc.h
1115 @@ -211,6 +211,7 @@ struct ext4_allocation_context {
1116 struct page *ac_buddy_page;
1117 struct ext4_prealloc_space *ac_pa;
1118 struct ext4_locality_group *ac_lg;
1119 + long long ac_excepted_group;
1122 #define AC_STATUS_CONTINUE 1