Remove ext2/ext3 BKL patches which conflict with the ro bind mount patches.
[ext4-patch-queue.git] / ext4-online-defrag-relocate-file-data.patch
blobb279c467003f65d3e6e42905abaefaeb2243492d
1 ext4: online defrag -- Move the file data to the new blocks
3 From: Akira Fujita <a-fujita@rs.jp.nec.com>
5 Move the blocks on the temporary inode to the original inode
6 one page at a time:
7 1. Read the file data from the old blocks to the page
8 2. Move the block on the temporary inode to the original inode
9 3. Write the file data on the page into the new blocks
12 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
13 Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
14 Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
15 ---
17 fs/ext4/defrag.c | 911 +++++++++++++++++++++++++++++++++++++++++++++++++++++
18 fs/ext4/extents.c | 4
19 fs/ext4/inode.c | 3
20 3 files changed, 914 insertions(+), 4 deletions(-)
23 diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
24 index 4908b85..7807ca4 100644
25 --- a/fs/ext4/defrag.c
26 +++ b/fs/ext4/defrag.c
27 @@ -33,6 +43,112 @@ struct ext4_extent_tree_stats {
28 int leaf_num;
31 +int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
32 + ext4_lblk_t num, ext_prepare_callback func,
33 + void *cbdata)
35 + struct ext4_ext_path *path = NULL;
36 + struct ext4_ext_cache cbex;
37 + struct ext4_extent *ex;
38 + ext4_lblk_t next, start = 0, end = 0;
39 + ext4_lblk_t last = block + num;
40 + int depth, exists, err = 0;
42 + BUG_ON(func == NULL);
43 + BUG_ON(inode == NULL);
45 + while (block < last && block != EXT_MAX_BLOCK) {
46 + num = last - block;
47 + /* find extent for this block */
48 + path = ext4_ext_find_extent(inode, block, path);
49 + if (IS_ERR(path)) {
50 + err = PTR_ERR(path);
51 + path = NULL;
52 + break;
53 + }
55 + depth = ext_depth(inode);
56 + BUG_ON(path[depth].p_hdr == NULL);
57 + ex = path[depth].p_ext;
58 + next = ext4_ext_next_allocated_block(path);
60 + exists = 0;
61 + if (!ex) {
62 + /* there is no extent yet, so try to allocate
63 + * all requested space */
64 + start = block;
65 + end = block + num;
66 + } else if (le32_to_cpu(ex->ee_block) > block) {
67 + /* need to allocate space before found extent */
68 + start = block;
69 + end = le32_to_cpu(ex->ee_block);
70 + if (block + num < end)
71 + end = block + num;
72 + } else if (block >= le32_to_cpu(ex->ee_block)
73 + + ext4_ext_get_actual_len(ex)) {
74 + /* need to allocate space after found extent */
75 + start = block;
76 + end = block + num;
77 + if (end >= next)
78 + end = next;
79 + } else if (block >= le32_to_cpu(ex->ee_block)) {
80 + /*
81 + * some part of requested space is covered
82 + * by found extent
83 + */
84 + start = block;
85 + end = le32_to_cpu(ex->ee_block)
86 + + ext4_ext_get_actual_len(ex);
87 + if (block + num < end)
88 + end = block + num;
89 + exists = 1;
90 + } else {
91 + BUG();
92 + }
93 + BUG_ON(end <= start);
95 + if (!exists) {
96 + cbex.ec_block = start;
97 + cbex.ec_len = end - start;
98 + cbex.ec_start = 0;
99 + cbex.ec_type = EXT4_EXT_CACHE_GAP;
100 + } else {
101 + cbex.ec_block = le32_to_cpu(ex->ee_block);
102 + cbex.ec_len = ext4_ext_get_actual_len(ex);
103 + cbex.ec_start = ext_pblock(ex);
104 + cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
107 + BUG_ON(cbex.ec_len == 0);
108 + err = func(inode, path, &cbex, cbdata);
109 + ext4_ext_drop_refs(path);
111 + if (err < 0)
112 + break;
113 + if (err == EXT_REPEAT)
114 + continue;
115 + else if (err == EXT_BREAK) {
116 + err = 0;
117 + break;
120 + if (ext_depth(inode) != depth) {
121 + /* depth was changed. we have to realloc path */
122 + kfree(path);
123 + path = NULL;
126 + block = cbex.ec_block + cbex.ec_len;
129 + if (path) {
130 + ext4_ext_drop_refs(path);
131 + kfree(path);
134 + return err;
137 static int
138 ext4_ext_store_extent_cb(struct inode *inode,
139 struct ext4_ext_path *path,
140 @@ -212,6 +328,619 @@ int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
144 + * ext4_ext_merge_across_blocks - merge extents across leaf block
146 + * @handle journal handle
147 + * @inode target file's inode
148 + * @o_start first original extent to be defraged
149 + * @o_end last original extent to be defraged
150 + * @start_ext first new extent to be merged
151 + * @new_ext middle of new extent to be merged
152 + * @end_ext last new extent to be merged
153 + * @flag defrag mode (e.g. -f)
155 + * This function returns 0 if it succeeds, otherwise returns error value.
156 + */
157 +static int
158 +ext4_ext_merge_across_blocks(handle_t *handle, struct inode *inode,
159 + struct ext4_extent *o_start,
160 + struct ext4_extent *o_end, struct ext4_extent *start_ext,
161 + struct ext4_extent *new_ext, struct ext4_extent *end_ext,
162 + int flag)
164 + struct ext4_ext_path *org_path = NULL;
165 + ext4_lblk_t eblock = 0;
166 + int err = 0;
167 + int new_flag = 0;
168 + int end_flag = 0;
169 + int defrag_flag;
171 + if (flag == DEFRAG_FORCE_VICTIM)
172 + defrag_flag = 1;
173 + else
174 + defrag_flag = 0;
176 + if (le16_to_cpu(start_ext->ee_len) &&
177 + le16_to_cpu(new_ext->ee_len) &&
178 + le16_to_cpu(end_ext->ee_len)) {
180 + if ((o_start) == (o_end)) {
182 + /* start_ext new_ext end_ext
183 + * dest |---------|-----------|--------|
184 + * org |------------------------------|
185 + */
187 + end_flag = 1;
188 + } else {
190 + /* start_ext new_ext end_ext
191 + * dest |---------|----------|---------|
192 + * org |---------------|--------------|
193 + */
195 + o_end->ee_block = end_ext->ee_block;
196 + o_end->ee_len = end_ext->ee_len;
197 + ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
200 + o_start->ee_len = start_ext->ee_len;
201 + new_flag = 1;
203 + } else if ((le16_to_cpu(start_ext->ee_len)) &&
204 + (le16_to_cpu(new_ext->ee_len)) &&
205 + (!le16_to_cpu(end_ext->ee_len)) &&
206 + ((o_start) == (o_end))) {
208 + /* start_ext new_ext
209 + * dest |--------------|---------------|
210 + * org |------------------------------|
211 + */
213 + o_start->ee_len = start_ext->ee_len;
214 + new_flag = 1;
216 + } else if ((!le16_to_cpu(start_ext->ee_len)) &&
217 + (le16_to_cpu(new_ext->ee_len)) &&
218 + (le16_to_cpu(end_ext->ee_len)) &&
219 + ((o_start) == (o_end))) {
221 + /* new_ext end_ext
222 + * dest |--------------|---------------|
223 + * org |------------------------------|
224 + */
226 + o_end->ee_block = end_ext->ee_block;
227 + o_end->ee_len = end_ext->ee_len;
228 + ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
230 + /* If new_ext was first block */
231 + if (!new_ext->ee_block)
232 + eblock = 0;
233 + else
234 + eblock = le32_to_cpu(new_ext->ee_block);
236 + new_flag = 1;
237 + } else {
238 + printk(KERN_ERR "Unexpected case \n");
239 + return -EIO;
242 + if (new_flag) {
243 + org_path = ext4_ext_find_extent(inode, eblock, NULL);
244 + if (IS_ERR(org_path)) {
245 + err = PTR_ERR(org_path);
246 + org_path = NULL;
247 + goto ERR;
249 + err = ext4_ext_insert_extent_defrag(handle, inode,
250 + org_path, new_ext, defrag_flag);
251 + if (err)
252 + goto ERR;
255 + if (end_flag) {
256 + org_path = ext4_ext_find_extent(inode,
257 + le32_to_cpu(end_ext->ee_block) - 1, org_path);
258 + if (IS_ERR(org_path)) {
259 + err = PTR_ERR(org_path);
260 + org_path = NULL;
261 + goto ERR;
263 + err = ext4_ext_insert_extent_defrag(handle, inode,
264 + org_path, end_ext, defrag_flag);
265 + if (err)
266 + goto ERR;
268 +ERR:
269 + if (org_path) {
270 + ext4_ext_drop_refs(org_path);
271 + kfree(org_path);
274 + return err;
278 +/**
279 + * ext4_ext_merge_inside_block - merge new extent to the extent block
281 + * @handle journal handle
282 + * @inode target file's inode
283 + * @o_start first original extent to be defraged
284 + * @o_end last original extent to be merged
285 + * @start_ext first new extent to be merged
286 + * @new_ext middle of new extent to be merged
287 + * @end_ext last new extent to be merged
288 + * @eh extent header of target leaf block
289 + * @replaced the number of blocks which will be replaced with new_ext
290 + * @range_to_move used to decide how to merge
292 + * This function always returns 0.
294 +static int
295 +ext4_ext_merge_inside_block(handle_t *handle, struct inode *inode,
296 + struct ext4_extent *o_start, struct ext4_extent *o_end,
297 + struct ext4_extent *start_ext, struct ext4_extent *new_ext,
298 + struct ext4_extent *end_ext, struct ext4_extent_header *eh,
299 + ext4_fsblk_t replaced, int range_to_move)
301 + int i = 0;
302 + unsigned len;
304 + /* Move the existing extents */
305 + if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
306 + len = EXT_LAST_EXTENT(eh) - (o_end + 1) + 1;
307 + len = len * sizeof(struct ext4_extent);
308 + memmove(o_end + 1 + range_to_move, o_end + 1, len);
311 + /* Insert start entry */
312 + if (le16_to_cpu(start_ext->ee_len))
313 + o_start[i++].ee_len = start_ext->ee_len;
315 + /* Insert new entry */
316 + if (le16_to_cpu(new_ext->ee_len)) {
317 + o_start[i].ee_block = new_ext->ee_block;
318 + o_start[i].ee_len = cpu_to_le16(replaced);
319 + ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
322 + /* Insert end entry */
323 + if (end_ext->ee_len)
324 + o_start[i] = *end_ext;
326 + /* Increment the total entries counter on the extent block */
327 + eh->eh_entries
328 + = cpu_to_le16(le16_to_cpu(eh->eh_entries) + range_to_move);
330 + return 0;
333 +/**
334 + * ext4_ext_merge_extents - merge new extent
336 + * @handle journal handle
337 + * @inode target file's inode
338 + * @org_path path indicates first extent to be defraged
339 + * @o_start first original extent to be defraged
340 + * @o_end last original extent to be defraged
341 + * @start_ext first new extent to be merged
342 + * @new_ext middle of new extent to be merged
343 + * @end_ext last new extent to be merged
344 + * @replaced the number of blocks which will be replaced with new_ext
345 + * @flag defrag mode (e.g. -f)
347 + * This function returns 0 if it succeeds, otherwise returns error value.
348 + */
349 +static int
350 +ext4_ext_merge_extents(handle_t *handle, struct inode *inode,
351 + struct ext4_ext_path *org_path,
352 + struct ext4_extent *o_start, struct ext4_extent *o_end,
353 + struct ext4_extent *start_ext, struct ext4_extent *new_ext,
354 + struct ext4_extent *end_ext, ext4_fsblk_t replaced, int flag)
356 + struct ext4_extent_header *eh;
357 + unsigned need_slots, slots_range;
358 + int range_to_move, depth, ret;
360 + /* The extents need to be inserted
361 + * start_extent + new_extent + end_extent
362 + */
363 + need_slots = (le16_to_cpu(start_ext->ee_len) ? 1 : 0) +
364 + (le16_to_cpu(end_ext->ee_len) ? 1 : 0) +
365 + (le16_to_cpu(new_ext->ee_len) ? 1 : 0);
367 + /* The number of slots between start and end */
368 + slots_range = o_end - o_start + 1;
370 + /* Range to move the end of extent */
371 + range_to_move = need_slots - slots_range;
372 + depth = org_path->p_depth;
373 + org_path += depth;
374 + eh = org_path->p_hdr;
376 + if (depth) {
377 + /* Register to journal */
378 + ret = ext4_journal_get_write_access(handle, org_path->p_bh);
379 + if (ret)
380 + return ret;
383 + /* expansion */
384 + if ((range_to_move > 0) &&
385 + (range_to_move > le16_to_cpu(eh->eh_max)
386 + - le16_to_cpu(eh->eh_entries))) {
388 + ret = ext4_ext_merge_across_blocks(handle, inode, o_start,
389 + o_end, start_ext, new_ext,
390 + end_ext, flag);
391 + if (ret < 0)
392 + return ret;
393 + } else {
394 + ret = ext4_ext_merge_inside_block(handle, inode, o_start,
395 + o_end, start_ext, new_ext, end_ext,
396 + eh, replaced, range_to_move);
397 + if (ret < 0)
398 + return ret;
401 + if (depth) {
402 + ret = ext4_journal_dirty_metadata(handle, org_path->p_bh);
403 + if (ret)
404 + return ret;
405 + } else {
406 + ret = ext4_mark_inode_dirty(handle, inode);
407 + if (ret < 0)
408 + return ret;
411 + return 0;
415 +/**
416 + * ext4_ext_defrag_leaf_block - Defragmentation for one leaf extent block.
417 + * @handle journal handle
418 + * @org_inode target inode
419 + * @org_path path indicates first extent to be defraged
420 + * @dext destination extent
421 + * @from start offset on the target file
422 + * @flag defrag mode (e.g. -f)
424 + * This function returns 0 if it succeeds, otherwise returns error value.
425 + */
426 +static int
427 +ext4_ext_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
428 + struct ext4_ext_path *org_path, struct ext4_extent *dext,
429 + ext4_lblk_t *from, int flag)
431 + unsigned long depth;
432 + ext4_fsblk_t replaced = 0;
433 + struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext;
434 + struct ext4_extent new_ext, start_ext, end_ext;
435 + ext4_lblk_t new_end, lblock;
436 + unsigned short len;
437 + ext4_fsblk_t new_phys_end;
438 + int ret;
440 + depth = ext_depth(org_inode);
441 + start_ext.ee_len = end_ext.ee_len = 0;
442 + o_start = o_end = oext = org_path[depth].p_ext;
443 + ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
444 + new_ext.ee_len = dext->ee_len;
445 + len = le16_to_cpu(new_ext.ee_len);
446 + new_ext.ee_block = cpu_to_le32(*from);
447 + lblock = le32_to_cpu(oext->ee_block);
448 + new_end = le32_to_cpu(new_ext.ee_block)
449 + + le16_to_cpu(new_ext.ee_len) - 1;
450 + new_phys_end = ext_pblock(&new_ext)
451 + + le16_to_cpu(new_ext.ee_len) - 1;
453 + /* First original extent
454 + * dest |---------------|
455 + * org |---------------|
456 + */
457 + if (le32_to_cpu(new_ext.ee_block) >
458 + le32_to_cpu(oext->ee_block) &&
459 + le32_to_cpu(new_ext.ee_block) <
460 + le32_to_cpu(oext->ee_block)
461 + + le16_to_cpu(oext->ee_len)) {
462 + start_ext.ee_len = cpu_to_le32(le32_to_cpu(new_ext.ee_block)
463 + - le32_to_cpu(oext->ee_block));
464 + replaced += le16_to_cpu(oext->ee_len)
465 + - le16_to_cpu(start_ext.ee_len);
466 + } else if (oext > EXT_FIRST_EXTENT(org_path[depth].p_hdr)) {
467 + /* We can merge previous extent. */
468 + prev_ext = oext - 1;
469 + if (((ext_pblock(prev_ext) + le32_to_cpu(prev_ext->ee_len))
470 + == ext_pblock(&new_ext))
471 + && (le32_to_cpu(prev_ext->ee_block)
472 + + le32_to_cpu(prev_ext->ee_len)
473 + == le32_to_cpu(new_ext.ee_block))) {
474 + o_start = prev_ext;
475 + start_ext.ee_len = cpu_to_le32(
476 + le16_to_cpu(prev_ext->ee_len)
477 + + le16_to_cpu(new_ext.ee_len));
478 + new_ext.ee_len = 0;
481 + for (;;) {
482 + /* The extent for destination must be found. */
483 + BUG_ON(!oext || lblock != le32_to_cpu(oext->ee_block));
484 + lblock += le16_to_cpu(oext->ee_len);
486 + /* Middle of original extent
487 + * dest |-------------------|
488 + * org |-----------------|
489 + */
490 + if (le32_to_cpu(new_ext.ee_block) <=
491 + le32_to_cpu(oext->ee_block) &&
492 + new_end >= le32_to_cpu(oext->ee_block)
493 + + le16_to_cpu(oext->ee_len) - 1)
494 + replaced += le16_to_cpu(oext->ee_len);
496 + /* Last original extent
497 + * dest |----------------|
498 + * org |---------------|
499 + */
500 + if (new_end >= le32_to_cpu(oext->ee_block) &&
501 + new_end < le32_to_cpu(oext->ee_block)
502 + + le16_to_cpu(oext->ee_len) - 1) {
503 + end_ext.ee_len
504 + = cpu_to_le16(le32_to_cpu(oext->ee_block)
505 + + le16_to_cpu(oext->ee_len) - 1 - new_end);
506 + ext4_ext_store_pblock(&end_ext, (ext_pblock(o_end)
507 + + cpu_to_le16(oext->ee_len)
508 + - cpu_to_le16(end_ext.ee_len)));
509 + end_ext.ee_block
510 + = cpu_to_le32(le32_to_cpu(o_end->ee_block)
511 + + le16_to_cpu(oext->ee_len)
512 + - le16_to_cpu(end_ext.ee_len));
513 + replaced += le16_to_cpu(oext->ee_len)
514 + - le16_to_cpu(end_ext.ee_len);
517 + /* Detected the block end, reached the number of replaced
518 + * blocks to dext->ee_len. Then, merge the extent.
519 + */
520 + if (oext == EXT_LAST_EXTENT(org_path[depth].p_hdr) ||
521 + new_end <= le32_to_cpu(oext->ee_block)
522 + + le16_to_cpu(oext->ee_len) - 1) {
523 + ret = ext4_ext_merge_extents(handle, org_inode,
524 + org_path, o_start, o_end, &start_ext,
525 + &new_ext, &end_ext, replaced, flag);
526 + if (ret < 0)
527 + return ret;
529 + /* All expected blocks are replaced */
530 + if (le16_to_cpu(new_ext.ee_len) <= 0) {
531 + if (DQUOT_ALLOC_BLOCK(org_inode, len))
532 + return -EDQUOT;
533 + return 0;
536 + /* re-calculate new_ext */
537 + new_ext.ee_len = cpu_to_le32(le16_to_cpu(new_ext.ee_len)
538 + - replaced);
539 + new_ext.ee_block =
540 + cpu_to_le32(le32_to_cpu(new_ext.ee_block)
541 + + replaced);
542 + ext4_ext_store_pblock(&new_ext, ext_pblock(&new_ext)
543 + + replaced);
544 + replaced = 0;
545 + start_ext.ee_len = end_ext.ee_len = 0;
546 + o_start = NULL;
548 + /* All expected blocks are replaced */
549 + if (le16_to_cpu(new_ext.ee_len) <= 0) {
550 + if (DQUOT_ALLOC_BLOCK(org_inode, len))
551 + return -EDQUOT;
552 + return 0;
556 + /* Get next extent for original. */
557 + if (org_path)
558 + ext4_ext_drop_refs(org_path);
559 + org_path = ext4_ext_find_extent(org_inode, lblock, org_path);
560 + if (IS_ERR(org_path)) {
561 + ret = PTR_ERR(org_path);
562 + org_path = NULL;
563 + return ret;
565 + depth = ext_depth(org_inode);
566 + oext = org_path[depth].p_ext;
567 + if (oext->ee_block + oext->ee_len <= lblock)
568 + return -ENOENT;
570 + o_end = oext;
571 + if (!o_start)
572 + o_start = oext;
576 +/**
577 + * ext4_ext_replace_branches - replace original extents with new extents.
578 + * @org_inode Original inode
579 + * @dest_inode temporary inode
580 + * @from_page Page offset
581 + * @count_page Page count to be replaced
582 + * @flag defrag mode (e.g. -f)
584 + * This function returns 0 if it succeeds, otherwise returns error value.
585 + * Replace extents for blocks from "from" to "from + count - 1".
586 + */
587 +static int
588 +ext4_ext_replace_branches(struct inode *org_inode, struct inode *dest_inode,
589 + pgoff_t from_page, pgoff_t dest_from_page,
590 + pgoff_t count_page, int flag)
592 + handle_t *handle = NULL;
593 + struct ext4_ext_path *org_path = NULL;
594 + struct ext4_ext_path *dest_path = NULL;
595 + struct ext4_extent *oext, *dext, *swap_ext;
596 + struct ext4_extent tmp_ext, tmp_ext2;
597 + ext4_lblk_t from, count, dest_off, diff, org_diff;
598 + int err = 0;
599 + int depth;
600 + int replaced_count = 0;
601 + unsigned jnum;
603 + from = (ext4_lblk_t)from_page <<
604 + (PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
605 + count = (ext4_lblk_t)count_page <<
606 + (PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
607 + dest_off = (ext4_lblk_t)dest_from_page <<
608 + (PAGE_CACHE_SHIFT - dest_inode->i_blkbits);
609 + jnum = ext4_ext_writepage_trans_blocks(org_inode, count) + 3;
610 + handle = ext4_journal_start(org_inode, jnum);
611 + if (IS_ERR(handle)) {
612 + err = PTR_ERR(handle);
613 + goto out;
616 + /* Get the original extent for the block "from" */
617 + org_path = ext4_ext_find_extent(org_inode, from, NULL);
618 + if (IS_ERR(org_path)) {
619 + err = PTR_ERR(org_path);
620 + org_path = NULL;
621 + goto out;
624 + /* Get the destination extent for the head */
625 + dest_path = ext4_ext_find_extent(dest_inode, dest_off, NULL);
626 + if (IS_ERR(dest_path)) {
627 + err = PTR_ERR(dest_path);
628 + dest_path = NULL;
629 + goto out;
631 + depth = ext_depth(dest_inode);
632 + dext = dest_path[depth].p_ext;
633 + /* When dext is too large, pick up the target range. */
634 + diff = dest_off - le32_to_cpu(dext->ee_block);
635 + ext4_ext_store_pblock(&tmp_ext, ext_pblock(dext) + diff);
636 + tmp_ext.ee_block = cpu_to_le32(le32_to_cpu(dext->ee_block) + diff);
637 + tmp_ext.ee_len = cpu_to_le16(le16_to_cpu(dext->ee_len) - diff);
638 + if (count < le16_to_cpu(tmp_ext.ee_len))
639 + tmp_ext.ee_len = cpu_to_le16(count);
640 + dext = &tmp_ext;
642 + depth = ext_depth(org_inode);
643 + oext = org_path[depth].p_ext;
644 + org_diff = from - le32_to_cpu(oext->ee_block);
645 + ext4_ext_store_pblock(&tmp_ext2, ext_pblock(oext) + org_diff);
646 + tmp_ext2.ee_block = tmp_ext.ee_block;
648 + /* adjust extent length when blocksize != pagesize */
649 + if (tmp_ext.ee_len <= (oext->ee_len - org_diff)) {
650 + tmp_ext2.ee_len = tmp_ext.ee_len;
651 + } else {
652 + tmp_ext2.ee_len = oext->ee_len - org_diff;
653 + tmp_ext.ee_len = tmp_ext2.ee_len;
655 + swap_ext = &tmp_ext2;
657 + /* loop for the destination extents */
658 + while (1) {
659 + /* The extent for destination must be found. */
660 + BUG_ON(!dext || dest_off != le32_to_cpu(dext->ee_block));
662 + /* loop for the original extent blocks */
663 + err = ext4_ext_defrag_leaf_block(handle, org_inode,
664 + org_path, dext, &from, flag);
665 + if (err < 0)
666 + goto out;
668 + /* We need the function which fixes extent information for
669 + * inserting.
670 + * e.g. ext4_ext_merge_extents().
671 + */
672 + err = ext4_ext_defrag_leaf_block(handle, dest_inode,
673 + dest_path, swap_ext, &dest_off, -1);
674 + if (err < 0)
675 + goto out;
677 + replaced_count += le16_to_cpu(dext->ee_len);
678 + dest_off += le16_to_cpu(dext->ee_len);
679 + from += le16_to_cpu(dext->ee_len);
681 + /* Already moved the expected blocks */
682 + if (replaced_count >= count)
683 + break;
685 + if (org_path)
686 + ext4_ext_drop_refs(org_path);
687 + org_path = ext4_ext_find_extent(org_inode, from, NULL);
688 + if (IS_ERR(org_path)) {
689 + err = PTR_ERR(org_path);
690 + org_path = NULL;
691 + goto out;
693 + depth = ext_depth(org_inode);
694 + oext = org_path[depth].p_ext;
695 + if (oext->ee_block + oext->ee_len <= from) {
696 + err = 0;
697 + goto out;
700 + if (dest_path)
701 + ext4_ext_drop_refs(dest_path);
702 + dest_path = ext4_ext_find_extent(dest_inode, dest_off, NULL);
703 + if (IS_ERR(dest_path)) {
704 + err = PTR_ERR(dest_path);
705 + dest_path = NULL;
706 + goto out;
708 + depth = ext_depth(dest_inode);
709 + dext = dest_path[depth].p_ext;
710 + if (dext->ee_block + dext->ee_len <= dest_off) {
711 + err = 0;
712 + goto out;
715 + /* When dext is too large, pick up the target range. */
716 + diff = dest_off - le32_to_cpu(dext->ee_block);
717 + ext4_ext_store_pblock(&tmp_ext, ext_pblock(dext) + diff);
718 + tmp_ext.ee_block =
719 + cpu_to_le32(le32_to_cpu(dext->ee_block) + diff);
720 + tmp_ext.ee_len = cpu_to_le16(le16_to_cpu(dext->ee_len) - diff);
722 + if ((count - replaced_count) < le16_to_cpu(tmp_ext.ee_len))
723 + tmp_ext.ee_len = count - replaced_count ;
725 + dext = &tmp_ext;
727 + org_diff = from - le32_to_cpu(oext->ee_block);
728 + ext4_ext_store_pblock(&tmp_ext2, ext_pblock(oext) + org_diff);
729 + tmp_ext2.ee_block = tmp_ext.ee_block;
731 + /* adjust extent length when blocksize != pagesize */
732 + if (tmp_ext.ee_len <= (oext->ee_len - org_diff)) {
733 + tmp_ext2.ee_len = tmp_ext.ee_len;
734 + } else {
735 + tmp_ext2.ee_len = oext->ee_len - org_diff;
736 + tmp_ext.ee_len = tmp_ext2.ee_len;
738 + swap_ext = &tmp_ext2;
741 +out:
742 + if (handle)
743 + ext4_journal_stop(handle);
744 + if (org_path) {
745 + ext4_ext_drop_refs(org_path);
746 + kfree(org_path);
748 + if (dest_path) {
749 + ext4_ext_drop_refs(dest_path);
750 + kfree(dest_path);
753 + return err;
756 +/**
757 * ext4_ext_alloc_blocks - allocate contiguous blocks to temporary inode
758 * @dest_inode temporary inode for multiple block allocation
759 * @org_inode original inode
760 @@ -413,6 +1142,188 @@ out2:
764 + * ext4_ext_defrag_partial - defrag original file partially
765 + * @filp: pointer to file
766 + * @org_offset: page index on original file
767 + * @dest_offset: page index on temporary file
768 + * @flag: defrag mode (e.g. -f)
770 + * This function returns 0 if succeeded, otherwise returns error value
771 + */
772 +static int
773 +ext4_ext_defrag_partial(struct inode *tmp_inode, struct file *filp,
774 + pgoff_t org_offset, pgoff_t dest_offset, int flag)
776 + struct inode *inode = filp->f_dentry->d_inode;
777 + struct address_space *mapping = inode->i_mapping;
778 + struct page *page;
779 + pgoff_t offset_in_page = PAGE_SIZE;
780 + int ret = 0;
782 + up_write(&EXT4_I(inode)->i_data_sem);
783 + page = read_cache_page(inode->i_mapping, org_offset,
784 + (filler_t *)inode->i_mapping->a_ops->readpage, NULL);
785 + down_write(&EXT4_I(inode)->i_data_sem);
787 + if (IS_ERR(page)) {
788 + ret = PTR_ERR(page);
789 + return ret;
792 + lock_page(page);
794 + /*
795 + * try_to_release_page() doesn't call relasepage in writeback mode.
796 + * We should care about the order of writing to the same file
797 + * by multiple defrag processes.
798 + * It needs to call wait_on_page_writeback() to wait for the
799 + * writeback of the page.
800 + */
801 + if (PageWriteback(page))
802 + wait_on_page_writeback(page);
804 + /* release old bh and drop refs */
805 + try_to_release_page(page, 0);
806 + ret = ext4_ext_replace_branches(inode, tmp_inode, org_offset,
807 + dest_offset, 1, flag);
808 + if (ret < 0)
809 + goto ERR;
811 + /* Clear the inode cache not to refer to the old data. */
812 + ext4_ext_invalidate_cache(inode);
814 + if (org_offset == ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
815 + offset_in_page = (inode->i_size & (PAGE_CACHE_SIZE - 1));
816 + /*
817 + * If org_offset is the last page and i_size is
818 + * multiples of PAGE_CACHE_SIZE, set PAGE_CACHE_SIZE to
819 + * offset_in_page not to be 0.
820 + */
821 + if (offset_in_page == 0)
822 + offset_in_page = PAGE_CACHE_SIZE;
825 + up_write(&EXT4_I(inode)->i_data_sem);
826 + ret = mapping->a_ops->prepare_write(filp, page,
827 + 0, offset_in_page);
828 + down_write(&EXT4_I(inode)->i_data_sem);
829 + if (ret)
830 + goto ERR;
832 + ret = mapping->a_ops->commit_write(filp, page,
833 + 0, offset_in_page);
834 +ERR:
835 + unlock_page(page);
836 + page_cache_release(page);
838 + return (ret < 0 ? ret : 0);
841 +/**
842 + * ext4_ext_defrag_partial2 - defrag_partial with write_{begin, end}
843 + * @filp: pointer to file
844 + * @org_offset: page index on original file
845 + * @dest_offset: page index on temporary file
846 + * @flag: defrag mode (e.g. -f)
848 + * This function returns 0 if succeeded, otherwise returns error value
849 + */
850 +static int
851 +ext4_ext_defrag_partial2(struct inode *tmp_inode, struct file *filp,
852 + pgoff_t org_offset, pgoff_t dest_offset, int flag)
854 + struct inode *inode = filp->f_dentry->d_inode;
855 + struct address_space *mapping = inode->i_mapping;
856 + struct buffer_head *bh;
857 + struct page *page;
858 + const struct address_space_operations *a_ops = mapping->a_ops;
859 + pgoff_t offset_in_page = PAGE_SIZE;
860 + int ret = 0;
861 + int blocksize = inode->i_sb->s_blocksize;
862 + int blocks_per_page = 0;
863 + int i = 0;
864 + long long offs = org_offset << PAGE_CACHE_SHIFT;
865 + unsigned long blk_off = 0;
866 + unsigned int w_flags = 0;
867 + void *fsdata;
869 + if (segment_eq(get_fs(), KERNEL_DS))
870 + w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
872 + if (org_offset == ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
873 + offset_in_page = (inode->i_size & (PAGE_CACHE_SIZE - 1));
874 + /*
875 + * If org_offset is the last page and i_size is
876 + * multiples of PAGE_CACHE_SIZE, set PAGE_CACHE_SIZE to
877 + * offset_in_page not to be 0.
878 + */
879 + if (offset_in_page == 0)
880 + offset_in_page = PAGE_CACHE_SIZE;
883 + up_write(&EXT4_I(inode)->i_data_sem);
884 + ret = a_ops->write_begin(filp, mapping, offs,
885 + offset_in_page, w_flags, &page, &fsdata);
886 + down_write(&EXT4_I(inode)->i_data_sem);
888 + if (unlikely(ret < 0))
889 + goto ERR;
891 + if (!PageUptodate(page)) {
892 + mapping->a_ops->readpage(filp, page);
893 + lock_page(page);
896 + /*
897 + * try_to_release_page() doesn't call relasepage in writeback mode.
898 + * We should care about the order of writing to the same file
899 + * by multiple defrag processes.
900 + * It needs to call wait_on_page_writeback() to wait for the
901 + * writeback of the page.
902 + */
903 + if (PageWriteback(page))
904 + wait_on_page_writeback(page);
906 + /* release old bh and drop refs */
907 + try_to_release_page(page, 0);
908 + ret = ext4_ext_replace_branches(inode, tmp_inode, org_offset,
909 + dest_offset, 1, flag);
911 + if (ret < 0)
912 + goto ERR;
914 + /* Clear the inode cache not to refer to the old data. */
915 + ext4_ext_invalidate_cache(inode);
917 + if (!page_has_buffers(page))
918 + create_empty_buffers(page, 1 << inode->i_blkbits, 0);
920 + blocks_per_page = PAGE_SIZE / blocksize;
921 + blk_off = org_offset * blocks_per_page;
923 + bh = page_buffers(page);
924 + for (i = 0; i < blocks_per_page; i++) {
925 + up_write(&EXT4_I(inode)->i_data_sem);
926 + ret = ext4_get_block(inode, blk_off++, bh, 0);
927 + down_write(&EXT4_I(inode)->i_data_sem);
929 + if (ret < 0)
930 + goto ERR;
932 + if (bh->b_this_page != NULL)
933 + bh = bh->b_this_page;
936 + ret = a_ops->write_end(filp, mapping, offs, offset_in_page,
937 + offset_in_page, page, fsdata);
939 + if (unlikely(ret < 0))
940 + goto ERR;
941 +ERR:
942 + return (ret < 0 ? ret : 0);
945 +/**
946 * ext4_ext_new_extent_tree - allocate contiguous blocks
947 * @inode: inode of the original file
948 * @tmp_inode: inode of the temporary file
949 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
950 index f0cd509..240a4fb 100644
951 --- a/fs/ext4/extents.c
952 +++ b/fs/ext4/extents.c
953 @@ -349,7 +349,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
954 #define ext4_ext_show_leaf(inode,path)
955 #endif
957 -static void ext4_ext_drop_refs(struct ext4_ext_path *path)
958 +void ext4_ext_drop_refs(struct ext4_ext_path *path)
960 int depth = path->p_depth;
961 int i;
962 @@ -1167,7 +1167,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
963 * allocated block. Thus, index entries have to be consistent
964 * with leaves.
966 -static ext4_lblk_t
967 +ext4_lblk_t
968 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
970 int depth;
971 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
972 index 0dea2f5..fb250bd 100644
973 --- a/fs/ext4/inode.c
974 +++ b/fs/ext4/inode.c
975 @@ -939,8 +939,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
976 up_write((&EXT4_I(inode)->i_data_sem));
977 return retval;
980 -static int ext4_get_block(struct inode *inode, sector_t iblock,
981 +int ext4_get_block(struct inode *inode, sector_t iblock,
982 struct buffer_head *bh_result, int create)
984 handle_t *handle = ext4_journal_current_handle();