Remove BKL removal patches, since they don't seem to be portable on
[ext4-patch-queue.git] / ext4-online-defrag-alloc-contiguous-blks.patch
blobeaf5baa4b2352132e60ef238aec0dd2fbf3cd6eb
1 ext4: online defrag-- Allocate new contiguous blocks with mballoc
2 From: Akira Fujita <a-fujita@rs.jp.nec.com>
4 Search contiguous free blocks with multi-block allocation
5 and allocate them for the temporary inode.
7 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
8 Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
9 Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
10 ---
11 ---
12 fs/ext4/defrag.c | 766 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
13 fs/ext4/extents.c | 8
14 fs/ext4/inode.c | 2
15 fs/ext4/ioctl.c | 13
16 fs/ext4/mballoc.c | 7
17 5 files changed, 791 insertions(+), 5 deletions(-)
19 Index: linux-2.6.24-rc8/fs/ext4/defrag.c
20 ===================================================================
21 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
22 +++ linux-2.6.24-rc8/fs/ext4/defrag.c 2008-01-24 11:19:54.000000000 -0800
23 @@ -0,0 +1,766 @@
24 +#include <linux/module.h>
25 +#include <linux/fs.h>
26 +#include <linux/time.h>
27 +#include <linux/ext4_jbd2.h>
28 +#include <linux/jbd2.h>
29 +#include <linux/highuid.h>
30 +#include <linux/pagemap.h>
31 +#include <linux/quotaops.h>
32 +#include <linux/string.h>
33 +#include <linux/slab.h>
34 +#include <linux/falloc.h>
35 +#include <linux/ext4_fs_extents.h>
36 +#include <asm/uaccess.h>
37 +#include "group.h"
39 +/*
40 + * this structure is used to gather extents from the tree via ioctl
41 + */
42 +struct ext4_extent_buf {
43 + ext4_fsblk_t start;
44 + int buflen;
45 + void *buffer;
46 + void *cur;
47 + int err;
48 +};
50 +/*
51 + * this structure is used to collect stats info about the tree
52 + */
53 +struct ext4_extent_tree_stats {
54 + int depth;
55 + int extents_num;
56 + int leaf_num;
57 +};
59 +static int
60 +ext4_ext_store_extent_cb(struct inode *inode,
61 + struct ext4_ext_path *path,
62 + struct ext4_ext_cache *newex,
63 + struct ext4_extent_buf *buf)
66 + if (newex->ec_type != EXT4_EXT_CACHE_EXTENT)
67 + return EXT_CONTINUE;
69 + if (buf->err < 0)
70 + return EXT_BREAK;
71 + if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen)
72 + return EXT_BREAK;
74 + if (!copy_to_user(buf->cur, newex, sizeof(*newex))) {
75 + buf->err++;
76 + buf->cur += sizeof(*newex);
77 + } else {
78 + buf->err = -EFAULT;
79 + return EXT_BREAK;
80 + }
81 + return EXT_CONTINUE;
84 +static int
85 +ext4_ext_collect_stats_cb(struct inode *inode,
86 + struct ext4_ext_path *path,
87 + struct ext4_ext_cache *ex,
88 + struct ext4_extent_tree_stats *buf)
90 + int depth;
92 + if (ex->ec_type != EXT4_EXT_CACHE_EXTENT)
93 + return EXT_CONTINUE;
95 + depth = ext_depth(inode);
96 + buf->extents_num++;
97 + if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr))
98 + buf->leaf_num++;
99 + return EXT_CONTINUE;
102 +/**
103 + * ext4_ext_next_extent - search for next extent and set it to "extent"
104 + * @inode: inode of the original file
105 + * @path: this will obtain data for next extent
106 + * @extent: pointer to next extent we have just gotten
108 + * This function returns 0 or 1(last_entry) if succeeded, otherwise
109 + * returns -EIO
110 + */
111 +static int
112 +ext4_ext_next_extent(struct inode *inode,
113 + struct ext4_ext_path *path,
114 + struct ext4_extent **extent)
116 + int ppos;
117 + int leaf_ppos = path->p_depth;
119 + ppos = leaf_ppos;
120 + if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
121 + /* leaf block */
122 + *extent = ++path[ppos].p_ext;
123 + return 0;
126 + while (--ppos >= 0) {
127 + if (EXT_LAST_INDEX(path[ppos].p_hdr) >
128 + path[ppos].p_idx) {
129 + int cur_ppos = ppos;
131 + /* index block */
132 + path[ppos].p_idx++;
133 + path[ppos].p_block =
134 + idx_pblock(path[ppos].p_idx);
135 + if (path[ppos+1].p_bh)
136 + brelse(path[ppos+1].p_bh);
137 + path[ppos+1].p_bh =
138 + sb_bread(inode->i_sb, path[ppos].p_block);
139 + if (!path[ppos+1].p_bh)
140 + return -EIO;
141 + path[ppos+1].p_hdr =
142 + ext_block_hdr(path[ppos+1].p_bh);
144 + /* halfway index block */
145 + while (++cur_ppos < leaf_ppos) {
146 + path[cur_ppos].p_idx =
147 + EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
148 + path[cur_ppos].p_block =
149 + idx_pblock(path[cur_ppos].p_idx);
150 + if (path[cur_ppos+1].p_bh)
151 + brelse(path[cur_ppos+1].p_bh);
152 + path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
153 + path[cur_ppos].p_block);
154 + if (!path[cur_ppos+1].p_bh)
155 + return -EIO;
156 + path[cur_ppos+1].p_hdr =
157 + ext_block_hdr(path[cur_ppos+1].p_bh);
160 + /* leaf block */
161 + path[leaf_ppos].p_ext = *extent =
162 + EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
163 + return 0;
166 + /* last_extent */
167 + return 1;
170 +int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
171 + unsigned long arg)
173 + int err = 0;
174 + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL ||
175 + cmd == EXT4_IOC_FIBMAP))
176 + return -EINVAL;
178 + if (cmd == EXT4_IOC_GET_EXTENTS) {
179 + struct ext4_extent_buf buf;
181 + if (copy_from_user(&buf, (void *) arg, sizeof(buf)))
182 + return -EFAULT;
184 + buf.cur = buf.buffer;
185 + buf.err = 0;
186 + down_write(&EXT4_I(inode)->i_data_sem);
187 + err = ext4_ext_walk_space(inode, buf.start, EXT_MAX_BLOCK,
188 + (void *)ext4_ext_store_extent_cb, &buf);
189 + up_write(&EXT4_I(inode)->i_data_sem);
190 + if (err == 0)
191 + err = buf.err;
192 + } else if (cmd == EXT4_IOC_GET_TREE_STATS) {
193 + struct ext4_extent_tree_stats buf;
195 + down_write(&EXT4_I(inode)->i_data_sem);
196 + buf.depth = ext_depth(inode);
197 + buf.extents_num = 0;
198 + buf.leaf_num = 0;
199 + err = ext4_ext_walk_space(inode, 0, EXT_MAX_BLOCK,
200 + (void *)ext4_ext_collect_stats_cb, &buf);
201 + up_write(&EXT4_I(inode)->i_data_sem);
202 + if (!err)
203 + err = copy_to_user((void *) arg, &buf, sizeof(buf));
204 + } else if (cmd == EXT4_IOC_GET_TREE_DEPTH) {
205 + down_write(&EXT4_I(inode)->i_data_sem);
206 + err = ext_depth(inode);
207 + up_write(&EXT4_I(inode)->i_data_sem);
208 + } else if (cmd == EXT4_IOC_FIBMAP) {
209 + ext4_fsblk_t __user *p = (ext4_fsblk_t __user *)arg;
210 + ext4_fsblk_t block = 0;
211 + struct address_space *mapping = filp->f_mapping;
213 + if (copy_from_user(&block, (ext4_fsblk_t __user *)arg,
214 + sizeof(block)))
215 + return -EFAULT;
217 + lock_kernel();
218 + block = ext4_bmap(mapping, block);
219 + unlock_kernel();
221 + return put_user(block, p);
222 + } else if (cmd == EXT4_IOC_DEFRAG) {
223 + struct ext4_ext_defrag_data defrag;
225 + if (copy_from_user(&defrag,
226 + (struct ext4_ext_defrag_data __user *)arg,
227 + sizeof(defrag)))
228 + return -EFAULT;
229 + err = ext4_ext_defrag(filp, defrag.start_offset,
230 + defrag.defrag_size, defrag.goal, defrag.flag,
231 + &defrag.ext);
234 + return err;
237 +/**
238 + * ext4_ext_alloc_blocks - allocate contiguous blocks to temporary inode
239 + * @dest_inode temporary inode for multiple block allocation
240 + * @org_inode original inode
241 + * @iblock file related offset
242 + * @total_blocks contiguous blocks count
243 + * @goal block offset for allocation
244 + * @phase phase of create free space mode
246 + * If it succeeds, the function returns the count of extents we got,
247 + * otherwise returns err.
248 + */
249 +static int ext4_ext_alloc_blocks(struct inode *dest_inode,
250 + struct inode *org_inode, ext4_lblk_t iblock,
251 + ext4_fsblk_t total_blocks, ext4_fsblk_t goal, int phase)
253 + handle_t *handle = NULL;
254 + struct ext4_ext_path *dest_path = NULL;
255 + struct ext4_ext_path *org_path = NULL;
256 + struct ext4_extent newex;
257 + struct ext4_allocation_request ar;
258 + struct buffer_head *bh = NULL;
259 + struct super_block *org_sb = org_inode->i_sb;
260 + ext4_fsblk_t newblock = 0;
261 + ext4_fsblk_t rest = total_blocks;
262 + ext4_fsblk_t alloc_total = 0;
263 + unsigned long org_len;
264 + ext4_group_t dest_grp_no, org_grp_no, goal_grp_no;
265 + ext4_grpblk_t dest_blk_off, org_blk_off, goal_blk_off;
266 + int org_depth = ext_depth(org_inode);
267 + int metadata = 1;
268 + int count = 0;
269 + int credits = 0;
270 + int err = 0;
271 + int err2 = 0;
272 + int len_cnt = 0;
274 + ar.len = total_blocks;
275 + org_len = ar.len;
277 + /* Calculate group number of org_inode block */
278 + if (phase == DEFRAG_FORCE_VICTIM) {
279 + org_path = ext4_ext_find_extent(org_inode, iblock, org_path);
280 + if (IS_ERR(org_path)) {
281 + err = PTR_ERR(org_path);
282 + org_path = NULL;
283 + goto out2;
285 + ext4_get_group_no_and_offset(org_inode->i_sb,
286 + ext_pblock(org_path[org_depth].p_ext),
287 + &org_grp_no, &org_blk_off);
288 + ar.excepted_group = org_grp_no;
289 + } else {
290 + ar.excepted_group = -1;
293 + /* Find first extent. */
294 + dest_path = ext4_ext_find_extent(dest_inode, iblock, dest_path);
295 + if (IS_ERR(dest_path)) {
296 + err = PTR_ERR(dest_path);
297 + dest_path = NULL;
298 + goto out2;
301 + ar.inode = dest_inode;
302 + ar.flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_RESERVED
303 + | EXT4_MB_HINT_NOPREALLOC;
304 + if (goal) {
305 + ar.goal = goal;
306 + } else {
307 + ar.goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
310 + ar.logical = iblock;
311 + ar.lleft = 0;
312 + ar.pleft = 0;
313 + ar.lright = 0;
314 + ar.pright = 0;
316 + handle = ext4_journal_start(dest_inode, credits);
317 + if (IS_ERR(handle)) {
318 + err = PTR_ERR(handle);
319 + goto out2;
322 + while (alloc_total != total_blocks) {
323 + credits = ext4_ext_calc_credits_for_insert(dest_inode,
324 + dest_path);
325 + handle = ext4_ext_journal_restart(handle,
326 + credits + EXT4_TRANS_META_BLOCKS);
328 + if (IS_ERR(handle))
329 + return PTR_ERR(handle);
331 + newblock = ext4_mb_new_blocks(handle, &ar, &err);
333 + if (err) {
334 + /* Failed to get the contiguous blocks */
335 + goto out;
336 + } else if ((ar.len != org_len) &&
337 + (phase == DEFRAG_FORCE_TRY)) {
338 + ext4_free_blocks(handle, org_inode, newblock,
339 + ar.len, metadata);
340 + /* go to force mode */
341 + err = -ENOSPC;
342 + goto out;
343 + } else {
344 + /*
345 + * If ext4_mb_new_blocks() allocates
346 + * the block which used to be the metadata block,
347 + * its dirty buffer_head causes the overwriting
348 + * with old metadata.
349 + * We should call unmap_underlying_metadata()
350 + * to clear the dirty flag.
351 + */
352 + for (len_cnt = 0; len_cnt < ar.len; len_cnt++) {
353 + bh = sb_find_get_block(org_sb,
354 + newblock + len_cnt);
355 + unmap_underlying_metadata(org_sb->s_bdev,
356 + newblock + len_cnt);
359 + alloc_total += ar.len;
360 + ext4_get_group_no_and_offset(dest_inode->i_sb,
361 + goal, &goal_grp_no, &goal_blk_off);
362 + ext4_get_group_no_and_offset(dest_inode->i_sb,
363 + newblock, &dest_grp_no, &dest_blk_off);
364 + /* We can't allocate at the same block group */
365 + switch (phase) {
366 + case DEFRAG_FORCE_VICTIM:
367 + if (dest_grp_no == org_grp_no) {
368 + printk(KERN_ERR "defrag: Can't allocate"
369 + " in same block group\n");
370 + ext4_free_blocks(handle, org_inode,
371 + newblock, ar.len, metadata);
372 + err = -ENOSPC;
373 + goto out;
375 + break;
376 + case DEFRAG_FORCE_GATHER:
377 + /* Maybe reserved blocks are already used by
378 + other process */
379 + if (dest_grp_no != goal_grp_no
380 + || alloc_total != total_blocks) {
381 + printk(KERN_ERR "defrag: Already used"
382 + " the specified blocks\n");
383 + ext4_free_blocks(handle, org_inode,
384 + newblock, ar.len, metadata);
385 + err = -EIO;
386 + goto out;
388 + break;
391 + newex.ee_block = cpu_to_le32(alloc_total - ar.len);
392 + ext4_ext_store_pblock(&newex, newblock);
393 + newex.ee_len = cpu_to_le16(ar.len);
395 + if (!phase)
396 + ar.goal = newblock + ar.len;
397 + rest = rest - ar.len;
398 + ar.len = rest;
400 + err = ext4_ext_insert_extent(handle, dest_inode,
401 + dest_path, &newex);
402 + if (!err) {
403 + count++;
404 + } else {
405 + ext4_free_blocks(handle, org_inode,
406 + newblock, ar.len, metadata);
407 + goto out;
412 +out:
413 + /* Failed case: We have to remove halfway blocks */
414 + if (err)
415 + err2 = ext4_ext_remove_space(dest_inode, 0);
417 + /* Successful case */
418 + if (dest_path) {
419 + ext4_ext_drop_refs(dest_path);
420 + kfree(dest_path);
422 + if (org_path) {
423 + ext4_ext_drop_refs(org_path);
424 + kfree(org_path);
426 +out2:
427 + ext4_journal_stop(handle);
429 + if (err2) {
430 + return err2;
431 + } else if (err) {
432 + return err;
434 + /* return extents count */
435 + return count;
438 +/**
439 + * ext4_ext_new_extent_tree - allocate contiguous blocks
440 + * @inode: inode of the original file
441 + * @tmp_inode: inode of the temporary file
442 + * @path: the structure holding some info about
443 + * original extent tree
444 + * @tar_start: starting offset to allocate in blocks
445 + * @tar_blocks: the number of blocks to allocate
446 + * @iblock: file related offset
447 + * @goal: block offset for allocation
448 + * @flag: phase of create free space mode
450 + * This function returns the value as below:
451 + * 0(succeeded)
452 + * 1(not improved)
453 + * negative value(error)
454 + */
455 +static int
456 +ext4_ext_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
457 + struct ext4_ext_path *path, ext4_lblk_t tar_start,
458 + ext4_lblk_t tar_blocks, ext4_lblk_t iblock,
459 + ext4_fsblk_t goal, int flag)
461 + struct ext4_extent *ext = NULL;
462 + struct ext4_extent_header *eh = NULL;
463 + ext4_lblk_t tar_end = tar_start + tar_blocks - 1;
464 + int sum_org = 0, sum_tmp = 0;
465 + int ret = 0, depth;
466 + int last_extent = 0;
468 + eh = ext_inode_hdr(tmp_inode);
469 + eh->eh_depth = 0;
471 + /* allocate contiguous blocks */
472 + sum_tmp = ext4_ext_alloc_blocks(tmp_inode, inode, iblock,
473 + tar_blocks, goal, flag);
474 + if (sum_tmp < 0) {
475 + ret = sum_tmp;
476 + goto ERR;
479 + depth = ext_depth(inode);
480 + ext = path[depth].p_ext;
481 + while (1) {
482 + if (!last_extent)
483 + ++sum_org;
485 + if (tar_end <= le32_to_cpu(ext->ee_block) +
486 + le32_to_cpu(ext->ee_len) - 1 ||
487 + last_extent) {
489 + if ((sum_org == sum_tmp) && !goal) {
490 + /* not improved */
491 + if (!(ret =
492 + ext4_ext_remove_space(tmp_inode, 0)))
493 + ret = 1;
494 + } else if (sum_org < sum_tmp &&
495 + flag != DEFRAG_FORCE_VICTIM) {
496 + /* fragment increased */
497 + if (!(ret =
498 + ext4_ext_remove_space(tmp_inode, 0)))
499 + ret = -ENOSPC;
500 + printk("defrag failed due to no space\n");
502 + break;
504 + if ((last_extent =
505 + ext4_ext_next_extent(tmp_inode,
506 + path, &ext)) < 0) {
507 + ret = last_extent;
508 + break;
511 +ERR:
512 + return ret;
515 +/**
516 + * ext4_ext_defrag - defrag whole file
517 + * @filp: pointer to file
518 + * @from: starting offset to defrag in blocks
519 + * @defrag_size: size of defrag in blocks
520 + * @goal: block offset for allocation
521 + * @flag: phase of create free space mode
522 + * @ext: extent to be moved (only -f)
524 + * This function returns the number of blocks if succeeded, otherwise
525 + * returns error value
526 + */
527 +int
528 +ext4_ext_defrag(struct file *filp, ext4_lblk_t block_start,
529 + ext4_lblk_t defrag_size, ext4_fsblk_t goal,
530 + int flag, struct ext4_extent_data *ext)
532 + struct inode *inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
533 + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
534 + struct ext4_ext_path *path = NULL, *holecheck_path = NULL;
535 + struct ext4_extent *ext_prev = NULL, *ext_cur = NULL, *ext_dummy = NULL;
536 + handle_t *handle;
537 + ext4_lblk_t block_end = block_start + defrag_size - 1;
538 + ext4_lblk_t seq_blocks = 0, seq_start = 0;
539 + ext4_lblk_t add_blocks = 0;
540 + ext4_lblk_t file_end = (inode->i_size - 1) >> inode->i_blkbits;
541 + pgoff_t page_offset = 0, dest_offset = 0, seq_end_page = 0;
542 + int ret = 0, depth = 0, last_extent = 0, seq_extents = 0;
544 + /* Check goal offset if goal offset was given from userspace. */
545 + if (((0 < goal) && (ext4_blocks_count(es) < goal)) && (goal != -1)) {
546 + printk(KERN_ERR "defrag: incorrect goal number %llu, "
547 + "you can set goal until %llu\n", goal,
548 + ext4_blocks_count(es));
549 + ret = -EINVAL;
550 + goto ERR1;
553 + /* Setup for fixed blocks mode */
554 + if (ext->len) {
555 + if (ext->len < defrag_size) {
556 + printk("Cannot defrag due to the insufficient"
557 + " specified free blocks\n");
558 + return -EINVAL;
560 + flag = DEFRAG_FORCE_GATHER;
561 + goal = ext->start;
564 + if (file_end < block_end)
565 + defrag_size -= block_end - file_end;
567 + mutex_lock(&inode->i_mutex);
568 + down_write(&EXT4_I(inode)->i_data_sem);
570 + path = ext4_ext_find_extent(inode, block_start, NULL);
571 + if (IS_ERR(path)) {
572 + ret = PTR_ERR(path);
573 + path = NULL;
574 + goto ERR2;
577 + /* get path structure to check hole */
578 + holecheck_path = ext4_ext_find_extent(inode, block_start, NULL);
579 + if (IS_ERR(holecheck_path)) {
580 + ret = PTR_ERR(holecheck_path);
581 + holecheck_path = NULL;
582 + goto ERR2;
585 + depth = ext_depth(inode);
586 + ext_cur = holecheck_path[depth].p_ext;
587 + if (ext_cur == NULL)
588 + goto ERR2;
590 + /*
591 + * if block_start was within the hole, get proper extent whose ee_block
592 + * is beyond block_start
593 + */
594 + if (le32_to_cpu(ext_cur->ee_block) +
595 + le32_to_cpu(ext_cur->ee_len) - 1 < block_start) {
596 + if ((last_extent =
597 + ext4_ext_next_extent(inode, holecheck_path,
598 + &ext_cur)) < 0) {
599 + ret = last_extent;
600 + goto ERR2;
602 + if ((last_extent =
603 + ext4_ext_next_extent(inode, path,
604 + &ext_dummy)) < 0) {
605 + ret = last_extent;
606 + goto ERR2;
609 + seq_extents = 1;
610 + seq_start = ext_cur->ee_block;
612 + /* no blocks existed within designated range */
613 + if (le32_to_cpu(ext_cur->ee_block) > block_end) {
614 + printk("nothing done due to the lack of contiguous blocks\n");
615 + goto ERR2;
618 + /* adjust start blocks */
619 + add_blocks = min(ext_cur->ee_block +
620 + ext_cur->ee_len, block_end + 1) -
621 + max(ext_cur->ee_block, block_start);
623 + while (!last_extent && ext_cur->ee_block <= block_end) {
624 + seq_blocks += add_blocks;
626 + handle = ext4_journal_start(inode,
627 + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
628 + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
629 + 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) + 1);
630 + if (IS_ERR(handle)) {
631 + ret = PTR_ERR(handle);
632 + goto ERR1;
634 + tmp_inode = ext4_new_inode(handle,
635 + inode->i_sb->s_root->d_inode, S_IFREG);
636 + if (IS_ERR(tmp_inode)) {
637 + ret = -ENOMEM;
638 + ext4_journal_stop(handle);
639 + tmp_inode = NULL;
640 + goto ERR1;
643 + i_size_write(tmp_inode, i_size_read(inode));
644 + tmp_inode->i_nlink = 0;
645 + ext4_ext_tree_init(handle, tmp_inode);
646 + ext4_orphan_add(handle, tmp_inode);
647 + ext4_journal_stop(handle);
649 + /* adjust tail blocks */
650 + if (seq_start + seq_blocks - 1 > block_end)
651 + seq_blocks = block_end - seq_start + 1;
653 + ext_prev = ext_cur;
654 + if ((last_extent =
655 + ext4_ext_next_extent(inode, holecheck_path,
656 + &ext_cur)) < 0) {
657 + ret = last_extent;
658 + break;
660 + if (!last_extent)
661 + seq_extents++;
662 + add_blocks = le16_to_cpu(ext_cur->ee_len);
664 + /* found hole or reached the tail of either a designated range
665 + * or the file
666 + */
667 + if ((le32_to_cpu(ext_prev->ee_block) +
668 + le16_to_cpu(ext_prev->ee_len) ==
669 + le32_to_cpu(ext_cur->ee_block) &&
670 + block_end >= le32_to_cpu(ext_cur->ee_block) &&
671 + !last_extent)) {
672 + if (tmp_inode) {
673 + iput(tmp_inode);
674 + tmp_inode = NULL;
676 + continue;
679 + /* found an isolated block */
680 + if ((seq_extents == 1) && !goal) {
681 + seq_start = ext_cur->ee_block;
682 + goto CLEANUP;
685 + ret = ext4_ext_new_extent_tree(inode, tmp_inode, path,
686 + seq_start, seq_blocks, block_start, goal, flag);
688 + if (ret < 0) {
689 + break;
690 + } else if ((ret == 1) && (!goal || (goal && !flag))) {
691 + ret = 0;
692 + seq_start = le32_to_cpu(ext_cur->ee_block);
693 + goto CLEANUP;
696 + page_offset = seq_start >>
697 + (PAGE_CACHE_SHIFT - inode->i_blkbits);
698 + seq_end_page = (seq_start + seq_blocks - 1) >>
699 + (PAGE_CACHE_SHIFT - inode->i_blkbits);
701 + dest_offset = 0;
702 + seq_start = le32_to_cpu(ext_cur->ee_block);
704 + /* Discard all preallocations.
705 + * This is provisional solution.
706 + * When true ext4_mb_return_to_preallocation() is
707 + * implemented, this will be removed.
708 + */
709 + ext4_mb_discard_inode_preallocations(inode);
711 + if (inode->i_mapping->a_ops->write_begin) {
712 + while (page_offset <= seq_end_page) {
713 + /* replace original branches for new branches */
714 + ret = ext4_ext_defrag_partial2(tmp_inode,
715 + filp, page_offset,
716 + dest_offset, flag);
717 + if (ret < 0)
718 + goto ERR2;
720 + page_offset++;
721 + dest_offset++;
723 + } else {
724 + while (page_offset <= seq_end_page) {
725 + /* replace original branches for new branches */
726 + ret = ext4_ext_defrag_partial(tmp_inode,
727 + filp, page_offset,
728 + dest_offset, flag);
729 + if (ret < 0)
730 + goto ERR2;
732 + page_offset++;
733 + dest_offset++;
737 + /* decrease buffer counter */
738 + if (holecheck_path)
739 + ext4_ext_drop_refs(holecheck_path);
740 + holecheck_path =
741 + ext4_ext_find_extent(inode, seq_start, holecheck_path);
742 + if (IS_ERR(holecheck_path)) {
743 + ret = PTR_ERR(holecheck_path);
744 + holecheck_path = NULL;
745 + break;
747 + depth = holecheck_path->p_depth;
749 +CLEANUP:
750 + /* decrease buffer counter */
751 + if (path)
752 + ext4_ext_drop_refs(path);
753 + path = ext4_ext_find_extent(inode, seq_start, path);
754 + if (IS_ERR(path)) {
755 + ret = PTR_ERR(path);
756 + path = NULL;
757 + break;
760 + ext_cur = holecheck_path[depth].p_ext;
761 + add_blocks = le16_to_cpu(ext_cur->ee_len);
762 + seq_blocks = 0;
763 + dest_offset = 0;
764 + seq_extents = 1;
766 + if (tmp_inode) {
767 + iput(tmp_inode);
768 + tmp_inode = NULL;
772 +ERR2:
773 + if (path) {
774 + ext4_ext_drop_refs(path);
775 + kfree(path);
777 + if (holecheck_path) {
778 + ext4_ext_drop_refs(holecheck_path);
779 + kfree(holecheck_path);
781 +ERR1:
782 + up_write(&EXT4_I(inode)->i_data_sem);
783 + mutex_unlock(&inode->i_mutex);
785 + if (tmp_inode)
786 + iput(tmp_inode);
788 + return (ret ? ret : defrag_size);
790 Index: linux-2.6.24-rc8/fs/ext4/extents.c
791 ===================================================================
792 --- linux-2.6.24-rc8.orig/fs/ext4/extents.c 2008-01-24 11:19:32.000000000 -0800
793 +++ linux-2.6.24-rc8/fs/ext4/extents.c 2008-01-24 11:19:54.000000000 -0800
794 @@ -48,7 +48,7 @@
795 * ext_pblock:
796 * combine low and high parts of physical block number into ext4_fsblk_t
798 -static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
799 +ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
801 ext4_fsblk_t block;
803 @@ -92,7 +92,7 @@ static void ext4_idx_store_pblock(struct
804 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
807 -static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
808 +handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
810 int err;
812 @@ -142,7 +142,7 @@ static int ext4_ext_dirty(handle_t *hand
813 return err;
816 -static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
817 +ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
818 struct ext4_ext_path *path,
819 ext4_lblk_t block)
821 @@ -1952,7 +1952,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path
822 return 1;
825 -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
826 +int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
828 struct super_block *sb = inode->i_sb;
829 int depth = ext_depth(inode);
830 Index: linux-2.6.24-rc8/fs/ext4/inode.c
831 ===================================================================
832 --- linux-2.6.24-rc8.orig/fs/ext4/inode.c 2008-01-24 11:19:34.000000000 -0800
833 +++ linux-2.6.24-rc8/fs/ext4/inode.c 2008-01-24 11:19:54.000000000 -0800
834 @@ -1508,7 +1508,7 @@ out:
835 * So, if we see any bmap calls here on a modified, data-journaled file,
836 * take extra steps to flush any blocks which might be in the cache.
838 -static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
839 +sector_t ext4_bmap(struct address_space *mapping, sector_t block)
841 struct inode *inode = mapping->host;
842 journal_t *journal;
843 Index: linux-2.6.24-rc8/fs/ext4/ioctl.c
844 ===================================================================
845 --- linux-2.6.24-rc8.orig/fs/ext4/ioctl.c 2008-01-24 11:19:07.000000000 -0800
846 +++ linux-2.6.24-rc8/fs/ext4/ioctl.c 2008-01-24 11:19:54.000000000 -0800
847 @@ -231,6 +231,19 @@ flags_err:
849 return err;
851 + case EXT4_IOC_GET_EXTENTS:
852 + case EXT4_IOC_GET_TREE_STATS:
853 + case EXT4_IOC_GET_TREE_DEPTH:
854 + case EXT4_IOC_FIBMAP:
855 + case EXT4_IOC_DEFRAG:
856 + case EXT4_IOC_GROUP_INFO:
857 + case EXT4_IOC_FREE_BLOCKS_INFO:
858 + case EXT4_IOC_EXTENTS_INFO:
859 + case EXT4_IOC_RESERVE_BLOCK:
860 + case EXT4_IOC_MOVE_VICTIM:
861 + case EXT4_IOC_BLOCK_RELEASE: {
862 + return ext4_ext_ioctl(inode, filp, cmd, arg);
864 case EXT4_IOC_GROUP_ADD: {
865 struct ext4_new_group_data input;
866 struct super_block *sb = inode->i_sb;
867 Index: linux-2.6.24-rc8/fs/ext4/mballoc.c
868 ===================================================================
869 --- linux-2.6.24-rc8.orig/fs/ext4/mballoc.c 2008-01-24 11:19:18.000000000 -0800
870 +++ linux-2.6.24-rc8/fs/ext4/mballoc.c 2008-01-24 11:19:54.000000000 -0800
871 @@ -527,6 +527,7 @@ struct ext4_allocation_context {
872 struct page *ac_buddy_page;
873 struct ext4_prealloc_space *ac_pa;
874 struct ext4_locality_group *ac_lg;
875 + long long ac_excepted_group;
878 #define AC_STATUS_CONTINUE 1
879 @@ -2005,6 +2006,11 @@ repeat:
880 if (group == EXT4_SB(sb)->s_groups_count)
881 group = 0;
883 + if (ac->ac_excepted_group != -1 &&
884 + group == ac->ac_excepted_group) {
885 + continue;
888 /* quick check to skip empty groups */
889 grp = ext4_get_group_info(ac->ac_sb, group);
890 if (grp->bb_free == 0)
891 @@ -4165,6 +4171,7 @@ static int ext4_mb_initialize_context(st
892 ac->ac_bitmap_page = NULL;
893 ac->ac_buddy_page = NULL;
894 ac->ac_lg = NULL;
895 + ac->ac_excepted_group = ar->excepted_group;
897 /* we have to define context: we'll we work with a file or
898 * locality group. this is a policy, actually */