Pull in the ext4-remove-obsolete-fragments.patch from rc7-mm1
[ext4-patch-queue.git] / ext3-4-migrate.patch
blobe2754fb3af1b9c937937ff0e91642f83cd7c03c0
1 Add EXT4_IOC_MIGRATE ioctl
3 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5 The patch below adds an ioctl for migrating an ext3 indirect-block-mapped
6 inode to an ext4 extent-mapped inode.
8 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
9 ---
11 fs/ext4/Makefile | 2
12 fs/ext4/ioctl.c | 3
13 fs/ext4/migrate.c | 630 ++++++++++++++++++++++++++++++++++++++++++++++++
14 include/linux/ext4_fs.h | 4
15 4 files changed, 638 insertions(+), 1 deletion(-)
18 Index: linux-2.6.23-rc6/fs/ext4/Makefile
19 ===================================================================
20 --- linux-2.6.23-rc6.orig/fs/ext4/Makefile 2007-09-18 17:18:59.000000000 -0700
21 +++ linux-2.6.23-rc6/fs/ext4/Makefile 2007-09-20 17:26:15.000000000 -0700
22 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
24 ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
25 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
26 - ext4_jbd2.o
27 + ext4_jbd2.o migrate.o
29 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
30 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
31 Index: linux-2.6.23-rc6/fs/ext4/ioctl.c
32 ===================================================================
33 --- linux-2.6.23-rc6.orig/fs/ext4/ioctl.c 2007-09-18 17:18:59.000000000 -0700
34 +++ linux-2.6.23-rc6/fs/ext4/ioctl.c 2007-09-20 17:26:15.000000000 -0700
35 @@ -249,6 +249,9 @@ flags_err:
36 return err;
39 + case EXT4_IOC_MIGRATE:
40 + return ext4_ext_migrate(inode, filp, cmd, arg);
42 default:
43 return -ENOTTY;
45 Index: linux-2.6.23-rc6/fs/ext4/migrate.c
46 ===================================================================
47 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
48 +++ linux-2.6.23-rc6/fs/ext4/migrate.c 2007-09-20 17:26:15.000000000 -0700
49 @@ -0,0 +1,630 @@
50 +/*
51 + * Copyright IBM Corporation, 2007
52 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
53 + *
54 + * This program is free software; you can redistribute it and/or modify it
55 + * under the terms of version 2.1 of the GNU Lesser General Public License
56 + * as published by the Free Software Foundation.
57 + *
58 + * This program is distributed in the hope that it would be useful, but
59 + * WITHOUT ANY WARRANTY; without even the implied warranty of
60 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
61 + *
62 + */
64 +#include <linux/module.h>
65 +#include <linux/ext4_jbd2.h>
66 +#include <linux/ext4_fs_extents.h>
68 +struct list_blocks_struct {
69 + int first_block, last_block;
70 + ext4_fsblk_t first_pblock, last_pblock;
71 +};
73 +/* will go away */
74 +static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
76 + ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
77 + ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
80 +static int finish_range(handle_t *handle, struct inode *inode,
81 + struct list_blocks_struct *lb)
84 + int retval = 0, needed;
85 + struct ext4_extent newext;
86 + struct ext4_ext_path *path;
87 + if (lb->first_pblock == 0)
88 + return 0;
90 + /* Add the extent to temp inode*/
91 + newext.ee_block = cpu_to_le32(lb->first_block);
92 + newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block +1);
93 + ext4_ext_store_pblock(&newext, lb->first_pblock);
94 + path = ext4_ext_find_extent(inode, lb->first_block, NULL);
96 + if (IS_ERR(path)) {
97 + retval = PTR_ERR(path);
98 + goto err_out;
99 + }
101 + /*
102 + * Calculate the credit needed to inserting this extent
103 + * Since we are doing this in loop we may accumalate extra
104 + * credit. But below we try to not accumalate too much
105 + * of them by restarting the journal.
106 + */
107 + needed = ext4_ext_calc_credits_for_insert(inode, path);
109 + /*
110 + * Make sure the credit we accumalated is not really high
111 + */
113 + if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
115 + retval = ext4_journal_restart(handle, needed);
116 + if (retval)
117 + goto err_out;
121 + if (needed && (retval = ext4_journal_extend(handle, needed)) != 0) {
122 + /*
123 + * IF not able to extend the journal restart the journal
124 + */
125 + retval = ext4_journal_restart(handle, needed);
126 + if (retval)
127 + goto err_out;
130 + retval = ext4_ext_insert_extent(handle, inode, path, &newext);
132 +err_out:
133 + lb->first_pblock = 0;
134 + return retval;
136 +static int update_extent_range(handle_t *handle, struct inode *inode,
137 + ext4_fsblk_t pblock, int blk_num,
138 + struct list_blocks_struct *lb)
140 + int retval;
142 + /*
143 + * See if we can add on to the existing range (if it exists)
144 + */
145 + if (lb->first_pblock &&
146 + (lb->last_pblock+1 == pblock) &&
147 + (lb->last_block+1 == blk_num)) {
148 + lb->last_pblock = pblock;
149 + lb->last_block = blk_num;
150 + return 0;
152 + /*
153 + * Start a new range.
154 + */
155 + retval = finish_range(handle, inode, lb);
156 + lb->first_pblock = lb->last_pblock = pblock;
157 + lb->first_block = lb->last_block = blk_num;
159 + return retval;
163 +static int update_ind_extent_range(handle_t *handle, struct inode *inode,
164 + ext4_fsblk_t pblock, int *blk_nump,
165 + struct list_blocks_struct *lb)
167 + struct buffer_head *bh;
168 + __le32 *i_data;
169 + int i, retval = 0;
170 + int blk_count = *blk_nump;
171 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
173 + if (!pblock) {
174 + /* Only update the file block number */
175 + *blk_nump += max_entries;
176 + return 0;
179 + bh = sb_bread(inode->i_sb, pblock);
180 + if (!bh)
181 + return -EIO;
183 + i_data = (__le32 *)bh->b_data;
185 + for (i = 0; i < max_entries; i++, blk_count++) {
186 + if (i_data[i]) {
187 + retval = update_extent_range(handle, inode,
188 + le32_to_cpu(i_data[i]),
189 + blk_count, lb);
190 + if (retval)
191 + break;
195 + /* Update the file block number */
196 + *blk_nump = blk_count;
197 + brelse(bh);
198 + return retval;
201 +static int update_dind_extent_range(handle_t *handle, struct inode *inode,
202 + ext4_fsblk_t pblock, int *blk_nump,
203 + struct list_blocks_struct *lb)
205 + struct buffer_head *bh;
206 + __le32 *i_data;
207 + int i, retval = 0;
208 + int blk_count = *blk_nump;
209 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
211 + if (!pblock) {
212 + /* Only update the file block number */
213 + *blk_nump += max_entries * max_entries;
214 + return 0;
217 + bh = sb_bread(inode->i_sb, pblock);
218 + if (!bh)
219 + return -EIO;
221 + i_data = (__le32 *)bh->b_data;
223 + for (i = 0; i < max_entries; i++) {
224 + if (i_data[i]) {
225 + retval = update_ind_extent_range(handle, inode,
226 + le32_to_cpu(i_data[i]),
227 + &blk_count, lb);
228 + if (retval)
229 + break;
230 + } else {
231 + /* Only update the file block number */
232 + blk_count += max_entries;
236 + /* Update the file block number */
237 + *blk_nump = blk_count;
238 + brelse(bh);
239 + return retval;
242 +static int update_tind_extent_range(handle_t *handle, struct inode *inode,
243 + ext4_fsblk_t pblock, int *blk_nump,
244 + struct list_blocks_struct *lb)
246 + struct buffer_head *bh;
247 + __le32 *i_data;
248 + int i, retval = 0;
249 + int blk_count = *blk_nump;
250 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
252 + if (!pblock) {
253 + /* Only update the file block number */
254 + *blk_nump += max_entries * max_entries * max_entries;
255 + return 0;
258 + bh = sb_bread(inode->i_sb, pblock);
259 + if (!bh)
260 + return -EIO;
262 + i_data = (__le32 *)bh->b_data;
264 + for (i = 0; i < max_entries; i++) {
265 + if (i_data[i]) {
266 + retval = update_dind_extent_range(handle, inode,
267 + le32_to_cpu(i_data[i]),
268 + &blk_count, lb);
269 + if (retval)
270 + break;
271 + } else {
272 + /* Only update the file block number */
273 + blk_count += max_entries * max_entries;
277 + /* Update the file block number */
278 + *blk_nump = blk_count;
279 + brelse(bh);
280 + return retval;
285 +static int free_dind_blocks(handle_t *handle,
286 + struct inode *inode, __le32 i_data)
288 + int i;
289 + __le32 *tmp_idata;
290 + struct buffer_head *bh;
291 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
293 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
294 + if (!bh)
295 + return -EIO;
297 + tmp_idata = (__le32 *)bh->b_data;
298 + for (i = 0; i < max_entries; i++) {
299 + if (tmp_idata[i]) {
300 + ext4_free_blocks(handle, inode,
301 + le32_to_cpu(tmp_idata[i]), 1);
304 + brelse(bh);
305 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
307 + return 0;
312 +static int free_tind_blocks(handle_t *handle,
313 + struct inode *inode, __le32 i_data)
315 + int i, retval = 0;
316 + __le32 *tmp_idata;
317 + struct buffer_head *bh;
318 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
320 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
321 + if (!bh)
322 + return -EIO;
324 + tmp_idata = (__le32 *)bh->b_data;
326 + for (i = 0; i < max_entries; i++) {
327 + if (tmp_idata[i]) {
328 + retval = free_dind_blocks(handle,
329 + inode, tmp_idata[i]);
330 + if (retval) {
331 + brelse(bh);
332 + return retval;
336 + brelse(bh);
337 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
339 + return 0;
344 +static int free_ind_block(handle_t *handle, struct inode *inode)
346 + int retval;
347 + struct ext4_inode_info *ei = EXT4_I(inode);
349 + if (ei->i_data[EXT4_IND_BLOCK]) {
351 + ext4_free_blocks(handle, inode,
352 + le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1);
356 + if (ei->i_data[EXT4_DIND_BLOCK]) {
357 + retval = free_dind_blocks(handle, inode,
358 + ei->i_data[EXT4_DIND_BLOCK]);
359 + if (retval)
360 + return retval;
363 + if (ei->i_data[EXT4_TIND_BLOCK]) {
364 + retval = free_tind_blocks(handle, inode,
365 + ei->i_data[EXT4_TIND_BLOCK]);
366 + if (retval)
367 + return retval;
371 + return 0;
373 +static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
374 + struct inode *tmp_inode, int retval)
376 + struct ext4_inode_info *ei = EXT4_I(inode);
377 + struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
380 + retval = free_ind_block(handle, inode);
381 + if (retval)
382 + goto err_out;
384 + /*
385 + * One credit accounted for writing the
386 + * i_data field of the original inode
387 + */
388 + if ((retval = ext4_journal_extend(handle, 1)) != 0) {
390 + retval = ext4_journal_restart(handle, 1);
391 + if (retval)
392 + goto err_out;
395 + /*
396 + * We have the extent map build with the tmp inode.
397 + * Now copy the i_data across
398 + */
399 + ei->i_flags |= EXT4_EXTENTS_FL;
400 + memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
402 + /*
403 + * Update i_blocks with the new blocks that got
404 + * allocated while adding extents for extent index
405 + * blocks.
407 + * While converting to extents we need not
408 + * update the orignal inode i_blocks for extent blocks
409 + * via quota APIs. The quota update happened via tmp_inode already.
410 + */
411 + spin_lock(&inode->i_lock);
412 + inode->i_blocks += tmp_inode->i_blocks;
413 + spin_unlock(&inode->i_lock);
415 + ext4_mark_inode_dirty(handle, inode);
417 +err_out:
419 + return retval;
422 +/* Will go away */
423 +static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
425 + ext4_fsblk_t block;
427 + block = le32_to_cpu(ix->ei_leaf);
428 + block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
429 + return block;
432 +static int free_ext_idx(handle_t *handle, struct inode *inode,
433 + struct ext4_extent_idx *ix)
435 + int i, retval = 0;
436 + ext4_fsblk_t block;
437 + struct buffer_head *bh;
438 + struct ext4_extent_header *eh;
441 + block = idx_pblock(ix);
442 + bh = sb_bread(inode->i_sb, block);
443 + if (!bh)
444 + return -EIO;
446 + eh = (struct ext4_extent_header *)bh->b_data;
447 + if (eh->eh_depth == 0) {
449 + brelse(bh);
450 + ext4_free_blocks(handle, inode, block, 1);
452 + } else {
454 + ix = EXT_FIRST_INDEX(eh);
455 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
456 + retval = free_ext_idx(handle, inode, ix);
457 + if (retval)
458 + return retval;
463 + return retval;
467 + * Free the extent meta data blocks only
468 + */
469 +static int free_ext_block(handle_t *handle, struct inode *inode)
471 + int i, retval = 0;
472 + struct ext4_inode_info *ei = EXT4_I(inode);
473 + struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
474 + struct ext4_extent_idx *ix;
475 + if (eh->eh_depth == 0) {
476 + /*
477 + * No extra blocks allocated for extent meta data
478 + */
479 + return 0;
481 + ix = EXT_FIRST_INDEX(eh);
482 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
483 + retval = free_ext_idx(handle, inode, ix);
484 + if (retval)
485 + return retval;
488 + return retval;
491 +int ext4_ext_migrate(struct inode * inode, struct file * filp,
492 + unsigned int cmd, unsigned long arg)
494 + handle_t *handle;
495 + int retval = 0, i;
496 + __le32 *i_data;
497 + int blk_count = 0;
498 + struct ext4_inode_info *ei;
499 + struct inode *tmp_inode = NULL;
500 + struct list_blocks_struct lb;
501 + unsigned long max_entries;
504 + if (!test_opt(inode->i_sb, EXTENTS)) {
505 + /*
506 + * if mounted with noextents
507 + * we don't allow the migrate
508 + */
509 + return -EINVAL;
512 + if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
513 + return -EINVAL;
515 + mutex_lock(&EXT4_I(inode)->truncate_mutex);
518 + handle = ext4_journal_start(inode,
519 + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
520 + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
521 + 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
522 + + 1);
523 + if (IS_ERR(handle)) {
524 + retval = PTR_ERR(handle);
525 + goto err_out;
528 + tmp_inode = ext4_new_inode(handle,
529 + inode->i_sb->s_root->d_inode,
530 + S_IFREG);
532 + if (IS_ERR(tmp_inode)) {
533 + retval = -ENOMEM;
534 + ext4_journal_stop(handle);
535 + tmp_inode = NULL;
536 + goto err_out;
539 + i_size_write(tmp_inode, i_size_read(inode));
540 + /*
541 + * We don't want the inode to be reclaimed
542 + * if we got interrupted in between. We have
543 + * this tmp inode carrying reference to the
544 + * data blocks of the original file. We set
545 + * the i_nlink to zero at the last stage after
546 + * switching the original file to extent format
547 + */
548 + tmp_inode->i_nlink = 1;
550 + ext4_ext_tree_init(handle, tmp_inode);
551 + ext4_orphan_add(handle, tmp_inode);
552 + ext4_journal_stop(handle);
554 + ei = EXT4_I(inode);
555 + i_data = ei->i_data;
556 + memset(&lb, 0, sizeof(lb));
558 + /* 32 bit block address 4 bytes */
559 + max_entries = inode->i_sb->s_blocksize >> 2;
561 + /*
562 + * start with one credit accounted for
563 + * superblock modification.
565 + * For the tmp_inode we already have commited the
566 + * trascation that created the inode. Later as and
567 + * when we add extents we extent the journal
568 + */
569 + handle = ext4_journal_start(inode, 1);
570 + for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
572 + if (i_data[i]) {
573 + retval = update_extent_range(handle, tmp_inode,
574 + le32_to_cpu(i_data[i]),
575 + blk_count, &lb);
576 + if (retval)
577 + goto err_out;
581 + if (i_data[EXT4_IND_BLOCK]) {
582 + retval = update_ind_extent_range(handle, tmp_inode,
583 + le32_to_cpu(i_data[EXT4_IND_BLOCK]),
584 + &blk_count, &lb);
585 + if (retval)
586 + goto err_out;
587 + } else {
588 + blk_count += max_entries;
591 + if (i_data[EXT4_DIND_BLOCK]) {
592 + retval = update_dind_extent_range(handle, tmp_inode,
593 + le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
594 + &blk_count, &lb);
595 + if (retval)
596 + goto err_out;
597 + } else {
598 + blk_count += max_entries * max_entries;
602 + if (i_data[EXT4_TIND_BLOCK]) {
603 + retval = update_tind_extent_range(handle, tmp_inode,
604 + le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
605 + &blk_count, &lb);
606 + if (retval)
607 + goto err_out;
610 + /*
611 + * Build the last extent
612 + */
613 + retval = finish_range(handle, tmp_inode, &lb);
615 +err_out:
616 + /*
617 + * We are either freeing extent information or indirect
618 + * blocks. During this we touch superblock, group descriptor
619 + * and block bitmap. Later we mark the tmp_inode dirty
620 + * via ext4_ext_tree_init. So allocate a credit of 4
621 + * We may update quota (user and group).
623 + * FIXME!! we may be touching bitmaps in different block groups.
624 + */
626 + if (ext4_journal_extend(handle,
627 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) {
629 + ext4_journal_restart(handle,
630 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
633 + if (retval) {
634 + /*
635 + * Failure case delete the extent information with the
636 + * tmp_inode
637 + */
638 + free_ext_block(handle, tmp_inode);
640 + } else {
642 + retval = ext4_ext_swap_inode_data(handle, inode,
643 + tmp_inode, retval);
646 + /*
647 + * Mark the tmp_inode as of size zero
648 + */
649 + i_size_write(tmp_inode, 0);
651 + /*
652 + * set the i_blocks count to zero
653 + * so that the ext4_delete_inode does the
654 + * right job
656 + * We don't need to take the i_lock because
657 + * the inode is not visible to user space.
658 + */
659 + tmp_inode->i_blocks = 0;
661 + /* Reset the extent details */
662 + ext4_ext_tree_init(handle, tmp_inode);
664 + /*
665 + * Set the i_nlink to zero so that
666 + * generic_drop_inode really deletes the
667 + * inode
668 + */
669 + tmp_inode->i_nlink = 0;
671 + ext4_journal_stop(handle);
673 + mutex_unlock(&EXT4_I(inode)->truncate_mutex);
675 + if (tmp_inode)
676 + iput(tmp_inode);
678 + return retval;
680 Index: linux-2.6.23-rc6/include/linux/ext4_fs.h
681 ===================================================================
682 --- linux-2.6.23-rc6.orig/include/linux/ext4_fs.h 2007-09-20 17:26:12.000000000 -0700
683 +++ linux-2.6.23-rc6/include/linux/ext4_fs.h 2007-09-20 17:26:15.000000000 -0700
684 @@ -255,6 +255,7 @@ struct ext4_new_group_data {
685 #endif
686 #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
687 #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
688 +#define EXT4_IOC_MIGRATE _IO('f', 7)
691 * ioctl commands in 32 bit emulation
692 @@ -981,6 +982,9 @@ extern int ext4_ioctl (struct inode *, s
693 unsigned long);
694 extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
696 +/* migrate.c */
697 +extern int ext4_ext_migrate (struct inode *, struct file *, unsigned int,
698 + unsigned long);
699 /* namei.c */
700 extern int ext4_orphan_add(handle_t *, struct inode *);
701 extern int ext4_orphan_del(handle_t *, struct inode *);