1 Add EXT4_IOC_MIGRATE ioctl
3 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5 The patch below adds an ioctl for migrating an ext3 indirect-block-mapped inode
6 to an ext4 extent-mapped inode.
8 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
13 fs/ext4/migrate.c | 630 ++++++++++++++++++++++++++++++++++++++++++++++++
14 include/linux/ext4_fs.h | 4
15 4 files changed, 638 insertions(+), 1 deletion(-)
18 Index: linux-2.6.23-rc6/fs/ext4/Makefile
19 ===================================================================
20 --- linux-2.6.23-rc6.orig/fs/ext4/Makefile 2007-09-18 17:18:59.000000000 -0700
21 +++ linux-2.6.23-rc6/fs/ext4/Makefile 2007-09-20 17:26:15.000000000 -0700
22 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
24 ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
25 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
27 + ext4_jbd2.o migrate.o
29 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
30 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
31 Index: linux-2.6.23-rc6/fs/ext4/ioctl.c
32 ===================================================================
33 --- linux-2.6.23-rc6.orig/fs/ext4/ioctl.c 2007-09-18 17:18:59.000000000 -0700
34 +++ linux-2.6.23-rc6/fs/ext4/ioctl.c 2007-09-20 17:26:15.000000000 -0700
35 @@ -249,6 +249,9 @@ flags_err:
39 + case EXT4_IOC_MIGRATE:
40 + return ext4_ext_migrate(inode, filp, cmd, arg);
45 Index: linux-2.6.23-rc6/fs/ext4/migrate.c
46 ===================================================================
47 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
48 +++ linux-2.6.23-rc6/fs/ext4/migrate.c 2007-09-20 17:26:15.000000000 -0700
51 + * Copyright IBM Corporation, 2007
52 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
54 + * This program is free software; you can redistribute it and/or modify it
55 + * under the terms of version 2.1 of the GNU Lesser General Public License
56 + * as published by the Free Software Foundation.
58 + * This program is distributed in the hope that it would be useful, but
59 + * WITHOUT ANY WARRANTY; without even the implied warranty of
60 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
64 +#include <linux/module.h>
65 +#include <linux/ext4_jbd2.h>
66 +#include <linux/ext4_fs_extents.h>
68 +struct list_blocks_struct {
69 + int first_block, last_block;
70 + ext4_fsblk_t first_pblock, last_pblock;
74 +static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
76 + ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
77 + ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
80 +static int finish_range(handle_t *handle, struct inode *inode,
81 + struct list_blocks_struct *lb)
84 + int retval = 0, needed;
85 + struct ext4_extent newext;
86 + struct ext4_ext_path *path;
87 + if (lb->first_pblock == 0)
90 + /* Add the extent to temp inode*/
91 + newext.ee_block = cpu_to_le32(lb->first_block);
92 + newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block +1);
93 + ext4_ext_store_pblock(&newext, lb->first_pblock);
94 + path = ext4_ext_find_extent(inode, lb->first_block, NULL);
97 + retval = PTR_ERR(path);
102 + * Calculate the credits needed for inserting this extent.
103 + * Since we are doing this in a loop we may accumulate extra
104 + * credits. But below we try not to accumulate too many
105 + * of them by restarting the journal.
107 + needed = ext4_ext_calc_credits_for_insert(inode, path);
110 + * Make sure the credits we accumulated are not really high
113 + if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
115 + retval = ext4_journal_restart(handle, needed);
121 + if (needed && (retval = ext4_journal_extend(handle, needed)) != 0) {
123 + * If not able to extend the journal, restart the journal
125 + retval = ext4_journal_restart(handle, needed);
130 + retval = ext4_ext_insert_extent(handle, inode, path, &newext);
133 + lb->first_pblock = 0;
136 +static int update_extent_range(handle_t *handle, struct inode *inode,
137 + ext4_fsblk_t pblock, int blk_num,
138 + struct list_blocks_struct *lb)
143 + * See if we can add on to the existing range (if it exists)
145 + if (lb->first_pblock &&
146 + (lb->last_pblock+1 == pblock) &&
147 + (lb->last_block+1 == blk_num)) {
148 + lb->last_pblock = pblock;
149 + lb->last_block = blk_num;
153 + * Start a new range.
155 + retval = finish_range(handle, inode, lb);
156 + lb->first_pblock = lb->last_pblock = pblock;
157 + lb->first_block = lb->last_block = blk_num;
163 +static int update_ind_extent_range(handle_t *handle, struct inode *inode,
164 + ext4_fsblk_t pblock, int *blk_nump,
165 + struct list_blocks_struct *lb)
167 + struct buffer_head *bh;
170 + int blk_count = *blk_nump;
171 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
174 + /* Only update the file block number */
175 + *blk_nump += max_entries;
179 + bh = sb_bread(inode->i_sb, pblock);
183 + i_data = (__le32 *)bh->b_data;
185 + for (i = 0; i < max_entries; i++, blk_count++) {
187 + retval = update_extent_range(handle, inode,
188 + le32_to_cpu(i_data[i]),
195 + /* Update the file block number */
196 + *blk_nump = blk_count;
201 +static int update_dind_extent_range(handle_t *handle, struct inode *inode,
202 + ext4_fsblk_t pblock, int *blk_nump,
203 + struct list_blocks_struct *lb)
205 + struct buffer_head *bh;
208 + int blk_count = *blk_nump;
209 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
212 + /* Only update the file block number */
213 + *blk_nump += max_entries * max_entries;
217 + bh = sb_bread(inode->i_sb, pblock);
221 + i_data = (__le32 *)bh->b_data;
223 + for (i = 0; i < max_entries; i++) {
225 + retval = update_ind_extent_range(handle, inode,
226 + le32_to_cpu(i_data[i]),
231 + /* Only update the file block number */
232 + blk_count += max_entries;
236 + /* Update the file block number */
237 + *blk_nump = blk_count;
242 +static int update_tind_extent_range(handle_t *handle, struct inode *inode,
243 + ext4_fsblk_t pblock, int *blk_nump,
244 + struct list_blocks_struct *lb)
246 + struct buffer_head *bh;
249 + int blk_count = *blk_nump;
250 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
253 + /* Only update the file block number */
254 + *blk_nump += max_entries * max_entries * max_entries;
258 + bh = sb_bread(inode->i_sb, pblock);
262 + i_data = (__le32 *)bh->b_data;
264 + for (i = 0; i < max_entries; i++) {
266 + retval = update_dind_extent_range(handle, inode,
267 + le32_to_cpu(i_data[i]),
272 + /* Only update the file block number */
273 + blk_count += max_entries * max_entries;
277 + /* Update the file block number */
278 + *blk_nump = blk_count;
285 +static int free_dind_blocks(handle_t *handle,
286 + struct inode *inode, __le32 i_data)
290 + struct buffer_head *bh;
291 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
293 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
297 + tmp_idata = (__le32 *)bh->b_data;
298 + for (i = 0; i < max_entries; i++) {
299 + if (tmp_idata[i]) {
300 + ext4_free_blocks(handle, inode,
301 + le32_to_cpu(tmp_idata[i]), 1);
305 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
312 +static int free_tind_blocks(handle_t *handle,
313 + struct inode *inode, __le32 i_data)
317 + struct buffer_head *bh;
318 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
320 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
324 + tmp_idata = (__le32 *)bh->b_data;
326 + for (i = 0; i < max_entries; i++) {
327 + if (tmp_idata[i]) {
328 + retval = free_dind_blocks(handle,
329 + inode, tmp_idata[i]);
337 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
344 +static int free_ind_block(handle_t *handle, struct inode *inode)
347 + struct ext4_inode_info *ei = EXT4_I(inode);
349 + if (ei->i_data[EXT4_IND_BLOCK]) {
351 + ext4_free_blocks(handle, inode,
352 + le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1);
356 + if (ei->i_data[EXT4_DIND_BLOCK]) {
357 + retval = free_dind_blocks(handle, inode,
358 + ei->i_data[EXT4_DIND_BLOCK]);
363 + if (ei->i_data[EXT4_TIND_BLOCK]) {
364 + retval = free_tind_blocks(handle, inode,
365 + ei->i_data[EXT4_TIND_BLOCK]);
373 +static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
374 + struct inode *tmp_inode, int retval)
376 + struct ext4_inode_info *ei = EXT4_I(inode);
377 + struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
380 + retval = free_ind_block(handle, inode);
385 + * One credit accounted for writing the
386 + * i_data field of the original inode
388 + if ((retval = ext4_journal_extend(handle, 1)) != 0) {
390 + retval = ext4_journal_restart(handle, 1);
396 + * We have the extent map built with the tmp inode.
397 + * Now copy the i_data across
399 + ei->i_flags |= EXT4_EXTENTS_FL;
400 + memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
403 + * Update i_blocks with the new blocks that got
404 + * allocated while adding extents for extent index
407 + * While converting to extents we need not
408 + * update the original inode i_blocks for extent blocks
409 + * via quota APIs. The quota update happened via tmp_inode already.
411 + spin_lock(&inode->i_lock);
412 + inode->i_blocks += tmp_inode->i_blocks;
413 + spin_unlock(&inode->i_lock);
415 + ext4_mark_inode_dirty(handle, inode);
423 +static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
425 + ext4_fsblk_t block;
427 + block = le32_to_cpu(ix->ei_leaf);
428 + block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
432 +static int free_ext_idx(handle_t *handle, struct inode *inode,
433 + struct ext4_extent_idx *ix)
436 + ext4_fsblk_t block;
437 + struct buffer_head *bh;
438 + struct ext4_extent_header *eh;
441 + block = idx_pblock(ix);
442 + bh = sb_bread(inode->i_sb, block);
446 + eh = (struct ext4_extent_header *)bh->b_data;
447 + if (eh->eh_depth == 0) {
450 + ext4_free_blocks(handle, inode, block, 1);
454 + ix = EXT_FIRST_INDEX(eh);
455 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
456 + retval = free_ext_idx(handle, inode, ix);
467 + * Free the extent meta data blocks only
469 +static int free_ext_block(handle_t *handle, struct inode *inode)
472 + struct ext4_inode_info *ei = EXT4_I(inode);
473 + struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
474 + struct ext4_extent_idx *ix;
475 + if (eh->eh_depth == 0) {
477 + * No extra blocks allocated for extent meta data
481 + ix = EXT_FIRST_INDEX(eh);
482 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
483 + retval = free_ext_idx(handle, inode, ix);
491 +int ext4_ext_migrate(struct inode * inode, struct file * filp,
492 + unsigned int cmd, unsigned long arg)
498 + struct ext4_inode_info *ei;
499 + struct inode *tmp_inode = NULL;
500 + struct list_blocks_struct lb;
501 + unsigned long max_entries;
504 + if (!test_opt(inode->i_sb, EXTENTS)) {
506 + * if mounted with noextents
507 + * we don't allow the migrate
512 + if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
515 + mutex_lock(&EXT4_I(inode)->truncate_mutex);
518 + handle = ext4_journal_start(inode,
519 + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
520 + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
521 + 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
523 + if (IS_ERR(handle)) {
524 + retval = PTR_ERR(handle);
528 + tmp_inode = ext4_new_inode(handle,
529 + inode->i_sb->s_root->d_inode,
532 + if (IS_ERR(tmp_inode)) {
534 + ext4_journal_stop(handle);
539 + i_size_write(tmp_inode, i_size_read(inode));
541 + * We don't want the inode to be reclaimed
542 + * if we got interrupted in between. We have
543 + * this tmp inode carrying reference to the
544 + * data blocks of the original file. We set
545 + * the i_nlink to zero at the last stage after
546 + * switching the original file to extent format
548 + tmp_inode->i_nlink = 1;
550 + ext4_ext_tree_init(handle, tmp_inode);
551 + ext4_orphan_add(handle, tmp_inode);
552 + ext4_journal_stop(handle);
554 + ei = EXT4_I(inode);
555 + i_data = ei->i_data;
556 + memset(&lb, 0, sizeof(lb));
558 + /* 32 bit block address 4 bytes */
559 + max_entries = inode->i_sb->s_blocksize >> 2;
562 + * start with one credit accounted for
563 + * superblock modification.
565 + * For the tmp_inode we have already committed the
566 + * transaction that created the inode. Later, as and
567 + * when we add extents, we extend the journal
569 + handle = ext4_journal_start(inode, 1);
570 + for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
573 + retval = update_extent_range(handle, tmp_inode,
574 + le32_to_cpu(i_data[i]),
581 + if (i_data[EXT4_IND_BLOCK]) {
582 + retval = update_ind_extent_range(handle, tmp_inode,
583 + le32_to_cpu(i_data[EXT4_IND_BLOCK]),
588 + blk_count += max_entries;
591 + if (i_data[EXT4_DIND_BLOCK]) {
592 + retval = update_dind_extent_range(handle, tmp_inode,
593 + le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
598 + blk_count += max_entries * max_entries;
602 + if (i_data[EXT4_TIND_BLOCK]) {
603 + retval = update_tind_extent_range(handle, tmp_inode,
604 + le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
611 + * Build the last extent
613 + retval = finish_range(handle, tmp_inode, &lb);
617 + * We are either freeing extent information or indirect
618 + * blocks. During this we touch superblock, group descriptor
619 + * and block bitmap. Later we mark the tmp_inode dirty
620 + * via ext4_ext_tree_init. So allocate a credit of 4
621 + * We may update quota (user and group).
623 + * FIXME!! we may be touching bitmaps in different block groups.
626 + if (ext4_journal_extend(handle,
627 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) {
629 + ext4_journal_restart(handle,
630 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
635 + * Failure case delete the extent information with the
638 + free_ext_block(handle, tmp_inode);
642 + retval = ext4_ext_swap_inode_data(handle, inode,
643 + tmp_inode, retval);
647 + * Mark the tmp_inode as of size zero
649 + i_size_write(tmp_inode, 0);
652 + * set the i_blocks count to zero
653 + * so that the ext4_delete_inode does the
656 + * We don't need to take the i_lock because
657 + * the inode is not visible to user space.
659 + tmp_inode->i_blocks = 0;
661 + /* Reset the extent details */
662 + ext4_ext_tree_init(handle, tmp_inode);
665 + * Set the i_nlink to zero so that
666 + * generic_drop_inode really deletes the
669 + tmp_inode->i_nlink = 0;
671 + ext4_journal_stop(handle);
673 + mutex_unlock(&EXT4_I(inode)->truncate_mutex);
680 Index: linux-2.6.23-rc6/include/linux/ext4_fs.h
681 ===================================================================
682 --- linux-2.6.23-rc6.orig/include/linux/ext4_fs.h 2007-09-20 17:26:12.000000000 -0700
683 +++ linux-2.6.23-rc6/include/linux/ext4_fs.h 2007-09-20 17:26:15.000000000 -0700
684 @@ -255,6 +255,7 @@ struct ext4_new_group_data {
686 #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
687 #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
688 +#define EXT4_IOC_MIGRATE _IO('f', 7)
691 * ioctl commands in 32 bit emulation
692 @@ -981,6 +982,9 @@ extern int ext4_ioctl (struct inode *, s
694 extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
697 +extern int ext4_ext_migrate (struct inode *, struct file *, unsigned int,
700 extern int ext4_orphan_add(handle_t *, struct inode *);
701 extern int ext4_orphan_del(handle_t *, struct inode *);