More patch description fixups. Standardize case.
[ext4-patch-queue.git] / ext3-4-migrate.patch
blob00342cbe02570af8f33098ac60a559a91b34c90d
1 ext4: Add EXT4_IOC_MIGRATE ioctl
3 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5 The patch below adds an ioctl for migrating an ext3 indirect block mapped
6 inode to an ext4 extent mapped inode.
8 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
9 ---
11 fs/ext4/Makefile | 2
12 fs/ext4/ioctl.c | 3
13 fs/ext4/migrate.c | 634 ++++++++++++++++++++++++++++++++++++++++++++++++
14 include/linux/ext4_fs.h | 4
15 4 files changed, 642 insertions(+), 1 deletion(-)
16 create mode 100644 fs/ext4/migrate.c
19 Index: linux-2.6.24-rc7/fs/ext4/Makefile
20 ===================================================================
21 --- linux-2.6.24-rc7.orig/fs/ext4/Makefile 2008-01-06 13:45:38.000000000 -0800
22 +++ linux-2.6.24-rc7/fs/ext4/Makefile 2008-01-16 15:02:41.000000000 -0800
23 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
25 ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
26 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
27 - ext4_jbd2.o
28 + ext4_jbd2.o migrate.o
30 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
31 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
32 Index: linux-2.6.24-rc7/fs/ext4/ioctl.c
33 ===================================================================
34 --- linux-2.6.24-rc7.orig/fs/ext4/ioctl.c 2008-01-16 14:49:20.000000000 -0800
35 +++ linux-2.6.24-rc7/fs/ext4/ioctl.c 2008-01-16 15:02:40.000000000 -0800
36 @@ -254,6 +254,9 @@ flags_err:
37 return err;
40 + case EXT4_IOC_MIGRATE:
41 + return ext4_ext_migrate(inode, filp, cmd, arg);
43 default:
44 return -ENOTTY;
46 Index: linux-2.6.24-rc7/fs/ext4/migrate.c
47 ===================================================================
48 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
49 +++ linux-2.6.24-rc7/fs/ext4/migrate.c 2008-01-16 15:03:01.000000000 -0800
50 @@ -0,0 +1,634 @@
51 +/*
52 + * Copyright IBM Corporation, 2007
53 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
54 + *
55 + * This program is free software; you can redistribute it and/or modify it
56 + * under the terms of version 2.1 of the GNU Lesser General Public License
57 + * as published by the Free Software Foundation.
58 + *
59 + * This program is distributed in the hope that it would be useful, but
60 + * WITHOUT ANY WARRANTY; without even the implied warranty of
61 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
62 + *
63 + */
65 +#include <linux/module.h>
66 +#include <linux/ext4_jbd2.h>
67 +#include <linux/ext4_fs_extents.h>
69 +struct list_blocks_struct {
70 + ext4_lblk_t first_block, last_block;
71 + ext4_fsblk_t first_pblock, last_pblock;
72 +};
74 +/* will go away */
75 +static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
77 + ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
78 + ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1)
79 + & 0xffff);
82 +static int finish_range(handle_t *handle, struct inode *inode,
83 + struct list_blocks_struct *lb)
86 + int retval = 0, needed;
87 + struct ext4_extent newext;
88 + struct ext4_ext_path *path;
89 + if (lb->first_pblock == 0)
90 + return 0;
92 + /* Add the extent to the temp inode */
93 + newext.ee_block = cpu_to_le32(lb->first_block);
94 + newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
95 + ext4_ext_store_pblock(&newext, lb->first_pblock);
96 + path = ext4_ext_find_extent(inode, lb->first_block, NULL);
98 + if (IS_ERR(path)) {
99 + retval = PTR_ERR(path);
100 + goto err_out;
103 + /*
104 + * Calculate the credits needed for inserting this extent.
105 + * Since we are doing this in a loop we may accumulate extra
106 + * credits. But below we try not to accumulate too many
107 + * of them by restarting the journal.
108 + */
109 + needed = ext4_ext_calc_credits_for_insert(inode, path);
111 + /*
112 + * Make sure the credit we accumulated is not really high
113 + */
115 + if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
117 + retval = ext4_journal_restart(handle, needed);
118 + if (retval)
119 + goto err_out;
123 + if (needed) {
124 + retval = ext4_journal_extend(handle, needed);
125 + if (retval != 0) {
126 + /*
127 + * If we are not able to extend the journal, restart the journal
128 + */
129 + retval = ext4_journal_restart(handle, needed);
130 + if (retval)
131 + goto err_out;
135 + retval = ext4_ext_insert_extent(handle, inode, path, &newext);
137 +err_out:
138 + lb->first_pblock = 0;
139 + return retval;
141 +static int update_extent_range(handle_t *handle, struct inode *inode,
142 + ext4_fsblk_t pblock, ext4_lblk_t blk_num,
143 + struct list_blocks_struct *lb)
145 + int retval;
147 + /*
148 + * See if we can add on to the existing range (if it exists)
149 + */
150 + if (lb->first_pblock &&
151 + (lb->last_pblock+1 == pblock) &&
152 + (lb->last_block+1 == blk_num)) {
153 + lb->last_pblock = pblock;
154 + lb->last_block = blk_num;
155 + return 0;
157 + /*
158 + * Start a new range.
159 + */
160 + retval = finish_range(handle, inode, lb);
161 + lb->first_pblock = lb->last_pblock = pblock;
162 + lb->first_block = lb->last_block = blk_num;
164 + return retval;
168 +static int update_ind_extent_range(handle_t *handle, struct inode *inode,
169 + ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
170 + struct list_blocks_struct *lb)
172 + struct buffer_head *bh;
173 + __le32 *i_data;
174 + int i, retval = 0;
175 + ext4_lblk_t blk_count = *blk_nump;
176 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
178 + if (!pblock) {
179 + /* Only update the file block number */
180 + *blk_nump += max_entries;
181 + return 0;
184 + bh = sb_bread(inode->i_sb, pblock);
185 + if (!bh)
186 + return -EIO;
188 + i_data = (__le32 *)bh->b_data;
190 + for (i = 0; i < max_entries; i++, blk_count++) {
191 + if (i_data[i]) {
192 + retval = update_extent_range(handle, inode,
193 + le32_to_cpu(i_data[i]),
194 + blk_count, lb);
195 + if (retval)
196 + break;
200 + /* Update the file block number */
201 + *blk_nump = blk_count;
202 + brelse(bh);
203 + return retval;
206 +static int update_dind_extent_range(handle_t *handle, struct inode *inode,
207 + ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
208 + struct list_blocks_struct *lb)
210 + struct buffer_head *bh;
211 + __le32 *i_data;
212 + int i, retval = 0;
213 + ext4_lblk_t blk_count = *blk_nump;
214 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
216 + if (!pblock) {
217 + /* Only update the file block number */
218 + *blk_nump += max_entries * max_entries;
219 + return 0;
222 + bh = sb_bread(inode->i_sb, pblock);
223 + if (!bh)
224 + return -EIO;
226 + i_data = (__le32 *)bh->b_data;
228 + for (i = 0; i < max_entries; i++) {
229 + if (i_data[i]) {
230 + retval = update_ind_extent_range(handle, inode,
231 + le32_to_cpu(i_data[i]),
232 + &blk_count, lb);
233 + if (retval)
234 + break;
235 + } else {
236 + /* Only update the file block number */
237 + blk_count += max_entries;
241 + /* Update the file block number */
242 + *blk_nump = blk_count;
243 + brelse(bh);
244 + return retval;
247 +static int update_tind_extent_range(handle_t *handle, struct inode *inode,
248 + ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
249 + struct list_blocks_struct *lb)
251 + struct buffer_head *bh;
252 + __le32 *i_data;
253 + int i, retval = 0;
254 + ext4_lblk_t blk_count = *blk_nump;
255 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
257 + if (!pblock) {
258 + /* Only update the file block number */
259 + *blk_nump += max_entries * max_entries * max_entries;
260 + return 0;
263 + bh = sb_bread(inode->i_sb, pblock);
264 + if (!bh)
265 + return -EIO;
267 + i_data = (__le32 *)bh->b_data;
269 + for (i = 0; i < max_entries; i++) {
270 + if (i_data[i]) {
271 + retval = update_dind_extent_range(handle, inode,
272 + le32_to_cpu(i_data[i]),
273 + &blk_count, lb);
274 + if (retval)
275 + break;
276 + } else {
277 + /* Only update the file block number */
278 + blk_count += max_entries * max_entries;
282 + /* Update the file block number */
283 + *blk_nump = blk_count;
284 + brelse(bh);
285 + return retval;
290 +static int free_dind_blocks(handle_t *handle,
291 + struct inode *inode, __le32 i_data)
293 + int i;
294 + __le32 *tmp_idata;
295 + struct buffer_head *bh;
296 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
298 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
299 + if (!bh)
300 + return -EIO;
302 + tmp_idata = (__le32 *)bh->b_data;
303 + for (i = 0; i < max_entries; i++) {
304 + if (tmp_idata[i]) {
305 + ext4_free_blocks(handle, inode,
306 + le32_to_cpu(tmp_idata[i]), 1);
309 + brelse(bh);
310 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
312 + return 0;
317 +static int free_tind_blocks(handle_t *handle,
318 + struct inode *inode, __le32 i_data)
320 + int i, retval = 0;
321 + __le32 *tmp_idata;
322 + struct buffer_head *bh;
323 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
325 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
326 + if (!bh)
327 + return -EIO;
329 + tmp_idata = (__le32 *)bh->b_data;
331 + for (i = 0; i < max_entries; i++) {
332 + if (tmp_idata[i]) {
333 + retval = free_dind_blocks(handle,
334 + inode, tmp_idata[i]);
335 + if (retval) {
336 + brelse(bh);
337 + return retval;
341 + brelse(bh);
342 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
344 + return 0;
349 +static int free_ind_block(handle_t *handle, struct inode *inode)
351 + int retval;
352 + struct ext4_inode_info *ei = EXT4_I(inode);
354 + if (ei->i_data[EXT4_IND_BLOCK]) {
356 + ext4_free_blocks(handle, inode,
357 + le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1);
361 + if (ei->i_data[EXT4_DIND_BLOCK]) {
362 + retval = free_dind_blocks(handle, inode,
363 + ei->i_data[EXT4_DIND_BLOCK]);
364 + if (retval)
365 + return retval;
368 + if (ei->i_data[EXT4_TIND_BLOCK]) {
369 + retval = free_tind_blocks(handle, inode,
370 + ei->i_data[EXT4_TIND_BLOCK]);
371 + if (retval)
372 + return retval;
376 + return 0;
378 +static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
379 + struct inode *tmp_inode, int retval)
381 + struct ext4_inode_info *ei = EXT4_I(inode);
382 + struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
385 + retval = free_ind_block(handle, inode);
386 + if (retval)
387 + goto err_out;
389 + /*
390 + * One credit accounted for writing the
391 + * i_data field of the original inode
392 + */
393 + retval = ext4_journal_extend(handle, 1);
394 + if (retval != 0) {
395 + retval = ext4_journal_restart(handle, 1);
396 + if (retval)
397 + goto err_out;
400 + /*
401 + * We have the extent map built with the tmp inode.
402 + * Now copy the i_data across
403 + */
404 + ei->i_flags |= EXT4_EXTENTS_FL;
405 + memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
407 + /*
408 + * Update i_blocks with the new blocks that got
409 + * allocated while adding extents for extent index
410 + * blocks.
412 + * While converting to extents we need not
413 + * update the original inode i_blocks for extent blocks
414 + * via quota APIs. The quota update happened via tmp_inode already.
415 + */
416 + spin_lock(&inode->i_lock);
417 + inode->i_blocks += tmp_inode->i_blocks;
418 + spin_unlock(&inode->i_lock);
420 + ext4_mark_inode_dirty(handle, inode);
422 +err_out:
424 + return retval;
427 +/* Will go away */
428 +static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
430 + ext4_fsblk_t block;
432 + block = le32_to_cpu(ix->ei_leaf_lo);
433 + block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
434 + return block;
437 +static int free_ext_idx(handle_t *handle, struct inode *inode,
438 + struct ext4_extent_idx *ix)
440 + int i, retval = 0;
441 + ext4_fsblk_t block;
442 + struct buffer_head *bh;
443 + struct ext4_extent_header *eh;
446 + block = idx_pblock(ix);
447 + bh = sb_bread(inode->i_sb, block);
448 + if (!bh)
449 + return -EIO;
451 + eh = (struct ext4_extent_header *)bh->b_data;
452 + if (eh->eh_depth == 0) {
454 + brelse(bh);
455 + ext4_free_blocks(handle, inode, block, 1);
457 + } else {
459 + ix = EXT_FIRST_INDEX(eh);
460 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
461 + retval = free_ext_idx(handle, inode, ix);
462 + if (retval)
463 + return retval;
468 + return retval;
472 + * Free the extent meta data blocks only
473 + */
474 +static int free_ext_block(handle_t *handle, struct inode *inode)
476 + int i, retval = 0;
477 + struct ext4_inode_info *ei = EXT4_I(inode);
478 + struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
479 + struct ext4_extent_idx *ix;
480 + if (eh->eh_depth == 0) {
481 + /*
482 + * No extra blocks allocated for extent meta data
483 + */
484 + return 0;
486 + ix = EXT_FIRST_INDEX(eh);
487 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
488 + retval = free_ext_idx(handle, inode, ix);
489 + if (retval)
490 + return retval;
493 + return retval;
496 +int ext4_ext_migrate(struct inode *inode, struct file *filp,
497 + unsigned int cmd, unsigned long arg)
499 + handle_t *handle;
500 + int retval = 0, i;
501 + __le32 *i_data;
502 + ext4_lblk_t blk_count = 0;
503 + struct ext4_inode_info *ei;
504 + struct inode *tmp_inode = NULL;
505 + struct list_blocks_struct lb;
506 + unsigned long max_entries;
509 + if (!test_opt(inode->i_sb, EXTENTS)) {
510 + /*
511 + * if mounted with noextents
512 + * we don't allow the migrate
513 + */
514 + return -EINVAL;
517 + if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
518 + return -EINVAL;
520 + down_write(&EXT4_I(inode)->i_data_sem);
523 + handle = ext4_journal_start(inode,
524 + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
525 + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
526 + 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
527 + + 1);
528 + if (IS_ERR(handle)) {
529 + retval = PTR_ERR(handle);
530 + goto err_out;
533 + tmp_inode = ext4_new_inode(handle,
534 + inode->i_sb->s_root->d_inode,
535 + S_IFREG);
537 + if (IS_ERR(tmp_inode)) {
538 + retval = -ENOMEM;
539 + ext4_journal_stop(handle);
540 + tmp_inode = NULL;
541 + goto err_out;
544 + i_size_write(tmp_inode, i_size_read(inode));
545 + /*
546 + * We don't want the inode to be reclaimed
547 + * if we got interrupted in between. We have
548 + * this tmp inode carrying reference to the
549 + * data blocks of the original file. We set
550 + * the i_nlink to zero at the last stage after
551 + * switching the original file to extent format
552 + */
553 + tmp_inode->i_nlink = 1;
555 + ext4_ext_tree_init(handle, tmp_inode);
556 + ext4_orphan_add(handle, tmp_inode);
557 + ext4_journal_stop(handle);
559 + ei = EXT4_I(inode);
560 + i_data = ei->i_data;
561 + memset(&lb, 0, sizeof(lb));
563 + /* 32 bit block address 4 bytes */
564 + max_entries = inode->i_sb->s_blocksize >> 2;
566 + /*
567 + * start with one credit accounted for
568 + * superblock modification.
570 + * For the tmp_inode we have already committed the
571 + * transaction that created the inode. Later as and
572 + * when we add extents we extend the journal
573 + */
574 + handle = ext4_journal_start(inode, 1);
575 + for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
577 + if (i_data[i]) {
578 + retval = update_extent_range(handle, tmp_inode,
579 + le32_to_cpu(i_data[i]),
580 + blk_count, &lb);
581 + if (retval)
582 + goto err_out;
586 + if (i_data[EXT4_IND_BLOCK]) {
587 + retval = update_ind_extent_range(handle, tmp_inode,
588 + le32_to_cpu(i_data[EXT4_IND_BLOCK]),
589 + &blk_count, &lb);
590 + if (retval)
591 + goto err_out;
592 + } else {
593 + blk_count += max_entries;
596 + if (i_data[EXT4_DIND_BLOCK]) {
597 + retval = update_dind_extent_range(handle, tmp_inode,
598 + le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
599 + &blk_count, &lb);
600 + if (retval)
601 + goto err_out;
602 + } else {
603 + blk_count += max_entries * max_entries;
607 + if (i_data[EXT4_TIND_BLOCK]) {
608 + retval = update_tind_extent_range(handle, tmp_inode,
609 + le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
610 + &blk_count, &lb);
611 + if (retval)
612 + goto err_out;
615 + /*
616 + * Build the last extent
617 + */
618 + retval = finish_range(handle, tmp_inode, &lb);
620 +err_out:
621 + /*
622 + * We are either freeing extent information or indirect
623 + * blocks. During this we touch superblock, group descriptor
624 + * and block bitmap. Later we mark the tmp_inode dirty
625 + * via ext4_ext_tree_init. So allocate a credit of 4
626 + * We may update quota (user and group).
628 + * FIXME!! we may be touching bitmaps in different block groups.
629 + */
631 + if (ext4_journal_extend(handle,
632 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) {
634 + ext4_journal_restart(handle,
635 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
638 + if (retval) {
639 + /*
640 + * Failure case: delete the extent information held by the
641 + * tmp_inode
642 + */
643 + free_ext_block(handle, tmp_inode);
645 + } else {
647 + retval = ext4_ext_swap_inode_data(handle, inode,
648 + tmp_inode, retval);
651 + /*
652 + * Mark the tmp_inode as of size zero
653 + */
654 + i_size_write(tmp_inode, 0);
656 + /*
657 + * set the i_blocks count to zero
658 + * so that the ext4_delete_inode does the
659 + * right job
661 + * We don't need to take the i_lock because
662 + * the inode is not visible to user space.
663 + */
664 + tmp_inode->i_blocks = 0;
666 + /* Reset the extent details */
667 + ext4_ext_tree_init(handle, tmp_inode);
669 + /*
670 + * Set the i_nlink to zero so that
671 + * generic_drop_inode really deletes the
672 + * inode
673 + */
674 + tmp_inode->i_nlink = 0;
676 + ext4_journal_stop(handle);
678 + up_write(&EXT4_I(inode)->i_data_sem);
680 + if (tmp_inode)
681 + iput(tmp_inode);
683 + return retval;
685 Index: linux-2.6.24-rc7/include/linux/ext4_fs.h
686 ===================================================================
687 --- linux-2.6.24-rc7.orig/include/linux/ext4_fs.h 2008-01-16 14:49:20.000000000 -0800
688 +++ linux-2.6.24-rc7/include/linux/ext4_fs.h 2008-01-16 15:02:42.000000000 -0800
689 @@ -243,6 +243,7 @@ struct ext4_new_group_data {
690 #endif
691 #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
692 #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
693 +#define EXT4_IOC_MIGRATE _IO('f', 7)
696 * ioctl commands in 32 bit emulation
697 @@ -983,6 +984,9 @@ extern int ext4_ioctl (struct inode *, s
698 unsigned long);
699 extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
701 +/* migrate.c */
702 +extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
703 + unsigned long);
704 /* namei.c */
705 extern int ext4_orphan_add(handle_t *, struct inode *);
706 extern int ext4_orphan_del(handle_t *, struct inode *);