From 783c39194a9e836b5c5baec219f394088b6d736e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 3 Dec 2017 21:16:55 -0500 Subject: [PATCH] Rebase to v4.15-rc2 --- add-IOMAP_F_DATA_INLINE-flag | 49 --- add-ext4-journal-lazy-mount-option | 71 ++- add-ext4_should_use_dax | 55 --- add-iomap-support-for-inline-data | 114 ----- add-journal-no-cleanup-option | 35 +- add-sanity-check-for-encryption-and-dax | 69 --- add-support-for-online-resizing-with-bigalloc | 427 ------------------- ext4-convert-timers-to-use-timer-setup | 58 --- fix-fallocate-and-delalloc-i_size-interaction | 58 --- fix-little-inconsistencies | 46 -- improve-smp-scalability-for-inode-generation | 82 ---- jbd2-convert-timers-to-use-timer_setup | 58 --- journal-superblock-changes | 40 +- mention-noload-when-recovering-on-ro-device | 46 -- prevent-data-corruption-with-inline-data-and-dax | 82 ---- prevent-data-corruption-with-journaling-and-dax | 75 ---- remove-duplicate-extended-attributes-defs | 81 ---- retry-allocations-conservatively | 51 --- series | 23 +- switch-from-blkno-to-disk-offset | 216 ---------- switch-to-iomap-for-SEEK_HOLE_DATA | 521 ----------------------- timestamps | 45 +- 22 files changed, 81 insertions(+), 2221 deletions(-) delete mode 100644 add-IOMAP_F_DATA_INLINE-flag delete mode 100644 add-ext4_should_use_dax delete mode 100644 add-iomap-support-for-inline-data delete mode 100644 add-sanity-check-for-encryption-and-dax delete mode 100644 add-support-for-online-resizing-with-bigalloc delete mode 100644 ext4-convert-timers-to-use-timer-setup delete mode 100644 fix-fallocate-and-delalloc-i_size-interaction delete mode 100644 fix-little-inconsistencies delete mode 100644 improve-smp-scalability-for-inode-generation delete mode 100644 jbd2-convert-timers-to-use-timer_setup delete mode 100644 mention-noload-when-recovering-on-ro-device delete mode 100644 prevent-data-corruption-with-inline-data-and-dax delete mode 100644 prevent-data-corruption-with-journaling-and-dax delete mode 100644 remove-duplicate-extended-attributes-defs delete mode 100644 retry-allocations-conservatively delete mode 100644 switch-from-blkno-to-disk-offset delete mode 100644 switch-to-iomap-for-SEEK_HOLE_DATA diff --git a/add-IOMAP_F_DATA_INLINE-flag b/add-IOMAP_F_DATA_INLINE-flag deleted file mode 100644 index 08611847..00000000 --- a/add-IOMAP_F_DATA_INLINE-flag +++ /dev/null @@ -1,49 +0,0 @@ -iomap: Add IOMAP_F_DATA_INLINE flag - -From: Andreas Gruenbacher - -Add a new IOMAP_F_DATA_INLINE flag to indicate that a mapping is in a -disk area that contains data as well as metadata. In iomap_fiemap, map -this flag to FIEMAP_EXTENT_DATA_INLINE. - -Signed-off-by: Andreas Gruenbacher -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara ---- - fs/iomap.c | 2 ++ - include/linux/iomap.h | 5 +++-- - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/fs/iomap.c b/fs/iomap.c -index 622c731c57a0..20b303ac109b 100644 ---- a/fs/iomap.c -+++ b/fs/iomap.c -@@ -510,6 +510,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, - flags |= FIEMAP_EXTENT_MERGED; - if (iomap->flags & IOMAP_F_SHARED) - flags |= FIEMAP_EXTENT_SHARED; -+ if (iomap->flags & IOMAP_F_DATA_INLINE) -+ flags |= FIEMAP_EXTENT_DATA_INLINE; - - return fiemap_fill_next_extent(fi, iomap->offset, - iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0, -diff --git a/include/linux/iomap.h b/include/linux/iomap.h -index 7b8a615fa021..2b0790dbd6ea 100644 ---- a/include/linux/iomap.h -+++ b/include/linux/iomap.h -@@ -26,8 +26,9 @@ struct vm_fault; - /* - * Flags that only need to be reported for IOMAP_REPORT requests: - */ --#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ --#define IOMAP_F_SHARED 0x20 /* block shared with another file */ -+#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ -+#define IOMAP_F_SHARED 0x20 /* block shared with another file */ -+#define IOMAP_F_DATA_INLINE 0x40 /* data inline in the inode */ - - /* - * Magic value for addr: --- -2.13.3 - - diff --git a/add-ext4-journal-lazy-mount-option b/add-ext4-journal-lazy-mount-option index 8948f811..14cf8b7a 100644 --- a/add-ext4-journal-lazy-mount-option +++ b/add-ext4-journal-lazy-mount-option @@ -9,15 +9,15 @@ Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 + fs/ext4/inode.c | 2 +- - fs/ext4/ioctl.c | 48 +++++++++++++++++++++++++++++++++--------------- + fs/ext4/ioctl.c | 42 ++++++++++++++++++++++++++++++++---------- fs/ext4/super.c | 56 ++++++++++++++++++++++++++++++++++++++++++++------------ - 4 files changed, 79 insertions(+), 28 deletions(-) + 4 files changed, 78 insertions(+), 23 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index d3108a82f0fb..e76696b303d8 100644 +index d1389c9fb8a1..fa05cccda31c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h -@@ -1145,6 +1145,7 @@ struct ext4_inode_info { +@@ -1109,6 +1109,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_JOURNAL_NOCLEANUP 0x2000000 /* Preserve the journal on unmount */ @@ -26,10 +26,10 @@ index d3108a82f0fb..e76696b303d8 100644 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 4b5892e31b80..374a9767a0ed 100644 +index baa9f5ea16f1..a709e77016e0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c -@@ -3190,7 +3190,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) +@@ -3275,7 +3275,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) filemap_write_and_wait(mapping); } @@ -39,7 +39,7 @@ index 4b5892e31b80..374a9767a0ed 100644 /* * This is a REALLY heavyweight approach, but the use of diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index b383ebf4020c..ba234eb5a1bd 100644 +index 1eec25014f62..1d1bf751d142 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -242,6 +242,20 @@ static int ext4_ioctl_setflags(struct inode *inode, @@ -63,8 +63,8 @@ index b383ebf4020c..ba234eb5a1bd 100644 if ((flags ^ oldflags) & EXT4_EXTENTS_FL) migrate = 1; -@@ -489,6 +503,22 @@ int ext4_goingdown(struct super_block *sb, unsigned long arg) - return 0; +@@ -630,6 +644,22 @@ static long ext4_ioctl_group_add(struct file *file, + return err; } +/* @@ -86,7 +86,7 @@ index b383ebf4020c..ba234eb5a1bd 100644 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); -@@ -606,11 +636,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +@@ -748,11 +778,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto group_extend_out; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); @@ -99,20 +99,7 @@ index b383ebf4020c..ba234eb5a1bd 100644 if (err == 0) err = err2; mnt_drop_write_file(filp); -@@ -696,11 +722,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) - goto group_add_out; - - err = ext4_group_add(sb, &input); -- if (EXT4_SB(sb)->s_journal) { -- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); -- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); -- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); -- } -+ err2 = flush_fs_group_descriptors(sb); - if (err == 0) - err = err2; - mnt_drop_write_file(filp); -@@ -786,11 +808,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +@@ -890,11 +916,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto resizefs_out; err = ext4_resize_fs(sb, n_blocks_count); @@ -126,29 +113,29 @@ index b383ebf4020c..ba234eb5a1bd 100644 err = err2; mnt_drop_write_file(filp); diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index d411cc613bf0..00ecc96be253 100644 +index 06da44b3d8e1..ba9a6ef78454 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c -@@ -868,7 +868,8 @@ static void ext4_put_super(struct super_block *sb) +@@ -888,7 +888,8 @@ static void ext4_put_super(struct super_block *sb) + ext4_mb_release(sb); ext4_ext_release(sb); - if (!(sb->s_flags & MS_RDONLY) && !aborted && -- !test_opt(sb, JOURNAL_NOCLEANUP)) { -+ !test_opt(sb, JOURNAL_NOCLEANUP) && +- if (!sb_rdonly(sb) && !aborted && !test_opt(sb, JOURNAL_NOCLEANUP)) { ++ if (!sb_rdonly(sb) && !aborted && !test_opt(sb, JOURNAL_NOCLEANUP) && + !test_opt(sb, JOURNAL_LAZY)) { ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } -@@ -1310,6 +1311,7 @@ enum { +@@ -1349,6 +1350,7 @@ enum { Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, Opt_journal_nocleanup, Opt_journal_cleanup, + Opt_journal_nolazy, Opt_journal_lazy, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, - Opt_max_dir_size_kb, Opt_nojournal_checksum, + Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, }; -@@ -1396,6 +1398,8 @@ static const match_table_t tokens = { - {Opt_test_dummy_encryption, "test_dummy_encryption"}, +@@ -1437,6 +1439,8 @@ static const match_table_t tokens = { + {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ {Opt_journal_nocleanup, "journal_nocleanup"}, {Opt_journal_cleanup, "journal_cleanup"}, + {Opt_journal_lazy, "journal_lazy"}, @@ -156,8 +143,8 @@ index d411cc613bf0..00ecc96be253 100644 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */ -@@ -1604,6 +1608,8 @@ static const struct mount_opts { - {Opt_test_dummy_encryption, 0, MOPT_GTE0}, +@@ -1647,6 +1651,8 @@ static const struct mount_opts { + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_journal_nocleanup, EXT4_MOUNT_JOURNAL_NOCLEANUP, MOPT_SET}, {Opt_journal_cleanup, EXT4_MOUNT_JOURNAL_NOCLEANUP, MOPT_CLEAR}, + {Opt_journal_lazy, EXT4_MOUNT_JOURNAL_LAZY, MOPT_SET}, @@ -165,7 +152,7 @@ index d411cc613bf0..00ecc96be253 100644 {Opt_err, 0, 0} }; -@@ -4355,6 +4361,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) +@@ -4456,6 +4462,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_flags |= JBD2_NO_CLEANUP; else journal->j_flags &= ~JBD2_NO_CLEANUP; @@ -176,7 +163,7 @@ index d411cc613bf0..00ecc96be253 100644 write_unlock(&journal->j_state_lock); } -@@ -4588,6 +4598,24 @@ static int ext4_load_journal(struct super_block *sb, +@@ -4690,6 +4700,24 @@ static int ext4_load_journal(struct super_block *sb, EXT4_SB(sb)->s_journal = journal; ext4_clear_journal_err(sb, es); @@ -201,7 +188,7 @@ index d411cc613bf0..00ecc96be253 100644 return 0; } -@@ -4674,6 +4702,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, +@@ -4776,6 +4804,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, { journal_t *journal = EXT4_SB(sb)->s_journal; @@ -211,7 +198,7 @@ index d411cc613bf0..00ecc96be253 100644 if (!ext4_has_feature_journal(sb)) { BUG_ON(journal != NULL); return; -@@ -4810,21 +4841,20 @@ static int ext4_freeze(struct super_block *sb) +@@ -4911,21 +4942,20 @@ static int ext4_freeze(struct super_block *sb) journal = EXT4_SB(sb)->s_journal; if (journal) { @@ -242,8 +229,8 @@ index d411cc613bf0..00ecc96be253 100644 error = ext4_commit_super(sb, 1); out: if (journal) -@@ -4842,7 +4872,7 @@ static int ext4_unfreeze(struct super_block *sb) - if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb))) +@@ -4943,7 +4973,7 @@ static int ext4_unfreeze(struct super_block *sb) + if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb))) return 0; - if (EXT4_SB(sb)->s_journal) { @@ -251,7 +238,7 @@ index d411cc613bf0..00ecc96be253 100644 /* Reset the needs_recovery flag before the fs is unlocked. */ ext4_set_feature_journal_needs_recovery(sb); } -@@ -5358,6 +5388,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, +@@ -5469,6 +5499,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, * We don't need to lock updates but journal_flush() could * otherwise be livelocked... */ diff --git a/add-ext4_should_use_dax b/add-ext4_should_use_dax deleted file mode 100644 index c5c07c54..00000000 --- a/add-ext4_should_use_dax +++ /dev/null @@ -1,55 +0,0 @@ -ext4: add ext4_should_use_dax() - -From: Ross Zwisler - -This helper, in the spirit of ext4_should_dioread_nolock() et al., replaces -the complex conditional in ext4_set_inode_flags(). - -Signed-off-by: Ross Zwisler -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara ---- - fs/ext4/inode.c | 19 ++++++++++++++++--- - 1 file changed, 16 insertions(+), 3 deletions(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 3207333..525dd63 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -4577,6 +4577,21 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) - !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); - } - -+static bool ext4_should_use_dax(struct inode *inode) -+{ -+ if (!test_opt(inode->i_sb, DAX)) -+ return false; -+ if (!S_ISREG(inode->i_mode)) -+ return false; -+ if (ext4_should_journal_data(inode)) -+ return false; -+ if (ext4_has_inline_data(inode)) -+ return false; -+ if (ext4_encrypted_inode(inode)) -+ return false; -+ return true; -+} -+ - void ext4_set_inode_flags(struct inode *inode) - { - unsigned int flags = EXT4_I(inode)->i_flags; -@@ -4592,9 +4607,7 @@ void ext4_set_inode_flags(struct inode *inode) - new_fl |= S_NOATIME; - if (flags & EXT4_DIRSYNC_FL) - new_fl |= S_DIRSYNC; -- if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) && -- !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) && -- !ext4_encrypted_inode(inode)) -+ if (ext4_should_use_dax(inode)) - new_fl |= S_DAX; - inode_set_flags(inode, new_fl, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); --- -2.9.5 - - diff --git a/add-iomap-support-for-inline-data b/add-iomap-support-for-inline-data deleted file mode 100644 index 46cb5cd9..00000000 --- a/add-iomap-support-for-inline-data +++ /dev/null @@ -1,114 +0,0 @@ -ext4: Add iomap support for inline data - -From: Andreas Gruenbacher - -Report inline data as a IOMAP_F_DATA_INLINE mapping. This allows to use -iomap_seek_hole and iomap_seek_data in ext4_llseek and makes switching -to iomap_fiemap in ext4_fiemap easier. - -Signed-off-by: Andreas Gruenbacher -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara ---- - fs/ext4/ext4.h | 4 ++++ - fs/ext4/inline.c | 33 +++++++++++++++++++++++++++++++++ - fs/ext4/inode.c | 16 ++++++++++++++-- - 3 files changed, 51 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index e2abe01c8c6b..ae3e4a25821a 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -3048,6 +3048,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, - extern int ext4_inline_data_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - int *has_inline, __u64 start, __u64 len); -+ -+struct iomap; -+extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap); -+ - extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); -diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c -index 28c5c3abddb3..f0bbc8cb6555 100644 ---- a/fs/ext4/inline.c -+++ b/fs/ext4/inline.c -@@ -12,6 +12,7 @@ - * GNU General Public License for more details. - */ - -+#include - #include - - #include "ext4_jbd2.h" -@@ -1827,6 +1828,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) - return ret; - } - -+int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) -+{ -+ __u64 addr; -+ int error = -EAGAIN; -+ struct ext4_iloc iloc; -+ -+ down_read(&EXT4_I(inode)->xattr_sem); -+ if (!ext4_has_inline_data(inode)) -+ goto out; -+ -+ error = ext4_get_inode_loc(inode, &iloc); -+ if (error) -+ goto out; -+ -+ addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; -+ addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; -+ addr += offsetof(struct ext4_inode, i_block); -+ -+ brelse(iloc.bh); -+ -+ iomap->addr = addr; -+ iomap->offset = 0; -+ iomap->length = min_t(loff_t, ext4_get_inline_size(inode), -+ i_size_read(inode)); -+ iomap->type = 0; -+ iomap->flags = IOMAP_F_DATA_INLINE; -+ -+out: -+ up_read(&EXT4_I(inode)->xattr_sem); -+ return error; -+} -+ - int ext4_inline_data_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, - int *has_inline, __u64 start, __u64 len) -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index d9e633c12aae..7755f41bdfc3 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3404,8 +3404,20 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - struct ext4_map_blocks map; - int ret; - -- if (WARN_ON_ONCE(ext4_has_inline_data(inode))) -- return -ERANGE; -+ -+ if (flags & IOMAP_REPORT) { -+ if (ext4_has_inline_data(inode)) { -+ ret = ext4_inline_data_iomap(inode, iomap); -+ if (ret != -EAGAIN) { -+ if (ret == 0 && offset >= iomap->length) -+ ret = -ENOENT; -+ return ret; -+ } -+ } -+ } else { -+ if (WARN_ON_ONCE(ext4_has_inline_data(inode))) -+ return -ERANGE; -+ } - - map.m_lblk = first_block; - map.m_len = last_block - first_block + 1; --- -2.13.3 - - diff --git a/add-journal-no-cleanup-option b/add-journal-no-cleanup-option index a1b3f599..831bd740 100644 --- a/add-journal-no-cleanup-option +++ b/add-journal-no-cleanup-option @@ -6,16 +6,16 @@ journal replay code. Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 + - fs/ext4/super.c | 12 +++++++++++- + fs/ext4/super.c | 11 ++++++++++- fs/jbd2/journal.c | 12 +++++++++--- include/linux/jbd2.h | 1 + - 4 files changed, 22 insertions(+), 4 deletions(-) + 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 9ebde0cd632e..e247c2a2a06c 100644 +index 4e091eae38b1..d1389c9fb8a1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h -@@ -1150,6 +1150,7 @@ struct ext4_inode_info { +@@ -1108,6 +1108,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ @@ -24,20 +24,19 @@ index 9ebde0cd632e..e247c2a2a06c 100644 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index c9202be4cc1a..40d9cac94797 100644 +index 6783af7ec115..06da44b3d8e1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c -@@ -889,7 +889,8 @@ static void ext4_put_super(struct super_block *sb) +@@ -888,7 +888,7 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); -- if (!(sb->s_flags & MS_RDONLY) && !aborted) { -+ if (!(sb->s_flags & MS_RDONLY) && !aborted && -+ !test_opt(sb, JOURNAL_NOCLEANUP)) { +- if (!sb_rdonly(sb) && !aborted) { ++ if (!sb_rdonly(sb) && !aborted && !test_opt(sb, JOURNAL_NOCLEANUP)) { ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } -@@ -1348,6 +1349,7 @@ enum { +@@ -1348,6 +1348,7 @@ enum { Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, @@ -45,7 +44,7 @@ index c9202be4cc1a..40d9cac94797 100644 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, }; -@@ -1434,6 +1436,8 @@ static const match_table_t tokens = { +@@ -1434,6 +1435,8 @@ static const match_table_t tokens = { {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_nombcache, "nombcache"}, {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ @@ -54,7 +53,7 @@ index c9202be4cc1a..40d9cac94797 100644 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */ -@@ -1642,6 +1646,8 @@ static const struct mount_opts { +@@ -1642,6 +1645,8 @@ static const struct mount_opts { {Opt_max_dir_size_kb, 0, MOPT_GTE0}, {Opt_test_dummy_encryption, 0, MOPT_GTE0}, {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, @@ -63,7 +62,7 @@ index c9202be4cc1a..40d9cac94797 100644 {Opt_err, 0, 0} }; -@@ -4423,6 +4429,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) +@@ -4447,6 +4452,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; else journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; @@ -75,10 +74,10 @@ index c9202be4cc1a..40d9cac94797 100644 } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index 7c6254330951..352c9491e668 100644 +index b01f07f65d59..218c50dd9dfc 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c -@@ -1708,6 +1708,11 @@ int jbd2_journal_destroy(journal_t *journal) +@@ -1724,6 +1724,11 @@ int jbd2_journal_destroy(journal_t *journal) if (journal->j_running_transaction) jbd2_journal_commit_transaction(journal); @@ -90,7 +89,7 @@ index 7c6254330951..352c9491e668 100644 /* Force any old transactions to disk */ /* Totally anal locking here... */ -@@ -1735,7 +1740,9 @@ int jbd2_journal_destroy(journal_t *journal) +@@ -1751,7 +1756,9 @@ int jbd2_journal_destroy(journal_t *journal) spin_unlock(&journal->j_list_lock); if (journal->j_sb_buffer) { @@ -101,7 +100,7 @@ index 7c6254330951..352c9491e668 100644 mutex_lock_io(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); -@@ -1746,8 +1753,7 @@ int jbd2_journal_destroy(journal_t *journal) +@@ -1762,8 +1769,7 @@ int jbd2_journal_destroy(journal_t *journal) jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); @@ -112,7 +111,7 @@ index 7c6254330951..352c9491e668 100644 } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h -index 606b6bce3a5b..fd14143d244b 100644 +index 296d1e0ea87b..58ec2b764abd 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1130,6 +1130,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) diff --git a/add-sanity-check-for-encryption-and-dax b/add-sanity-check-for-encryption-and-dax deleted file mode 100644 index 9562990e..00000000 --- a/add-sanity-check-for-encryption-and-dax +++ /dev/null @@ -1,69 +0,0 @@ -ext4: add sanity check for encryption + DAX - -From: Ross Zwisler - -We prevent DAX from being used on inodes which are using ext4's built in -encryption via a check in ext4_set_inode_flags(). We do have what appears -to be an unsafe transition of S_DAX in ext4_set_context(), though, where -S_DAX can get disabled without us doing a proper writeback + invalidate. - -There are also issues with mm-level races when changing the value of S_DAX, -as well as issues with the VM_MIXEDMAP flag: - -https://www.spinics.net/lists/linux-xfs/msg09859.html - -I actually think we are safe in this case because of the following: - -1) You can't encrypt an existing file. Encryption can only be set on an -empty directory, with new inodes in that directory being created with -encryption turned on, so I don't think it's possible to turn encryption on -for a file that has open DAX mmaps or outstanding I/Os. - -2) There is no way to turn encryption off on a given file. Once an inode -is encrypted, it stays encrypted for the life of that inode, so we don't -have to worry about the case where we turn encryption off and S_DAX -suddenly turns on. - -3) The only way we end up in ext4_set_context() to turn on encryption is -when we are creating a new file in the encrypted directory. This happens -as part of ext4_create() before the inode has been allowed to do any I/O. -Here's the call tree: - - ext4_create() - __ext4_new_inode() - ext4_set_inode_flags() // sets S_DAX - fscrypt_inherit_context() - fscrypt_get_encryption_info(); - ext4_set_context() // sets EXT4_INODE_ENCRYPT, clears S_DAX - -So, I actually think it's safe to transition S_DAX in ext4_set_context() -without any locking, writebacks or invalidations. I've added a -WARN_ON_ONCE() sanity check to make sure that we are notified if we ever -encounter a case where we are encrypting an inode that already has data, -in which case we need to add code to safely transition S_DAX. - -Signed-off-by: Ross Zwisler -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara ---- - fs/ext4/super.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 4251e50..c090780 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -1159,6 +1159,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, - if (inode->i_ino == EXT4_ROOT_INO) - return -EPERM; - -+ if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) -+ return -EINVAL; -+ - res = ext4_convert_inline_data(inode); - if (res) - return res; --- -2.9.5 - - diff --git a/add-support-for-online-resizing-with-bigalloc b/add-support-for-online-resizing-with-bigalloc deleted file mode 100644 index 29b1e1b1..00000000 --- a/add-support-for-online-resizing-with-bigalloc +++ /dev/null @@ -1,427 +0,0 @@ -ext4: add support for online resizing with bigalloc - -From: harshads - -This patch adds support for online resizing on bigalloc file system by -implementing EXT4_IOC_RESIZE_FS ioctl. Old resize interfaces (add -block groups and extend last block group) are left untouched. Tests -performed with cluster sizes of 1, 2, 4 and 8 blocks (of size 4k) per -cluster. I will add these tests to xfstests. - -Signed-off-by: Harshad Shirwadkar -Signed-off-by: Theodore Ts'o ---- - fs/ext4/ext4.h | 4 +-- - fs/ext4/ioctl.c | 6 ---- - fs/ext4/mballoc.c | 28 ++++++++------- - fs/ext4/resize.c | 104 ++++++++++++++++++++++++++++++++++-------------------- - 4 files changed, 84 insertions(+), 58 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index b84aa1ca480a..53dcfd808567 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -534,8 +534,8 @@ struct ext4_new_group_data { - __u64 inode_table; - __u32 blocks_count; - __u16 reserved_blocks; -- __u16 unused; -- __u32 free_blocks_count; -+ __u16 mdata_blocks; -+ __u32 free_clusters_count; - }; - - /* Indexes used to index group tables in ext4_new_group_data */ -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index 28cc412852af..e165b06767ff 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -702,12 +702,6 @@ group_add_out: - int err = 0, err2 = 0; - ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; - -- if (ext4_has_feature_bigalloc(sb)) { -- ext4_msg(sb, KERN_ERR, -- "Online resizing not (yet) supported with bigalloc"); -- return -EOPNOTSUPP; -- } -- - if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, - sizeof(__u64))) { - return -EFAULT; -diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c -index c1ab3ec30423..b3922e1c7eec 100644 ---- a/fs/ext4/mballoc.c -+++ b/fs/ext4/mballoc.c -@@ -4925,8 +4925,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, - struct ext4_group_desc *desc; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_buddy e4b; -- int err = 0, ret, blk_free_count; -- ext4_grpblk_t blocks_freed; -+ int err = 0, ret, free_clusters_count; -+ ext4_grpblk_t clusters_freed; -+ ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block); -+ ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1); -+ unsigned long cluster_count = last_cluster - first_cluster + 1; - - ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); - -@@ -4938,8 +4941,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, - * Check to see if we are freeing blocks across a group - * boundary. - */ -- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { -- ext4_warning(sb, "too much blocks added to group %u", -+ if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) { -+ ext4_warning(sb, "too many blocks added to group %u", - block_group); - err = -EINVAL; - goto error_return; -@@ -4985,14 +4988,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, - if (err) - goto error_return; - -- for (i = 0, blocks_freed = 0; i < count; i++) { -+ for (i = 0, clusters_freed = 0; i < cluster_count; i++) { - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { - ext4_error(sb, "bit already cleared for block %llu", - (ext4_fsblk_t)(block + i)); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { -- blocks_freed++; -+ clusters_freed++; - } - } - -@@ -5006,19 +5009,20 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, - * them with group lock_held - */ - ext4_lock_group(sb, block_group); -- mb_clear_bits(bitmap_bh->b_data, bit, count); -- mb_free_blocks(NULL, &e4b, bit, count); -- blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); -- ext4_free_group_clusters_set(sb, desc, blk_free_count); -+ mb_clear_bits(bitmap_bh->b_data, bit, cluster_count); -+ mb_free_blocks(NULL, &e4b, bit, cluster_count); -+ free_clusters_count = clusters_freed + -+ ext4_free_group_clusters(sb, desc); -+ ext4_free_group_clusters_set(sb, desc, free_clusters_count); - ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh); - ext4_group_desc_csum_set(sb, block_group, desc); - ext4_unlock_group(sb, block_group); - percpu_counter_add(&sbi->s_freeclusters_counter, -- EXT4_NUM_B2C(sbi, blocks_freed)); -+ clusters_freed); - - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, block_group); -- atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), -+ atomic64_add(clusters_freed, - &sbi->s_flex_groups[flex_group].free_clusters); - } - -diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c -index cf681004b196..b3c8c7be8838 100644 ---- a/fs/ext4/resize.c -+++ b/fs/ext4/resize.c -@@ -105,7 +105,7 @@ static int verify_group_input(struct super_block *sb, - - overhead = ext4_group_overhead_blocks(sb, group); - metaend = start + overhead; -- input->free_blocks_count = free_blocks_count = -+ input->free_clusters_count = free_blocks_count = - input->blocks_count - 2 - overhead - sbi->s_itb_per_group; - - if (test_opt(sb, DEBUG)) -@@ -256,6 +256,7 @@ static int ext4_alloc_group_tables(struct super_block *sb, - ext4_group_t last_group; - unsigned overhead; - __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; -+ int i; - - BUG_ON(flex_gd->count == 0 || group_data == NULL); - -@@ -292,7 +293,7 @@ next_group: - group_data[bb_index].block_bitmap = start_blk++; - group = ext4_get_group_number(sb, start_blk - 1); - group -= group_data[0].group; -- group_data[group].free_blocks_count--; -+ group_data[group].mdata_blocks++; - flex_gd->bg_flags[group] &= uninit_mask; - } - -@@ -303,7 +304,7 @@ next_group: - group_data[ib_index].inode_bitmap = start_blk++; - group = ext4_get_group_number(sb, start_blk - 1); - group -= group_data[0].group; -- group_data[group].free_blocks_count--; -+ group_data[group].mdata_blocks++; - flex_gd->bg_flags[group] &= uninit_mask; - } - -@@ -322,15 +323,22 @@ next_group: - if (start_blk + itb > next_group_start) { - flex_gd->bg_flags[group + 1] &= uninit_mask; - overhead = start_blk + itb - next_group_start; -- group_data[group + 1].free_blocks_count -= overhead; -+ group_data[group + 1].mdata_blocks += overhead; - itb -= overhead; - } - -- group_data[group].free_blocks_count -= itb; -+ group_data[group].mdata_blocks += itb; - flex_gd->bg_flags[group] &= uninit_mask; - start_blk += EXT4_SB(sb)->s_itb_per_group; - } - -+ /* Update free clusters count to exclude metadata blocks */ -+ for (i = 0; i < flex_gd->count; i++) { -+ group_data[i].free_clusters_count -= -+ EXT4_NUM_B2C(EXT4_SB(sb), -+ group_data[i].mdata_blocks); -+ } -+ - if (test_opt(sb, DEBUG)) { - int i; - group = group_data[0].group; -@@ -340,12 +348,13 @@ next_group: - flexbg_size); - - for (i = 0; i < flex_gd->count; i++) { -- printk(KERN_DEBUG "adding %s group %u: %u " -- "blocks (%d free)\n", -+ ext4_debug( -+ "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n", - ext4_bg_has_super(sb, group + i) ? "normal" : - "no-super", group + i, - group_data[i].blocks_count, -- group_data[i].free_blocks_count); -+ group_data[i].free_clusters_count, -+ group_data[i].mdata_blocks); - } - } - return 0; -@@ -397,7 +406,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) - } - - /* -- * set_flexbg_block_bitmap() mark @count blocks starting from @block used. -+ * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used. - * - * Helper function for ext4_setup_new_group_blocks() which set . - * -@@ -407,22 +416,26 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) - */ - static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, - struct ext4_new_flex_group_data *flex_gd, -- ext4_fsblk_t block, ext4_group_t count) -+ ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster) - { -+ struct ext4_sb_info *sbi = EXT4_SB(sb); -+ ext4_group_t count = last_cluster - first_cluster + 1; - ext4_group_t count2; - -- ext4_debug("mark blocks [%llu/%u] used\n", block, count); -- for (count2 = count; count > 0; count -= count2, block += count2) { -+ ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster, -+ last_cluster); -+ for (count2 = count; count > 0; -+ count -= count2, first_cluster += count2) { - ext4_fsblk_t start; - struct buffer_head *bh; - ext4_group_t group; - int err; - -- group = ext4_get_group_number(sb, block); -- start = ext4_group_first_block_no(sb, group); -+ group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster)); -+ start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group)); - group -= flex_gd->groups[0].group; - -- count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start); -+ count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start); - if (count2 > count) - count2 = count; - -@@ -443,9 +456,9 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, - err = ext4_journal_get_write_access(handle, bh); - if (err) - return err; -- ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, -- block - start, count2); -- ext4_set_bits(bh->b_data, block - start, count2); -+ ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", -+ first_cluster, first_cluster - start, count2); -+ ext4_set_bits(bh->b_data, first_cluster - start, count2); - - err = ext4_handle_dirty_metadata(handle, NULL, bh); - if (unlikely(err)) -@@ -594,9 +607,10 @@ handle_bb: - if (overhead != 0) { - ext4_debug("mark backup superblock %#04llx (+0)\n", - start); -- ext4_set_bits(bh->b_data, 0, overhead); -+ ext4_set_bits(bh->b_data, 0, -+ EXT4_NUM_B2C(sbi, overhead)); - } -- ext4_mark_bitmap_end(group_data[i].blocks_count, -+ ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), - sb->s_blocksize * 8, bh->b_data); - err = ext4_handle_dirty_metadata(handle, NULL, bh); - if (err) -@@ -641,7 +655,11 @@ handle_ib: - continue; - } - err = set_flexbg_block_bitmap(sb, handle, -- flex_gd, start, count); -+ flex_gd, -+ EXT4_B2C(sbi, start), -+ EXT4_B2C(sbi, -+ start + count -+ - 1)); - if (err) - goto out; - count = group_table_count[j]; -@@ -651,7 +669,11 @@ handle_ib: - - if (count) { - err = set_flexbg_block_bitmap(sb, handle, -- flex_gd, start, count); -+ flex_gd, -+ EXT4_B2C(sbi, start), -+ EXT4_B2C(sbi, -+ start + count -+ - 1)); - if (err) - goto out; - } -@@ -839,7 +861,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, - ext4_std_error(sb, err); - goto exit_inode; - } -- inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; -+ inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> -+ (9 - EXT4_SB(sb)->s_cluster_bits); - ext4_mark_iloc_dirty(handle, inode, &iloc); - memset(gdb_bh->b_data, 0, sb->s_blocksize); - err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); -@@ -934,6 +957,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - { - struct super_block *sb = inode->i_sb; - int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); -+ int cluster_bits = EXT4_SB(sb)->s_cluster_bits; - struct buffer_head **primary; - struct buffer_head *dind; - struct ext4_iloc iloc; -@@ -1009,7 +1033,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - if (!err) - err = err2; - } -- inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; -+ -+ inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits); - ext4_mark_iloc_dirty(handle, inode, &iloc); - - exit_bh: -@@ -1243,7 +1268,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, - ext4_group_t group; - __u16 *bg_flags = flex_gd->bg_flags; - int i, gdb_off, gdb_num, err = 0; -- -+ - - for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { - group = group_data->group; -@@ -1270,7 +1295,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, - - ext4_inode_table_set(sb, gdp, group_data->inode_table); - ext4_free_group_clusters_set(sb, gdp, -- EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); -+ group_data->free_clusters_count); - ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); - if (ext4_has_group_desc_csum(sb)) - ext4_itable_unused_set(sb, gdp, -@@ -1326,7 +1351,7 @@ static void ext4_update_super(struct super_block *sb, - */ - for (i = 0; i < flex_gd->count; i++) { - blocks_count += group_data[i].blocks_count; -- free_blocks += group_data[i].free_blocks_count; -+ free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count); - } - - reserved_blocks = ext4_r_blocks_count(es) * 100; -@@ -1498,17 +1523,18 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, - ext4_fsblk_t n_blocks_count, - unsigned long flexbg_size) - { -- struct ext4_super_block *es = EXT4_SB(sb)->s_es; -+ struct ext4_sb_info *sbi = EXT4_SB(sb); -+ struct ext4_super_block *es = sbi->s_es; - struct ext4_new_group_data *group_data = flex_gd->groups; - ext4_fsblk_t o_blocks_count; - ext4_group_t n_group; - ext4_group_t group; - ext4_group_t last_group; - ext4_grpblk_t last; -- ext4_grpblk_t blocks_per_group; -+ ext4_grpblk_t clusters_per_group; - unsigned long i; - -- blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); -+ clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb); - - o_blocks_count = ext4_blocks_count(es); - -@@ -1529,9 +1555,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, - int overhead; - - group_data[i].group = group + i; -- group_data[i].blocks_count = blocks_per_group; -+ group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb); - overhead = ext4_group_overhead_blocks(sb, group + i); -- group_data[i].free_blocks_count = blocks_per_group - overhead; -+ group_data[i].mdata_blocks = overhead; -+ group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb); - if (ext4_has_group_desc_csum(sb)) { - flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | - EXT4_BG_INODE_UNINIT; -@@ -1545,10 +1572,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, - /* We need to initialize block bitmap of last group. */ - flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; - -- if ((last_group == n_group) && (last != blocks_per_group - 1)) { -- group_data[i - 1].blocks_count = last + 1; -- group_data[i - 1].free_blocks_count -= blocks_per_group- -- last - 1; -+ if ((last_group == n_group) && (last != clusters_per_group - 1)) { -+ group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1); -+ group_data[i - 1].free_clusters_count -= clusters_per_group - -+ last - 1; - } - - return 1; -@@ -1795,7 +1822,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) - } - - /* Do a quick sanity check of the resize inode */ -- if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) -+ if (inode->i_blocks != 1 << (inode->i_blkbits - -+ (9 - sbi->s_cluster_bits))) - goto invalid_resize_inode; - for (i = 0; i < EXT4_N_BLOCKS; i++) { - if (i == EXT4_DIND_BLOCK) { -@@ -1957,7 +1985,7 @@ retry: - if (n_group == o_group) - add = n_blocks_count - o_blocks_count; - else -- add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); -+ add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1)); - if (add > 0) { - err = ext4_group_extend_no_check(sb, o_blocks_count, add); - if (err) --- -2.15.0.rc0.271.g36b669edcc-goog - - diff --git a/ext4-convert-timers-to-use-timer-setup b/ext4-convert-timers-to-use-timer-setup deleted file mode 100644 index db0ab6c9..00000000 --- a/ext4-convert-timers-to-use-timer-setup +++ /dev/null @@ -1,58 +0,0 @@ -ext4: convert timers to use timer_setup() - -From: Kees Cook - -In preparation for unconditionally passing the struct timer_list pointer to -all timer callbacks, switch to using the new timer_setup() and from_timer() -to pass the timer pointer explicitly. - -Signed-off-by: Kees Cook -Signed-off-by: Theodore Ts'o -Reviewed-by: Reviewed-by: Jan Kara -Cc: Andreas Dilger -Cc: linux-ext4@vger.kernel.org ---- - fs/ext4/super.c | 14 +++++--------- - 1 file changed, 5 insertions(+), 9 deletions(-) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index b5d393321b7b..759281cec51f 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -2793,14 +2793,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) - * This function is called once a day if we have errors logged - * on the file system - */ --static void print_daily_error_info(unsigned long arg) -+static void print_daily_error_info(struct timer_list *t) - { -- struct super_block *sb = (struct super_block *) arg; -- struct ext4_sb_info *sbi; -- struct ext4_super_block *es; -- -- sbi = EXT4_SB(sb); -- es = sbi->s_es; -+ struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report); -+ struct super_block *sb = sbi->s_sb; -+ struct ext4_super_block *es = sbi->s_es; - - if (es->s_error_count) - /* fsck newer than v1.41.13 is needed to clean this condition. */ -@@ -3982,8 +3979,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); - spin_lock_init(&sbi->s_next_gen_lock); - -- setup_timer(&sbi->s_err_report, print_daily_error_info, -- (unsigned long) sb); -+ timer_setup(&sbi->s_err_report, print_daily_error_info, 0); - - /* Register extent status tree shrinker */ - if (ext4_es_register_shrinker(sbi)) --- -2.7.4 - - --- -Kees Cook -Pixel Security - diff --git a/fix-fallocate-and-delalloc-i_size-interaction b/fix-fallocate-and-delalloc-i_size-interaction deleted file mode 100644 index e132e28b..00000000 --- a/fix-fallocate-and-delalloc-i_size-interaction +++ /dev/null @@ -1,58 +0,0 @@ -ext4: fix interaction between i_size, fallocate, and delalloc after a crash - -If there are pending writes subject to delayed allocation, then i_size -will show size after the writes have completed, while i_disksize -contains the value of i_size on the disk (since the writes have not -been persisted to disk). - -If fallocate(2) is called with the FALLOC_FL_KEEP_SIZE flag, either -with or without the FALLOC_FL_ZERO_RANGE flag set, and the new size -after the fallocate(2) is between i_size and i_disksize, then after a -crash, if a journal commit has resulted in the changes made by the -fallocate() call to be persisted after a crash, but the delayed -allocation write has not resolved itself, i_size would not be updated, -and this would cause the following e2fsck complaint: - -Inode 12, end of extent exceeds allowed value - (logical block 33, physical block 33441, len 7) - -This can only take place on a sparse file, where the fallocate(2) call -is allocating blocks in a range which is before a pending delayed -allocation write which is extending i_size. Since this situation is -quite rare, and the window in which the crash must take place is -typically < 30 seconds, in practice this condition will rarely happen. - -Nevertheless, it can be triggered in testing, and in particular by -xfstests generic/456. - -Signed-off-by: Theodore Ts'o -Reported-by: Amir Goldstein -Cc: stable@vger.kernel.org ---- - fs/ext4/extents.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 97f0fd06728d..07bca11749d4 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4794,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, - } - - if (!(mode & FALLOC_FL_KEEP_SIZE) && -- offset + len > i_size_read(inode)) { -+ (offset + len > i_size_read(inode) || -+ offset + len > EXT4_I(inode)->i_disksize)) { - new_size = offset + len; - ret = inode_newsize_ok(inode, new_size); - if (ret) -@@ -4965,7 +4966,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - } - - if (!(mode & FALLOC_FL_KEEP_SIZE) && -- offset + len > i_size_read(inode)) { -+ (offset + len > i_size_read(inode) || -+ offset + len > EXT4_I(inode)->i_disksize)) { - new_size = offset + len; - ret = inode_newsize_ok(inode, new_size); - if (ret) diff --git a/fix-little-inconsistencies b/fix-little-inconsistencies deleted file mode 100644 index c68fec3a..00000000 --- a/fix-little-inconsistencies +++ /dev/null @@ -1,46 +0,0 @@ -Documentation: fix little inconsistencies - -From: Pavel Machek - -Fix little inconsistencies in Documentation: make case and spacing -match surrounding text. - -Signed-off-by: Pavel Machek -Signed-off-by: Theodore Ts'o -Reviewed-by: Darrick J. Wong ---- -diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt -index 5a8f7f4..75236c0 100644 ---- a/Documentation/filesystems/ext4.txt -+++ b/Documentation/filesystems/ext4.txt -@@ -94,10 +94,10 @@ Note: More extensive information for getting started with ext4 can be - * ability to pack bitmaps and inode tables into larger virtual groups via the - flex_bg feature - * large file support --* Inode allocation using large virtual block groups via flex_bg -+* inode allocation using large virtual block groups via flex_bg - * delayed allocation - * large block (up to pagesize) support --* efficient new ordered mode in JBD2 and ext4(avoid using buffer head to force -+* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force - the ordering) - - [1] Filesystems with a block size of 1k may see a limit imposed by the -@@ -105,7 +105,7 @@ directory hash tree having a maximum depth of two. - - 2.2 Candidate features for future inclusion - --* Online defrag (patches available but not well tested) -+* online defrag (patches available but not well tested) - * reduced mke2fs time via lazy itable initialization in conjunction with - the uninit_bg feature (capability to do this is available in e2fsprogs - but a kernel thread to do lazy zeroing of unused inode table blocks -@@ -602,7 +602,7 @@ Table of Ext4 specific ioctls - bitmaps and inode table, the userspace tool thus - just passes the new number of blocks. - --EXT4_IOC_SWAP_BOOT Swap i_blocks and associated attributes -+ EXT4_IOC_SWAP_BOOT Swap i_blocks and associated attributes - (like i_blocks, i_size, i_flags, ...) from - the specified inode with inode - EXT4_BOOT_LOADER_INO (#5). This is typically diff --git a/improve-smp-scalability-for-inode-generation b/improve-smp-scalability-for-inode-generation deleted file mode 100644 index c2ebb49e..00000000 --- a/improve-smp-scalability-for-inode-generation +++ /dev/null @@ -1,82 +0,0 @@ -ext4: improve smp scalability for inode generation - -->s_next_generation is protected by s_next_gen_lock but its usage -pattern is very primitive. We don't actually need sequentially -increasing new generation numbers, so let's use prandom_u32() instead. - -Reported-by: Dmitry Monakhov -Signed-off-by: Theodore Ts'o -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 53ce95b52fd8..5e6d7b6f50c7 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1355,8 +1355,6 @@ struct ext4_sb_info { - int s_first_ino; - unsigned int s_inode_readahead_blks; - unsigned int s_inode_goal; -- spinlock_t s_next_gen_lock; -- u32 s_next_generation; - u32 s_hash_seed[4]; - int s_def_hash_version; - int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ -diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c -index ee823022aa34..da79eb5dba40 100644 ---- a/fs/ext4/ialloc.c -+++ b/fs/ext4/ialloc.c -@@ -1138,9 +1138,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, - inode->i_ino); - goto out; - } -- spin_lock(&sbi->s_next_gen_lock); -- inode->i_generation = sbi->s_next_generation++; -- spin_unlock(&sbi->s_next_gen_lock); -+ inode->i_generation = prandom_u32(); - - /* Precompute checksum seed for inode metadata */ - if (ext4_has_metadata_csum(sb)) { -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index 144bbda2b808..23a4766f6678 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -98,7 +99,6 @@ static long swap_inode_boot_loader(struct super_block *sb, - int err; - struct inode *inode_bl; - struct ext4_inode_info *ei_bl; -- struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) - return -EINVAL; -@@ -157,10 +157,8 @@ static long swap_inode_boot_loader(struct super_block *sb, - - inode->i_ctime = inode_bl->i_ctime = current_time(inode); - -- spin_lock(&sbi->s_next_gen_lock); -- inode->i_generation = sbi->s_next_generation++; -- inode_bl->i_generation = sbi->s_next_generation++; -- spin_unlock(&sbi->s_next_gen_lock); -+ inode->i_generation = prandom_u32(); -+ inode_bl->i_generation = prandom_u32(); - - ext4_discard_preallocations(inode); - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 3a278faf5868..9f2e3eb5131f 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -3982,8 +3982,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - } - - sbi->s_gdb_count = db_count; -- get_random_bytes(&sbi->s_next_generation, sizeof(u32)); -- spin_lock_init(&sbi->s_next_gen_lock); - - timer_setup(&sbi->s_err_report, print_daily_error_info, 0); - diff --git a/jbd2-convert-timers-to-use-timer_setup b/jbd2-convert-timers-to-use-timer_setup deleted file mode 100644 index 0df233bd..00000000 --- a/jbd2-convert-timers-to-use-timer_setup +++ /dev/null @@ -1,58 +0,0 @@ -jbd2: convert timers to use timer_setup() - -From: Kees Cook - -In preparation for unconditionally passing the struct timer_list pointer to -all timer callbacks, switch to using the new timer_setup() and from_timer() -to pass the timer pointer explicitly. - -Signed-off-by: Kees Cook -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara -Cc: linux-ext4@vger.kernel.org -Cc: Thomas Gleixner ---- -This requires commit 686fef928bba ("timer: Prepare to change timer -callback argument type") in v4.14-rc3, but should be otherwise -stand-alone. ---- - fs/jbd2/journal.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index 7d5ef3bf3f3e..d2a85c9720e9 100644 ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -165,11 +165,11 @@ static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) - * Helper function used to manage commit timeouts - */ - --static void commit_timeout(unsigned long __data) -+static void commit_timeout(struct timer_list *t) - { -- struct task_struct * p = (struct task_struct *) __data; -+ journal_t *journal = from_timer(journal, t, j_commit_timer); - -- wake_up_process(p); -+ wake_up_process(journal->j_task); - } - - /* -@@ -197,8 +197,7 @@ static int kjournald2(void *arg) - * Set up an interval timer which can be used to trigger a commit wakeup - * after the commit interval expires - */ -- setup_timer(&journal->j_commit_timer, commit_timeout, -- (unsigned long)current); -+ timer_setup(&journal->j_commit_timer, commit_timeout, 0); - - set_freezable(); - --- -2.7.4 - - --- -Kees Cook -Pixel Security - diff --git a/journal-superblock-changes b/journal-superblock-changes index 131f1bb7..3660e9d4 100644 --- a/journal-superblock-changes +++ b/journal-superblock-changes @@ -14,10 +14,10 @@ Signed-off-by: Theodore Ts'o 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 2e03a0a88d92..7d3343cb36a0 100644 +index 7c46693a14d7..6783af7ec115 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c -@@ -2064,9 +2064,10 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset) +@@ -2108,9 +2108,10 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset) } static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, @@ -29,16 +29,16 @@ index 2e03a0a88d92..7d3343cb36a0 100644 int res = 0; if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { -@@ -2074,7 +2075,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, +@@ -2118,7 +2119,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, "forcing read-only mode"); - res = MS_RDONLY; + res = SB_RDONLY; } - if (read_only) + if (read_only || res) goto done; if (!(sbi->s_mount_state & EXT4_VALID_FS)) ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " -@@ -2095,6 +2096,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, +@@ -2139,6 +2140,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_msg(sb, KERN_WARNING, "warning: checktime reached, " "running e2fsck is recommended"); @@ -54,7 +54,7 @@ index 2e03a0a88d92..7d3343cb36a0 100644 if (!sbi->s_journal) es->s_state &= cpu_to_le16(~EXT4_VALID_FS); if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) -@@ -2104,7 +2114,17 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, +@@ -2148,7 +2158,17 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_update_dynamic_rev(sb); if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); @@ -73,20 +73,20 @@ index 2e03a0a88d92..7d3343cb36a0 100644 ext4_commit_super(sb, 1); done: if (test_opt(sb, DEBUG)) -@@ -4032,8 +4052,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) +@@ -4116,8 +4136,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); - sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; - no_journal: - sbi->s_mb_cache = ext4_xattr_create_cache(); - if (!sbi->s_mb_cache) { -@@ -4048,12 +4066,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + if (!test_opt(sb, NO_MBCACHE)) { + sbi->s_ea_block_cache = ext4_xattr_create_cache(); +@@ -4144,12 +4162,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } -- if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) && +- if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && - !ext4_has_feature_encrypt(sb)) { - ext4_set_feature_encrypt(sb); - ext4_commit_super(sb, 1); @@ -95,19 +95,19 @@ index 2e03a0a88d92..7d3343cb36a0 100644 /* * Get the # of file system overhead blocks from the * superblock if present. -@@ -4102,7 +4114,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) +@@ -4198,7 +4210,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } -- if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) -+ err = ext4_setup_super(sb, es, journal_devnum, sb->s_flags & MS_RDONLY); +- if (ext4_setup_super(sb, es, sb_rdonly(sb))) ++ err = ext4_setup_super(sb, es, journal_devnum, sb_rdonly(sb)); + if (err < 0) + goto failed_mount4a; + if (err) - sb->s_flags |= MS_RDONLY; + sb->s_flags |= SB_RDONLY; /* determine the minimum size of new large inodes, if present */ -@@ -4197,6 +4212,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) +@@ -4293,6 +4308,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } #endif /* CONFIG_QUOTA */ @@ -117,7 +117,7 @@ index 2e03a0a88d92..7d3343cb36a0 100644 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; -@@ -4561,15 +4579,6 @@ static int ext4_load_journal(struct super_block *sb, +@@ -4663,15 +4681,6 @@ static int ext4_load_journal(struct super_block *sb, EXT4_SB(sb)->s_journal = journal; ext4_clear_journal_err(sb, es); @@ -133,13 +133,13 @@ index 2e03a0a88d92..7d3343cb36a0 100644 return 0; } -@@ -5032,8 +5041,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) +@@ -5139,8 +5148,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); - if (!ext4_setup_super(sb, es, 0)) -- sb->s_flags &= ~MS_RDONLY; -+ sb->s_flags &= ~MS_RDONLY; +- sb->s_flags &= ~SB_RDONLY; ++ sb->s_flags &= ~SB_RDONLY; + err = ext4_setup_super(sb, es, 0, 0); + if (err) + goto restore_opts; diff --git a/mention-noload-when-recovering-on-ro-device b/mention-noload-when-recovering-on-ro-device deleted file mode 100644 index 21427565..00000000 --- a/mention-noload-when-recovering-on-ro-device +++ /dev/null @@ -1,46 +0,0 @@ -ext4: mention noload when recovering on read-only device - -From: Simon Ruderich - -Help the user to find the appropriate mount option to continue mounting -the file system on a read-only device if the journal requires recovery. - -Signed-off-by: Simon Ruderich -Signed-off-by: Theodore Ts'o ---- -Hello, - -I tried to mount an ext4 which required recovery from a read-only -device and stumbled over this error. I freaked out for a second -(trying to restore from a backup) and it took me a (short) while -to figure out what to do and I thought it would be useful to give -the user a hand by pointing to the required mount option. - -Regards -Simon - - fs/ext4/super.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index d61a70e2193a..f497b79da5cc 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -4589,7 +4589,8 @@ static int ext4_load_journal(struct super_block *sb, - "required on readonly filesystem"); - if (really_read_only) { - ext4_msg(sb, KERN_ERR, "write access " -- "unavailable, cannot proceed"); -+ "unavailable, cannot proceed " -+ "(try mounting with noload)"); - return -EROFS; - } - ext4_msg(sb, KERN_INFO, "write access will " --- -2.14.1 - --- -+ privacy is necessary -+ using gnupg http://gnupg.org -+ public key id: 0x92FEFDB7E44C32F9 - diff --git a/prevent-data-corruption-with-inline-data-and-dax b/prevent-data-corruption-with-inline-data-and-dax deleted file mode 100644 index 470ecd30..00000000 --- a/prevent-data-corruption-with-inline-data-and-dax +++ /dev/null @@ -1,82 +0,0 @@ -ext4: prevent data corruption with inline data + DAX - -From: Ross Zwisler - -If an inode has inline data it is currently prevented from using DAX by a -check in ext4_set_inode_flags(). When the inode grows inline data via -ext4_create_inline_data() or removes its inline data via -ext4_destroy_inline_data_nolock(), the value of S_DAX can change. - -Currently these changes are unsafe because we don't hold off page faults -and I/O, write back dirty radix tree entries and invalidate all mappings. -There are also issues with mm-level races when changing the value of S_DAX, -as well as issues with the VM_MIXEDMAP flag: - -https://www.spinics.net/lists/linux-xfs/msg09859.html - -The unsafe transition of S_DAX can reliably cause data corruption, as shown -by the following fstest: - -https://patchwork.kernel.org/patch/9948381/ - -Fix this issue by preventing the DAX mount option from being used on -filesystems that were created to support inline data. Inline data is an -option given to mkfs.ext4. - -Signed-off-by: Ross Zwisler -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara -CC: stable@vger.kernel.org ---- - fs/ext4/inline.c | 10 ---------- - fs/ext4/super.c | 5 +++++ - 2 files changed, 5 insertions(+), 10 deletions(-) - -diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c -index 28c5c3a..fd95019 100644 ---- a/fs/ext4/inline.c -+++ b/fs/ext4/inline.c -@@ -302,11 +302,6 @@ static int ext4_create_inline_data(handle_t *handle, - EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE; - ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); -- /* -- * Propagate changes to inode->i_flags as well - e.g. S_DAX may -- * get cleared -- */ -- ext4_set_inode_flags(inode); - get_bh(is.iloc.bh); - error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); - -@@ -451,11 +446,6 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, - } - } - ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA); -- /* -- * Propagate changes to inode->i_flags as well - e.g. S_DAX may -- * get set. -- */ -- ext4_set_inode_flags(inode); - - get_bh(is.iloc.bh); - error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index c9e7be5..4251e50 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -3707,6 +3707,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - } - - if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { -+ if (ext4_has_feature_inline_data(sb)) { -+ ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" -+ " that may contain inline data"); -+ goto failed_mount; -+ } - err = bdev_dax_supported(sb, blocksize); - if (err) - goto failed_mount; --- -2.9.5 - - diff --git a/prevent-data-corruption-with-journaling-and-dax b/prevent-data-corruption-with-journaling-and-dax deleted file mode 100644 index cf38085f..00000000 --- a/prevent-data-corruption-with-journaling-and-dax +++ /dev/null @@ -1,75 +0,0 @@ -ext4: prevent data corruption with journaling + DAX - -From: Ross Zwisler - -The current code has the potential for data corruption when changing an -inode's journaling mode, as that can result in a subsequent unsafe change -in S_DAX. - -I've captured an instance of this data corruption in the following fstest: - -https://patchwork.kernel.org/patch/9948377/ - -Prevent this data corruption from happening by disallowing changes to the -journaling mode if the '-o dax' mount option was used. This means that for -a given filesystem we could have a mix of inodes using either DAX or -data journaling, but whatever state the inodes are in will be held for the -duration of the mount. - -Signed-off-by: Ross Zwisler -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara -Cc: stable@vger.kernel.org ---- - fs/ext4/inode.c | 5 ----- - fs/ext4/ioctl.c | 16 +++++++++++++--- - 2 files changed, 13 insertions(+), 8 deletions(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index e963508..3207333 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -5971,11 +5971,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) - ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); - } - ext4_set_aops(inode); -- /* -- * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated. -- * E.g. S_DAX may get cleared / set. -- */ -- ext4_set_inode_flags(inode); - - jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index afb66d4..b0b754b 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -290,10 +290,20 @@ static int ext4_ioctl_setflags(struct inode *inode, - if (err) - goto flags_out; - -- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) -+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { -+ /* -+ * Changes to the journaling mode can cause unsafe changes to -+ * S_DAX if we are using the DAX mount option. -+ */ -+ if (test_opt(inode->i_sb, DAX)) { -+ err = -EBUSY; -+ goto flags_out; -+ } -+ - err = ext4_change_inode_journal_flag(inode, jflag); -- if (err) -- goto flags_out; -+ if (err) -+ goto flags_out; -+ } - if (migrate) { - if (flags & EXT4_EXTENTS_FL) - err = ext4_ext_migrate(inode); --- -2.9.5 - - diff --git a/remove-duplicate-extended-attributes-defs b/remove-duplicate-extended-attributes-defs deleted file mode 100644 index 5fdb0460..00000000 --- a/remove-duplicate-extended-attributes-defs +++ /dev/null @@ -1,81 +0,0 @@ -ext4: remove duplicate extended attributes defs - -From: Ross Zwisler - -The following commit: - -commit 9b7365fc1c82 ("ext4: add FS_IOC_FSSETXATTR/FS_IOC_FSGETXATTR -interface support") - -added several defines related to extended attributes to ext4.h. They were -added within an #ifndef FS_IOC_FSGETXATTR block with the comment: - -/* Until the uapi changes get merged for project quota... */ - -Those uapi changes were merged by this commit: - -commit 334e580a6f97 ("fs: XFS_IOC_FS[SG]SETXATTR to FS_IOC_FS[SG]ETXATTR -promotion") - -so all the definitions needed by ext4 are available in -include/uapi/linux/fs.h. Remove the duplicates from ext4.h. - -Signed-off-by: Ross Zwisler -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara ---- - fs/ext4/ext4.h | 37 ------------------------------------- - 1 file changed, 37 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 84b9da1..83a857f 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -644,43 +644,6 @@ enum { - #define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT - #define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY - --#ifndef FS_IOC_FSGETXATTR --/* Until the uapi changes get merged for project quota... */ -- --#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) --#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) -- --/* -- * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR. -- */ --struct fsxattr { -- __u32 fsx_xflags; /* xflags field value (get/set) */ -- __u32 fsx_extsize; /* extsize field value (get/set)*/ -- __u32 fsx_nextents; /* nextents field value (get) */ -- __u32 fsx_projid; /* project identifier (get/set) */ -- unsigned char fsx_pad[12]; --}; -- --/* -- * Flags for the fsx_xflags field -- */ --#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ --#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ --#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ --#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ --#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ --#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ --#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ --#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ --#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ --#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ --#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ --#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ --#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ --#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ --#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ --#endif /* !defined(FS_IOC_FSGETXATTR) */ -- - #define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR - #define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR - --- -2.9.5 - - diff --git a/retry-allocations-conservatively b/retry-allocations-conservatively deleted file mode 100644 index 33dc319f..00000000 --- a/retry-allocations-conservatively +++ /dev/null @@ -1,51 +0,0 @@ -ext4: retry allocations conservatively - -Now that we no longer try to reserve metadata blocks for delayed -allocations (which tended to overestimate the required number of -blocks significantly), we really don't need retry allocations when the -disk is very full as aggressively any more. - -The only time when it makes sense to retry an allocation is if we have -freshly deleted blocks that will only become available after a -transaction commit. And if we lose that race, it's not worth it to -try more than once. - -Signed-off-by: Theodore Ts'o ---- - fs/ext4/balloc.c | 15 +++++++-------- - 1 file changed, 7 insertions(+), 8 deletions(-) - -diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c -index e04ec868e37e..a3798b25a8dc 100644 ---- a/fs/ext4/balloc.c -+++ b/fs/ext4/balloc.c -@@ -600,22 +600,21 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, - * ext4_should_retry_alloc() is called when ENOSPC is returned, and if - * it is profitable to retry the operation, this function will wait - * for the current or committing transaction to complete, and then -- * return TRUE. -- * -- * if the total number of retries exceed three times, return FALSE. -+ * return TRUE. We will only retry once. - */ - int ext4_should_retry_alloc(struct super_block *sb, int *retries) - { - if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || -- (*retries)++ > 3 || -+ (*retries)++ > 1 || - !EXT4_SB(sb)->s_journal) - return 0; - -- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); -- - smp_mb(); -- if (EXT4_SB(sb)->s_mb_free_pending) -- jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); -+ if (EXT4_SB(sb)->s_mb_free_pending == 0) -+ return 0; -+ -+ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); -+ jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); - return 1; - } - diff --git a/series b/series index 24546c4a..8b959c97 100644 --- a/series +++ b/series @@ -1,25 +1,4 @@ -# v4.14-rc3 - -switch-from-blkno-to-disk-offset -add-IOMAP_F_DATA_INLINE-flag -add-iomap-support-for-inline-data -switch-to-iomap-for-SEEK_HOLE_DATA -retry-allocations-conservatively -fix-fallocate-and-delalloc-i_size-interaction - -prevent-data-corruption-with-inline-data-and-dax -prevent-data-corruption-with-journaling-and-dax -add-sanity-check-for-encryption-and-dax -add-ext4_should_use_dax -remove-duplicate-extended-attributes-defs - -jbd2-convert-timers-to-use-timer_setup -ext4-convert-timers-to-use-timer-setup -fix-little-inconsistencies -mention-noload-when-recovering-on-ro-device - -add-support-for-online-resizing-with-bigalloc -improve-smp-scalability-for-inode-generation +# v4.15-rc2 #################################################### # unstable patches diff --git a/switch-from-blkno-to-disk-offset b/switch-from-blkno-to-disk-offset deleted file mode 100644 index 97325941..00000000 --- a/switch-from-blkno-to-disk-offset +++ /dev/null @@ -1,216 +0,0 @@ -iomap: Switch from blkno to disk offset - -From: Andreas Gruenbacher - -Replace iomap->blkno, the sector number, with iomap->addr, the disk -offset in bytes. For invalid disk offsets, use the special value -IOMAP_NULL_ADDR instead of IOMAP_NULL_BLOCK. - -This allows to use iomap for mappings which are not block aligned, such -as inline data on ext4. - -Signed-off-by: Andreas Gruenbacher -Signed-off-by: Theodore Ts'o -Reviewed-by: Darrick J. Wong # iomap, xfs -Reviewed-by: Jan Kara ---- - fs/buffer.c | 4 ++-- - fs/dax.c | 2 +- - fs/ext2/inode.c | 4 ++-- - fs/ext4/inode.c | 4 ++-- - fs/iomap.c | 11 +++++------ - fs/nfsd/blocklayout.c | 4 ++-- - fs/xfs/xfs_iomap.c | 6 +++--- - include/linux/iomap.h | 10 +++++----- - 8 files changed, 22 insertions(+), 23 deletions(-) - -diff --git a/fs/buffer.c b/fs/buffer.c -index 170df856bdb9..bd4d0923cdce 100644 ---- a/fs/buffer.c -+++ b/fs/buffer.c -@@ -1978,8 +1978,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, - case IOMAP_MAPPED: - if (offset >= i_size_read(inode)) - set_buffer_new(bh); -- bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) + -- ((offset - iomap->offset) >> inode->i_blkbits); -+ bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> -+ inode->i_blkbits; - set_buffer_mapped(bh); - break; - } -diff --git a/fs/dax.c b/fs/dax.c -index 6afcacb3a87b..feccb4ec45d2 100644 ---- a/fs/dax.c -+++ b/fs/dax.c -@@ -938,7 +938,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range); - - static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) - { -- return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); -+ return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; - } - - static loff_t -diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c -index 4dca6f348714..1b8fc73de4a1 100644 ---- a/fs/ext2/inode.c -+++ b/fs/ext2/inode.c -@@ -820,11 +820,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - - if (ret == 0) { - iomap->type = IOMAP_HOLE; -- iomap->blkno = IOMAP_NULL_BLOCK; -+ iomap->addr = IOMAP_NULL_ADDR; - iomap->length = 1 << blkbits; - } else { - iomap->type = IOMAP_MAPPED; -- iomap->blkno = (sector_t)bno << (blkbits - 9); -+ iomap->addr = (u64)bno << blkbits; - iomap->length = (u64)ret << blkbits; - iomap->flags |= IOMAP_F_MERGED; - } -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 31db875bc7a1..d9e633c12aae 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3472,7 +3472,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - - if (ret == 0) { - iomap->type = IOMAP_HOLE; -- iomap->blkno = IOMAP_NULL_BLOCK; -+ iomap->addr = IOMAP_NULL_ADDR; - iomap->length = (u64)map.m_len << blkbits; - } else { - if (map.m_flags & EXT4_MAP_MAPPED) { -@@ -3483,7 +3483,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - WARN_ON_ONCE(1); - return -EIO; - } -- iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9); -+ iomap->addr = (u64)map.m_pblk << blkbits; - iomap->length = (u64)map.m_len << blkbits; - } - -diff --git a/fs/iomap.c b/fs/iomap.c -index 269b24a01f32..622c731c57a0 100644 ---- a/fs/iomap.c -+++ b/fs/iomap.c -@@ -350,8 +350,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, - static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, - struct iomap *iomap) - { -- sector_t sector = iomap->blkno + -- (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9); -+ sector_t sector = (iomap->addr + -+ (pos & PAGE_MASK) - iomap->offset) >> 9; - - return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector, - offset, bytes); -@@ -512,9 +512,8 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, - flags |= FIEMAP_EXTENT_SHARED; - - return fiemap_fill_next_extent(fi, iomap->offset, -- iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, -+ iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0, - iomap->length, flags); -- - } - - static loff_t -@@ -807,7 +806,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, - bio = bio_alloc(GFP_KERNEL, 1); - bio_set_dev(bio, iomap->bdev); - bio->bi_iter.bi_sector = -- iomap->blkno + ((pos - iomap->offset) >> 9); -+ (iomap->addr + pos - iomap->offset) >> 9; - bio->bi_private = dio; - bio->bi_end_io = iomap_dio_bio_end_io; - -@@ -886,7 +885,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, - bio = bio_alloc(GFP_KERNEL, nr_pages); - bio_set_dev(bio, iomap->bdev); - bio->bi_iter.bi_sector = -- iomap->blkno + ((pos - iomap->offset) >> 9); -+ (iomap->addr + pos - iomap->offset) >> 9; - bio->bi_write_hint = dio->iocb->ki_hint; - bio->bi_private = dio; - bio->bi_end_io = iomap_dio_bio_end_io; -diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c -index c862c2489df0..2d1d37b27dc7 100644 ---- a/fs/nfsd/blocklayout.c -+++ b/fs/nfsd/blocklayout.c -@@ -65,7 +65,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, - bex->es = PNFS_BLOCK_READ_DATA; - else - bex->es = PNFS_BLOCK_READWRITE_DATA; -- bex->soff = (iomap.blkno << 9); -+ bex->soff = iomap.addr; - break; - case IOMAP_UNWRITTEN: - if (seg->iomode & IOMODE_RW) { -@@ -78,7 +78,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, - } - - bex->es = PNFS_BLOCK_INVALID_DATA; -- bex->soff = (iomap.blkno << 9); -+ bex->soff = iomap.addr; - break; - } - /*FALLTHRU*/ -diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c -index a1909bc064e9..ac22a5c00079 100644 ---- a/fs/xfs/xfs_iomap.c -+++ b/fs/xfs/xfs_iomap.c -@@ -54,13 +54,13 @@ xfs_bmbt_to_iomap( - struct xfs_mount *mp = ip->i_mount; - - if (imap->br_startblock == HOLESTARTBLOCK) { -- iomap->blkno = IOMAP_NULL_BLOCK; -+ iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_HOLE; - } else if (imap->br_startblock == DELAYSTARTBLOCK) { -- iomap->blkno = IOMAP_NULL_BLOCK; -+ iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_DELALLOC; - } else { -- iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock); -+ iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock)); - if (imap->br_state == XFS_EXT_UNWRITTEN) - iomap->type = IOMAP_UNWRITTEN; - else -diff --git a/include/linux/iomap.h b/include/linux/iomap.h -index f64dc6ce5161..7b8a615fa021 100644 ---- a/include/linux/iomap.h -+++ b/include/linux/iomap.h -@@ -15,8 +15,8 @@ struct vm_fault; - */ - #define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ - #define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ --#define IOMAP_MAPPED 0x03 /* blocks allocated @blkno */ --#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ -+#define IOMAP_MAPPED 0x03 /* blocks allocated at @addr */ -+#define IOMAP_UNWRITTEN 0x04 /* blocks allocated at @addr in unwritten state */ - - /* - * Flags for all iomap mappings: -@@ -30,12 +30,12 @@ struct vm_fault; - #define IOMAP_F_SHARED 0x20 /* block shared with another file */ - - /* -- * Magic value for blkno: -+ * Magic value for addr: - */ --#define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */ -+#define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ - - struct iomap { -- sector_t blkno; /* 1st sector of mapping, 512b units */ -+ u64 addr; /* disk offset of mapping, bytes */ - loff_t offset; /* file offset of mapping, bytes */ - u64 length; /* length of mapping, bytes */ - u16 type; /* type of mapping */ --- -2.13.3 - - diff --git a/switch-to-iomap-for-SEEK_HOLE_DATA b/switch-to-iomap-for-SEEK_HOLE_DATA deleted file mode 100644 index 0e64b16a..00000000 --- a/switch-to-iomap-for-SEEK_HOLE_DATA +++ /dev/null @@ -1,521 +0,0 @@ -ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA - -From: Christoph Hellwig - -Switch to the iomap_seek_hole and iomap_seek_data helpers for -implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that -isn't needed any more. - -Note that with this patch ext4 will now always depend on the iomap code -instead of only when CONFIG_DAX is enabled, and it requires adding a -call into the extent status tree for iomap_begin as well to properly -deal with delalloc extents. - -Signed-off-by: Christoph Hellwig -Signed-off-by: Andreas Gruenbacher -Signed-off-by: Theodore Ts'o -Reviewed-by: Jan Kara -[More fixes and cleanups by Andreas] ---- - fs/ext4/Kconfig | 1 + - fs/ext4/ext4.h | 3 - - fs/ext4/file.c | 263 +++----------------------------------------------------- - fs/ext4/inode.c | 109 ++++++++--------------- - 4 files changed, 49 insertions(+), 327 deletions(-) - -diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig -index e38039fd96ff..73b850f5659c 100644 ---- a/fs/ext4/Kconfig -+++ b/fs/ext4/Kconfig -@@ -37,6 +37,7 @@ config EXT4_FS - select CRC16 - select CRYPTO - select CRYPTO_CRC32C -+ select FS_IOMAP - help - This is the next generation of the ext3 filesystem. - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index ae3e4a25821a..6fd1fe7456eb 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -2515,9 +2515,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode, - int used, int quota_claim); - extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, - ext4_fsblk_t pblk, ext4_lblk_t len); --extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, -- unsigned int map_len, -- struct extent_status *result); - - /* indirect.c */ - extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, -diff --git a/fs/ext4/file.c b/fs/ext4/file.c -index 57dcaea762c3..3958cd1343a9 100644 ---- a/fs/ext4/file.c -+++ b/fs/ext4/file.c -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -438,248 +439,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp) - } - - /* -- * Here we use ext4_map_blocks() to get a block mapping for a extent-based -- * file rather than ext4_ext_walk_space() because we can introduce -- * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same -- * function. When extent status tree has been fully implemented, it will -- * track all extent status for a file and we can directly use it to -- * retrieve the offset for SEEK_DATA/SEEK_HOLE. -- */ -- --/* -- * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to -- * lookup page cache to check whether or not there has some data between -- * [startoff, endoff] because, if this range contains an unwritten extent, -- * we determine this extent as a data or a hole according to whether the -- * page cache has data or not. -- */ --static int ext4_find_unwritten_pgoff(struct inode *inode, -- int whence, -- ext4_lblk_t end_blk, -- loff_t *offset) --{ -- struct pagevec pvec; -- unsigned int blkbits; -- pgoff_t index; -- pgoff_t end; -- loff_t endoff; -- loff_t startoff; -- loff_t lastoff; -- int found = 0; -- -- blkbits = inode->i_sb->s_blocksize_bits; -- startoff = *offset; -- lastoff = startoff; -- endoff = (loff_t)end_blk << blkbits; -- -- index = startoff >> PAGE_SHIFT; -- end = (endoff - 1) >> PAGE_SHIFT; -- -- pagevec_init(&pvec, 0); -- do { -- int i; -- unsigned long nr_pages; -- -- nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, -- &index, end); -- if (nr_pages == 0) -- break; -- -- for (i = 0; i < nr_pages; i++) { -- struct page *page = pvec.pages[i]; -- struct buffer_head *bh, *head; -- -- /* -- * If current offset is smaller than the page offset, -- * there is a hole at this offset. -- */ -- if (whence == SEEK_HOLE && lastoff < endoff && -- lastoff < page_offset(pvec.pages[i])) { -- found = 1; -- *offset = lastoff; -- goto out; -- } -- -- lock_page(page); -- -- if (unlikely(page->mapping != inode->i_mapping)) { -- unlock_page(page); -- continue; -- } -- -- if (!page_has_buffers(page)) { -- unlock_page(page); -- continue; -- } -- -- if (page_has_buffers(page)) { -- lastoff = page_offset(page); -- bh = head = page_buffers(page); -- do { -- if (lastoff + bh->b_size <= startoff) -- goto next; -- if (buffer_uptodate(bh) || -- buffer_unwritten(bh)) { -- if (whence == SEEK_DATA) -- found = 1; -- } else { -- if (whence == SEEK_HOLE) -- found = 1; -- } -- if (found) { -- *offset = max_t(loff_t, -- startoff, lastoff); -- unlock_page(page); -- goto out; -- } --next: -- lastoff += bh->b_size; -- bh = bh->b_this_page; -- } while (bh != head); -- } -- -- lastoff = page_offset(page) + PAGE_SIZE; -- unlock_page(page); -- } -- -- pagevec_release(&pvec); -- } while (index <= end); -- -- /* There are no pages upto endoff - that would be a hole in there. */ -- if (whence == SEEK_HOLE && lastoff < endoff) { -- found = 1; -- *offset = lastoff; -- } --out: -- pagevec_release(&pvec); -- return found; --} -- --/* -- * ext4_seek_data() retrieves the offset for SEEK_DATA. -- */ --static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) --{ -- struct inode *inode = file->f_mapping->host; -- struct extent_status es; -- ext4_lblk_t start, last, end; -- loff_t dataoff, isize; -- int blkbits; -- int ret; -- -- inode_lock(inode); -- -- isize = i_size_read(inode); -- if (offset < 0 || offset >= isize) { -- inode_unlock(inode); -- return -ENXIO; -- } -- -- blkbits = inode->i_sb->s_blocksize_bits; -- start = offset >> blkbits; -- last = start; -- end = isize >> blkbits; -- dataoff = offset; -- -- do { -- ret = ext4_get_next_extent(inode, last, end - last + 1, &es); -- if (ret <= 0) { -- /* No extent found -> no data */ -- if (ret == 0) -- ret = -ENXIO; -- inode_unlock(inode); -- return ret; -- } -- -- last = es.es_lblk; -- if (last != start) -- dataoff = (loff_t)last << blkbits; -- if (!ext4_es_is_unwritten(&es)) -- break; -- -- /* -- * If there is a unwritten extent at this offset, -- * it will be as a data or a hole according to page -- * cache that has data or not. -- */ -- if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, -- es.es_lblk + es.es_len, &dataoff)) -- break; -- last += es.es_len; -- dataoff = (loff_t)last << blkbits; -- cond_resched(); -- } while (last <= end); -- -- inode_unlock(inode); -- -- if (dataoff > isize) -- return -ENXIO; -- -- return vfs_setpos(file, dataoff, maxsize); --} -- --/* -- * ext4_seek_hole() retrieves the offset for SEEK_HOLE. -- */ --static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) --{ -- struct inode *inode = file->f_mapping->host; -- struct extent_status es; -- ext4_lblk_t start, last, end; -- loff_t holeoff, isize; -- int blkbits; -- int ret; -- -- inode_lock(inode); -- -- isize = i_size_read(inode); -- if (offset < 0 || offset >= isize) { -- inode_unlock(inode); -- return -ENXIO; -- } -- -- blkbits = inode->i_sb->s_blocksize_bits; -- start = offset >> blkbits; -- last = start; -- end = isize >> blkbits; -- holeoff = offset; -- -- do { -- ret = ext4_get_next_extent(inode, last, end - last + 1, &es); -- if (ret < 0) { -- inode_unlock(inode); -- return ret; -- } -- /* Found a hole? */ -- if (ret == 0 || es.es_lblk > last) { -- if (last != start) -- holeoff = (loff_t)last << blkbits; -- break; -- } -- /* -- * If there is a unwritten extent at this offset, -- * it will be as a data or a hole according to page -- * cache that has data or not. -- */ -- if (ext4_es_is_unwritten(&es) && -- ext4_find_unwritten_pgoff(inode, SEEK_HOLE, -- last + es.es_len, &holeoff)) -- break; -- -- last += es.es_len; -- holeoff = (loff_t)last << blkbits; -- cond_resched(); -- } while (last <= end); -- -- inode_unlock(inode); -- -- if (holeoff > isize) -- holeoff = isize; -- -- return vfs_setpos(file, holeoff, maxsize); --} -- --/* - * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values - * by calling generic_file_llseek_size() with the appropriate maxbytes - * value for each. -@@ -695,18 +454,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) - maxbytes = inode->i_sb->s_maxbytes; - - switch (whence) { -- case SEEK_SET: -- case SEEK_CUR: -- case SEEK_END: -+ default: - return generic_file_llseek_size(file, offset, whence, - maxbytes, i_size_read(inode)); -- case SEEK_DATA: -- return ext4_seek_data(file, offset, maxbytes); - case SEEK_HOLE: -- return ext4_seek_hole(file, offset, maxbytes); -+ inode_lock_shared(inode); -+ offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops); -+ inode_unlock_shared(inode); -+ break; -+ case SEEK_DATA: -+ inode_lock_shared(inode); -+ offset = iomap_seek_data(inode, offset, &ext4_iomap_ops); -+ inode_unlock_shared(inode); -+ break; - } - -- return -EINVAL; -+ if (offset < 0) -+ return offset; -+ return vfs_setpos(file, offset, maxbytes); - } - - const struct file_operations ext4_file_operations = { -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 7755f41bdfc3..edfe95f81274 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3393,7 +3393,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) - return try_to_free_buffers(page); - } - --#ifdef CONFIG_FS_DAX - static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - unsigned flags, struct iomap *iomap) - { -@@ -3402,6 +3401,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - unsigned long first_block = offset >> blkbits; - unsigned long last_block = (offset + length - 1) >> blkbits; - struct ext4_map_blocks map; -+ bool delalloc = false; - int ret; - - -@@ -3422,9 +3422,33 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - map.m_lblk = first_block; - map.m_len = last_block - first_block + 1; - -- if (!(flags & IOMAP_WRITE)) { -+ if (flags & IOMAP_REPORT) { - ret = ext4_map_blocks(NULL, inode, &map, 0); -- } else { -+ if (ret < 0) -+ return ret; -+ -+ if (ret == 0) { -+ ext4_lblk_t end = map.m_lblk + map.m_len - 1; -+ struct extent_status es; -+ -+ ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es); -+ -+ if (!es.es_len || es.es_lblk > end) { -+ /* entire range is a hole */ -+ } else if (es.es_lblk > map.m_lblk) { -+ /* range starts with a hole */ -+ map.m_len = es.es_lblk - map.m_lblk; -+ } else { -+ ext4_lblk_t offs = 0; -+ -+ if (es.es_lblk < map.m_lblk) -+ offs = map.m_lblk - es.es_lblk; -+ map.m_lblk = es.es_lblk + offs; -+ map.m_len = es.es_len - offs; -+ delalloc = true; -+ } -+ } -+ } else if (flags & IOMAP_WRITE) { - int dio_credits; - handle_t *handle; - int retries = 0; -@@ -3475,17 +3499,21 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - } - } - ext4_journal_stop(handle); -+ } else { -+ ret = ext4_map_blocks(NULL, inode, &map, 0); -+ if (ret < 0) -+ return ret; - } - - iomap->flags = 0; - iomap->bdev = inode->i_sb->s_bdev; - iomap->dax_dev = sbi->s_daxdev; - iomap->offset = first_block << blkbits; -+ iomap->length = (u64)map.m_len << blkbits; - - if (ret == 0) { -- iomap->type = IOMAP_HOLE; -+ iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE; - iomap->addr = IOMAP_NULL_ADDR; -- iomap->length = (u64)map.m_len << blkbits; - } else { - if (map.m_flags & EXT4_MAP_MAPPED) { - iomap->type = IOMAP_MAPPED; -@@ -3496,11 +3524,11 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - return -EIO; - } - iomap->addr = (u64)map.m_pblk << blkbits; -- iomap->length = (u64)map.m_len << blkbits; - } - - if (map.m_flags & EXT4_MAP_NEW) - iomap->flags |= IOMAP_F_NEW; -+ - return 0; - } - -@@ -3561,8 +3589,6 @@ const struct iomap_ops ext4_iomap_ops = { - .iomap_end = ext4_iomap_end, - }; - --#endif -- - static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, - ssize_t size, void *private) - { -@@ -6118,70 +6144,3 @@ int ext4_filemap_fault(struct vm_fault *vmf) - - return err; - } -- --/* -- * Find the first extent at or after @lblk in an inode that is not a hole. -- * Search for @map_len blocks at most. The extent is returned in @result. -- * -- * The function returns 1 if we found an extent. The function returns 0 in -- * case there is no extent at or after @lblk and in that case also sets -- * @result->es_len to 0. In case of error, the error code is returned. -- */ --int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, -- unsigned int map_len, struct extent_status *result) --{ -- struct ext4_map_blocks map; -- struct extent_status es = {}; -- int ret; -- -- map.m_lblk = lblk; -- map.m_len = map_len; -- -- /* -- * For non-extent based files this loop may iterate several times since -- * we do not determine full hole size. -- */ -- while (map.m_len > 0) { -- ret = ext4_map_blocks(NULL, inode, &map, 0); -- if (ret < 0) -- return ret; -- /* There's extent covering m_lblk? Just return it. */ -- if (ret > 0) { -- int status; -- -- ext4_es_store_pblock(result, map.m_pblk); -- result->es_lblk = map.m_lblk; -- result->es_len = map.m_len; -- if (map.m_flags & EXT4_MAP_UNWRITTEN) -- status = EXTENT_STATUS_UNWRITTEN; -- else -- status = EXTENT_STATUS_WRITTEN; -- ext4_es_store_status(result, status); -- return 1; -- } -- ext4_es_find_delayed_extent_range(inode, map.m_lblk, -- map.m_lblk + map.m_len - 1, -- &es); -- /* Is delalloc data before next block in extent tree? */ -- if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) { -- ext4_lblk_t offset = 0; -- -- if (es.es_lblk < lblk) -- offset = lblk - es.es_lblk; -- result->es_lblk = es.es_lblk + offset; -- ext4_es_store_pblock(result, -- ext4_es_pblock(&es) + offset); -- result->es_len = es.es_len - offset; -- ext4_es_store_status(result, ext4_es_status(&es)); -- -- return 1; -- } -- /* There's a hole at m_lblk, advance us after it */ -- map.m_lblk += map.m_len; -- map_len -= map.m_len; -- map.m_len = map_len; -- cond_resched(); -- } -- result->es_len = 0; -- return 0; --} --- -2.13.3 - - diff --git a/timestamps b/timestamps index b9347f88..dcb409f7 100755 --- a/timestamps +++ b/timestamps @@ -21,35 +21,18 @@ touch -d @1490569756 save-patch touch -d @1493511621 old-patches touch -d @1496678952 add-reg_convert_inline_data_nolock touch -d @1496698150 fix-up-ext4_try_to_write_inline_data -touch -d @1504192305 stable-boundary-undo.patch -touch -d @1504192365 jbd2-suppress-extra-newline-in-jbd2_debug -touch -d @1504192425 jbd2-dont-double-bump-transaction-number -touch -d @1504192485 journal-superblock-changes -touch -d @1504192545 add-journal-no-cleanup-option -touch -d @1504192605 add-support-for-log-metadata-block-tracking-in-log -touch -d @1504192665 add-indirection-to-metadata-block-read-paths -touch -d @1504192725 cleaner -touch -d @1504192785 load-jmap-from-journal -touch -d @1504192845 disable-writeback -touch -d @1504192905 add-ext4-journal-lazy-mount-option -touch -d @1506894954 switch-from-blkno-to-disk-offset -touch -d @1506895014 add-IOMAP_F_DATA_INLINE-flag -touch -d @1506895074 add-iomap-support-for-inline-data -touch -d @1506895134 switch-to-iomap-for-SEEK_HOLE_DATA -touch -d @1506895194 retry-allocations-conservatively -touch -d @1507345795 fix-fallocate-and-delalloc-i_size-interaction -touch -d @1507345855 stable-boundary -touch -d @1507823554 prevent-data-corruption-with-inline-data-and-dax -touch -d @1507823648 prevent-data-corruption-with-journaling-and-dax -touch -d @1507823885 add-sanity-check-for-encryption-and-dax -touch -d @1507824059 add-ext4_should_use_dax -touch -d @1507824588 remove-duplicate-extended-attributes-defs -touch -d @1508344828 jbd2-convert-timers-to-use-timer_setup -touch -d @1508345117 ext4-convert-timers-to-use-timer-setup -touch -d @1508345435 fix-little-inconsistencies -touch -d @1508346397 mention-noload-when-recovering-on-ro-device -touch -d @1509284326 add-support-for-online-resizing-with-bigalloc -touch -d @1510197508 series -touch -d @1510197800 improve-smp-scalability-for-inode-generation -touch -d @1510450600 status touch -d @1510450662 timestamps +touch -d @1512316967 stable-boundary +touch -d @1512317027 stable-boundary-undo.patch +touch -d @1512317087 jbd2-suppress-extra-newline-in-jbd2_debug +touch -d @1512317147 jbd2-dont-double-bump-transaction-number +touch -d @1512343076 series +touch -d @1512353445 journal-superblock-changes +touch -d @1512353604 add-journal-no-cleanup-option +touch -d @1512353608 add-support-for-log-metadata-block-tracking-in-log +touch -d @1512353609 add-indirection-to-metadata-block-read-paths +touch -d @1512353610 cleaner +touch -d @1512353610 load-jmap-from-journal +touch -d @1512353611 disable-writeback +touch -d @1512353785 add-ext4-journal-lazy-mount-option +touch -d @1512353794 status -- 2.11.4.GIT