From ceac12e656fb2a99e7c6704f0a035a21f82097d7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 19 Feb 2016 00:34:04 -0500 Subject: [PATCH] add fix-crashes-in-dioread_nolock-mode and drop the revert of i_data_sum locking cleanups for dioread_nolock --- fix-crashes-in-dioread_nolock-mode | 80 +++++++++ ...-i_data_sum-locking-cleanups-for-dioread_nolock | 181 --------------------- series | 2 +- timestamps | 8 +- 4 files changed, 85 insertions(+), 186 deletions(-) create mode 100644 fix-crashes-in-dioread_nolock-mode delete mode 100644 revert-i_data_sum-locking-cleanups-for-dioread_nolock diff --git a/fix-crashes-in-dioread_nolock-mode b/fix-crashes-in-dioread_nolock-mode new file mode 100644 index 00000000..9fd89dfe --- /dev/null +++ b/fix-crashes-in-dioread_nolock-mode @@ -0,0 +1,80 @@ +ext4: fix crashes in dioread_nolock mode + +From: Jan Kara + +Competing overwrite DIO in dioread_nolock mode will just overwrite +pointer to io_end in the inode. This may result in data corruption or +extent conversion happening from IO completion interrupt because we +don't properly set buffer_defer_completion() when unlocked DIO races +with locked DIO to unwritten extent. + +Since unlocked DIO doesn't need io_end for anything, just avoid +allocating it and corrupting pointer from inode for locked DIO. +A cleaner fix would be to avoid these games with io_end pointer from the +inode but that requires more intrusive changes so we leave that for +later. + +Cc: stable@vger.kernel.org +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/inode.c | 40 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 83bc8bfb3bea..ee8ca1ff4023 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3253,29 +3253,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + * case, we allocate an io_end structure to hook to the iocb. + */ + iocb->private = NULL; +- ext4_inode_aio_set(inode, NULL); +- if (!is_sync_kiocb(iocb)) { +- io_end = ext4_init_io_end(inode, GFP_NOFS); +- if (!io_end) { +- ret = -ENOMEM; +- goto retake_lock; +- } +- /* +- * Grab reference for DIO. Will be dropped in ext4_end_io_dio() +- */ +- iocb->private = ext4_get_io_end(io_end); +- /* +- * we save the io structure for current async direct +- * IO, so that later ext4_map_blocks() could flag the +- * io structure whether there is a unwritten extents +- * needs to be converted when IO is completed. +- */ +- ext4_inode_aio_set(inode, io_end); +- } +- + if (overwrite) { + get_block_func = ext4_get_block_overwrite; + } else { ++ ext4_inode_aio_set(inode, NULL); ++ if (!is_sync_kiocb(iocb)) { ++ io_end = ext4_init_io_end(inode, GFP_NOFS); ++ if (!io_end) { ++ ret = -ENOMEM; ++ goto retake_lock; ++ } ++ /* ++ * Grab reference for DIO. Will be dropped in ++ * ext4_end_io_dio() ++ */ ++ iocb->private = ext4_get_io_end(io_end); ++ /* ++ * we save the io structure for current async direct ++ * IO, so that later ext4_map_blocks() could flag the ++ * io structure whether there is a unwritten extents ++ * needs to be converted when IO is completed. ++ */ ++ ext4_inode_aio_set(inode, io_end); ++ } + get_block_func = ext4_get_block_write; + dio_flags = DIO_LOCKING; + } +-- +2.6.2 + diff --git a/revert-i_data_sum-locking-cleanups-for-dioread_nolock b/revert-i_data_sum-locking-cleanups-for-dioread_nolock deleted file mode 100644 index b34229c0..00000000 --- a/revert-i_data_sum-locking-cleanups-for-dioread_nolock +++ /dev/null @@ -1,181 +0,0 @@ -ext4: revert i_data_sum locking cleanups for dioread_nolock - -From: Eric Whitney - -Commit 2bcba4781fa3 ("ext4: get rid of EXT4_GET_BLOCKS_NO_LOCK flag") -can cause a kernel panic when xfstest generic/300 is run on a file -system mounted with dioread_nolock. The panic is typically triggered -from check_irqs_on() (fs/buffer.c: 1272), and happens because -ext4_end_io_dio() is being called in an interrupt context rather than -from a workqueue for AIO. This suggests that buffer_defer_completion -may not be set properly when creating an unwritten extent for async -direct I/O. - -Revert the locking changes until this problem can be resolved. Patch -applied to 4.5-rc3 and tested with a full xfstest-bld auto group run. - -Signed-off-by: Eric Whitney -Signed-off-by: Theodore Ts'o ---- - fs/ext4/ext4.h | 6 ++++-- - fs/ext4/inode.c | 43 +++++++++++++++++++++++-------------------- - include/trace/events/ext4.h | 1 + - 3 files changed, 28 insertions(+), 22 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 0662b28..36fcf2a 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -563,10 +563,12 @@ enum { - #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 - /* Request will not result in inode size update (user for fallocate) */ - #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 -+ /* Do not take i_data_sem locking in ext4_map_blocks */ -+#define EXT4_GET_BLOCKS_NO_LOCK 0x0100 - /* Convert written extents to unwritten */ --#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 -+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 - /* Write zeros to newly created written extents */ --#define EXT4_GET_BLOCKS_ZERO 0x0200 -+#define EXT4_GET_BLOCKS_ZERO 0x0300 - #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\ - EXT4_GET_BLOCKS_ZERO) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 83bc8bf..f72dc04 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -418,7 +418,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, - * out taking i_data_sem. So at the time the unwritten extent - * could be converted. - */ -- down_read(&EXT4_I(inode)->i_data_sem); -+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) -+ down_read(&EXT4_I(inode)->i_data_sem); - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - retval = ext4_ext_map_blocks(handle, inode, map, flags & - EXT4_GET_BLOCKS_KEEP_SIZE); -@@ -426,7 +427,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, - retval = ext4_ind_map_blocks(handle, inode, map, flags & - EXT4_GET_BLOCKS_KEEP_SIZE); - } -- up_read((&EXT4_I(inode)->i_data_sem)); -+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) -+ up_read((&EXT4_I(inode)->i_data_sem)); - - /* - * We don't check m_len because extent will be collpased in status -@@ -522,7 +524,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - * Try to see if we can get the block without requesting a new - * file system block. - */ -- down_read(&EXT4_I(inode)->i_data_sem); -+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) -+ down_read(&EXT4_I(inode)->i_data_sem); - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - retval = ext4_ext_map_blocks(handle, inode, map, flags & - EXT4_GET_BLOCKS_KEEP_SIZE); -@@ -553,7 +556,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - if (ret < 0) - retval = ret; - } -- up_read((&EXT4_I(inode)->i_data_sem)); -+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) -+ up_read((&EXT4_I(inode)->i_data_sem)); - - found: - if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { -@@ -703,7 +707,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, - map.m_lblk = iblock; - map.m_len = bh->b_size >> inode->i_blkbits; - -- if (flags && !handle) { -+ if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { - /* Direct IO write... */ - if (map.m_len > DIO_MAX_BLOCKS) - map.m_len = DIO_MAX_BLOCKS; -@@ -898,6 +902,9 @@ int do_journal_get_write_access(handle_t *handle, - return ret; - } - -+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, -+ struct buffer_head *bh_result, int create); -+ - #ifdef CONFIG_EXT4_FS_ENCRYPTION - static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, - get_block_t *get_block) -@@ -3070,21 +3077,13 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock, - EXT4_GET_BLOCKS_IO_CREATE_EXT); - } - --static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock, -+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -- int ret; -- -- ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n", -+ ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", - inode->i_ino, create); -- ret = _ext4_get_block(inode, iblock, bh_result, 0); -- /* -- * Blocks should have been preallocated! ext4_file_write_iter() checks -- * that. -- */ -- WARN_ON_ONCE(!buffer_mapped(bh_result)); -- -- return ret; -+ return _ext4_get_block(inode, iblock, bh_result, -+ EXT4_GET_BLOCKS_NO_LOCK); - } - - #ifdef CONFIG_FS_DAX -@@ -3230,8 +3229,10 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - /* If we do a overwrite dio, i_mutex locking can be released */ - overwrite = *((int *)iocb->private); - -- if (overwrite) -+ if (overwrite) { -+ down_read(&EXT4_I(inode)->i_data_sem); - inode_unlock(inode); -+ } - - /* - * We could direct write to holes and fallocate. -@@ -3274,7 +3275,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - } - - if (overwrite) { -- get_block_func = ext4_get_block_overwrite; -+ get_block_func = ext4_get_block_write_nolock; - } else { - get_block_func = ext4_get_block_write; - dio_flags = DIO_LOCKING; -@@ -3330,8 +3331,10 @@ retake_lock: - if (iov_iter_rw(iter) == WRITE) - inode_dio_end(inode); - /* take i_mutex locking again if we do a ovewrite dio */ -- if (overwrite) -+ if (overwrite) { -+ up_read(&EXT4_I(inode)->i_data_sem); - inode_lock(inode); -+ } - - return ret; - } -diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h -index 4e4b2fa..6599e75 100644 ---- a/include/trace/events/ext4.h -+++ b/include/trace/events/ext4.h -@@ -43,6 +43,7 @@ struct extent_status; - { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ - { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ - { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ -+ { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \ - { EXT4_GET_BLOCKS_ZERO, "ZERO" }) - - #define show_mflags(flags) __print_flags(flags, "", \ --- -2.1.4 - - diff --git a/series b/series index 1e9f487f..c17b05e6 100644 --- a/series +++ b/series @@ -8,9 +8,9 @@ add-a-line-break-for-proc-mb_groups-display fix-potential-integer-overflow dont-read-blocks-from-disk-after-extents-being-swapped remove-unused-parameter-newblock -revert-i_data_sum-locking-cleanups-for-dioread_nolock fix-memleak-in-ext4_readdir fix-bh-b_state-corruption +fix-crashes-in-dioread_nolock-mode ########################################## # unstable patches diff --git a/timestamps b/timestamps index 676a911c..8e5c2ba3 100755 --- a/timestamps +++ b/timestamps @@ -35,10 +35,10 @@ touch -d @1455254236 add-a-line-break-for-proc-mb_groups-display touch -d @1455257759 fix-potential-integer-overflow touch -d @1455258043 dont-read-blocks-from-disk-after-extents-being-swapped touch -d @1455258180 remove-unused-parameter-newblock -touch -d @1455596913 revert-i_data_sum-locking-cleanups-for-dioread_nolock touch -d @1455600019 fix-memleak-in-ext4_readdir touch -d @1455600079 stable-boundary -touch -d @1455600732 timestamps -touch -d @1455858476 series touch -d @1455859105 fix-bh-b_state-corruption -touch -d @1455859105 status +touch -d @1455859479 series +touch -d @1455860001 fix-crashes-in-dioread_nolock-mode +touch -d @1455860008 status +touch -d @1455860032 timestamps -- 2.11.4.GIT