From 82c7b49064262c33ba1057338f44e92faa30e62c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 8 Mar 2016 23:08:36 -0500 Subject: [PATCH] add patch use-i_mutex-to-serialize-unaligned-AIO-DIO --- series | 1 + use-i_mutex-to-serialize-unaligned-AIO-DIO | 139 +++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 use-i_mutex-to-serialize-unaligned-AIO-DIO diff --git a/series b/series index 41bb085a..2f43d5da 100644 --- a/series +++ b/series @@ -24,6 +24,7 @@ unify-revoke-and-tag-block-checksum-handling save-some-atomic-ops-in-JI_COMMIT_RUNNING-handling pack-ioend-structure-better +use-i_mutex-to-serialize-unaligned-AIO-DIO ########################################## # unstable patches diff --git a/use-i_mutex-to-serialize-unaligned-AIO-DIO b/use-i_mutex-to-serialize-unaligned-AIO-DIO new file mode 100644 index 00000000..b30ec28a --- /dev/null +++ b/use-i_mutex-to-serialize-unaligned-AIO-DIO @@ -0,0 +1,139 @@ +ext4: use i_mutex to serialize unaligned AIO DIO + +From: Jan Kara + +Until now we have used a hashed aio_mutex to serialize unaligned AIO DIO. +However, the code cleanups that happened after 2011, when the lock was +introduced, mean that aio_mutex is acquired at almost the same places where we +already have exclusion using i_mutex. So just use i_mutex for the +exclusion of unaligned AIO DIO. + +The change moves waiting for pending unwritten extent conversion under +i_mutex. That makes special handling of O_APPEND writes unnecessary and +also avoids possible livelocking of unaligned AIO DIO with aligned ones +(nothing was preventing a continuous stream of aligned AIO DIOs from making +unaligned AIO DIO wait forever). 
+ +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/ext4.h | 3 --- + fs/ext4/file.c | 32 +++++++++++++------------------- + fs/ext4/super.c | 5 +---- + 3 files changed, 14 insertions(+), 26 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 1046621ef64d..b02b2e58805a 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -3284,10 +3284,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) + #define EXT4_WQ_HASH_SZ 37 + #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ + EXT4_WQ_HASH_SZ]) +-#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ +- EXT4_WQ_HASH_SZ]) + extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; + + #define EXT4_RESIZING 0 + extern int ext4_resize_begin(struct super_block *sb); +diff --git a/fs/ext4/file.c b/fs/ext4/file.c +index 1126436dada1..7d5fb122fd26 100644 +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -93,31 +93,29 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + { + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(iocb->ki_filp); +- struct mutex *aio_mutex = NULL; + struct blk_plug plug; + int o_direct = iocb->ki_flags & IOCB_DIRECT; ++ int unaligned_aio = 0; + int overwrite = 0; + ssize_t ret; + ++ inode_lock(inode); ++ ret = generic_write_checks(iocb, from); ++ if (ret <= 0) ++ goto out; ++ + /* +- * Unaligned direct AIO must be serialized; see comment above +- * In the case of O_APPEND, assume that we must always serialize ++ * Unaligned direct AIO must be serialized among each other as zeroing ++ * of partial blocks of two competing unaligned AIOs can result in data ++ * corruption. 
+ */ +- if (o_direct && +- ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && ++ if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + !is_sync_kiocb(iocb) && +- (iocb->ki_flags & IOCB_APPEND || +- ext4_unaligned_aio(inode, from, iocb->ki_pos))) { +- aio_mutex = ext4_aio_mutex(inode); +- mutex_lock(aio_mutex); ++ ext4_unaligned_aio(inode, from, iocb->ki_pos)) { ++ unaligned_aio = 1; + ext4_unwritten_wait(inode); + } + +- inode_lock(inode); +- ret = generic_write_checks(iocb, from); +- if (ret <= 0) +- goto out; +- + /* + * If we have encountered a bitmap-format file, the size limit + * is smaller than s_maxbytes, which is for extent-mapped files. +@@ -139,7 +137,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + blk_start_plug(&plug); + + /* check whether we do a DIO overwrite or not */ +- if (ext4_should_dioread_nolock(inode) && !aio_mutex && ++ if (ext4_should_dioread_nolock(inode) && !unaligned_aio && + !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { + struct ext4_map_blocks map; + unsigned int blkbits = inode->i_blkbits; +@@ -181,14 +179,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) + if (o_direct) + blk_finish_plug(&plug); + +- if (aio_mutex) +- mutex_unlock(aio_mutex); + return ret; + + out: + inode_unlock(inode); +- if (aio_mutex) +- mutex_unlock(aio_mutex); + return ret; + } + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 3ed01ec011d7..6d8a01b4f535 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5321,7 +5321,6 @@ MODULE_ALIAS_FS("ext4"); + + /* Shared across all ext4 file systems */ + wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; + + static int __init ext4_init_fs(void) + { +@@ -5334,10 +5333,8 @@ static int __init ext4_init_fs(void) + /* Build-time check for flags consistency */ + ext4_check_flag_values(); + +- for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { +- mutex_init(&ext4__aio_mutex[i]); ++ for (i = 0; i < 
EXT4_WQ_HASH_SZ; i++) + init_waitqueue_head(&ext4__ioend_wq[i]); +- } + + err = ext4_init_es(); + if (err) +-- +2.6.2 + + -- 2.11.4.GIT