1 ext4: introduce new i_write_mutex to protect fallocate
3 From: Namjae Jeon <namjae.jeon@samsung.com>
5 Introduce a new per-inode i_write_mutex to keep new writes from starting while
6 fallocate operations are in progress. Also, get rid of aio_mutex, as it is now covered by i_write_mutex.
9 Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
10 Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
11 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
13 fs/ext4/ext4.h | 6 +++---
14 fs/ext4/extents.c | 19 +++++++++++++++----
15 fs/ext4/file.c | 23 +++++++++++++----------
16 fs/ext4/inode.c | 7 ++++++-
17 fs/ext4/super.c | 3 +--
18 5 files changed, 38 insertions(+), 20 deletions(-)
20 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
21 index 1479e2a..0519715 100644
24 @@ -943,6 +943,9 @@ struct ext4_inode_info {
26 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */
29 + /* protects fallocate operations racing with new writes */
30 + struct mutex i_write_mutex;
34 @@ -2805,10 +2808,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
35 #define EXT4_WQ_HASH_SZ 37
36 #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
38 -#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
40 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
41 -extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
43 #define EXT4_RESIZING 0
44 extern int ext4_resize_begin(struct super_block *sb);
45 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
46 index 5bbe4256..cb23a34 100644
47 --- a/fs/ext4/extents.c
48 +++ b/fs/ext4/extents.c
49 @@ -4741,6 +4741,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
50 if (!S_ISREG(inode->i_mode))
53 + mutex_lock(&EXT4_I(inode)->i_write_mutex);
56 * Write out all dirty pages to avoid race conditions
58 @@ -4748,8 +4750,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
59 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
60 ret = filemap_write_and_wait_range(mapping, offset,
64 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
70 @@ -4761,8 +4765,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
71 start = round_up(offset, 1 << blkbits);
72 end = round_down((offset + len), 1 << blkbits);
74 - if (start < offset || end > offset + len)
75 + if (start < offset || end > offset + len) {
76 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
79 partial = (offset + len) & ((1 << blkbits) - 1);
81 lblk = start >> blkbits;
82 @@ -4859,6 +4865,7 @@ out_dio:
83 ext4_inode_resume_unlocked_dio(inode);
85 mutex_unlock(&inode->i_mutex);
86 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
90 @@ -5411,11 +5418,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
91 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
92 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
94 + mutex_lock(&EXT4_I(inode)->i_write_mutex);
96 /* Call ext4_force_commit to flush all data in case of data=journal. */
97 if (ext4_should_journal_data(inode)) {
98 ret = ext4_force_commit(inode->i_sb);
101 + goto out_i_write_mutex;
105 @@ -5428,7 +5437,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
106 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
110 + goto out_i_write_mutex;
112 /* Take mutex lock */
113 mutex_lock(&inode->i_mutex);
114 @@ -5501,5 +5510,7 @@ out_dio:
115 ext4_inode_resume_unlocked_dio(inode);
117 mutex_unlock(&inode->i_mutex);
119 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
122 diff --git a/fs/ext4/file.c b/fs/ext4/file.c
123 index 4e8bc284..e5cd87f 100644
126 @@ -97,7 +97,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
128 struct file *file = iocb->ki_filp;
129 struct inode *inode = file_inode(iocb->ki_filp);
130 - struct mutex *aio_mutex = NULL;
131 + bool unaligned_direct_aio = false;
132 struct blk_plug plug;
133 int o_direct = file->f_flags & O_DIRECT;
135 @@ -106,6 +106,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
137 BUG_ON(iocb->ki_pos != pos);
139 + mutex_lock(&EXT4_I(inode)->i_write_mutex);
142 * Unaligned direct AIO must be serialized; see comment above
143 * In the case of O_APPEND, assume that we must always serialize
144 @@ -115,8 +117,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
145 !is_sync_kiocb(iocb) &&
146 (file->f_flags & O_APPEND ||
147 ext4_unaligned_aio(inode, iov, nr_segs, pos))) {
148 - aio_mutex = ext4_aio_mutex(inode);
149 - mutex_lock(aio_mutex);
150 + unaligned_direct_aio = true;
151 ext4_unwritten_wait(inode);
154 @@ -134,8 +135,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
155 if ((pos > sbi->s_bitmap_maxbytes) ||
156 (pos == sbi->s_bitmap_maxbytes && length > 0)) {
157 mutex_unlock(&inode->i_mutex);
160 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
164 if (pos + length > sbi->s_bitmap_maxbytes) {
165 @@ -150,8 +151,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
166 iocb->private = &overwrite;
168 /* check whether we do a DIO overwrite or not */
169 - if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
170 - !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
171 + if (ext4_should_dioread_nolock(inode) &&
172 + !unaligned_direct_aio && !file->f_mapping->nrpages &&
173 + pos + length <= i_size_read(inode)) {
174 struct ext4_map_blocks map;
175 unsigned int blkbits = inode->i_blkbits;
177 @@ -181,6 +183,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
179 ret = __generic_file_aio_write(iocb, iov, nr_segs);
180 mutex_unlock(&inode->i_mutex);
181 + if (!unaligned_direct_aio)
182 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
186 @@ -192,9 +196,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
188 blk_finish_plug(&plug);
192 - mutex_unlock(aio_mutex);
193 + if (unaligned_direct_aio)
194 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
198 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
199 index 645de3e..55f999a 100644
200 --- a/fs/ext4/inode.c
201 +++ b/fs/ext4/inode.c
202 @@ -3534,6 +3534,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
204 trace_ext4_punch_hole(inode, offset, length, 0);
206 + mutex_lock(&EXT4_I(inode)->i_write_mutex);
209 * Write out all dirty pages to avoid race conditions
211 @@ -3541,8 +3543,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
212 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
213 ret = filemap_write_and_wait_range(mapping, offset,
214 offset + length - 1);
217 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
222 mutex_lock(&inode->i_mutex);
223 @@ -3643,6 +3647,7 @@ out_dio:
224 ext4_inode_resume_unlocked_dio(inode);
226 mutex_unlock(&inode->i_mutex);
227 + mutex_unlock(&EXT4_I(inode)->i_write_mutex);
231 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
232 index b9b9aab..7667a5b 100644
233 --- a/fs/ext4/super.c
234 +++ b/fs/ext4/super.c
235 @@ -904,6 +904,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
236 atomic_set(&ei->i_ioend_count, 0);
237 atomic_set(&ei->i_unwritten, 0);
238 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
239 + mutex_init(&ei->i_write_mutex);
241 return &ei->vfs_inode;
243 @@ -5516,7 +5517,6 @@ static void ext4_exit_feat_adverts(void)
245 /* Shared across all ext4 file systems */
246 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
247 -struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
249 static int __init ext4_init_fs(void)
251 @@ -5529,7 +5529,6 @@ static int __init ext4_init_fs(void)
252 ext4_check_flag_values();
254 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
255 - mutex_init(&ext4__aio_mutex[i]);
256 init_waitqueue_head(&ext4__ioend_wq[i]);