add patch create-function-to-read-journal-inode
[ext4-patch-queue.git] / optimize-dioread-locking
blobca4cdaad04e49ca24e531165637c767962c031cd
1 ext4: Allow parallel DIO reads
3 From: Jan Kara <jack@suse.cz>
5 We can easily support parallel direct IO reads. We only have to make
6 sure we cannot expose uninitialized data by reading an allocated block to
7 which data has not been written yet, or which was already truncated. That is
8 easily achieved by holding inode_lock in shared mode - that excludes all
9 writes, truncates, and hole punches. We also have to guard against page
10 writeback allocating blocks for delay-allocated pages - that race is
11 handled by the fact that we write back all the pages in the affected
12 range and the lock protects us from new pages being created there.
14 Signed-off-by: Jan Kara <jack@suse.cz>
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 ---
17  fs/ext4/inode.c | 40 ++++++++++++++++++----------------------
18  1 file changed, 18 insertions(+), 22 deletions(-)
20 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
21 index 3988315..d8a4afc 100644
22 --- a/fs/ext4/inode.c
23 +++ b/fs/ext4/inode.c
24 @@ -3528,35 +3528,31 @@ out:
26  static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
27  {
28 -       int unlocked = 0;
29 -       struct inode *inode = iocb->ki_filp->f_mapping->host;
30 +       struct address_space *mapping = iocb->ki_filp->f_mapping;
31 +       struct inode *inode = mapping->host;
32         ssize_t ret;
34 -       if (ext4_should_dioread_nolock(inode)) {
35 -               /*
36 -                * Nolock dioread optimization may be dynamically disabled
37 -                * via ext4_inode_block_unlocked_dio(). Check inode's state
38 -                * while holding extra i_dio_count ref.
39 -                */
40 -               inode_dio_begin(inode);
41 -               smp_mb();
42 -               if (unlikely(ext4_test_inode_state(inode,
43 -                                                   EXT4_STATE_DIOREAD_LOCK)))
44 -                       inode_dio_end(inode);
45 -               else
46 -                       unlocked = 1;
47 -       }
48 +       /*
49 +        * Shared inode_lock is enough for us - it protects against concurrent
50 +        * writes & truncates and since we take care of writing back page cache,
51 +        * we are protected against page writeback as well.
52 +        */
53 +       inode_lock_shared(inode);
54         if (IS_DAX(inode)) {
55 -               ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
56 -                               NULL, unlocked ? 0 : DIO_LOCKING);
57 +               ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
58         } else {
59 +               size_t count = iov_iter_count(iter);
61 +               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
62 +                                                  iocb->ki_pos + count);
63 +               if (ret)
64 +                       goto out_unlock;
65                 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
66                                            iter, ext4_dio_get_block,
67 -                                          NULL, NULL,
68 -                                          unlocked ? 0 : DIO_LOCKING);
69 +                                          NULL, NULL, 0);
70         }
71 -       if (unlocked)
72 -               inode_dio_end(inode);
73 +out_unlock:
74 +       inode_unlock_shared(inode);
75         return ret;
76  }