add patch fix-check-of-dqget-return-value-in-ext4_ioctl_setproject
[ext4-patch-queue.git] / fix-races-between-changing-journal-mode-and-ext4_writepages
blob447dae6a524a2597f82d17e0d8e1f1f9fdf003af
1 ext4: fix races between changing inode journal mode and ext4_writepages
3 From: Daeho Jeong <daeho.jeong@samsung.com>
5 In ext4, there is a race condition between changing an inode's journal
6 mode and ext4_writepages(). While ext4_writepages() is running on an
7 inode in non-journaled mode, the inode's journal mode can be enabled
8 by ioctl(), and then some pages dirtied after the switch will still
9 be exposed to ext4_writepages() in non-journaled mode.
10 To resolve this problem, we use a filesystem-wide per-cpu rw semaphore,
11 as suggested by Jan Kara, because we don't want to waste space in
12 ext4_inode_info on this extremely rare case.
14 Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 Reviewed-by: Jan Kara <jack@suse.cz>
17 ---
18  fs/ext4/ext4.h                |    4 ++++
19  fs/ext4/inode.c               |   15 ++++++++++++---
20  fs/ext4/super.c               |    4 ++++
21  kernel/locking/percpu-rwsem.c |    1 +
22  4 files changed, 21 insertions(+), 3 deletions(-)
24 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
25 index 157b458..c757a3d 100644
26 --- a/fs/ext4/ext4.h
27 +++ b/fs/ext4/ext4.h
28 @@ -33,6 +33,7 @@
29  #include <linux/ratelimit.h>
30  #include <crypto/hash.h>
31  #include <linux/falloc.h>
32 +#include <linux/percpu-rwsem.h>
33  #ifdef __KERNEL__
34  #include <linux/compat.h>
35  #endif
36 @@ -1475,6 +1476,9 @@ struct ext4_sb_info {
37         struct ratelimit_state s_err_ratelimit_state;
38         struct ratelimit_state s_warning_ratelimit_state;
39         struct ratelimit_state s_msg_ratelimit_state;
41 +       /* Barrier between changing inodes' journal flags and writepages ops. */
42 +       struct percpu_rw_semaphore s_journal_flag_rwsem;
43  };
45  static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
46 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
47 index 71fab4c..4f45f24 100644
48 --- a/fs/ext4/inode.c
49 +++ b/fs/ext4/inode.c
50 @@ -2476,11 +2476,14 @@ static int ext4_writepages(struct address_space *mapping,
51         struct blk_plug plug;
52         bool give_up_on_write = false;
54 +       percpu_down_read(&sbi->s_journal_flag_rwsem);
55         trace_ext4_writepages(inode, wbc);
57 -       if (dax_mapping(mapping))
58 -               return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
59 -                                                  wbc);
60 +       if (dax_mapping(mapping)) {
61 +               ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
62 +                                                 wbc);
63 +               goto out_writepages;
64 +       }
66         /*
67          * No pages to write? This is mainly a kludge to avoid starting
68 @@ -2650,6 +2653,7 @@ retry:
69  out_writepages:
70         trace_ext4_writepages_result(inode, wbc, ret,
71                                      nr_to_write - wbc->nr_to_write);
72 +       percpu_up_read(&sbi->s_journal_flag_rwsem);
73         return ret;
74  }
76 @@ -5366,6 +5370,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
77         journal_t *journal;
78         handle_t *handle;
79         int err;
80 +       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
82         /*
83          * We have to be very careful here: changing a data block's
84 @@ -5405,6 +5410,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
85                 }
86         }
88 +       percpu_down_write(&sbi->s_journal_flag_rwsem);
89         jbd2_journal_lock_updates(journal);
91         /*
92 @@ -5421,6 +5427,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
93                 err = jbd2_journal_flush(journal);
94                 if (err < 0) {
95                         jbd2_journal_unlock_updates(journal);
96 +                       percpu_up_write(&sbi->s_journal_flag_rwsem);
97                         ext4_inode_resume_unlocked_dio(inode);
98                         return err;
99                 }
100 @@ -5429,6 +5436,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
101         ext4_set_aops(inode);
103         jbd2_journal_unlock_updates(journal);
104 +       percpu_up_write(&sbi->s_journal_flag_rwsem);
106         if (val)
107                 up_write(&EXT4_I(inode)->i_mmap_sem);
108         ext4_inode_resume_unlocked_dio(inode);
109 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
110 index 3ed01ec..a12950d 100644
111 --- a/fs/ext4/super.c
112 +++ b/fs/ext4/super.c
113 @@ -861,6 +861,7 @@ static void ext4_put_super(struct super_block *sb)
114         percpu_counter_destroy(&sbi->s_freeinodes_counter);
115         percpu_counter_destroy(&sbi->s_dirs_counter);
116         percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
117 +       percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
118         brelse(sbi->s_sbh);
119  #ifdef CONFIG_QUOTA
120         for (i = 0; i < EXT4_MAXQUOTAS; i++)
121 @@ -3926,6 +3927,9 @@ no_journal:
122         if (!err)
123                 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
124                                           GFP_KERNEL);
125 +       if (!err)
126 +               err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
128         if (err) {
129                 ext4_msg(sb, KERN_ERR, "insufficient memory");
130                 goto failed_mount6;
131 diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
132 index f231e0b..bec0b64 100644
133 --- a/kernel/locking/percpu-rwsem.c
134 +++ b/kernel/locking/percpu-rwsem.c
135 @@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
136         free_percpu(brw->fast_read_ctr);
137         brw->fast_read_ctr = NULL; /* catch use after free bugs */
139 +EXPORT_SYMBOL_GPL(percpu_free_rwsem);
141  /*
142   * This is the fast-path for down_read/up_read. If it succeeds we rely
143 -- 
144 1.7.9.5