add patch set-lazytime-on-remount
[ext4-patch-queue.git] / fix-ocfs2-corrupt-when-updating-journal-superblock-fails
blob 4cf08c1c9a8292803e5f91bd1c7a7bf8ba6917d8
1 jbd2: fix ocfs2 corrupt when updating journal superblock fails
3 From: Joseph Qi <joseph.qi@huawei.com>
5 If updating the journal superblock fails after journal data has been
6 flushed, the error is dropped and the caller is misled into treating
7 this as a normal case.  In ocfs2, the checkpoint will be treated as
8 successful and the other node can get the lock to update. Since
9 sb->s_start is still pointing to the old log block, the other node will
10 rewrite the journal data during journal recovery. Thus the new updates
11 will be overwritten and ocfs2 becomes corrupted.  So in the above case we
12 have to return the error, and ocfs2_commit_cache will take care of the
13 error and prevent the other node from doing its update first.  Only after
14 recovering the journal can it do the new updates.
16 The issue discussion mail can be found at:
17 https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html
18 http://comments.gmane.org/gmane.comp.file-systems.ext4/48841
20 [ Fixed bug in patch which allowed a non-negative error return from
21   jbd2_cleanup_journal_tail() to leak out of jbd2_journal_flush(); this
22   was causing xfstests ext4/306 to fail. -- Ted ]
24 Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
25 Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
26 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
27 Tested-by: Yiwen Jiang <jiangyiwen@huawei.com>
28 Cc: Junxiao Bi <junxiao.bi@oracle.com>
29 Cc: <stable@vger.kernel.org>
30 ---
31  fs/jbd2/checkpoint.c |  5 ++---
32  fs/jbd2/journal.c    | 38 +++++++++++++++++++++++++++++++-------
33  include/linux/jbd2.h |  4 ++--
34  3 files changed, 35 insertions(+), 12 deletions(-)
36 diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
37 index 6b7b73a..4227dc4 100644
38 --- a/fs/jbd2/checkpoint.c
39 +++ b/fs/jbd2/checkpoint.c
40 @@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
41         unsigned long   blocknr;
43         if (is_journal_aborted(journal))
44 -               return 1;
45 +               return -EIO;
47         if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
48                 return 1;
49 @@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
50         if (journal->j_flags & JBD2_BARRIER)
51                 blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
53 -       __jbd2_update_log_tail(journal, first_tid, blocknr);
54 -       return 0;
55 +       return __jbd2_update_log_tail(journal, first_tid, blocknr);
56  }
59 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
60 index 303ccd9..5804466 100644
61 --- a/fs/jbd2/journal.c
62 +++ b/fs/jbd2/journal.c
63 @@ -876,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
64   *
65   * Requires j_checkpoint_mutex
66   */
67 -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
68 +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
69  {
70         unsigned long freed;
71 +       int ret;
73         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
75 @@ -888,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
76          * space and if we lose sb update during power failure we'd replay
77          * old transaction with possibly newly overwritten data.
78          */
79 -       jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
80 +       ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
81 +       if (ret)
82 +               goto out;
84         write_lock(&journal->j_state_lock);
85         freed = block - journal->j_tail;
86         if (block < journal->j_tail)
87 @@ -904,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
88         journal->j_tail_sequence = tid;
89         journal->j_tail = block;
90         write_unlock(&journal->j_state_lock);
92 +out:
93 +       return ret;
94  }
96  /*
97 @@ -1322,7 +1329,7 @@ static int journal_reset(journal_t *journal)
98         return jbd2_journal_start_thread(journal);
99  }
101 -static void jbd2_write_superblock(journal_t *journal, int write_op)
102 +static int jbd2_write_superblock(journal_t *journal, int write_op)
104         struct buffer_head *bh = journal->j_sb_buffer;
105         journal_superblock_t *sb = journal->j_superblock;
106 @@ -1361,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
107                 printk(KERN_ERR "JBD2: Error %d detected when updating "
108                        "journal superblock for %s.\n", ret,
109                        journal->j_devname);
110 +               jbd2_journal_abort(journal, ret);
111         }
113 +       return ret;
116  /**
117 @@ -1374,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
118   * Update a journal's superblock information about log tail and write it to
119   * disk, waiting for the IO to complete.
120   */
121 -void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
122 +int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
123                                      unsigned long tail_block, int write_op)
125         journal_superblock_t *sb = journal->j_superblock;
126 +       int ret;
128         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
129         jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
130 @@ -1386,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
131         sb->s_sequence = cpu_to_be32(tail_tid);
132         sb->s_start    = cpu_to_be32(tail_block);
134 -       jbd2_write_superblock(journal, write_op);
135 +       ret = jbd2_write_superblock(journal, write_op);
136 +       if (ret)
137 +               goto out;
139         /* Log is no longer empty */
140         write_lock(&journal->j_state_lock);
141         WARN_ON(!sb->s_sequence);
142         journal->j_flags &= ~JBD2_FLUSHED;
143         write_unlock(&journal->j_state_lock);
145 +out:
146 +       return ret;
149  /**
150 @@ -1941,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
151                 return -EIO;
153         mutex_lock(&journal->j_checkpoint_mutex);
154 -       jbd2_cleanup_journal_tail(journal);
155 +       if (!err) {
156 +               err = jbd2_cleanup_journal_tail(journal);
157 +               if (err < 0) {
158 +                       mutex_unlock(&journal->j_checkpoint_mutex);
159 +                       goto out;
160 +               }
161 +               err = 0;
162 +       }
164         /* Finally, mark the journal as really needing no recovery.
165          * This sets s_start==0 in the underlying superblock, which is
166 @@ -1957,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
167         J_ASSERT(journal->j_head == journal->j_tail);
168         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
169         write_unlock(&journal->j_state_lock);
170 -       return 0;
171 +out:
172 +       return err;
175  /**
176 diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
177 index 20e7f78..edb640a 100644
178 --- a/include/linux/jbd2.h
179 +++ b/include/linux/jbd2.h
180 @@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
181  int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
182  int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
183                               unsigned long *block);
184 -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
185 +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
186  void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
188  /* Commit management */
189 @@ -1157,7 +1157,7 @@ extern int           jbd2_journal_recover    (journal_t *journal);
190  extern int        jbd2_journal_wipe       (journal_t *, int);
191  extern int        jbd2_journal_skip_recovery   (journal_t *);
192  extern void       jbd2_journal_update_sb_errno(journal_t *);
193 -extern void       jbd2_journal_update_sb_log_tail      (journal_t *, tid_t,
194 +extern int        jbd2_journal_update_sb_log_tail      (journal_t *, tid_t,
195                                 unsigned long, int);
196  extern void       __jbd2_journal_abort_hard    (journal_t *);
197  extern void       jbd2_journal_abort      (journal_t *, int);