fix-ocfs2-corrupt-when-updating-journal-superblock-fails

   1 jbd2: fix ocfs2 corruption when updating journal superblock fails
   2
   3 From: Joseph Qi <joseph.qi@huawei.com>
   4
   5 If updating the journal superblock fails after the journal data has
   6 been flushed, the error is silently dropped and the caller is misled
   7 into treating it as success.  In ocfs2, the checkpoint is then treated
   8 as successful and another node can take the lock and update.  Since
   9 s_start still points to the old log block, journal recovery on the
  10 other node replays the old journal data.  Thus the new updates are
  11 overwritten and ocfs2 is corrupted.  So in this case we have to return
  12 the error; ocfs2_commit_cache will handle it and prevent the other
  13 node from updating first.  Only after the journal has been recovered
  14 can new updates be made.
  15
  16 The issue discussion mail can be found at:
  17 https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html
  18 http://comments.gmane.org/gmane.comp.file-systems.ext4/48841
  19
  20 Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
  21 Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
  22 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
  23 Tested-by: Yiwen Jiang <jiangyiwen@huawei.com>
  24 Cc: Junxiao Bi <junxiao.bi@oracle.com>
  25 Cc: <stable@vger.kernel.org>
  26 ---
  27  fs/jbd2/checkpoint.c |  5 ++---
  28  fs/jbd2/journal.c    | 37 ++++++++++++++++++++++++++++++-------
  29  include/linux/jbd2.h |  4 ++--
  30  3 files changed, 34 insertions(+), 12 deletions(-)
  31
  32 diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
  33 index 988b32e..82e5b7d 100644
  34 --- a/fs/jbd2/checkpoint.c
  35 +++ b/fs/jbd2/checkpoint.c
  36 @@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  37         unsigned long   blocknr;
  38
  39         if (is_journal_aborted(journal))
  40 -               return 1;
  41 +               return -EIO;
  42
  43         if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
  44                 return 1;
  45 @@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  46         if (journal->j_flags & JBD2_BARRIER)
  47                 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
  48
  49 -       __jbd2_update_log_tail(journal, first_tid, blocknr);
  50 -       return 0;
  51 +       return __jbd2_update_log_tail(journal, first_tid, blocknr);
  52  }
  53
  54
  55 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
  56 index b96bd80..6b33a42 100644
  57 --- a/fs/jbd2/journal.c
  58 +++ b/fs/jbd2/journal.c
  59 @@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
  60   *
  61   * Requires j_checkpoint_mutex
  62   */
  63 -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
  64 +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
  65  {
  66         unsigned long freed;
  67 +       int ret;
  68
  69         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
  70
  71 @@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
  72          * space and if we lose sb update during power failure we'd replay
  73          * old transaction with possibly newly overwritten data.
  74          */
  75 -       jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
  76 +       ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
  77 +       if (ret)
  78 +               goto out;
  79 +
  80         write_lock(&journal->j_state_lock);
  81         freed = block - journal->j_tail;
  82         if (block < journal->j_tail)
  83 @@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
  84         journal->j_tail_sequence = tid;
  85         journal->j_tail = block;
  86         write_unlock(&journal->j_state_lock);
  87 +
  88 +out:
  89 +       return ret;
  90  }
  91
  92  /*
  93 @@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal)
  94         return jbd2_journal_start_thread(journal);
  95  }
  96
  97 -static void jbd2_write_superblock(journal_t *journal, int write_op)
  98 +static int jbd2_write_superblock(journal_t *journal, int write_op)
  99  {
 100         struct buffer_head *bh = journal->j_sb_buffer;
 101         journal_superblock_t *sb = journal->j_superblock;
 102 @@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
 103                 printk(KERN_ERR "JBD2: Error %d detected when updating "
 104                        "journal superblock for %s.\n", ret,
 105                        journal->j_devname);
 106 +               jbd2_journal_abort(journal, ret);
 107         }
 108 +
 109 +       return ret;
 110  }
 111
 112  /**
 113 @@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
 114   * Update a journal's superblock information about log tail and write it to
 115   * disk, waiting for the IO to complete.
 116   */
 117 -void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 118 +int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 119                                      unsigned long tail_block, int write_op)
 120  {
 121         journal_superblock_t *sb = journal->j_superblock;
 122 +       int ret;
 123
 124         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 125         jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
 126 @@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 127         sb->s_sequence = cpu_to_be32(tail_tid);
 128         sb->s_start    = cpu_to_be32(tail_block);
 129
 130 -       jbd2_write_superblock(journal, write_op);
 131 +       ret = jbd2_write_superblock(journal, write_op);
 132 +       if (ret)
 133 +               goto out;
 134
 135         /* Log is no longer empty */
 136         write_lock(&journal->j_state_lock);
 137         WARN_ON(!sb->s_sequence);
 138         journal->j_flags &= ~JBD2_FLUSHED;
 139         write_unlock(&journal->j_state_lock);
 140 +
 141 +out:
 142 +       return ret;
 143  }
 144
 145  /**
 146 @@ -1950,7 +1966,13 @@ int jbd2_journal_flush(journal_t *journal)
 147                 return -EIO;
 148
 149         mutex_lock(&journal->j_checkpoint_mutex);
 150 -       jbd2_cleanup_journal_tail(journal);
 151 +       if (!err) {
 152 +               err = jbd2_cleanup_journal_tail(journal);
 153 +               if (err < 0) {
 154 +                       mutex_unlock(&journal->j_checkpoint_mutex);
 155 +                       goto out;
 156 +               }
 157 +       }
 158
 159         /* Finally, mark the journal as really needing no recovery.
 160          * This sets s_start==0 in the underlying superblock, which is
 161 @@ -1966,7 +1988,8 @@ int jbd2_journal_flush(journal_t *journal)
 162         J_ASSERT(journal->j_head == journal->j_tail);
 163         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
 164         write_unlock(&journal->j_state_lock);
 165 -       return 0;
 166 +out:
 167 +       return err;
 168  }
 169
 170  /**
 171 diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
 172 index 20e7f78..edb640a 100644
 173 --- a/include/linux/jbd2.h
 174 +++ b/include/linux/jbd2.h
 175 @@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
 176  int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
 177  int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
 178                               unsigned long *block);
 179 -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 180 +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 181  void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 182
 183  /* Commit management */
 184 @@ -1157,7 +1157,7 @@ extern int           jbd2_journal_recover    (journal_t *journal);
 185  extern int        jbd2_journal_wipe       (journal_t *, int);
 186  extern int        jbd2_journal_skip_recovery   (journal_t *);
 187  extern void       jbd2_journal_update_sb_errno(journal_t *);
 188 -extern void       jbd2_journal_update_sb_log_tail      (journal_t *, tid_t,
 189 +extern int        jbd2_journal_update_sb_log_tail      (journal_t *, tid_t,
 190                                 unsigned long, int);
 191  extern void       __jbd2_journal_abort_hard    (journal_t *);
 192  extern void       jbd2_journal_abort      (journal_t *, int);
 193 --
 194 1.8.4.3
 195
 196
 197 --
 198 To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
 199 the body of a message to majordomo@vger.kernel.org
 200 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 201