From 0924cac5da04b450262b6de83825b1aa23fd8540 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jun 2015 14:49:54 -0400 Subject: [PATCH] add patch fix-ocfs2-corrupt-when-updating-journal-superblock-fails --- ...-corrupt-when-updating-journal-superblock-fails | 201 +++++++++++++++++++++ series | 1 + timestamps | 7 +- 3 files changed, 206 insertions(+), 3 deletions(-) create mode 100644 fix-ocfs2-corrupt-when-updating-journal-superblock-fails diff --git a/fix-ocfs2-corrupt-when-updating-journal-superblock-fails b/fix-ocfs2-corrupt-when-updating-journal-superblock-fails new file mode 100644 index 00000000..3a641b31 --- /dev/null +++ b/fix-ocfs2-corrupt-when-updating-journal-superblock-fails @@ -0,0 +1,201 @@ +jbd2: fix ocfs2 corrupt when updating journal superblock fails + +From: Joseph Qi + +If updating journal superblock fails after journal data has been +flushed, the error is omitted and this will mislead the caller as a +normal case. In ocfs2, the checkpoint will be treated successfully +and the other node can get the lock to update. Since the sb_start is +still pointing to the old log block, it will rewrite the journal data +during journal recovery by the other node. Thus the new updates will +be overwritten and ocfs2 corrupts. So in above case we have to return +the error, and ocfs2_commit_cache will take care of the error and +prevent the other node to do update first. And only after recovering +journal it can do the new updates. + +The issue discussion mail can be found at: +https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html +http://comments.gmane.org/gmane.comp.file-systems.ext4/48841 + +Reported-by: Yiwen Jiang +Signed-off-by: Joseph Qi +Signed-off-by: Theodore Ts'o +Tested-by: Yiwen Jiang +Cc: Junxiao Bi +Cc: +--- + fs/jbd2/checkpoint.c | 5 ++--- + fs/jbd2/journal.c | 37 ++++++++++++++++++++++++++++++------- + include/linux/jbd2.h | 4 ++-- + 3 files changed, 34 insertions(+), 12 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index 988b32e..82e5b7d 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) + unsigned long blocknr; + + if (is_journal_aborted(journal)) +- return 1; ++ return -EIO; + + if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) + return 1; +@@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) + if (journal->j_flags & JBD2_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + +- __jbd2_update_log_tail(journal, first_tid, blocknr); +- return 0; ++ return __jbd2_update_log_tail(journal, first_tid, blocknr); + } + + +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index b96bd80..6b33a42 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, + * + * Requires j_checkpoint_mutex + */ +-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) ++int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) + { + unsigned long freed; ++ int ret; + + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); + +@@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) + * space and if we lose sb update during power failure we'd replay + * old transaction with possibly newly overwritten data. + */ +- jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); ++ ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); ++ if (ret) ++ goto out; ++ + write_lock(&journal->j_state_lock); + freed = block - journal->j_tail; + if (block < journal->j_tail) +@@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) + journal->j_tail_sequence = tid; + journal->j_tail = block; + write_unlock(&journal->j_state_lock); ++ ++out: ++ return ret; + } + + /* +@@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal) + return jbd2_journal_start_thread(journal); + } + +-static void jbd2_write_superblock(journal_t *journal, int write_op) ++static int jbd2_write_superblock(journal_t *journal, int write_op) + { + struct buffer_head *bh = journal->j_sb_buffer; + journal_superblock_t *sb = journal->j_superblock; +@@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) + printk(KERN_ERR "JBD2: Error %d detected when updating " + "journal superblock for %s.\n", ret, + journal->j_devname); ++ jbd2_journal_abort(journal, ret); + } ++ ++ return ret; + } + + /** +@@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) + * Update a journal's superblock information about log tail and write it to + * disk, waiting for the IO to complete. + */ +-void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, ++int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, + unsigned long tail_block, int write_op) + { + journal_superblock_t *sb = journal->j_superblock; ++ int ret; + + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); + jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", +@@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, + sb->s_sequence = cpu_to_be32(tail_tid); + sb->s_start = cpu_to_be32(tail_block); + +- jbd2_write_superblock(journal, write_op); ++ ret = jbd2_write_superblock(journal, write_op); ++ if (ret) ++ goto out; + + /* Log is no longer empty */ + write_lock(&journal->j_state_lock); + WARN_ON(!sb->s_sequence); + journal->j_flags &= ~JBD2_FLUSHED; + write_unlock(&journal->j_state_lock); ++ ++out: ++ return ret; + } + + /** +@@ -1950,7 +1966,13 @@ int jbd2_journal_flush(journal_t *journal) + return -EIO; + + mutex_lock(&journal->j_checkpoint_mutex); +- jbd2_cleanup_journal_tail(journal); ++ if (!err) { ++ err = jbd2_cleanup_journal_tail(journal); ++ if (err < 0) { ++ mutex_unlock(&journal->j_checkpoint_mutex); ++ goto out; ++ } ++ } + + /* Finally, mark the journal as really needing no recovery. + * This sets s_start==0 in the underlying superblock, which is +@@ -1966,7 +1988,8 @@ int jbd2_journal_flush(journal_t *journal) + J_ASSERT(journal->j_head == journal->j_tail); + J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); + write_unlock(&journal->j_state_lock); +- return 0; ++out: ++ return err; + } + + /** +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index 20e7f78..edb640a 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); + int jbd2_journal_next_log_block(journal_t *, unsigned long long *); + int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, + unsigned long *block); +-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); ++int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); + void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); + + /* Commit management */ +@@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal); + extern int jbd2_journal_wipe (journal_t *, int); + extern int jbd2_journal_skip_recovery (journal_t *); + extern void jbd2_journal_update_sb_errno(journal_t *); +-extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, ++extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, + unsigned long, int); + extern void __jbd2_journal_abort_hard (journal_t *); + extern void jbd2_journal_abort (journal_t *, int); +-- +1.8.4.3 + + +-- +To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html + diff --git a/series b/series index 328892bc..fc68d82f 100644 --- a/series +++ b/series @@ -51,6 +51,7 @@ use_GFP_NOFS_in_jbd2_cleanup_journal_tail recalculate-journal-credits-as-inode-depth-changes wait-for-existing-dio-workers-in-ext4_alloc_file_blocks mballoc-avoid-20-argument-function-call +fix-ocfs2-corrupt-when-updating-journal-superblock-fails ########################################## # unstable patches diff --git a/timestamps b/timestamps index d3eea8e9..79a8bde0 100755 --- a/timestamps +++ b/timestamps @@ -66,7 +66,8 @@ touch -d @1434341882 use_GFP_NOFS_in_jbd2_cleanup_journal_tail touch -d @1434342046 recalculate-journal-credits-as-inode-depth-changes touch -d @1434342233 wait-for-existing-dio-workers-in-ext4_alloc_file_blocks touch -d @1434342778 mballoc-avoid-20-argument-function-call -touch -d @1434342803 series touch -d @1434342826 stable-boundary -touch -d @1434342836 status -touch -d @1434342985 timestamps +touch -d @1434393361 fix-ocfs2-corrupt-when-updating-journal-superblock-fails +touch -d @1434393706 series +touch -d @1434393712 status +touch -d @1434394185 timestamps -- 2.11.4.GIT