From 17138809122d12ee2e9631d3818db9ce8653aa9a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 10 Mar 2016 00:00:13 -0500 Subject: [PATCH] add patch fix-jbd2_journal_destroy-for-umount-path --- fix-jbd2_journal_destroy-for-umount-path | 115 +++++++++++++++++++++++++++++++ series | 2 + timestamps | 9 +-- 3 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 fix-jbd2_journal_destroy-for-umount-path diff --git a/fix-jbd2_journal_destroy-for-umount-path b/fix-jbd2_journal_destroy-for-umount-path new file mode 100644 index 00000000..11ccf987 --- /dev/null +++ b/fix-jbd2_journal_destroy-for-umount-path @@ -0,0 +1,115 @@ +jbd2: fix jbd2_journal_destory() for umount path + +From: OGAWA Hirofumi + +On umount path, jbd2_journal_destroy() writes latest transaction ID +(->j_tail_sequence) to be used at next mount. + +The bug is that ->j_tail_sequence is not holding latest transaction ID +in some cases. So, at next mount, there is chance to conflict with +remaining (not overwritten yet) transactions. + + mount (id=10) + write transaction (id=11) + write transaction (id=12) + umount (id=10) <= the bug doesn't write latest ID + + mount (id=10) + write transaction (id=11) + crash + + recovery + transaction (id=11) + transaction (id=12) <= valid transaction ID, but old commit + must not replay + +Like above, this bug become the cause of recovery failure, or FS +corruption. + +So why ->j_tail_sequence doesn't point latest ID? + +Because if checkpoint transactions was reclaimed by memory pressure +(i.e. bdev_try_to_free_page()), then ->j_tail_sequence is not updated. +(And another case is, __jbd2_journal_clean_checkpoint_list() is called +with empty transaction.) + +So in above cases, ->j_tail_sequence is not pointing latest +transaction ID at umount path. Plus, REQ_FLUSH for checkpoint is not +done too. + +So, to fix this problem with minimum changes, this patch updates +->j_tail_sequence, and issue REQ_FLUSH. (With more complex changes, +some optimizations would be possible to avoid unnecessary REQ_FLUSH +for example though.) + +BTW, + + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + +Increment of ->j_transaction_sequence seems to be unnecessary, but +ext3 does this. + +Signed-off-by: OGAWA Hirofumi +Signed-off-by: Theodore Ts'o +--- + + fs/jbd2/journal.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff -puN fs/jbd2/journal.c~ext4-umount-fix fs/jbd2/journal.c +--- linux/fs/jbd2/journal.c~ext4-umount-fix 2016-02-25 03:26:32.710407670 +0900 ++++ linux-hirofumi/fs/jbd2/journal.c 2016-02-25 03:26:32.711407673 +0900 +@@ -1412,7 +1412,7 @@ out: + * Update a journal's dynamic superblock fields to show that journal is empty. + * Write updated superblock to disk waiting for IO to complete. + */ +-static void jbd2_mark_journal_empty(journal_t *journal) ++static void jbd2_mark_journal_empty(journal_t *journal, int write_op) + { + journal_superblock_t *sb = journal->j_superblock; + +@@ -1430,7 +1430,7 @@ static void jbd2_mark_journal_empty(jour + sb->s_start = cpu_to_be32(0); + read_unlock(&journal->j_state_lock); + +- jbd2_write_superblock(journal, WRITE_FUA); ++ jbd2_write_superblock(journal, write_op); + + /* Log is no longer empty */ + write_lock(&journal->j_state_lock); +@@ -1716,7 +1716,13 @@ int jbd2_journal_destroy(journal_t *jour + if (journal->j_sb_buffer) { + if (!is_journal_aborted(journal)) { + mutex_lock(&journal->j_checkpoint_mutex); +- jbd2_mark_journal_empty(journal); ++ ++ write_lock(&journal->j_state_lock); ++ journal->j_tail_sequence = ++ ++journal->j_transaction_sequence; ++ write_unlock(&journal->j_state_lock); ++ ++ jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA); + mutex_unlock(&journal->j_checkpoint_mutex); + } else + err = -EIO; +@@ -1975,7 +1981,7 @@ int jbd2_journal_flush(journal_t *journa + * the magic code for a fully-recovered superblock. Any future + * commits of data to the journal will restore the current + * s_start value. */ +- jbd2_mark_journal_empty(journal); ++ jbd2_mark_journal_empty(journal, WRITE_FUA); + mutex_unlock(&journal->j_checkpoint_mutex); + write_lock(&journal->j_state_lock); + J_ASSERT(!journal->j_running_transaction); +@@ -2021,7 +2027,7 @@ int jbd2_journal_wipe(journal_t *journal + if (write) { + /* Lock to make assertions happy... */ + mutex_lock(&journal->j_checkpoint_mutex); +- jbd2_mark_journal_empty(journal); ++ jbd2_mark_journal_empty(journal, WRITE_FUA); + mutex_unlock(&journal->j_checkpoint_mutex); + } + +_ + diff --git a/series b/series index 039b3fae..6586829f 100644 --- a/series +++ b/series @@ -36,6 +36,8 @@ return-hole-from-ext4_map_blocks cleanup-handling-of-bh-b_state-in-DAX-mmap more-efficient-SEEK_DATA-implementation +fix-jbd2_journal_destroy-for-umount-path + ########################################## # unstable patches #################################################### diff --git a/timestamps b/timestamps index 4139957b..3b0e98f5 100755 --- a/timestamps +++ b/timestamps @@ -52,12 +52,13 @@ touch -d @1457496490 rename-and-split-get-blocks-function touch -d @1457498146 move-trans-handling-and-completion-deferal-out-of-_ext4_get_block touch -d @1457498206 simplify-io_end-handling-for-AIO-DIO touch -d @1457498361 remove-i_ioend_count -touch -d @1457498421 stable-boundary touch -d @1457580415 fix-setting-of-referenced-bit-in-ext4_es_lookup_extent touch -d @1457581617 factor-out-determining-of-hole-size touch -d @1457582040 return-hole-from-ext4_map_blocks touch -d @1457582607 cleanup-handling-of-bh-b_state-in-DAX-mmap touch -d @1457583073 more-efficient-SEEK_DATA-implementation -touch -d @1457583485 status -touch -d @1457585148 series -touch -d @1457585152 timestamps +touch -d @1457583133 stable-boundary +touch -d @1457585245 fix-jbd2_journal_destroy-for-umount-path +touch -d @1457585257 series +touch -d @1457585260 status +touch -d @1457585351 timestamps -- 2.11.4.GIT