add patch fix-jbd2_journal_destroy-for-umount-path
[ext4-patch-queue.git] / fix-jbd2_journal_destroy-for-umount-path
blob11ccf987b71fc2266f88487385e7f9d8e103079d
1 jbd2: fix jbd2_journal_destory() for umount path
3 From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
5 On umount path, jbd2_journal_destroy() writes latest transaction ID
6 (->j_tail_sequence) to be used at next mount.
8 The bug is that ->j_tail_sequence is not holding latest transaction ID
9 in some cases. So, at next mount, there is chance to conflict with
10 remaining (not overwritten yet) transactions.
12         mount (id=10)
13         write transaction (id=11)
14         write transaction (id=12)
15         umount (id=10) <= the bug doesn't write latest ID
17         mount (id=10)
18         write transaction (id=11)
19         crash
21         recovery
22         transaction (id=11)
23         transaction (id=12) <= valid transaction ID, but old commit
24                                must not replay
26 Like above, this bug become the cause of recovery failure, or FS
27 corruption.
29 So why ->j_tail_sequence doesn't point latest ID?
31 Because if checkpoint transactions was reclaimed by memory pressure
32 (i.e. bdev_try_to_free_page()), then ->j_tail_sequence is not updated.
33 (And another case is, __jbd2_journal_clean_checkpoint_list() is called
34 with empty transaction.)
36 So in above cases, ->j_tail_sequence is not pointing latest
37 transaction ID at umount path. Plus, REQ_FLUSH for checkpoint is not
38 done too.
40 So, to fix this problem with minimum changes, this patch updates
41 ->j_tail_sequence, and issue REQ_FLUSH.  (With more complex changes,
42 some optimizations would be possible to avoid unnecessary REQ_FLUSH
43 for example though.)
45 BTW,
47         journal->j_tail_sequence =
48                 ++journal->j_transaction_sequence;
50 Increment of ->j_transaction_sequence seems to be unnecessary, but
51 ext3 does this.
53 Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
54 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
55 ---
57  fs/jbd2/journal.c |   16 +++++++++++-----
58  1 file changed, 11 insertions(+), 5 deletions(-)
60 diff -puN fs/jbd2/journal.c~ext4-umount-fix fs/jbd2/journal.c
61 --- linux/fs/jbd2/journal.c~ext4-umount-fix     2016-02-25 03:26:32.710407670 +0900
62 +++ linux-hirofumi/fs/jbd2/journal.c    2016-02-25 03:26:32.711407673 +0900
63 @@ -1412,7 +1412,7 @@ out:
64   * Update a journal's dynamic superblock fields to show that journal is empty.
65   * Write updated superblock to disk waiting for IO to complete.
66   */
67 -static void jbd2_mark_journal_empty(journal_t *journal)
68 +static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
69  {
70         journal_superblock_t *sb = journal->j_superblock;
72 @@ -1430,7 +1430,7 @@ static void jbd2_mark_journal_empty(jour
73         sb->s_start    = cpu_to_be32(0);
74         read_unlock(&journal->j_state_lock);
76 -       jbd2_write_superblock(journal, WRITE_FUA);
77 +       jbd2_write_superblock(journal, write_op);
79         /* Log is no longer empty */
80         write_lock(&journal->j_state_lock);
81 @@ -1716,7 +1716,13 @@ int jbd2_journal_destroy(journal_t *jour
82         if (journal->j_sb_buffer) {
83                 if (!is_journal_aborted(journal)) {
84                         mutex_lock(&journal->j_checkpoint_mutex);
85 -                       jbd2_mark_journal_empty(journal);
87 +                       write_lock(&journal->j_state_lock);
88 +                       journal->j_tail_sequence =
89 +                               ++journal->j_transaction_sequence;
90 +                       write_unlock(&journal->j_state_lock);
92 +                       jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
93                         mutex_unlock(&journal->j_checkpoint_mutex);
94                 } else
95                         err = -EIO;
96 @@ -1975,7 +1981,7 @@ int jbd2_journal_flush(journal_t *journa
97          * the magic code for a fully-recovered superblock.  Any future
98          * commits of data to the journal will restore the current
99          * s_start value. */
100 -       jbd2_mark_journal_empty(journal);
101 +       jbd2_mark_journal_empty(journal, WRITE_FUA);
102         mutex_unlock(&journal->j_checkpoint_mutex);
103         write_lock(&journal->j_state_lock);
104         J_ASSERT(!journal->j_running_transaction);
105 @@ -2021,7 +2027,7 @@ int jbd2_journal_wipe(journal_t *journal
106         if (write) {
107                 /* Lock to make assertions happy... */
108                 mutex_lock(&journal->j_checkpoint_mutex);
109 -               jbd2_mark_journal_empty(journal);
110 +               jbd2_mark_journal_empty(journal, WRITE_FUA);
111                 mutex_unlock(&journal->j_checkpoint_mutex);
112         }