add patch drop-unneeded-BUFFER_TRACE-in-ext4_delete_inline_entry
[ext4-patch-queue.git] / fix-jbd2_journal_destroy-for-umount-path
blobf8b80d8de40e6a5d2f427b1fcb109cccb382abfb
1 jbd2: fix FS corruption possibility in jbd2_journal_destroy() on umount path
3 From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
5 On the umount path, jbd2_journal_destroy() writes the latest transaction ID
6 (->j_tail_sequence) to be used at the next mount.
8 The bug is that ->j_tail_sequence does not hold the latest transaction ID
9 in some cases. So, at the next mount, there is a chance of conflicting with
10 remaining (not yet overwritten) transactions.
12         mount (id=10)
13         write transaction (id=11)
14         write transaction (id=12)
15         umount (id=10) <= the bug doesn't write latest ID
17         mount (id=10)
18         write transaction (id=11)
19         crash
21         mount
22         [recovery process]
23                 transaction (id=11)
24                 transaction (id=12) <= valid transaction ID, but old commit
25                                        must not be replayed
27 As shown above, this bug can cause recovery failure or FS
28 corruption.
30 So why doesn't ->j_tail_sequence point to the latest ID?
32 Because if checkpoint transactions were reclaimed under memory pressure
33 (i.e. bdev_try_to_free_page()), then ->j_tail_sequence is not updated.
34 (Another case is when __jbd2_journal_clean_checkpoint_list() is called
35 with an empty transaction.)
37 So in the above cases, ->j_tail_sequence does not point to the latest
38 transaction ID on the umount path. In addition, REQ_FLUSH for the checkpoint
39 is not issued either.
41 So, to fix this problem with minimal changes, this patch updates
42 ->j_tail_sequence and issues REQ_FLUSH.  (With more complex changes,
43 some optimizations would be possible, for example to avoid an unnecessary
44 REQ_FLUSH.)
46 BTW,
48         journal->j_tail_sequence =
49                 ++journal->j_transaction_sequence;
51 Incrementing ->j_transaction_sequence seems to be unnecessary, but
52 ext3 does this.
54 Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
55 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
56 Cc: stable@vger.kernel.org
57 ---
59  fs/jbd2/journal.c |   17 ++++++++++++-----
60  1 file changed, 12 insertions(+), 5 deletions(-)
62 diff -puN fs/jbd2/journal.c~ext4-umount-fix fs/jbd2/journal.c
63 --- linux/fs/jbd2/journal.c~ext4-umount-fix     2016-02-28 10:16:41.997093008 +0900
64 +++ linux-hirofumi/fs/jbd2/journal.c    2016-02-28 23:01:25.865647233 +0900
65 @@ -1408,11 +1408,12 @@ out:
66  /**
67   * jbd2_mark_journal_empty() - Mark on disk journal as empty.
68   * @journal: The journal to update.
69 + * @write_op: With which operation should we write the journal sb
70   *
71   * Update a journal's dynamic superblock fields to show that journal is empty.
72   * Write updated superblock to disk waiting for IO to complete.
73   */
74 -static void jbd2_mark_journal_empty(journal_t *journal)
75 +static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
76  {
77         journal_superblock_t *sb = journal->j_superblock;
79 @@ -1430,7 +1431,7 @@ static void jbd2_mark_journal_empty(jour
80         sb->s_start    = cpu_to_be32(0);
81         read_unlock(&journal->j_state_lock);
83 -       jbd2_write_superblock(journal, WRITE_FUA);
84 +       jbd2_write_superblock(journal, write_op);
86         /* Log is no longer empty */
87         write_lock(&journal->j_state_lock);
88 @@ -1716,7 +1717,13 @@ int jbd2_journal_destroy(journal_t *jour
89         if (journal->j_sb_buffer) {
90                 if (!is_journal_aborted(journal)) {
91                         mutex_lock(&journal->j_checkpoint_mutex);
92 -                       jbd2_mark_journal_empty(journal);
94 +                       write_lock(&journal->j_state_lock);
95 +                       journal->j_tail_sequence =
96 +                               ++journal->j_transaction_sequence;
97 +                       write_unlock(&journal->j_state_lock);
99 +                       jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
100                         mutex_unlock(&journal->j_checkpoint_mutex);
101                 } else
102                         err = -EIO;
103 @@ -1975,7 +1982,7 @@ int jbd2_journal_flush(journal_t *journa
104          * the magic code for a fully-recovered superblock.  Any future
105          * commits of data to the journal will restore the current
106          * s_start value. */
107 -       jbd2_mark_journal_empty(journal);
108 +       jbd2_mark_journal_empty(journal, WRITE_FUA);
109         mutex_unlock(&journal->j_checkpoint_mutex);
110         write_lock(&journal->j_state_lock);
111         J_ASSERT(!journal->j_running_transaction);
112 @@ -2021,7 +2028,7 @@ int jbd2_journal_wipe(journal_t *journal
113         if (write) {
114                 /* Lock to make assertions happy... */
115                 mutex_lock(&journal->j_checkpoint_mutex);
116 -               jbd2_mark_journal_empty(journal);
117 +               jbd2_mark_journal_empty(journal, WRITE_FUA);
118                 mutex_unlock(&journal->j_checkpoint_mutex);
119         }