add patch unlock-unused-pages-more-timely-when-doing-writeback
[ext4-patch-queue.git] / fix-deadlock-while-checkpoint-thread-waits-commit-thread-to-finish
blob790469e531473eebe6786db83048e2a2a5793f5c
1 jbd2: fix deadlock while checkpoint thread waits commit thread to finish
3 From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
5 This issue was found when I tried to put checkpoint work in a separate thread,
6 the deadlock below happened:
7          Thread1                                |   Thread2
8 __jbd2_log_wait_for_space                       |
9 jbd2_log_do_checkpoint (hold j_checkpoint_mutex)|
10   if (jh->b_transaction != NULL)                |
11     ...                                         |
12     jbd2_log_start_commit(journal, tid);        |jbd2_update_log_tail
13                                                 |  will lock j_checkpoint_mutex,
14                                                 |  but will be blocked here.
15                                                 |
16     jbd2_log_wait_commit(journal, tid);         |
17     wait_event(journal->j_wait_done_commit,     |
18      !tid_gt(tid, journal->j_commit_sequence)); |
19      ...                                        |wake_up(j_wait_done_commit)
20   }                                             |
22 then deadlock occurs, Thread1 will never be waken up.
24 To fix this issue, drop j_checkpoint_mutex in jbd2_log_do_checkpoint()
25 when we are going to wait for transaction commit.
27 Reviewed-by: Jan Kara <jack@suse.cz>
28 Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
29 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
30 ---
31  fs/jbd2/checkpoint.c | 17 +++++++++++++++--
32  fs/jbd2/journal.c    |  2 +-
33  2 files changed, 16 insertions(+), 3 deletions(-)
35 diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
36 index 26f8d7e46462..02e0b79753e7 100644
37 --- a/fs/jbd2/checkpoint.c
38 +++ b/fs/jbd2/checkpoint.c
39 @@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
40         nblocks = jbd2_space_needed(journal);
41         while (jbd2_log_space_left(journal) < nblocks) {
42                 write_unlock(&journal->j_state_lock);
43 -               mutex_lock(&journal->j_checkpoint_mutex);
44 +               mutex_lock_io(&journal->j_checkpoint_mutex);
46                 /*
47                  * Test again, another process may have checkpointed while we
48 @@ -276,9 +276,22 @@ int jbd2_log_do_checkpoint(journal_t *journal)
49                 "JBD2: %s: Waiting for Godot: block %llu\n",
50                 journal->j_devname, (unsigned long long) bh->b_blocknr);
52 +                       if (batch_count)
53 +                               __flush_batch(journal, &batch_count);
54                         jbd2_log_start_commit(journal, tid);
55 +                       /*
56 +                        * jbd2_journal_commit_transaction() may want
57 +                        * to take the checkpoint_mutex if JBD2_FLUSHED
58 +                        * is set, jbd2_update_log_tail() called by
59 +                        * jbd2_journal_commit_transaction() may also take
60 +                        * checkpoint_mutex.  So we need to temporarily
61 +                        * drop it.
62 +                        */
63 +                       mutex_unlock(&journal->j_checkpoint_mutex);
64                         jbd2_log_wait_commit(journal, tid);
65 -                       goto retry;
66 +                       mutex_lock_io(&journal->j_checkpoint_mutex);
67 +                       spin_lock(&journal->j_list_lock);
68 +                       goto restart;
69                 }
70                 if (!buffer_dirty(bh)) {
71                         if (unlikely(buffer_write_io_error(bh)) && !result)
72 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
73 index 8ef6b6daaa7a..88d8f22d2cba 100644
74 --- a/fs/jbd2/journal.c
75 +++ b/fs/jbd2/journal.c
76 @@ -2067,7 +2067,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
77         err = jbd2_journal_skip_recovery(journal);
78         if (write) {
79                 /* Lock to make assertions happy... */
80 -               mutex_lock(&journal->j_checkpoint_mutex);
81 +               mutex_lock_io(&journal->j_checkpoint_mutex);
82                 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
83                 mutex_unlock(&journal->j_checkpoint_mutex);
84         }
85 -- 
86 2.17.2