add patch remove-redundant-condition-check
[ext4-patch-queue.git] / avoid-long-hold-times-of-j_state_lock-while-committing
blob89718724e9305dee2a0fce6c7cebd69aa3764d11
1 jbd2: avoid long hold times of j_state_lock while committing a transaction
3 From: Jan Kara <jack@suse.cz>
5 We can hold j_state_lock for writing at the beginning of
6 jbd2_journal_commit_transaction() for a rather long time (reportedly for
7 30 ms) due cleaning revoke bits of all revoked buffers under it. The
8 handling of revoke tables as well as cleaning of t_reserved_list, and
9 checkpoint lists does not need j_state_lock for anything. It is only
10 needed to prevent new handles from joining the transaction. Generally
11 T_LOCKED transaction state prevents new handles from joining the
12 transaction - except for reserved handles which have to allowed to join
13 while we wait for other handles to complete.
15 To prevent reserved handles from joining the transaction while cleaning
16 up lists, add new transaction state T_SWITCH and watch for it when
17 starting reserved handles. With this we can just drop the lock for
18 operations that don't need it.
20 Reported-and-tested-by: Adrian Hunter <adrian.hunter@intel.com>
21 Suggested-by: "Theodore Y. Ts'o" <tytso@mit.edu>
22 Signed-off-by: Jan Kara <jack@suse.cz>
23 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
24 ---
25  fs/jbd2/commit.c      |  3 +++
26  fs/jbd2/transaction.c | 43 ++++++++++++++++++++++++++++++++++++++-----
27  include/linux/jbd2.h  |  1 +
28  3 files changed, 42 insertions(+), 5 deletions(-)
30 Changes since v1:
31 - fixed handling of reserved handles
32 - rebased on top of 4.20-rc1
33 - tested dioread-nolock xfstest run
35 diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
36 index 150cc030b4d7..2eb55c3361a8 100644
37 --- a/fs/jbd2/commit.c
38 +++ b/fs/jbd2/commit.c
39 @@ -439,6 +439,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
40                 finish_wait(&journal->j_wait_updates, &wait);
41         }
42         spin_unlock(&commit_transaction->t_handle_lock);
43 +       commit_transaction->t_state = T_SWITCH;
44 +       write_unlock(&journal->j_state_lock);
46         J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
47                         journal->j_max_transaction_buffers);
48 @@ -505,6 +507,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
49         atomic_sub(atomic_read(&journal->j_reserved_credits),
50                    &commit_transaction->t_outstanding_credits);
52 +       write_lock(&journal->j_state_lock);
53         trace_jbd2_commit_flushing(journal, commit_transaction);
54         stats.run.rs_flushing = jiffies;
55         stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
56 diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
57 index c0b66a7a795b..116d8251fbff 100644
58 --- a/fs/jbd2/transaction.c
59 +++ b/fs/jbd2/transaction.c
60 @@ -138,9 +138,9 @@ static inline void update_t_max_wait(transaction_t *transaction,
61  }
63  /*
64 - * Wait until running transaction passes T_LOCKED state. Also starts the commit
65 - * if needed. The function expects running transaction to exist and releases
66 - * j_state_lock.
67 + * Wait until running transaction passes to T_FLUSH state and new transaction
68 + * can thus be started. Also starts the commit if needed. The function expects
69 + * running transaction to exist and releases j_state_lock.
70   */
71  static void wait_transaction_locked(journal_t *journal)
72         __releases(journal->j_state_lock)
73 @@ -160,6 +160,32 @@ static void wait_transaction_locked(journal_t *journal)
74         finish_wait(&journal->j_wait_transaction_locked, &wait);
75  }
77 +/*
78 + * Wait until running transaction transitions from T_SWITCH to T_FLUSH
79 + * state and new transaction can thus be started. The function releases
80 + * j_state_lock.
81 + */
82 +static void wait_transaction_switching(journal_t *journal)
83 +       __releases(journal->j_state_lock)
85 +       DEFINE_WAIT(wait);
87 +       if (WARN_ON(!journal->j_running_transaction ||
88 +                   journal->j_running_transaction->t_state != T_SWITCH))
89 +               return;
90 +       prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
91 +                       TASK_UNINTERRUPTIBLE);
92 +       read_unlock(&journal->j_state_lock);
93 +       /*
94 +        * We don't call jbd2_might_wait_for_commit() here as there's no
95 +        * waiting for outstanding handles happening anymore in T_SWITCH state
96 +        * and handling of reserved handles actually relies on that for
97 +        * correctness.
98 +        */
99 +       schedule();
100 +       finish_wait(&journal->j_wait_transaction_locked, &wait);
103  static void sub_reserved_credits(journal_t *journal, int blocks)
105         atomic_sub(blocks, &journal->j_reserved_credits);
106 @@ -183,7 +209,8 @@ static int add_transaction_credits(journal_t *journal, int blocks,
107          * If the current transaction is locked down for commit, wait
108          * for the lock to be released.
109          */
110 -       if (t->t_state == T_LOCKED) {
111 +       if (t->t_state != T_RUNNING) {
112 +               WARN_ON_ONCE(t->t_state >= T_FLUSH);
113                 wait_transaction_locked(journal);
114                 return 1;
115         }
116 @@ -360,8 +387,14 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
117                 /*
118                  * We have handle reserved so we are allowed to join T_LOCKED
119                  * transaction and we don't have to check for transaction size
120 -                * and journal space.
121 +                * and journal space. But we still have to wait while running
122 +                * transaction is being switched to a committing one as it
123 +                * won't wait for any handles anymore.
124                  */
125 +               if (transaction->t_state == T_SWITCH) {
126 +                       wait_transaction_switching(journal);
127 +                       goto repeat;
128 +               }
129                 sub_reserved_credits(journal, blocks);
130                 handle->h_reserved = 0;
131         }
132 diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
133 index b708e5169d1d..118d00a64184 100644
134 --- a/include/linux/jbd2.h
135 +++ b/include/linux/jbd2.h
136 @@ -575,6 +575,7 @@ struct transaction_s
137         enum {
138                 T_RUNNING,
139                 T_LOCKED,
140 +               T_SWITCH,
141                 T_FLUSH,
142                 T_COMMIT,
143                 T_COMMIT_DFLUSH,
144 -- 
145 2.16.4