1 jbd2: speed up jbd2_journal_get_[write|undo]_access()
3 From: Jan Kara <jack@suse.cz>
5 jbd2_journal_get_write_access() and jbd2_journal_get_undo_access() are
6 frequently called for buffers that are already part of the running
7 transaction - most frequently it is the case for bitmaps, inode table
8 blocks, and superblock. Since in such cases we have nothing to do, it is
9 unfortunate that we still grab a reference to the journal head, lock the bh,
10 and lock bh_state only to find out there's nothing to do.
12 Improving this is a bit subtle, though: until we find out that the journal
13 head is attached to the running transaction, it can disappear from under
14 us because checkpointing / commit decided it's no longer needed. We deal
15 with this by protecting the journal_head slab with RCU. We still have to be
16 careful about the journal head being freed & reallocated within the slab and
17 about exposing the journal head in a consistent state (in particular
18 b_modified and b_frozen_data must be in the correct state before we allow
19 the user to touch the buffer).
21 Signed-off-by: Jan Kara <jack@suse.cz>
22 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
24 fs/jbd2/journal.c | 2 +-
25 fs/jbd2/transaction.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++---
26 2 files changed, 73 insertions(+), 5 deletions(-)
28 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
29 index b96bd8076b70..f29872ed4097 100644
30 --- a/fs/jbd2/journal.c
31 +++ b/fs/jbd2/journal.c
32 @@ -2330,7 +2330,7 @@ static int jbd2_journal_init_journal_head_cache(void)
33 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
34 sizeof(struct journal_head),
36 - SLAB_TEMPORARY, /* flags */
37 + SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
40 if (!jbd2_journal_head_cache) {
41 diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
42 index 5207825d1038..a91f639af6c3 100644
43 --- a/fs/jbd2/transaction.c
44 +++ b/fs/jbd2/transaction.c
45 @@ -901,6 +901,12 @@ repeat:
46 JBUFFER_TRACE(jh, "no transaction");
47 J_ASSERT_JH(jh, !jh->b_next_transaction);
48 JBUFFER_TRACE(jh, "file as BJ_Reserved");
50 + * Make sure all stores to jh (b_modified, b_frozen_data) are
51 + * visible before attaching it to the running transaction.
52 + * Paired with barrier in jbd2_write_access_granted()
55 spin_lock(&journal->j_list_lock);
56 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
57 spin_unlock(&journal->j_list_lock);
58 @@ -913,8 +919,7 @@ repeat:
59 if (jh->b_frozen_data) {
60 JBUFFER_TRACE(jh, "has frozen data");
61 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
62 - jh->b_next_transaction = transaction;
67 JBUFFER_TRACE(jh, "owned by older transaction");
68 @@ -968,6 +973,13 @@ repeat:
70 jbd2_freeze_jh_data(jh);
74 + * Make sure all stores to jh (b_modified, b_frozen_data) are visible
75 + * before attaching it to the running transaction. Paired with barrier
76 + * in jbd2_write_access_granted()
79 jh->b_next_transaction = transaction;
82 @@ -987,6 +999,55 @@ out:
86 +/* Fast check whether buffer is already attached to the required transaction */
87 +static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
89 + struct journal_head *jh;
92 + /* Dirty buffers require special handling... */
93 + if (buffer_dirty(bh))
97 + * RCU protects us from dereferencing freed pages. So the checks we do
98 + * are guaranteed not to oops. However the jh slab object can get freed
99 + * & reallocated while we work with it. So we have to be careful. When
100 + * we see jh attached to the running transaction, we know it must stay
101 + * so until the transaction is committed. Thus jh won't be freed and
102 + * will be attached to the same bh while we run. However it can
103 + * happen jh gets freed, reallocated, and attached to the transaction
104 + * just after we get pointer to it from bh. So we have to be careful
105 + * and recheck jh still belongs to our bh before we return success.
108 + if (!buffer_jbd(bh))
110 + /* This should be bh2jh() but that doesn't work with inline functions */
111 + jh = READ_ONCE(bh->b_private);
114 + if (jh->b_transaction != handle->h_transaction &&
115 + jh->b_next_transaction != handle->h_transaction)
118 + * There are two reasons for the barrier here:
119 + * 1) Make sure to fetch b_bh after we did previous checks so that we
120 + * detect when jh went through free, realloc, attach to transaction
121 + * while we were checking. Paired with implicit barrier in that path.
122 + * 2) So that access to bh done after jbd2_write_access_granted()
123 + * doesn't get reordered and see inconsistent state of concurrent
124 + * do_get_write_access().
127 + if (unlikely(jh->b_bh != bh))
136 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
137 * @handle: transaction to add buffer modifications to
138 @@ -1000,9 +1061,13 @@ out:
140 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
142 - struct journal_head *jh = jbd2_journal_add_journal_head(bh);
143 + struct journal_head *jh;
146 + if (jbd2_write_access_granted(handle, bh))
149 + jh = jbd2_journal_add_journal_head(bh);
150 /* We do not want to get caught playing with fields which the
151 * log thread also manipulates. Make sure that the buffer
152 * completes any outstanding IO before proceeding. */
153 @@ -1133,11 +1198,14 @@ out:
154 int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
157 - struct journal_head *jh = jbd2_journal_add_journal_head(bh);
158 + struct journal_head *jh;
159 char *committed_data = NULL;
161 JBUFFER_TRACE(jh, "entry");
162 + if (jbd2_write_access_granted(handle, bh))
165 + jh = jbd2_journal_add_journal_head(bh);
167 * Do this first --- it can drop the journal lock, so we want to
168 * make sure that obtaining the committed_data is done