update and add new version of speedup-jbd2_journal_dirty_metadata
[ext4-patch-queue.git] / speedup-jbd2_journal_get_write_undo_access
blobd5d9195a4625ca106e28ee4239664a058d6a0628
1 jbd2: speedup jbd2_journal_get_[write|undo]_access()
3 From: Jan Kara <jack@suse.cz>
5 jbd2_journal_get_write_access() and jbd2_journal_get_undo_access() are
6 frequently called for buffers that are already part of the running
7 transaction - most frequently this is the case for bitmaps, inode table
8 blocks, and the superblock. Since in such cases we have nothing to do, it
9 is unfortunate that we still grab a reference to the journal head, lock
10 the bh, and lock bh_state only to find out there's nothing to do.
12 Improving this is a bit subtle, though: until we find out that the journal
13 head is attached to the running transaction, it can disappear from under
14 us because checkpointing / commit decided it's no longer needed. We deal
15 with this by protecting the journal_head slab with RCU. We still have to
16 be careful about the journal head being freed & reallocated within the
17 slab and about exposing the journal head in a consistent state (in
18 particular, b_modified and b_frozen_data must be in the correct state
19 before we allow the user to touch the buffer).
21 Signed-off-by: Jan Kara <jack@suse.cz>
22 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
23 ---
24  fs/jbd2/journal.c     |  2 +-
25  fs/jbd2/transaction.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++---
26  2 files changed, 73 insertions(+), 5 deletions(-)
28 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
29 index b96bd8076b70..f29872ed4097 100644
30 --- a/fs/jbd2/journal.c
31 +++ b/fs/jbd2/journal.c
32 @@ -2330,7 +2330,7 @@ static int jbd2_journal_init_journal_head_cache(void)
33         jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
34                                 sizeof(struct journal_head),
35                                 0,              /* offset */
36 -                               SLAB_TEMPORARY, /* flags */
37 +                               SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
38                                 NULL);          /* ctor */
39         retval = 0;
40         if (!jbd2_journal_head_cache) {
41 diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
42 index 5207825d1038..a91f639af6c3 100644
43 --- a/fs/jbd2/transaction.c
44 +++ b/fs/jbd2/transaction.c
45 @@ -901,6 +901,12 @@ repeat:
46                 JBUFFER_TRACE(jh, "no transaction");
47                 J_ASSERT_JH(jh, !jh->b_next_transaction);
48                 JBUFFER_TRACE(jh, "file as BJ_Reserved");
49 +               /*
50 +                * Make sure all stores to jh (b_modified, b_frozen_data) are
51 +                * visible before attaching it to the running transaction.
52 +                * Paired with barrier in jbd2_write_access_granted()
53 +                */
54 +               smp_wmb();
55                 spin_lock(&journal->j_list_lock);
56                 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
57                 spin_unlock(&journal->j_list_lock);
58 @@ -913,8 +919,7 @@ repeat:
59         if (jh->b_frozen_data) {
60                 JBUFFER_TRACE(jh, "has frozen data");
61                 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
62 -               jh->b_next_transaction = transaction;
63 -               goto done;
64 +               goto attach_next;
65         }
67         JBUFFER_TRACE(jh, "owned by older transaction");
68 @@ -968,6 +973,13 @@ repeat:
69                 frozen_buffer = NULL;
70                 jbd2_freeze_jh_data(jh);
71         }
72 +attach_next:
73 +       /*
74 +        * Make sure all stores to jh (b_modified, b_frozen_data) are visible
75 +        * before attaching it to the running transaction. Paired with barrier
76 +        * in jbd2_write_access_granted()
77 +        */
78 +       smp_wmb();
79         jh->b_next_transaction = transaction;
81  done:
82 @@ -987,6 +999,55 @@ out:
83         return error;
84  }
86 +/* Fast check whether buffer is already attached to the required transaction */
87 +static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
89 +       struct journal_head *jh;
90 +       bool ret = false;
92 +       /* Dirty buffers require special handling... */
93 +       if (buffer_dirty(bh))
94 +               return false;
96 +       /*
97 +        * RCU protects us from dereferencing freed pages. So the checks we do
98 +        * are guaranteed not to oops. However the jh slab object can get freed
99 +        * & reallocated while we work with it. So we have to be careful. When
100 +        * we see jh attached to the running transaction, we know it must stay
101 +        * so until the transaction is committed. Thus jh won't be freed and
102 +        * will be attached to the same bh while we run.  However it can
103 +        * happen jh gets freed, reallocated, and attached to the transaction
104 +        * just after we get pointer to it from bh. So we have to be careful
105 +        * and recheck jh still belongs to our bh before we return success.
106 +        */
107 +       rcu_read_lock();
108 +       if (!buffer_jbd(bh))
109 +               goto out;
110 +       /* This should be bh2jh() but that doesn't work with inline functions */
111 +       jh = READ_ONCE(bh->b_private);
112 +       if (!jh)
113 +               goto out;
114 +       if (jh->b_transaction != handle->h_transaction &&
115 +           jh->b_next_transaction != handle->h_transaction)
116 +               goto out;
117 +       /*
118 +        * There are two reasons for the barrier here:
119 +        * 1) Make sure to fetch b_bh after we did previous checks so that we
120 +        * detect when jh went through free, realloc, attach to transaction
121 +        * while we were checking. Paired with implicit barrier in that path.
122 +        * 2) So that access to bh done after jbd2_write_access_granted()
123 +        * doesn't get reordered and see inconsistent state of concurrent
124 +        * do_get_write_access().
125 +        */
126 +       smp_mb();
127 +       if (unlikely(jh->b_bh != bh))
128 +               goto out;
129 +       ret = true;
130 +out:
131 +       rcu_read_unlock();
132 +       return ret;
135  /**
136   * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
137   * @handle: transaction to add buffer modifications to
138 @@ -1000,9 +1061,13 @@ out:
140  int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
142 -       struct journal_head *jh = jbd2_journal_add_journal_head(bh);
143 +       struct journal_head *jh;
144         int rc;
146 +       if (jbd2_write_access_granted(handle, bh))
147 +               return 0;
149 +       jh = jbd2_journal_add_journal_head(bh);
150         /* We do not want to get caught playing with fields which the
151          * log thread also manipulates.  Make sure that the buffer
152          * completes any outstanding IO before proceeding. */
153 @@ -1133,11 +1198,14 @@ out:
154  int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
156         int err;
157 -       struct journal_head *jh = jbd2_journal_add_journal_head(bh);
158 +       struct journal_head *jh;
159         char *committed_data = NULL;
161         JBUFFER_TRACE(jh, "entry");
162 +       if (jbd2_write_access_granted(handle, bh))
163 +               return 0;
165 +       jh = jbd2_journal_add_journal_head(bh);
166         /*
167          * Do this first --- it can drop the journal lock, so we want to
168          * make sure that obtaining the committed_data is done
169 -- 
170 2.1.4