jbd2: Fix jbd2_journal_commit_transaction()/jbd2_journal_try_to_free_buffers() race

From: Mingming Cao <cmm@us.ibm.com>

There are a few cases where direct I/O could race with kjournald
flushing data buffers, which can result in direct I/O returning an
EIO error (a condensed sketch of the racing paths follows the list):

1) journal_submit_data_buffers() tries to get the bh_state lock. If
the trylock fails, it drops j_list_lock and sleeps waiting for the
bh_state lock, while still holding a reference on the buffer. In the
meantime, journal_try_to_free_buffers() can clean up the journal head
and call try_to_free_buffers(). try_to_free_buffers() then fails
because of the reference held by journal_submit_data_buffers(), which
in turn makes invalidate_inode_pages2(), and hence direct I/O, fail.

2) When the buffer is on t_locked_list waiting for I/O to finish, we
hold a reference and give up the CPU if we cannot get the bh_state
lock. This causes try_to_free_buffers() to fail.

3) When journal_submit_data_buffers() sees that a buffer (bh1) is
dirty but fails to lock it, it releases j_list_lock and submits the
other buffers collected so far, with the reference to bh1 still held.
During this time journal_try_to_free_buffers() could clean up the
journal head of bh1 and remove it from the t_syncdata_list. Then
try_to_free_buffers() fails because of the reference held by
journal_submit_data_buffers().

4) journal_submit_data_buffers() has already removed the jh from the
bh (having found that the buffer is already being synced), but still
holds a reference to the buffer head. If journal_try_to_free_buffers()
is called at this point, try_to_free_buffers() runs (since there is no
jh attached to the buffer) and fails because
journal_submit_data_buffers() has not finished its cleanup yet.
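
To make the window concrete, here is an illustrative, simplified
sketch of the two racing paths as they look before this patch
(condensed from the code the hunks below touch; locking details and
error handling are omitted, so this is not the exact 2.6.26 source):

	/* commit side: data flush loop in fs/jbd2/commit.c, simplified */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_sync_datalist) {
		jh = commit_transaction->t_sync_datalist;
		bh = jh2bh(jh);
		get_bh(bh);			/* reference held ...    */
		if (buffer_dirty(bh) && test_set_buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			lock_buffer(bh);	/* ... across this sleep */
			spin_lock(&journal->j_list_lock);
		}
		/* ... submit/cleanup, eventually put_bh(bh) ... */
	}

	/*
	 * DIO side, racing with the above: invalidate_inode_pages2() ends
	 * up in jbd2_journal_try_to_free_buffers(), whose call to
	 * try_to_free_buffers(page) fails while the commit side still
	 * holds its bh reference (elevated b_count); the failure is
	 * propagated back to direct I/O as EIO.
	 */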

The fix for the first three races is to drop the reference on the
buffer head when releasing j_list_lock, give up the CPU and retry the
whole operation.

This patch also fixes the race where the data buffers have already
been flushed to disk and the journal head cleared by
journal_submit_data_buffers(), but it has not yet had a chance to
release the buffer head reference before journal_try_to_free_buffers()
kicks in.
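
Condensed from the hunks below (illustrative only; see the diff for
the exact context), the commit-side pattern after the fix becomes:

	if (!inverted_lock(journal, bh, locked)) {
		/*
		 * inverted_lock() now unlocks the buffer if we had
		 * locked it, drops our bh reference and schedule()s
		 * before returning 0, so nothing pins the buffer while
		 * this thread is off the CPU.
		 */
		spin_lock(&journal->j_list_lock);
		continue;	/* retry the whole operation */
	}

On the other side, jbd2_journal_try_to_free_buffers() retries
try_to_free_buffers() once more under j_list_lock, which serializes
against the commit code dropping its last buffer reference and
removing the bh from the sync data list.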

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Mingming Cao <mcao@us.ibm.com>
---
 fs/jbd2/commit.c      |   23 ++++++++++++++++-------
 fs/jbd2/transaction.c |   13 +++++++++++++
 2 files changed, 29 insertions(+), 7 deletions(-)

Index: linux-2.6.26-rc2/fs/jbd2/commit.c
===================================================================
--- linux-2.6.26-rc2.orig/fs/jbd2/commit.c	2008-05-16 11:03:25.000000000 -0700
+++ linux-2.6.26-rc2/fs/jbd2/commit.c	2008-05-16 11:03:27.000000000 -0700
@@ -81,12 +81,16 @@ nope:
 
 /*
  * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
- * held. For ranking reasons we must trylock. If we lose, schedule away and
+ * held. For ranking reasons we must trylock. If we lose, unlock the buffer
+ * if needed, drop the reference on the buffer, schedule away and
  * return 0. j_list_lock is dropped in this case.
  */
-static int inverted_lock(journal_t *journal, struct buffer_head *bh)
+static int inverted_lock(journal_t *journal, struct buffer_head *bh, int locked)
 {
 	if (!jbd_trylock_bh_state(bh)) {
+		if (locked)
+			unlock_buffer(bh);
+		put_bh(bh);
 		spin_unlock(&journal->j_list_lock);
 		schedule();
 		return 0;
@@ -220,7 +224,7 @@ static int journal_wait_on_locked_list(j
 				ret = -EIO;
 			spin_lock(&journal->j_list_lock);
 		}
-		if (!inverted_lock(journal, bh)) {
+		if (!inverted_lock(journal, bh, 0)) {
 			put_bh(bh);
 			spin_lock(&journal->j_list_lock);
 			continue;
@@ -290,19 +294,24 @@ write_out_data:
 		if (buffer_dirty(bh)) {
 			if (test_set_buffer_locked(bh)) {
 				BUFFER_TRACE(bh, "needs blocking lock");
+				put_bh(bh);
 				spin_unlock(&journal->j_list_lock);
 				/* Write out all data to prevent deadlocks */
 				journal_do_submit_data(wbuf, bufs);
 				bufs = 0;
-				lock_buffer(bh);
 				spin_lock(&journal->j_list_lock);
+				continue;
 			}
 			locked = 1;
 		}
-		/* We have to get bh_state lock. Again out of order, sigh. */
-		if (!inverted_lock(journal, bh)) {
-			jbd_lock_bh_state(bh);
+		/*
+		 * We have to get bh_state lock. If the try lock fails,
+		 * release the ref on the buffer, give up cpu and retry the
+		 * whole operation.
+		 */
+		if (!inverted_lock(journal, bh, locked)) {
 			spin_lock(&journal->j_list_lock);
+			continue;
 		}
 		/* Someone already cleaned up the buffer? */
 		if (!buffer_jbd(bh)

Index: linux-2.6.26-rc2/fs/jbd2/transaction.c
===================================================================
--- linux-2.6.26-rc2.orig/fs/jbd2/transaction.c	2008-05-16 11:03:20.000000000 -0700
+++ linux-2.6.26-rc2/fs/jbd2/transaction.c	2008-05-16 11:03:27.000000000 -0700
@@ -1722,6 +1722,19 @@ int jbd2_journal_try_to_free_buffers(jou
 			goto busy;
 	} while ((bh = bh->b_this_page) != head);
 	ret = try_to_free_buffers(page);
+	if (ret == 0) {
+		/*
+		 * It is possible that journal_submit_data_buffers()
+		 * still holds a bh reference even after it has cleared
+		 * the jh via journal_remove_journal_head(), which makes
+		 * try_to_free_buffers() fail. Retry under j_list_lock
+		 * to wait for journal_submit_data_buffers() to finish
+		 * removing the bh from the sync data list.
+		 */
+		spin_lock(&journal->j_list_lock);
+		ret = try_to_free_buffers(page);
+		spin_unlock(&journal->j_list_lock);
+	}
 busy:
 	return ret;