1 ext4: akpm's locking hack to fix locking delays
3 This is a port of the following patch from Andrew Morton to ext4:
5 http://lkml.org/lkml/2008/10/3/22
7 This fixes a major contention problem in do_get_write_access() when a
8 buffer is modified in both the current and the committing transactions; the hack is opt-in via the new akpm_lock_hack mount option.
10 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
11 Cc: akpm@linux-foundation.org
13 fs/ext4/ext4.h | 3 +++
14 fs/ext4/super.c | 13 ++++++++++++-
15 fs/jbd2/transaction.c | 12 ++++++++++--
16 include/linux/jbd2.h | 1 +
17 4 files changed, 26 insertions(+), 3 deletions(-)
19 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
20 index ef9bf04..5c64ba6 100644
23 @@ -1010,6 +1010,9 @@ struct ext4_inode_info {
25 #define EXT4_MOUNT2_DUMMY_ENCRYPTION 0x80000000 /* Use dummy encryption */
27 +#define EXT4_MOUNT2_AKPM_LOCK_HACK 0x80000000 /* akpm lock hack */
30 #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
32 #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
33 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
34 index 54ac1e5..d179efc 100644
37 @@ -1149,7 +1149,8 @@ enum {
38 Opt_inode_readahead_blks, Opt_journal_ioprio,
39 Opt_dioread_nolock, Opt_dioread_lock,
40 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
41 - Opt_max_dir_size_kb, Opt_encrypt_key_sig, Opt_dummy_encryption
42 + Opt_max_dir_size_kb, Opt_encrypt_key_sig, Opt_dummy_encryption,
46 static const match_table_t tokens = {
47 @@ -1206,6 +1207,7 @@ static const match_table_t tokens = {
48 {Opt_nobarrier, "nobarrier"},
49 {Opt_i_version, "i_version"},
50 {Opt_stripe, "stripe=%u"},
51 + {Opt_akpm_lock_hack, "akpm_lock_hack"},
52 {Opt_delalloc, "delalloc"},
53 {Opt_nodelalloc, "nodelalloc"},
54 {Opt_removed, "mblk_io_submit"},
55 @@ -1466,6 +1468,9 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
57 sb->s_flags |= MS_I_VERSION;
59 + case Opt_akpm_lock_hack:
60 + set_opt2(sb, AKPM_LOCK_HACK);
64 for (m = ext4_mount_opts; m->token != Opt_err; m++)
65 @@ -1834,6 +1839,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
66 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
67 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
68 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
69 + if (test_opt2(sb, AKPM_LOCK_HACK))
70 + seq_puts(seq, ",akpm_lock_hack");
71 if (sb->s_flags & MS_I_VERSION)
72 SEQ_OPTS_PUTS("i_version");
73 if (nodefs || sbi->s_stripe)
74 @@ -4431,6 +4438,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
75 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
77 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
78 + if (test_opt2(sb, AKPM_LOCK_HACK))
79 + journal->j_flags |= JBD2_LOCK_HACK;
81 + journal->j_flags &= ~JBD2_LOCK_HACK;
82 write_unlock(&journal->j_state_lock);
85 diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
86 index 93ffee2..4ba2b76 100644
87 --- a/fs/jbd2/transaction.c
88 +++ b/fs/jbd2/transaction.c
89 @@ -784,6 +784,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
90 char *frozen_buffer = NULL;
92 unsigned long start_lock, time_lock;
95 WARN_ON(!transaction);
96 if (is_handle_aborted(handle))
97 @@ -799,7 +800,13 @@ repeat:
98 /* @@@ Need to check for errors here at some point. */
100 start_lock = jiffies;
102 + if (journal->j_flags & JBD2_LOCK_HACK) {
103 + if (trylock_buffer(bh))
104 + locked = 1; /* lolz */
109 jbd_lock_bh_state(bh);
111 /* If it takes too long to lock the buffer, trace it */
112 @@ -846,7 +853,8 @@ repeat:
113 set_buffer_jbddirty(bh);
121 if (is_handle_aborted(handle)) {
122 diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
123 index 704b9a5..907a108 100644
124 --- a/include/linux/jbd2.h
125 +++ b/include/linux/jbd2.h
126 @@ -1007,6 +1007,7 @@ struct journal_s
127 #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
128 * data write error in ordered
130 +#define JBD2_LOCK_HACK 0x080 /* akpm's locking hack */
133 * Function declarations for the journaling transaction and buffer