1 ext4: Reduce contention on s_orphan_lock
3 From: Jan Kara <jack@suse.cz>
5 Shuffle code around in ext4_orphan_add() and ext4_orphan_del() so that
6 we avoid taking the global s_orphan_lock in some cases and hold it for
7 a shorter time in other cases.
9 Signed-off-by: Jan Kara <jack@suse.cz>
10 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
12 fs/ext4/namei.c | 109 +++++++++++++++++++++++++++++++++-----------------------
13 1 file changed, 65 insertions(+), 44 deletions(-)
15 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
16 index 5fcaa85b6dc5..0486fbafb808 100644
19 @@ -2539,13 +2539,17 @@ static int empty_dir(struct inode *inode)
23 -/* ext4_orphan_add() links an unlinked or truncated inode into a list of
25 + * ext4_orphan_add() links an unlinked or truncated inode into a list of
26 * such inodes, starting at the superblock, in case we crash before the
27 * file is closed/deleted, or in case the inode truncate spans multiple
28 * transactions and the last transaction is not recovered after a crash.
30 * At filesystem recovery time, we walk this list deleting unlinked
31 * inodes and truncating linked inodes in ext4_orphan_cleanup().
33 + * Orphan list manipulation functions must be called under i_mutex unless
34 + * we are just creating the inode or deleting it.
36 int ext4_orphan_add(handle_t *handle, struct inode *inode)
38 @@ -2553,13 +2557,19 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
39 struct ext4_sb_info *sbi = EXT4_SB(sb);
40 struct ext4_iloc iloc;
47 - mutex_lock(&sbi->s_orphan_lock);
48 + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
49 + !mutex_is_locked(&inode->i_mutex));
51 + * Exit early if inode already is on orphan list. This is a big speedup
52 + * since we don't have to contend on the global s_orphan_lock.
54 if (!list_empty(&EXT4_I(inode)->i_orphan))
59 * Orphan handling is only valid for files with data blocks
60 @@ -2573,44 +2583,47 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
61 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
62 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
67 err = ext4_reserve_inode_write(handle, inode, &iloc);
72 + mutex_lock(&sbi->s_orphan_lock);
74 * Due to previous errors inode may be already a part of on-disk
75 * orphan list. If so skip on-disk list modification.
77 - if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
78 - (le32_to_cpu(sbi->s_es->s_inodes_count)))
81 - /* Insert this inode at the head of the on-disk orphan list... */
82 - NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
83 - sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
84 - err = ext4_handle_dirty_super(handle, sb);
85 - rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
89 - /* Only add to the head of the in-memory list if all the
90 - * previous operations succeeded. If the orphan_add is going to
91 - * fail (possibly taking the journal offline), we can't risk
92 - * leaving the inode on the orphan list: stray orphan-list
93 - * entries can cause panics at unmount time.
95 - * This is safe: on error we're going to ignore the orphan list
96 - * anyway on the next recovery. */
99 - list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
100 + if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
101 + (le32_to_cpu(sbi->s_es->s_inodes_count))) {
102 + /* Insert this inode at the head of the on-disk orphan list */
103 + NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
104 + sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
107 + list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
108 + mutex_unlock(&sbi->s_orphan_lock);
111 + err = ext4_handle_dirty_super(handle, sb);
112 + rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
117 + * We have to remove inode from in-memory list if
118 + * addition to on disk orphan list failed. Stray orphan
119 + * list entries can cause panics at unmount time.
121 + mutex_lock(&sbi->s_orphan_lock);
122 + list_del(&EXT4_I(inode)->i_orphan);
123 + mutex_unlock(&sbi->s_orphan_lock);
126 jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
127 jbd_debug(4, "orphan inode %lu will point to %d\n",
128 inode->i_ino, NEXT_ORPHAN(inode));
130 - mutex_unlock(&sbi->s_orphan_lock);
132 ext4_std_error(sb, err);
135 @@ -2631,13 +2644,18 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
136 if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
139 - mutex_lock(&sbi->s_orphan_lock);
140 + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
141 + !mutex_is_locked(&inode->i_mutex));
142 + /* Do this quick check before taking global s_orphan_lock. */
143 if (list_empty(&ei->i_orphan))
147 - ino_next = NEXT_ORPHAN(inode);
148 - prev = ei->i_orphan.prev;
150 + /* Grab inode buffer early before taking global s_orphan_lock */
151 + err = ext4_reserve_inode_write(handle, inode, &iloc);
154 + mutex_lock(&sbi->s_orphan_lock);
155 jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
157 list_del_init(&ei->i_orphan);
158 @@ -2646,20 +2664,23 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
159 * transaction handle with which to update the orphan list on
160 * disk, but we still need to remove the inode from the linked
165 - err = ext4_reserve_inode_write(handle, inode, &iloc);
167 + if (!handle || err) {
168 + mutex_unlock(&sbi->s_orphan_lock);
172 + ino_next = NEXT_ORPHAN(inode);
173 + prev = ei->i_orphan.prev;
174 if (prev == &sbi->s_orphan) {
175 jbd_debug(4, "superblock will point to %u\n", ino_next);
176 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
177 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
180 + mutex_unlock(&sbi->s_orphan_lock);
183 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
184 + mutex_unlock(&sbi->s_orphan_lock);
185 err = ext4_handle_dirty_super(handle, inode->i_sb);
187 struct ext4_iloc iloc2;
188 @@ -2669,20 +2690,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
189 jbd_debug(4, "orphan inode %lu will point to %u\n",
190 i_prev->i_ino, ino_next);
191 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
194 + mutex_unlock(&sbi->s_orphan_lock);
197 NEXT_ORPHAN(i_prev) = ino_next;
198 + mutex_unlock(&sbi->s_orphan_lock);
199 err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
203 NEXT_ORPHAN(inode) = 0;
204 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
207 ext4_std_error(inode->i_sb, err);
209 - mutex_unlock(&sbi->s_orphan_lock);