1 ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()
3 The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4:
4 avoid deadlock when expanding inode size" didn't include the use of
5 xattr_sem in fs/ext4/inline.c. With the addition of project quota
6 which added a new extra inode field, this exposed deadlocks in the
7 inline_data code similar to the ones fixed by 2e81a4eeedca.
9 The deadlock can be reproduced via:
12 mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768
13 mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc
16 mount -t ext4 /dev/vdc /vdc
22 [ 11.160276] =============================================
23 [ 11.161960] [ INFO: possible recursive locking detected ]
24 [ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G W
25 [ 11.161960] ---------------------------------------------
26 [ 11.161960] bash/2519 is trying to acquire lock:
27 [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd
29 [ 11.161960] but task is already holding lock:
30 [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
32 [ 11.161960] other info that might help us debug this:
33 [ 11.161960] Possible unsafe locking scenario:
37 [ 11.161960] lock(&ei->xattr_sem);
38 [ 11.161960] lock(&ei->xattr_sem);
40 [ 11.161960] *** DEADLOCK ***
42 [ 11.161960] May be due to missing lock nesting notation
44 [ 11.161960] 4 locks held by bash/2519:
45 [ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e
46 [ 11.161960] #1: (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a
47 [ 11.161960] #2: (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622
48 [ 11.161960] #3: (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
50 [ 11.161960] stack backtrace:
51 [ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf #160
52 [ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014
53 [ 11.161960] Call Trace:
54 [ 11.161960] dump_stack+0x72/0xa3
55 [ 11.161960] __lock_acquire+0xb7c/0xcb9
56 [ 11.161960] ? kvm_clock_read+0x1f/0x29
57 [ 11.161960] ? __lock_is_held+0x36/0x66
58 [ 11.161960] ? __lock_is_held+0x36/0x66
59 [ 11.161960] lock_acquire+0x106/0x18a
60 [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd
61 [ 11.161960] down_write+0x39/0x72
62 [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd
63 [ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd
64 [ 11.161960] ? _raw_read_unlock+0x22/0x2c
65 [ 11.161960] ? jbd2_journal_extend+0x1e2/0x262
66 [ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60
67 [ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d
68 [ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
69 [ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
70 [ 11.161960] ext4_try_add_inline_entry+0x69/0x152
71 [ 11.161960] ext4_add_entry+0xa3/0x848
72 [ 11.161960] ? __brelse+0x14/0x2f
73 [ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f
74 [ 11.161960] ext4_add_nondir+0x17/0x5b
75 [ 11.161960] ext4_create+0xcf/0x133
76 [ 11.161960] ? ext4_mknod+0x12f/0x12f
77 [ 11.161960] lookup_open+0x39e/0x3fb
78 [ 11.161960] ? __wake_up+0x1a/0x40
79 [ 11.161960] ? lock_acquire+0x11e/0x18a
80 [ 11.161960] path_openat+0x35c/0x67a
81 [ 11.161960] ? sched_clock_cpu+0xd7/0xf2
82 [ 11.161960] do_filp_open+0x36/0x7c
83 [ 11.161960] ? _raw_spin_unlock+0x22/0x2c
84 [ 11.161960] ? __alloc_fd+0x169/0x173
85 [ 11.161960] do_sys_open+0x59/0xcc
86 [ 11.161960] SyS_open+0x1d/0x1f
87 [ 11.161960] do_int80_syscall_32+0x4f/0x61
88 [ 11.161960] entry_INT80_32+0x2f/0x2f
89 [ 11.161960] EIP: 0xb76ad469
90 [ 11.161960] EFLAGS: 00000286 CPU: 0
91 [ 11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6
92 [ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0
93 [ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b
95 Cc: stable@vger.kernel.org # 3.10 (requires 2e81a4eeedca as a prereq)
96 Reported-by: George Spelvin <linux@sciencehorizons.net>
97 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
99 fs/ext4/inline.c | 66 ++++++++++++++++++++++++++++++------------------------------------
100 fs/ext4/xattr.c | 30 ++++++++++++------------------
101 fs/ext4/xattr.h | 32 ++++++++++++++++++++++++++++++++
102 3 files changed, 74 insertions(+), 54 deletions(-)
104 diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
105 index 437df6a1a841..99a5312ced52 100644
106 --- a/fs/ext4/inline.c
107 +++ b/fs/ext4/inline.c
108 @@ -381,7 +381,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
109 static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
113 + int ret, size, no_expand;
114 struct ext4_inode_info *ei = EXT4_I(inode);
116 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
117 @@ -391,15 +391,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
121 - down_write(&EXT4_I(inode)->xattr_sem);
122 + ext4_write_lock_xattr(inode, &no_expand);
124 if (ei->i_inline_off)
125 ret = ext4_update_inline_data(handle, inode, len);
127 ret = ext4_create_inline_data(handle, inode, len);
129 - up_write(&EXT4_I(inode)->xattr_sem);
131 + ext4_write_unlock_xattr(inode, &no_expand);
135 @@ -533,7 +532,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
139 - int ret, needed_blocks;
140 + int ret, needed_blocks, no_expand;
141 handle_t *handle = NULL;
142 int retries = 0, sem_held = 0;
143 struct page *page = NULL;
144 @@ -573,7 +572,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
148 - down_write(&EXT4_I(inode)->xattr_sem);
149 + ext4_write_lock_xattr(inode, &no_expand);
151 /* If some one has already done this for us, just exit. */
152 if (!ext4_has_inline_data(inode)) {
153 @@ -610,7 +609,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
156 ext4_orphan_add(handle, inode);
157 - up_write(&EXT4_I(inode)->xattr_sem);
158 + ext4_write_unlock_xattr(inode, &no_expand);
160 ext4_journal_stop(handle);
162 @@ -636,7 +635,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
166 - up_write(&EXT4_I(inode)->xattr_sem);
167 + ext4_write_unlock_xattr(inode, &no_expand);
169 ext4_journal_stop(handle);
171 @@ -729,7 +728,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
172 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
173 unsigned copied, struct page *page)
176 + int ret, no_expand;
178 struct ext4_iloc iloc;
180 @@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
184 - down_write(&EXT4_I(inode)->xattr_sem);
185 + ext4_write_lock_xattr(inode, &no_expand);
186 BUG_ON(!ext4_has_inline_data(inode));
188 kaddr = kmap_atomic(page);
189 @@ -757,7 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
190 /* clear page dirty so that writepages wouldn't work for us. */
191 ClearPageDirty(page);
193 - up_write(&EXT4_I(inode)->xattr_sem);
194 + ext4_write_unlock_xattr(inode, &no_expand);
198 @@ -768,7 +767,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
203 + int ret, no_expand;
205 struct ext4_iloc iloc;
207 @@ -778,11 +777,11 @@ ext4_journalled_write_inline_data(struct inode *inode,
211 - down_write(&EXT4_I(inode)->xattr_sem);
212 + ext4_write_lock_xattr(inode, &no_expand);
213 kaddr = kmap_atomic(page);
214 ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
215 kunmap_atomic(kaddr);
216 - up_write(&EXT4_I(inode)->xattr_sem);
217 + ext4_write_unlock_xattr(inode, &no_expand);
221 @@ -1259,7 +1258,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
222 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
223 struct inode *dir, struct inode *inode)
225 - int ret, inline_size;
226 + int ret, inline_size, no_expand;
228 struct ext4_iloc iloc;
230 @@ -1267,7 +1266,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
234 - down_write(&EXT4_I(dir)->xattr_sem);
235 + ext4_write_lock_xattr(dir, &no_expand);
236 if (!ext4_has_inline_data(dir))
239 @@ -1313,7 +1312,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
242 ext4_mark_inode_dirty(handle, dir);
243 - up_write(&EXT4_I(dir)->xattr_sem);
244 + ext4_write_unlock_xattr(dir, &no_expand);
248 @@ -1673,7 +1672,7 @@ int ext4_delete_inline_entry(handle_t *handle,
249 struct buffer_head *bh,
250 int *has_inline_data)
252 - int err, inline_size;
253 + int err, inline_size, no_expand;
254 struct ext4_iloc iloc;
257 @@ -1681,7 +1680,7 @@ int ext4_delete_inline_entry(handle_t *handle,
261 - down_write(&EXT4_I(dir)->xattr_sem);
262 + ext4_write_lock_xattr(dir, &no_expand);
263 if (!ext4_has_inline_data(dir)) {
264 *has_inline_data = 0;
266 @@ -1715,7 +1714,7 @@ int ext4_delete_inline_entry(handle_t *handle,
268 ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
270 - up_write(&EXT4_I(dir)->xattr_sem);
271 + ext4_write_unlock_xattr(dir, &no_expand);
274 ext4_std_error(dir->i_sb, err);
275 @@ -1814,11 +1813,11 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
277 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
280 + int ret, no_expand;
282 - down_write(&EXT4_I(inode)->xattr_sem);
283 + ext4_write_lock_xattr(inode, &no_expand);
284 ret = ext4_destroy_inline_data_nolock(handle, inode);
285 - up_write(&EXT4_I(inode)->xattr_sem);
286 + ext4_write_unlock_xattr(inode, &no_expand);
290 @@ -1903,7 +1902,7 @@ int ext4_try_to_evict_inline_data(handle_t *handle,
291 void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
294 - int inline_size, value_len, needed_blocks;
295 + int inline_size, value_len, needed_blocks, no_expand;
298 struct ext4_xattr_ibody_find is = {
299 @@ -1920,7 +1919,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
303 - down_write(&EXT4_I(inode)->xattr_sem);
304 + ext4_write_lock_xattr(inode, &no_expand);
305 if (!ext4_has_inline_data(inode)) {
307 ext4_journal_stop(handle);
308 @@ -1978,7 +1977,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
309 up_write(&EXT4_I(inode)->i_data_sem);
312 - up_write(&EXT4_I(inode)->xattr_sem);
313 + ext4_write_unlock_xattr(inode, &no_expand);
316 ext4_orphan_del(handle, inode);
317 @@ -1994,7 +1993,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
319 int ext4_convert_inline_data(struct inode *inode)
321 - int error, needed_blocks;
322 + int error, needed_blocks, no_expand;
324 struct ext4_iloc iloc;
326 @@ -2016,15 +2015,10 @@ int ext4_convert_inline_data(struct inode *inode)
330 - down_write(&EXT4_I(inode)->xattr_sem);
331 - if (!ext4_has_inline_data(inode)) {
332 - up_write(&EXT4_I(inode)->xattr_sem);
336 - error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
337 - up_write(&EXT4_I(inode)->xattr_sem);
339 + ext4_write_lock_xattr(inode, &no_expand);
340 + if (ext4_has_inline_data(inode))
341 + error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
342 + ext4_write_unlock_xattr(inode, &no_expand);
343 ext4_journal_stop(handle);
346 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
347 index 5a94fa52b74f..c40bd55b6400 100644
348 --- a/fs/ext4/xattr.c
349 +++ b/fs/ext4/xattr.c
350 @@ -1188,16 +1188,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
351 struct ext4_xattr_block_find bs = {
352 .s = { .not_found = -ENODATA, },
354 - unsigned long no_expand;
360 if (strlen(name) > 255)
362 - down_write(&EXT4_I(inode)->xattr_sem);
363 - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
364 - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
365 + ext4_write_lock_xattr(inode, &no_expand);
367 error = ext4_reserve_inode_write(handle, inode, &is.iloc);
369 @@ -1264,7 +1262,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
370 ext4_xattr_update_super_block(handle, inode->i_sb);
371 inode->i_ctime = current_time(inode);
373 - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
375 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
377 * The bh is consumed by ext4_mark_iloc_dirty, even with
378 @@ -1278,9 +1276,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
382 - if (no_expand == 0)
383 - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
384 - up_write(&EXT4_I(inode)->xattr_sem);
385 + ext4_write_unlock_xattr(inode, &no_expand);
389 @@ -1497,12 +1493,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
390 int error = 0, tried_min_extra_isize = 0;
391 int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
392 int isize_diff; /* How much do we need to grow i_extra_isize */
395 + if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
398 - down_write(&EXT4_I(inode)->xattr_sem);
400 - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
402 - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
404 isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
405 if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
406 @@ -1584,17 +1579,16 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
407 EXT4_I(inode)->i_extra_isize = new_extra_isize;
410 - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
411 - up_write(&EXT4_I(inode)->xattr_sem);
412 + ext4_write_unlock_xattr(inode, &no_expand);
418 - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
419 - * size expansion failed.
420 + * Inode size expansion failed; don't try again
422 - up_write(&EXT4_I(inode)->xattr_sem);
424 + ext4_write_unlock_xattr(inode, &no_expand);
428 diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
429 index a92e783fa057..099c8b670ef5 100644
430 --- a/fs/ext4/xattr.h
431 +++ b/fs/ext4/xattr.h
432 @@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler;
434 #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
437 + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
438 + * The first is to signal that there the inline xattrs and data are
439 + * taking up so much space that we might as well not keep trying to
440 + * expand it. The second is that xattr_sem is taken for writing, so
441 + * we shouldn't try to recurse into the inode expansion. For this
442 + * second case, we need to make sure that we take save and restore the
443 + * NO_EXPAND state flag appropriately.
445 +static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
447 + down_write(&EXT4_I(inode)->xattr_sem);
448 + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
449 + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
452 +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
454 + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
456 + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
457 + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
461 +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
464 + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
465 + up_write(&EXT4_I(inode)->xattr_sem);
468 extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
470 extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);