add patch load-jmap-from-journal to unstable patch series
[ext4-patch-queue.git] / fix-deadlock-in-inline_data-and-ext4_expand_extra_isize_ea
blob5a281ae6ff7c6f2f70cdb391092c47ac83ef8a34
1 ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()
3 The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4:
4 avoid deadlock when expanding inode size" didn't include the use of
5 xattr_sem in fs/ext4/inline.c.  With the addition of project quota
6 which added a new extra inode field, this exposed deadlocks in the
7 inline_data code similar to the ones fixed by 2e81a4eeedca.
9 The deadlock can be reproduced via:
11    dmesg -n 7
12    mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768
13    mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc
14    mkdir /vdc/a
15    umount /vdc
16    mount -t ext4 /dev/vdc /vdc
17    echo foo > /vdc/a/foo
19 and looks like this:
21 [   11.158815] 
22 [   11.160276] =============================================
23 [   11.161960] [ INFO: possible recursive locking detected ]
24 [   11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G        W      
25 [   11.161960] ---------------------------------------------
26 [   11.161960] bash/2519 is trying to acquire lock:
27 [   11.161960]  (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd
28 [   11.161960] 
29 [   11.161960] but task is already holding lock:
30 [   11.161960]  (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
31 [   11.161960] 
32 [   11.161960] other info that might help us debug this:
33 [   11.161960]  Possible unsafe locking scenario:
34 [   11.161960] 
35 [   11.161960]        CPU0
36 [   11.161960]        ----
37 [   11.161960]   lock(&ei->xattr_sem);
38 [   11.161960]   lock(&ei->xattr_sem);
39 [   11.161960] 
40 [   11.161960]  *** DEADLOCK ***
41 [   11.161960] 
42 [   11.161960]  May be due to missing lock nesting notation
43 [   11.161960] 
44 [   11.161960] 4 locks held by bash/2519:
45 [   11.161960]  #0:  (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e
46 [   11.161960]  #1:  (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a
47 [   11.161960]  #2:  (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622
48 [   11.161960]  #3:  (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
49 [   11.161960] 
50 [   11.161960] stack backtrace:
51 [   11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G        W       4.10.0-rc3-00015-g011b30a8a3cf #160
52 [   11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014
53 [   11.161960] Call Trace:
54 [   11.161960]  dump_stack+0x72/0xa3
55 [   11.161960]  __lock_acquire+0xb7c/0xcb9
56 [   11.161960]  ? kvm_clock_read+0x1f/0x29
57 [   11.161960]  ? __lock_is_held+0x36/0x66
58 [   11.161960]  ? __lock_is_held+0x36/0x66
59 [   11.161960]  lock_acquire+0x106/0x18a
60 [   11.161960]  ? ext4_expand_extra_isize_ea+0x3d/0x4cd
61 [   11.161960]  down_write+0x39/0x72
62 [   11.161960]  ? ext4_expand_extra_isize_ea+0x3d/0x4cd
63 [   11.161960]  ext4_expand_extra_isize_ea+0x3d/0x4cd
64 [   11.161960]  ? _raw_read_unlock+0x22/0x2c
65 [   11.161960]  ? jbd2_journal_extend+0x1e2/0x262
66 [   11.161960]  ? __ext4_journal_get_write_access+0x3d/0x60
67 [   11.161960]  ext4_mark_inode_dirty+0x17d/0x26d
68 [   11.161960]  ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
69 [   11.161960]  ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
70 [   11.161960]  ext4_try_add_inline_entry+0x69/0x152
71 [   11.161960]  ext4_add_entry+0xa3/0x848
72 [   11.161960]  ? __brelse+0x14/0x2f
73 [   11.161960]  ? _raw_spin_unlock_irqrestore+0x44/0x4f
74 [   11.161960]  ext4_add_nondir+0x17/0x5b
75 [   11.161960]  ext4_create+0xcf/0x133
76 [   11.161960]  ? ext4_mknod+0x12f/0x12f
77 [   11.161960]  lookup_open+0x39e/0x3fb
78 [   11.161960]  ? __wake_up+0x1a/0x40
79 [   11.161960]  ? lock_acquire+0x11e/0x18a
80 [   11.161960]  path_openat+0x35c/0x67a
81 [   11.161960]  ? sched_clock_cpu+0xd7/0xf2
82 [   11.161960]  do_filp_open+0x36/0x7c
83 [   11.161960]  ? _raw_spin_unlock+0x22/0x2c
84 [   11.161960]  ? __alloc_fd+0x169/0x173
85 [   11.161960]  do_sys_open+0x59/0xcc
86 [   11.161960]  SyS_open+0x1d/0x1f
87 [   11.161960]  do_int80_syscall_32+0x4f/0x61
88 [   11.161960]  entry_INT80_32+0x2f/0x2f
89 [   11.161960] EIP: 0xb76ad469
90 [   11.161960] EFLAGS: 00000286 CPU: 0
91 [   11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6
92 [   11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0
93 [   11.161960]  DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b
95 Cc: stable@vger.kernel.org # 3.10 (requires 2e81a4eeedca as a prereq)
96 Reported-by: George Spelvin <linux@sciencehorizons.net>
97 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
98 ---
99  fs/ext4/inline.c | 66 ++++++++++++++++++++++++++++++------------------------------------
100  fs/ext4/xattr.c  | 30 ++++++++++++------------------
101  fs/ext4/xattr.h  | 32 ++++++++++++++++++++++++++++++++
102  3 files changed, 74 insertions(+), 54 deletions(-)
104 diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
105 index 437df6a1a841..99a5312ced52 100644
106 --- a/fs/ext4/inline.c
107 +++ b/fs/ext4/inline.c
108 @@ -381,7 +381,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
109  static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
110                                     unsigned int len)
112 -       int ret, size;
113 +       int ret, size, no_expand;
114         struct ext4_inode_info *ei = EXT4_I(inode);
116         if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
117 @@ -391,15 +391,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
118         if (size < len)
119                 return -ENOSPC;
121 -       down_write(&EXT4_I(inode)->xattr_sem);
122 +       ext4_write_lock_xattr(inode, &no_expand);
124         if (ei->i_inline_off)
125                 ret = ext4_update_inline_data(handle, inode, len);
126         else
127                 ret = ext4_create_inline_data(handle, inode, len);
129 -       up_write(&EXT4_I(inode)->xattr_sem);
131 +       ext4_write_unlock_xattr(inode, &no_expand);
132         return ret;
135 @@ -533,7 +532,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
136                                               struct inode *inode,
137                                               unsigned flags)
139 -       int ret, needed_blocks;
140 +       int ret, needed_blocks, no_expand;
141         handle_t *handle = NULL;
142         int retries = 0, sem_held = 0;
143         struct page *page = NULL;
144 @@ -573,7 +572,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
145                 goto out;
146         }
148 -       down_write(&EXT4_I(inode)->xattr_sem);
149 +       ext4_write_lock_xattr(inode, &no_expand);
150         sem_held = 1;
151         /* If some one has already done this for us, just exit. */
152         if (!ext4_has_inline_data(inode)) {
153 @@ -610,7 +609,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
154                 put_page(page);
155                 page = NULL;
156                 ext4_orphan_add(handle, inode);
157 -               up_write(&EXT4_I(inode)->xattr_sem);
158 +               ext4_write_unlock_xattr(inode, &no_expand);
159                 sem_held = 0;
160                 ext4_journal_stop(handle);
161                 handle = NULL;
162 @@ -636,7 +635,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
163                 put_page(page);
164         }
165         if (sem_held)
166 -               up_write(&EXT4_I(inode)->xattr_sem);
167 +               ext4_write_unlock_xattr(inode, &no_expand);
168         if (handle)
169                 ext4_journal_stop(handle);
170         brelse(iloc.bh);
171 @@ -729,7 +728,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
172  int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
173                                unsigned copied, struct page *page)
175 -       int ret;
176 +       int ret, no_expand;
177         void *kaddr;
178         struct ext4_iloc iloc;
180 @@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
181                 goto out;
182         }
184 -       down_write(&EXT4_I(inode)->xattr_sem);
185 +       ext4_write_lock_xattr(inode, &no_expand);
186         BUG_ON(!ext4_has_inline_data(inode));
188         kaddr = kmap_atomic(page);
189 @@ -757,7 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
190         /* clear page dirty so that writepages wouldn't work for us. */
191         ClearPageDirty(page);
193 -       up_write(&EXT4_I(inode)->xattr_sem);
194 +       ext4_write_unlock_xattr(inode, &no_expand);
195         brelse(iloc.bh);
196  out:
197         return copied;
198 @@ -768,7 +767,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
199                                   unsigned len,
200                                   struct page *page)
202 -       int ret;
203 +       int ret, no_expand;
204         void *kaddr;
205         struct ext4_iloc iloc;
207 @@ -778,11 +777,11 @@ ext4_journalled_write_inline_data(struct inode *inode,
208                 return NULL;
209         }
211 -       down_write(&EXT4_I(inode)->xattr_sem);
212 +       ext4_write_lock_xattr(inode, &no_expand);
213         kaddr = kmap_atomic(page);
214         ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
215         kunmap_atomic(kaddr);
216 -       up_write(&EXT4_I(inode)->xattr_sem);
217 +       ext4_write_unlock_xattr(inode, &no_expand);
219         return iloc.bh;
221 @@ -1259,7 +1258,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
222  int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
223                               struct inode *dir, struct inode *inode)
225 -       int ret, inline_size;
226 +       int ret, inline_size, no_expand;
227         void *inline_start;
228         struct ext4_iloc iloc;
230 @@ -1267,7 +1266,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
231         if (ret)
232                 return ret;
234 -       down_write(&EXT4_I(dir)->xattr_sem);
235 +       ext4_write_lock_xattr(dir, &no_expand);
236         if (!ext4_has_inline_data(dir))
237                 goto out;
239 @@ -1313,7 +1312,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
241  out:
242         ext4_mark_inode_dirty(handle, dir);
243 -       up_write(&EXT4_I(dir)->xattr_sem);
244 +       ext4_write_unlock_xattr(dir, &no_expand);
245         brelse(iloc.bh);
246         return ret;
248 @@ -1673,7 +1672,7 @@ int ext4_delete_inline_entry(handle_t *handle,
249                              struct buffer_head *bh,
250                              int *has_inline_data)
252 -       int err, inline_size;
253 +       int err, inline_size, no_expand;
254         struct ext4_iloc iloc;
255         void *inline_start;
257 @@ -1681,7 +1680,7 @@ int ext4_delete_inline_entry(handle_t *handle,
258         if (err)
259                 return err;
261 -       down_write(&EXT4_I(dir)->xattr_sem);
262 +       ext4_write_lock_xattr(dir, &no_expand);
263         if (!ext4_has_inline_data(dir)) {
264                 *has_inline_data = 0;
265                 goto out;
266 @@ -1715,7 +1714,7 @@ int ext4_delete_inline_entry(handle_t *handle,
268         ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
269  out:
270 -       up_write(&EXT4_I(dir)->xattr_sem);
271 +       ext4_write_unlock_xattr(dir, &no_expand);
272         brelse(iloc.bh);
273         if (err != -ENOENT)
274                 ext4_std_error(dir->i_sb, err);
275 @@ -1814,11 +1813,11 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
277  int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
279 -       int ret;
280 +       int ret, no_expand;
282 -       down_write(&EXT4_I(inode)->xattr_sem);
283 +       ext4_write_lock_xattr(inode, &no_expand);
284         ret = ext4_destroy_inline_data_nolock(handle, inode);
285 -       up_write(&EXT4_I(inode)->xattr_sem);
286 +       ext4_write_unlock_xattr(inode, &no_expand);
288         return ret;
290 @@ -1903,7 +1902,7 @@ int ext4_try_to_evict_inline_data(handle_t *handle,
291  void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
293         handle_t *handle;
294 -       int inline_size, value_len, needed_blocks;
295 +       int inline_size, value_len, needed_blocks, no_expand;
296         size_t i_size;
297         void *value = NULL;
298         struct ext4_xattr_ibody_find is = {
299 @@ -1920,7 +1919,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
300         if (IS_ERR(handle))
301                 return;
303 -       down_write(&EXT4_I(inode)->xattr_sem);
304 +       ext4_write_lock_xattr(inode, &no_expand);
305         if (!ext4_has_inline_data(inode)) {
306                 *has_inline = 0;
307                 ext4_journal_stop(handle);
308 @@ -1978,7 +1977,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
309         up_write(&EXT4_I(inode)->i_data_sem);
310  out:
311         brelse(is.iloc.bh);
312 -       up_write(&EXT4_I(inode)->xattr_sem);
313 +       ext4_write_unlock_xattr(inode, &no_expand);
314         kfree(value);
315         if (inode->i_nlink)
316                 ext4_orphan_del(handle, inode);
317 @@ -1994,7 +1993,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
319  int ext4_convert_inline_data(struct inode *inode)
321 -       int error, needed_blocks;
322 +       int error, needed_blocks, no_expand;
323         handle_t *handle;
324         struct ext4_iloc iloc;
326 @@ -2016,15 +2015,10 @@ int ext4_convert_inline_data(struct inode *inode)
327                 goto out_free;
328         }
330 -       down_write(&EXT4_I(inode)->xattr_sem);
331 -       if (!ext4_has_inline_data(inode)) {
332 -               up_write(&EXT4_I(inode)->xattr_sem);
333 -               goto out;
334 -       }
336 -       error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
337 -       up_write(&EXT4_I(inode)->xattr_sem);
338 -out:
339 +       ext4_write_lock_xattr(inode, &no_expand);
340 +       if (ext4_has_inline_data(inode))
341 +               error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
342 +       ext4_write_unlock_xattr(inode, &no_expand);
343         ext4_journal_stop(handle);
344  out_free:
345         brelse(iloc.bh);
346 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
347 index 5a94fa52b74f..c40bd55b6400 100644
348 --- a/fs/ext4/xattr.c
349 +++ b/fs/ext4/xattr.c
350 @@ -1188,16 +1188,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
351         struct ext4_xattr_block_find bs = {
352                 .s = { .not_found = -ENODATA, },
353         };
354 -       unsigned long no_expand;
355 +       int no_expand;
356         int error;
358         if (!name)
359                 return -EINVAL;
360         if (strlen(name) > 255)
361                 return -ERANGE;
362 -       down_write(&EXT4_I(inode)->xattr_sem);
363 -       no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
364 -       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
365 +       ext4_write_lock_xattr(inode, &no_expand);
367         error = ext4_reserve_inode_write(handle, inode, &is.iloc);
368         if (error)
369 @@ -1264,7 +1262,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
370                 ext4_xattr_update_super_block(handle, inode->i_sb);
371                 inode->i_ctime = current_time(inode);
372                 if (!value)
373 -                       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
374 +                       no_expand = 0;
375                 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
376                 /*
377                  * The bh is consumed by ext4_mark_iloc_dirty, even with
378 @@ -1278,9 +1276,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
379  cleanup:
380         brelse(is.iloc.bh);
381         brelse(bs.bh);
382 -       if (no_expand == 0)
383 -               ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
384 -       up_write(&EXT4_I(inode)->xattr_sem);
385 +       ext4_write_unlock_xattr(inode, &no_expand);
386         return error;
389 @@ -1497,12 +1493,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
390         int error = 0, tried_min_extra_isize = 0;
391         int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
392         int isize_diff; /* How much do we need to grow i_extra_isize */
393 +       int no_expand;
395 +       if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
396 +               return 0;
398 -       down_write(&EXT4_I(inode)->xattr_sem);
399 -       /*
400 -        * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
401 -        */
402 -       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
403  retry:
404         isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
405         if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
406 @@ -1584,17 +1579,16 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
407         EXT4_I(inode)->i_extra_isize = new_extra_isize;
408         brelse(bh);
409  out:
410 -       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
411 -       up_write(&EXT4_I(inode)->xattr_sem);
412 +       ext4_write_unlock_xattr(inode, &no_expand);
413         return 0;
415  cleanup:
416         brelse(bh);
417         /*
418 -        * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
419 -        * size expansion failed.
420 +        * Inode size expansion failed; don't try again
421          */
422 -       up_write(&EXT4_I(inode)->xattr_sem);
423 +       no_expand = 1;
424 +       ext4_write_unlock_xattr(inode, &no_expand);
425         return error;
428 diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
429 index a92e783fa057..099c8b670ef5 100644
430 --- a/fs/ext4/xattr.h
431 +++ b/fs/ext4/xattr.h
432 @@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler;
434  #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
437 + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
438 + * The first is to signal that there the inline xattrs and data are
439 + * taking up so much space that we might as well not keep trying to
440 + * expand it.  The second is that xattr_sem is taken for writing, so
441 + * we shouldn't try to recurse into the inode expansion.  For this
442 + * second case, we need to make sure that we take save and restore the
443 + * NO_EXPAND state flag appropriately.
444 + */
445 +static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
447 +       down_write(&EXT4_I(inode)->xattr_sem);
448 +       *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
449 +       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
452 +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
454 +       if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
455 +               return 0;
456 +       *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
457 +       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
458 +       return 1;
461 +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
463 +       if (*save == 0)
464 +               ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
465 +       up_write(&EXT4_I(inode)->xattr_sem);
468  extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
470  extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);