1 ext4: fix transaction issues for ext4_fallocate and ext4_zero_range
3 From: Dmitry Monakhov <dmonakhov@openvz.org>
5 After commit f282ac19d86f we use different transactions for
6 preallocation and the i_disksize update, which results in complaints from
7 fsck after a power failure. Spotted by generic/019. IMHO this is a regression
8 because the fs becomes inconsistent; even worse, 'e2fsck -p' will no longer
9 work (which drives admins crazy). The same transaction requirement
10 applies to ctime and mtime updates.
12 testcase: xfstest generic/019
14 Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 Cc: stable@vger.kernel.org
18 fs/ext4/extents.c | 68 +++++++++++++++++++++++++++-------------------------
19 1 files changed, 35 insertions(+), 33 deletions(-)
21 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
22 index 39fbcbb..3fae328 100644
23 --- a/fs/ext4/extents.c
24 +++ b/fs/ext4/extents.c
25 @@ -4674,7 +4674,8 @@ retry:
28 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
29 - ext4_lblk_t len, int flags, int mode)
30 + ext4_lblk_t len, loff_t new_size,
31 + int flags, int mode)
33 struct inode *inode = file_inode(file);
35 @@ -4683,8 +4684,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
37 struct ext4_map_blocks map;
44 * Don't normalize the request if it can fit in one extent so
45 * that it doesn't get unnecessarily split into multiple
46 @@ -4699,9 +4702,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
47 credits = ext4_chunk_trans_blocks(inode, len);
50 - while (ret >= 0 && ret < len) {
51 - map.m_lblk = map.m_lblk + ret;
52 - map.m_len = len = len - ret;
53 + while (ret >= 0 && len) {
54 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
57 @@ -4718,6 +4719,21 @@ retry:
58 ret2 = ext4_journal_stop(handle);
62 + map.m_len = len = len - ret;
63 + epos = (loff_t)map.m_lblk << inode->i_blkbits;
64 + inode->i_ctime = ext4_current_time(inode);
66 + if (epos > new_size)
68 + if (ext4_update_inode_size(inode, epos) & 0x1)
69 + inode->i_mtime = inode->i_ctime;
71 + if (epos > inode->i_size)
72 + ext4_set_inode_flag(inode,
73 + EXT4_INODE_EOFBLOCKS);
75 + ext4_mark_inode_dirty(handle, inode);
76 ret2 = ext4_journal_stop(handle);
79 @@ -4741,7 +4757,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
84 + int partial_begin, partial_end;
87 struct address_space *mapping = inode->i_mapping;
88 @@ -4781,7 +4797,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
90 if (start < offset || end > offset + len)
92 - partial = (offset + len) & ((1 << blkbits) - 1);
93 + partial_begin = offset & ((1 << blkbits) - 1);
94 + partial_end = (offset + len) & ((1 << blkbits) - 1);
96 lblk = start >> blkbits;
97 max_blocks = (end >> blkbits);
98 @@ -4815,7 +4832,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
99 * If we have a partial block after EOF we have to allocate
107 @@ -4823,6 +4840,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
109 /* Now release the pages and zero block aligned part of pages*/
110 truncate_pagecache_range(inode, start, end - 1);
111 + inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
113 /* Wait all existing dio workers, newcomers will block on i_mutex */
114 ext4_inode_block_unlocked_dio(inode);
115 @@ -4835,11 +4853,14 @@ static long ext4_zero_range(struct file *file, loff_t offset,
119 - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
121 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
126 + if (!partial_begin && !partial_end)
129 /* In worst case we have to writeout two nonadjacent unwritten blocks */
130 credits = ext4_chunk_trans_blocks(inode, 1) * 2 -
131 EXT4_META_TRANS_BLOCKS(inode->i_sb);
132 @@ -4861,7 +4882,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
133 if ((offset + len) > i_size_read(inode))
134 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
137 ext4_mark_inode_dirty(handle, inode);
139 /* Zero out partial block at the edges of the range */
140 @@ -4888,7 +4908,6 @@ out_mutex:
141 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
143 struct inode *inode = file_inode(file);
146 unsigned int max_blocks;
148 @@ -4944,32 +4963,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
152 - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
153 + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
158 - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
159 - if (IS_ERR(handle))
162 - inode->i_ctime = ext4_current_time(inode);
165 - if (ext4_update_inode_size(inode, new_size) & 0x1)
166 - inode->i_mtime = inode->i_ctime;
169 - * Mark that we allocate beyond EOF so the subsequent truncate
170 - * can proceed even if the new size is the same as i_size.
172 - if ((offset + len) > i_size_read(inode))
173 - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
174 + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
175 + ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
176 + EXT4_I(inode)->i_sync_tid);
178 - ext4_mark_inode_dirty(handle, inode);
179 - if (file->f_flags & O_SYNC)
180 - ext4_handle_sync(handle);
182 - ext4_journal_stop(handle);
184 mutex_unlock(&inode->i_mutex);
185 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
190 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
191 the body of a message to majordomo@vger.kernel.org
192 More majordomo info at http://vger.kernel.org/majordomo-info.html