add patch fix-infinite-loop-when-recovering-corrupt-journal
[ext4-patch-queue.git] / fix-transaction-issues-for-ext4_fallocate-and-ext_zero_range
blobab8f7ef58a1d9f3e7cd9cd67c41dbfc303396f7d
1 ext4: fix transaction issues for ext4_fallocate and ext4_zero_range
3 From: Dmitry Monakhov <dmonakhov@openvz.org>
5 After commit f282ac19d86f we use different transactions for
6 preallocation and the i_disksize update, which results in complaints from
7 fsck after a power failure.  Spotted by generic/019.  IMHO this is a
8 regression because the fs becomes inconsistent; even worse, 'e2fsck -p' will
9 no longer work (which drives admins crazy).  The same transaction requirement
10 applies to ctime and mtime updates.
12 testcase: xfstest generic/019
14 Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 Cc: stable@vger.kernel.org
17 ---
18  fs/ext4/extents.c |   68 +++++++++++++++++++++++++++-------------------------
19  1 files changed, 35 insertions(+), 33 deletions(-)
21 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
22 index 39fbcbb..3fae328 100644
23 --- a/fs/ext4/extents.c
24 +++ b/fs/ext4/extents.c
25 @@ -4674,7 +4674,8 @@ retry:
26  }
28  static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
29 -                                 ext4_lblk_t len, int flags, int mode)
30 +                                 ext4_lblk_t len, loff_t new_size,
31 +                                 int flags, int mode)
32  {
33         struct inode *inode = file_inode(file);
34         handle_t *handle;
35 @@ -4683,8 +4684,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
36         int retries = 0;
37         struct ext4_map_blocks map;
38         unsigned int credits;
39 +       loff_t epos;
41         map.m_lblk = offset;
42 +       map.m_len = len;
43         /*
44          * Don't normalize the request if it can fit in one extent so
45          * that it doesn't get unnecessarily split into multiple
46 @@ -4699,9 +4702,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
47         credits = ext4_chunk_trans_blocks(inode, len);
49  retry:
50 -       while (ret >= 0 && ret < len) {
51 -               map.m_lblk = map.m_lblk + ret;
52 -               map.m_len = len = len - ret;
53 +       while (ret >= 0 && len) {
54                 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
55                                             credits);
56                 if (IS_ERR(handle)) {
57 @@ -4718,6 +4719,21 @@ retry:
58                         ret2 = ext4_journal_stop(handle);
59                         break;
60                 }
61 +               map.m_lblk += ret;
62 +               map.m_len = len = len - ret;
63 +               epos = (loff_t)map.m_lblk << inode->i_blkbits;
64 +               inode->i_ctime = ext4_current_time(inode);
65 +               if (new_size) {
66 +                       if (epos > new_size)
67 +                               epos = new_size;
68 +                       if (ext4_update_inode_size(inode, epos) & 0x1)
69 +                               inode->i_mtime = inode->i_ctime;
70 +               } else {
71 +                       if (epos > inode->i_size)
72 +                               ext4_set_inode_flag(inode,
73 +                                                   EXT4_INODE_EOFBLOCKS);
74 +               }
75 +               ext4_mark_inode_dirty(handle, inode);
76                 ret2 = ext4_journal_stop(handle);
77                 if (ret2)
78                         break;
79 @@ -4741,7 +4757,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
80         int ret = 0;
81         int flags;
82         int credits;
83 -       int partial;
84 +       int partial_begin, partial_end;
85         loff_t start, end;
86         ext4_lblk_t lblk;
87         struct address_space *mapping = inode->i_mapping;
88 @@ -4781,7 +4797,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
90         if (start < offset || end > offset + len)
91                 return -EINVAL;
92 -       partial = (offset + len) & ((1 << blkbits) - 1);
93 +       partial_begin = offset & ((1 << blkbits) - 1);
94 +       partial_end = (offset + len) & ((1 << blkbits) - 1);
96         lblk = start >> blkbits;
97         max_blocks = (end >> blkbits);
98 @@ -4815,7 +4832,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
99                  * If we have a partial block after EOF we have to allocate
100                  * the entire block.
101                  */
102 -               if (partial)
103 +               if (partial_end)
104                         max_blocks += 1;
105         }
107 @@ -4823,6 +4840,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
109                 /* Now release the pages and zero block aligned part of pages*/
110                 truncate_pagecache_range(inode, start, end - 1);
111 +               inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
113                 /* Wait all existing dio workers, newcomers will block on i_mutex */
114                 ext4_inode_block_unlocked_dio(inode);
115 @@ -4835,11 +4853,14 @@ static long ext4_zero_range(struct file *file, loff_t offset,
116                 if (ret)
117                         goto out_dio;
119 -               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
120 -                                            mode);
121 +               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
122 +                                            flags, mode);
123                 if (ret)
124                         goto out_dio;
125         }
126 +       if (!partial_begin && !partial_end)
127 +               goto out_dio;
129         /* In worst case we have to writeout two nonadjacent unwritten blocks */
130         credits = ext4_chunk_trans_blocks(inode, 1) * 2 -
131                 EXT4_META_TRANS_BLOCKS(inode->i_sb);
132 @@ -4861,7 +4882,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
133                 if ((offset + len) > i_size_read(inode))
134                         ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
135         }
137         ext4_mark_inode_dirty(handle, inode);
139         /* Zero out partial block at the edges of the range */
140 @@ -4888,7 +4908,6 @@ out_mutex:
141  long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
143         struct inode *inode = file_inode(file);
144 -       handle_t *handle;
145         loff_t new_size = 0;
146         unsigned int max_blocks;
147         int ret = 0;
148 @@ -4944,32 +4963,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
149                         goto out;
150         }
152 -       ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
153 +       ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
154 +                                    flags, mode);
155         if (ret)
156                 goto out;
158 -       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
159 -       if (IS_ERR(handle))
160 -               goto out;
162 -       inode->i_ctime = ext4_current_time(inode);
164 -       if (new_size) {
165 -               if (ext4_update_inode_size(inode, new_size) & 0x1)
166 -                       inode->i_mtime = inode->i_ctime;
167 -       } else {
168 -               /*
169 -               * Mark that we allocate beyond EOF so the subsequent truncate
170 -               * can proceed even if the new size is the same as i_size.
171 -               */
172 -               if ((offset + len) > i_size_read(inode))
173 -                       ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
174 +       if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
175 +               ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
176 +                                               EXT4_I(inode)->i_sync_tid);
177         }
178 -       ext4_mark_inode_dirty(handle, inode);
179 -       if (file->f_flags & O_SYNC)
180 -               ext4_handle_sync(handle);
182 -       ext4_journal_stop(handle);
183  out:
184         mutex_unlock(&inode->i_mutex);
185         trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
186 -- 
187 1.7.1
190 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
191 the body of a message to majordomo@vger.kernel.org
192 More majordomo info at  http://vger.kernel.org/majordomo-info.html