Update crypto patches
[ext4-patch-queue.git] / fix-suboptimal-seek_datahole_extents_traversal
blob3305accef489ebf89398014aa307ea8043b4f702
1 ext4: fix suboptimal seek_{data,hole} extents traversal
3 From: Dmitry Monakhov <dmonakhov@openvz.org>
5 It is ridiculous practice to scan an inode block by block; this technique
6 is applicable only to old indirect files. This takes a significant amount
7 of time for really large files. Let's reuse ext4_fiemap, which already
8 traverses the inode tree in the most optimal manner.
10 TESTCASE:
11 ftruncate64(fd, 0);
12 ftruncate64(fd, 1ULL << 40);
13 /* lseek will spin for a very long time */
14 lseek64(fd, 0, SEEK_DATA);
15 lseek64(fd, 0, SEEK_HOLE);
17 Original report: https://lkml.org/lkml/2014/10/16/620
19 Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
20 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
21 ---
22  fs/ext4/extents.c |    4 +-
23  fs/ext4/file.c    |  220 +++++++++++++++++++++++++---------------------------
24  2 files changed, 108 insertions(+), 116 deletions(-)
26 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
27 index bed4308..e5d3ead 100644
28 --- a/fs/ext4/extents.c
29 +++ b/fs/ext4/extents.c
30 @@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
32         /* fallback to generic here if not in extents fmt */
33         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
34 -               return generic_block_fiemap(inode, fieinfo, start, len,
35 -                       ext4_get_block);
36 +               return __generic_block_fiemap(inode, fieinfo, start, len,
37 +                                             ext4_get_block);
39         if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
40                 return -EBADR;
41 diff --git a/fs/ext4/file.c b/fs/ext4/file.c
42 index 8131be8..513c12c 100644
43 --- a/fs/ext4/file.c
44 +++ b/fs/ext4/file.c
45 @@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
46   * we determine this extent as a data or a hole according to whether the
47   * page cache has data or not.
48   */
49 -static int ext4_find_unwritten_pgoff(struct inode *inode,
50 -                                    int whence,
51 -                                    struct ext4_map_blocks *map,
52 -                                    loff_t *offset)
53 +static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
54 +                                    loff_t endoff, loff_t *offset)
55  {
56         struct pagevec pvec;
57 -       unsigned int blkbits;
58         pgoff_t index;
59         pgoff_t end;
60 -       loff_t endoff;
61         loff_t startoff;
62         loff_t lastoff;
63         int found = 0;
65 -       blkbits = inode->i_sb->s_blocksize_bits;
66         startoff = *offset;
67         lastoff = startoff;
68 -       endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
71         index = startoff >> PAGE_CACHE_SHIFT;
72         end = endoff >> PAGE_CACHE_SHIFT;
73 @@ -408,147 +403,144 @@ out:
74  static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
75  {
76         struct inode *inode = file->f_mapping->host;
77 -       struct ext4_map_blocks map;
78 -       struct extent_status es;
79 -       ext4_lblk_t start, last, end;
80 -       loff_t dataoff, isize;
81 -       int blkbits;
82 -       int ret = 0;
83 +       struct fiemap_extent_info fie;
84 +       struct fiemap_extent ext[2];
85 +       loff_t next;
86 +       int i, ret = 0;
88         mutex_lock(&inode->i_mutex);
90 -       isize = i_size_read(inode);
91 -       if (offset >= isize) {
92 +       if (offset >= inode->i_size) {
93                 mutex_unlock(&inode->i_mutex);
94                 return -ENXIO;
95         }
97 -       blkbits = inode->i_sb->s_blocksize_bits;
98 -       start = offset >> blkbits;
99 -       last = start;
100 -       end = isize >> blkbits;
101 -       dataoff = offset;
103 -       do {
104 -               map.m_lblk = last;
105 -               map.m_len = end - last + 1;
106 -               ret = ext4_map_blocks(NULL, inode, &map, 0);
107 -               if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
108 -                       if (last != start)
109 -                               dataoff = (loff_t)last << blkbits;
110 +       fie.fi_flags = 0;
111 +       fie.fi_extents_max = 2;
112 +       fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
113 +       while (1) {
114 +               mm_segment_t old_fs = get_fs();
116 +               fie.fi_extents_mapped = 0;
117 +               memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
119 +               set_fs(get_ds());
120 +               ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
121 +               set_fs(old_fs);
122 +               if (ret)
123                         break;
124 -               }
126 -               /*
127 -                * If there is a delay extent at this offset,
128 -                * it will be as a data.
129 -                */
130 -               ext4_es_find_delayed_extent_range(inode, last, last, &es);
131 -               if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
132 -                       if (last != start)
133 -                               dataoff = (loff_t)last << blkbits;
134 +               /* No extents found, EOF */
135 +               if (!fie.fi_extents_mapped) {
136 +                       ret = -ENXIO;
137                         break;
138                 }
139 +               for (i = 0; i < fie.fi_extents_mapped; i++) {
140 +                       next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
142 -               /*
143 -                * If there is a unwritten extent at this offset,
144 -                * it will be as a data or a hole according to page
145 -                * cache that has data or not.
146 -                */
147 -               if (map.m_flags & EXT4_MAP_UNWRITTEN) {
148 -                       int unwritten;
149 -                       unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
150 -                                                             &map, &dataoff);
151 -                       if (unwritten)
152 -                               break;
153 -               }
154 +                       if (offset < (loff_t)ext[i].fe_logical)
155 +                               offset = (loff_t)ext[i].fe_logical;
156 +                       /*
157 +                        * If extent is not unwritten, then it contains valid
158 +                        * data, mapped or delayed.
159 +                        */
160 +                       if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
161 +                               goto out;
163 -               last++;
164 -               dataoff = (loff_t)last << blkbits;
165 -       } while (last <= end);
166 +                       /*
167 +                        * If there is a unwritten extent at this offset,
168 +                        * it will be as a data or a hole according to page
169 +                        * cache that has data or not.
170 +                        */
171 +                       if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
172 +                                                     next, &offset))
173 +                               goto out;
175 +                       if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
176 +                               ret = -ENXIO;
177 +                               goto out;
178 +                       }
179 +                       offset = next;
180 +               }
181 +       }
182 +       if (offset > inode->i_size)
183 +               offset = inode->i_size;
184 +out:
185         mutex_unlock(&inode->i_mutex);
186 +       if (ret)
187 +               return ret;
189 -       if (dataoff > isize)
190 -               return -ENXIO;
192 -       return vfs_setpos(file, dataoff, maxsize);
193 +       return vfs_setpos(file, offset, maxsize);
196  /*
197 - * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
198 + * ext4_seek_hole() retrieves the offset for SEEK_HOLE
199   */
200  static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
202         struct inode *inode = file->f_mapping->host;
203 -       struct ext4_map_blocks map;
204 -       struct extent_status es;
205 -       ext4_lblk_t start, last, end;
206 -       loff_t holeoff, isize;
207 -       int blkbits;
208 -       int ret = 0;
209 +       struct fiemap_extent_info fie;
210 +       struct fiemap_extent ext[2];
211 +       loff_t next;
212 +       int i, ret = 0;
214         mutex_lock(&inode->i_mutex);
216 -       isize = i_size_read(inode);
217 -       if (offset >= isize) {
218 +       if (offset >= inode->i_size) {
219                 mutex_unlock(&inode->i_mutex);
220                 return -ENXIO;
221         }
223 -       blkbits = inode->i_sb->s_blocksize_bits;
224 -       start = offset >> blkbits;
225 -       last = start;
226 -       end = isize >> blkbits;
227 -       holeoff = offset;
228 +       fie.fi_flags = 0;
229 +       fie.fi_extents_max = 2;
230 +       fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
231 +       while (1) {
232 +               mm_segment_t old_fs = get_fs();
234 -       do {
235 -               map.m_lblk = last;
236 -               map.m_len = end - last + 1;
237 -               ret = ext4_map_blocks(NULL, inode, &map, 0);
238 -               if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
239 -                       last += ret;
240 -                       holeoff = (loff_t)last << blkbits;
241 -                       continue;
242 -               }
243 +               fie.fi_extents_mapped = 0;
244 +               memset(ext, 0, sizeof(*ext));
246 -               /*
247 -                * If there is a delay extent at this offset,
248 -                * we will skip this extent.
249 -                */
250 -               ext4_es_find_delayed_extent_range(inode, last, last, &es);
251 -               if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
252 -                       last = es.es_lblk + es.es_len;
253 -                       holeoff = (loff_t)last << blkbits;
254 -                       continue;
255 -               }
256 +               set_fs(get_ds());
257 +               ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
258 +               set_fs(old_fs);
259 +               if (ret)
260 +                       break;
262 -               /*
263 -                * If there is a unwritten extent at this offset,
264 -                * it will be as a data or a hole according to page
265 -                * cache that has data or not.
266 -                */
267 -               if (map.m_flags & EXT4_MAP_UNWRITTEN) {
268 -                       int unwritten;
269 -                       unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
270 -                                                             &map, &holeoff);
271 -                       if (!unwritten) {
272 -                               last += ret;
273 -                               holeoff = (loff_t)last << blkbits;
274 +               /* No extents found */
275 +               if (!fie.fi_extents_mapped)
276 +                       break;
278 +               for (i = 0; i < fie.fi_extents_mapped; i++) {
279 +                       next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
280 +                       /*
281 +                        * If extent is not unwritten, then it contains valid
282 +                        * data, mapped or delayed.
283 +                        */
284 +                       if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
285 +                               if (offset < (loff_t)ext[i].fe_logical)
286 +                                       goto out;
287 +                               offset = next;
288                                 continue;
289                         }
290 -               }
292 -               /* find a hole */
293 -               break;
294 -       } while (last <= end);
295 +                       /*
296 +                        * If there is a unwritten extent at this offset,
297 +                        * it will be as a data or a hole according to page
298 +                        * cache that has data or not.
299 +                        */
300 +                       if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
301 +                                                     next, &offset))
302 +                               goto out;
304 +                       offset = next;
305 +                       if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
306 +                               goto out;
307 +               }
308 +       }
309 +       if (offset > inode->i_size)
310 +               offset = inode->i_size;
311 +out:
312         mutex_unlock(&inode->i_mutex);
313 +       if (ret)
314 +               return ret;
316 -       if (holeoff > isize)
317 -               holeoff = isize;
319 -       return vfs_setpos(file, holeoff, maxsize);
320 +       return vfs_setpos(file, offset, maxsize);
323  /*
324 -- 
325 1.7.1