Add debugging ioctls that allow querying and flushing the extent status cache
[ext4-patch-queue.git] / return-extent-cache-info-via-fiemap
blob 07bf24cf0cdbc598de78185cf27a534573fb9a6c
1 ext4: add new ioctl EXT4_IOC_GET_ES_CACHE
3 For debugging reasons, it's useful to know the contents of the extent
4 status cache.  Since the extent status cache holds much of the information
5 that the fiemap ioctl returns, use an fiemap-style interface to return it.
7 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
8 ---
9  fs/ext4/ext4.h           | 10 ++++++++
10  fs/ext4/extents.c        | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
11  fs/ext4/extents_status.c | 10 ++++++++
12  fs/ext4/extents_status.h |  1 +
13  fs/ext4/inode.c          |  6 ++---
14  fs/ext4/ioctl.c          | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
15  6 files changed, 182 insertions(+), 11 deletions(-)
17 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
18 index ee296797bcd2..e2d8ad27f4d1 100644
19 --- a/fs/ext4/ext4.h
20 +++ b/fs/ext4/ext4.h
21 @@ -652,6 +652,7 @@ enum {
22  /* ioctl codes 19--39 are reserved for fscrypt */
23  #define EXT4_IOC_CLEAR_ES_CACHE                _IO('f', 40)
24  #define EXT4_IOC_GETSTATE              _IOW('f', 41, __u32)
25 +#define EXT4_IOC_GET_ES_CACHE          _IOWR('f', 42, struct fiemap)
27  #define EXT4_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
28  #define EXT4_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
29 @@ -692,6 +693,12 @@ enum {
30  #define EXT4_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
31  #endif
33 +/*
34 + * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
35 + * It indicates that the entry in the extent status cache is for a hole.
36 + */
37 +#define EXT4_FIEMAP_EXTENT_HOLE                0x08000000
39  /* Max physical block we can address w/o extents */
40  #define EXT4_MAX_BLOCK_FILE_PHYS       0xFFFFFFFF
42 @@ -3258,6 +3265,9 @@ extern int ext4_ext_check_inode(struct inode *inode);
43  extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
44  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
45                         __u64 start, __u64 len);
46 +extern int ext4_get_es_cache(struct inode *inode,
47 +                            struct fiemap_extent_info *fieinfo,
48 +                            __u64 start, __u64 len);
49  extern int ext4_ext_precache(struct inode *inode);
50  extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
51  extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
52 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
53 index 92266a2da7d6..0620d495fd8a 100644
54 --- a/fs/ext4/extents.c
55 +++ b/fs/ext4/extents.c
56 @@ -2315,6 +2315,52 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
57         return err;
58  }
60 +static int ext4_fill_es_cache_info(struct inode *inode,
61 +                                  ext4_lblk_t block, ext4_lblk_t num,
62 +                                  struct fiemap_extent_info *fieinfo)
64 +       ext4_lblk_t next, end = block + num - 1;
65 +       struct extent_status es;
66 +       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
67 +       unsigned int flags;
68 +       int err;
70 +       while (block <= end) {
71 +               next = 0;
72 +               flags = 0;
73 +               if (!ext4_es_lookup_extent(inode, block, &next, &es))
74 +                       break;
75 +               if (ext4_es_is_unwritten(&es))
76 +                       flags |= FIEMAP_EXTENT_UNWRITTEN;
77 +               if (ext4_es_is_delayed(&es))
78 +                       flags |= (FIEMAP_EXTENT_DELALLOC |
79 +                                 FIEMAP_EXTENT_UNKNOWN);
80 +               if (ext4_es_is_hole(&es))
81 +                       flags |= EXT4_FIEMAP_EXTENT_HOLE;
82 +               if (next == 0)
83 +                       flags |= FIEMAP_EXTENT_LAST;
84 +               if (flags & (FIEMAP_EXTENT_DELALLOC|
85 +                            EXT4_FIEMAP_EXTENT_HOLE))
86 +                       es.es_pblk = 0;
87 +               else
88 +                       es.es_pblk = ext4_es_pblock(&es);
89 +               err = fiemap_fill_next_extent(fieinfo,
90 +                               (__u64)es.es_lblk << blksize_bits,
91 +                               (__u64)es.es_pblk << blksize_bits,
92 +                               (__u64)es.es_len << blksize_bits,
93 +                               flags);
94 +               if (next == 0)
95 +                       break;
96 +               block = next;
97 +               if (err < 0)
98 +                       return err;
99 +               if (err == 1)
100 +                       return 0;
101 +       }
102 +       return 0;
106  /*
107   * ext4_ext_determine_hole - determine hole around given block
108   * @inode:     inode we lookup in
109 @@ -5017,8 +5063,6 @@ static int ext4_find_delayed_extent(struct inode *inode,
111         return next_del;
113 -/* fiemap flags we can handle specified here */
114 -#define EXT4_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
116  static int ext4_xattr_fiemap(struct inode *inode,
117                                 struct fiemap_extent_info *fieinfo)
118 @@ -5055,10 +5099,16 @@ static int ext4_xattr_fiemap(struct inode *inode,
119         return (error < 0 ? error : 0);
122 -int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
123 -               __u64 start, __u64 len)
124 +static int _ext4_fiemap(struct inode *inode,
125 +                       struct fiemap_extent_info *fieinfo,
126 +                       __u64 start, __u64 len,
127 +                       int (*fill)(struct inode *, ext4_lblk_t,
128 +                                   ext4_lblk_t,
129 +                                   struct fiemap_extent_info *))
131         ext4_lblk_t start_blk;
132 +       u32 ext4_fiemap_flags = FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR;
134         int error = 0;
136         if (ext4_has_inline_data(inode)) {
137 @@ -5075,14 +5125,18 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
138                 error = ext4_ext_precache(inode);
139                 if (error)
140                         return error;
141 +               fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
142         }
144         /* fallback to generic here if not in extents fmt */
145 -       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
146 +       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
147 +           fill == ext4_fill_fiemap_extents)
148                 return generic_block_fiemap(inode, fieinfo, start, len,
149                         ext4_get_block);
151 -       if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
152 +       if (fill == ext4_fill_es_cache_info)
153 +               ext4_fiemap_flags &= FIEMAP_FLAG_XATTR;
154 +       if (fiemap_check_flags(fieinfo, ext4_fiemap_flags))
155                 return -EBADR;
157         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
158 @@ -5101,12 +5155,36 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
159                  * Walk the extent tree gathering extent information
160                  * and pushing extents back to the user.
161                  */
162 -               error = ext4_fill_fiemap_extents(inode, start_blk,
163 -                                                len_blks, fieinfo);
164 +               error = fill(inode, start_blk, len_blks, fieinfo);
165         }
166         return error;
169 +int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
170 +               __u64 start, __u64 len)
172 +       return _ext4_fiemap(inode, fieinfo, start, len,
173 +                           ext4_fill_fiemap_extents);
176 +int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
177 +                     __u64 start, __u64 len)
179 +       if (ext4_has_inline_data(inode)) {
180 +               int has_inline;
182 +               down_read(&EXT4_I(inode)->xattr_sem);
183 +               has_inline = ext4_has_inline_data(inode);
184 +               up_read(&EXT4_I(inode)->xattr_sem);
185 +               if (has_inline)
186 +                       return 0;
187 +       }
189 +       return _ext4_fiemap(inode, fieinfo, start, len,
190 +                           ext4_fill_es_cache_info);
194  /*
195   * ext4_access_path:
196   * Function to access the path buffer for marking it dirty.
197 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
198 index 02cc8eb3eb0e..a959adc59bcd 100644
199 --- a/fs/ext4/extents_status.c
200 +++ b/fs/ext4/extents_status.c
201 @@ -899,6 +899,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
202   * Return: 1 on found, 0 on not
203   */
204  int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
205 +                         ext4_lblk_t *next_lblk,
206                           struct extent_status *es)
208         struct ext4_es_tree *tree;
209 @@ -948,6 +949,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
210                 if (!ext4_es_is_referenced(es1))
211                         ext4_es_set_referenced(es1);
212                 stats->es_stats_cache_hits++;
213 +               if (next_lblk) {
214 +                       node = rb_next(&es1->rb_node);
215 +                       if (node) {
216 +                               es1 = rb_entry(node, struct extent_status,
217 +                                              rb_node);
218 +                               *next_lblk = es1->es_lblk;
219 +                       } else
220 +                               *next_lblk = 0;
221 +               }
222         } else {
223                 stats->es_stats_cache_misses++;
224         }
225 diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
226 index e16785f431e7..eb56a1289031 100644
227 --- a/fs/ext4/extents_status.h
228 +++ b/fs/ext4/extents_status.h
229 @@ -140,6 +140,7 @@ extern void ext4_es_find_extent_range(struct inode *inode,
230                                       ext4_lblk_t lblk, ext4_lblk_t end,
231                                       struct extent_status *es);
232  extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
233 +                                ext4_lblk_t *next_lblk,
234                                  struct extent_status *es);
235  extern bool ext4_es_scan_range(struct inode *inode,
236                                int (*matching_fn)(struct extent_status *es),
237 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
238 index a6523516d681..4b92c7603907 100644
239 --- a/fs/ext4/inode.c
240 +++ b/fs/ext4/inode.c
241 @@ -527,7 +527,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
242                 return -EFSCORRUPTED;
244         /* Lookup extent status tree firstly */
245 -       if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
246 +       if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
247                 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
248                         map->m_pblk = ext4_es_pblock(&es) +
249                                         map->m_lblk - es.es_lblk;
250 @@ -695,7 +695,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
251                  * extent status tree.
252                  */
253                 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
254 -                   ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
255 +                   ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
256                         if (ext4_es_is_written(&es))
257                                 goto out_sem;
258                 }
259 @@ -1868,7 +1868,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
260                   (unsigned long) map->m_lblk);
262         /* Lookup extent status tree firstly */
263 -       if (ext4_es_lookup_extent(inode, iblock, &es)) {
264 +       if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
265                 if (ext4_es_is_hole(&es)) {
266                         retval = 0;
267                         down_read(&EXT4_I(inode)->i_data_sem);
268 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
269 index ffb7bde4900d..d6242b7b8718 100644
270 --- a/fs/ext4/ioctl.c
271 +++ b/fs/ext4/ioctl.c
272 @@ -745,6 +745,74 @@ static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
273                 fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
276 +/* copied from fs/ioctl.c */
277 +static int fiemap_check_ranges(struct super_block *sb,
278 +                              u64 start, u64 len, u64 *new_len)
280 +       u64 maxbytes = (u64) sb->s_maxbytes;
282 +       *new_len = len;
284 +       if (len == 0)
285 +               return -EINVAL;
287 +       if (start > maxbytes)
288 +               return -EFBIG;
290 +       /*
291 +        * Shrink request scope to what the fs can actually handle.
292 +        */
293 +       if (len > maxbytes || (maxbytes - len) < start)
294 +               *new_len = maxbytes - start;
296 +       return 0;
299 +/* So that the fiemap access checks can't overflow on 32 bit machines. */
300 +#define FIEMAP_MAX_EXTENTS     (UINT_MAX / sizeof(struct fiemap_extent))
302 +static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg)
304 +       struct fiemap fiemap;
305 +       struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
306 +       struct fiemap_extent_info fieinfo = { 0, };
307 +       struct inode *inode = file_inode(filp);
308 +       struct super_block *sb = inode->i_sb;
309 +       u64 len;
310 +       int error;
312 +       if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
313 +               return -EFAULT;
315 +       if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
316 +               return -EINVAL;
318 +       error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
319 +                                   &len);
320 +       if (error)
321 +               return error;
323 +       fieinfo.fi_flags = fiemap.fm_flags;
324 +       fieinfo.fi_extents_max = fiemap.fm_extent_count;
325 +       fieinfo.fi_extents_start = ufiemap->fm_extents;
327 +       if (fiemap.fm_extent_count != 0 &&
328 +           !access_ok(fieinfo.fi_extents_start,
329 +                      fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
330 +               return -EFAULT;
332 +       if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
333 +               filemap_write_and_wait(inode->i_mapping);
335 +       error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start, len);
336 +       fiemap.fm_flags = fieinfo.fi_flags;
337 +       fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
338 +       if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
339 +               error = -EFAULT;
341 +       return error;
344  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
346         struct inode *inode = file_inode(filp);
347 @@ -1139,6 +1207,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
348                 return put_user(state, (__u32 __user *) arg);
349         }
351 +       case EXT4_IOC_GET_ES_CACHE:
352 +               return ext4_ioctl_get_es_cache(filp, arg);
354         case EXT4_IOC_FSGETXATTR:
355         {
356                 struct fsxattr fa;
357 @@ -1259,6 +1330,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
358         case FS_IOC_GETFSMAP:
359         case EXT4_IOC_CLEAR_ES_CACHE:
360         case EXT4_IOC_GETSTATE:
361 +       case EXT4_IOC_GET_ES_CACHE:
362                 break;
363         default:
364                 return -ENOIOCTLCMD;