Check in v2 version of "ext4: Punch hole and DAX fixes"
[ext4-patch-queue.git] / add-blkdiscard-ioctl
blob3de5b8475e302418c8a6dff3ed6ce056702b1acb
1 ext4: add BLKDISCARD ioctl
3 The BLKDISCARD ioctl previously only worked on block devices.  Allow
4 this ioctl to work on ext4 files.
6 Google-Bug-Id: 11517631
8 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
9 ---
10  fs/ext4/ext4.h    |   5 +++
11  fs/ext4/extents.c |  38 +++++++++++++++++------
12  fs/ext4/ioctl.c   | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13  3 files changed, 172 insertions(+), 9 deletions(-)
15 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
16 index 0179654f..fca1efb 100644
17 --- a/fs/ext4/ext4.h
18 +++ b/fs/ext4/ext4.h
19 @@ -2867,6 +2867,8 @@ extern int ext4_check_blockref(const char *, unsigned int,
20  /* extents.c */
21  struct ext4_ext_path;
22  struct ext4_extent;
23 +typedef int (*extent_iterator_t)(struct inode *inode, struct extent_status *es,
24 +                                unsigned int flags, void *private);
26  /*
27   * Maximum number of logical blocks in a file; ext4_extent's ee_block is
28 @@ -2912,6 +2914,9 @@ extern int ext4_find_delalloc_range(struct inode *inode,
29                                     ext4_lblk_t lblk_end);
30  extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
31  extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
32 +extern int ext4_extent_iterator(struct inode *inode,
33 +                               ext4_lblk_t block, ext4_lblk_t num,
34 +                               extent_iterator_t callback, void *private);
35  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
36                         __u64 start, __u64 len);
37  extern int ext4_ext_precache(struct inode *inode);
38 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
39 index 5330ec2..1941a4f 100644
40 --- a/fs/ext4/extents.c
41 +++ b/fs/ext4/extents.c
42 @@ -2142,9 +2142,13 @@ cleanup:
43         return err;
44  }
46 -static int ext4_fill_fiemap_extents(struct inode *inode,
47 -                                   ext4_lblk_t block, ext4_lblk_t num,
48 -                                   struct fiemap_extent_info *fieinfo)
50 +typedef int (*extent_iterator_t)(struct inode *inode, struct extent_status *es,
51 +                                unsigned int flags, void *private);
53 +int ext4_extent_iterator(struct inode *inode,
54 +                        ext4_lblk_t block, ext4_lblk_t num,
55 +                        extent_iterator_t callback, void *private)
56  {
57         struct ext4_ext_path *path = NULL;
58         struct ext4_extent *ex;
59 @@ -2153,7 +2157,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
60         ext4_lblk_t last = block + num;
61         int exists, depth = 0, err = 0;
62         unsigned int flags = 0;
63 -       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
65         while (block < last && block != EXT_MAX_BLOCKS) {
66                 num = last - block;
67 @@ -2270,11 +2273,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
68                 }
70                 if (exists) {
71 -                       err = fiemap_fill_next_extent(fieinfo,
72 -                               (__u64)es.es_lblk << blksize_bits,
73 -                               (__u64)es.es_pblk << blksize_bits,
74 -                               (__u64)es.es_len << blksize_bits,
75 -                               flags);
76 +                       err = callback(inode, &es, flags, private);
77                         if (err < 0)
78                                 break;
79                         if (err == 1) {
80 @@ -2291,6 +2290,27 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
81         return err;
82  }
84 +static int call_fill_fiemap(struct inode *inode, struct extent_status *es,
85 +                           unsigned int flags, void *private)
87 +       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
89 +       return fiemap_fill_next_extent(private,
90 +                                      (__u64)es->es_lblk << blksize_bits,
91 +                                      (__u64)es->es_pblk << blksize_bits,
92 +                                      (__u64)es->es_len << blksize_bits,
93 +                                      flags);
96 +static int ext4_fill_fiemap_extents(struct inode *inode,
97 +                                   ext4_lblk_t block, ext4_lblk_t num,
98 +                                   struct fiemap_extent_info *fieinfo)
100 +       return ext4_extent_iterator(inode, block, num,
101 +                                   call_fill_fiemap, fieinfo);
105  /*
106   * ext4_ext_put_gap_in_cache:
107   * calculate boundaries of the gap that the requested block fits into
108 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
109 index 2cb9e17..7fc02e3 100644
110 --- a/fs/ext4/ioctl.c
111 +++ b/fs/ext4/ioctl.c
112 @@ -206,6 +206,132 @@ static int uuid_is_zero(__u8 u[16])
113         return 1;
116 +static int discard_callback(struct inode *inode, struct extent_status *es,
117 +                           unsigned int flags, void *private)
119 +       struct ext4_map_blocks *map = private;
120 +       ext4_lblk_t es_lblk = es->es_lblk;
121 +       ext4_lblk_t es_len = es->es_len;
122 +       ext4_fsblk_t es_pblk = es->es_pblk;
124 +       if (flags & (FIEMAP_EXTENT_UNKNOWN |
125 +                    FIEMAP_EXTENT_ENCODED |
126 +                    FIEMAP_EXTENT_DATA_ENCRYPTED |
127 +                    FIEMAP_EXTENT_DELALLOC |
128 +                    FIEMAP_EXTENT_DATA_TAIL |
129 +                    FIEMAP_EXTENT_DATA_INLINE |
130 +                    FIEMAP_EXTENT_NOT_ALIGNED |
131 +                    FIEMAP_EXTENT_SHARED))
132 +               return 0;
134 +       if (es_lblk < map->m_lblk) {
135 +               ext4_lblk_t d = map->m_lblk - es_lblk;
136 +               if (d > es_len)
137 +                       return 0;
138 +               es_lblk += d;
139 +               es_pblk += d;
140 +               es_len -= d;
141 +       }
143 +       if (es_lblk + es_len > map->m_lblk + map->m_len)
144 +               es_len -= es_lblk + es_len - (map->m_lblk + map->m_len);
145 +#ifdef BLKDISCARD_DEBUG
146 +       ext4_msg(inode->i_sb, KERN_NOTICE, "discard: %llu len %lu",
147 +                (unsigned long long) es_pblk, (unsigned long) es_len);
148 +       return 0;
149 +#else
150 +       return sb_issue_discard(inode->i_sb, es_pblk, es_len, GFP_KERNEL, 0);
151 +#endif
154 +static int blkdiscard_inode(struct inode *inode, u64 start_offset, u64 len)
156 +       struct super_block *sb = inode->i_sb;
157 +       struct ext4_map_blocks map;
158 +       unsigned int num;
160 +       if (!S_ISREG(inode->i_mode))
161 +               return -EINVAL;
163 +       if (!blk_queue_discard(bdev_get_queue(sb->s_bdev)))
164 +              return -EOPNOTSUPP;
166 +       if (!bdev_discard_zeroes_data(sb->s_bdev) && !capable(CAP_SYS_ADMIN))
167 +              return -EOPNOTSUPP;
169 +       num = start_offset & (sb->s_blocksize - 1);
170 +       if (num) {
171 +               num = sb->s_blocksize - num;
172 +               start_offset += num;
173 +               len = (len > num) ? len - num : 0;
174 +       }
175 +       if (len == 0)
176 +               return 0;
177 +       if (start_offset > sb->s_maxbytes)
178 +               return -EFBIG;
179 +       if (len > sb->s_maxbytes || (sb->s_maxbytes - len) < start_offset)
180 +               len = sb->s_maxbytes - start_offset;
182 +       map.m_lblk = start_offset >> sb->s_blocksize_bits;
183 +       map.m_len = len >> sb->s_blocksize_bits;
185 +#ifdef BLKDISCARD_DEBUG
186 +       ext4_msg(sb, KERN_NOTICE, "blkdiscard range: %lu len %lu",
187 +                (unsigned long) map.m_lblk, (unsigned long) map.m_len);
188 +#endif
190 +       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
191 +               return ext4_extent_iterator(inode, map.m_lblk, map.m_len,
192 +                                           discard_callback, &map);
194 +       num = map.m_len;
195 +       while (num) {
196 +               int ret = ext4_map_blocks(NULL, inode, &map, 0);
198 +               if (ret < 0)
199 +                       return ret;
201 +               if (ret == 0) {
202 +#ifdef BLKDISCARD_DEBUG
203 +                       ext4_msg(sb, KERN_NOTICE,
204 +                                "skip: lblk %lu len %lu ret %lu num %lu",
205 +                                (unsigned long) map.m_lblk,
206 +                                (unsigned long) map.m_len,
207 +                                (unsigned long) ret,
208 +                                (unsigned long) num);
209 +#endif
210 +                       map.m_lblk++;
211 +                       num--;
212 +                       continue;
213 +               }
214 +#ifdef BLKDISCARD_DEBUG
215 +               ext4_msg(sb, KERN_NOTICE,
216 +                        "walk: lblk %lu pblk %llu len %lu ret %lu num %lu",
217 +                        (unsigned long) map.m_lblk,
218 +                        (unsigned long long) map.m_pblk,
219 +                        (unsigned long) map.m_len,
220 +                        (unsigned long) ret,
221 +                        (unsigned long) num);
222 +#endif
223 +               if (ret > num)
224 +                       ret = num;
225 +               map.m_lblk += ret;
226 +               num -= ret;
227 +               map.m_len = num;
229 +#ifdef BLKDISCARD_DEBUG
230 +               ext4_msg(sb, KERN_NOTICE, "discard: %llu len %lu",
231 +                        (unsigned long long) map.m_pblk, (unsigned long) ret);
232 +#else
233 +               ret = sb_issue_discard(sb, map.m_pblk, ret,
234 +                                      GFP_KERNEL, 0);
235 +               if (ret)
236 +                       return ret;
237 +#endif
238 +       }
239 +       return 0;
242  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
244         struct inode *inode = file_inode(filp);
245 @@ -697,6 +823,17 @@ encryption_policy_out:
246                 return -EOPNOTSUPP;
247  #endif
248         }
249 +       case BLKDISCARD: {
250 +               uint64_t range[2];
252 +               if (!(filp->f_mode & FMODE_WRITE))
253 +                       return -EBADF;
255 +               if (copy_from_user(range, (void __user *)arg, sizeof(range)))
256 +                       return -EFAULT;
258 +               return blkdiscard_inode(file_inode(filp), range[0], range[1]);
259 +       }
260         default:
261                 return -ENOTTY;
262         }
263 @@ -764,6 +901,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
264         case EXT4_IOC_SET_ENCRYPTION_POLICY:
265         case EXT4_IOC_GET_ENCRYPTION_PWSALT:
266         case EXT4_IOC_GET_ENCRYPTION_POLICY:
267 +       case BLKDISCARD:
268                 break;
269         default:
270                 return -ENOIOCTLCMD;