add patch fix-COLLAPSE_RANGE-test-failure-for-bigalloc
[ext4-patch-queue.git] / add-blkdiscard-ioctl
blob5b8ea29a119b6891a39537dbd943ac6feb0e48d0
1 ext4: add BLKDISCARD ioctl
3 The blkdiscard ioctl previously only worked on block devices.  Allow
4 this ioctl to work on ext4 files.
6 Google-Bug-Id: 11517631
8 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
9 ---
10  fs/ext4/ext4.h    |   5 +++++
11  fs/ext4/extents.c |  38 +++++++++++++++++++++++++++++---------
12  fs/ext4/ioctl.c   | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13  3 files changed, 173 insertions(+), 9 deletions(-)
15 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
16 index ece5556..92bcf9f 100644
17 --- a/fs/ext4/ext4.h
18 +++ b/fs/ext4/ext4.h
19 @@ -2708,6 +2708,8 @@ extern int ext4_check_blockref(const char *, unsigned int,
20  /* extents.c */
21  struct ext4_ext_path;
22  struct ext4_extent;
23 +typedef int (*extent_iterator_t)(struct inode *inode, struct extent_status *es,
24 +                                unsigned int flags, void *private);
26  /*
27   * Maximum number of logical blocks in a file; ext4_extent's ee_block is
28 @@ -2752,6 +2754,9 @@ extern int ext4_find_delalloc_range(struct inode *inode,
29                                     ext4_lblk_t lblk_start,
30                                     ext4_lblk_t lblk_end);
31  extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
32 +extern int ext4_extent_iterator(struct inode *inode,
33 +                               ext4_lblk_t block, ext4_lblk_t num,
34 +                               extent_iterator_t callback, void *private);
35  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
36                         __u64 start, __u64 len);
37  extern int ext4_ext_precache(struct inode *inode);
38 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
39 index 4410cc3..50103e9 100644
40 --- a/fs/ext4/extents.c
41 +++ b/fs/ext4/extents.c
42 @@ -2101,9 +2101,13 @@ cleanup:
43         return err;
44  }
46 -static int ext4_fill_fiemap_extents(struct inode *inode,
47 -                                   ext4_lblk_t block, ext4_lblk_t num,
48 -                                   struct fiemap_extent_info *fieinfo)
50 +typedef int (*extent_iterator_t)(struct inode *inode, struct extent_status *es,
51 +                                unsigned int flags, void *private);
53 +int ext4_extent_iterator(struct inode *inode,
54 +                        ext4_lblk_t block, ext4_lblk_t num,
55 +                        extent_iterator_t callback, void *private)
56  {
57         struct ext4_ext_path *path = NULL;
58         struct ext4_extent *ex;
59 @@ -2112,7 +2116,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
60         ext4_lblk_t last = block + num;
61         int exists, depth = 0, err = 0;
62         unsigned int flags = 0;
63 -       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
65         while (block < last && block != EXT_MAX_BLOCKS) {
66                 num = last - block;
67 @@ -2236,11 +2239,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
68                 }
70                 if (exists) {
71 -                       err = fiemap_fill_next_extent(fieinfo,
72 -                               (__u64)es.es_lblk << blksize_bits,
73 -                               (__u64)es.es_pblk << blksize_bits,
74 -                               (__u64)es.es_len << blksize_bits,
75 -                               flags);
76 +                       err = callback(inode, &es, flags, private);
77                         if (err < 0)
78                                 break;
79                         if (err == 1) {
80 @@ -2260,6 +2259,27 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
81         return err;
82  }
84 +static int call_fill_fiemap(struct inode *inode, struct extent_status *es,
85 +                           unsigned int flags, void *private)
87 +       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
89 +       return fiemap_fill_next_extent(private,
90 +                                      (__u64)es->es_lblk << blksize_bits,
91 +                                      (__u64)es->es_pblk << blksize_bits,
92 +                                      (__u64)es->es_len << blksize_bits,
93 +                                      flags);
96 +static int ext4_fill_fiemap_extents(struct inode *inode,
97 +                                   ext4_lblk_t block, ext4_lblk_t num,
98 +                                   struct fiemap_extent_info *fieinfo)
100 +       return ext4_extent_iterator(inode, block, num,
101 +                                   call_fill_fiemap, fieinfo);
105  /*
106   * ext4_ext_put_gap_in_cache:
107   * calculate boundaries of the gap that the requested block fits into
108 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
109 index 60589b6..34964d4 100644
110 --- a/fs/ext4/ioctl.c
111 +++ b/fs/ext4/ioctl.c
112 @@ -213,6 +213,132 @@ swap_boot_out:
113         return err;
116 +static int discard_callback(struct inode *inode, struct extent_status *es,
117 +                           unsigned int flags, void *private)
119 +       struct ext4_map_blocks *map = private;
120 +       ext4_lblk_t es_lblk = es->es_lblk;
121 +       ext4_lblk_t es_len = es->es_len;
122 +       ext4_fsblk_t es_pblk = es->es_pblk;
124 +       if (flags & (FIEMAP_EXTENT_UNKNOWN |
125 +                    FIEMAP_EXTENT_ENCODED |
126 +                    FIEMAP_EXTENT_DATA_ENCRYPTED |
127 +                    FIEMAP_EXTENT_DELALLOC |
128 +                    FIEMAP_EXTENT_DATA_TAIL |
129 +                    FIEMAP_EXTENT_DATA_INLINE |
130 +                    FIEMAP_EXTENT_NOT_ALIGNED |
131 +                    FIEMAP_EXTENT_SHARED))
132 +               return 0;
134 +       if (es_lblk < map->m_lblk) {
135 +               ext4_lblk_t d = map->m_lblk - es_lblk;
136 +               if (d > es_len)
137 +                       return 0;
138 +               es_lblk += d;
139 +               es_pblk += d;
140 +               es_len -= d;
141 +       }
143 +       if (es_lblk + es_len > map->m_lblk + map->m_len)
144 +               es_len -= es_lblk + es_len - (map->m_lblk + map->m_len);
145 +#ifdef BLKDISCARD_DEBUG
146 +       ext4_msg(inode->i_sb, KERN_NOTICE, "discard: %llu len %lu",
147 +                (unsigned long long) es_pblk, (unsigned long) es_len);
148 +       return 0;
149 +#else
150 +       return sb_issue_discard(inode->i_sb, es_pblk, es_len, GFP_KERNEL, 0);
151 +#endif
154 +static int blkdiscard_inode(struct inode *inode, u64 start_offset, u64 len)
156 +       struct super_block *sb = inode->i_sb;
157 +       struct ext4_map_blocks map;
158 +       unsigned int num;
160 +       if (!S_ISREG(inode->i_mode))
161 +               return -EINVAL;
163 +       if (!blk_queue_discard(bdev_get_queue(sb->s_bdev)))
164 +              return -EOPNOTSUPP;
166 +       if (!bdev_discard_zeroes_data(sb->s_bdev) && !capable(CAP_SYS_ADMIN))
167 +              return -EOPNOTSUPP;
169 +       num = start_offset & (sb->s_blocksize - 1);
170 +       if (num) {
171 +               num = sb->s_blocksize - num;
172 +               start_offset += num;
173 +               len = (len > num) ? len - num : 0;
174 +       }
175 +       if (len == 0)
176 +               return 0;
177 +       if (start_offset > sb->s_maxbytes)
178 +               return -EFBIG;
179 +       if (len > sb->s_maxbytes || (sb->s_maxbytes - len) < start_offset)
180 +               len = sb->s_maxbytes - start_offset;
182 +       map.m_lblk = start_offset >> sb->s_blocksize_bits;
183 +       map.m_len = len >> sb->s_blocksize_bits;
185 +#ifdef BLKDISCARD_DEBUG
186 +       ext4_msg(sb, KERN_NOTICE, "blkdiscard range: %lu len %lu",
187 +                (unsigned long) map.m_lblk, (unsigned long) map.m_len);
188 +#endif
190 +       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
191 +               return ext4_extent_iterator(inode, map.m_lblk, map.m_len,
192 +                                           discard_callback, &map);
194 +       num = map.m_len;
195 +       while (num) {
196 +               int ret = ext4_map_blocks(NULL, inode, &map, 0);
198 +               if (ret < 0)
199 +                       return ret;
201 +               if (ret == 0) {
202 +#ifdef BLKDISCARD_DEBUG
203 +                       ext4_msg(sb, KERN_NOTICE,
204 +                                "skip: lblk %lu len %lu ret %lu num %lu",
205 +                                (unsigned long) map.m_lblk,
206 +                                (unsigned long) map.m_len,
207 +                                (unsigned long) ret,
208 +                                (unsigned long) num);
209 +#endif
210 +                       map.m_lblk++;
211 +                       num--;
212 +                       continue;
213 +               }
214 +#ifdef BLKDISCARD_DEBUG
215 +               ext4_msg(sb, KERN_NOTICE,
216 +                        "walk: lblk %lu pblk %llu len %lu ret %lu num %lu",
217 +                        (unsigned long) map.m_lblk,
218 +                        (unsigned long long) map.m_pblk,
219 +                        (unsigned long) map.m_len,
220 +                        (unsigned long) ret,
221 +                        (unsigned long) num);
222 +#endif
223 +               if (ret > num)
224 +                       ret = num;
225 +               map.m_lblk += ret;
226 +               num -= ret;
227 +               map.m_len = num;
229 +#ifdef BLKDISCARD_DEBUG
230 +               ext4_msg(sb, KERN_NOTICE, "discard: %llu len %lu",
231 +                        (unsigned long long) map.m_pblk, (unsigned long) ret);
232 +#else
233 +               ret = sb_issue_discard(sb, map.m_pblk, ret,
234 +                                      GFP_KERNEL, 0);
235 +               if (ret)
236 +                       return ret;
237 +#endif
238 +       }
239 +       return 0;
242  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
244         struct inode *inode = file_inode(filp);
245 @@ -626,6 +752,18 @@ resizefs_out:
246         case EXT4_IOC_PRECACHE_EXTENTS:
247                 return ext4_ext_precache(inode);
249 +       case BLKDISCARD: {
250 +               uint64_t range[2];
252 +               if (!(filp->f_mode & FMODE_WRITE))
253 +                       return -EBADF;
255 +               if (copy_from_user(range, (void __user *)arg, sizeof(range)))
256 +                       return -EFAULT;
258 +               return blkdiscard_inode(file_inode(filp), range[0], range[1]);
259 +       }
261         default:
262                 return -ENOTTY;
263         }
264 @@ -690,6 +828,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
265         case FITRIM:
266         case EXT4_IOC_RESIZE_FS:
267         case EXT4_IOC_PRECACHE_EXTENTS:
268 +       case BLKDISCARD:
269                 break;
270         default:
271                 return -ENOIOCTLCMD;