Sync up with encryption patches
[ext4-patch-queue.git] / implement-the-ext4-encryption-write-path
blob289a67939ccd0abf22ef003fc08cac88893e1640
1 ext4 crypto: implement the ext4 encryption write path
3 From: Michael Halcrow <mhalcrow@google.com>
5 Pulls block_write_begin() into fs/ext4/inode.c because it might need
6 to do a low-level read of the existing data, in which case we need to
7 decrypt it.
9 Change-Id: I2337918809c43e18454a1d5621024d2699a98666
10 Signed-off-by: Michael Halcrow <mhalcrow@google.com>
11 Signed-off-by: Ildar Muslukhov <ildarm@google.com>
12 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
13 ---
14  fs/ext4/extents.c |  17 +++++++++
15  fs/ext4/ialloc.c  |   5 +++
16  fs/ext4/inode.c   | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
17  fs/ext4/page-io.c |  45 ++++++++++++++++++++---
18  4 files changed, 173 insertions(+), 6 deletions(-)
20 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
21 index 74580ea..973816b 100644
22 --- a/fs/ext4/extents.c
23 +++ b/fs/ext4/extents.c
24 @@ -3122,6 +3122,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
25         ee_len    = ext4_ext_get_actual_len(ex);
26         ee_pblock = ext4_ext_pblock(ex);
28 +       if (ext4_encrypted_inode(inode))
29 +               return ext4_encrypted_zeroout(inode, ex);
31         ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
32         if (ret > 0)
33                 ret = 0;
34 @@ -4898,6 +4901,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
35         ext4_lblk_t lblk;
36         unsigned int blkbits = inode->i_blkbits;
38 +       /*
39 +        * Encrypted inodes can't handle collapse range or insert
40 +        * range since we would need to re-encrypt blocks with a
41 +        * different IV or XTS tweak (which are based on the logical
42 +        * block number).
43 +        *
44 +        * XXX It's not clear why zero range isn't working, but we'll
45 +        * leave it disabled for encrypted inodes for now.  This is a
46 +        * bug we should fix....
47 +        */
48 +       if (ext4_encrypted_inode(inode) &&
49 +           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
50 +               return -EOPNOTSUPP;
52         /* Return error if mode is not supported */
53         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
54                      FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
55 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
56 index 6ab6f63..247737e 100644
57 --- a/fs/ext4/ialloc.c
58 +++ b/fs/ext4/ialloc.c
59 @@ -996,6 +996,11 @@ got:
60         ei->i_block_group = group;
61         ei->i_last_alloc_group = ~0;
63 +       /* If the directory is encrypted, then we should encrypt the inode. */
64 +       if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
65 +           ext4_encrypted_inode(dir))
66 +               ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
68         ext4_set_inode_flags(inode);
69         if (IS_DIRSYNC(inode))
70                 ext4_handle_sync(handle);
71 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
72 index cd30091..7c4527e 100644
73 --- a/fs/ext4/inode.c
74 +++ b/fs/ext4/inode.c
75 @@ -886,6 +886,95 @@ int do_journal_get_write_access(handle_t *handle,
77  static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
78                    struct buffer_head *bh_result, int create);
80 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
81 +static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
82 +                                 get_block_t *get_block)
84 +       unsigned from = pos & (PAGE_CACHE_SIZE - 1);
85 +       unsigned to = from + len;
86 +       struct inode *inode = page->mapping->host;
87 +       unsigned block_start, block_end;
88 +       sector_t block;
89 +       int err = 0;
90 +       unsigned blocksize = inode->i_sb->s_blocksize;
91 +       unsigned bbits;
92 +       struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
93 +       bool decrypt = false;
95 +       BUG_ON(!PageLocked(page));
96 +       BUG_ON(from > PAGE_CACHE_SIZE);
97 +       BUG_ON(to > PAGE_CACHE_SIZE);
98 +       BUG_ON(from > to);
100 +       if (!page_has_buffers(page))
101 +               create_empty_buffers(page, blocksize, 0);
102 +       head = page_buffers(page);
103 +       bbits = ilog2(blocksize);
104 +       block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
106 +       for (bh = head, block_start = 0; bh != head || !block_start;
107 +           block++, block_start = block_end, bh = bh->b_this_page) {
108 +               block_end = block_start + blocksize;
109 +               if (block_end <= from || block_start >= to) {
110 +                       if (PageUptodate(page)) {
111 +                               if (!buffer_uptodate(bh))
112 +                                       set_buffer_uptodate(bh);
113 +                       }
114 +                       continue;
115 +               }
116 +               if (buffer_new(bh))
117 +                       clear_buffer_new(bh);
118 +               if (!buffer_mapped(bh)) {
119 +                       WARN_ON(bh->b_size != blocksize);
120 +                       err = get_block(inode, block, bh, 1);
121 +                       if (err)
122 +                               break;
123 +                       if (buffer_new(bh)) {
124 +                               unmap_underlying_metadata(bh->b_bdev,
125 +                                                         bh->b_blocknr);
126 +                               if (PageUptodate(page)) {
127 +                                       clear_buffer_new(bh);
128 +                                       set_buffer_uptodate(bh);
129 +                                       mark_buffer_dirty(bh);
130 +                                       continue;
131 +                               }
132 +                               if (block_end > to || block_start < from)
133 +                                       zero_user_segments(page, to, block_end,
134 +                                                          block_start, from);
135 +                               continue;
136 +                       }
137 +               }
138 +               if (PageUptodate(page)) {
139 +                       if (!buffer_uptodate(bh))
140 +                               set_buffer_uptodate(bh);
141 +                       continue;
142 +               }
143 +               if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
144 +                   !buffer_unwritten(bh) &&
145 +                   (block_start < from || block_end > to)) {
146 +                       ll_rw_block(READ, 1, &bh);
147 +                       *wait_bh++ = bh;
148 +                       decrypt = ext4_encrypted_inode(inode) &&
149 +                               S_ISREG(inode->i_mode);
150 +               }
151 +       }
152 +       /*
153 +        * If we issued read requests, let them complete.
154 +        */
155 +       while (wait_bh > wait) {
156 +               wait_on_buffer(*--wait_bh);
157 +               if (!buffer_uptodate(*wait_bh))
158 +                       err = -EIO;
159 +       }
160 +       if (unlikely(err))
161 +               page_zero_new_buffers(page, from, to);
162 +       else if (decrypt)
163 +               err = ext4_decrypt_one(inode, page);
164 +       return err;
166 +#endif
168  static int ext4_write_begin(struct file *file, struct address_space *mapping,
169                             loff_t pos, unsigned len, unsigned flags,
170                             struct page **pagep, void **fsdata)
171 @@ -948,11 +1037,19 @@ retry_journal:
172         /* In case writeback began while the page was unlocked */
173         wait_for_stable_page(page);
175 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
176 +       if (ext4_should_dioread_nolock(inode))
177 +               ret = ext4_block_write_begin(page, pos, len,
178 +                                            ext4_get_block_write);
179 +       else
180 +               ret = ext4_block_write_begin(page, pos, len,
181 +                                            ext4_get_block);
182 +#else
183         if (ext4_should_dioread_nolock(inode))
184                 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
185         else
186                 ret = __block_write_begin(page, pos, len, ext4_get_block);
188 +#endif
189         if (!ret && ext4_should_journal_data(inode)) {
190                 ret = ext4_walk_page_buffers(handle, page_buffers(page),
191                                              from, to, NULL,
192 @@ -2574,7 +2671,12 @@ retry_journal:
193         /* In case writeback began while the page was unlocked */
194         wait_for_stable_page(page);
196 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
197 +       ret = ext4_block_write_begin(page, pos, len,
198 +                                    ext4_da_get_block_prep);
199 +#else
200         ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
201 +#endif
202         if (ret < 0) {
203                 unlock_page(page);
204                 ext4_journal_stop(handle);
205 @@ -3032,6 +3134,9 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
206                 get_block_func = ext4_get_block_write;
207                 dio_flags = DIO_LOCKING;
208         }
209 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
210 +       BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
211 +#endif
212         if (IS_DAX(inode))
213                 ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
214                                 ext4_end_io_dio, dio_flags);
215 @@ -3096,6 +3201,11 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
216         size_t count = iov_iter_count(iter);
217         ssize_t ret;
219 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
220 +       if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
221 +               return 0;
222 +#endif
224         /*
225          * If we are doing data journalling we don't support O_DIRECT
226          */
227 diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
228 index 5687e47..51a5f12 100644
229 --- a/fs/ext4/page-io.c
230 +++ b/fs/ext4/page-io.c
231 @@ -67,6 +67,10 @@ static void ext4_finish_bio(struct bio *bio)
233         bio_for_each_segment_all(bvec, bio, i) {
234                 struct page *page = bvec->bv_page;
235 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
236 +               struct page *data_page = NULL;
237 +               struct ext4_crypto_ctx *ctx = NULL;
238 +#endif
239                 struct buffer_head *bh, *head;
240                 unsigned bio_start = bvec->bv_offset;
241                 unsigned bio_end = bio_start + bvec->bv_len;
242 @@ -76,6 +80,15 @@ static void ext4_finish_bio(struct bio *bio)
243                 if (!page)
244                         continue;
246 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
247 +               if (!page->mapping) {
248 +                       /* The bounce data pages are unmapped. */
249 +                       data_page = page;
250 +                       ctx = (struct ext4_crypto_ctx *)page_private(data_page);
251 +                       page = ctx->control_page;
252 +               }
253 +#endif
255                 if (error) {
256                         SetPageError(page);
257                         set_bit(AS_EIO, &page->mapping->flags);
258 @@ -100,8 +113,13 @@ static void ext4_finish_bio(struct bio *bio)
259                 } while ((bh = bh->b_this_page) != head);
260                 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
261                 local_irq_restore(flags);
262 -               if (!under_io)
263 +               if (!under_io) {
264 +#ifdef CONFIG_EXT4_FS_ENCRYPTION
265 +                       if (ctx)
266 +                               ext4_restore_control_page(data_page);
267 +#endif
268                         end_page_writeback(page);
269 +               }
270         }
273 @@ -376,6 +394,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
275  static int io_submit_add_bh(struct ext4_io_submit *io,
276                             struct inode *inode,
277 +                           struct page *page,
278                             struct buffer_head *bh)
280         int ret;
281 @@ -389,7 +408,7 @@ submit_and_retry:
282                 if (ret)
283                         return ret;
284         }
285 -       ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
286 +       ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
287         if (ret != bh->b_size)
288                 goto submit_and_retry;
289         io->io_next_block++;
290 @@ -402,6 +421,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
291                         struct writeback_control *wbc,
292                         bool keep_towrite)
294 +       struct page *data_page = NULL;
295         struct inode *inode = page->mapping->host;
296         unsigned block_start, blocksize;
297         struct buffer_head *bh, *head;
298 @@ -461,19 +481,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
299                 set_buffer_async_write(bh);
300         } while ((bh = bh->b_this_page) != head);
302 -       /* Now submit buffers to write */
303         bh = head = page_buffers(page);
305 +       if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
306 +               data_page = ext4_encrypt(inode, page);
307 +               if (IS_ERR(data_page)) {
308 +                       ret = PTR_ERR(data_page);
309 +                       data_page = NULL;
310 +                       goto out;
311 +               }
312 +       }
314 +       /* Now submit buffers to write */
315         do {
316                 if (!buffer_async_write(bh))
317                         continue;
318 -               ret = io_submit_add_bh(io, inode, bh);
319 +               ret = io_submit_add_bh(io, inode,
320 +                                      data_page ? data_page : page, bh);
321                 if (ret) {
322                         /*
323                          * We only get here on ENOMEM.  Not much else
324                          * we can do but mark the page as dirty, and
325                          * better luck next time.
326                          */
327 -                       redirty_page_for_writepage(wbc, page);
328                         break;
329                 }
330                 nr_submitted++;
331 @@ -482,6 +512,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
333         /* Error stopped previous loop? Clean up buffers... */
334         if (ret) {
335 +       out:
336 +               if (data_page)
337 +                       ext4_restore_control_page(data_page);
338 +               printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
339 +               redirty_page_for_writepage(wbc, page);
340                 do {
341                         clear_buffer_async_write(bh);
342                         bh = bh->b_this_page;