remove unnecessary iput checks
[ext4-patch-queue.git] / implement-the-ext4-encryption-write-path
blob502614b3741323befe64dadb436779039cc83920
1 From: Michael Halcrow <mhalcrow@google.com>
3 ext4: implement the ext4 encryption write path
5 With encryption enabled, we send one encrypted bounce page at a time
6 out to the block layer. This isn't the most efficient approach, but
7 it's relatively low-impact and can be improved later if need be.
8 There's talk amongst the ext4 maintainers about someday getting rid
9 of buffer heads, and the encryption path can be folded into that work
10 when it happens.
12 Pulls __block_write_begin() into fs/ext4/inode.c (as
13 ext4_block_write_begin()) because it might need to do a low-level read
14 of the existing data, in which case we need to decrypt it.
16 Signed-off-by: Michael Halcrow <mhalcrow@google.com>
17 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
18 Signed-off-by: Ildar Muslukhov <ildarm@google.com>
19 ---
20  fs/ext4/inode.c   | 114 +++++++++++++++++++++++++++++++++++++++--
21  fs/ext4/namei.c   |  11 +++-
22  fs/ext4/page-io.c | 148 +++++++++++++++++++++++++++++++++++++++++-------------
23  3 files changed, 230 insertions(+), 43 deletions(-)
25 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
26 index e9777f9..117b691 100644
27 --- a/fs/ext4/inode.c
28 +++ b/fs/ext4/inode.c
29 @@ -877,6 +877,101 @@ int do_journal_get_write_access(handle_t *handle,
31  static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
32                    struct buffer_head *bh_result, int create);
34 +static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
35 +                                 get_block_t *get_block)
37 +       unsigned from = pos & (PAGE_CACHE_SIZE - 1);
38 +       unsigned to = from + len;
39 +       struct inode *inode = page->mapping->host;
40 +       struct ext4_inode_info *ei = EXT4_I(inode);
41 +       unsigned block_start, block_end;
42 +       sector_t block;
43 +       int err = 0;
44 +       unsigned blocksize, bbits;
45 +       struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
46 +       bool decrypt = false;
47 +       struct ext4_crypto_ctx *ctx;
49 +       BUG_ON(!PageLocked(page));
50 +       BUG_ON(from > PAGE_CACHE_SIZE);
51 +       BUG_ON(to > PAGE_CACHE_SIZE);
52 +       BUG_ON(from > to);
54 +       head = create_page_buffers(page, inode, 0);
55 +       blocksize = head->b_size;
56 +       bbits = ilog2(blocksize);
57 +       block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
59 +       for(bh = head, block_start = 0; bh != head || !block_start;
60 +           block++, block_start=block_end, bh = bh->b_this_page) {
61 +               block_end = block_start + blocksize;
62 +               if (block_end <= from || block_start >= to) {
63 +                       if (PageUptodate(page)) {
64 +                               if (!buffer_uptodate(bh))
65 +                                       set_buffer_uptodate(bh);
66 +                       }
67 +                       continue;
68 +               }
69 +               if (buffer_new(bh))
70 +                       clear_buffer_new(bh);
71 +               if (!buffer_mapped(bh)) {
72 +                       WARN_ON(bh->b_size != blocksize);
73 +                       err = get_block(inode, block, bh, 1);
74 +                       if (err)
75 +                               break;
76 +                       if (buffer_new(bh)) {
77 +                               unmap_underlying_metadata(bh->b_bdev,
78 +                                                         bh->b_blocknr);
79 +                               if (PageUptodate(page)) {
80 +                                       clear_buffer_new(bh);
81 +                                       set_buffer_uptodate(bh);
82 +                                       mark_buffer_dirty(bh);
83 +                                       continue;
84 +                               }
85 +                               if (block_end > to || block_start < from)
86 +                                       zero_user_segments(page, to, block_end,
87 +                                                          block_start, from);
88 +                               continue;
89 +                       }
90 +               }
91 +               if (PageUptodate(page)) {
92 +                       if (!buffer_uptodate(bh))
93 +                               set_buffer_uptodate(bh);
94 +                       continue;
95 +               }
96 +               if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
97 +                   !buffer_unwritten(bh) &&
98 +                   (block_start < from || block_end > to)) {
99 +                       ll_rw_block(READ, 1, &bh);
100 +                       *wait_bh++=bh;
101 +                       decrypt = ext4_is_encryption_enabled(ei);
102 +               }
103 +       }
104 +       /*
105 +        * If we issued read requests, let them complete.
106 +        */
107 +       while (wait_bh > wait) {
108 +               wait_on_buffer(*--wait_bh);
109 +               if (!buffer_uptodate(*wait_bh))
110 +                       err = -EIO;
111 +       }
112 +       if (unlikely(err)) {
113 +               page_zero_new_buffers(page, from, to);
114 +       } else if (decrypt) {
115 +               ctx = ext4_get_crypto_ctx(false, &ei->i_encryption_key);
116 +               if (!ctx) {
117 +                       err = -ENOMEM;
118 +                       goto out;
119 +               }
120 +               err = ext4_decrypt(ctx, page);
121 +               ext4_release_crypto_ctx(ctx);
122 +       }
123 +out:
124 +       return err;
128  static int ext4_write_begin(struct file *file, struct address_space *mapping,
129                             loff_t pos, unsigned len, unsigned flags,
130                             struct page **pagep, void **fsdata)
131 @@ -940,10 +1035,11 @@ retry_journal:
132         wait_for_stable_page(page);
134         if (ext4_should_dioread_nolock(inode))
135 -               ret = __block_write_begin(page, pos, len, ext4_get_block_write);
136 +               ret = ext4_block_write_begin(page, pos, len,
137 +                                            ext4_get_block_write);
138         else
139 -               ret = __block_write_begin(page, pos, len, ext4_get_block);
141 +               ret = ext4_block_write_begin(page, pos, len,
142 +                                            ext4_get_block);
143         if (!ret && ext4_should_journal_data(inode)) {
144                 ret = ext4_walk_page_buffers(handle, page_buffers(page),
145                                              from, to, NULL,
146 @@ -2280,6 +2376,7 @@ static int ext4_writepages(struct address_space *mapping,
147         handle_t *handle = NULL;
148         struct mpage_da_data mpd;
149         struct inode *inode = mapping->host;
150 +       struct ext4_inode_info *ei = EXT4_I(inode);
151         int needed_blocks, rsv_blocks = 0, ret = 0;
152         struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
153         bool done;
154 @@ -2296,7 +2393,7 @@ static int ext4_writepages(struct address_space *mapping,
155         if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
156                 goto out_writepages;
158 -       if (ext4_should_journal_data(inode)) {
159 +       if (ext4_should_journal_data(inode) || ext4_is_encryption_enabled(ei)) {
160                 struct blk_plug plug;
162                 blk_start_plug(&plug);
163 @@ -2575,7 +2672,8 @@ retry_journal:
164         /* In case writeback began while the page was unlocked */
165         wait_for_stable_page(page);
167 -       ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
168 +       ret = ext4_block_write_begin(page, pos, len,
169 +                                    ext4_da_get_block_prep);
170         if (ret < 0) {
171                 unlock_page(page);
172                 ext4_journal_stop(handle);
173 @@ -2957,6 +3055,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
175         struct file *file = iocb->ki_filp;
176         struct inode *inode = file->f_mapping->host;
177 +       struct ext4_inode_info *ei = EXT4_I(inode);
178         ssize_t ret;
179         size_t count = iov_iter_count(iter);
180         int overwrite = 0;
181 @@ -3033,6 +3132,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
182                 get_block_func = ext4_get_block_write;
183                 dio_flags = DIO_LOCKING;
184         }
185 +       BUG_ON(ext4_is_encryption_enabled(ei));
186         ret = __blockdev_direct_IO(rw, iocb, inode,
187                                    inode->i_sb->s_bdev, iter,
188                                    offset,
189 @@ -3093,9 +3193,13 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
191         struct file *file = iocb->ki_filp;
192         struct inode *inode = file->f_mapping->host;
193 +       struct ext4_inode_info *ei = EXT4_I(inode);
194         size_t count = iov_iter_count(iter);
195         ssize_t ret;
197 +       if (ext4_is_encryption_enabled(ei))
198 +               return 0;
200         /*
201          * If we are doing data journalling we don't support O_DIRECT
202          */
203 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
204 index 123798c..ba288db 100644
205 --- a/fs/ext4/namei.c
206 +++ b/fs/ext4/namei.c
207 @@ -2214,6 +2214,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
209         handle_t *handle;
210         struct inode *inode;
211 +       struct ext4_sb_info *sbi = EXT4_SB(dir->i_sb);
212         int err, credits, retries = 0;
214         dquot_initialize(dir);
215 @@ -2230,8 +2231,14 @@ retry:
216                 inode->i_fop = &ext4_file_operations;
217                 ext4_set_aops(inode);
218                 err = ext4_add_nondir(handle, dentry, inode);
219 -               if (!err && IS_DIRSYNC(dir))
220 -                       ext4_handle_sync(handle);
221 +               if (!err) {
222 +                       if (sbi->s_default_encryption_mode !=
223 +                           EXT4_ENCRYPTION_MODE_INVALID) {
224 +                               ext4_set_crypto_key(dentry);
225 +                       }
226 +                       if (IS_DIRSYNC(dir))
227 +                               ext4_handle_sync(handle);
228 +               }
229         }
230         if (handle)
231                 ext4_journal_stop(handle);
232 diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
233 index b24a254..b68d178 100644
234 --- a/fs/ext4/page-io.c
235 +++ b/fs/ext4/page-io.c
236 @@ -61,6 +61,18 @@ static void buffer_io_error(struct buffer_head *bh)
237                         (unsigned long long)bh->b_blocknr);
240 +static void ext4_restore_control_page(struct page *data_page)
242 +       struct ext4_crypto_ctx *ctx =
243 +               (struct ext4_crypto_ctx *)page_private(data_page);
245 +       set_bh_to_page(page_buffers(ctx->control_page), ctx->control_page);
246 +       set_page_private(data_page, (unsigned long)NULL);
247 +       ClearPagePrivate(data_page);
248 +       unlock_page(data_page);
249 +       ext4_release_crypto_ctx(ctx);
252  static void ext4_finish_bio(struct bio *bio)
254         int i;
255 @@ -69,6 +81,8 @@ static void ext4_finish_bio(struct bio *bio)
257         bio_for_each_segment_all(bvec, bio, i) {
258                 struct page *page = bvec->bv_page;
259 +               struct page *data_page = NULL;
260 +               struct ext4_crypto_ctx *ctx = NULL;
261                 struct buffer_head *bh, *head;
262                 unsigned bio_start = bvec->bv_offset;
263                 unsigned bio_end = bio_start + bvec->bv_len;
264 @@ -78,6 +92,13 @@ static void ext4_finish_bio(struct bio *bio)
265                 if (!page)
266                         continue;
268 +               if (!page->mapping) {
269 +                       /* The bounce data pages are unmapped. */
270 +                       data_page = page;
271 +                       ctx = (struct ext4_crypto_ctx *)page_private(data_page);
272 +                       page = ctx->control_page;
273 +               }
275                 if (error) {
276                         SetPageError(page);
277                         set_bit(AS_EIO, &page->mapping->flags);
278 @@ -102,8 +123,11 @@ static void ext4_finish_bio(struct bio *bio)
279                 } while ((bh = bh->b_this_page) != head);
280                 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
281                 local_irq_restore(flags);
282 -               if (!under_io)
283 +               if (!under_io) {
284 +                       if (ctx)
285 +                               ext4_restore_control_page(data_page);
286                         end_page_writeback(page);
287 +               }
288         }
291 @@ -398,40 +422,60 @@ submit_and_retry:
292         return 0;
295 -int ext4_bio_write_page(struct ext4_io_submit *io,
296 -                       struct page *page,
297 -                       int len,
298 -                       struct writeback_control *wbc,
299 -                       bool keep_towrite)
300 +static void ext4_abort_bio_write(struct page *page,
301 +                                struct writeback_control *wbc) {
302 +       struct buffer_head *bh, *head;
304 +       redirty_page_for_writepage(wbc, page);
305 +       bh = head = page_buffers(page);
306 +       do {
307 +               clear_buffer_async_write(bh);
308 +               bh = bh->b_this_page;
309 +       } while (bh != head);
312 +static int io_encrypt_submit_page(struct ext4_io_submit *io, struct page *page)
314 +       struct page *data_page = NULL;
315 +       struct ext4_crypto_ctx *ctx = NULL;
316         struct inode *inode = page->mapping->host;
317 -       unsigned block_start, blocksize;
318 +       struct ext4_inode_info *ei = EXT4_I(inode);
319 +       struct buffer_head *bh;
320 +       int res = 0;
322 +       ctx = ext4_get_crypto_ctx(true, &ei->i_encryption_key);
323 +       if (IS_ERR(ctx))
324 +               return PTR_ERR(ctx);
326 +       bh = page_buffers(page);
327 +       data_page = ext4_encrypt(ctx, page);
328 +       if (IS_ERR(data_page)) {
329 +               ext4_release_crypto_ctx(ctx);
330 +               res = PTR_ERR(data_page);
331 +               printk_ratelimited(KERN_ERR "%s: ext4_encrypt() returned %d\n",
332 +                                  __func__, res);
333 +               goto out;
334 +       }
335 +       lock_page(data_page);
336 +       res = io_submit_add_bh(io, inode, bh);
337 +       if (res)
338 +               ext4_restore_control_page(data_page);
339 +out:
340 +       return res;
343 +static int ext4_bio_write_buffers(struct ext4_io_submit *io,
344 +                                 struct page *page,
345 +                                 int len,
346 +                                 struct writeback_control *wbc)
348 +       struct inode *inode = page->mapping->host;
349 +       struct ext4_inode_info *ei = EXT4_I(inode);
350 +       unsigned block_start;
351         struct buffer_head *bh, *head;
352         int ret = 0;
353         int nr_submitted = 0;
355 -       blocksize = 1 << inode->i_blkbits;
357 -       BUG_ON(!PageLocked(page));
358 -       BUG_ON(PageWriteback(page));
360 -       if (keep_towrite)
361 -               set_page_writeback_keepwrite(page);
362 -       else
363 -               set_page_writeback(page);
364 -       ClearPageError(page);
366 -       /*
367 -        * Comments copied from block_write_full_page:
368 -        *
369 -        * The page straddles i_size.  It must be zeroed out on each and every
370 -        * writepage invocation because it may be mmapped.  "A file is mapped
371 -        * in multiples of the page size.  For a file that is not a multiple of
372 -        * the page size, the remaining memory is zeroed when mapped, and
373 -        * writes to that region are not written out to the file."
374 -        */
375 -       if (len < PAGE_CACHE_SIZE)
376 -               zero_user_segment(page, len, PAGE_CACHE_SIZE);
377         /*
378          * In the first loop we prepare and mark buffers to submit. We have to
379          * mark all buffers in the page before submitting so that
380 @@ -449,7 +493,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
381                 }
382                 if (!buffer_dirty(bh) || buffer_delay(bh) ||
383                     !buffer_mapped(bh) || buffer_unwritten(bh)) {
384 -                       /* A hole? We can safely clear the dirty bit */
385                         if (!buffer_mapped(bh))
386                                 clear_buffer_dirty(bh);
387                         if (io->io_bio)
388 @@ -468,14 +511,17 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
389         do {
390                 if (!buffer_async_write(bh))
391                         continue;
392 -               ret = io_submit_add_bh(io, inode, bh);
393 +               if (ext4_is_encryption_enabled(ei)) {
394 +                       ret = io_encrypt_submit_page(io, page);
395 +               } else {
396 +                       ret = io_submit_add_bh(io, inode, bh);
397 +               }
398                 if (ret) {
399                         /*
400                          * We only get here on ENOMEM.  Not much else
401                          * we can do but mark the page as dirty, and
402                          * better luck next time.
403                          */
404 -                       redirty_page_for_writepage(wbc, page);
405                         break;
406                 }
407                 nr_submitted++;
408 @@ -484,10 +530,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
410         /* Error stopped previous loop? Clean up buffers... */
411         if (ret) {
412 -               do {
413 -                       clear_buffer_async_write(bh);
414 -                       bh = bh->b_this_page;
415 -               } while (bh != head);
416 +               printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
417 +               ext4_abort_bio_write(page, wbc);
418         }
419         unlock_page(page);
420         /* Nothing submitted - we have to end page writeback */
421 @@ -495,3 +539,35 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
422                 end_page_writeback(page);
423         return ret;
426 +int ext4_bio_write_page(struct ext4_io_submit *io,
427 +                       struct page *page,
428 +                       int len,
429 +                       struct writeback_control *wbc,
430 +                       bool keep_towrite)
432 +       int ret = 0;
434 +       BUG_ON(!PageLocked(page));
435 +       BUG_ON(PageWriteback(page));
436 +       if (keep_towrite)
437 +               set_page_writeback_keepwrite(page);
438 +       else
439 +               set_page_writeback(page);
440 +       ClearPageError(page);
442 +       /*
443 +        * Comments copied from block_write_full_page_endio:
444 +        *
445 +        * The page straddles i_size.  It must be zeroed out on each and every
446 +        * writepage invocation because it may be mmapped.  "A file is mapped
447 +        * in multiples of the page size.  For a file that is not a multiple of
448 +        * the page size, the remaining memory is zeroed when mapped, and
449 +        * writes to that region are not written out to the file."
450 +        */
451 +       if (len < PAGE_CACHE_SIZE)
452 +               zero_user_segment(page, len, PAGE_CACHE_SIZE);
454 +       ret = ext4_bio_write_buffers(io, page, len, wbc);
455 +       return ret;
457 -- 
458 2.1.0.rc2.206.gedb03e5