1 From: Michael Halcrow <mhalcrow@google.com>
3 ext4: implement the ext4 encryption write path
5 With encryption enabled, we send one encrypted bounce page at a time
6 out to the block layer. This isn't the most efficient approach, but
7 it's relatively low-impact and can be improved later if need be.
8 There's talk amongst the ext4 maintainers around some day getting rid
9 of buffer heads, and the encryption path can fold into that work at
10 that time.
12 Pulls block_write_begin() into fs/ext4/inode.c because it might need
13 to do a low-level read of the existing data, in which case we need to
14 decrypt it.
16 Signed-off-by: Michael Halcrow <mhalcrow@google.com>
17 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
18 Signed-off-by: Ildar Muslukhov <ildarm@google.com>
20 fs/ext4/inode.c | 114 +++++++++++++++++++++++++++++++++++++++--
21 fs/ext4/namei.c | 11 +++-
22 fs/ext4/page-io.c | 148 +++++++++++++++++++++++++++++++++++++++++-------------
23 3 files changed, 230 insertions(+), 43 deletions(-)
25 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
26 index e9777f9..117b691 100644
29 @@ -877,6 +877,101 @@ int do_journal_get_write_access(handle_t *handle,
31 static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
32 struct buffer_head *bh_result, int create);
34 +static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
35 + get_block_t *get_block)
37 + unsigned from = pos & (PAGE_CACHE_SIZE - 1);
38 + unsigned to = from + len;
39 + struct inode *inode = page->mapping->host;
40 + struct ext4_inode_info *ei = EXT4_I(inode);
41 + unsigned block_start, block_end;
44 + unsigned blocksize, bbits;
45 + struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
46 + bool decrypt = false;
47 + struct ext4_crypto_ctx *ctx;
49 + BUG_ON(!PageLocked(page));
50 + BUG_ON(from > PAGE_CACHE_SIZE);
51 + BUG_ON(to > PAGE_CACHE_SIZE);
54 + head = create_page_buffers(page, inode, 0);
55 + blocksize = head->b_size;
56 + bbits = ilog2(blocksize);
57 + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
59 + for(bh = head, block_start = 0; bh != head || !block_start;
60 + block++, block_start=block_end, bh = bh->b_this_page) {
61 + block_end = block_start + blocksize;
62 + if (block_end <= from || block_start >= to) {
63 + if (PageUptodate(page)) {
64 + if (!buffer_uptodate(bh))
65 + set_buffer_uptodate(bh);
70 + clear_buffer_new(bh);
71 + if (!buffer_mapped(bh)) {
72 + WARN_ON(bh->b_size != blocksize);
73 + err = get_block(inode, block, bh, 1);
76 + if (buffer_new(bh)) {
77 + unmap_underlying_metadata(bh->b_bdev,
79 + if (PageUptodate(page)) {
80 + clear_buffer_new(bh);
81 + set_buffer_uptodate(bh);
82 + mark_buffer_dirty(bh);
85 + if (block_end > to || block_start < from)
86 + zero_user_segments(page, to, block_end,
91 + if (PageUptodate(page)) {
92 + if (!buffer_uptodate(bh))
93 + set_buffer_uptodate(bh);
96 + if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
97 + !buffer_unwritten(bh) &&
98 + (block_start < from || block_end > to)) {
99 + ll_rw_block(READ, 1, &bh);
101 + decrypt = ext4_is_encryption_enabled(ei);
105 + * If we issued read requests, let them complete.
107 + while (wait_bh > wait) {
108 + wait_on_buffer(*--wait_bh);
109 + if (!buffer_uptodate(*wait_bh))
112 + if (unlikely(err)) {
113 + page_zero_new_buffers(page, from, to);
114 + } else if (decrypt) {
115 + ctx = ext4_get_crypto_ctx(false, &ei->i_encryption_key);
120 + err = ext4_decrypt(ctx, page);
121 + ext4_release_crypto_ctx(ctx);
128 static int ext4_write_begin(struct file *file, struct address_space *mapping,
129 loff_t pos, unsigned len, unsigned flags,
130 struct page **pagep, void **fsdata)
131 @@ -940,10 +1035,11 @@ retry_journal:
132 wait_for_stable_page(page);
134 if (ext4_should_dioread_nolock(inode))
135 - ret = __block_write_begin(page, pos, len, ext4_get_block_write);
136 + ret = ext4_block_write_begin(page, pos, len,
137 + ext4_get_block_write);
139 - ret = __block_write_begin(page, pos, len, ext4_get_block);
141 + ret = ext4_block_write_begin(page, pos, len,
143 if (!ret && ext4_should_journal_data(inode)) {
144 ret = ext4_walk_page_buffers(handle, page_buffers(page),
146 @@ -2280,6 +2376,7 @@ static int ext4_writepages(struct address_space *mapping,
147 handle_t *handle = NULL;
148 struct mpage_da_data mpd;
149 struct inode *inode = mapping->host;
150 + struct ext4_inode_info *ei = EXT4_I(inode);
151 int needed_blocks, rsv_blocks = 0, ret = 0;
152 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
154 @@ -2296,7 +2393,7 @@ static int ext4_writepages(struct address_space *mapping,
155 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
158 - if (ext4_should_journal_data(inode)) {
159 + if (ext4_should_journal_data(inode) || ext4_is_encryption_enabled(ei)) {
160 struct blk_plug plug;
162 blk_start_plug(&plug);
163 @@ -2575,7 +2672,8 @@ retry_journal:
164 /* In case writeback began while the page was unlocked */
165 wait_for_stable_page(page);
167 - ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
168 + ret = ext4_block_write_begin(page, pos, len,
169 + ext4_da_get_block_prep);
172 ext4_journal_stop(handle);
173 @@ -2957,6 +3055,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
175 struct file *file = iocb->ki_filp;
176 struct inode *inode = file->f_mapping->host;
177 + struct ext4_inode_info *ei = EXT4_I(inode);
179 size_t count = iov_iter_count(iter);
181 @@ -3033,6 +3132,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
182 get_block_func = ext4_get_block_write;
183 dio_flags = DIO_LOCKING;
185 + BUG_ON(ext4_is_encryption_enabled(ei));
186 ret = __blockdev_direct_IO(rw, iocb, inode,
187 inode->i_sb->s_bdev, iter,
189 @@ -3093,9 +3193,13 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
191 struct file *file = iocb->ki_filp;
192 struct inode *inode = file->f_mapping->host;
193 + struct ext4_inode_info *ei = EXT4_I(inode);
194 size_t count = iov_iter_count(iter);
197 + if (ext4_is_encryption_enabled(ei))
201 * If we are doing data journalling we don't support O_DIRECT
203 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
204 index 123798c..ba288db 100644
205 --- a/fs/ext4/namei.c
206 +++ b/fs/ext4/namei.c
207 @@ -2214,6 +2214,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
211 + struct ext4_sb_info *sbi = EXT4_SB(dir->i_sb);
212 int err, credits, retries = 0;
214 dquot_initialize(dir);
215 @@ -2230,8 +2231,14 @@ retry:
216 inode->i_fop = &ext4_file_operations;
217 ext4_set_aops(inode);
218 err = ext4_add_nondir(handle, dentry, inode);
219 - if (!err && IS_DIRSYNC(dir))
220 - ext4_handle_sync(handle);
222 + if (sbi->s_default_encryption_mode !=
223 + EXT4_ENCRYPTION_MODE_INVALID) {
224 + ext4_set_crypto_key(dentry);
226 + if (IS_DIRSYNC(dir))
227 + ext4_handle_sync(handle);
231 ext4_journal_stop(handle);
232 diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
233 index b24a254..b68d178 100644
234 --- a/fs/ext4/page-io.c
235 +++ b/fs/ext4/page-io.c
236 @@ -61,6 +61,18 @@ static void buffer_io_error(struct buffer_head *bh)
237 (unsigned long long)bh->b_blocknr);
240 +static void ext4_restore_control_page(struct page *data_page)
242 + struct ext4_crypto_ctx *ctx =
243 + (struct ext4_crypto_ctx *)page_private(data_page);
245 + set_bh_to_page(page_buffers(ctx->control_page), ctx->control_page);
246 + set_page_private(data_page, (unsigned long)NULL);
247 + ClearPagePrivate(data_page);
248 + unlock_page(data_page);
249 + ext4_release_crypto_ctx(ctx);
252 static void ext4_finish_bio(struct bio *bio)
255 @@ -69,6 +81,8 @@ static void ext4_finish_bio(struct bio *bio)
257 bio_for_each_segment_all(bvec, bio, i) {
258 struct page *page = bvec->bv_page;
259 + struct page *data_page = NULL;
260 + struct ext4_crypto_ctx *ctx = NULL;
261 struct buffer_head *bh, *head;
262 unsigned bio_start = bvec->bv_offset;
263 unsigned bio_end = bio_start + bvec->bv_len;
264 @@ -78,6 +92,13 @@ static void ext4_finish_bio(struct bio *bio)
268 + if (!page->mapping) {
269 + /* The bounce data pages are unmapped. */
271 + ctx = (struct ext4_crypto_ctx *)page_private(data_page);
272 + page = ctx->control_page;
277 set_bit(AS_EIO, &page->mapping->flags);
278 @@ -102,8 +123,11 @@ static void ext4_finish_bio(struct bio *bio)
279 } while ((bh = bh->b_this_page) != head);
280 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
281 local_irq_restore(flags);
285 + ext4_restore_control_page(data_page);
286 end_page_writeback(page);
291 @@ -398,40 +422,60 @@ submit_and_retry:
295 -int ext4_bio_write_page(struct ext4_io_submit *io,
298 - struct writeback_control *wbc,
300 +static void ext4_abort_bio_write(struct page *page,
301 + struct writeback_control *wbc) {
302 + struct buffer_head *bh, *head;
304 + redirty_page_for_writepage(wbc, page);
305 + bh = head = page_buffers(page);
307 + clear_buffer_async_write(bh);
308 + bh = bh->b_this_page;
309 + } while (bh != head);
312 +static int io_encrypt_submit_page(struct ext4_io_submit *io, struct page *page)
314 + struct page *data_page = NULL;
315 + struct ext4_crypto_ctx *ctx = NULL;
316 struct inode *inode = page->mapping->host;
317 - unsigned block_start, blocksize;
318 + struct ext4_inode_info *ei = EXT4_I(inode);
319 + struct buffer_head *bh;
322 + ctx = ext4_get_crypto_ctx(true, &ei->i_encryption_key);
324 + return PTR_ERR(ctx);
326 + bh = page_buffers(page);
327 + data_page = ext4_encrypt(ctx, page);
328 + if (IS_ERR(data_page)) {
329 + ext4_release_crypto_ctx(ctx);
330 + res = PTR_ERR(data_page);
331 + printk_ratelimited(KERN_ERR "%s: ext4_encrypt() returned %d\n",
335 + lock_page(data_page);
336 + res = io_submit_add_bh(io, inode, bh);
338 + ext4_restore_control_page(data_page);
343 +static int ext4_bio_write_buffers(struct ext4_io_submit *io,
346 + struct writeback_control *wbc)
348 + struct inode *inode = page->mapping->host;
349 + struct ext4_inode_info *ei = EXT4_I(inode);
350 + unsigned block_start;
351 struct buffer_head *bh, *head;
353 int nr_submitted = 0;
355 - blocksize = 1 << inode->i_blkbits;
357 - BUG_ON(!PageLocked(page));
358 - BUG_ON(PageWriteback(page));
361 - set_page_writeback_keepwrite(page);
363 - set_page_writeback(page);
364 - ClearPageError(page);
367 - * Comments copied from block_write_full_page:
369 - * The page straddles i_size. It must be zeroed out on each and every
370 - * writepage invocation because it may be mmapped. "A file is mapped
371 - * in multiples of the page size. For a file that is not a multiple of
372 - * the page size, the remaining memory is zeroed when mapped, and
373 - * writes to that region are not written out to the file."
375 - if (len < PAGE_CACHE_SIZE)
376 - zero_user_segment(page, len, PAGE_CACHE_SIZE);
378 * In the first loop we prepare and mark buffers to submit. We have to
379 * mark all buffers in the page before submitting so that
380 @@ -449,7 +493,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
382 if (!buffer_dirty(bh) || buffer_delay(bh) ||
383 !buffer_mapped(bh) || buffer_unwritten(bh)) {
384 - /* A hole? We can safely clear the dirty bit */
385 if (!buffer_mapped(bh))
386 clear_buffer_dirty(bh);
388 @@ -468,14 +511,17 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
390 if (!buffer_async_write(bh))
392 - ret = io_submit_add_bh(io, inode, bh);
393 + if (ext4_is_encryption_enabled(ei)) {
394 + ret = io_encrypt_submit_page(io, page);
396 + ret = io_submit_add_bh(io, inode, bh);
400 * We only get here on ENOMEM. Not much else
401 * we can do but mark the page as dirty, and
402 * better luck next time.
404 - redirty_page_for_writepage(wbc, page);
408 @@ -484,10 +530,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
410 /* Error stopped previous loop? Clean up buffers... */
413 - clear_buffer_async_write(bh);
414 - bh = bh->b_this_page;
415 - } while (bh != head);
416 + printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
417 + ext4_abort_bio_write(page, wbc);
420 /* Nothing submitted - we have to end page writeback */
421 @@ -495,3 +539,35 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
422 end_page_writeback(page);
426 +int ext4_bio_write_page(struct ext4_io_submit *io,
429 + struct writeback_control *wbc,
434 + BUG_ON(!PageLocked(page));
435 + BUG_ON(PageWriteback(page));
437 + set_page_writeback_keepwrite(page);
439 + set_page_writeback(page);
440 + ClearPageError(page);
443 + * Comments copied from block_write_full_page_endio:
445 + * The page straddles i_size. It must be zeroed out on each and every
446 + * writepage invocation because it may be mmapped. "A file is mapped
447 + * in multiples of the page size. For a file that is not a multiple of
448 + * the page size, the remaining memory is zeroed when mapped, and
449 + * writes to that region are not written out to the file."
451 + if (len < PAGE_CACHE_SIZE)
452 + zero_user_segment(page, len, PAGE_CACHE_SIZE);
454 + ret = ext4_bio_write_buffers(io, page, len, wbc);
458 2.1.0.rc2.206.gedb03e5