1 ext4: Use page_mkwrite vma_operations to get mmap write notification.
3 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5 We would like to get notified when a write is done to an mmapped section.
6 This is needed for preallocated areas: in the callback we split the
7 preallocated area into an initialized extent and an uninitialized extent. This
8 lets us handle ENOSPC better. Otherwise we would get ENOSPC in writepage, and
9 that would result in data loss. The changes are also needed to handle ENOSPC
10 when writing to an mmapped section of a file with holes.
12 Acked-by: Jan Kara <jack@suse.cz>
13 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
14 Signed-off-by: Mingming Cao <cmm@us.ibm.com>
15 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
19 fs/ext4/file.c | 19 +++++++++++++-
20 fs/ext4/inode.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
21 3 files changed, 95 insertions(+), 1 deletions(-)
24 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
25 index 6605076..77cbb28 100644
28 @@ -1053,6 +1053,7 @@ extern void ext4_set_aops(struct inode *inode);
29 extern int ext4_writepage_trans_blocks(struct inode *);
30 extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
31 struct address_space *mapping, loff_t from);
32 +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
35 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
36 diff --git a/fs/ext4/file.c b/fs/ext4/file.c
37 index 4159be6..b9510ba 100644
40 @@ -123,6 +123,23 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
44 +static struct vm_operations_struct ext4_file_vm_ops = {
45 + .fault = filemap_fault,
46 + .page_mkwrite = ext4_page_mkwrite,
49 +static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
51 + struct address_space *mapping = file->f_mapping;
53 + if (!mapping->a_ops->readpage)
55 + file_accessed(file);
56 + vma->vm_ops = &ext4_file_vm_ops;
57 + vma->vm_flags |= VM_CAN_NONLINEAR;
61 const struct file_operations ext4_file_operations = {
62 .llseek = generic_file_llseek,
64 @@ -133,7 +150,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
66 .compat_ioctl = ext4_compat_ioctl,
68 - .mmap = generic_file_mmap,
69 + .mmap = ext4_file_mmap,
70 .open = generic_file_open,
71 .release = ext4_release_file,
72 .fsync = ext4_sync_file,
73 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
74 index 4a7ed29..bc52ef5 100644
77 @@ -3555,3 +3555,79 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
82 +static int ext4_bh_prepare_fill(handle_t *handle, struct buffer_head *bh)
84 + if (!buffer_mapped(bh)) {
86 + * Mark buffer as dirty so that
87 + * block_write_full_page() writes it
89 + set_buffer_dirty(bh);
94 +static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
96 + return !buffer_mapped(bh);
99 +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
104 + struct file *file = vma->vm_file;
105 + struct inode *inode = file->f_path.dentry->d_inode;
106 + struct address_space *mapping = inode->i_mapping;
107 + struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE,
108 + .nr_to_write = 1 };
111 + * Get i_alloc_sem to stop truncates messing with the inode. We cannot
112 + * get i_mutex because we are already holding mmap_sem.
114 + down_read(&inode->i_alloc_sem);
115 + size = i_size_read(inode);
116 + if (page->mapping != mapping || size <= page_offset(page)
117 + || !PageUptodate(page)) {
118 + /* page got truncated from under us? */
122 + if (PageMappedToDisk(page))
125 + if (page->index == size >> PAGE_CACHE_SHIFT)
126 + len = size & ~PAGE_CACHE_MASK;
128 + len = PAGE_CACHE_SIZE;
130 + if (page_has_buffers(page)) {
131 + /* return if we have all the buffers mapped */
132 + if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
136 + * Now mark all the buffer heads dirty so
137 + * that writepage can write them
139 + walk_page_buffers(NULL, page_buffers(page), 0, len,
140 + NULL, ext4_bh_prepare_fill);
143 + * OK, we need to fill the hole... Lock the page and do writepage.
144 + * We can't do write_begin and write_end here because we don't hold
145 + * the inode's i_mutex, and that allows parallel write_begin, write_end calls.
146 + * (lock_page prevents this from happening on the same page though)
149 + wbc.range_start = page_offset(page);
150 + wbc.range_end = page_offset(page) + len;
151 + ret = mapping->a_ops->writepage(page, &wbc);
152 + /* writepage unlocks the page */
154 + up_read(&inode->i_alloc_sem);