1 ext4: defer clearing of PageWriteback after extent conversion
3 From: Jan Kara <jack@suse.cz>
5 Currently PageWriteback bit gets cleared from put_io_page() called
6 from ext4_end_bio(). This is somewhat inconvenient as extent tree is
7 not fully updated at that time (unwritten extents are not marked as
8 written) so we cannot read the data back yet. This design was
9 dictated by lock ordering as we cannot start a transaction while
10 PageWriteback bit is set (we could easily deadlock with
11 ext4_da_writepages()). But now that we use transaction reservation
12 for extent conversion, locking issues are solved and we can move
13 PageWriteback bit clearing after extent conversion is done. As a
14 result we can remove wait for unwritten extent conversion from
15 ext4_sync_file() because it already implicitly happens through
16 wait_on_page_writeback().
18 We implement deferring of PageWriteback clearing by queueing completed
19 bios to appropriate io_end and processing all the pages when io_end is
20 going to be freed instead of at the moment ext4_end_bio() is called.
22 Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
23 Signed-off-by: Jan Kara <jack@suse.cz>
24 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
27 fs/ext4/fsync.c | 4 --
28 fs/ext4/page-io.c | 138 +++++++++++++++++++++++++++++++-----------------------
29 3 files changed, 82 insertions(+), 65 deletions(-)
31 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
32 index 55200f6..1352901 100644
35 @@ -180,8 +180,7 @@ struct ext4_map_blocks {
36 * Flags for ext4_io_end->flags
38 #define EXT4_IO_END_UNWRITTEN 0x0001
39 -#define EXT4_IO_END_ERROR 0x0002
40 -#define EXT4_IO_END_DIRECT 0x0004
41 +#define EXT4_IO_END_DIRECT 0x0002
44 * For converting uninitialized extents on a work queue. 'handle' is used for
45 @@ -192,6 +191,8 @@ typedef struct ext4_io_end {
46 handle_t *handle; /* handle reserved for extent
48 struct inode *inode; /* file being written to */
49 + struct bio *bio; /* Linked list of completed
50 + * bios covering the extent */
51 unsigned int flag; /* unwritten or not */
52 loff_t offset; /* offset in the file */
53 ssize_t size; /* size of the extent */
54 diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
55 index e0ba8a4..dcc881b 100644
58 @@ -132,10 +132,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
59 if (inode->i_sb->s_flags & MS_RDONLY)
62 - ret = ext4_flush_unwritten_io(inode);
67 ret = __sync_inode(inode, datasync);
68 if (!ret && !hlist_empty(&inode->i_dentry))
69 diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
70 index bcdfd6b..755741c 100644
71 --- a/fs/ext4/page-io.c
72 +++ b/fs/ext4/page-io.c
73 @@ -64,14 +64,83 @@ void ext4_ioend_shutdown(struct inode *inode)
74 cancel_work_sync(&EXT4_I(inode)->i_unrsv_conversion_work);
78 + * Print an buffer I/O error compatible with the fs/buffer.c. This
79 + * provides compatibility with dmesg scrapers that look for a specific
80 + * buffer I/O error message. We really need a unified error reporting
81 + * structure to userspace ala Digital Unix's uerf system, but it's
82 + * probably not going to happen in my lifetime, due to LKML politics...
84 +static void buffer_io_error(struct buffer_head *bh)
86 + char b[BDEVNAME_SIZE];
87 + printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
88 + bdevname(bh->b_bdev, b),
89 + (unsigned long long)bh->b_blocknr);
92 +static void ext4_finish_bio(struct bio *bio)
95 + int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
97 + for (i = 0; i < bio->bi_vcnt; i++) {
98 + struct bio_vec *bvec = &bio->bi_io_vec[i];
99 + struct page *page = bvec->bv_page;
100 + struct buffer_head *bh, *head;
101 + unsigned bio_start = bvec->bv_offset;
102 + unsigned bio_end = bio_start + bvec->bv_len;
103 + unsigned under_io = 0;
104 + unsigned long flags;
110 + SetPageError(page);
111 + set_bit(AS_EIO, &page->mapping->flags);
113 + bh = head = page_buffers(page);
115 + * We check all buffers in the page under BH_Uptodate_Lock
116 + * to avoid races with other end io clearing async_write flags
118 + local_irq_save(flags);
119 + bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
121 + if (bh_offset(bh) < bio_start ||
122 + bh_offset(bh) + bh->b_size > bio_end) {
123 + if (buffer_async_write(bh))
127 + clear_buffer_async_write(bh);
129 + buffer_io_error(bh);
130 + } while ((bh = bh->b_this_page) != head);
131 + bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
132 + local_irq_restore(flags);
134 + end_page_writeback(page);
138 static void ext4_release_io_end(ext4_io_end_t *io_end)
140 + struct bio *bio, *next_bio;
142 BUG_ON(!list_empty(&io_end->list));
143 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
144 WARN_ON(io_end->handle);
146 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
147 wake_up_all(ext4_ioend_wq(io_end->inode));
149 + for (bio = io_end->bio; bio; bio = next_bio) {
150 + next_bio = bio->bi_private;
151 + ext4_finish_bio(bio);
154 if (io_end->flag & EXT4_IO_END_DIRECT)
155 inode_dio_done(io_end->inode);
157 @@ -267,79 +336,31 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
162 - * Print an buffer I/O error compatible with the fs/buffer.c. This
163 - * provides compatibility with dmesg scrapers that look for a specific
164 - * buffer I/O error message. We really need a unified error reporting
165 - * structure to userspace ala Digital Unix's uerf system, but it's
166 - * probably not going to happen in my lifetime, due to LKML politics...
168 -static void buffer_io_error(struct buffer_head *bh)
170 - char b[BDEVNAME_SIZE];
171 - printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
172 - bdevname(bh->b_bdev, b),
173 - (unsigned long long)bh->b_blocknr);
176 static void ext4_end_bio(struct bio *bio, int error)
178 ext4_io_end_t *io_end = bio->bi_private;
179 - struct inode *inode;
182 sector_t bi_sector = bio->bi_sector;
185 - inode = io_end->inode;
186 - blocksize = 1 << inode->i_blkbits;
187 - bio->bi_private = NULL;
188 bio->bi_end_io = NULL;
189 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
191 - for (i = 0; i < bio->bi_vcnt; i++) {
192 - struct bio_vec *bvec = &bio->bi_io_vec[i];
193 - struct page *page = bvec->bv_page;
194 - struct buffer_head *bh, *head;
195 - unsigned bio_start = bvec->bv_offset;
196 - unsigned bio_end = bio_start + bvec->bv_len;
197 - unsigned under_io = 0;
198 - unsigned long flags;
204 - SetPageError(page);
205 - set_bit(AS_EIO, &page->mapping->flags);
207 - bh = head = page_buffers(page);
208 + if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
210 - * We check all buffers in the page under BH_Uptodate_Lock
211 - * to avoid races with other end io clearing async_write flags
212 + * Link bio into list hanging from io_end. We have to do it
213 + * atomically as bio completions can be racing against each
216 - local_irq_save(flags);
217 - bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
219 - if (bh_offset(bh) < bio_start ||
220 - bh_offset(bh) + blocksize > bio_end) {
221 - if (buffer_async_write(bh))
225 - clear_buffer_async_write(bh);
227 - buffer_io_error(bh);
228 - } while ((bh = bh->b_this_page) != head);
229 - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
230 - local_irq_restore(flags);
232 - end_page_writeback(page);
233 + bio->bi_private = xchg(&io_end->bio, bio);
235 + ext4_finish_bio(bio);
241 - io_end->flag |= EXT4_IO_END_ERROR;
242 + struct inode *inode = io_end->inode;
244 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
245 "(offset %llu size %ld starting block %llu)",
247 @@ -348,7 +369,6 @@ static void ext4_end_bio(struct bio *bio, int error)
249 bi_sector >> (inode->i_blkbits - 9));
252 ext4_put_io_end_defer(io_end);