ext4: avoid unnecessary transaction stalls during writeback

From: Jan Kara <jack@suse.cz>

Currently ext4_writepages() submits all pages with a transaction started.
When no page needs block allocation or extent conversion, we can submit
all dirty pages in the inode while holding a single transaction handle,
and when the device is congested this can take a significant amount of
time. Thus ext4_writepages() can block transaction commits for extended
periods of time.
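
To make the ordering change concrete, below is a hypothetical stand-alone
C sketch (this is NOT the kernel code; every identifier in it is a made-up
stand-in). The old flow submits every dirty page while a journal handle
pins the running transaction; the new flow first pushes out the pages that
need no mapping without holding any handle, and only then takes a handle
for the pages that still need mapping. The real patch implements this
inside ext4_writepages() by running mpage_prepare_extent_to_map() once
before any handle is started and by making the mapping helpers back off
when mpd->io_submit.io_end is not set (see the diff below).

/*
 * Hypothetical sketch, NOT the kernel code: every identifier below is a
 * stand-in invented for illustration.
 */
#include <stdbool.h>

struct handle { int dummy; };		/* stands in for a jbd2 handle */

static struct handle *journal_start(void)
{
	static struct handle h;		/* while held, the commit must wait */
	return &h;
}

static void journal_stop(struct handle *h) { (void)h; }

static bool page_needs_mapping(int pg)
{
	return pg % 8 == 0;		/* pretend some pages are delalloc */
}

static void submit_page(int pg)
{
	(void)pg;			/* may block on a congested device */
}

/* Old flow: every page goes out while a handle pins the transaction. */
static void writepages_old(int npages)
{
	struct handle *h = journal_start();
	int pg;

	for (pg = 0; pg < npages; pg++)
		submit_page(pg);	/* commit stalls for all of this */
	journal_stop(h);
}

/* New flow: already-mapped pages go out first, with no handle held. */
static void writepages_new(int npages)
{
	struct handle *h;
	int pg;

	for (pg = 0; pg < npages; pg++)
		if (!page_needs_mapping(pg))
			submit_page(pg);	/* no transaction pinned here */

	h = journal_start();		/* held only for the mapping work */
	for (pg = 0; pg < npages; pg++)
		if (page_needs_mapping(pg))
			submit_page(pg);
	journal_stop(h);
}

int main(void)
{
	writepages_old(64);
	writepages_new(64);
	return 0;
}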

Take for example a simple benchmark simulating a PostgreSQL database
workload (pgioperf in mmtests). The benchmark runs 16 processes doing
random reads from a huge file, one process doing random writes to the
huge file, and one process doing sequential writes to a small file and
frequently running fsync. With an unpatched kernel, transaction commits
take on average ~18s with a standard deviation of ~41s; the top 5 commit
times are:

274.466639s, 126.467347s, 86.992429s, 34.351563s, 31.517653s.

After this patch, transaction commits take on average 0.1s with a
standard deviation of 0.15s; the top 5 commit times are:

0.563792s, 0.519980s, 0.509841s, 0.471700s, 0.469899s
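
Commit latencies like the ones quoted above can be observed on a live
system from the per-journal statistics that jbd2 exports under
/proc/fs/jbd2/<journal>/info (the directory is named after the backing
device, e.g. sda1-8, so the exact path varies); the reader below is only
a minimal, hypothetical sketch of how to dump that file.

/* Minimal sketch: dump the jbd2 per-journal stats; the path is an example. */
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/proc/fs/jbd2/sda1-8/info";
	FILE *f = fopen(path, "r");
	char line[256];

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* includes the average transaction times */
	fclose(f);
	return 0;
}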

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index baa87e7d1426..ff55d430938b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2171,6 +2171,9 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
 
 	/* First block in the extent? */
 	if (map->m_len == 0) {
+		/* We cannot map unless handle is started... */
+		if (!mpd->io_submit.io_end)
+			return false;
 		map->m_lblk = lblk;
 		map->m_len = 1;
 		map->m_flags = bh->b_state & BH_FLAGS;
@@ -2223,6 +2226,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
 			/* Found extent to map? */
 			if (mpd->map.m_len)
 				return 0;
+			/* Buffer needs mapping and handle is not started? */
+			if (!mpd->io_submit.io_end)
+				return 0;
 			/* Everything mapped so far and we hit EOF */
 			break;
 		}
@@ -2739,6 +2745,21 @@ static int ext4_writepages(struct address_space *mapping,
 		tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
 	done = false;
 	blk_start_plug(&plug);
+
+	/*
+	 * First writeback pages that don't need mapping - we can avoid
+	 * starting a transaction unnecessarily and also avoid being blocked
+	 * in the block layer on device congestion while having transaction
+	 * started.
+	 */
+	ret = mpage_prepare_extent_to_map(&mpd);
+	/* Submit prepared bio */
+	ext4_io_submit(&mpd.io_submit);
+	/* Unlock pages we didn't use */
+	mpage_release_unused_pages(&mpd, false);
 	while (!done && mpd.first_page <= mpd.last_page) {
 		/* For each extent of pages we use new io_end */
 		mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
@@ -2767,6 +2788,7 @@ static int ext4_writepages(struct address_space *mapping,
 				wbc->nr_to_write, inode->i_ino, ret);
 			/* Release allocated io_end */
 			ext4_put_io_end(mpd.io_submit.io_end);
+			mpd.io_submit.io_end = NULL;
 			break;
 		}
@@ -2816,6 +2838,7 @@ static int ext4_writepages(struct address_space *mapping,
 		ext4_journal_stop(handle);
 		ext4_put_io_end(mpd.io_submit.io_end);
+		mpd.io_submit.io_end = NULL;
 
 		if (ret == -ENOSPC && sbi->s_journal) {
@@ -2831,6 +2854,7 @@ static int ext4_writepages(struct address_space *mapping,
 	blk_finish_plug(&plug);
 	if (!ret && !cycled && wbc->nr_to_write > 0) {