1 ext4: add support for a lazytime mount option
3 Add an optimization for the MS_LAZYTIME mount option so that we will
4 opportunistically write out any inodes with the I_DIRTY_TIME flag set
5 in a particular inode table block when we need to update some inode in
6 that inode table block anyway.
8 Also add some temporary code so that we can set the lazytime mount
9 option without needing a modified /sbin/mount program which can set
10 MS_LAZYTIME. We can eventually make this go away once util-linux has added support.
13 Google-Bug-Id: 18297052
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
17 fs/ext4/inode.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
18 fs/ext4/super.c | 9 +++++++++
19 fs/inode.c | 36 ++++++++++++++++++++++++++++++++++++
20 include/linux/fs.h | 2 ++
21 include/trace/events/ext4.h | 30 ++++++++++++++++++++++++++++++
22 5 files changed, 123 insertions(+), 3 deletions(-)
24 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
25 index 5653fa4..8308c82 100644
28 @@ -4140,6 +4140,51 @@ static int ext4_inode_blocks_set(handle_t *handle,
32 + * Opportunistically update the other time fields for other inodes in
33 + * the same inode table block.
35 +static void ext4_update_other_inodes_time(struct super_block *sb,
36 + unsigned long orig_ino, char *buf)
38 + struct ext4_inode_info *ei;
39 + struct ext4_inode *raw_inode;
41 + struct inode *inode;
42 + int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
43 + int inode_size = EXT4_INODE_SIZE(sb);
45 + ino = orig_ino & ~(inodes_per_block - 1);
46 + for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
47 + if (ino == orig_ino)
49 + inode = find_active_inode_nowait(sb, ino);
51 + (inode->i_state & I_DIRTY_TIME) == 0 ||
52 + !spin_trylock(&inode->i_lock)) {
56 + inode->i_state &= ~I_DIRTY_TIME;
57 + inode->i_ts_dirty_day = 0;
58 + spin_unlock(&inode->i_lock);
59 + inode_requeue_dirtytime(inode);
62 + raw_inode = (struct ext4_inode *) buf;
64 + spin_lock(&ei->i_raw_lock);
65 + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
66 + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
67 + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
68 + ext4_inode_csum_set(inode, raw_inode, ei);
69 + spin_unlock(&ei->i_raw_lock);
70 + trace_ext4_other_inode_update_time(inode, orig_ino);
77 * Post the struct inode info into an on-disk inode location in the
78 * buffer-cache. This gobbles the caller's reference to the
79 * buffer_head in the inode location struct.
80 @@ -4237,7 +4282,6 @@ static int ext4_do_update_inode(handle_t *handle,
81 for (block = 0; block < EXT4_N_BLOCKS; block++)
82 raw_inode->i_block[block] = ei->i_data[block];
85 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
86 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
87 if (ei->i_extra_isize) {
88 @@ -4248,10 +4292,9 @@ static int ext4_do_update_inode(handle_t *handle,
89 cpu_to_le16(ei->i_extra_isize);
93 ext4_inode_csum_set(inode, raw_inode, ei);
95 spin_unlock(&ei->i_raw_lock);
96 + ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, bh->b_data);
98 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
99 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
100 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
101 index 58859bc..93a2b7a 100644
102 --- a/fs/ext4/super.c
103 +++ b/fs/ext4/super.c
104 @@ -1132,6 +1132,7 @@ enum {
105 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
106 Opt_usrquota, Opt_grpquota, Opt_i_version,
107 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
108 + Opt_lazytime, Opt_nolazytime,
109 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
110 Opt_inode_readahead_blks, Opt_journal_ioprio,
111 Opt_dioread_nolock, Opt_dioread_lock,
112 @@ -1195,6 +1196,8 @@ static const match_table_t tokens = {
113 {Opt_i_version, "i_version"},
114 {Opt_stripe, "stripe=%u"},
115 {Opt_delalloc, "delalloc"},
116 + {Opt_lazytime, "lazytime"},
117 + {Opt_nolazytime, "nolazytime"},
118 {Opt_nodelalloc, "nodelalloc"},
119 {Opt_removed, "mblk_io_submit"},
120 {Opt_removed, "nomblk_io_submit"},
121 @@ -1452,6 +1455,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
123 sb->s_flags |= MS_I_VERSION;
126 + sb->s_flags |= MS_LAZYTIME;
128 + case Opt_nolazytime:
129 + sb->s_flags &= ~MS_LAZYTIME;
133 for (m = ext4_mount_opts; m->token != Opt_err; m++)
134 diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
135 index 6cfb841..6e5abd6 100644
136 --- a/include/trace/events/ext4.h
137 +++ b/include/trace/events/ext4.h
138 @@ -73,6 +73,36 @@ struct extent_status;
139 { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
142 +TRACE_EVENT(ext4_other_inode_update_time,
143 + TP_PROTO(struct inode *inode, ino_t orig_ino),
145 + TP_ARGS(inode, orig_ino),
148 + __field( dev_t, dev )
149 + __field( ino_t, ino )
150 + __field( ino_t, orig_ino )
151 + __field( uid_t, uid )
152 + __field( gid_t, gid )
153 + __field( __u16, mode )
157 + __entry->orig_ino = orig_ino;
158 + __entry->dev = inode->i_sb->s_dev;
159 + __entry->ino = inode->i_ino;
160 + __entry->uid = i_uid_read(inode);
161 + __entry->gid = i_gid_read(inode);
162 + __entry->mode = inode->i_mode;
165 + TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u",
166 + MAJOR(__entry->dev), MINOR(__entry->dev),
167 + (unsigned long) __entry->orig_ino,
168 + (unsigned long) __entry->ino, __entry->mode,
169 + __entry->uid, __entry->gid)
172 TRACE_EVENT(ext4_free_inode,
173 TP_PROTO(struct inode *inode),