1 ext4: add optimization for the lazytime mount option
3 Add an optimization for the MS_LAZYTIME mount option so that we will
4 opportunistically write out any inodes with the I_DIRTY_TIME flag set
5 in a particular inode table block when we need to update some inode in
6 that inode table block anyway.
8 Also add some temporary code so that we can set the lazytime mount
9 option without needing a modified /sbin/mount program which can set
10 MS_LAZYTIME. We can eventually make this go away once util-linux has added support.
13 Google-Bug-Id: 18297052
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
17 fs/ext4/inode.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++--
18 fs/ext4/super.c | 10 ++++++++
19 include/trace/events/ext4.h | 30 ++++++++++++++++++++++++
20 3 files changed, 102 insertions(+), 2 deletions(-)
22 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
23 index 628df5b..9193ea1 100644
26 @@ -4139,6 +4139,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
31 + unsigned long orig_ino;
32 + struct ext4_inode *raw_inode;
35 +static int other_inode_match(struct inode * inode, unsigned long ino,
38 + struct other_inode *oi = (struct other_inode *) data;
40 + if ((inode->i_ino != ino) ||
41 + (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
42 + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
43 + ((inode->i_state & I_DIRTY_TIME) == 0))
45 + spin_lock(&inode->i_lock);
46 + if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
47 + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
48 + (inode->i_state & I_DIRTY_TIME)) {
49 + struct ext4_inode_info *ei = EXT4_I(inode);
51 + inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
52 + spin_unlock(&inode->i_lock);
54 + spin_lock(&ei->i_raw_lock);
55 + EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
56 + EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
57 + EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
58 + ext4_inode_csum_set(inode, oi->raw_inode, ei);
59 + spin_unlock(&ei->i_raw_lock);
60 + trace_ext4_other_inode_update_time(inode, oi->orig_ino);
63 + spin_unlock(&inode->i_lock);
68 + * Opportunistically update the other time fields for other inodes in
69 + * the same inode table block.
71 +static void ext4_update_other_inodes_time(struct super_block *sb,
72 + unsigned long orig_ino, char *buf)
74 + struct other_inode oi;
76 + int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
77 + int inode_size = EXT4_INODE_SIZE(sb);
79 + oi.orig_ino = orig_ino;
80 + ino = orig_ino & ~(inodes_per_block - 1);
81 + for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
82 + if (ino == orig_ino)
84 + oi.raw_inode = (struct ext4_inode *) buf;
85 + (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
90 * Post the struct inode info into an on-disk inode location in the
91 * buffer-cache. This gobbles the caller's reference to the
92 @@ -4248,10 +4307,11 @@ static int ext4_do_update_inode(handle_t *handle,
93 cpu_to_le16(ei->i_extra_isize);
97 ext4_inode_csum_set(inode, raw_inode, ei);
99 spin_unlock(&ei->i_raw_lock);
100 + if (inode->i_sb->s_flags & MS_LAZYTIME)
101 + ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
104 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
105 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
106 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
107 index 74c5f53..362b23c 100644
108 --- a/fs/ext4/super.c
109 +++ b/fs/ext4/super.c
110 @@ -1139,6 +1139,7 @@ enum {
111 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
112 Opt_usrquota, Opt_grpquota, Opt_i_version,
113 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
114 + Opt_lazytime, Opt_nolazytime,
115 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
116 Opt_inode_readahead_blks, Opt_journal_ioprio,
117 Opt_dioread_nolock, Opt_dioread_lock,
118 @@ -1202,6 +1203,8 @@ static const match_table_t tokens = {
119 {Opt_i_version, "i_version"},
120 {Opt_stripe, "stripe=%u"},
121 {Opt_delalloc, "delalloc"},
122 + {Opt_lazytime, "lazytime"},
123 + {Opt_nolazytime, "nolazytime"},
124 {Opt_nodelalloc, "nodelalloc"},
125 {Opt_removed, "mblk_io_submit"},
126 {Opt_removed, "nomblk_io_submit"},
127 @@ -1459,6 +1462,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
129 sb->s_flags |= MS_I_VERSION;
132 + sb->s_flags |= MS_LAZYTIME;
134 + case Opt_nolazytime:
135 + sb->s_flags &= ~MS_LAZYTIME;
139 for (m = ext4_mount_opts; m->token != Opt_err; m++)
140 @@ -5020,6 +5029,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
144 + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
145 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
148 diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
149 index 6cfb841..6e5abd6 100644
150 --- a/include/trace/events/ext4.h
151 +++ b/include/trace/events/ext4.h
152 @@ -73,6 +73,36 @@ struct extent_status;
153 { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
156 +TRACE_EVENT(ext4_other_inode_update_time,
157 + TP_PROTO(struct inode *inode, ino_t orig_ino),
159 + TP_ARGS(inode, orig_ino),
162 + __field( dev_t, dev )
163 + __field( ino_t, ino )
164 + __field( ino_t, orig_ino )
165 + __field( uid_t, uid )
166 + __field( gid_t, gid )
167 + __field( __u16, mode )
171 + __entry->orig_ino = orig_ino;
172 + __entry->dev = inode->i_sb->s_dev;
173 + __entry->ino = inode->i_ino;
174 + __entry->uid = i_uid_read(inode);
175 + __entry->gid = i_gid_read(inode);
176 + __entry->mode = inode->i_mode;
179 + TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u",
180 + MAJOR(__entry->dev), MINOR(__entry->dev),
181 + (unsigned long) __entry->orig_ino,
182 + (unsigned long) __entry->ino, __entry->mode,
183 + __entry->uid, __entry->gid)
186 TRACE_EVENT(ext4_free_inode,
187 TP_PROTO(struct inode *inode),