lazytime v5 patch output
[ext4-patch-queue.git] / ext4-add-lazytime-mount-option
blob59bb98ebdd1cb3544e652a691c6f53efaac222ec
1 ext4: add optimization for the lazytime mount option
3 Add an optimization for the MS_LAZYTIME mount option so that we will
4 opportunistically write out any inodes with the I_DIRTY_TIME flag set
5 in a particular inode table block when we need to update some inode in
6 that inode table block anyway.
8 Also add some temporary code so that we can set the lazytime mount
9 option without needing a modified /sbin/mount program which can set
10 MS_LAZYTIME.  We can eventually make this go away once util-linux has
11 added support.
13 Google-Bug-Id: 18297052
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 ---
17  fs/ext4/inode.c             | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
18  fs/ext4/super.c             |  9 ++++++++
19  include/trace/events/ext4.h | 30 +++++++++++++++++++++++++++
20  3 files changed, 102 insertions(+), 3 deletions(-)
22 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
23 index 5653fa4..0e60d90 100644
24 --- a/fs/ext4/inode.c
25 +++ b/fs/ext4/inode.c
26 @@ -4139,6 +4139,66 @@ static int ext4_inode_blocks_set(handle_t *handle,
27         return 0;
28  }
30 +struct other_inode { /* argument bundle handed to other_inode_match() through its void *data parameter */
31 +       unsigned long           orig_ino; /* inode number whose update triggered the scan; only forwarded to the tracepoint */
32 +       struct ext4_inode       *raw_inode; /* slot inside the inode table block buffer that receives the timestamps */
33 +};
35 +static int other_inode_match(struct inode * inode, unsigned long ino, /* match callback passed to find_inode_nowait() */
36 +                            void *data) /* data: the caller's struct other_inode */
38 +       struct other_inode *oi = (struct other_inode *) data;
40 +       if ((inode->i_ino != ino) || /* unlocked pre-check: skip unless this is the wanted inode and only its timestamps are dirty */
41 +           (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
42 +                              I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
43 +           ((inode->i_state & I_DIRTY_TIME) == 0))
44 +               return 0; /* NOTE(review): presumably 0 tells find_inode_nowait() to keep scanning - confirm in fs/inode.c */
45 +       spin_lock(&inode->i_lock);
46 +       if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | /* repeat the i_state test now that i_lock is held */
47 +                               I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
48 +           (inode->i_state & I_DIRTY_TIME)) {
49 +               struct ext4_inode_info  *ei = EXT4_I(inode);
51 +               inode->i_state &= ~I_DIRTY_TIME; /* cleared under i_lock, before the timestamps are copied out below */
52 +               inode->i_ts_dirty_day = 0;
53 +               spin_unlock(&inode->i_lock);
55 +               spin_lock(&ei->i_raw_lock); /* i_raw_lock is held while the raw inode is modified and re-checksummed */
56 +               EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
57 +               EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
58 +               EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
59 +               ext4_inode_csum_set(inode, oi->raw_inode, ei); /* raw inode changed, so its checksum is recomputed */
60 +               spin_unlock(&ei->i_raw_lock);
61 +               trace_ext4_other_inode_update_time(inode, oi->orig_ino);
62 +               return -1; /* NOTE(review): presumably -1 ends the search without returning an inode - confirm in fs/inode.c */
63 +       }
64 +       spin_unlock(&inode->i_lock);
65 +       return -1; /* inode number matched but its state changed before i_lock was taken: nothing written */
68 +/*
69 + * Opportunistically update the other time fields for other inodes in
70 + * the same inode table block.
71 + */
72 +static void ext4_update_other_inodes_time(struct super_block *sb,
73 +                                         unsigned long orig_ino, char *buf)
75 +       struct other_inode oi;
76 +       unsigned long ino;
77 +       int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
78 +       int inode_size = EXT4_INODE_SIZE(sb);
80 +       oi.orig_ino = orig_ino;
81 +       ino = orig_ino & ~(inodes_per_block - 1);
82 +       for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
83 +               if (ino == orig_ino)
84 +                       continue;
85 +               oi.raw_inode = (struct ext4_inode *) buf;
86 +               (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
87 +       }
90  /*
91   * Post the struct inode info into an on-disk inode location in the
92   * buffer-cache.  This gobbles the caller's reference to the
93 @@ -4237,7 +4297,6 @@ static int ext4_do_update_inode(handle_t *handle,
94                 for (block = 0; block < EXT4_N_BLOCKS; block++)
95                         raw_inode->i_block[block] = ei->i_data[block];
96         }
98         if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
99                 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
100                 if (ei->i_extra_isize) {
101 @@ -4248,10 +4307,11 @@ static int ext4_do_update_inode(handle_t *handle,
102                                 cpu_to_le16(ei->i_extra_isize);
103                 }
104         }
106         ext4_inode_csum_set(inode, raw_inode, ei);
108         spin_unlock(&ei->i_raw_lock);
109 +       if (inode->i_sb->s_flags & MS_LAZYTIME)
110 +               ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
111 +                                             bh->b_data);
113         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
114         rc = ext4_handle_dirty_metadata(handle, NULL, bh);
115 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
116 index 58859bc..93a2b7a 100644
117 --- a/fs/ext4/super.c
118 +++ b/fs/ext4/super.c
119 @@ -1132,6 +1132,7 @@ enum {
120         Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
121         Opt_usrquota, Opt_grpquota, Opt_i_version,
122         Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
123 +       Opt_lazytime, Opt_nolazytime,
124         Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
125         Opt_inode_readahead_blks, Opt_journal_ioprio,
126         Opt_dioread_nolock, Opt_dioread_lock,
127 @@ -1195,6 +1196,8 @@ static const match_table_t tokens = {
128         {Opt_i_version, "i_version"},
129         {Opt_stripe, "stripe=%u"},
130         {Opt_delalloc, "delalloc"},
131 +       {Opt_lazytime, "lazytime"},
132 +       {Opt_nolazytime, "nolazytime"},
133         {Opt_nodelalloc, "nodelalloc"},
134         {Opt_removed, "mblk_io_submit"},
135         {Opt_removed, "nomblk_io_submit"},
136 @@ -1452,6 +1455,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
137         case Opt_i_version:
138                 sb->s_flags |= MS_I_VERSION;
139                 return 1;
140 +       case Opt_lazytime:
141 +               sb->s_flags |= MS_LAZYTIME;
142 +               return 1;
143 +       case Opt_nolazytime:
144 +               sb->s_flags &= ~MS_LAZYTIME;
145 +               return 1;
146         }
148         for (m = ext4_mount_opts; m->token != Opt_err; m++)
149 diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
150 index 6cfb841..6e5abd6 100644
151 --- a/include/trace/events/ext4.h
152 +++ b/include/trace/events/ext4.h
153 @@ -73,6 +73,36 @@ struct extent_status;
154         { FALLOC_FL_ZERO_RANGE,         "ZERO_RANGE"})
157 +TRACE_EVENT(ext4_other_inode_update_time, /* emitted by other_inode_match() after it copies an inode's timestamps into the raw inode */
158 +       TP_PROTO(struct inode *inode, ino_t orig_ino), /* inode: the inode whose times were written; orig_ino: the caller's oi->orig_ino */
160 +       TP_ARGS(inode, orig_ino),
162 +       TP_STRUCT__entry(
163 +               __field(        dev_t,  dev                     )
164 +               __field(        ino_t,  ino                     )
165 +               __field(        ino_t,  orig_ino                )
166 +               __field(        uid_t,  uid                     )
167 +               __field(        gid_t,  gid                     )
168 +               __field(        __u16, mode                     )
169 +       ),
171 +       TP_fast_assign(
172 +               __entry->orig_ino = orig_ino;
173 +               __entry->dev    = inode->i_sb->s_dev;
174 +               __entry->ino    = inode->i_ino;
175 +               __entry->uid    = i_uid_read(inode);
176 +               __entry->gid    = i_gid_read(inode);
177 +               __entry->mode   = inode->i_mode;
178 +       ),
180 +       TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", /* ino_t fields are cast to unsigned long to match %lu */
181 +                 MAJOR(__entry->dev), MINOR(__entry->dev),
182 +                 (unsigned long) __entry->orig_ino,
183 +                 (unsigned long) __entry->ino, __entry->mode,
184 +                 __entry->uid, __entry->gid)
187  TRACE_EVENT(ext4_free_inode,
188         TP_PROTO(struct inode *inode),