V3 version of Jan's Extent Status Cache improvements
[ext4-patch-queue.git] / ext4-add-lazytime-mount-option
blobf25c877351e2d7488ff4b6697bc164614e770fb5
1 ext4: add support for a lazytime mount option
3 Add an optimization for the MS_LAZYTIME mount option so that we will
4 opportunistically write out any inodes with the I_DIRTY_TIME flag set
5 in a particular inode table block when we need to update some inode in
6 that inode table block anyway.
8 Also add some temporary code so that we can set the lazytime mount
9 option without needing a modified /sbin/mount program which can set
10 MS_LAZYTIME.  We can eventually make this go away once util-linux has
11 added support.
13 Google-Bug-Id: 18297052
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 ---
17  fs/ext4/inode.c             | 48 +++++++++++++++++++++++++++++++++++++++++++++---
18  fs/ext4/super.c             |  9 +++++++++
19  fs/inode.c                  | 36 ++++++++++++++++++++++++++++++++++++
20  include/linux/fs.h          |  2 ++
21  include/trace/events/ext4.h | 30 ++++++++++++++++++++++++++++++
22  5 files changed, 122 insertions(+), 3 deletions(-)
24 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
25 index 3356ab5..03149b4 100644
26 --- a/fs/ext4/inode.c
27 +++ b/fs/ext4/inode.c
28 @@ -4163,6 +4163,50 @@ static int ext4_inode_blocks_set(handle_t *handle,
29  }
31  /*
32 + * Opportunistically update the time fields of the other I_DIRTY_TIME inodes
33 + * in the inode table block @buf that also holds the one-based @orig_ino.
34 + */
35 +static void ext4_update_other_inodes_time(struct super_block *sb,
36 +                                         unsigned long orig_ino, char *buf)
38 +       struct ext4_inode_info  *ei;
39 +       struct ext4_inode       *raw_inode;
40 +       unsigned long           ino;
41 +       struct inode            *inode;
42 +       int             i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
43 +       int             inode_size = EXT4_INODE_SIZE(sb);
45 +       ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; /* first ino in block */
46 +       for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
47 +               if (ino == orig_ino)    /* the caller has already written this one */
48 +                       continue;
49 +               inode = find_active_inode_nowait(sb, ino);
50 +               if (!inode ||
51 +                   (inode->i_state & I_DIRTY_TIME) == 0 ||
52 +                   !spin_trylock(&inode->i_lock)) {    /* skip rather than spin */
53 +                       iput(inode);
54 +                       continue;
55 +               }
56 +               inode->i_state &= ~I_DIRTY_TIME;
57 +               inode->i_ts_dirty_day = 0;
58 +               spin_unlock(&inode->i_lock);
60 +               ei = EXT4_I(inode);
61 +               raw_inode = (struct ext4_inode *) buf;  /* slot i of the block */
63 +               spin_lock(&ei->i_raw_lock);
64 +               EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
65 +               EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
66 +               EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
67 +               ext4_inode_csum_set(inode, raw_inode, ei);
68 +               spin_unlock(&ei->i_raw_lock);
69 +               trace_ext4_other_inode_update_time(inode, orig_ino);
70 +               iput(inode);
71 +       }
75 +/*
76   * Post the struct inode info into an on-disk inode location in the
77   * buffer-cache.  This gobbles the caller's reference to the
78   * buffer_head in the inode location struct.
79 @@ -4260,7 +4304,6 @@ static int ext4_do_update_inode(handle_t *handle,
80                 for (block = 0; block < EXT4_N_BLOCKS; block++)
81                         raw_inode->i_block[block] = ei->i_data[block];
82         }
84         if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
85                 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
86                 if (ei->i_extra_isize) {
87 @@ -4271,10 +4314,9 @@ static int ext4_do_update_inode(handle_t *handle,
88                                 cpu_to_le16(ei->i_extra_isize);
89                 }
90         }
92         ext4_inode_csum_set(inode, raw_inode, ei);
94         spin_unlock(&ei->i_raw_lock);
95 +       ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, bh->b_data);
97         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
98         rc = ext4_handle_dirty_metadata(handle, NULL, bh);
99 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
100 index 4b79f39..1ac1914 100644
101 --- a/fs/ext4/super.c
102 +++ b/fs/ext4/super.c
103 @@ -1133,6 +1133,7 @@ enum {
104         Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
105         Opt_usrquota, Opt_grpquota, Opt_i_version,
106         Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
107 +       Opt_lazytime, Opt_nolazytime,
108         Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
109         Opt_inode_readahead_blks, Opt_journal_ioprio,
110         Opt_dioread_nolock, Opt_dioread_lock,
111 @@ -1195,6 +1196,8 @@ static const match_table_t tokens = {
112         {Opt_i_version, "i_version"},
113         {Opt_stripe, "stripe=%u"},
114         {Opt_delalloc, "delalloc"},
115 +       {Opt_lazytime, "lazytime"},
116 +       {Opt_nolazytime, "nolazytime"},
117         {Opt_nodelalloc, "nodelalloc"},
118         {Opt_removed, "mblk_io_submit"},
119         {Opt_removed, "nomblk_io_submit"},
120 @@ -1450,6 +1453,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
121         case Opt_i_version:
122                 sb->s_flags |= MS_I_VERSION;
123                 return 1;
124 +       case Opt_lazytime:
125 +               sb->s_flags |= MS_LAZYTIME;
126 +               return 1;
127 +       case Opt_nolazytime:
128 +               sb->s_flags &= ~MS_LAZYTIME;
129 +               return 1;
130         }
132         for (m = ext4_mount_opts; m->token != Opt_err; m++)
133 diff --git a/fs/inode.c b/fs/inode.c
134 index 6319ead..1f90591 100644
135 --- a/fs/inode.c
136 +++ b/fs/inode.c
137 @@ -1296,6 +1296,42 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
139  EXPORT_SYMBOL(ilookup);
141 +/**
142 + * find_active_inode_nowait - find an active inode in the inode cache
143 + * @sb:                super block of file system to search
144 + * @ino:       inode number to search for
145 + *
146 + * Search the inode hash for @ino on @sb while holding inode_hash_lock.
147 + * Only the first matching entry is examined.  If that inode is being
148 + * freed (I_FREEING, I_WILL_FREE) or is newly initialized (I_NEW), give
149 + * up instead of waiting for the initialization or destruction to finish.
150 + * Return: the inode with its reference count incremented, else NULL.
151 + */
152 +struct inode *find_active_inode_nowait(struct super_block *sb,
153 +                                      unsigned long ino)
155 +       struct hlist_head *head = inode_hashtable + hash(sb, ino);
156 +       struct inode *inode, *ret_inode = NULL;
158 +       spin_lock(&inode_hash_lock);
159 +       hlist_for_each_entry(inode, head, i_hash) {
160 +               if ((inode->i_ino != ino) ||
161 +                   (inode->i_sb != sb))
162 +                       continue;
163 +               spin_lock(&inode->i_lock);      /* i_state is read under i_lock */
164 +               if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) == 0) {
165 +                       __iget(inode);
166 +                       ret_inode = inode;
167 +               }
168 +               spin_unlock(&inode->i_lock);
169 +               goto out;       /* first match decides; stop scanning the chain */
170 +       }
171 +out:
172 +       spin_unlock(&inode_hash_lock);
173 +       return ret_inode;
175 +EXPORT_SYMBOL(find_active_inode_nowait);
177  int insert_inode_locked(struct inode *inode)
179         struct super_block *sb = inode->i_sb;
180 diff --git a/include/linux/fs.h b/include/linux/fs.h
181 index e3574cd..dbbd642 100644
182 --- a/include/linux/fs.h
183 +++ b/include/linux/fs.h
184 @@ -2413,6 +2413,8 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
186  extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
187  extern struct inode * iget_locked(struct super_block *, unsigned long);
188 +extern struct inode *find_active_inode_nowait(struct super_block *,
189 +                                             unsigned long);
190  extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
191  extern int insert_inode_locked(struct inode *);
192  #ifdef CONFIG_DEBUG_LOCK_ALLOC
193 diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
194 index ff4bd1b..ba649cb 100644
195 --- a/include/trace/events/ext4.h
196 +++ b/include/trace/events/ext4.h
197 @@ -75,6 +75,36 @@ struct extent_status;
198         { FALLOC_FL_ZERO_RANGE,         "ZERO_RANGE"})
201 +TRACE_EVENT(ext4_other_inode_update_time,
202 +       TP_PROTO(struct inode *inode, ino_t orig_ino),
204 +       TP_ARGS(inode, orig_ino),
206 +       TP_STRUCT__entry(
207 +               __field(        dev_t,  dev                     )
208 +               __field(        ino_t,  ino                     ) /* inode whose times were copied */
209 +               __field(        ino_t,  orig_ino                ) /* inode the caller was writing */
210 +               __field(        uid_t,  uid                     )
211 +               __field(        gid_t,  gid                     )
212 +               __field(        __u16, mode                     )
213 +       ),
215 +       TP_fast_assign(
216 +               __entry->orig_ino = orig_ino;
217 +               __entry->dev    = inode->i_sb->s_dev;
218 +               __entry->ino    = inode->i_ino;
219 +               __entry->uid    = i_uid_read(inode);
220 +               __entry->gid    = i_gid_read(inode);
221 +               __entry->mode   = inode->i_mode;
222 +       ),
224 +       TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u",
225 +                 MAJOR(__entry->dev), MINOR(__entry->dev),
226 +                 (unsigned long) __entry->orig_ino,
227 +                 (unsigned long) __entry->ino, __entry->mode,
228 +                 __entry->uid, __entry->gid)
231  TRACE_EVENT(ext4_free_inode,
232         TP_PROTO(struct inode *inode),