Add make-proc-generic and centralize-proc-files patches. Update
[ext4-patch-queue.git] / inode-readahead
blob57cc75636c0b0e642b0fd78fdab10884a66b0797
1 ext4: Use readahead when reading an inode from the inode table
3 With modern hard drives, reading 64k takes roughly the same time as
4 reading a 4k block.  So request readahead for adjacent inode table
5 blocks to reduce the time it takes when iterating over directories
6 (especially when doing this in htree sort order) in a cold cache case.
7 With this patch, the time it takes to run "git status" on a kernel
8 tree after flushing the caches via "echo 3 > /proc/sys/vm/drop_caches"
9 is reduced by 21%.
11 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
12 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
13 index 163c445..fc7ce2e 100644
14 --- a/fs/ext4/ext4.h
15 +++ b/fs/ext4/ext4.h
16 @@ -790,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
17  #define        EXT4_DEF_RESUID         0
18  #define        EXT4_DEF_RESGID         0
20 +#define EXT4_DEF_INODE_READAHEAD_BITS  5
22  /*
23   * Default mount options
24   */
25 diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
26 index f92af01..04e1fd2 100644
27 --- a/fs/ext4/ext4_sb.h
28 +++ b/fs/ext4/ext4_sb.h
29 @@ -52,6 +52,7 @@ struct ext4_sb_info {
30         int s_desc_per_block_bits;
31         int s_inode_size;
32         int s_first_ino;
33 +       unsigned int s_inode_readahead_bits;
34         spinlock_t s_next_gen_lock;
35         u32 s_next_generation;
36         u32 s_hash_seed[4];
37 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
38 index eed1265..5c19604 100644
39 --- a/fs/ext4/inode.c
40 +++ b/fs/ext4/inode.c
41 @@ -3833,41 +3833,6 @@ out_stop:
42         ext4_journal_stop(handle);
43  }
45 -static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
46 -               unsigned long ino, struct ext4_iloc *iloc)
48 -       ext4_group_t block_group;
49 -       unsigned long offset;
50 -       ext4_fsblk_t block;
51 -       struct ext4_group_desc *gdp;
53 -       if (!ext4_valid_inum(sb, ino)) {
54 -               /*
55 -                * This error is already checked for in namei.c unless we are
56 -                * looking at an NFS filehandle, in which case no error
57 -                * report is needed
58 -                */
59 -               return 0;
60 -       }
62 -       block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
63 -       gdp = ext4_get_group_desc(sb, block_group, NULL);
64 -       if (!gdp)
65 -               return 0;
67 -       /*
68 -        * Figure out the offset within the block group inode table
69 -        */
70 -       offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
71 -               EXT4_INODE_SIZE(sb);
72 -       block = ext4_inode_table(sb, gdp) +
73 -               (offset >> EXT4_BLOCK_SIZE_BITS(sb));
75 -       iloc->block_group = block_group;
76 -       iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
77 -       return block;
80  /*
81   * ext4_get_inode_loc returns with an extra refcount against the inode's
82   * underlying buffer_head on success. If 'in_mem' is true, we have all
83 @@ -3877,19 +3842,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
84  static int __ext4_get_inode_loc(struct inode *inode,
85                                 struct ext4_iloc *iloc, int in_mem)
86  {
87 -       ext4_fsblk_t block;
88 -       struct buffer_head *bh;
89 +       struct ext4_group_desc  *gdp;
90 +       struct buffer_head      *bh;
91 +       struct super_block      *sb = inode->i_sb;
92 +       ext4_fsblk_t            block;
93 +       int                     inodes_per_block, inode_offset;
95 +       iloc->bh = 0;
96 +       if (!ext4_valid_inum(sb, inode->i_ino))
97 +               return -EIO;
99 -       block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
100 -       if (!block)
101 +       iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
102 +       gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
103 +       if (!gdp)
104                 return -EIO;
106 -       bh = sb_getblk(inode->i_sb, block);
107 +       /*
108 +        * Figure out the offset within the block group inode table
109 +        */
110 +       inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
111 +       inode_offset = ((inode->i_ino - 1) %
112 +                       EXT4_INODES_PER_GROUP(sb));
113 +       block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
114 +       iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
116 +       bh = sb_getblk(sb, block);
117         if (!bh) {
118 -               ext4_error (inode->i_sb, "ext4_get_inode_loc",
119 -                               "unable to read inode block - "
120 -                               "inode=%lu, block=%llu",
121 -                                inode->i_ino, block);
122 +               ext4_error(sb, "ext4_get_inode_loc", "unable to read "
123 +                          "inode block - inode=%lu, block=%llu",
124 +                          inode->i_ino, block);
125                 return -EIO;
126         }
127         if (!buffer_uptodate(bh)) {
128 @@ -3917,28 +3898,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
129                  */
130                 if (in_mem) {
131                         struct buffer_head *bitmap_bh;
132 -                       struct ext4_group_desc *desc;
133 -                       int inodes_per_buffer;
134 -                       int inode_offset, i;
135 -                       ext4_group_t block_group;
136 -                       int start;
138 -                       block_group = (inode->i_ino - 1) /
139 -                                       EXT4_INODES_PER_GROUP(inode->i_sb);
140 -                       inodes_per_buffer = bh->b_size /
141 -                               EXT4_INODE_SIZE(inode->i_sb);
142 -                       inode_offset = ((inode->i_ino - 1) %
143 -                                       EXT4_INODES_PER_GROUP(inode->i_sb));
144 -                       start = inode_offset & ~(inodes_per_buffer - 1);
145 +                       int i, start;
147 -                       /* Is the inode bitmap in cache? */
148 -                       desc = ext4_get_group_desc(inode->i_sb,
149 -                                               block_group, NULL);
150 -                       if (!desc)
151 -                               goto make_io;
152 +                       start = inode_offset & ~(inodes_per_block - 1);
154 -                       bitmap_bh = sb_getblk(inode->i_sb,
155 -                               ext4_inode_bitmap(inode->i_sb, desc));
156 +                       /* Is the inode bitmap in cache? */
157 +                       bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
158                         if (!bitmap_bh)
159                                 goto make_io;
161 @@ -3951,14 +3916,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
162                                 brelse(bitmap_bh);
163                                 goto make_io;
164                         }
165 -                       for (i = start; i < start + inodes_per_buffer; i++) {
166 +                       for (i = start; i < start + inodes_per_block; i++) {
167                                 if (i == inode_offset)
168                                         continue;
169                                 if (ext4_test_bit(i, bitmap_bh->b_data))
170                                         break;
171                         }
172                         brelse(bitmap_bh);
173 -                       if (i == start + inodes_per_buffer) {
174 +                       if (i == start + inodes_per_block) {
175                                 /* all other inodes are free, so skip I/O */
176                                 memset(bh->b_data, 0, bh->b_size);
177                                 set_buffer_uptodate(bh);
178 @@ -3969,6 +3934,31 @@ static int __ext4_get_inode_loc(struct inode *inode,
180  make_io:
181                 /*
182 +                * If we need to do any I/O, try to readahead the aligned
183 +                * window of (1 << s_inode_readahead_bits) inode table blocks.
184 +                */
185 +               if (EXT4_SB(sb)->s_inode_readahead_bits) {
186 +                       ext4_fsblk_t b, end, table;
187 +                       int ra = 1 << EXT4_SB(sb)->s_inode_readahead_bits;
188 +                       unsigned num;
190 +                       table = ext4_inode_table(sb, gdp);
191 +                       b = block & ~(ra-1);
192 +                       if (table > b)
193 +                               b = table;
194 +                       end = b + ra;
195 +                       num = EXT4_INODES_PER_GROUP(sb);
196 +                       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
197 +                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
198 +                               num -= le16_to_cpu(gdp->bg_itable_unused);
199 +                       table += num / inodes_per_block;
200 +                       if (end > table)
201 +                               end = table;
202 +                       while (b <= end)
203 +                               sb_breadahead(sb, b++);
204 +               }
206 +               /*
207                  * There are other valid inodes in the buffer, this inode
208                  * has in-inode xattrs, or we don't have this inode in memory.
209                  * Read the block from disk.
210 @@ -3978,10 +3968,9 @@ make_io:
211                 submit_bh(READ_META, bh);
212                 wait_on_buffer(bh);
213                 if (!buffer_uptodate(bh)) {
214 -                       ext4_error(inode->i_sb, "ext4_get_inode_loc",
215 -                                       "unable to read inode block - "
216 -                                       "inode=%lu, block=%llu",
217 -                                       inode->i_ino, block);
218 +                       ext4_error(sb, "ext4_get_inode_loc",
219 +                                  "unable to read inode block - inode=%lu, "
220 +                                  "block=%llu", inode->i_ino, block);
221                         brelse(bh);
222                         return -EIO;
223                 }
224 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
225 index 1515006..00c8d97 100644
226 --- a/fs/ext4/super.c
227 +++ b/fs/ext4/super.c
228 @@ -514,8 +514,10 @@ static void ext4_put_super(struct super_block *sb)
229                 BUFFER_TRACE(sbi->s_sbh, "marking dirty");
230                 ext4_commit_super(sb, es, 1);
231         }
232 -       if (sbi->s_proc)
233 +       if (sbi->s_proc) {
234 +               remove_proc_entry("inode_readahead_bits", sbi->s_proc);
235                 remove_proc_entry(sb->s_id, ext4_proc_root);
236 +       }
238         for (i = 0; i < sbi->s_gdb_count; i++)
239                 brelse(sbi->s_group_desc[i]);
240 @@ -778,6 +780,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
241         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
242                 seq_puts(seq, ",data=writeback");
244 +       if (sbi->s_inode_readahead_bits != EXT4_DEF_INODE_READAHEAD_BITS)
245 +               seq_printf(seq, ",inode_readahead_bits=%d",
246 +                          sbi->s_inode_readahead_bits);
248         ext4_show_quota_options(seq, sb);
249         return 0;
251 @@ -912,6 +918,7 @@ enum {
252         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
253         Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
254         Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
255 +       Opt_inode_readahead_bits
256  };
258  static match_table_t tokens = {
259 @@ -972,6 +979,7 @@ static match_table_t tokens = {
260         {Opt_resize, "resize"},
261         {Opt_delalloc, "delalloc"},
262         {Opt_nodelalloc, "nodelalloc"},
263 +       {Opt_inode_readahead_bits, "inode_readahead_bits=%u"},
264         {Opt_err, NULL},
265  };
267 @@ -1380,6 +1388,13 @@ set_qf_format:
268                 case Opt_delalloc:
269                         set_opt(sbi->s_mount_opt, DELALLOC);
270                         break;
271 +               case Opt_inode_readahead_bits:
272 +                       if (match_int(&args[0], &option))
273 +                               return 0;
274 +                       if (option < 0 || option > 31)
275 +                               return 0;
276 +                       sbi->s_inode_readahead_bits = option;
277 +                       break;
278                 default:
279                         printk(KERN_ERR
280                                "EXT4-fs: Unrecognized mount option \"%s\" "
281 @@ -1937,6 +1952,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
282         sbi->s_mount_opt = 0;
283         sbi->s_resuid = EXT4_DEF_RESUID;
284         sbi->s_resgid = EXT4_DEF_RESGID;
285 +       sbi->s_inode_readahead_bits = EXT4_DEF_INODE_READAHEAD_BITS;
286         sbi->s_sb_block = sb_block;
288         unlock_kernel();
289 @@ -2233,6 +2249,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
290         if (ext4_proc_root)
291                 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
293 +       if (sbi->s_proc)
294 +               proc_create_data("inode_readahead_bits", 0644, sbi->s_proc,
295 +                                &ext4_ui_proc_fops,
296 +                                &sbi->s_inode_readahead_bits);
298         bgl_lock_init(&sbi->s_blockgroup_lock);
300         for (i = 0; i < db_count; i++) {
301 @@ -2512,8 +2533,10 @@ failed_mount2:
302                 brelse(sbi->s_group_desc[i]);
303         kfree(sbi->s_group_desc);
304  failed_mount:
305 -       if (sbi->s_proc)
306 +       if (sbi->s_proc) {
307 +               remove_proc_entry("inode_readahead_bits", sbi->s_proc);
308                 remove_proc_entry(sb->s_id, ext4_proc_root);
309 +       }
310  #ifdef CONFIG_QUOTA
311         for (i = 0; i < MAXQUOTAS; i++)
312                 kfree(sbi->s_qf_names[i]);