From bb646e7e80b1c3ed3e672acd411a47a0d9298a8e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 24 Sep 2008 12:54:26 -0400 Subject: [PATCH] Add make-proc-generic and centralize-proc-files patches. Update inode-readahead patch. --- inode-readahead | 208 ++++++++++++++++++++++++++++++++++++++++++++++---------- series | 2 + 2 files changed, 173 insertions(+), 37 deletions(-) diff --git a/inode-readahead b/inode-readahead index 8979a596..57cc7563 100644 --- a/inode-readahead +++ b/inode-readahead @@ -1,4 +1,4 @@ -ext4: Use preallocation when reading from the inode table +ext4: Use readahead when reading an inode from the inode table With modern hard drives, reading 64k takes roughly the same time as reading a 4k block. So request readahead for adjacent inode table @@ -9,12 +9,33 @@ tree after flushing the caches via "echo 3 > /proc/sys/vm/drop_caches" is reduced by 21%. Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/inode.c | 110 ++++++++++++++++++++++++++----------------------------- - 1 files changed, 52 insertions(+), 58 deletions(-) - +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 163c445..fc7ce2e 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -790,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) + #define EXT4_DEF_RESUID 0 + #define EXT4_DEF_RESGID 0 + ++#define EXT4_DEF_INODE_READAHEAD_BITS 5 ++ + /* + * Default mount options + */ +diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h +index f92af01..04e1fd2 100644 +--- a/fs/ext4/ext4_sb.h ++++ b/fs/ext4/ext4_sb.h +@@ -52,6 +52,7 @@ struct ext4_sb_info { + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; ++ unsigned int s_inode_readahead_bits; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index eed1265..9764b79 100644 +index eed1265..5c19604 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3833,41 +3833,6 @@ out_stop: @@ -59,43 +80,52 @@ index eed1265..9764b79 100644 /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all -@@ -3877,13 +3842,33 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, +@@ -3877,19 +3842,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, static int __ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc, int in_mem) { -+ struct ext4_group_desc *gdp; - ext4_fsblk_t block; - struct buffer_head *bh; -+ int inodes_per_block, inode_offset; +- ext4_fsblk_t block; +- struct buffer_head *bh; ++ struct ext4_group_desc *gdp; ++ struct buffer_head *bh; ++ struct super_block *sb = inode->i_sb; ++ ext4_fsblk_t block; ++ int inodes_per_block, inode_offset; + + iloc->bh = 0; -+ if (!ext4_valid_inum(inode->i_sb, inode->i_ino)) ++ if (!ext4_valid_inum(sb, inode->i_ino)) + return -EIO; - block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); - if (!block) -+ iloc->block_group = (inode->i_ino - 1) / -+ EXT4_INODES_PER_GROUP(inode->i_sb); -+ gdp = ext4_get_group_desc(inode->i_sb, iloc->block_group, NULL); ++ iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); ++ gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); + if (!gdp) return -EIO; +- bh = sb_getblk(inode->i_sb, block); + /* + * Figure out the offset within the block group inode table + */ -+ inodes_per_block = (EXT4_BLOCK_SIZE(inode->i_sb) / -+ EXT4_INODE_SIZE(inode->i_sb)); ++ inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); + inode_offset = ((inode->i_ino - 1) % -+ EXT4_INODES_PER_GROUP(inode->i_sb)); -+ block = ext4_inode_table(inode->i_sb, gdp) + -+ (inode_offset / inodes_per_block); -+ iloc->offset = (inode_offset % inodes_per_block) * -+ EXT4_INODE_SIZE(inode->i_sb); ++ EXT4_INODES_PER_GROUP(sb)); ++ block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); ++ iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); + - bh = sb_getblk(inode->i_sb, block); ++ bh = sb_getblk(sb, block); if (!bh) { - ext4_error (inode->i_sb, "ext4_get_inode_loc", -@@ -3917,28 +3902,13 @@ static int __ext4_get_inode_loc(struct inode *inode, +- ext4_error (inode->i_sb, "ext4_get_inode_loc", +- "unable to read inode block - " +- "inode=%lu, block=%llu", +- inode->i_ino, block); ++ ext4_error(sb, "ext4_get_inode_loc", "unable to read " ++ "inode block - inode=%lu, block=%llu", ++ inode->i_ino, block); + return -EIO; + } + if (!buffer_uptodate(bh)) { +@@ -3917,28 +3898,12 @@ static int __ext4_get_inode_loc(struct inode *inode, */ if (in_mem) { struct buffer_head *bitmap_bh; @@ -121,14 +151,14 @@ index eed1265..9764b79 100644 - goto make_io; + start = inode_offset & ~(inodes_per_block - 1); -+ /* Is the inode bitmap in cache? */ - bitmap_bh = sb_getblk(inode->i_sb, +- bitmap_bh = sb_getblk(inode->i_sb, - ext4_inode_bitmap(inode->i_sb, desc)); -+ ext4_inode_bitmap(inode->i_sb, gdp)); ++ /* Is the inode bitmap in cache? */ ++ bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); if (!bitmap_bh) goto make_io; -@@ -3951,14 +3921,14 @@ static int __ext4_get_inode_loc(struct inode *inode, +@@ -3951,14 +3916,14 @@ static int __ext4_get_inode_loc(struct inode *inode, brelse(bitmap_bh); goto make_io; } @@ -145,34 +175,138 @@ index eed1265..9764b79 100644 /* all other inodes are free, so skip I/O */ memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); -@@ -3969,6 +3939,30 @@ static int __ext4_get_inode_loc(struct inode *inode, +@@ -3969,6 +3934,31 @@ static int __ext4_get_inode_loc(struct inode *inode, make_io: /* + * If we need to do any I/O, try to readahead up to 16 + * blocks from the inode table. + */ -+ { ++ if (EXT4_SB(sb)->s_inode_readahead_bits) { + ext4_fsblk_t b, end, table; ++ int ra = 1 << EXT4_SB(sb)->s_inode_readahead_bits; + unsigned num; + -+ table = ext4_inode_table(inode->i_sb, gdp); -+ b = block & ~15; ++ table = ext4_inode_table(sb, gdp); ++ b = block & ~(ra-1); + if (table > b) + b = table; -+ end = b+16; -+ num = EXT4_INODES_PER_GROUP(inode->i_sb); -+ if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, ++ end = b + ra; ++ num = EXT4_INODES_PER_GROUP(sb); ++ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + num -= le16_to_cpu(gdp->bg_itable_unused); + table += num / inodes_per_block; + if (end > table) + end = table; + while (b <= end) -+ sb_breadahead(inode->i_sb, b++); ++ sb_breadahead(sb, b++); + } + + /* * There are other valid inodes in the buffer, this inode * has in-inode xattrs, or we don't have this inode in memory. * Read the block from disk. +@@ -3978,10 +3968,9 @@ make_io: + submit_bh(READ_META, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { +- ext4_error(inode->i_sb, "ext4_get_inode_loc", +- "unable to read inode block - " +- "inode=%lu, block=%llu", +- inode->i_ino, block); ++ ext4_error(sb, "ext4_get_inode_loc", ++ "unable to read inode block - inode=%lu, " ++ "block=%llu", inode->i_ino, block); + brelse(bh); + return -EIO; + } +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 1515006..00c8d97 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -514,8 +514,10 @@ static void ext4_put_super(struct super_block *sb) + BUFFER_TRACE(sbi->s_sbh, "marking dirty"); + ext4_commit_super(sb, es, 1); + } +- if (sbi->s_proc) ++ if (sbi->s_proc) { ++ remove_proc_entry("inode_readahead_bits", sbi->s_proc); + remove_proc_entry(sb->s_id, ext4_proc_root); ++ } + + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(sbi->s_group_desc[i]); +@@ -778,6 +780,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + seq_puts(seq, ",data=writeback"); + ++ if (sbi->s_inode_readahead_bits != EXT4_DEF_INODE_READAHEAD_BITS) ++ seq_printf(seq, ",inode_readahead_bits=%d", ++ sbi->s_inode_readahead_bits); ++ + ext4_show_quota_options(seq, sb); + return 0; + } +@@ -912,6 +918,7 @@ enum { + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, + Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, + Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, ++ Opt_inode_readahead_bits + }; + + static match_table_t tokens = { +@@ -972,6 +979,7 @@ static match_table_t tokens = { + {Opt_resize, "resize"}, + {Opt_delalloc, "delalloc"}, + {Opt_nodelalloc, "nodelalloc"}, ++ {Opt_inode_readahead_bits, "inode_readahead_bits=%u"}, + {Opt_err, NULL}, + }; + +@@ -1380,6 +1388,13 @@ set_qf_format: + case Opt_delalloc: + set_opt(sbi->s_mount_opt, DELALLOC); + break; ++ case Opt_inode_readahead_bits: ++ if (match_int(&args[0], &option)) ++ return 0; ++ if (option < 0 || option > 31) ++ return 0; ++ sbi->s_inode_readahead_bits = option; ++ break; + default: + printk(KERN_ERR + "EXT4-fs: Unrecognized mount option \"%s\" " +@@ -1937,6 +1952,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + sbi->s_mount_opt = 0; + sbi->s_resuid = EXT4_DEF_RESUID; + sbi->s_resgid = EXT4_DEF_RESGID; ++ sbi->s_inode_readahead_bits = EXT4_DEF_INODE_READAHEAD_BITS; + sbi->s_sb_block = sb_block; + + unlock_kernel(); +@@ -2233,6 +2249,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + if (ext4_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); + ++ if (sbi->s_proc) ++ proc_create_data("inode_readahead_bits", 0644, sbi->s_proc, ++ &ext4_ui_proc_fops, ++ &sbi->s_inode_readahead_bits); ++ + bgl_lock_init(&sbi->s_blockgroup_lock); + + for (i = 0; i < db_count; i++) { +@@ -2512,8 +2533,10 @@ failed_mount2: + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); + failed_mount: +- if (sbi->s_proc) ++ if (sbi->s_proc) { ++ remove_proc_entry("inode_readahead_bits", sbi->s_proc); + remove_proc_entry(sb->s_id, ext4_proc_root); ++ } + #ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); diff --git a/series b/series index 16263b13..284615c1 100644 --- a/series +++ b/series @@ -44,6 +44,8 @@ ext4-use-percpu-data-for-lg_prealloc_list avoid-WARN-messages.patch #use-async-commit-by-default ext4-fix-namei-stack-usage +make-proc-generic +centralize-proc-functions inode-readahead ########################################## -- 2.11.4.GIT