From 308c814e00856cb53f1bfabc3c6baa4a9bf42883 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 22 Feb 2016 22:45:48 -0500 Subject: [PATCH] Update to mbcache v3 patches --- add-reusable-flag-to-cache-entries | 296 ++++++ ext2-convert-to-mbcache2 | 68 +- ext4-convert-to-mbcache2 | 75 +- get-rid-of-e_hash_list_head | 139 +++ kill-ext4_mballoc_ready | 65 ++ mbcache2-limit-cache-size | 18 +- mbcache2-rename-to-mbcache | 1439 +++++++++++++++++++++++++++ mbcache2-use-referenced-bit-instead-of-LRU | 170 +++- reimplement-mbcache | 99 +- remove-mbcache | 4 +- series | 5 + shortcut-setting-of-xattr-to-the-same-value | 57 ++ timestamps | 32 +- 13 files changed, 2269 insertions(+), 198 deletions(-) create mode 100644 add-reusable-flag-to-cache-entries create mode 100644 get-rid-of-e_hash_list_head create mode 100644 kill-ext4_mballoc_ready create mode 100644 mbcache2-rename-to-mbcache create mode 100644 shortcut-setting-of-xattr-to-the-same-value diff --git a/add-reusable-flag-to-cache-entries b/add-reusable-flag-to-cache-entries new file mode 100644 index 00000000..88980d87 --- /dev/null +++ b/add-reusable-flag-to-cache-entries @@ -0,0 +1,296 @@ +mbcache: add reusable flag to cache entries + +From: Andreas Gruenbacher + +To reduce amount of damage caused by single bad block, we limit number +of inodes sharing an xattr block to 1024. Thus there can be more xattr +blocks with the same contents when there are lots of files with the same +extended attributes. These xattr blocks naturally result in hash +collisions and can form long hash chains and we unnecessarily check each +such block only to find out we cannot use it because it is already +shared by too many inodes. + +Add a reusable flag to cache entries which is cleared when a cache entry +has reached its maximum refcount. Cache entries which are not marked +reusable are skipped by mb_cache_entry_find_{first,next}. This +significantly speeds up mbcache when there are many same xattr blocks. +For example for xattr-bench with 5 values and each process handling +20000 files, the run for 64 processes is 25x faster with this patch. +Even for 8 processes the speedup is almost 3x. We have also verified +that for situations where there is only one xattr block of each kind, +the patch doesn't have a measurable cost. 
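+
+As a rough illustration of the lookup-side effect (a simplified,
+self-contained userspace model; the names and types below are made up
+for illustration and are not the kernel code), entries that are no
+longer marked reusable stay in the hash chain but are simply skipped
+when searching for a block to share:
+
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	struct entry {
+		unsigned int key;	/* hash of the xattr block contents */
+		unsigned long block;	/* block number holding the xattrs */
+		bool reusable;		/* mirrors e_reusable in the patch */
+		struct entry *next;	/* hash chain */
+	};
+
+	/* Only reusable entries with a matching key are returned. */
+	static struct entry *find_first(struct entry *chain, unsigned int key)
+	{
+		for (; chain; chain = chain->next)
+			if (chain->key == key && chain->reusable)
+				return chain;
+		return NULL;
+	}
+
+	int main(void)
+	{
+		/* Block 100 has hit its maximum refcount, block 200 has not. */
+		struct entry shareable = { 1, 200, true, NULL };
+		struct entry full = { 1, 100, false, &shareable };
+		struct entry *hit = find_first(&full, 1);
+
+		printf("reusing block %lu\n", hit ? hit->block : 0UL);
+		return 0;
+	}
+
+With long chains of same-hash blocks that are already at their maximum
+refcount, this avoids reading and comparing each of those blocks only to
+reject them.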
+ +[JK: Remove handling of setting the same value since it is not needed +anymore, check for races in e_reusable setting, improve changelog, +add measurements] + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext2/xattr.c | 2 +- + fs/ext4/xattr.c | 66 +++++++++++++++++++++++++++++++------------------ + fs/mbcache.c | 38 +++++++++++++++++++++++++--- + include/linux/mbcache.h | 5 +++- + 4 files changed, 81 insertions(+), 30 deletions(-) + +diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c +index 71d58c2d7a19..1a5e3bff0b63 100644 +--- a/fs/ext2/xattr.c ++++ b/fs/ext2/xattr.c +@@ -823,7 +823,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) + __u32 hash = le32_to_cpu(HDR(bh)->h_hash); + int error; + +- error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); ++ error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1); + if (error) { + if (error == -EBUSY) { + ea_bdebug(bh, "already in cache (%d cache entries)", +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index b661ae8332e3..0441e055c8e8 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -545,6 +545,8 @@ static void + ext4_xattr_release_block(handle_t *handle, struct inode *inode, + struct buffer_head *bh) + { ++ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ++ u32 hash, ref; + int error = 0; + + BUFFER_TRACE(bh, "get_write_access"); +@@ -553,23 +555,34 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + goto out; + + lock_buffer(bh); +- if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { +- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); +- ++ hash = le32_to_cpu(BHDR(bh)->h_hash); ++ ref = le32_to_cpu(BHDR(bh)->h_refcount); ++ if (ref == 1) { + ea_bdebug(bh, "refcount now=0; freeing"); + /* + * This must happen under buffer lock for + * ext4_xattr_block_set() to reliably detect freed block + */ +- mb_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, +- bh->b_blocknr); ++ mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr); + get_bh(bh); + unlock_buffer(bh); + ext4_free_blocks(handle, inode, bh, 0, 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); + } else { +- le32_add_cpu(&BHDR(bh)->h_refcount, -1); ++ ref--; ++ BHDR(bh)->h_refcount = cpu_to_le32(ref); ++ if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) { ++ struct mb_cache_entry *ce; ++ ++ ce = mb_cache_entry_get(ext4_mb_cache, hash, ++ bh->b_blocknr); ++ if (ce) { ++ ce->e_reusable = 1; ++ mb_cache_entry_put(ext4_mb_cache, ce); ++ } ++ } ++ + /* + * Beware of this ugliness: Releasing of xattr block references + * from different inodes can race and so we have to protect +@@ -872,6 +885,8 @@ inserted: + if (new_bh == bs->bh) + ea_bdebug(new_bh, "keeping"); + else { ++ u32 ref; ++ + /* The old block is released after updating + the inode. */ + error = dquot_alloc_block(inode, +@@ -886,15 +901,18 @@ inserted: + lock_buffer(new_bh); + /* + * We have to be careful about races with +- * freeing or rehashing of xattr block. Once we +- * hold buffer lock xattr block's state is +- * stable so we can check whether the block got +- * freed / rehashed or not. Since we unhash +- * mbcache entry under buffer lock when freeing +- * / rehashing xattr block, checking whether +- * entry is still hashed is reliable. ++ * freeing, rehashing or adding references to ++ * xattr block. Once we hold buffer lock xattr ++ * block's state is stable so we can check ++ * whether the block got freed / rehashed or ++ * not. 
Since we unhash mbcache entry under ++ * buffer lock when freeing / rehashing xattr ++ * block, checking whether entry is still ++ * hashed is reliable. Same rules hold for ++ * e_reusable handling. + */ +- if (hlist_bl_unhashed(&ce->e_hash_list)) { ++ if (hlist_bl_unhashed(&ce->e_hash_list) || ++ !ce->e_reusable) { + /* + * Undo everything and check mbcache + * again. +@@ -909,9 +927,12 @@ inserted: + new_bh = NULL; + goto inserted; + } +- le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); ++ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; ++ BHDR(new_bh)->h_refcount = cpu_to_le32(ref); ++ if (ref >= EXT4_XATTR_REFCOUNT_MAX) ++ ce->e_reusable = 0; + ea_bdebug(new_bh, "reusing; refcount now=%d", +- le32_to_cpu(BHDR(new_bh)->h_refcount)); ++ ref); + unlock_buffer(new_bh); + error = ext4_handle_dirty_xattr_block(handle, + inode, +@@ -1566,11 +1587,14 @@ cleanup: + static void + ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) + { +- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); ++ struct ext4_xattr_header *header = BHDR(bh); ++ __u32 hash = le32_to_cpu(header->h_hash); ++ int reusable = le32_to_cpu(header->h_refcount) < ++ EXT4_XATTR_REFCOUNT_MAX; + int error; + + error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, +- bh->b_blocknr); ++ bh->b_blocknr, reusable); + if (error) { + if (error == -EBUSY) + ea_bdebug(bh, "already in cache"); +@@ -1645,12 +1669,6 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, + if (!bh) { + EXT4_ERROR_INODE(inode, "block %lu read error", + (unsigned long) ce->e_block); +- } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= +- EXT4_XATTR_REFCOUNT_MAX) { +- ea_idebug(inode, "block %lu refcount %d>=%d", +- (unsigned long) ce->e_block, +- le32_to_cpu(BHDR(bh)->h_refcount), +- EXT4_XATTR_REFCOUNT_MAX); + } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { + *pce = ce; + return bh; +diff --git a/fs/mbcache.c b/fs/mbcache.c +index 903be151dcfe..eccda3a02de6 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -63,13 +63,14 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache, + * @mask - gfp mask with which the entry should be allocated + * @key - key of the entry + * @block - block that contains data ++ * @reusable - is the block reusable by other inodes? + * + * Creates entry in @cache with key @key and records that data is stored in + * block @block. The function returns -EBUSY if entry with the same key + * and for the same block already exists in cache. Otherwise 0 is returned. 
+ */ + int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, +- sector_t block) ++ sector_t block, bool reusable) + { + struct mb_cache_entry *entry, *dup; + struct hlist_bl_node *dup_node; +@@ -91,6 +92,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_block = block; ++ entry->e_reusable = reusable; + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { +@@ -137,7 +139,7 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + while (node) { + entry = hlist_bl_entry(node, struct mb_cache_entry, + e_hash_list); +- if (entry->e_key == key) { ++ if (entry->e_key == key && entry->e_reusable) { + atomic_inc(&entry->e_refcnt); + goto out; + } +@@ -184,10 +186,38 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, + } + EXPORT_SYMBOL(mb_cache_entry_find_next); + ++/* ++ * mb_cache_entry_get - get a cache entry by block number (and key) ++ * @cache - cache we work with ++ * @key - key of block number @block ++ * @block - block number ++ */ ++struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, ++ sector_t block) ++{ ++ struct hlist_bl_node *node; ++ struct hlist_bl_head *head; ++ struct mb_cache_entry *entry; ++ ++ head = mb_cache_entry_head(cache, key); ++ hlist_bl_lock(head); ++ hlist_bl_for_each_entry(entry, node, head, e_hash_list) { ++ if (entry->e_key == key && entry->e_block == block) { ++ atomic_inc(&entry->e_refcnt); ++ goto out; ++ } ++ } ++ entry = NULL; ++out: ++ hlist_bl_unlock(head); ++ return entry; ++} ++EXPORT_SYMBOL(mb_cache_entry_get); ++ + /* mb_cache_entry_delete_block - remove information about block from cache + * @cache - cache we work with +- * @key - key of the entry to remove +- * @block - block containing data for @key ++ * @key - key of block @block ++ * @block - block number + * + * Remove entry from cache @cache with key @key with data stored in @block. 
+ */ +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +index 607e6968542e..86c9a8b480c5 100644 +--- a/include/linux/mbcache.h ++++ b/include/linux/mbcache.h +@@ -18,6 +18,7 @@ struct mb_cache_entry { + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; + u32 e_referenced:1; ++ u32 e_reusable:1; + /* Block number of hashed block - stable during lifetime of the entry */ + sector_t e_block; + }; +@@ -26,7 +27,7 @@ struct mb_cache *mb_cache_create(int bucket_bits); + void mb_cache_destroy(struct mb_cache *cache); + + int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, +- sector_t block); ++ sector_t block, bool reusable); + void __mb_cache_entry_free(struct mb_cache_entry *entry); + static inline int mb_cache_entry_put(struct mb_cache *cache, + struct mb_cache_entry *entry) +@@ -39,6 +40,8 @@ static inline int mb_cache_entry_put(struct mb_cache *cache, + + void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, + sector_t block); ++struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, ++ sector_t block); + struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, + u32 key); + struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, +-- +2.6.2 + + diff --git a/ext2-convert-to-mbcache2 b/ext2-convert-to-mbcache2 index 00bc485d..d98d417b 100644 --- a/ext2-convert-to-mbcache2 +++ b/ext2-convert-to-mbcache2 @@ -13,9 +13,9 @@ Signed-off-by: Theodore Ts'o --- fs/ext2/ext2.h | 3 ++ fs/ext2/super.c | 25 ++++++---- - fs/ext2/xattr.c | 146 +++++++++++++++++++++++++++----------------------------- - fs/ext2/xattr.h | 21 ++------ - 4 files changed, 95 insertions(+), 100 deletions(-) + fs/ext2/xattr.c | 143 ++++++++++++++++++++++++++------------------------------ + fs/ext2/xattr.h | 21 ++------- + 4 files changed, 92 insertions(+), 100 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 4c69c94cafd8..f98ce7e60a0f 100644 @@ -39,7 +39,7 @@ index 4c69c94cafd8..f98ce7e60a0f 100644 static inline spinlock_t * diff --git a/fs/ext2/super.c b/fs/ext2/super.c -index 748d35afc902..111a31761ffa 100644 +index 2a188413a2b0..b78caf25f746 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb) @@ -111,7 +111,7 @@ index 748d35afc902..111a31761ffa 100644 MODULE_AUTHOR("Remy Card and others"); diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c -index fa70848afa8f..c7ab4cadcea0 100644 +index f57a7aba32eb..7162b4869bc3 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -56,7 +56,7 @@ @@ -123,7 +123,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 #include #include #include -@@ -92,14 +92,12 @@ +@@ -90,14 +90,12 @@ static int ext2_xattr_set2(struct inode *, struct buffer_head *, struct ext2_xattr_header *); @@ -139,7 +139,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 static const struct xattr_handler *ext2_xattr_handler_map[] = { [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, #ifdef CONFIG_EXT2_FS_POSIX_ACL -@@ -154,6 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, +@@ -152,6 +150,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, size_t name_len, size; char *end; int error; @@ -147,7 +147,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); -@@ -198,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", +@@ -196,7 +195,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", 
goto found; entry = next; } @@ -156,7 +156,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_idebug(inode, "cache insert failed"); error = -ENODATA; goto cleanup; -@@ -211,7 +210,7 @@ found: +@@ -209,7 +208,7 @@ found: le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) goto bad_block; @@ -165,7 +165,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_idebug(inode, "cache insert failed"); if (buffer) { error = -ERANGE; -@@ -249,6 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) +@@ -247,6 +246,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) char *end; size_t rest = buffer_size; int error; @@ -173,7 +173,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); -@@ -283,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", +@@ -281,7 +281,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", goto bad_block; entry = next; } @@ -182,7 +182,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_idebug(inode, "cache insert failed"); /* list the attribute names */ -@@ -480,22 +480,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set", +@@ -483,22 +483,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { @@ -214,7 +214,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 unlock_buffer(bh); ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); -@@ -623,6 +624,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, +@@ -626,6 +627,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; int error; @@ -222,7 +222,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 if (header) { new_bh = ext2_xattr_cache_find(inode, header); -@@ -650,7 +652,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, +@@ -653,7 +655,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); @@ -231,7 +231,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 } else { /* We need to allocate a new block */ ext2_fsblk_t goal = ext2_group_first_block_no(sb, -@@ -671,7 +673,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, +@@ -674,7 +676,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, memcpy(new_bh->b_data, header, new_bh->b_size); set_buffer_uptodate(new_bh); unlock_buffer(new_bh); @@ -240,7 +240,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ext2_xattr_update_super_block(sb); } -@@ -704,19 +706,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, +@@ -707,19 +709,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, error = 0; if (old_bh && old_bh != new_bh) { @@ -268,7 +268,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); mark_inode_dirty(inode); -@@ -727,8 +731,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, +@@ -730,8 +734,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, } else { /* Decrement the refcount only. 
*/ le32_add_cpu(&HDR(old_bh)->h_refcount, -1); @@ -277,7 +277,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 dquot_free_block_nodirty(inode, 1); mark_inode_dirty(inode); mark_buffer_dirty(old_bh); -@@ -754,7 +756,6 @@ void +@@ -757,7 +759,6 @@ void ext2_xattr_delete_inode(struct inode *inode) { struct buffer_head *bh = NULL; @@ -285,7 +285,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 down_write(&EXT2_I(inode)->xattr_sem); if (!EXT2_I(inode)->i_file_acl) -@@ -774,19 +775,22 @@ ext2_xattr_delete_inode(struct inode *inode) +@@ -777,19 +778,22 @@ ext2_xattr_delete_inode(struct inode *inode) EXT2_I(inode)->i_file_acl); goto cleanup; } @@ -313,7 +313,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount)); unlock_buffer(bh); -@@ -803,18 +807,6 @@ cleanup: +@@ -806,18 +810,6 @@ cleanup: } /* @@ -332,7 +332,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 * ext2_xattr_cache_insert() * * Create a new entry in the extended attribute cache, and insert -@@ -823,27 +815,22 @@ ext2_xattr_put_super(struct super_block *sb) +@@ -826,28 +818,20 @@ ext2_xattr_put_super(struct super_block *sb) * Returns 0, or a negative error number on failure. */ static int @@ -341,33 +341,31 @@ index fa70848afa8f..c7ab4cadcea0 100644 { __u32 hash = le32_to_cpu(HDR(bh)->h_hash); - struct mb_cache_entry *ce; -+ struct mb2_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); - if (!ce) - return -ENOMEM; - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); -- if (error) { ++ error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); + if (error) { - mb_cache_entry_free(ce); -- if (error == -EBUSY) { -+ ce = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); -+ if (IS_ERR(ce)) { -+ if (PTR_ERR(ce) == -EBUSY) { + if (error == -EBUSY) { ea_bdebug(bh, "already in cache (%d cache entries)", atomic_read(&ext2_xattr_cache->c_entry_count)); error = 0; } - } else { +- } else { - ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, - atomic_read(&ext2_xattr_cache->c_entry_count)); - mb_cache_entry_release(ce); +- } ++ } else + ea_bdebug(bh, "inserting [%x]", (int)hash); -+ mb2_cache_entry_put(cache, ce); - } return error; } -@@ -900,23 +887,17 @@ static struct buffer_head * + +@@ -903,23 +887,17 @@ static struct buffer_head * ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) { __u32 hash = le32_to_cpu(header->h_hash); @@ -394,7 +392,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_cache_find", -@@ -924,7 +905,21 @@ again: +@@ -927,7 +905,21 @@ again: inode->i_ino, (unsigned long) ce->e_block); } else { lock_buffer(bh); @@ -417,7 +415,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 EXT2_XATTR_REFCOUNT_MAX) { ea_idebug(inode, "block %ld refcount %d>%d", (unsigned long) ce->e_block, -@@ -933,13 +928,14 @@ again: +@@ -936,13 +928,14 @@ again: } else if (!ext2_xattr_cmp(header, HDR(bh))) { ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); @@ -434,7 +432,7 @@ index fa70848afa8f..c7ab4cadcea0 100644 } return NULL; } -@@ -1012,17 +1008,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, +@@ -1015,17 +1008,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, #undef BLOCK_HASH_SHIFT @@ -507,6 +505,6 @@ index 60edf298644e..6ea38aa9563a 100644 } -- -2.1.4 +2.6.2 diff --git a/ext4-convert-to-mbcache2 b/ext4-convert-to-mbcache2 index 88b8385f..5344bee1 100644 --- 
a/ext4-convert-to-mbcache2 +++ b/ext4-convert-to-mbcache2 @@ -12,15 +12,15 @@ Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/super.c | 7 ++- - fs/ext4/xattr.c | 135 +++++++++++++++++++++++++++++--------------------------- + fs/ext4/xattr.c | 136 ++++++++++++++++++++++++++++---------------------------- fs/ext4/xattr.h | 5 +-- - 4 files changed, 77 insertions(+), 72 deletions(-) + 4 files changed, 75 insertions(+), 75 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 750063f7a50c..068a3eaa41ac 100644 +index 0662b285dc8a..b53cbc05b172 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h -@@ -1371,7 +1371,7 @@ struct ext4_sb_info { +@@ -1468,7 +1468,7 @@ struct ext4_sb_info { struct list_head s_es_list; /* List of inodes with reclaimable extents */ long s_es_nr_inode; struct ext4_es_stats s_es_stats; @@ -30,10 +30,10 @@ index 750063f7a50c..068a3eaa41ac 100644 /* Ratelimit ext4 messages. */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index c9ab67da6e5a..dbd5b9b9c99a 100644 +index 3ed01ec011d7..ecc37e103435 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c -@@ -814,7 +814,6 @@ static void ext4_put_super(struct super_block *sb) +@@ -844,7 +844,6 @@ static void ext4_put_super(struct super_block *sb) ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -41,7 +41,7 @@ index c9ab67da6e5a..dbd5b9b9c99a 100644 if (!(sb->s_flags & MS_RDONLY)) { ext4_clear_feature_journal_needs_recovery(sb); -@@ -3759,7 +3758,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) +@@ -3797,7 +3796,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) no_journal: if (ext4_mballoc_ready) { @@ -50,7 +50,7 @@ index c9ab67da6e5a..dbd5b9b9c99a 100644 if (!sbi->s_mb_cache) { ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); goto failed_mount_wq; -@@ -3989,6 +3988,10 @@ failed_mount4: +@@ -4027,6 +4026,10 @@ failed_mount4: if (EXT4_SB(sb)->rsv_conversion_wq) destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); failed_mount_wq: @@ -62,7 +62,7 @@ index c9ab67da6e5a..dbd5b9b9c99a 100644 jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index 6b6b3e751f8c..a80e5e2acadd 100644 +index a95151e875bd..fe9f8d6ab6c9 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -53,7 +53,7 @@ @@ -74,7 +74,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 #include #include "ext4_jbd2.h" #include "ext4.h" -@@ -80,10 +80,10 @@ +@@ -78,10 +78,10 @@ # define ea_bdebug(bh, fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) #endif @@ -87,7 +87,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); static int ext4_xattr_list(struct dentry *dentry, char *buffer, -@@ -278,7 +278,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, +@@ -276,7 +276,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, struct ext4_xattr_entry *entry; size_t size; int error; @@ -96,7 +96,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); -@@ -425,7 +425,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) +@@ -428,7 +428,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct inode *inode = d_inode(dentry); struct buffer_head *bh = NULL; int error; @@ -105,7 +105,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); -@@ -542,11 +542,8 @@ static void +@@ -545,11 +545,8 @@ static void ext4_xattr_release_block(handle_t *handle, struct inode *inode, struct buffer_head *bh) { @@ -117,7 +117,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 BUFFER_TRACE(bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bh); if (error) -@@ -554,9 +551,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, +@@ -557,9 +554,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, lock_buffer(bh); if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { @@ -135,7 +135,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 get_bh(bh); unlock_buffer(bh); ext4_free_blocks(handle, inode, bh, 0, 1, -@@ -564,8 +567,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, +@@ -567,8 +570,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, EXT4_FREE_BLOCKS_FORGET); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); @@ -144,7 +144,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 /* * Beware of this ugliness: Releasing of xattr block references * from different inodes can race and so we have to protect -@@ -778,17 +779,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, +@@ -781,17 +782,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; struct ext4_xattr_search *s = &bs->s; @@ -164,7 +164,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bs->bh); if (error) -@@ -796,10 +795,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, +@@ -799,10 +798,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, lock_buffer(bs->bh); if (header(s->base)->h_refcount == cpu_to_le32(1)) { @@ -184,7 +184,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s); if (!error) { -@@ -823,10 +827,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, +@@ -826,10 +830,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, int offset = (char *)s->here - bs->bh->b_data; unlock_buffer(bs->bh); @@ -195,7 +195,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 ea_bdebug(bs->bh, "cloning"); s->base = kmalloc(bs->bh->b_size, GFP_NOFS); error = -ENOMEM; -@@ -881,6 +881,31 @@ inserted: +@@ -884,6 +884,31 @@ inserted: if (error) goto cleanup_dquot; lock_buffer(new_bh); @@ -227,7 +227,7 @@ index 
6b6b3e751f8c..a80e5e2acadd 100644 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); -@@ -891,7 +916,8 @@ inserted: +@@ -894,7 +919,8 @@ inserted: if (error) goto cleanup_dquot; } @@ -237,7 +237,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 ce = NULL; } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ -@@ -956,7 +982,7 @@ getblk_failed: +@@ -959,7 +985,7 @@ getblk_failed: cleanup: if (ce) @@ -246,7 +246,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 brelse(new_bh); if (!(bs->bh && s->base == bs->bh->b_data)) kfree(s->base); -@@ -1509,17 +1535,6 @@ cleanup: +@@ -1512,17 +1538,6 @@ cleanup: } /* @@ -264,7 +264,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 * ext4_xattr_cache_insert() * * Create a new entry in the extended attribute cache, and insert -@@ -1528,27 +1543,22 @@ ext4_xattr_put_super(struct super_block *sb) +@@ -1531,28 +1546,18 @@ ext4_xattr_put_super(struct super_block *sb) * Returns 0, or a negative error number on failure. */ static void @@ -273,7 +273,6 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 { __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); - struct mb_cache_entry *ce; -+ struct mb2_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); @@ -282,24 +281,24 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 - return; - } - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); -- if (error) { ++ error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, ++ bh->b_blocknr); + if (error) { - mb_cache_entry_free(ce); - if (error == -EBUSY) { -+ ce = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, -+ bh->b_blocknr); -+ if (IS_ERR(ce)) { -+ if (PTR_ERR(ce) == -EBUSY) { ++ if (error == -EBUSY) ea_bdebug(bh, "already in cache"); - error = 0; - } - } else { +- error = 0; +- } +- } else { ++ } else ea_bdebug(bh, "inserting [%x]", (int)hash); - mb_cache_entry_release(ce); -+ mb2_cache_entry_put(ext4_mb_cache, ce); - } +- } } -@@ -1602,26 +1612,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, + /* +@@ -1605,26 +1610,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, */ static struct buffer_head * ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, @@ -330,7 +329,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { EXT4_ERROR_INODE(inode, "block %lu read error", -@@ -1637,7 +1640,7 @@ again: +@@ -1640,7 +1638,7 @@ again: return bh; } brelse(bh); @@ -339,7 +338,7 @@ index 6b6b3e751f8c..a80e5e2acadd 100644 } return NULL; } -@@ -1712,15 +1715,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, +@@ -1715,15 +1713,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, #define HASH_BUCKET_BITS 10 @@ -384,6 +383,6 @@ index ddc0957760ba..10b0f7323ed6 100644 #ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, -- -2.1.4 +2.6.2 diff --git a/get-rid-of-e_hash_list_head b/get-rid-of-e_hash_list_head new file mode 100644 index 00000000..6f2d7c10 --- /dev/null +++ b/get-rid-of-e_hash_list_head @@ -0,0 +1,139 @@ +mbcache: get rid of _e_hash_list_head + +From: Andreas Gruenbacher + +Get rid of field _e_hash_list_head in cache entries and add bit field +e_referenced instead. 
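+
+A condensed sketch of the idea (a standalone userspace model with
+made-up names, not the kernel code): the referenced bit moves from the
+low bit of the stored head pointer into a plain bit field, and the hash
+chain head is recomputed from the key whenever it is needed:
+
+	#include <stdint.h>
+
+	#define BUCKET_BITS	10
+	#define NBUCKETS	(1u << BUCKET_BITS)
+
+	struct bucket { int lock; };		/* stand-in for hlist_bl_head */
+	static struct bucket hash_table[NBUCKETS];
+
+	/* Old scheme: per-entry head pointer, low bit abused as "referenced". */
+	struct entry_old {
+		uint32_t key;
+		unsigned long _hash_list_head;
+	};
+
+	/* New scheme: referenced is a bit field, head is derived from the key. */
+	struct entry_new {
+		uint32_t key;
+		uint32_t referenced:1;
+	};
+
+	static struct bucket *entry_head(const struct entry_new *e)
+	{
+		/* the real code uses hash_32(key, bucket_bits) */
+		return &hash_table[e->key & (NBUCKETS - 1)];
+	}
+
+	int main(void)
+	{
+		struct entry_new e = { .key = 0xdeadbeef, .referenced = 0 };
+
+		return entry_head(&e) == &hash_table[0xdeadbeef & (NBUCKETS - 1)] ? 0 : 1;
+	}
+
+The entry no longer carries the packed head pointer; the trade-off is
+recomputing the hash bucket from the key where the head used to be
+loaded directly.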
+ +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/mbcache.c | 41 ++++++++++------------------------------- + include/linux/mbcache.h | 8 ++------ + 2 files changed, 12 insertions(+), 37 deletions(-) + +diff --git a/fs/mbcache.c b/fs/mbcache.c +index 4241b633f155..903be151dcfe 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -45,27 +45,10 @@ static struct kmem_cache *mb_entry_cache; + static unsigned long mb_cache_shrink(struct mb_cache *cache, + unsigned int nr_to_scan); + +-static inline bool mb_cache_entry_referenced(struct mb_cache_entry *entry) ++static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache, ++ u32 key) + { +- return entry->_e_hash_list_head & 1; +-} +- +-static inline void mb_cache_entry_set_referenced(struct mb_cache_entry *entry) +-{ +- entry->_e_hash_list_head |= 1; +-} +- +-static inline void mb_cache_entry_clear_referenced( +- struct mb_cache_entry *entry) +-{ +- entry->_e_hash_list_head &= ~1; +-} +- +-static inline struct hlist_bl_head *mb_cache_entry_head( +- struct mb_cache_entry *entry) +-{ +- return (struct hlist_bl_head *) +- (entry->_e_hash_list_head & ~1); ++ return &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + } + + /* +@@ -108,8 +91,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_block = block; +- head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; +- entry->_e_hash_list_head = (unsigned long)head; ++ head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { + if (dup->e_key == key && dup->e_block == block) { +@@ -146,10 +128,7 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + struct hlist_bl_node *node; + struct hlist_bl_head *head; + +- if (entry) +- head = mb_cache_entry_head(entry); +- else +- head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; ++ head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) + node = entry->e_hash_list.next; +@@ -219,7 +198,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, + struct hlist_bl_head *head; + struct mb_cache_entry *entry; + +- head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; ++ head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { + if (entry->e_key == key && entry->e_block == block) { +@@ -250,7 +229,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_block); + void mb_cache_entry_touch(struct mb_cache *cache, + struct mb_cache_entry *entry) + { +- mb_cache_entry_set_referenced(entry); ++ entry->e_referenced = 1; + } + EXPORT_SYMBOL(mb_cache_entry_touch); + +@@ -275,8 +254,8 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + while (nr_to_scan-- && !list_empty(&cache->c_list)) { + entry = list_first_entry(&cache->c_list, + struct mb_cache_entry, e_list); +- if (mb_cache_entry_referenced(entry)) { +- mb_cache_entry_clear_referenced(entry); ++ if (entry->e_referenced) { ++ entry->e_referenced = 0; + list_move_tail(&cache->c_list, &entry->e_list); + continue; + } +@@ -287,7 +266,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + * from under us. 
+ */ + spin_unlock(&cache->c_list_lock); +- head = mb_cache_entry_head(entry); ++ head = mb_cache_entry_head(cache, entry->e_key); + hlist_bl_lock(head); + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +index a74a1f3082fb..607e6968542e 100644 +--- a/include/linux/mbcache.h ++++ b/include/linux/mbcache.h +@@ -12,18 +12,14 @@ struct mb_cache; + struct mb_cache_entry { + /* List of entries in cache - protected by cache->c_list_lock */ + struct list_head e_list; +- /* Hash table list - protected by bitlock in e_hash_list_head */ ++ /* Hash table list - protected by hash chain bitlock */ + struct hlist_bl_node e_hash_list; + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; ++ u32 e_referenced:1; + /* Block number of hashed block - stable during lifetime of the entry */ + sector_t e_block; +- /* +- * Head of hash list (for list bit lock) - stable. Combined with +- * referenced bit of entry +- */ +- unsigned long _e_hash_list_head; + }; + + struct mb_cache *mb_cache_create(int bucket_bits); +-- +2.6.2 + + diff --git a/kill-ext4_mballoc_ready b/kill-ext4_mballoc_ready new file mode 100644 index 00000000..61a107b5 --- /dev/null +++ b/kill-ext4_mballoc_ready @@ -0,0 +1,65 @@ +ext4: kill ext4_mballoc_ready + +From: Andreas Gruenbacher + +This variable, introduced in commit 9c191f70, is unnecessary: it is set +once the module has been initialized correctly, and ext4_fill_super +cannot run unless the module has been initialized correctly. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/super.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index ecc37e103435..2f550519e0aa 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -55,7 +55,6 @@ + + static struct ext4_lazy_init *ext4_li_info; + static struct mutex ext4_li_mtx; +-static int ext4_mballoc_ready; + static struct ratelimit_state ext4_mount_msg_ratelimit; + + static int ext4_load_journal(struct super_block *, struct ext4_super_block *, +@@ -3795,12 +3794,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; + + no_journal: +- if (ext4_mballoc_ready) { +- sbi->s_mb_cache = ext4_xattr_create_cache(); +- if (!sbi->s_mb_cache) { +- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); +- goto failed_mount_wq; +- } ++ sbi->s_mb_cache = ext4_xattr_create_cache(); ++ if (!sbi->s_mb_cache) { ++ ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); ++ goto failed_mount_wq; + } + + if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && +@@ -5361,8 +5358,6 @@ static int __init ext4_init_fs(void) + err = ext4_init_mballoc(); + if (err) + goto out2; +- else +- ext4_mballoc_ready = 1; + err = init_inodecache(); + if (err) + goto out1; +@@ -5378,7 +5373,6 @@ out: + unregister_as_ext3(); + destroy_inodecache(); + out1: +- ext4_mballoc_ready = 0; + ext4_exit_mballoc(); + out2: + ext4_exit_sysfs(); +-- +2.6.2 + + diff --git a/mbcache2-limit-cache-size b/mbcache2-limit-cache-size index 83c6f568..ced2081d 100644 --- a/mbcache2-limit-cache-size +++ b/mbcache2-limit-cache-size @@ -20,7 +20,7 @@ Signed-off-by: Theodore Ts'o 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/fs/mbcache2.c b/fs/mbcache2.c -index 4ccf0752c6d1..fe9f6f6a2953 100644 +index 
5c3e1a8c38f6..3e3198d6b9d6 100644 --- a/fs/mbcache2.c +++ b/fs/mbcache2.c @@ -4,6 +4,7 @@ @@ -31,7 +31,7 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 #include /* -@@ -21,16 +22,29 @@ struct mb2_cache { +@@ -27,16 +28,29 @@ struct mb2_cache { struct hlist_bl_head *c_hash; /* log2 of hash table size */ int c_bucket_bits; @@ -61,7 +61,7 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 /* * mb2_cache_entry_create - create entry in cache * @cache - cache where the entry should be created -@@ -52,6 +66,13 @@ struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache, +@@ -55,6 +69,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, struct hlist_bl_node *dup_node; struct hlist_bl_head *head; @@ -74,8 +74,8 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 + entry = kmem_cache_alloc(mb2_entry_cache, mask); if (!entry) - return ERR_PTR(-ENOMEM); -@@ -252,12 +273,9 @@ static unsigned long mb2_cache_count(struct shrinker *shrink, + return -ENOMEM; +@@ -223,12 +244,9 @@ static unsigned long mb2_cache_count(struct shrinker *shrink, } /* Shrink number of entries in cache */ @@ -90,7 +90,7 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 struct mb2_cache_entry *entry; struct hlist_bl_head *head; unsigned int shrunk = 0; -@@ -290,6 +308,25 @@ static unsigned long mb2_cache_scan(struct shrinker *shrink, +@@ -261,6 +279,25 @@ static unsigned long mb2_cache_scan(struct shrinker *shrink, return shrunk; } @@ -116,7 +116,7 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 /* * mb2_cache_create - create cache * @bucket_bits: log2 of the hash table size -@@ -309,6 +346,7 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) +@@ -280,6 +317,7 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) if (!cache) goto err_out; cache->c_bucket_bits = bucket_bits; @@ -124,7 +124,7 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 INIT_LIST_HEAD(&cache->c_lru_list); spin_lock_init(&cache->c_lru_list_lock); cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), -@@ -325,6 +363,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) +@@ -296,6 +334,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) cache->c_shrink.seeks = DEFAULT_SEEKS; register_shrinker(&cache->c_shrink); @@ -134,6 +134,6 @@ index 4ccf0752c6d1..fe9f6f6a2953 100644 err_out: -- -2.1.4 +2.6.2 diff --git a/mbcache2-rename-to-mbcache b/mbcache2-rename-to-mbcache new file mode 100644 index 00000000..9f50f306 --- /dev/null +++ b/mbcache2-rename-to-mbcache @@ -0,0 +1,1439 @@ +mbcache2: rename to mbcache + +From: Jan Kara + +Since old mbcache code is gone, let's rename new code to mbcache since +number 2 is now meaningless. This is just a mechanical replacement. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/Makefile | 2 +- + fs/ext2/ext2.h | 4 +- + fs/ext2/xattr.c | 48 +++--- + fs/ext2/xattr.h | 8 +- + fs/ext4/ext4.h | 2 +- + fs/ext4/xattr.c | 54 +++--- + fs/ext4/xattr.h | 4 +- + fs/mbcache.c | 424 +++++++++++++++++++++++++++++++++++++++++++++++ + fs/mbcache2.c | 424 ----------------------------------------------- + include/linux/mbcache.h | 53 ++++++ + include/linux/mbcache2.h | 53 ------ + 11 files changed, 538 insertions(+), 538 deletions(-) + create mode 100644 fs/mbcache.c + delete mode 100644 fs/mbcache2.c + create mode 100644 include/linux/mbcache.h + delete mode 100644 include/linux/mbcache2.h + +diff --git a/fs/Makefile b/fs/Makefile +index 59b844007fbc..79f522575cba 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o + obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o + obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o + +-obj-$(CONFIG_FS_MBCACHE) += mbcache2.o ++obj-$(CONFIG_FS_MBCACHE) += mbcache.o + obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o + obj-$(CONFIG_NFS_COMMON) += nfs_common/ + obj-$(CONFIG_COREDUMP) += coredump.o +diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h +index f98ce7e60a0f..170939f379d7 100644 +--- a/fs/ext2/ext2.h ++++ b/fs/ext2/ext2.h +@@ -61,7 +61,7 @@ struct ext2_block_alloc_info { + #define rsv_start rsv_window._rsv_start + #define rsv_end rsv_window._rsv_end + +-struct mb2_cache; ++struct mb_cache; + + /* + * second extended-fs super-block data in memory +@@ -113,7 +113,7 @@ struct ext2_sb_info { + * of the mount options. + */ + spinlock_t s_lock; +- struct mb2_cache *s_mb_cache; ++ struct mb_cache *s_mb_cache; + }; + + static inline spinlock_t * +diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c +index 7162b4869bc3..71d58c2d7a19 100644 +--- a/fs/ext2/xattr.c ++++ b/fs/ext2/xattr.c +@@ -56,7 +56,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -90,7 +90,7 @@ + static int ext2_xattr_set2(struct inode *, struct buffer_head *, + struct ext2_xattr_header *); + +-static int ext2_xattr_cache_insert(struct mb2_cache *, struct buffer_head *); ++static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *); + static struct buffer_head *ext2_xattr_cache_find(struct inode *, + struct ext2_xattr_header *); + static void ext2_xattr_rehash(struct ext2_xattr_header *, +@@ -150,7 +150,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, + size_t name_len, size; + char *end; + int error; +- struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ++ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; + + ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", + name_index, name, buffer, (long)buffer_size); +@@ -246,7 +246,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) + char *end; + size_t rest = buffer_size; + int error; +- struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ++ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; + + ea_idebug(inode, "buffer=%p, buffer_size=%ld", + buffer, (long)buffer_size); +@@ -493,8 +493,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set", + * This must happen under buffer lock for + * ext2_xattr_set2() to reliably detect modified block + */ +- mb2_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache, +- hash, bh->b_blocknr); ++ mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache, ++ hash, bh->b_blocknr); + + /* keep the buffer locked while modifying 
it. */ + } else { +@@ -627,7 +627,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh = NULL; + int error; +- struct mb2_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache; ++ struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache; + + if (header) { + new_bh = ext2_xattr_cache_find(inode, header); +@@ -721,8 +721,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, + * This must happen under buffer lock for + * ext2_xattr_set2() to reliably detect freed block + */ +- mb2_cache_entry_delete_block(ext2_mb_cache, +- hash, old_bh->b_blocknr); ++ mb_cache_entry_delete_block(ext2_mb_cache, ++ hash, old_bh->b_blocknr); + /* Free the old block. */ + ea_bdebug(old_bh, "freeing"); + ext2_free_blocks(inode, old_bh->b_blocknr, 1); +@@ -786,8 +786,8 @@ ext2_xattr_delete_inode(struct inode *inode) + * This must happen under buffer lock for ext2_xattr_set2() to + * reliably detect freed block + */ +- mb2_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache, +- hash, bh->b_blocknr); ++ mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache, ++ hash, bh->b_blocknr); + ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); + get_bh(bh); + bforget(bh); +@@ -818,12 +818,12 @@ cleanup: + * Returns 0, or a negative error number on failure. + */ + static int +-ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh) ++ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) + { + __u32 hash = le32_to_cpu(HDR(bh)->h_hash); + int error; + +- error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); ++ error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); + if (error) { + if (error == -EBUSY) { + ea_bdebug(bh, "already in cache (%d cache entries)", +@@ -887,14 +887,14 @@ static struct buffer_head * + ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) + { + __u32 hash = le32_to_cpu(header->h_hash); +- struct mb2_cache_entry *ce; +- struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ++ struct mb_cache_entry *ce; ++ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; + + if (!header->h_hash) + return NULL; /* never share */ + ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); + again: +- ce = mb2_cache_entry_find_first(ext2_mb_cache, hash); ++ ce = mb_cache_entry_find_first(ext2_mb_cache, hash); + while (ce) { + struct buffer_head *bh; + +@@ -915,7 +915,7 @@ again: + * entry is still hashed is reliable. 
+ */ + if (hlist_bl_unhashed(&ce->e_hash_list)) { +- mb2_cache_entry_put(ext2_mb_cache, ce); ++ mb_cache_entry_put(ext2_mb_cache, ce); + unlock_buffer(bh); + brelse(bh); + goto again; +@@ -928,14 +928,14 @@ again: + } else if (!ext2_xattr_cmp(header, HDR(bh))) { + ea_bdebug(bh, "b_count=%d", + atomic_read(&(bh->b_count))); +- mb2_cache_entry_touch(ext2_mb_cache, ce); +- mb2_cache_entry_put(ext2_mb_cache, ce); ++ mb_cache_entry_touch(ext2_mb_cache, ce); ++ mb_cache_entry_put(ext2_mb_cache, ce); + return bh; + } + unlock_buffer(bh); + brelse(bh); + } +- ce = mb2_cache_entry_find_next(ext2_mb_cache, ce); ++ ce = mb_cache_entry_find_next(ext2_mb_cache, ce); + } + return NULL; + } +@@ -1010,13 +1010,13 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, + + #define HASH_BUCKET_BITS 10 + +-struct mb2_cache *ext2_xattr_create_cache(void) ++struct mb_cache *ext2_xattr_create_cache(void) + { +- return mb2_cache_create(HASH_BUCKET_BITS); ++ return mb_cache_create(HASH_BUCKET_BITS); + } + +-void ext2_xattr_destroy_cache(struct mb2_cache *cache) ++void ext2_xattr_destroy_cache(struct mb_cache *cache) + { + if (cache) +- mb2_cache_destroy(cache); ++ mb_cache_destroy(cache); + } +diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h +index 6ea38aa9563a..6f82ab1b00ca 100644 +--- a/fs/ext2/xattr.h ++++ b/fs/ext2/xattr.h +@@ -53,7 +53,7 @@ struct ext2_xattr_entry { + #define EXT2_XATTR_SIZE(size) \ + (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) + +-struct mb2_cache; ++struct mb_cache; + + # ifdef CONFIG_EXT2_FS_XATTR + +@@ -68,8 +68,8 @@ extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_ + + extern void ext2_xattr_delete_inode(struct inode *); + +-extern struct mb2_cache *ext2_xattr_create_cache(void); +-extern void ext2_xattr_destroy_cache(struct mb2_cache *cache); ++extern struct mb_cache *ext2_xattr_create_cache(void); ++extern void ext2_xattr_destroy_cache(struct mb_cache *cache); + + extern const struct xattr_handler *ext2_xattr_handlers[]; + +@@ -94,7 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode) + { + } + +-static inline void ext2_xattr_destroy_cache(struct mb2_cache *cache) ++static inline void ext2_xattr_destroy_cache(struct mb_cache *cache) + { + } + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index b53cbc05b172..0662b285dc8a 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1468,7 +1468,7 @@ struct ext4_sb_info { + struct list_head s_es_list; /* List of inodes with reclaimable extents */ + long s_es_nr_inode; + struct ext4_es_stats s_es_stats; +- struct mb2_cache *s_mb_cache; ++ struct mb_cache *s_mb_cache; + spinlock_t s_es_lock ____cacheline_aligned_in_smp; + + /* Ratelimit ext4 messages. */ +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index fe9f8d6ab6c9..c6af8a7a436a 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -53,7 +53,7 @@ + #include + #include + #include +-#include ++#include + #include + #include "ext4_jbd2.h" + #include "ext4.h" +@@ -78,10 +78,10 @@ + # define ea_bdebug(bh, fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) + #endif + +-static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *); ++static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); + static struct buffer_head *ext4_xattr_cache_find(struct inode *, + struct ext4_xattr_header *, +- struct mb2_cache_entry **); ++ struct mb_cache_entry **); + static void ext4_xattr_rehash(struct ext4_xattr_header *, + struct ext4_xattr_entry *); + static int ext4_xattr_list(struct dentry *dentry, char *buffer, +@@ -276,7 +276,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, + struct ext4_xattr_entry *entry; + size_t size; + int error; +- struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ++ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + + ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", + name_index, name, buffer, (long)buffer_size); +@@ -428,7 +428,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) + struct inode *inode = d_inode(dentry); + struct buffer_head *bh = NULL; + int error; +- struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ++ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + + ea_idebug(inode, "buffer=%p, buffer_size=%ld", + buffer, (long)buffer_size); +@@ -561,8 +561,8 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + * This must happen under buffer lock for + * ext4_xattr_block_set() to reliably detect freed block + */ +- mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, +- bh->b_blocknr); ++ mb_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, ++ bh->b_blocknr); + get_bh(bh); + unlock_buffer(bh); + ext4_free_blocks(handle, inode, bh, 0, 1, +@@ -782,9 +782,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh = NULL; + struct ext4_xattr_search *s = &bs->s; +- struct mb2_cache_entry *ce = NULL; ++ struct mb_cache_entry *ce = NULL; + int error = 0; +- struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ++ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + + #define header(x) ((struct ext4_xattr_header *)(x)) + +@@ -805,8 +805,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + * ext4_xattr_block_set() to reliably detect modified + * block + */ +- mb2_cache_entry_delete_block(ext4_mb_cache, hash, +- bs->bh->b_blocknr); ++ mb_cache_entry_delete_block(ext4_mb_cache, hash, ++ bs->bh->b_blocknr); + ea_bdebug(bs->bh, "modifying in-place"); + error = ext4_xattr_set_entry(i, s); + if (!error) { +@@ -904,7 +904,7 @@ inserted: + EXT4_C2B(EXT4_SB(sb), + 1)); + brelse(new_bh); +- mb2_cache_entry_put(ext4_mb_cache, ce); ++ mb_cache_entry_put(ext4_mb_cache, ce); + ce = NULL; + new_bh = NULL; + goto inserted; +@@ -919,8 +919,8 @@ inserted: + if (error) + goto cleanup_dquot; + } +- mb2_cache_entry_touch(ext4_mb_cache, ce); +- mb2_cache_entry_put(ext4_mb_cache, ce); ++ mb_cache_entry_touch(ext4_mb_cache, ce); ++ mb_cache_entry_put(ext4_mb_cache, ce); + ce = NULL; + } else if (bs->bh && s->base == bs->bh->b_data) { + /* We were modifying this block in-place. */ +@@ -985,7 +985,7 @@ getblk_failed: + + cleanup: + if (ce) +- mb2_cache_entry_put(ext4_mb_cache, ce); ++ mb_cache_entry_put(ext4_mb_cache, ce); + brelse(new_bh); + if (!(bs->bh && s->base == bs->bh->b_data)) + kfree(s->base); +@@ -1546,13 +1546,13 @@ cleanup: + * Returns 0, or a negative error number on failure. 
+ */ + static void +-ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh) ++ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) + { + __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); + int error; + +- error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, +- bh->b_blocknr); ++ error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, ++ bh->b_blocknr); + if (error) { + if (error == -EBUSY) + ea_bdebug(bh, "already in cache"); +@@ -1610,16 +1610,16 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, + */ + static struct buffer_head * + ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, +- struct mb2_cache_entry **pce) ++ struct mb_cache_entry **pce) + { + __u32 hash = le32_to_cpu(header->h_hash); +- struct mb2_cache_entry *ce; +- struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ++ struct mb_cache_entry *ce; ++ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + + if (!header->h_hash) + return NULL; /* never share */ + ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); +- ce = mb2_cache_entry_find_first(ext4_mb_cache, hash); ++ ce = mb_cache_entry_find_first(ext4_mb_cache, hash); + while (ce) { + struct buffer_head *bh; + +@@ -1638,7 +1638,7 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, + return bh; + } + brelse(bh); +- ce = mb2_cache_entry_find_next(ext4_mb_cache, ce); ++ ce = mb_cache_entry_find_next(ext4_mb_cache, ce); + } + return NULL; + } +@@ -1713,15 +1713,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, + + #define HASH_BUCKET_BITS 10 + +-struct mb2_cache * ++struct mb_cache * + ext4_xattr_create_cache(void) + { +- return mb2_cache_create(HASH_BUCKET_BITS); ++ return mb_cache_create(HASH_BUCKET_BITS); + } + +-void ext4_xattr_destroy_cache(struct mb2_cache *cache) ++void ext4_xattr_destroy_cache(struct mb_cache *cache) + { + if (cache) +- mb2_cache_destroy(cache); ++ mb_cache_destroy(cache); + } + +diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h +index 10b0f7323ed6..69dd3e6566e0 100644 +--- a/fs/ext4/xattr.h ++++ b/fs/ext4/xattr.h +@@ -123,8 +123,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is); + +-extern struct mb2_cache *ext4_xattr_create_cache(void); +-extern void ext4_xattr_destroy_cache(struct mb2_cache *); ++extern struct mb_cache *ext4_xattr_create_cache(void); ++extern void ext4_xattr_destroy_cache(struct mb_cache *); + + #ifdef CONFIG_EXT4_FS_SECURITY + extern int ext4_init_security(handle_t *handle, struct inode *inode, +diff --git a/fs/mbcache.c b/fs/mbcache.c +new file mode 100644 +index 000000000000..4241b633f155 +--- /dev/null ++++ b/fs/mbcache.c +@@ -0,0 +1,424 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Mbcache is a simple key-value store. Keys need not be unique, however ++ * key-value pairs are expected to be unique (we use this fact in ++ * mb_cache_entry_delete_block()). ++ * ++ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. ++ * They use hash of a block contents as a key and block number as a value. ++ * That's why keys need not be unique (different xattr blocks may end up having ++ * the same hash). However block number always uniquely identifies a cache ++ * entry. 
++ * ++ * We provide functions for creation and removal of entries, search by key, ++ * and a special "delete entry with given key-value pair" operation. Fixed ++ * size hash table is used for fast key lookups. ++ */ ++ ++struct mb_cache { ++ /* Hash table of entries */ ++ struct hlist_bl_head *c_hash; ++ /* log2 of hash table size */ ++ int c_bucket_bits; ++ /* Maximum entries in cache to avoid degrading hash too much */ ++ int c_max_entries; ++ /* Protects c_list, c_entry_count */ ++ spinlock_t c_list_lock; ++ struct list_head c_list; ++ /* Number of entries in cache */ ++ unsigned long c_entry_count; ++ struct shrinker c_shrink; ++ /* Work for shrinking when the cache has too many entries */ ++ struct work_struct c_shrink_work; ++}; ++ ++static struct kmem_cache *mb_entry_cache; ++ ++static unsigned long mb_cache_shrink(struct mb_cache *cache, ++ unsigned int nr_to_scan); ++ ++static inline bool mb_cache_entry_referenced(struct mb_cache_entry *entry) ++{ ++ return entry->_e_hash_list_head & 1; ++} ++ ++static inline void mb_cache_entry_set_referenced(struct mb_cache_entry *entry) ++{ ++ entry->_e_hash_list_head |= 1; ++} ++ ++static inline void mb_cache_entry_clear_referenced( ++ struct mb_cache_entry *entry) ++{ ++ entry->_e_hash_list_head &= ~1; ++} ++ ++static inline struct hlist_bl_head *mb_cache_entry_head( ++ struct mb_cache_entry *entry) ++{ ++ return (struct hlist_bl_head *) ++ (entry->_e_hash_list_head & ~1); ++} ++ ++/* ++ * Number of entries to reclaim synchronously when there are too many entries ++ * in cache ++ */ ++#define SYNC_SHRINK_BATCH 64 ++ ++/* ++ * mb_cache_entry_create - create entry in cache ++ * @cache - cache where the entry should be created ++ * @mask - gfp mask with which the entry should be allocated ++ * @key - key of the entry ++ * @block - block that contains data ++ * ++ * Creates entry in @cache with key @key and records that data is stored in ++ * block @block. The function returns -EBUSY if entry with the same key ++ * and for the same block already exists in cache. Otherwise 0 is returned. 
++ */ ++int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, ++ sector_t block) ++{ ++ struct mb_cache_entry *entry, *dup; ++ struct hlist_bl_node *dup_node; ++ struct hlist_bl_head *head; ++ ++ /* Schedule background reclaim if there are too many entries */ ++ if (cache->c_entry_count >= cache->c_max_entries) ++ schedule_work(&cache->c_shrink_work); ++ /* Do some sync reclaim if background reclaim cannot keep up */ ++ if (cache->c_entry_count >= 2*cache->c_max_entries) ++ mb_cache_shrink(cache, SYNC_SHRINK_BATCH); ++ ++ entry = kmem_cache_alloc(mb_entry_cache, mask); ++ if (!entry) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&entry->e_list); ++ /* One ref for hash, one ref returned */ ++ atomic_set(&entry->e_refcnt, 1); ++ entry->e_key = key; ++ entry->e_block = block; ++ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; ++ entry->_e_hash_list_head = (unsigned long)head; ++ hlist_bl_lock(head); ++ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { ++ if (dup->e_key == key && dup->e_block == block) { ++ hlist_bl_unlock(head); ++ kmem_cache_free(mb_entry_cache, entry); ++ return -EBUSY; ++ } ++ } ++ hlist_bl_add_head(&entry->e_hash_list, head); ++ hlist_bl_unlock(head); ++ ++ spin_lock(&cache->c_list_lock); ++ list_add_tail(&entry->e_list, &cache->c_list); ++ /* Grab ref for LRU list */ ++ atomic_inc(&entry->e_refcnt); ++ cache->c_entry_count++; ++ spin_unlock(&cache->c_list_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(mb_cache_entry_create); ++ ++void __mb_cache_entry_free(struct mb_cache_entry *entry) ++{ ++ kmem_cache_free(mb_entry_cache, entry); ++} ++EXPORT_SYMBOL(__mb_cache_entry_free); ++ ++static struct mb_cache_entry *__entry_find(struct mb_cache *cache, ++ struct mb_cache_entry *entry, ++ u32 key) ++{ ++ struct mb_cache_entry *old_entry = entry; ++ struct hlist_bl_node *node; ++ struct hlist_bl_head *head; ++ ++ if (entry) ++ head = mb_cache_entry_head(entry); ++ else ++ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; ++ hlist_bl_lock(head); ++ if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) ++ node = entry->e_hash_list.next; ++ else ++ node = hlist_bl_first(head); ++ while (node) { ++ entry = hlist_bl_entry(node, struct mb_cache_entry, ++ e_hash_list); ++ if (entry->e_key == key) { ++ atomic_inc(&entry->e_refcnt); ++ goto out; ++ } ++ node = node->next; ++ } ++ entry = NULL; ++out: ++ hlist_bl_unlock(head); ++ if (old_entry) ++ mb_cache_entry_put(cache, old_entry); ++ ++ return entry; ++} ++ ++/* ++ * mb_cache_entry_find_first - find the first entry in cache with given key ++ * @cache: cache where we should search ++ * @key: key to look for ++ * ++ * Search in @cache for entry with key @key. Grabs reference to the first ++ * entry found and returns the entry. ++ */ ++struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, ++ u32 key) ++{ ++ return __entry_find(cache, NULL, key); ++} ++EXPORT_SYMBOL(mb_cache_entry_find_first); ++ ++/* ++ * mb_cache_entry_find_next - find next entry in cache with the same ++ * @cache: cache where we should search ++ * @entry: entry to start search from ++ * ++ * Finds next entry in the hash chain which has the same key as @entry. ++ * If @entry is unhashed (which can happen when deletion of entry races ++ * with the search), finds the first entry in the hash chain. The function ++ * drops reference to @entry and returns with a reference to the found entry. 
++ */ ++struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, ++ struct mb_cache_entry *entry) ++{ ++ return __entry_find(cache, entry, entry->e_key); ++} ++EXPORT_SYMBOL(mb_cache_entry_find_next); ++ ++/* mb_cache_entry_delete_block - remove information about block from cache ++ * @cache - cache we work with ++ * @key - key of the entry to remove ++ * @block - block containing data for @key ++ * ++ * Remove entry from cache @cache with key @key with data stored in @block. ++ */ ++void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, ++ sector_t block) ++{ ++ struct hlist_bl_node *node; ++ struct hlist_bl_head *head; ++ struct mb_cache_entry *entry; ++ ++ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; ++ hlist_bl_lock(head); ++ hlist_bl_for_each_entry(entry, node, head, e_hash_list) { ++ if (entry->e_key == key && entry->e_block == block) { ++ /* We keep hash list reference to keep entry alive */ ++ hlist_bl_del_init(&entry->e_hash_list); ++ hlist_bl_unlock(head); ++ spin_lock(&cache->c_list_lock); ++ if (!list_empty(&entry->e_list)) { ++ list_del_init(&entry->e_list); ++ cache->c_entry_count--; ++ atomic_dec(&entry->e_refcnt); ++ } ++ spin_unlock(&cache->c_list_lock); ++ mb_cache_entry_put(cache, entry); ++ return; ++ } ++ } ++ hlist_bl_unlock(head); ++} ++EXPORT_SYMBOL(mb_cache_entry_delete_block); ++ ++/* mb_cache_entry_touch - cache entry got used ++ * @cache - cache the entry belongs to ++ * @entry - entry that got used ++ * ++ * Marks entry as used to give hit higher chances of surviving in cache. ++ */ ++void mb_cache_entry_touch(struct mb_cache *cache, ++ struct mb_cache_entry *entry) ++{ ++ mb_cache_entry_set_referenced(entry); ++} ++EXPORT_SYMBOL(mb_cache_entry_touch); ++ ++static unsigned long mb_cache_count(struct shrinker *shrink, ++ struct shrink_control *sc) ++{ ++ struct mb_cache *cache = container_of(shrink, struct mb_cache, ++ c_shrink); ++ ++ return cache->c_entry_count; ++} ++ ++/* Shrink number of entries in cache */ ++static unsigned long mb_cache_shrink(struct mb_cache *cache, ++ unsigned int nr_to_scan) ++{ ++ struct mb_cache_entry *entry; ++ struct hlist_bl_head *head; ++ unsigned int shrunk = 0; ++ ++ spin_lock(&cache->c_list_lock); ++ while (nr_to_scan-- && !list_empty(&cache->c_list)) { ++ entry = list_first_entry(&cache->c_list, ++ struct mb_cache_entry, e_list); ++ if (mb_cache_entry_referenced(entry)) { ++ mb_cache_entry_clear_referenced(entry); ++ list_move_tail(&cache->c_list, &entry->e_list); ++ continue; ++ } ++ list_del_init(&entry->e_list); ++ cache->c_entry_count--; ++ /* ++ * We keep LRU list reference so that entry doesn't go away ++ * from under us. 
++ */ ++ spin_unlock(&cache->c_list_lock); ++ head = mb_cache_entry_head(entry); ++ hlist_bl_lock(head); ++ if (!hlist_bl_unhashed(&entry->e_hash_list)) { ++ hlist_bl_del_init(&entry->e_hash_list); ++ atomic_dec(&entry->e_refcnt); ++ } ++ hlist_bl_unlock(head); ++ if (mb_cache_entry_put(cache, entry)) ++ shrunk++; ++ cond_resched(); ++ spin_lock(&cache->c_list_lock); ++ } ++ spin_unlock(&cache->c_list_lock); ++ ++ return shrunk; ++} ++ ++static unsigned long mb_cache_scan(struct shrinker *shrink, ++ struct shrink_control *sc) ++{ ++ int nr_to_scan = sc->nr_to_scan; ++ struct mb_cache *cache = container_of(shrink, struct mb_cache, ++ c_shrink); ++ return mb_cache_shrink(cache, nr_to_scan); ++} ++ ++/* We shrink 1/X of the cache when we have too many entries in it */ ++#define SHRINK_DIVISOR 16 ++ ++static void mb_cache_shrink_worker(struct work_struct *work) ++{ ++ struct mb_cache *cache = container_of(work, struct mb_cache, ++ c_shrink_work); ++ mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR); ++} ++ ++/* ++ * mb_cache_create - create cache ++ * @bucket_bits: log2 of the hash table size ++ * ++ * Create cache for keys with 2^bucket_bits hash entries. ++ */ ++struct mb_cache *mb_cache_create(int bucket_bits) ++{ ++ struct mb_cache *cache; ++ int bucket_count = 1 << bucket_bits; ++ int i; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return NULL; ++ ++ cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL); ++ if (!cache) ++ goto err_out; ++ cache->c_bucket_bits = bucket_bits; ++ cache->c_max_entries = bucket_count << 4; ++ INIT_LIST_HEAD(&cache->c_list); ++ spin_lock_init(&cache->c_list_lock); ++ cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), ++ GFP_KERNEL); ++ if (!cache->c_hash) { ++ kfree(cache); ++ goto err_out; ++ } ++ for (i = 0; i < bucket_count; i++) ++ INIT_HLIST_BL_HEAD(&cache->c_hash[i]); ++ ++ cache->c_shrink.count_objects = mb_cache_count; ++ cache->c_shrink.scan_objects = mb_cache_scan; ++ cache->c_shrink.seeks = DEFAULT_SEEKS; ++ register_shrinker(&cache->c_shrink); ++ ++ INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker); ++ ++ return cache; ++ ++err_out: ++ module_put(THIS_MODULE); ++ return NULL; ++} ++EXPORT_SYMBOL(mb_cache_create); ++ ++/* ++ * mb_cache_destroy - destroy cache ++ * @cache: the cache to destroy ++ * ++ * Free all entries in cache and cache itself. Caller must make sure nobody ++ * (except shrinker) can reach @cache when calling this. ++ */ ++void mb_cache_destroy(struct mb_cache *cache) ++{ ++ struct mb_cache_entry *entry, *next; ++ ++ unregister_shrinker(&cache->c_shrink); ++ ++ /* ++ * We don't bother with any locking. Cache must not be used at this ++ * point. 
++ */ ++ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { ++ if (!hlist_bl_unhashed(&entry->e_hash_list)) { ++ hlist_bl_del_init(&entry->e_hash_list); ++ atomic_dec(&entry->e_refcnt); ++ } else ++ WARN_ON(1); ++ list_del(&entry->e_list); ++ WARN_ON(atomic_read(&entry->e_refcnt) != 1); ++ mb_cache_entry_put(cache, entry); ++ } ++ kfree(cache->c_hash); ++ kfree(cache); ++ module_put(THIS_MODULE); ++} ++EXPORT_SYMBOL(mb_cache_destroy); ++ ++static int __init mbcache_init(void) ++{ ++ mb_entry_cache = kmem_cache_create("mbcache", ++ sizeof(struct mb_cache_entry), 0, ++ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); ++ BUG_ON(!mb_entry_cache); ++ return 0; ++} ++ ++static void __exit mbcache_exit(void) ++{ ++ kmem_cache_destroy(mb_entry_cache); ++} ++ ++module_init(mbcache_init) ++module_exit(mbcache_exit) ++ ++MODULE_AUTHOR("Jan Kara "); ++MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); ++MODULE_LICENSE("GPL"); +diff --git a/fs/mbcache2.c b/fs/mbcache2.c +deleted file mode 100644 +index 49f7a6feaa83..000000000000 +--- a/fs/mbcache2.c ++++ /dev/null +@@ -1,424 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * Mbcache is a simple key-value store. Keys need not be unique, however +- * key-value pairs are expected to be unique (we use this fact in +- * mb2_cache_entry_delete_block()). +- * +- * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. +- * They use hash of a block contents as a key and block number as a value. +- * That's why keys need not be unique (different xattr blocks may end up having +- * the same hash). However block number always uniquely identifies a cache +- * entry. +- * +- * We provide functions for creation and removal of entries, search by key, +- * and a special "delete entry with given key-value pair" operation. Fixed +- * size hash table is used for fast key lookups. 
+- */ +- +-struct mb2_cache { +- /* Hash table of entries */ +- struct hlist_bl_head *c_hash; +- /* log2 of hash table size */ +- int c_bucket_bits; +- /* Maximum entries in cache to avoid degrading hash too much */ +- int c_max_entries; +- /* Protects c_list, c_entry_count */ +- spinlock_t c_list_lock; +- struct list_head c_list; +- /* Number of entries in cache */ +- unsigned long c_entry_count; +- struct shrinker c_shrink; +- /* Work for shrinking when the cache has too many entries */ +- struct work_struct c_shrink_work; +-}; +- +-static struct kmem_cache *mb2_entry_cache; +- +-static unsigned long mb2_cache_shrink(struct mb2_cache *cache, +- unsigned int nr_to_scan); +- +-static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry) +-{ +- return entry->_e_hash_list_head & 1; +-} +- +-static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry) +-{ +- entry->_e_hash_list_head |= 1; +-} +- +-static inline void mb2_cache_entry_clear_referenced( +- struct mb2_cache_entry *entry) +-{ +- entry->_e_hash_list_head &= ~1; +-} +- +-static inline struct hlist_bl_head *mb2_cache_entry_head( +- struct mb2_cache_entry *entry) +-{ +- return (struct hlist_bl_head *) +- (entry->_e_hash_list_head & ~1); +-} +- +-/* +- * Number of entries to reclaim synchronously when there are too many entries +- * in cache +- */ +-#define SYNC_SHRINK_BATCH 64 +- +-/* +- * mb2_cache_entry_create - create entry in cache +- * @cache - cache where the entry should be created +- * @mask - gfp mask with which the entry should be allocated +- * @key - key of the entry +- * @block - block that contains data +- * +- * Creates entry in @cache with key @key and records that data is stored in +- * block @block. The function returns -EBUSY if entry with the same key +- * and for the same block already exists in cache. Otherwise 0 is returned. 
+- */ +-int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, +- sector_t block) +-{ +- struct mb2_cache_entry *entry, *dup; +- struct hlist_bl_node *dup_node; +- struct hlist_bl_head *head; +- +- /* Schedule background reclaim if there are too many entries */ +- if (cache->c_entry_count >= cache->c_max_entries) +- schedule_work(&cache->c_shrink_work); +- /* Do some sync reclaim if background reclaim cannot keep up */ +- if (cache->c_entry_count >= 2*cache->c_max_entries) +- mb2_cache_shrink(cache, SYNC_SHRINK_BATCH); +- +- entry = kmem_cache_alloc(mb2_entry_cache, mask); +- if (!entry) +- return -ENOMEM; +- +- INIT_LIST_HEAD(&entry->e_list); +- /* One ref for hash, one ref returned */ +- atomic_set(&entry->e_refcnt, 1); +- entry->e_key = key; +- entry->e_block = block; +- head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; +- entry->_e_hash_list_head = (unsigned long)head; +- hlist_bl_lock(head); +- hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { +- if (dup->e_key == key && dup->e_block == block) { +- hlist_bl_unlock(head); +- kmem_cache_free(mb2_entry_cache, entry); +- return -EBUSY; +- } +- } +- hlist_bl_add_head(&entry->e_hash_list, head); +- hlist_bl_unlock(head); +- +- spin_lock(&cache->c_list_lock); +- list_add_tail(&entry->e_list, &cache->c_list); +- /* Grab ref for LRU list */ +- atomic_inc(&entry->e_refcnt); +- cache->c_entry_count++; +- spin_unlock(&cache->c_list_lock); +- +- return 0; +-} +-EXPORT_SYMBOL(mb2_cache_entry_create); +- +-void __mb2_cache_entry_free(struct mb2_cache_entry *entry) +-{ +- kmem_cache_free(mb2_entry_cache, entry); +-} +-EXPORT_SYMBOL(__mb2_cache_entry_free); +- +-static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, +- struct mb2_cache_entry *entry, +- u32 key) +-{ +- struct mb2_cache_entry *old_entry = entry; +- struct hlist_bl_node *node; +- struct hlist_bl_head *head; +- +- if (entry) +- head = mb2_cache_entry_head(entry); +- else +- head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; +- hlist_bl_lock(head); +- if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) +- node = entry->e_hash_list.next; +- else +- node = hlist_bl_first(head); +- while (node) { +- entry = hlist_bl_entry(node, struct mb2_cache_entry, +- e_hash_list); +- if (entry->e_key == key) { +- atomic_inc(&entry->e_refcnt); +- goto out; +- } +- node = node->next; +- } +- entry = NULL; +-out: +- hlist_bl_unlock(head); +- if (old_entry) +- mb2_cache_entry_put(cache, old_entry); +- +- return entry; +-} +- +-/* +- * mb2_cache_entry_find_first - find the first entry in cache with given key +- * @cache: cache where we should search +- * @key: key to look for +- * +- * Search in @cache for entry with key @key. Grabs reference to the first +- * entry found and returns the entry. +- */ +-struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, +- u32 key) +-{ +- return __entry_find(cache, NULL, key); +-} +-EXPORT_SYMBOL(mb2_cache_entry_find_first); +- +-/* +- * mb2_cache_entry_find_next - find next entry in cache with the same +- * @cache: cache where we should search +- * @entry: entry to start search from +- * +- * Finds next entry in the hash chain which has the same key as @entry. +- * If @entry is unhashed (which can happen when deletion of entry races +- * with the search), finds the first entry in the hash chain. The function +- * drops reference to @entry and returns with a reference to the found entry. 
+- */ +-struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, +- struct mb2_cache_entry *entry) +-{ +- return __entry_find(cache, entry, entry->e_key); +-} +-EXPORT_SYMBOL(mb2_cache_entry_find_next); +- +-/* mb2_cache_entry_delete_block - remove information about block from cache +- * @cache - cache we work with +- * @key - key of the entry to remove +- * @block - block containing data for @key +- * +- * Remove entry from cache @cache with key @key with data stored in @block. +- */ +-void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, +- sector_t block) +-{ +- struct hlist_bl_node *node; +- struct hlist_bl_head *head; +- struct mb2_cache_entry *entry; +- +- head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; +- hlist_bl_lock(head); +- hlist_bl_for_each_entry(entry, node, head, e_hash_list) { +- if (entry->e_key == key && entry->e_block == block) { +- /* We keep hash list reference to keep entry alive */ +- hlist_bl_del_init(&entry->e_hash_list); +- hlist_bl_unlock(head); +- spin_lock(&cache->c_list_lock); +- if (!list_empty(&entry->e_list)) { +- list_del_init(&entry->e_list); +- cache->c_entry_count--; +- atomic_dec(&entry->e_refcnt); +- } +- spin_unlock(&cache->c_list_lock); +- mb2_cache_entry_put(cache, entry); +- return; +- } +- } +- hlist_bl_unlock(head); +-} +-EXPORT_SYMBOL(mb2_cache_entry_delete_block); +- +-/* mb2_cache_entry_touch - cache entry got used +- * @cache - cache the entry belongs to +- * @entry - entry that got used +- * +- * Marks entry as used to give hit higher chances of surviving in cache. +- */ +-void mb2_cache_entry_touch(struct mb2_cache *cache, +- struct mb2_cache_entry *entry) +-{ +- mb2_cache_entry_set_referenced(entry); +-} +-EXPORT_SYMBOL(mb2_cache_entry_touch); +- +-static unsigned long mb2_cache_count(struct shrinker *shrink, +- struct shrink_control *sc) +-{ +- struct mb2_cache *cache = container_of(shrink, struct mb2_cache, +- c_shrink); +- +- return cache->c_entry_count; +-} +- +-/* Shrink number of entries in cache */ +-static unsigned long mb2_cache_shrink(struct mb2_cache *cache, +- unsigned int nr_to_scan) +-{ +- struct mb2_cache_entry *entry; +- struct hlist_bl_head *head; +- unsigned int shrunk = 0; +- +- spin_lock(&cache->c_list_lock); +- while (nr_to_scan-- && !list_empty(&cache->c_list)) { +- entry = list_first_entry(&cache->c_list, +- struct mb2_cache_entry, e_list); +- if (mb2_cache_entry_referenced(entry)) { +- mb2_cache_entry_clear_referenced(entry); +- list_move_tail(&cache->c_list, &entry->e_list); +- continue; +- } +- list_del_init(&entry->e_list); +- cache->c_entry_count--; +- /* +- * We keep LRU list reference so that entry doesn't go away +- * from under us. 
+- */ +- spin_unlock(&cache->c_list_lock); +- head = mb2_cache_entry_head(entry); +- hlist_bl_lock(head); +- if (!hlist_bl_unhashed(&entry->e_hash_list)) { +- hlist_bl_del_init(&entry->e_hash_list); +- atomic_dec(&entry->e_refcnt); +- } +- hlist_bl_unlock(head); +- if (mb2_cache_entry_put(cache, entry)) +- shrunk++; +- cond_resched(); +- spin_lock(&cache->c_list_lock); +- } +- spin_unlock(&cache->c_list_lock); +- +- return shrunk; +-} +- +-static unsigned long mb2_cache_scan(struct shrinker *shrink, +- struct shrink_control *sc) +-{ +- int nr_to_scan = sc->nr_to_scan; +- struct mb2_cache *cache = container_of(shrink, struct mb2_cache, +- c_shrink); +- return mb2_cache_shrink(cache, nr_to_scan); +-} +- +-/* We shrink 1/X of the cache when we have too many entries in it */ +-#define SHRINK_DIVISOR 16 +- +-static void mb2_cache_shrink_worker(struct work_struct *work) +-{ +- struct mb2_cache *cache = container_of(work, struct mb2_cache, +- c_shrink_work); +- mb2_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR); +-} +- +-/* +- * mb2_cache_create - create cache +- * @bucket_bits: log2 of the hash table size +- * +- * Create cache for keys with 2^bucket_bits hash entries. +- */ +-struct mb2_cache *mb2_cache_create(int bucket_bits) +-{ +- struct mb2_cache *cache; +- int bucket_count = 1 << bucket_bits; +- int i; +- +- if (!try_module_get(THIS_MODULE)) +- return NULL; +- +- cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL); +- if (!cache) +- goto err_out; +- cache->c_bucket_bits = bucket_bits; +- cache->c_max_entries = bucket_count << 4; +- INIT_LIST_HEAD(&cache->c_list); +- spin_lock_init(&cache->c_list_lock); +- cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), +- GFP_KERNEL); +- if (!cache->c_hash) { +- kfree(cache); +- goto err_out; +- } +- for (i = 0; i < bucket_count; i++) +- INIT_HLIST_BL_HEAD(&cache->c_hash[i]); +- +- cache->c_shrink.count_objects = mb2_cache_count; +- cache->c_shrink.scan_objects = mb2_cache_scan; +- cache->c_shrink.seeks = DEFAULT_SEEKS; +- register_shrinker(&cache->c_shrink); +- +- INIT_WORK(&cache->c_shrink_work, mb2_cache_shrink_worker); +- +- return cache; +- +-err_out: +- module_put(THIS_MODULE); +- return NULL; +-} +-EXPORT_SYMBOL(mb2_cache_create); +- +-/* +- * mb2_cache_destroy - destroy cache +- * @cache: the cache to destroy +- * +- * Free all entries in cache and cache itself. Caller must make sure nobody +- * (except shrinker) can reach @cache when calling this. +- */ +-void mb2_cache_destroy(struct mb2_cache *cache) +-{ +- struct mb2_cache_entry *entry, *next; +- +- unregister_shrinker(&cache->c_shrink); +- +- /* +- * We don't bother with any locking. Cache must not be used at this +- * point. 
+- */ +- list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { +- if (!hlist_bl_unhashed(&entry->e_hash_list)) { +- hlist_bl_del_init(&entry->e_hash_list); +- atomic_dec(&entry->e_refcnt); +- } else +- WARN_ON(1); +- list_del(&entry->e_list); +- WARN_ON(atomic_read(&entry->e_refcnt) != 1); +- mb2_cache_entry_put(cache, entry); +- } +- kfree(cache->c_hash); +- kfree(cache); +- module_put(THIS_MODULE); +-} +-EXPORT_SYMBOL(mb2_cache_destroy); +- +-static int __init mb2cache_init(void) +-{ +- mb2_entry_cache = kmem_cache_create("mbcache", +- sizeof(struct mb2_cache_entry), 0, +- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); +- BUG_ON(!mb2_entry_cache); +- return 0; +-} +- +-static void __exit mb2cache_exit(void) +-{ +- kmem_cache_destroy(mb2_entry_cache); +-} +- +-module_init(mb2cache_init) +-module_exit(mb2cache_exit) +- +-MODULE_AUTHOR("Jan Kara "); +-MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); +-MODULE_LICENSE("GPL"); +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +new file mode 100644 +index 000000000000..a74a1f3082fb +--- /dev/null ++++ b/include/linux/mbcache.h +@@ -0,0 +1,53 @@ ++#ifndef _LINUX_MBCACHE_H ++#define _LINUX_MBCACHE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct mb_cache; ++ ++struct mb_cache_entry { ++ /* List of entries in cache - protected by cache->c_list_lock */ ++ struct list_head e_list; ++ /* Hash table list - protected by bitlock in e_hash_list_head */ ++ struct hlist_bl_node e_hash_list; ++ atomic_t e_refcnt; ++ /* Key in hash - stable during lifetime of the entry */ ++ u32 e_key; ++ /* Block number of hashed block - stable during lifetime of the entry */ ++ sector_t e_block; ++ /* ++ * Head of hash list (for list bit lock) - stable. Combined with ++ * referenced bit of entry ++ */ ++ unsigned long _e_hash_list_head; ++}; ++ ++struct mb_cache *mb_cache_create(int bucket_bits); ++void mb_cache_destroy(struct mb_cache *cache); ++ ++int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, ++ sector_t block); ++void __mb_cache_entry_free(struct mb_cache_entry *entry); ++static inline int mb_cache_entry_put(struct mb_cache *cache, ++ struct mb_cache_entry *entry) ++{ ++ if (!atomic_dec_and_test(&entry->e_refcnt)) ++ return 0; ++ __mb_cache_entry_free(entry); ++ return 1; ++} ++ ++void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, ++ sector_t block); ++struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, ++ u32 key); ++struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, ++ struct mb_cache_entry *entry); ++void mb_cache_entry_touch(struct mb_cache *cache, ++ struct mb_cache_entry *entry); ++ ++#endif /* _LINUX_MBCACHE_H */ +diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h +deleted file mode 100644 +index c934843a6a31..000000000000 +--- a/include/linux/mbcache2.h ++++ /dev/null +@@ -1,53 +0,0 @@ +-#ifndef _LINUX_MB2CACHE_H +-#define _LINUX_MB2CACHE_H +- +-#include +-#include +-#include +-#include +-#include +- +-struct mb2_cache; +- +-struct mb2_cache_entry { +- /* List of entries in cache - protected by cache->c_list_lock */ +- struct list_head e_list; +- /* Hash table list - protected by bitlock in e_hash_list_head */ +- struct hlist_bl_node e_hash_list; +- atomic_t e_refcnt; +- /* Key in hash - stable during lifetime of the entry */ +- u32 e_key; +- /* Block number of hashed block - stable during lifetime of the entry */ +- sector_t e_block; +- /* +- * Head of hash list (for list bit lock) - stable. 
Combined with +- * referenced bit of entry +- */ +- unsigned long _e_hash_list_head; +-}; +- +-struct mb2_cache *mb2_cache_create(int bucket_bits); +-void mb2_cache_destroy(struct mb2_cache *cache); +- +-int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, +- sector_t block); +-void __mb2_cache_entry_free(struct mb2_cache_entry *entry); +-static inline int mb2_cache_entry_put(struct mb2_cache *cache, +- struct mb2_cache_entry *entry) +-{ +- if (!atomic_dec_and_test(&entry->e_refcnt)) +- return 0; +- __mb2_cache_entry_free(entry); +- return 1; +-} +- +-void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, +- sector_t block); +-struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, +- u32 key); +-struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, +- struct mb2_cache_entry *entry); +-void mb2_cache_entry_touch(struct mb2_cache *cache, +- struct mb2_cache_entry *entry); +- +-#endif /* _LINUX_MB2CACHE_H */ +-- +2.6.2 + + diff --git a/mbcache2-use-referenced-bit-instead-of-LRU b/mbcache2-use-referenced-bit-instead-of-LRU index 1711ffdc..2d488a28 100644 --- a/mbcache2-use-referenced-bit-instead-of-LRU +++ b/mbcache2-use-referenced-bit-instead-of-LRU @@ -1,4 +1,4 @@ -mbcache2: use referenced bit instead of LRU +mbcache2: Use referenced bit instead of LRU From: Jan Kara @@ -8,7 +8,8 @@ list are relatively expensive. In this patch we switch to lazy updates of LRU list. Whenever entry gets used, we set a referenced bit in it. When reclaiming entries, we give -referenced entries another round in the LRU. +referenced entries another round in the LRU. Since the list is not a +real LRU anymore, rename it to just 'list'. In my testing this logic gives about 30% boost to workloads with mostly unique xattr blocks (e.g. xattr-bench with 10 files and 10000 unique @@ -17,15 +18,28 @@ xattr values). 
Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- - fs/mbcache2.c | 41 +++++++++++++++++++++++++++++++++-------- - include/linux/mbcache2.h | 7 +++++-- - 2 files changed, 38 insertions(+), 10 deletions(-) + fs/mbcache2.c | 87 +++++++++++++++++++++++++++++++----------------- + include/linux/mbcache2.h | 11 +++--- + 2 files changed, 63 insertions(+), 35 deletions(-) diff --git a/fs/mbcache2.c b/fs/mbcache2.c -index fe9f6f6a2953..60310a690f8d 100644 +index 3e3198d6b9d6..49f7a6feaa83 100644 --- a/fs/mbcache2.c +++ b/fs/mbcache2.c -@@ -39,6 +39,29 @@ static struct kmem_cache *mb2_entry_cache; +@@ -30,9 +30,9 @@ struct mb2_cache { + int c_bucket_bits; + /* Maximum entries in cache to avoid degrading hash too much */ + int c_max_entries; +- /* Protects c_lru_list, c_entry_count */ +- spinlock_t c_lru_list_lock; +- struct list_head c_lru_list; ++ /* Protects c_list, c_entry_count */ ++ spinlock_t c_list_lock; ++ struct list_head c_list; + /* Number of entries in cache */ + unsigned long c_entry_count; + struct shrinker c_shrink; +@@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache; static unsigned long mb2_cache_shrink(struct mb2_cache *cache, unsigned int nr_to_scan); @@ -55,7 +69,14 @@ index fe9f6f6a2953..60310a690f8d 100644 /* * Number of entries to reclaim synchronously when there are too many entries * in cache -@@ -83,7 +106,7 @@ struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache, +@@ -80,13 +103,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, + if (!entry) + return -ENOMEM; + +- INIT_LIST_HEAD(&entry->e_lru_list); ++ INIT_LIST_HEAD(&entry->e_list); + /* One ref for hash, one ref returned */ + atomic_set(&entry->e_refcnt, 1); entry->e_key = key; entry->e_block = block; head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; @@ -64,16 +85,23 @@ index fe9f6f6a2953..60310a690f8d 100644 hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { if (dup->e_key == key && dup->e_block == block) { -@@ -125,7 +148,7 @@ EXPORT_SYMBOL(__mb2_cache_entry_free); - void mb2_cache_entry_delete(struct mb2_cache *cache, - struct mb2_cache_entry *entry) - { -- struct hlist_bl_head *head = entry->e_hash_list_head; -+ struct hlist_bl_head *head = mb2_cache_entry_head(entry); +@@ -98,12 +121,12 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, + hlist_bl_add_head(&entry->e_hash_list, head); + hlist_bl_unlock(head); - hlist_bl_lock(head); - if (!hlist_bl_unhashed(&entry->e_hash_list)) { -@@ -153,7 +176,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, +- spin_lock(&cache->c_lru_list_lock); +- list_add_tail(&entry->e_lru_list, &cache->c_lru_list); ++ spin_lock(&cache->c_list_lock); ++ list_add_tail(&entry->e_list, &cache->c_list); + /* Grab ref for LRU list */ + atomic_inc(&entry->e_refcnt); + cache->c_entry_count++; +- spin_unlock(&cache->c_lru_list_lock); ++ spin_unlock(&cache->c_list_lock); + + return 0; + } +@@ -124,7 +147,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, struct hlist_bl_head *head; if (entry) @@ -82,7 +110,31 @@ index fe9f6f6a2953..60310a690f8d 100644 else head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; hlist_bl_lock(head); -@@ -256,10 +279,7 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block); +@@ -203,13 +226,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + /* We keep hash list reference to keep entry alive */ + hlist_bl_del_init(&entry->e_hash_list); + hlist_bl_unlock(head); +- 
spin_lock(&cache->c_lru_list_lock); +- if (!list_empty(&entry->e_lru_list)) { +- list_del_init(&entry->e_lru_list); ++ spin_lock(&cache->c_list_lock); ++ if (!list_empty(&entry->e_list)) { ++ list_del_init(&entry->e_list); + cache->c_entry_count--; + atomic_dec(&entry->e_refcnt); + } +- spin_unlock(&cache->c_lru_list_lock); ++ spin_unlock(&cache->c_list_lock); + mb2_cache_entry_put(cache, entry); + return; + } +@@ -222,15 +245,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block); + * @cache - cache the entry belongs to + * @entry - entry that got used + * +- * Move entry in lru list to reflect the fact that it was used. ++ * Marks entry as used to give hit higher chances of surviving in cache. + */ void mb2_cache_entry_touch(struct mb2_cache *cache, struct mb2_cache_entry *entry) { @@ -94,33 +146,93 @@ index fe9f6f6a2953..60310a690f8d 100644 } EXPORT_SYMBOL(mb2_cache_entry_touch); -@@ -284,6 +304,11 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, - while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { - entry = list_first_entry(&cache->c_lru_list, - struct mb2_cache_entry, e_lru_list); +@@ -251,18 +271,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, + struct hlist_bl_head *head; + unsigned int shrunk = 0; + +- spin_lock(&cache->c_lru_list_lock); +- while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { +- entry = list_first_entry(&cache->c_lru_list, +- struct mb2_cache_entry, e_lru_list); +- list_del_init(&entry->e_lru_list); ++ spin_lock(&cache->c_list_lock); ++ while (nr_to_scan-- && !list_empty(&cache->c_list)) { ++ entry = list_first_entry(&cache->c_list, ++ struct mb2_cache_entry, e_list); + if (mb2_cache_entry_referenced(entry)) { + mb2_cache_entry_clear_referenced(entry); -+ list_move_tail(&cache->c_lru_list, &entry->e_lru_list); ++ list_move_tail(&cache->c_list, &entry->e_list); + continue; + } - list_del_init(&entry->e_lru_list); ++ list_del_init(&entry->e_list); cache->c_entry_count--; /* -@@ -291,7 +316,7 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, + * We keep LRU list reference so that entry doesn't go away * from under us. */ - spin_unlock(&cache->c_lru_list_lock); +- spin_unlock(&cache->c_lru_list_lock); - head = entry->e_hash_list_head; ++ spin_unlock(&cache->c_list_lock); + head = mb2_cache_entry_head(entry); hlist_bl_lock(head); if (!hlist_bl_unhashed(&entry->e_hash_list)) { hlist_bl_del_init(&entry->e_hash_list); +@@ -272,9 +297,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, + if (mb2_cache_entry_put(cache, entry)) + shrunk++; + cond_resched(); +- spin_lock(&cache->c_lru_list_lock); ++ spin_lock(&cache->c_list_lock); + } +- spin_unlock(&cache->c_lru_list_lock); ++ spin_unlock(&cache->c_list_lock); + + return shrunk; + } +@@ -318,8 +343,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) + goto err_out; + cache->c_bucket_bits = bucket_bits; + cache->c_max_entries = bucket_count << 4; +- INIT_LIST_HEAD(&cache->c_lru_list); +- spin_lock_init(&cache->c_lru_list_lock); ++ INIT_LIST_HEAD(&cache->c_list); ++ spin_lock_init(&cache->c_list_lock); + cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), + GFP_KERNEL); + if (!cache->c_hash) { +@@ -361,13 +386,13 @@ void mb2_cache_destroy(struct mb2_cache *cache) + * We don't bother with any locking. Cache must not be used at this + * point. 
+ */ +- list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) { ++ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } else + WARN_ON(1); +- list_del(&entry->e_lru_list); ++ list_del(&entry->e_list); + WARN_ON(atomic_read(&entry->e_refcnt) != 1); + mb2_cache_entry_put(cache, entry); + } diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h -index 2a58c51c3a0a..ca5b509c14a8 100644 +index b6f160ff2533..c934843a6a31 100644 --- a/include/linux/mbcache2.h +++ b/include/linux/mbcache2.h +@@ -10,8 +10,8 @@ + struct mb2_cache; + + struct mb2_cache_entry { +- /* LRU list - protected by cache->c_lru_list_lock */ +- struct list_head e_lru_list; ++ /* List of entries in cache - protected by cache->c_list_lock */ ++ struct list_head e_list; + /* Hash table list - protected by bitlock in e_hash_list_head */ + struct hlist_bl_node e_hash_list; + atomic_t e_refcnt; @@ -19,8 +19,11 @@ struct mb2_cache_entry { - unsigned int e_key; + u32 e_key; /* Block number of hashed block - stable during lifetime of the entry */ sector_t e_block; - /* Head of hash list (for list bit lock) - stable */ @@ -134,6 +246,6 @@ index 2a58c51c3a0a..ca5b509c14a8 100644 struct mb2_cache *mb2_cache_create(int bucket_bits); -- -2.1.4 +2.6.2 diff --git a/reimplement-mbcache b/reimplement-mbcache index 145ab695..5361de0f 100644 --- a/reimplement-mbcache +++ b/reimplement-mbcache @@ -12,7 +12,7 @@ complexity. This is reimplementation of the mbcache functionality to exactly fit the purpose ext? filesystems use it for. Cache entries are now considerably smaller (7 instead of 13 longs), the code is considerably smaller as -well (432 vs 913 lines of code), and IMO also simpler. The new code is +well (414 vs 913 lines of code), and IMO also simpler. The new code is also much more lightweight. 
I have measured the speed using artificial xattr-bench benchmark, which @@ -44,13 +44,13 @@ V=10000 F\P 1 2 4 8 16 32 64 10 0.161,0.154 0.198,0.190 0.296,0.256 0.662,0.480 1.192,0.818 2.989,2.200 6.362,4.746 100 0.176,0.174 0.236,0.203 0.326,0.255 0.696,0.511 1.183,0.855 4.205,3.444 19.510,17.760 -1000 0.199,0.183 0.240,0.227 1.159,1.014 2.286,2.154 6.023,6.039 10.933,--- 36.620,--- +1000 0.199,0.183 0.240,0.227 1.159,1.014 2.286,2.154 6.023,6.039 ---,10.933 ---,36.620 V=100000 F\P 1 2 4 8 16 32 64 10 0.171,0.162 0.204,0.198 0.285,0.230 0.692,0.500 1.225,0.881 2.990,2.243 6.379,4.771 100 0.151,0.171 0.220,0.210 0.295,0.255 0.720,0.518 1.226,0.844 3.423,2.831 19.234,17.544 -1000 0.192,0.189 0.249,0.225 1.162,1.043 2.257,2.093 5.853,4.997 10.399,--- 32.198,--- +1000 0.192,0.189 0.249,0.225 1.162,1.043 2.257,2.093 5.853,4.997 ---,10.399 ---,32.198 We see that the new code is faster in pretty much all the cases and starting from 4 processes there are significant gains with the new code @@ -62,9 +62,9 @@ Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/Makefile | 2 +- - fs/mbcache2.c | 388 +++++++++++++++++++++++++++++++++++++++++++++++ - include/linux/mbcache2.h | 54 +++++++ - 3 files changed, 443 insertions(+), 1 deletion(-) + fs/mbcache2.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++ + include/linux/mbcache2.h | 50 +++++++ + 3 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 fs/mbcache2.c create mode 100644 include/linux/mbcache2.h @@ -83,10 +83,10 @@ index 79f522575cba..15b3d6c4e46a 100644 obj-$(CONFIG_COREDUMP) += coredump.o diff --git a/fs/mbcache2.c b/fs/mbcache2.c new file mode 100644 -index 000000000000..4ccf0752c6d1 +index 000000000000..5c3e1a8c38f6 --- /dev/null +++ b/fs/mbcache2.c -@@ -0,0 +1,388 @@ +@@ -0,0 +1,359 @@ +#include +#include +#include @@ -97,9 +97,15 @@ index 000000000000..4ccf0752c6d1 + +/* + * Mbcache is a simple key-value store. Keys need not be unique, however -+ * key-value pairs are expected to be unique (we use this in ++ * key-value pairs are expected to be unique (we use this fact in + * mb2_cache_entry_delete_block()). + * ++ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. ++ * They use hash of a block contents as a key and block number as a value. ++ * That's why keys need not be unique (different xattr blocks may end up having ++ * the same hash). However block number always uniquely identifies a cache ++ * entry. ++ * + * We provide functions for creation and removal of entries, search by key, + * and a special "delete entry with given key-value pair" operation. Fixed + * size hash table is used for fast key lookups. @@ -129,13 +135,10 @@ index 000000000000..4ccf0752c6d1 + * + * Creates entry in @cache with key @key and records that data is stored in + * block @block. The function returns -EBUSY if entry with the same key -+ * and for the same block already exists in cache. Otherwise reference to -+ * the created entry is returned. ++ * and for the same block already exists in cache. Otherwise 0 is returned. 
+ */ -+struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache, -+ gfp_t mask, -+ unsigned int key, -+ sector_t block) ++int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, ++ sector_t block) +{ + struct mb2_cache_entry *entry, *dup; + struct hlist_bl_node *dup_node; @@ -143,11 +146,11 @@ index 000000000000..4ccf0752c6d1 + + entry = kmem_cache_alloc(mb2_entry_cache, mask); + if (!entry) -+ return ERR_PTR(-ENOMEM); ++ return -ENOMEM; + + INIT_LIST_HEAD(&entry->e_lru_list); + /* One ref for hash, one ref returned */ -+ atomic_set(&entry->e_refcnt, 2); ++ atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_block = block; + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; @@ -157,7 +160,7 @@ index 000000000000..4ccf0752c6d1 + if (dup->e_key == key && dup->e_block == block) { + hlist_bl_unlock(head); + kmem_cache_free(mb2_entry_cache, entry); -+ return ERR_PTR(-EBUSY); ++ return -EBUSY; + } + } + hlist_bl_add_head(&entry->e_hash_list, head); @@ -170,7 +173,7 @@ index 000000000000..4ccf0752c6d1 + cache->c_entry_count++; + spin_unlock(&cache->c_lru_list_lock); + -+ return entry; ++ return 0; +} +EXPORT_SYMBOL(mb2_cache_entry_create); + @@ -180,41 +183,9 @@ index 000000000000..4ccf0752c6d1 +} +EXPORT_SYMBOL(__mb2_cache_entry_free); + -+/* -+ * mb2_cache_entry_delete - delete entry from cache -+ * @cache - cache where the entry is -+ * @entry - entry to delete -+ * -+ * Delete entry from cache. The entry is unhashed and deleted from the lru list -+ * so it cannot be found. We also drop the reference to @entry caller gave us. -+ * However entry need not be freed if there's someone else still holding a -+ * reference to it. Freeing happens when the last reference is dropped. -+ */ -+void mb2_cache_entry_delete(struct mb2_cache *cache, -+ struct mb2_cache_entry *entry) -+{ -+ struct hlist_bl_head *head = entry->e_hash_list_head; -+ -+ hlist_bl_lock(head); -+ if (!hlist_bl_unhashed(&entry->e_hash_list)) { -+ hlist_bl_del_init(&entry->e_hash_list); -+ atomic_dec(&entry->e_refcnt); -+ } -+ hlist_bl_unlock(head); -+ spin_lock(&cache->c_lru_list_lock); -+ if (!list_empty(&entry->e_lru_list)) { -+ list_del_init(&entry->e_lru_list); -+ cache->c_entry_count--; -+ atomic_dec(&entry->e_refcnt); -+ } -+ spin_unlock(&cache->c_lru_list_lock); -+ mb2_cache_entry_put(cache, entry); -+} -+EXPORT_SYMBOL(mb2_cache_entry_delete); -+ +static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, + struct mb2_cache_entry *entry, -+ unsigned int key) ++ u32 key) +{ + struct mb2_cache_entry *old_entry = entry; + struct hlist_bl_node *node; @@ -256,7 +227,7 @@ index 000000000000..4ccf0752c6d1 + * entry found and returns the entry. + */ +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, -+ unsigned int key) ++ u32 key) +{ + return __entry_find(cache, NULL, key); +} @@ -286,7 +257,7 @@ index 000000000000..4ccf0752c6d1 + * + * Remove entry from cache @cache with key @key with data stored in @block. 
+ */ -+void mb2_cache_entry_delete_block(struct mb2_cache *cache, unsigned int key, ++void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + sector_t block) +{ + struct hlist_bl_node *node; @@ -477,10 +448,10 @@ index 000000000000..4ccf0752c6d1 +MODULE_LICENSE("GPL"); diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h new file mode 100644 -index 000000000000..2a58c51c3a0a +index 000000000000..b6f160ff2533 --- /dev/null +++ b/include/linux/mbcache2.h -@@ -0,0 +1,54 @@ +@@ -0,0 +1,50 @@ +#ifndef _LINUX_MB2CACHE_H +#define _LINUX_MB2CACHE_H + @@ -499,7 +470,7 @@ index 000000000000..2a58c51c3a0a + struct hlist_bl_node e_hash_list; + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ -+ unsigned int e_key; ++ u32 e_key; + /* Block number of hashed block - stable during lifetime of the entry */ + sector_t e_block; + /* Head of hash list (for list bit lock) - stable */ @@ -509,12 +480,8 @@ index 000000000000..2a58c51c3a0a +struct mb2_cache *mb2_cache_create(int bucket_bits); +void mb2_cache_destroy(struct mb2_cache *cache); + -+struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache, -+ gfp_t mask, -+ unsigned int key, -+ sector_t block); -+void mb2_cache_entry_delete(struct mb2_cache *cache, -+ struct mb2_cache_entry *entry); ++int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, ++ sector_t block); +void __mb2_cache_entry_free(struct mb2_cache_entry *entry); +static inline int mb2_cache_entry_put(struct mb2_cache *cache, + struct mb2_cache_entry *entry) @@ -525,10 +492,10 @@ index 000000000000..2a58c51c3a0a + return 1; +} + -+void mb2_cache_entry_delete_block(struct mb2_cache *cache, unsigned int key, ++void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + sector_t block); +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, -+ unsigned int key); ++ u32 key); +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, + struct mb2_cache_entry *entry); +void mb2_cache_entry_touch(struct mb2_cache *cache, @@ -536,6 +503,6 @@ index 000000000000..2a58c51c3a0a + +#endif /* _LINUX_MB2CACHE_H */ -- -2.1.4 +2.6.2 diff --git a/remove-mbcache b/remove-mbcache index 18cbd236..1cb1ff22 100644 --- a/remove-mbcache +++ b/remove-mbcache @@ -1,4 +1,4 @@ -mbcache: remove +mbcache: remove mbcache From: Jan Kara @@ -954,6 +954,6 @@ index 6a392e7a723a..000000000000 - struct block_device *, - unsigned int); -- -2.1.4 +2.6.2 diff --git a/series b/series index 42403699..d992e459 100644 --- a/series +++ b/series @@ -9,6 +9,11 @@ ext2-convert-to-mbcache2 remove-mbcache mbcache2-limit-cache-size mbcache2-use-referenced-bit-instead-of-LRU +mbcache2-rename-to-mbcache +kill-ext4_mballoc_ready +get-rid-of-e_hash_list_head +shortcut-setting-of-xattr-to-the-same-value +add-reusable-flag-to-cache-entries ########################################## # unstable patches diff --git a/shortcut-setting-of-xattr-to-the-same-value b/shortcut-setting-of-xattr-to-the-same-value new file mode 100644 index 00000000..216b0811 --- /dev/null +++ b/shortcut-setting-of-xattr-to-the-same-value @@ -0,0 +1,57 @@ +ext4: shortcut setting of xattr to the same value + +From: Jan Kara + +When someone tried to set xattr to the same value (i.e., not changing +anything) we did all the work of removing original xattr, possibly +breaking references to shared xattr block, inserting new xattr, and +merging xattr blocks again. 
Since this is not so rare operation and it +is relatively cheap for us to detect this case, check for this and +shortcut xattr setting in that case. + +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/xattr.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index c6af8a7a436a..b661ae8332e3 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1096,6 +1096,17 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, + return 0; + } + ++static int ext4_xattr_value_same(struct ext4_xattr_search *s, ++ struct ext4_xattr_info *i) ++{ ++ void *value; ++ ++ if (le32_to_cpu(s->here->e_value_size) != i->value_len) ++ return 0; ++ value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs); ++ return !memcmp(value, i->value, i->value_len); ++} ++ + /* + * ext4_xattr_set_handle() + * +@@ -1172,6 +1183,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + else if (!bs.s.not_found) + error = ext4_xattr_block_set(handle, inode, &i, &bs); + } else { ++ error = 0; ++ /* Xattr value did not change? Save us some work and bail out */ ++ if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i)) ++ goto cleanup; ++ if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i)) ++ goto cleanup; ++ + error = ext4_xattr_ibody_set(handle, inode, &i, &is); + if (!error && !bs.s.not_found) { + i.value = NULL; +-- +2.6.2 + + diff --git a/timestamps b/timestamps index 230c74e0..77d83c97 100755 --- a/timestamps +++ b/timestamps @@ -27,26 +27,20 @@ touch -d @1452287245 block-dio-during-truncate touch -d @1453078165 crypto-add-ioctls-to-backup-crypto-metadata touch -d @1453097831 crypto-rename-ext4_get_encryption_info touch -d @1453098259 crypto-add-ciphertext_access-mount-option -touch -d @1454891705 crypto-fix-validate-when-key-add-remove -touch -d @1454910866 crypto-move-context-consistency-check-to-ext4_file_open -touch -d @1455250512 fix-scheduling-in-atomic-on-group-checksum-failure -touch -d @1455253041 fix-erroneus-return-value -touch -d @1455254236 add-a-line-break-for-proc-mb_groups-display -touch -d @1455257759 fix-potential-integer-overflow -touch -d @1455258043 dont-read-blocks-from-disk-after-extents-being-swapped -touch -d @1455258180 remove-unused-parameter-newblock touch -d @1455600079 stable-boundary -touch -d @1455859105 fix-bh-b_state-corruption -touch -d @1455860001 fix-crashes-in-dioread_nolock-mode -touch -d @1455908377 fix-memleak-in-ext4_readdir -touch -d @1455915167 reimplement-mbcache -touch -d @1455915233 ext4-convert-to-mbcache2 -touch -d @1455915523 ext2-convert-to-mbcache2 -touch -d @1455915619 remove-mbcache -touch -d @1455915679 mbcache2-limit-cache-size -touch -d @1455915868 mbcache2-use-referenced-bit-instead-of-LRU touch -d @1456097501 make-sure-to-revoke-all-the-freeable-blocks-in-ext4_free_blocks touch -d @1456097924 iterate-over-buffer-heads-correctly-in-move_extent_per_page -touch -d @1456097959 series -touch -d @1456097967 status touch -d @1456098126 timestamps +touch -d @1456159749 reimplement-mbcache +touch -d @1456159813 ext4-convert-to-mbcache2 +touch -d @1456160198 ext2-convert-to-mbcache2 +touch -d @1456161674 remove-mbcache +touch -d @1456162383 mbcache2-limit-cache-size +touch -d @1456183427 mbcache2-use-referenced-bit-instead-of-LRU +touch -d @1456198522 mbcache2-rename-to-mbcache +touch -d @1456198865 kill-ext4_mballoc_ready +touch -d @1456198925 get-rid-of-e_hash_list_head +touch -d @1456198984 shortcut-setting-of-xattr-to-the-same-value +touch 
-d @1456199044 add-reusable-flag-to-cache-entries +touch -d @1456199094 series +touch -d @1456199097 status -- 2.11.4.GIT
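
For anyone reviewing the series without the trees handy, here is a minimal, hypothetical sketch of how a filesystem might drive the renamed mb_cache API from include/linux/mbcache.h as it stands after the mbcache2-rename-to-mbcache patch above (mb_cache_entry_create() still takes four arguments at this point; the add-reusable-flag patch later adds a fifth). The example_* names, the choice of GFP_NOFS, and the pr_debug() reporting are illustrative assumptions, not code from any patch in the queue:

#include <linux/mbcache.h>

static struct mb_cache *example_cache;

static int example_cache_init(void)
{
        /* 2^10 hash buckets, the same HASH_BUCKET_BITS ext4 uses */
        example_cache = mb_cache_create(10);
        return example_cache ? 0 : -ENOMEM;
}

static void example_cache_insert(u32 hash, sector_t blocknr)
{
        int err;

        /* -EBUSY only means this (hash, block) pair is already cached */
        err = mb_cache_entry_create(example_cache, GFP_NOFS, hash, blocknr);
        if (err && err != -EBUSY)
                pr_debug("mbcache insert failed: %d\n", err);
}

static sector_t example_cache_lookup(u32 hash)
{
        struct mb_cache_entry *ce;
        sector_t blocknr = 0;

        ce = mb_cache_entry_find_first(example_cache, hash);
        if (ce) {
                blocknr = ce->e_block;
                /* give the entry a better chance to survive reclaim */
                mb_cache_entry_touch(example_cache, ce);
                /* drop the reference _find_first() took for us */
                mb_cache_entry_put(example_cache, ce);
        }
        return blocknr;
}

static void example_cache_forget(u32 hash, sector_t blocknr)
{
        /* unhash the entry for a block that is being freed or rehashed */
        mb_cache_entry_delete_block(example_cache, hash, blocknr);
}

static void example_cache_exit(void)
{
        mb_cache_destroy(example_cache);
}

Every entry returned by mb_cache_entry_find_first()/_find_next() carries its own reference, so each hit must be balanced by mb_cache_entry_put(), just as ext4_xattr_cache_find() does in the patches above.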