From 453e1491fbba1c16416132c3a46e5d2653c370bd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 7 Apr 2014 10:15:59 -0400 Subject: [PATCH] Rebase to v3.14-8670-gd15e031 --- add-__init-marking-to-init_inodecache | 28 - add-ext4_es_store_pblock_status | 80 --- add-support-collapse-range | 447 ------------- add-zero-range-support | 593 ------------------ address-a-benign-compiler-warning | 35 -- atomically-set-inode-flags | 101 --- avoid-exposure-stale-data-in-ext4_punch_hole | 39 -- cap-max-length-from-ext | 56 -- change-block-and-index-hash-chain-to-hlist_bl_node | 308 --------- cleanup-error-handling-in-swap_inode_boot_loader | 62 -- ...c-code-in-ext4_ext_handle_uninitialized_extents | 52 -- dont-hold-j_state_lock-during-wake_up | 34 - dont-leave-crtime-uninitialized | 27 - dont-try-to-set-hash-signedness-flag-if-fs-is-ro | 49 -- each-fs-uses-its-own-mbcache | 348 ----------- ext4-dont-calculate-total-xattr-size-if-unneeded | 48 -- fix-comment-typo | 32 - ...turn-from-ext4_ext_handle_uninitialized_extents | 49 -- ...rtial-cluster-handling-for-bigalloc-filesystems | 60 -- fix-premature-freeing-of-partial-clusters | 58 -- fix-resize-large-itable | 127 ---- fix-resize-nonstd-blocks-per-group | 31 - fix-swap_inode_boot_loader-cleanup | 47 -- fix-use-after-free-in-jbd2_journal_start_reserved | 31 - fix-xfstest-generic-299-block-validity-failures | 52 -- fs-push-sync_filesystem-down-to-remount_fs | 631 ------------------- initialize-fe_logical | 52 -- jbd2-add-transaction-to-checkpoint-list-earlier | 76 --- jbd2-calc-stats-wo-j_state_lock-and-j_list_lock | 66 -- jbd2-check-b_transaction-wo-taking-j_list_lock | 29 - jbd2-dont-unplog-after-revoke-records | 33 - jbd2-improve-error-messages-for-inconsistent-jh | 92 --- jbd2-mark-file-local-functions-as-static | 52 -- ...nimize-j_list_lock-in-journal_get_create_access | 37 -- jbd2-minimize-j_state_lock-in-jbd2_journal_forget | 57 -- kill-i_version-support-for-hurd-OS-filesystems | 73 --- make-ext4_block_zero_page_range_static | 103 --- mbcache-decouple-locking-of-local-from-global-data | 688 --------------------- merge-uninitialized-extents | 95 --- only-sync-filesystem-when-remounting-ro | 32 - optimize-hurd-tests | 84 --- refactor-ext4_fallocate-code | 207 ------- remove-an-unneeded-check-in-mext_page_mkuptodate | 24 - remove-unneeded-test-of-ret-variable | 59 -- remove-unused-ac_ex_scanned | 55 -- series | 124 +--- ...e-warnings-in-extent-status-tree-debugging-code | 67 -- speedup-WB_SYNC_ALL-pass | 78 --- timestamps | 107 +--- translate-mode-bits-to-strings | 69 --- update-i_size-after-the-preallocation | 179 ------ 51 files changed, 61 insertions(+), 5902 deletions(-) delete mode 100644 add-__init-marking-to-init_inodecache delete mode 100644 add-ext4_es_store_pblock_status delete mode 100644 add-support-collapse-range delete mode 100644 add-zero-range-support delete mode 100644 address-a-benign-compiler-warning delete mode 100644 atomically-set-inode-flags delete mode 100644 avoid-exposure-stale-data-in-ext4_punch_hole delete mode 100644 cap-max-length-from-ext delete mode 100644 change-block-and-index-hash-chain-to-hlist_bl_node delete mode 100644 cleanup-error-handling-in-swap_inode_boot_loader delete mode 100644 delete-path-delalloc-code-in-ext4_ext_handle_uninitialized_extents delete mode 100644 dont-hold-j_state_lock-during-wake_up delete mode 100644 dont-leave-crtime-uninitialized delete mode 100644 dont-try-to-set-hash-signedness-flag-if-fs-is-ro delete mode 100644 each-fs-uses-its-own-mbcache delete mode 100644 ext4-dont-calculate-total-xattr-size-if-unneeded delete mode 100644 fix-comment-typo delete mode 100644 fix-error-return-from-ext4_ext_handle_uninitialized_extents delete mode 100644 fix-partial-cluster-handling-for-bigalloc-filesystems delete mode 100644 fix-premature-freeing-of-partial-clusters delete mode 100644 fix-resize-large-itable delete mode 100644 fix-resize-nonstd-blocks-per-group delete mode 100644 fix-swap_inode_boot_loader-cleanup delete mode 100644 fix-use-after-free-in-jbd2_journal_start_reserved delete mode 100644 fix-xfstest-generic-299-block-validity-failures delete mode 100644 fs-push-sync_filesystem-down-to-remount_fs delete mode 100644 initialize-fe_logical delete mode 100644 jbd2-add-transaction-to-checkpoint-list-earlier delete mode 100644 jbd2-calc-stats-wo-j_state_lock-and-j_list_lock delete mode 100644 jbd2-check-b_transaction-wo-taking-j_list_lock delete mode 100644 jbd2-dont-unplog-after-revoke-records delete mode 100644 jbd2-improve-error-messages-for-inconsistent-jh delete mode 100644 jbd2-mark-file-local-functions-as-static delete mode 100644 jbd2-minimize-j_list_lock-in-journal_get_create_access delete mode 100644 jbd2-minimize-j_state_lock-in-jbd2_journal_forget delete mode 100644 kill-i_version-support-for-hurd-OS-filesystems delete mode 100644 make-ext4_block_zero_page_range_static delete mode 100644 mbcache-decouple-locking-of-local-from-global-data delete mode 100644 merge-uninitialized-extents delete mode 100644 only-sync-filesystem-when-remounting-ro delete mode 100644 optimize-hurd-tests delete mode 100644 refactor-ext4_fallocate-code delete mode 100644 remove-an-unneeded-check-in-mext_page_mkuptodate delete mode 100644 remove-unneeded-test-of-ret-variable delete mode 100644 remove-unused-ac_ex_scanned rewrite series (76%) delete mode 100644 silence-warnings-in-extent-status-tree-debugging-code delete mode 100644 speedup-WB_SYNC_ALL-pass rewrite timestamps (78%) delete mode 100644 translate-mode-bits-to-strings delete mode 100644 update-i_size-after-the-preallocation diff --git a/add-__init-marking-to-init_inodecache b/add-__init-marking-to-init_inodecache deleted file mode 100644 index e302dee1..00000000 --- a/add-__init-marking-to-init_inodecache +++ /dev/null @@ -1,28 +0,0 @@ -ext4: Add __init marking to init_inodecache - -From: Fabian Frederick - -init_inodecache is only called by __init init_ext4_fs. - -Signed-off-by: Fabian Frederick -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/super.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 1f7784d..9a387b8 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -940,7 +940,7 @@ static void init_once(void *foo) - inode_init_once(&ei->vfs_inode); - } - --static int init_inodecache(void) -+static int __init init_inodecache(void) - { - ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", - sizeof(struct ext4_inode_info), --- -1.8.1.4 - diff --git a/add-ext4_es_store_pblock_status b/add-ext4_es_store_pblock_status deleted file mode 100644 index 86fef8ee..00000000 --- a/add-ext4_es_store_pblock_status +++ /dev/null @@ -1,80 +0,0 @@ -ext4: add ext4_es_store_pblock_status() - -Avoid false positives by static code analysis tools such as sparse and -coverity caused by the fact that we set the physical block, and then -the status in the extent_status structure. It is also more efficient -to set both of these values at once. - -Addresses-Coverity-Id: #989077 -Addresses-Coverity-Id: #989078 -Addresses-Coverity-Id: #1080722 - -Signed-off-by: "Theodore Ts'o" -Reviewed-by: Zheng Liu ---- - fs/ext4/extents_status.c | 14 ++++++-------- - fs/ext4/extents_status.h | 9 +++++++++ - 2 files changed, 15 insertions(+), 8 deletions(-) - -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index 3981ff7..a900004 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -658,8 +658,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - - newes.es_lblk = lblk; - newes.es_len = len; -- ext4_es_store_pblock(&newes, pblk); -- ext4_es_store_status(&newes, status); -+ ext4_es_store_pblock_status(&newes, pblk, status); - trace_ext4_es_insert_extent(inode, &newes); - - ext4_es_insert_extent_check(inode, &newes); -@@ -699,8 +698,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, - - newes.es_lblk = lblk; - newes.es_len = len; -- ext4_es_store_pblock(&newes, pblk); -- ext4_es_store_status(&newes, status); -+ ext4_es_store_pblock_status(&newes, pblk, status); - trace_ext4_es_cache_extent(inode, &newes); - - if (!len) -@@ -812,13 +810,13 @@ retry: - - newes.es_lblk = end + 1; - newes.es_len = len2; -+ block = 0x7FDEADBEEF; - if (ext4_es_is_written(&orig_es) || -- ext4_es_is_unwritten(&orig_es)) { -+ ext4_es_is_unwritten(&orig_es)) - block = ext4_es_pblock(&orig_es) + - orig_es.es_len - len2; -- ext4_es_store_pblock(&newes, block); -- } -- ext4_es_store_status(&newes, ext4_es_status(&orig_es)); -+ ext4_es_store_pblock_status(&newes, block, -+ ext4_es_status(&orig_es)); - err = __es_insert_extent(inode, &newes); - if (err) { - es->es_lblk = orig_es.es_lblk; -diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h -index 167f4ab8..f1b62a4 100644 ---- a/fs/ext4/extents_status.h -+++ b/fs/ext4/extents_status.h -@@ -129,6 +129,15 @@ static inline void ext4_es_store_status(struct extent_status *es, - (es->es_pblk & ~ES_MASK)); - } - -+static inline void ext4_es_store_pblock_status(struct extent_status *es, -+ ext4_fsblk_t pb, -+ unsigned int status) -+{ -+ es->es_pblk = (((ext4_fsblk_t) -+ (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | -+ (pb & ~ES_MASK)); -+} -+ - extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); - extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); - extern void ext4_es_lru_add(struct inode *inode); diff --git a/add-support-collapse-range b/add-support-collapse-range deleted file mode 100644 index 88571d8d..00000000 --- a/add-support-collapse-range +++ /dev/null @@ -1,447 +0,0 @@ -ext4: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate - -From: Namjae Jeon - -This patch implements fallocate's FALLOC_FL_COLLAPSE_RANGE for Ext4. - -The semantics of this flag are following: -1) It collapses the range lying between offset and length by removing any data - blocks which are present in this range and than updates all the logical - offsets of extents beyond "offset + len" to nullify the hole created by - removing blocks. In short, it does not leave a hole. -2) It should be used exclusively. No other fallocate flag in combination. -3) Offset and length supplied to fallocate should be fs block size aligned - in case of xfs and ext4. -4) Collaspe range does not work beyond i_size. - -Signed-off-by: Namjae Jeon -Signed-off-by: Ashish Sangwan -Tested-by: Dongsu Park -Signed-off-by: "Theodore Ts'o" - ---- - fs/ext4/ext4.h | 3 + - fs/ext4/extents.c | 307 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- - fs/ext4/move_extent.c | 2 +- - include/trace/events/ext4.h | 33 ++++++- - 4 files changed, 342 insertions(+), 3 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index b7207db..beec427 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -2758,6 +2758,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); - extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len); - extern int ext4_ext_precache(struct inode *inode); -+extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); - - /* move_extent.c */ - extern void ext4_double_down_write_data_sem(struct inode *first, -@@ -2767,6 +2768,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, - extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, - __u64 start_orig, __u64 start_donor, - __u64 len, __u64 *moved_len); -+extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, -+ struct ext4_extent **extent); - - /* page-io.c */ - extern int __init ext4_init_pageio(void); -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 2e0608e..bbba1ef 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4581,12 +4581,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - unsigned int credits, blkbits = inode->i_blkbits; - - /* Return error if mode is not supported */ -- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) -+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | -+ FALLOC_FL_COLLAPSE_RANGE)) - return -EOPNOTSUPP; - - if (mode & FALLOC_FL_PUNCH_HOLE) - return ext4_punch_hole(inode, offset, len); - -+ if (mode & FALLOC_FL_COLLAPSE_RANGE) -+ return ext4_collapse_range(inode, offset, len); -+ - ret = ext4_convert_inline_data(inode); - if (ret) - return ret; -@@ -4885,3 +4889,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - ext4_es_lru_add(inode); - return error; - } -+ -+/* -+ * ext4_access_path: -+ * Function to access the path buffer for marking it dirty. -+ * It also checks if there are sufficient credits left in the journal handle -+ * to update path. -+ */ -+static int -+ext4_access_path(handle_t *handle, struct inode *inode, -+ struct ext4_ext_path *path) -+{ -+ int credits, err; -+ -+ if (!ext4_handle_valid(handle)) -+ return 0; -+ -+ /* -+ * Check if need to extend journal credits -+ * 3 for leaf, sb, and inode plus 2 (bmap and group -+ * descriptor) for each block group; assume two block -+ * groups -+ */ -+ if (handle->h_buffer_credits < 7) { -+ credits = ext4_writepage_trans_blocks(inode); -+ err = ext4_ext_truncate_extend_restart(handle, inode, credits); -+ /* EAGAIN is success */ -+ if (err && err != -EAGAIN) -+ return err; -+ } -+ -+ err = ext4_ext_get_access(handle, inode, path); -+ return err; -+} -+ -+/* -+ * ext4_ext_shift_path_extents: -+ * Shift the extents of a path structure lying between path[depth].p_ext -+ * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift -+ * from starting block for each extent. -+ */ -+static int -+ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, -+ struct inode *inode, handle_t *handle, -+ ext4_lblk_t *start) -+{ -+ int depth, err = 0; -+ struct ext4_extent *ex_start, *ex_last; -+ bool update = 0; -+ depth = path->p_depth; -+ -+ while (depth >= 0) { -+ if (depth == path->p_depth) { -+ ex_start = path[depth].p_ext; -+ if (!ex_start) -+ return -EIO; -+ -+ ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); -+ if (!ex_last) -+ return -EIO; -+ -+ err = ext4_access_path(handle, inode, path + depth); -+ if (err) -+ goto out; -+ -+ if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ update = 1; -+ -+ *start = ex_last->ee_block + -+ ext4_ext_get_actual_len(ex_last); -+ -+ while (ex_start <= ex_last) { -+ ex_start->ee_block -= shift; -+ if (ex_start > -+ EXT_FIRST_EXTENT(path[depth].p_hdr)) { -+ if (ext4_ext_try_to_merge_right(inode, -+ path, ex_start - 1)) -+ ex_last--; -+ } -+ ex_start++; -+ } -+ err = ext4_ext_dirty(handle, inode, path + depth); -+ if (err) -+ goto out; -+ -+ if (--depth < 0 || !update) -+ break; -+ } -+ -+ /* Update index too */ -+ err = ext4_access_path(handle, inode, path + depth); -+ if (err) -+ goto out; -+ -+ path[depth].p_idx->ei_block -= shift; -+ err = ext4_ext_dirty(handle, inode, path + depth); -+ if (err) -+ goto out; -+ -+ /* we are done if current index is not a starting index */ -+ if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) -+ break; -+ -+ depth--; -+ } -+ -+out: -+ return err; -+} -+ -+/* -+ * ext4_ext_shift_extents: -+ * All the extents which lies in the range from start to the last allocated -+ * block for the file are shifted downwards by shift blocks. -+ * On success, 0 is returned, error otherwise. -+ */ -+static int -+ext4_ext_shift_extents(struct inode *inode, handle_t *handle, -+ ext4_lblk_t start, ext4_lblk_t shift) -+{ -+ struct ext4_ext_path *path; -+ int ret = 0, depth; -+ struct ext4_extent *extent; -+ ext4_lblk_t stop_block, current_block; -+ ext4_lblk_t ex_start, ex_end; -+ -+ /* Let path point to the last extent */ -+ path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); -+ if (IS_ERR(path)) -+ return PTR_ERR(path); -+ -+ depth = path->p_depth; -+ extent = path[depth].p_ext; -+ if (!extent) { -+ ext4_ext_drop_refs(path); -+ kfree(path); -+ return ret; -+ } -+ -+ stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); -+ ext4_ext_drop_refs(path); -+ kfree(path); -+ -+ /* Nothing to shift, if hole is at the end of file */ -+ if (start >= stop_block) -+ return ret; -+ -+ /* -+ * Don't start shifting extents until we make sure the hole is big -+ * enough to accomodate the shift. -+ */ -+ path = ext4_ext_find_extent(inode, start - 1, NULL, 0); -+ depth = path->p_depth; -+ extent = path[depth].p_ext; -+ ex_start = extent->ee_block; -+ ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); -+ ext4_ext_drop_refs(path); -+ kfree(path); -+ -+ if ((start == ex_start && shift > ex_start) || -+ (shift > start - ex_end)) -+ return -EINVAL; -+ -+ /* Its safe to start updating extents */ -+ while (start < stop_block) { -+ path = ext4_ext_find_extent(inode, start, NULL, 0); -+ if (IS_ERR(path)) -+ return PTR_ERR(path); -+ depth = path->p_depth; -+ extent = path[depth].p_ext; -+ current_block = extent->ee_block; -+ if (start > current_block) { -+ /* Hole, move to the next extent */ -+ ret = mext_next_extent(inode, path, &extent); -+ if (ret != 0) { -+ ext4_ext_drop_refs(path); -+ kfree(path); -+ if (ret == 1) -+ ret = 0; -+ break; -+ } -+ } -+ ret = ext4_ext_shift_path_extents(path, shift, inode, -+ handle, &start); -+ ext4_ext_drop_refs(path); -+ kfree(path); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+/* -+ * ext4_collapse_range: -+ * This implements the fallocate's collapse range functionality for ext4 -+ * Returns: 0 and non-zero on error. -+ */ -+int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) -+{ -+ struct super_block *sb = inode->i_sb; -+ ext4_lblk_t punch_start, punch_stop; -+ handle_t *handle; -+ unsigned int credits; -+ loff_t new_size; -+ int ret; -+ -+ BUG_ON(offset + len > i_size_read(inode)); -+ -+ /* Collapse range works only on fs block size aligned offsets. */ -+ if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || -+ len & (EXT4_BLOCK_SIZE(sb) - 1)) -+ return -EINVAL; -+ -+ if (!S_ISREG(inode->i_mode)) -+ return -EOPNOTSUPP; -+ -+ trace_ext4_collapse_range(inode, offset, len); -+ -+ punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); -+ punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); -+ -+ /* Write out all dirty pages */ -+ ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); -+ if (ret) -+ return ret; -+ -+ /* Take mutex lock */ -+ mutex_lock(&inode->i_mutex); -+ -+ /* It's not possible punch hole on append only file */ -+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { -+ ret = -EPERM; -+ goto out_mutex; -+ } -+ -+ if (IS_SWAPFILE(inode)) { -+ ret = -ETXTBSY; -+ goto out_mutex; -+ } -+ -+ /* Currently just for extent based files */ -+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { -+ ret = -EOPNOTSUPP; -+ goto out_mutex; -+ } -+ -+ truncate_pagecache_range(inode, offset, -1); -+ -+ /* Wait for existing dio to complete */ -+ ext4_inode_block_unlocked_dio(inode); -+ inode_dio_wait(inode); -+ -+ credits = ext4_writepage_trans_blocks(inode); -+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ goto out_dio; -+ } -+ -+ down_write(&EXT4_I(inode)->i_data_sem); -+ ext4_discard_preallocations(inode); -+ -+ ret = ext4_es_remove_extent(inode, punch_start, -+ EXT_MAX_BLOCKS - punch_start - 1); -+ if (ret) { -+ up_write(&EXT4_I(inode)->i_data_sem); -+ goto out_stop; -+ } -+ -+ ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); -+ if (ret) { -+ up_write(&EXT4_I(inode)->i_data_sem); -+ goto out_stop; -+ } -+ -+ ret = ext4_ext_shift_extents(inode, handle, punch_stop, -+ punch_stop - punch_start); -+ if (ret) { -+ up_write(&EXT4_I(inode)->i_data_sem); -+ goto out_stop; -+ } -+ -+ new_size = i_size_read(inode) - len; -+ truncate_setsize(inode, new_size); -+ EXT4_I(inode)->i_disksize = new_size; -+ -+ ext4_discard_preallocations(inode); -+ up_write(&EXT4_I(inode)->i_data_sem); -+ if (IS_SYNC(inode)) -+ ext4_handle_sync(handle); -+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode); -+ ext4_mark_inode_dirty(handle, inode); -+ -+out_stop: -+ ext4_journal_stop(handle); -+out_dio: -+ ext4_inode_resume_unlocked_dio(inode); -+out_mutex: -+ mutex_unlock(&inode->i_mutex); -+ return ret; -+} -diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c -index f39a88a..58ee7dc 100644 ---- a/fs/ext4/move_extent.c -+++ b/fs/ext4/move_extent.c -@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) - * ext4_ext_path structure refers to the last extent, or a negative error - * value on failure. - */ --static int -+int - mext_next_extent(struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent **extent) - { -diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h -index 451e020..e9d7ee7 100644 ---- a/include/trace/events/ext4.h -+++ b/include/trace/events/ext4.h -@@ -16,6 +16,11 @@ struct mpage_da_data; - struct ext4_map_blocks; - struct extent_status; - -+/* shim until we merge in the xfs_collapse_range branch */ -+#ifndef FALLOC_FL_COLLAPSE_RANGE -+#define FALLOC_FL_COLLAPSE_RANGE 0x08 -+#endif -+ - #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) - - #define show_mballoc_flags(flags) __print_flags(flags, "|", \ -@@ -71,7 +76,8 @@ struct extent_status; - #define show_falloc_mode(mode) __print_flags(mode, "|", \ - { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ - { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ -- { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}) -+ { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ -+ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}) - - - TRACE_EVENT(ext4_free_inode, -@@ -2415,6 +2421,31 @@ TRACE_EVENT(ext4_es_shrink_exit, - __entry->shrunk_nr, __entry->cache_cnt) - ); - -+TRACE_EVENT(ext4_collapse_range, -+ TP_PROTO(struct inode *inode, loff_t offset, loff_t len), -+ -+ TP_ARGS(inode, offset, len), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev) -+ __field(ino_t, ino) -+ __field(loff_t, offset) -+ __field(loff_t, len) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = inode->i_sb->s_dev; -+ __entry->ino = inode->i_ino; -+ __entry->offset = offset; -+ __entry->len = len; -+ ), -+ -+ TP_printk("dev %d,%d ino %lu offset %lld len %lld", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ (unsigned long) __entry->ino, -+ __entry->offset, __entry->len) -+); -+ - #endif /* _TRACE_EXT4_H */ - - /* This part must be outside protection */ diff --git a/add-zero-range-support b/add-zero-range-support deleted file mode 100644 index aa827d9a..00000000 --- a/add-zero-range-support +++ /dev/null @@ -1,593 +0,0 @@ -ext4: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate - -From: Lukas Czerner - -Introduce new FALLOC_FL_ZERO_RANGE flag for fallocate. This has the same -functionality as xfs ioctl XFS_IOC_ZERO_RANGE. - -It can be used to convert a range of file to zeros preferably without -issuing data IO. Blocks should be preallocated for the regions that span -holes in the file, and the entire range is preferable converted to -unwritten extents - -This can be also used to preallocate blocks past EOF in the same way as -with fallocate. Flag FALLOC_FL_KEEP_SIZE which should cause the inode -size to remain the same. - -Also add appropriate tracepoints. - -Signed-off-by: Lukas Czerner -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/ext4.h | 2 + - fs/ext4/extents.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- - fs/ext4/inode.c | 17 +++-- - include/trace/events/ext4.h | 68 +++++++++-------- - 4 files changed, 307 insertions(+), 53 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index beec427..1b3cbf8 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -568,6 +568,8 @@ enum { - #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 - /* Do not put hole in extent cache */ - #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 -+ /* Convert written extents to unwritten */ -+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400 - - /* - * The bit position of these flags must not overlap with any of the -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 1c09a09..491208c 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -3602,6 +3602,8 @@ out: - * b> Splits in two extents: Write is happening at either end of the extent - * c> Splits in three extents: Somone is writing in middle of the extent - * -+ * This works the same way in the case of initialized -> unwritten conversion. -+ * - * One of more index blocks maybe needed if the extent tree grow after - * the uninitialized extent split. To prevent ENOSPC occur at the IO - * complete, we need to split the uninitialized extent before DIO submit -@@ -3612,7 +3614,7 @@ out: - * - * Returns the size of uninitialized extent to be written on success. - */ --static int ext4_split_unwritten_extents(handle_t *handle, -+static int ext4_split_convert_extents(handle_t *handle, - struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path *path, -@@ -3624,9 +3626,9 @@ static int ext4_split_unwritten_extents(handle_t *handle, - unsigned int ee_len; - int split_flag = 0, depth; - -- ext_debug("ext4_split_unwritten_extents: inode %lu, logical" -- "block %llu, max_blocks %u\n", inode->i_ino, -- (unsigned long long)map->m_lblk, map->m_len); -+ ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", -+ __func__, inode->i_ino, -+ (unsigned long long)map->m_lblk, map->m_len); - - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; -@@ -3641,14 +3643,73 @@ static int ext4_split_unwritten_extents(handle_t *handle, - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); - -- split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; -- split_flag |= EXT4_EXT_MARK_UNINIT2; -- if (flags & EXT4_GET_BLOCKS_CONVERT) -- split_flag |= EXT4_EXT_DATA_VALID2; -+ /* Convert to unwritten */ -+ if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { -+ split_flag |= EXT4_EXT_DATA_VALID1; -+ /* Convert to initialized */ -+ } else if (flags & EXT4_GET_BLOCKS_CONVERT) { -+ split_flag |= ee_block + ee_len <= eof_block ? -+ EXT4_EXT_MAY_ZEROOUT : 0; -+ split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); -+ } - flags |= EXT4_GET_BLOCKS_PRE_IO; - return ext4_split_extent(handle, inode, path, map, split_flag, flags); - } - -+static int ext4_convert_initialized_extents(handle_t *handle, -+ struct inode *inode, -+ struct ext4_map_blocks *map, -+ struct ext4_ext_path *path) -+{ -+ struct ext4_extent *ex; -+ ext4_lblk_t ee_block; -+ unsigned int ee_len; -+ int depth; -+ int err = 0; -+ -+ depth = ext_depth(inode); -+ ex = path[depth].p_ext; -+ ee_block = le32_to_cpu(ex->ee_block); -+ ee_len = ext4_ext_get_actual_len(ex); -+ -+ ext_debug("%s: inode %lu, logical" -+ "block %llu, max_blocks %u\n", __func__, inode->i_ino, -+ (unsigned long long)ee_block, ee_len); -+ -+ if (ee_block != map->m_lblk || ee_len > map->m_len) { -+ err = ext4_split_convert_extents(handle, inode, map, path, -+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); -+ if (err < 0) -+ goto out; -+ ext4_ext_drop_refs(path); -+ path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); -+ if (IS_ERR(path)) { -+ err = PTR_ERR(path); -+ goto out; -+ } -+ depth = ext_depth(inode); -+ ex = path[depth].p_ext; -+ } -+ -+ err = ext4_ext_get_access(handle, inode, path + depth); -+ if (err) -+ goto out; -+ /* first mark the extent as uninitialized */ -+ ext4_ext_mark_uninitialized(ex); -+ -+ /* note: ext4_ext_correct_indexes() isn't needed here because -+ * borders are not changed -+ */ -+ ext4_ext_try_to_merge(handle, inode, path, ex); -+ -+ /* Mark modified extent as dirty */ -+ err = ext4_ext_dirty(handle, inode, path + path->p_depth); -+out: -+ ext4_ext_show_leaf(inode, path); -+ return err; -+} -+ -+ - static int ext4_convert_unwritten_extents_endio(handle_t *handle, - struct inode *inode, - struct ext4_map_blocks *map, -@@ -3682,8 +3743,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, - inode->i_ino, (unsigned long long)ee_block, ee_len, - (unsigned long long)map->m_lblk, map->m_len); - #endif -- err = ext4_split_unwritten_extents(handle, inode, map, path, -- EXT4_GET_BLOCKS_CONVERT); -+ err = ext4_split_convert_extents(handle, inode, map, path, -+ EXT4_GET_BLOCKS_CONVERT); - if (err < 0) - goto out; - ext4_ext_drop_refs(path); -@@ -3884,6 +3945,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, - } - - static int -+ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, -+ struct ext4_map_blocks *map, -+ struct ext4_ext_path *path, int flags, -+ unsigned int allocated, ext4_fsblk_t newblock) -+{ -+ int ret = 0; -+ int err = 0; -+ -+ /* -+ * Make sure that the extent is no bigger than we support with -+ * uninitialized extent -+ */ -+ if (map->m_len > EXT_UNINIT_MAX_LEN) -+ map->m_len = EXT_UNINIT_MAX_LEN / 2; -+ -+ ret = ext4_convert_initialized_extents(handle, inode, map, -+ path); -+ if (ret >= 0) { -+ ext4_update_inode_fsync_trans(handle, inode, 1); -+ err = check_eofblocks_fl(handle, inode, map->m_lblk, -+ path, map->m_len); -+ } else -+ err = ret; -+ map->m_flags |= EXT4_MAP_UNWRITTEN; -+ if (allocated > map->m_len) -+ allocated = map->m_len; -+ map->m_len = allocated; -+ -+ return err ? err : allocated; -+} -+ -+static int - ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path *path, int flags, -@@ -3910,8 +4003,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, - - /* get_block() before submit the IO, split the extent */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { -- ret = ext4_split_unwritten_extents(handle, inode, map, -- path, flags); -+ ret = ext4_split_convert_extents(handle, inode, map, -+ path, flags | EXT4_GET_BLOCKS_CONVERT); - if (ret <= 0) - goto out; - /* -@@ -4199,6 +4292,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t ee_start = ext4_ext_pblock(ex); - unsigned short ee_len; - -+ - /* - * Uninitialized extents are treated as holes, except that - * we split out initialized portions during a write. -@@ -4215,7 +4309,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, - ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, - ee_block, ee_len, newblock); - -- if (!ext4_ext_is_uninitialized(ex)) -+ /* -+ * If the extent is initialized check whether the -+ * caller wants to convert it to unwritten. -+ */ -+ if ((!ext4_ext_is_uninitialized(ex)) && -+ (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { -+ allocated = ext4_ext_convert_initialized_extent( -+ handle, inode, map, path, flags, -+ allocated, newblock); -+ goto out2; -+ } else if (!ext4_ext_is_uninitialized(ex)) - goto out; - - ret = ext4_ext_handle_uninitialized_extents( -@@ -4604,6 +4708,144 @@ retry: - return ret > 0 ? ret2 : ret; - } - -+static long ext4_zero_range(struct file *file, loff_t offset, -+ loff_t len, int mode) -+{ -+ struct inode *inode = file_inode(file); -+ handle_t *handle = NULL; -+ unsigned int max_blocks; -+ loff_t new_size = 0; -+ int ret = 0; -+ int flags; -+ int partial; -+ loff_t start, end; -+ ext4_lblk_t lblk; -+ struct address_space *mapping = inode->i_mapping; -+ unsigned int blkbits = inode->i_blkbits; -+ -+ trace_ext4_zero_range(inode, offset, len, mode); -+ -+ /* -+ * Write out all dirty pages to avoid race conditions -+ * Then release them. -+ */ -+ if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { -+ ret = filemap_write_and_wait_range(mapping, offset, -+ offset + len - 1); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * Round up offset. This is not fallocate, we neet to zero out -+ * blocks, so convert interior block aligned part of the range to -+ * unwritten and possibly manually zero out unaligned parts of the -+ * range. -+ */ -+ start = round_up(offset, 1 << blkbits); -+ end = round_down((offset + len), 1 << blkbits); -+ -+ if (start < offset || end > offset + len) -+ return -EINVAL; -+ partial = (offset + len) & ((1 << blkbits) - 1); -+ -+ lblk = start >> blkbits; -+ max_blocks = (end >> blkbits); -+ if (max_blocks < lblk) -+ max_blocks = 0; -+ else -+ max_blocks -= lblk; -+ -+ flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | -+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; -+ if (mode & FALLOC_FL_KEEP_SIZE) -+ flags |= EXT4_GET_BLOCKS_KEEP_SIZE; -+ -+ mutex_lock(&inode->i_mutex); -+ -+ /* -+ * Indirect files do not support unwritten extnets -+ */ -+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { -+ ret = -EOPNOTSUPP; -+ goto out_mutex; -+ } -+ -+ if (!(mode & FALLOC_FL_KEEP_SIZE) && -+ offset + len > i_size_read(inode)) { -+ new_size = offset + len; -+ ret = inode_newsize_ok(inode, new_size); -+ if (ret) -+ goto out_mutex; -+ /* -+ * If we have a partial block after EOF we have to allocate -+ * the entire block. -+ */ -+ if (partial) -+ max_blocks += 1; -+ } -+ -+ if (max_blocks > 0) { -+ -+ /* Now release the pages and zero block aligned part of pages*/ -+ truncate_pagecache_range(inode, start, end - 1); -+ -+ /* Wait all existing dio workers, newcomers will block on i_mutex */ -+ ext4_inode_block_unlocked_dio(inode); -+ inode_dio_wait(inode); -+ -+ /* -+ * Remove entire range from the extent status tree. -+ */ -+ ret = ext4_es_remove_extent(inode, lblk, max_blocks); -+ if (ret) -+ goto out_dio; -+ -+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, -+ mode); -+ if (ret) -+ goto out_dio; -+ } -+ -+ handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ ext4_std_error(inode->i_sb, ret); -+ goto out_dio; -+ } -+ -+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode); -+ -+ if (!ret && new_size) { -+ if (new_size > i_size_read(inode)) -+ i_size_write(inode, new_size); -+ if (new_size > EXT4_I(inode)->i_disksize) -+ ext4_update_i_disksize(inode, new_size); -+ } else if (!ret && !new_size) { -+ /* -+ * Mark that we allocate beyond EOF so the subsequent truncate -+ * can proceed even if the new size is the same as i_size. -+ */ -+ if ((offset + len) > i_size_read(inode)) -+ ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); -+ } -+ -+ ext4_mark_inode_dirty(handle, inode); -+ -+ /* Zero out partial block at the edges of the range */ -+ ret = ext4_zero_partial_blocks(handle, inode, offset, len); -+ -+ if (file->f_flags & O_SYNC) -+ ext4_handle_sync(handle); -+ -+ ext4_journal_stop(handle); -+out_dio: -+ ext4_inode_resume_unlocked_dio(inode); -+out_mutex: -+ mutex_unlock(&inode->i_mutex); -+ return ret; -+} -+ - /* - * preallocate space for a file. This implements ext4's fallocate file - * operation, which gets called from sys_fallocate system call. -@@ -4625,7 +4867,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - - /* Return error if mode is not supported */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | -- FALLOC_FL_COLLAPSE_RANGE)) -+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) - return -EOPNOTSUPP; - - if (mode & FALLOC_FL_PUNCH_HOLE) -@@ -4645,6 +4887,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return -EOPNOTSUPP; - -+ if (mode & FALLOC_FL_ZERO_RANGE) -+ return ext4_zero_range(file, offset, len, mode); -+ - trace_ext4_fallocate_enter(inode, offset, len, mode); - lblk = offset >> blkbits; - /* -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index ab3e835..7cc2455 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -503,6 +503,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - { - struct extent_status es; - int retval; -+ int ret = 0; - #ifdef ES_AGGRESSIVE_TEST - struct ext4_map_blocks orig_map; - -@@ -558,7 +559,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - EXT4_GET_BLOCKS_KEEP_SIZE); - } - if (retval > 0) { -- int ret; - unsigned int status; - - if (unlikely(retval != map->m_len)) { -@@ -585,7 +585,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - - found: - if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { -- int ret = check_block_validity(inode, map); -+ ret = check_block_validity(inode, map); - if (ret != 0) - return ret; - } -@@ -602,7 +602,13 @@ found: - * with buffer head unmapped. - */ - if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) -- return retval; -+ /* -+ * If we need to convert extent to unwritten -+ * we continue and do the actual work in -+ * ext4_ext_map_blocks() -+ */ -+ if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) -+ return retval; - - /* - * Here we clear m_flags because after allocating an new extent, -@@ -658,7 +664,6 @@ found: - ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); - - if (retval > 0) { -- int ret; - unsigned int status; - - if (unlikely(retval != map->m_len)) { -@@ -693,7 +698,7 @@ found: - has_zeroout: - up_write((&EXT4_I(inode)->i_data_sem)); - if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { -- int ret = check_block_validity(inode, map); -+ ret = check_block_validity(inode, map); - if (ret != 0) - return ret; - } -@@ -3507,7 +3512,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) - if (!S_ISREG(inode->i_mode)) - return -EOPNOTSUPP; - -- trace_ext4_punch_hole(inode, offset, length); -+ trace_ext4_punch_hole(inode, offset, length, 0); - - /* - * Write out all dirty pages to avoid race conditions -diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h -index e9d7ee7..010ea89 100644 ---- a/include/trace/events/ext4.h -+++ b/include/trace/events/ext4.h -@@ -21,6 +21,10 @@ struct extent_status; - #define FALLOC_FL_COLLAPSE_RANGE 0x08 - #endif - -+#ifndef FALLOC_FL_ZERO_RANGE -+#define FALLOC_FL_ZERO_RANGE 0x10 -+#endif -+ - #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) - - #define show_mballoc_flags(flags) __print_flags(flags, "|", \ -@@ -77,7 +81,8 @@ struct extent_status; - { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ - { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ - { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \ -- { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}) -+ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ -+ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) - - - TRACE_EVENT(ext4_free_inode, -@@ -1339,7 +1344,7 @@ TRACE_EVENT(ext4_direct_IO_exit, - __entry->rw, __entry->ret) - ); - --TRACE_EVENT(ext4_fallocate_enter, -+DECLARE_EVENT_CLASS(ext4__fallocate_mode, - TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), - - TP_ARGS(inode, offset, len, mode), -@@ -1347,23 +1352,45 @@ TRACE_EVENT(ext4_fallocate_enter, - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) -- __field( loff_t, pos ) -- __field( loff_t, len ) -+ __field( loff_t, offset ) -+ __field( loff_t, len ) - __field( int, mode ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; -- __entry->pos = offset; -+ __entry->offset = offset; - __entry->len = len; - __entry->mode = mode; - ), - -- TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %s", -+ TP_printk("dev %d,%d ino %lu offset %lld len %lld mode %s", - MAJOR(__entry->dev), MINOR(__entry->dev), -- (unsigned long) __entry->ino, __entry->pos, -- __entry->len, show_falloc_mode(__entry->mode)) -+ (unsigned long) __entry->ino, -+ __entry->offset, __entry->len, -+ show_falloc_mode(__entry->mode)) -+); -+ -+DEFINE_EVENT(ext4__fallocate_mode, ext4_fallocate_enter, -+ -+ TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), -+ -+ TP_ARGS(inode, offset, len, mode) -+); -+ -+DEFINE_EVENT(ext4__fallocate_mode, ext4_punch_hole, -+ -+ TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), -+ -+ TP_ARGS(inode, offset, len, mode) -+); -+ -+DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range, -+ -+ TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode), -+ -+ TP_ARGS(inode, offset, len, mode) - ); - - TRACE_EVENT(ext4_fallocate_exit, -@@ -1395,31 +1422,6 @@ TRACE_EVENT(ext4_fallocate_exit, - __entry->ret) - ); - --TRACE_EVENT(ext4_punch_hole, -- TP_PROTO(struct inode *inode, loff_t offset, loff_t len), -- -- TP_ARGS(inode, offset, len), -- -- TP_STRUCT__entry( -- __field( dev_t, dev ) -- __field( ino_t, ino ) -- __field( loff_t, offset ) -- __field( loff_t, len ) -- ), -- -- TP_fast_assign( -- __entry->dev = inode->i_sb->s_dev; -- __entry->ino = inode->i_ino; -- __entry->offset = offset; -- __entry->len = len; -- ), -- -- TP_printk("dev %d,%d ino %lu offset %lld len %lld", -- MAJOR(__entry->dev), MINOR(__entry->dev), -- (unsigned long) __entry->ino, -- __entry->offset, __entry->len) --); -- - TRACE_EVENT(ext4_unlink_enter, - TP_PROTO(struct inode *parent, struct dentry *dentry), - diff --git a/address-a-benign-compiler-warning b/address-a-benign-compiler-warning deleted file mode 100644 index ba5aa407..00000000 --- a/address-a-benign-compiler-warning +++ /dev/null @@ -1,35 +0,0 @@ -ext4: address a benign compiler warning - -From: Patrick Palka - -When !defined(CONFIG_EXT4_DEBUG), mb_debug() should be defined as a -no_printk() statement instead of an empty statement in order to suppress -the following compiler warning: - -fs/ext4/mballoc.c: In function ‘ext4_mb_cleanup_pa’: -fs/ext4/mballoc.c:2659:47: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] - mb_debug(1, "mballoc: %u PAs left\n", count); - -Signed-off-by: Patrick Palka -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/mballoc.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h -index 08481ee..9347328 100644 ---- a/fs/ext4/mballoc.h -+++ b/fs/ext4/mballoc.h -@@ -48,7 +48,7 @@ extern ushort ext4_mballoc_debug; - } \ - } while (0) - #else --#define mb_debug(n, fmt, a...) -+#define mb_debug(n, fmt, a...) no_printk(fmt, ## a) - #endif - - #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ --- -1.9.0.rc3 - - diff --git a/atomically-set-inode-flags b/atomically-set-inode-flags deleted file mode 100644 index ae62f7e3..00000000 --- a/atomically-set-inode-flags +++ /dev/null @@ -1,101 +0,0 @@ -ext4: atomically set inode->i_flags in ext4_set_inode_flags() - -Use cmpxchg() to atomically set i_flags instead of clearing out the -S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the -EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race -where an immutable file has the immutable flag cleared for a brief -window of time. - -Reported-by: John Sullivan -Signed-off-by: "Theodore Ts'o" -Cc: stable@kernel.org ---- - fs/ext4/inode.c | 14 ++++++++------ - fs/inode.c | 31 +++++++++++++++++++++++++++++++ - include/linux/fs.h | 3 +++ - 3 files changed, 42 insertions(+), 6 deletions(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index b5e182a..df067c3 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3938,18 +3938,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) - void ext4_set_inode_flags(struct inode *inode) - { - unsigned int flags = EXT4_I(inode)->i_flags; -+ unsigned int new_fl = 0; - -- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - if (flags & EXT4_SYNC_FL) -- inode->i_flags |= S_SYNC; -+ new_fl |= S_SYNC; - if (flags & EXT4_APPEND_FL) -- inode->i_flags |= S_APPEND; -+ new_fl |= S_APPEND; - if (flags & EXT4_IMMUTABLE_FL) -- inode->i_flags |= S_IMMUTABLE; -+ new_fl |= S_IMMUTABLE; - if (flags & EXT4_NOATIME_FL) -- inode->i_flags |= S_NOATIME; -+ new_fl |= S_NOATIME; - if (flags & EXT4_DIRSYNC_FL) -- inode->i_flags |= S_DIRSYNC; -+ new_fl |= S_DIRSYNC; -+ inode_set_flags(inode, new_fl, -+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - } - - /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ -diff --git a/fs/inode.c b/fs/inode.c -index 4bcdad3..26f95ce 100644 ---- a/fs/inode.c -+++ b/fs/inode.c -@@ -1899,3 +1899,34 @@ void inode_dio_done(struct inode *inode) - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); - } - EXPORT_SYMBOL(inode_dio_done); -+ -+/* -+ * inode_set_flags - atomically set some inode flags -+ * -+ * Note: the caller should be holding i_mutex, or else be sure that -+ * they have exclusive access to the inode structure (i.e., while the -+ * inode is being instantiated). The reason for the cmpxchg() loop -+ * --- which wouldn't be necessary if all code paths which modify -+ * i_flags actually followed this rule, is that there is at least one -+ * code path which doesn't today --- for example, -+ * __generic_file_aio_write() calls file_remove_suid() without holding -+ * i_mutex --- so we use cmpxchg() out of an abundance of caution. -+ * -+ * In the long run, i_mutex is overkill, and we should probably look -+ * at using the i_lock spinlock to protect i_flags, and then make sure -+ * it is so documented in include/linux/fs.h and that all code follows -+ * the locking convention!! -+ */ -+void inode_set_flags(struct inode *inode, unsigned int flags, -+ unsigned int mask) -+{ -+ unsigned int old_flags, new_flags; -+ -+ WARN_ON_ONCE(flags & ~mask); -+ do { -+ old_flags = ACCESS_ONCE(inode->i_flags); -+ new_flags = (old_flags & ~mask) | flags; -+ } while (unlikely(cmpxchg(&inode->i_flags, old_flags, -+ new_flags) != old_flags)); -+} -+EXPORT_SYMBOL(inode_set_flags); -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 6082956..5d1f6fa 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2556,6 +2556,9 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - void inode_dio_wait(struct inode *inode); - void inode_dio_done(struct inode *inode); - -+extern void inode_set_flags(struct inode *inode, unsigned int flags, -+ unsigned int mask); -+ - extern const struct file_operations generic_ro_fops; - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) diff --git a/avoid-exposure-stale-data-in-ext4_punch_hole b/avoid-exposure-stale-data-in-ext4_punch_hole deleted file mode 100644 index 427e3468..00000000 --- a/avoid-exposure-stale-data-in-ext4_punch_hole +++ /dev/null @@ -1,39 +0,0 @@ -ext4: avoid exposure of stale data in ext4_punch_hole() - -From: Maxim Patlasov - -While handling punch-hole fallocate, it's useless to truncate page cache -before removing the range from extent tree (or block map in indirect case) -because page cache can be re-populated (by read-ahead or read(2) or mmap-ed -read) immediately after truncating page cache, but before updating extent -tree (or block map). In that case the user will see stale data even after -fallocate is completed. - -Until the problem of data corruption resulting from pages backed by -already freed blocks is fully resolved, the simple thing we can do now -is to add another truncation of pagecache after punch hole is done. - -Signed-off-by: Maxim Patlasov -Signed-off-by: "Theodore Ts'o" -Reviewed-by: Jan Kara ---- - fs/ext4/inode.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 113458c..5324a38 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3614,6 +3614,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) - up_write(&EXT4_I(inode)->i_data_sem); - if (IS_SYNC(inode)) - ext4_handle_sync(handle); -+ -+ /* Now release the pages again to reduce race window */ -+ if (last_block_offset > first_block_offset) -+ truncate_pagecache_range(inode, first_block_offset, -+ last_block_offset); -+ - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - out_stop: diff --git a/cap-max-length-from-ext b/cap-max-length-from-ext deleted file mode 100644 index b45308d4..00000000 --- a/cap-max-length-from-ext +++ /dev/null @@ -1,56 +0,0 @@ -ext4: avoid possible overflow in ext4_map_blocks() - -The ext4_map_blocks() function returns the number of blocks which -satisfying the caller's request. This number of blocks requested by -the caller is specified by an unsigned integer, but the return value -of ext4_map_blocks() is a signed integer (to accomodate error codes -per the kernel's standard error signalling convention). - -Historically, overflows could never happen since mballoc() will refuse -to allocate more than 2048 blocks at a time (which is something we -should fix), and if the blocks were already allocated, the fact that -there would be some number of intervening metadata blocks pretty much -guaranteed that there could never be a contiguous region of data -blocks that was greater than 2**31 blocks. - -However, this is now possible if there is a file system which is a bit -bigger than 8TB, and is created using the new mke2fs hugeblock -feature, which can create a perfectly contiguous file. In that case, -if a userspace program attempted to call fallocate() on this already -fully allocated file, it's possible that ext4_map_blocks() could -return a number large enough that it would overflow a signed integer, -resulting in a ext4 thinking that the ext4_map_blocks() call had -failed with some strange error code. - -Since ext4_map_blocks() is always free to return a smaller number of -blocks than what was requested by the caller, fix this by capping the -number of blocks that ext4_map_blocks() will ever try to map to 2**31 -- 1. In practice this should never get hit, except by someone -deliberately trying to provke the above-described bug. - -Thanks to the PaX team for asking whethre this could possibly happen -in some off-line discussions about using some static code checking -technology they are developing to find bugs in kernel code. - -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/inode.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 6e39895..113458c 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -514,6 +514,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - "logical block %lu\n", inode->i_ino, flags, map->m_len, - (unsigned long) map->m_lblk); - -+ /* -+ * ext4_map_blocks returns an int, and m_len is an unsigned int -+ */ -+ if (unlikely(map->m_len > INT_MAX)) -+ map->m_len = INT_MAX; -+ - /* Lookup extent status tree firstly */ - if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { - ext4_es_lru_add(inode); diff --git a/change-block-and-index-hash-chain-to-hlist_bl_node b/change-block-and-index-hash-chain-to-hlist_bl_node deleted file mode 100644 index fbe7537d..00000000 --- a/change-block-and-index-hash-chain-to-hlist_bl_node +++ /dev/null @@ -1,308 +0,0 @@ -fs/mbcache.c: change block and index hash chain to hlist_bl_node - -From: T Makphaibulchoke - -This patch changes each mb_cache's both block and index hash chains to -use a hlist_bl_node, which contains a built-in lock. This is the -first step in decoupling of locks serializing accesses to mb_cache -global data and each mb_cache_entry local data. - -Signed-off-by: T. Makphaibulchoke -Signed-off-by: "Theodore Ts'o" ---- - fs/mbcache.c | 117 ++++++++++++++++++++++++++++++++---------------- - include/linux/mbcache.h | 12 ++--- - 2 files changed, 85 insertions(+), 44 deletions(-) - -diff --git a/fs/mbcache.c b/fs/mbcache.c -index e519e45..55db0da 100644 ---- a/fs/mbcache.c -+++ b/fs/mbcache.c -@@ -34,9 +34,9 @@ - #include - #include - #include --#include -+#include - #include -- -+#include - - #ifdef MB_CACHE_DEBUG - # define mb_debug(f...) do { \ -@@ -87,21 +87,38 @@ static LIST_HEAD(mb_cache_lru_list); - static DEFINE_SPINLOCK(mb_cache_spinlock); - - static inline int --__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) - { -- return !list_empty(&ce->e_block_list); -+ return !hlist_bl_unhashed(&ce->e_block_list); - } - - --static void --__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+static inline void -+__mb_cache_entry_unhash_block(struct mb_cache_entry *ce) - { -- if (__mb_cache_entry_is_hashed(ce)) { -- list_del_init(&ce->e_block_list); -- list_del(&ce->e_index.o_list); -- } -+ if (__mb_cache_entry_is_block_hashed(ce)) -+ hlist_bl_del_init(&ce->e_block_list); -+} -+ -+static inline int -+__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) -+{ -+ return !hlist_bl_unhashed(&ce->e_index.o_list); - } - -+static inline void -+__mb_cache_entry_unhash_index(struct mb_cache_entry *ce) -+{ -+ if (__mb_cache_entry_is_index_hashed(ce)) -+ hlist_bl_del_init(&ce->e_index.o_list); -+} -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ __mb_cache_entry_unhash_index(ce); -+ __mb_cache_entry_unhash_block(ce); -+} - - static void - __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) -@@ -125,7 +142,7 @@ __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) - ce->e_used -= MB_CACHE_WRITER; - ce->e_used--; - if (!(ce->e_used || ce->e_queued)) { -- if (!__mb_cache_entry_is_hashed(ce)) -+ if (!__mb_cache_entry_is_block_hashed(ce)) - goto forget; - mb_assert(list_empty(&ce->e_lru_list)); - list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -@@ -221,18 +238,18 @@ mb_cache_create(const char *name, int bucket_bits) - cache->c_name = name; - atomic_set(&cache->c_entry_count, 0); - cache->c_bucket_bits = bucket_bits; -- cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -- GFP_KERNEL); -+ cache->c_block_hash = kmalloc(bucket_count * -+ sizeof(struct hlist_bl_head), GFP_KERNEL); - if (!cache->c_block_hash) - goto fail; - for (n=0; nc_block_hash[n]); -- cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), -- GFP_KERNEL); -+ INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]); -+ cache->c_index_hash = kmalloc(bucket_count * -+ sizeof(struct hlist_bl_head), GFP_KERNEL); - if (!cache->c_index_hash) - goto fail; - for (n=0; nc_index_hash[n]); -+ INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]); - cache->c_entry_cache = kmem_cache_create(name, - sizeof(struct mb_cache_entry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); -@@ -364,10 +381,13 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) - return NULL; - atomic_inc(&cache->c_entry_count); - INIT_LIST_HEAD(&ce->e_lru_list); -- INIT_LIST_HEAD(&ce->e_block_list); -+ INIT_HLIST_BL_NODE(&ce->e_block_list); -+ INIT_HLIST_BL_NODE(&ce->e_index.o_list); - ce->e_cache = cache; - ce->e_queued = 0; - } -+ ce->e_block_hash_p = &cache->c_block_hash[0]; -+ ce->e_index_hash_p = &cache->c_index_hash[0]; - ce->e_used = 1 + MB_CACHE_WRITER; - return ce; - } -@@ -393,25 +413,32 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, - { - struct mb_cache *cache = ce->e_cache; - unsigned int bucket; -- struct list_head *l; -+ struct hlist_bl_node *l; - int error = -EBUSY; -+ struct hlist_bl_head *block_hash_p; -+ struct hlist_bl_head *index_hash_p; -+ struct mb_cache_entry *lce; - -+ mb_assert(ce); - bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), - cache->c_bucket_bits); -+ block_hash_p = &cache->c_block_hash[bucket]; - spin_lock(&mb_cache_spinlock); -- list_for_each_prev(l, &cache->c_block_hash[bucket]) { -- struct mb_cache_entry *ce = -- list_entry(l, struct mb_cache_entry, e_block_list); -- if (ce->e_bdev == bdev && ce->e_block == block) -+ hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { -+ if (lce->e_bdev == bdev && lce->e_block == block) - goto out; - } -+ mb_assert(!__mb_cache_entry_is_block_hashed(ce)); - __mb_cache_entry_unhash(ce); - ce->e_bdev = bdev; - ce->e_block = block; -- list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ ce->e_block_hash_p = block_hash_p; - ce->e_index.o_key = key; - bucket = hash_long(key, cache->c_bucket_bits); -- list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); -+ index_hash_p = &cache->c_index_hash[bucket]; -+ ce->e_index_hash_p = index_hash_p; -+ hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); -+ hlist_bl_add_head(&ce->e_block_list, block_hash_p); - error = 0; - out: - spin_unlock(&mb_cache_spinlock); -@@ -463,14 +490,16 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, - sector_t block) - { - unsigned int bucket; -- struct list_head *l; -+ struct hlist_bl_node *l; - struct mb_cache_entry *ce; -+ struct hlist_bl_head *block_hash_p; - - bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), - cache->c_bucket_bits); -+ block_hash_p = &cache->c_block_hash[bucket]; - spin_lock(&mb_cache_spinlock); -- list_for_each(l, &cache->c_block_hash[bucket]) { -- ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { -+ mb_assert(ce->e_block_hash_p == block_hash_p); - if (ce->e_bdev == bdev && ce->e_block == block) { - DEFINE_WAIT(wait); - -@@ -489,7 +518,7 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, - finish_wait(&mb_cache_queue, &wait); - ce->e_used += 1 + MB_CACHE_WRITER; - -- if (!__mb_cache_entry_is_hashed(ce)) { -+ if (!__mb_cache_entry_is_block_hashed(ce)) { - __mb_cache_entry_release_unlock(ce); - return NULL; - } -@@ -506,12 +535,14 @@ cleanup: - #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) - - static struct mb_cache_entry * --__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, - struct block_device *bdev, unsigned int key) - { -- while (l != head) { -+ while (l != NULL) { - struct mb_cache_entry *ce = -- list_entry(l, struct mb_cache_entry, e_index.o_list); -+ hlist_bl_entry(l, struct mb_cache_entry, -+ e_index.o_list); -+ mb_assert(ce->e_index_hash_p == head); - if (ce->e_bdev == bdev && ce->e_index.o_key == key) { - DEFINE_WAIT(wait); - -@@ -532,7 +563,7 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head, - } - finish_wait(&mb_cache_queue, &wait); - -- if (!__mb_cache_entry_is_hashed(ce)) { -+ if (!__mb_cache_entry_is_block_hashed(ce)) { - __mb_cache_entry_release_unlock(ce); - spin_lock(&mb_cache_spinlock); - return ERR_PTR(-EAGAIN); -@@ -562,12 +593,16 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, - unsigned int key) - { - unsigned int bucket = hash_long(key, cache->c_bucket_bits); -- struct list_head *l; -- struct mb_cache_entry *ce; -+ struct hlist_bl_node *l; -+ struct mb_cache_entry *ce = NULL; -+ struct hlist_bl_head *index_hash_p; - -+ index_hash_p = &cache->c_index_hash[bucket]; - spin_lock(&mb_cache_spinlock); -- l = cache->c_index_hash[bucket].next; -- ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); -+ if (!hlist_bl_empty(index_hash_p)) { -+ l = hlist_bl_first(index_hash_p); -+ ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); -+ } - spin_unlock(&mb_cache_spinlock); - return ce; - } -@@ -597,12 +632,16 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, - { - struct mb_cache *cache = prev->e_cache; - unsigned int bucket = hash_long(key, cache->c_bucket_bits); -- struct list_head *l; -+ struct hlist_bl_node *l; - struct mb_cache_entry *ce; -+ struct hlist_bl_head *index_hash_p; - -+ index_hash_p = &cache->c_index_hash[bucket]; -+ mb_assert(prev->e_index_hash_p == index_hash_p); - spin_lock(&mb_cache_spinlock); -+ mb_assert(!hlist_bl_empty(index_hash_p)); - l = prev->e_index.o_list.next; -- ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); -+ ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); - __mb_cache_entry_release_unlock(prev); - return ce; - } -diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h -index 5525d37..6a392e7 100644 ---- a/include/linux/mbcache.h -+++ b/include/linux/mbcache.h -@@ -3,19 +3,21 @@ - - (C) 2001 by Andreas Gruenbacher, - */ -- - struct mb_cache_entry { - struct list_head e_lru_list; - struct mb_cache *e_cache; - unsigned short e_used; - unsigned short e_queued; -+ atomic_t e_refcnt; - struct block_device *e_bdev; - sector_t e_block; -- struct list_head e_block_list; -+ struct hlist_bl_node e_block_list; - struct { -- struct list_head o_list; -+ struct hlist_bl_node o_list; - unsigned int o_key; - } e_index; -+ struct hlist_bl_head *e_block_hash_p; -+ struct hlist_bl_head *e_index_hash_p; - }; - - struct mb_cache { -@@ -25,8 +27,8 @@ struct mb_cache { - int c_max_entries; - int c_bucket_bits; - struct kmem_cache *c_entry_cache; -- struct list_head *c_block_hash; -- struct list_head *c_index_hash; -+ struct hlist_bl_head *c_block_hash; -+ struct hlist_bl_head *c_index_hash; - }; - - /* Functions on caches */ --- -1.7.11.3 - - diff --git a/cleanup-error-handling-in-swap_inode_boot_loader b/cleanup-error-handling-in-swap_inode_boot_loader deleted file mode 100644 index b78915fa..00000000 --- a/cleanup-error-handling-in-swap_inode_boot_loader +++ /dev/null @@ -1,62 +0,0 @@ -ext4: clean up error handling in swap_inode_boot_loader() - -Tighten up the code to make the code easier to read and maintain. - -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/ioctl.c | 24 ++++++------------------ - 1 file changed, 6 insertions(+), 18 deletions(-) - -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index a2a837f..0f2252e 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -104,21 +104,15 @@ static long swap_inode_boot_loader(struct super_block *sb, - struct ext4_inode_info *ei_bl; - struct ext4_sb_info *sbi = EXT4_SB(sb); - -- if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) { -- err = -EINVAL; -- goto swap_boot_out; -- } -+ if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) -+ return -EINVAL; - -- if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) { -- err = -EPERM; -- goto swap_boot_out; -- } -+ if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) -+ return -EPERM; - - inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); -- if (IS_ERR(inode_bl)) { -- err = PTR_ERR(inode_bl); -- goto swap_boot_out; -- } -+ if (IS_ERR(inode_bl)) -+ return PTR_ERR(inode_bl); - ei_bl = EXT4_I(inode_bl); - - filemap_flush(inode->i_mapping); -@@ -193,20 +187,14 @@ static long swap_inode_boot_loader(struct super_block *sb, - ext4_mark_inode_dirty(handle, inode); - } - } -- - ext4_journal_stop(handle); -- - ext4_double_up_write_data_sem(inode, inode_bl); - - journal_err_out: - ext4_inode_resume_unlocked_dio(inode); - ext4_inode_resume_unlocked_dio(inode_bl); -- - unlock_two_nondirectories(inode, inode_bl); -- - iput(inode_bl); -- --swap_boot_out: - return err; - } - diff --git a/delete-path-delalloc-code-in-ext4_ext_handle_uninitialized_extents b/delete-path-delalloc-code-in-ext4_ext_handle_uninitialized_extents deleted file mode 100644 index a841540c..00000000 --- a/delete-path-delalloc-code-in-ext4_ext_handle_uninitialized_extents +++ /dev/null @@ -1,52 +0,0 @@ -ext4: delete path dealloc code in ext4_ext_handle_uninitialized_extents - -From: Eric Whitney - -Code deallocating the extent path referenced by an argument to -ext4_ext_handle_uninitialized_extents was made redundant with identical -code in its one caller, ext4_ext_map_blocks, by commit 3779473246. -Allocating and deallocating the path in the same function also makes -the code clearer. - -Signed-off-by: Eric Whitney -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/extents.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index ef4b535..cca976b 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4006,10 +4006,6 @@ out1: - map->m_pblk = newblock; - map->m_len = allocated; - out2: -- if (path) { -- ext4_ext_drop_refs(path); -- kfree(path); -- } - return err ? err : allocated; - } - -@@ -4209,7 +4205,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, - err = ret; - else - allocated = ret; -- goto out3; -+ goto out2; - } - } - -@@ -4490,7 +4486,6 @@ out2: - kfree(path); - } - --out3: - trace_ext4_ext_map_blocks_exit(inode, flags, map, - err ? err : allocated); - ext4_es_lru_add(inode); --- -1.8.3.2 - - diff --git a/dont-hold-j_state_lock-during-wake_up b/dont-hold-j_state_lock-during-wake_up deleted file mode 100644 index edec3b65..00000000 --- a/dont-hold-j_state_lock-during-wake_up +++ /dev/null @@ -1,34 +0,0 @@ -jbd2: don't hold j_state_lock while calling wake_up() - -The j_state_lock is one of the hottest locks in the jbd2 layer and -thus one of its scalability bottlenecks. - -We don't need to be holding the j_state_lock while we are calling -wake_up(&journal->j_wait_commit), so release the lock a little bit -earlier. - -Signed-off-by: "Theodore Ts'o" -diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index 244b6f6..67b8e30 100644 ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -302,8 +302,8 @@ static void journal_kill_thread(journal_t *journal) - journal->j_flags |= JBD2_UNMOUNT; - - while (journal->j_task) { -- wake_up(&journal->j_wait_commit); - write_unlock(&journal->j_state_lock); -+ wake_up(&journal->j_wait_commit); - wait_event(journal->j_wait_done_commit, journal->j_task == NULL); - write_lock(&journal->j_state_lock); - } -@@ -710,8 +710,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) - while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", - tid, journal->j_commit_sequence); -- wake_up(&journal->j_wait_commit); - read_unlock(&journal->j_state_lock); -+ wake_up(&journal->j_wait_commit); - wait_event(journal->j_wait_done_commit, - !tid_gt(tid, journal->j_commit_sequence)); - read_lock(&journal->j_state_lock); diff --git a/dont-leave-crtime-uninitialized b/dont-leave-crtime-uninitialized deleted file mode 100644 index 798ccd63..00000000 --- a/dont-leave-crtime-uninitialized +++ /dev/null @@ -1,27 +0,0 @@ -ext4: don't leave i_crtime.tv_sec uninitialized - -If the i_crtime field is not present in the inode, don't leave the -field uninitialized. - -Fixes: ef7f38359 ("ext4: Add nanosecond timestamps") -Reported-by: Vegard Nossum -Tested-by: Vegard Nossum -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org ---- - fs/ext4/ext4.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index ece5556..d3a534f 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -771,6 +771,8 @@ do { \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ - (einode)->xtime.tv_sec = \ - (signed)le32_to_cpu((raw_inode)->xtime); \ -+ else \ -+ (einode)->xtime.tv_sec = 0; \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ - ext4_decode_extra_time(&(einode)->xtime, \ - raw_inode->xtime ## _extra); \ diff --git a/dont-try-to-set-hash-signedness-flag-if-fs-is-ro b/dont-try-to-set-hash-signedness-flag-if-fs-is-ro deleted file mode 100644 index 05f89dff..00000000 --- a/dont-try-to-set-hash-signedness-flag-if-fs-is-ro +++ /dev/null @@ -1,49 +0,0 @@ -ext4: don't try to modify s_flags if the the file system is read-only - -If an ext4 file system is created by some tool other than mke2fs -(perhaps by someone who has a pathalogical fear of the GPL) that -doesn't set one or the other of the EXT2_FLAGS_{UN}SIGNED_HASH flags, -and that file system is then mounted read-only, don't try to modify -the s_flags field. Otherwise, if dm_verity is in use, the superblock -will change, causing an dm_verity failure. - -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org ---- - fs/ext4/super.c | 20 +++++++++++++------- - 1 file changed, 13 insertions(+), 7 deletions(-) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 0491c81..f5c13b8 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -3695,16 +3695,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - for (i = 0; i < 4; i++) - sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); - sbi->s_def_hash_version = es->s_def_hash_version; -- i = le32_to_cpu(es->s_flags); -- if (i & EXT2_FLAGS_UNSIGNED_HASH) -- sbi->s_hash_unsigned = 3; -- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { -+ if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { -+ i = le32_to_cpu(es->s_flags); -+ if (i & EXT2_FLAGS_UNSIGNED_HASH) -+ sbi->s_hash_unsigned = 3; -+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { - #ifdef __CHAR_UNSIGNED__ -- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); -- sbi->s_hash_unsigned = 3; -+ if (!(sb->s_flags & MS_RDONLY)) -+ es->s_flags |= -+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); -+ sbi->s_hash_unsigned = 3; - #else -- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); -+ if (!(sb->s_flags & MS_RDONLY)) -+ es->s_flags |= -+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); - #endif -+ } - } - - /* Handle clustersize */ diff --git a/each-fs-uses-its-own-mbcache b/each-fs-uses-its-own-mbcache deleted file mode 100644 index 85367e15..00000000 --- a/each-fs-uses-its-own-mbcache +++ /dev/null @@ -1,348 +0,0 @@ -ext4: each filesystem creates and uses its own mb_cache - -From: T Makphaibulchoke - -This patch adds new interfaces to create and destory cache, -ext4_xattr_create_cache() and ext4_xattr_destroy_cache(), and remove -the cache creation and destory calls from ex4_init_xattr() and -ext4_exitxattr() in fs/ext4/xattr.c. - -fs/ext4/super.c has been changed so that when a filesystem is mounted -a cache is allocated and attched to its ext4_sb_info structure. - -fs/mbcache.c has been changed so that only one slab allocator is -allocated and used by all mbcache structures. - -Signed-off-by: T. Makphaibulchoke ---- - fs/ext4/ext4.h | 1 + - fs/ext4/super.c | 25 +++++++++++++++++-------- - fs/ext4/xattr.c | 51 ++++++++++++++++++++++++++++----------------------- - fs/ext4/xattr.h | 6 +++--- - fs/mbcache.c | 18 +++++++++++++----- - 5 files changed, 62 insertions(+), 39 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 1b3cbf8..f4f889e 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1329,6 +1329,7 @@ struct ext4_sb_info { - struct list_head s_es_lru; - unsigned long s_es_last_sorted; - struct percpu_counter s_extent_cache_cnt; -+ struct mb_cache *s_mb_cache; - spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; - - /* Ratelimit ext4 messages. */ -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 89baee4..5a51af7 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -59,6 +59,7 @@ static struct kset *ext4_kset; - static struct ext4_lazy_init *ext4_li_info; - static struct mutex ext4_li_mtx; - static struct ext4_features *ext4_feat; -+static int ext4_mballoc_ready; - - static int ext4_load_journal(struct super_block *, struct ext4_super_block *, - unsigned long journal_devnum); -@@ -845,6 +846,10 @@ static void ext4_put_super(struct super_block *sb) - invalidate_bdev(sbi->journal_bdev); - ext4_blkdev_remove(sbi); - } -+ if (sbi->s_mb_cache) { -+ ext4_xattr_destroy_cache(sbi->s_mb_cache); -+ sbi->s_mb_cache = NULL; -+ } - if (sbi->s_mmp_tsk) - kthread_stop(sbi->s_mmp_tsk); - sb->s_fs_info = NULL; -@@ -4010,6 +4015,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); - - no_journal: -+ if (ext4_mballoc_ready) { -+ sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); -+ if (!sbi->s_mb_cache) { -+ ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); -+ goto failed_mount_wq; -+ } -+ } -+ - /* - * Get the # of file system overhead blocks from the - * superblock if present. -@@ -5519,11 +5532,9 @@ static int __init ext4_init_fs(void) - - err = ext4_init_mballoc(); - if (err) -- goto out3; -- -- err = ext4_init_xattr(); -- if (err) - goto out2; -+ else -+ ext4_mballoc_ready = 1; - err = init_inodecache(); - if (err) - goto out1; -@@ -5539,10 +5550,9 @@ out: - unregister_as_ext3(); - destroy_inodecache(); - out1: -- ext4_exit_xattr(); --out2: -+ ext4_mballoc_ready = 0; - ext4_exit_mballoc(); --out3: -+out2: - ext4_exit_feat_adverts(); - out4: - if (ext4_proc_root) -@@ -5565,7 +5575,6 @@ static void __exit ext4_exit_fs(void) - unregister_as_ext3(); - unregister_filesystem(&ext4_fs_type); - destroy_inodecache(); -- ext4_exit_xattr(); - ext4_exit_mballoc(); - ext4_exit_feat_adverts(); - remove_proc_entry("fs/ext4", NULL); -diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index 185066f..1f5cf58 100644 ---- a/fs/ext4/xattr.c -+++ b/fs/ext4/xattr.c -@@ -81,7 +81,7 @@ - # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) - #endif - --static void ext4_xattr_cache_insert(struct buffer_head *); -+static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); - static struct buffer_head *ext4_xattr_cache_find(struct inode *, - struct ext4_xattr_header *, - struct mb_cache_entry **); -@@ -90,8 +90,6 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *, - static int ext4_xattr_list(struct dentry *dentry, char *buffer, - size_t buffer_size); - --static struct mb_cache *ext4_xattr_cache; -- - static const struct xattr_handler *ext4_xattr_handler_map[] = { - [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, - #ifdef CONFIG_EXT4_FS_POSIX_ACL -@@ -117,6 +115,9 @@ const struct xattr_handler *ext4_xattr_handlers[] = { - NULL - }; - -+#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \ -+ inode->i_sb->s_fs_info)->s_mb_cache) -+ - static __le32 ext4_xattr_block_csum(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) -@@ -265,6 +266,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, - struct ext4_xattr_entry *entry; - size_t size; - int error; -+ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); - - ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", - name_index, name, buffer, (long)buffer_size); -@@ -286,7 +288,7 @@ bad_block: - error = -EIO; - goto cleanup; - } -- ext4_xattr_cache_insert(bh); -+ ext4_xattr_cache_insert(ext4_mb_cache, bh); - entry = BFIRST(bh); - error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EIO) -@@ -409,6 +411,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) - struct inode *inode = dentry->d_inode; - struct buffer_head *bh = NULL; - int error; -+ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); - - ea_idebug(inode, "buffer=%p, buffer_size=%ld", - buffer, (long)buffer_size); -@@ -430,7 +433,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) - error = -EIO; - goto cleanup; - } -- ext4_xattr_cache_insert(bh); -+ ext4_xattr_cache_insert(ext4_mb_cache, bh); - error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); - - cleanup: -@@ -526,8 +529,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, - { - struct mb_cache_entry *ce = NULL; - int error = 0; -+ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); - -- ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); -+ ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); - error = ext4_journal_get_write_access(handle, bh); - if (error) - goto out; -@@ -746,13 +750,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_search *s = &bs->s; - struct mb_cache_entry *ce = NULL; - int error = 0; -+ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); - - #define header(x) ((struct ext4_xattr_header *)(x)) - - if (i->value && i->value_len > sb->s_blocksize) - return -ENOSPC; - if (s->base) { -- ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, -+ ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, - bs->bh->b_blocknr); - error = ext4_journal_get_write_access(handle, bs->bh); - if (error) -@@ -770,7 +775,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, - if (!IS_LAST_ENTRY(s->first)) - ext4_xattr_rehash(header(s->base), - s->here); -- ext4_xattr_cache_insert(bs->bh); -+ ext4_xattr_cache_insert(ext4_mb_cache, -+ bs->bh); - } - unlock_buffer(bs->bh); - if (error == -EIO) -@@ -906,7 +912,7 @@ getblk_failed: - memcpy(new_bh->b_data, s->base, new_bh->b_size); - set_buffer_uptodate(new_bh); - unlock_buffer(new_bh); -- ext4_xattr_cache_insert(new_bh); -+ ext4_xattr_cache_insert(ext4_mb_cache, new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, new_bh); - if (error) -@@ -1495,13 +1501,13 @@ ext4_xattr_put_super(struct super_block *sb) - * Returns 0, or a negative error number on failure. - */ - static void --ext4_xattr_cache_insert(struct buffer_head *bh) -+ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) - { - __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); - struct mb_cache_entry *ce; - int error; - -- ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); -+ ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); - if (!ce) { - ea_bdebug(bh, "out of memory"); - return; -@@ -1573,12 +1579,13 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, - { - __u32 hash = le32_to_cpu(header->h_hash); - struct mb_cache_entry *ce; -+ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); - - if (!header->h_hash) - return NULL; /* never share */ - ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); - again: -- ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, -+ ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev, - hash); - while (ce) { - struct buffer_head *bh; -@@ -1676,19 +1683,17 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, - - #undef BLOCK_HASH_SHIFT - --int __init --ext4_init_xattr(void) -+#define HASH_BUCKET_BITS 10 -+ -+struct mb_cache * -+ext4_xattr_create_cache(char *name) - { -- ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); -- if (!ext4_xattr_cache) -- return -ENOMEM; -- return 0; -+ return mb_cache_create(name, HASH_BUCKET_BITS); - } - --void --ext4_exit_xattr(void) -+void ext4_xattr_destroy_cache(struct mb_cache *cache) - { -- if (ext4_xattr_cache) -- mb_cache_destroy(ext4_xattr_cache); -- ext4_xattr_cache = NULL; -+ if (cache) -+ mb_cache_destroy(cache); - } -+ -diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h -index 819d639..29bedf5 100644 ---- a/fs/ext4/xattr.h -+++ b/fs/ext4/xattr.h -@@ -110,9 +110,6 @@ extern void ext4_xattr_put_super(struct super_block *); - extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - struct ext4_inode *raw_inode, handle_t *handle); - --extern int __init ext4_init_xattr(void); --extern void ext4_exit_xattr(void); -- - extern const struct xattr_handler *ext4_xattr_handlers[]; - - extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, -@@ -124,6 +121,9 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is); - -+extern struct mb_cache *ext4_xattr_create_cache(char *name); -+extern void ext4_xattr_destroy_cache(struct mb_cache *); -+ - #ifdef CONFIG_EXT4_FS_SECURITY - extern int ext4_init_security(handle_t *handle, struct inode *inode, - struct inode *dir, const struct qstr *qstr); -diff --git a/fs/mbcache.c b/fs/mbcache.c -index 786ecab..bf166e3 100644 ---- a/fs/mbcache.c -+++ b/fs/mbcache.c -@@ -99,6 +99,7 @@ - - static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); - static struct blockgroup_lock *mb_cache_bg_lock; -+static struct kmem_cache *mb_cache_kmem_cache; - - MODULE_AUTHOR("Andreas Gruenbacher "); - MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -@@ -351,11 +352,14 @@ mb_cache_create(const char *name, int bucket_bits) - goto fail; - for (n=0; nc_index_hash[n]); -- cache->c_entry_cache = kmem_cache_create(name, -- sizeof(struct mb_cache_entry), 0, -- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); -- if (!cache->c_entry_cache) -- goto fail2; -+ if (!mb_cache_kmem_cache) { -+ mb_cache_kmem_cache = kmem_cache_create(name, -+ sizeof(struct mb_cache_entry), 0, -+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); -+ if (!mb_cache_kmem_cache) -+ goto fail2; -+ } -+ cache->c_entry_cache = mb_cache_kmem_cache; - - /* - * Set an upper limit on the number of cache entries so that the hash -@@ -476,6 +480,10 @@ mb_cache_destroy(struct mb_cache *cache) - atomic_read(&cache->c_entry_count)); - } - -+ if (list_empty(&mb_cache_list)) { -+ kmem_cache_destroy(mb_cache_kmem_cache); -+ mb_cache_kmem_cache = NULL; -+ } - kfree(cache->c_index_hash); - kfree(cache->c_block_hash); - kfree(cache); diff --git a/ext4-dont-calculate-total-xattr-size-if-unneeded b/ext4-dont-calculate-total-xattr-size-if-unneeded deleted file mode 100644 index d69ef978..00000000 --- a/ext4-dont-calculate-total-xattr-size-if-unneeded +++ /dev/null @@ -1,48 +0,0 @@ -ext4: don't calculate total xattr header size unless needed - -The function ext4_expand_extra_isize_ea() doesn't need the size of all -of the extended attribute headers. So if we don't calculate it when -it is unneeded, it we can skip some undeeded memory references, and as -a bonus, we eliminate some kvetching by static code analysis tools. - -Addresses-Coverity-Id: #741291 - -Signed-off-by: "Theodore Ts'o" -diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index e175e94..185066f 100644 ---- a/fs/ext4/xattr.c -+++ b/fs/ext4/xattr.c -@@ -567,12 +567,13 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, - size_t *min_offs, void *base, int *total) - { - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { -- *total += EXT4_XATTR_LEN(last->e_name_len); - if (!last->e_value_block && last->e_value_size) { - size_t offs = le16_to_cpu(last->e_value_offs); - if (offs < *min_offs) - *min_offs = offs; - } -+ if (total) -+ *total += EXT4_XATTR_LEN(last->e_name_len); - } - return (*min_offs - ((void *)last - base) - sizeof(__u32)); - } -@@ -1228,7 +1229,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - struct ext4_xattr_block_find *bs = NULL; - char *buffer = NULL, *b_entry_name = NULL; - size_t min_offs, free; -- int total_ino, total_blk; -+ int total_ino; - void *base, *start, *end; - int extra_isize = 0, error = 0, tried_min_extra_isize = 0; - int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); -@@ -1286,8 +1287,7 @@ retry: - first = BFIRST(bh); - end = bh->b_data + bh->b_size; - min_offs = end - base; -- free = ext4_xattr_free_space(first, &min_offs, base, -- &total_blk); -+ free = ext4_xattr_free_space(first, &min_offs, base, NULL); - if (free < new_extra_isize) { - if (!tried_min_extra_isize && s_min_extra_isize) { - tried_min_extra_isize++; diff --git a/fix-comment-typo b/fix-comment-typo deleted file mode 100644 index 1539acd3..00000000 --- a/fix-comment-typo +++ /dev/null @@ -1,32 +0,0 @@ -ext4: fix comment typo - -From: Matthew Wilcox - -Signed-off-by: Matthew Wilcox -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/inode.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 9462730..14a9744 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3691,7 +3691,7 @@ void ext4_truncate(struct inode *inode) - - /* - * There is a possibility that we're either freeing the inode -- * or it completely new indode. In those cases we might not -+ * or it's a completely new inode. In those cases we might not - * have i_mutex locked because it's not necessary. - */ - if (!(inode->i_state & (I_NEW|I_FREEING))) --- -1.9.0 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . -Don't email: email@kvack.org - diff --git a/fix-error-return-from-ext4_ext_handle_uninitialized_extents b/fix-error-return-from-ext4_ext_handle_uninitialized_extents deleted file mode 100644 index 27e6af34..00000000 --- a/fix-error-return-from-ext4_ext_handle_uninitialized_extents +++ /dev/null @@ -1,49 +0,0 @@ -ext4: fix error return from ext4_ext_handle_uninitialized_extents() - -From: Eric Whitney - -Commit 3779473246 breaks the return of error codes from -ext4_ext_handle_uninitialized_extents() in ext4_ext_map_blocks(). A -portion of the patch assigns that function's signed integer return -value to an unsigned int. Consequently, negatively valued error codes -are lost and can be treated as a bogus allocated block count. - -Signed-off-by: Eric Whitney -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org ---- - fs/ext4/extents.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 74bc2d5..9875fd0 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4128,7 +4128,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, - struct ext4_extent newex, *ex, *ex2; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - ext4_fsblk_t newblock = 0; -- int free_on_err = 0, err = 0, depth; -+ int free_on_err = 0, err = 0, depth, ret; - unsigned int allocated = 0, offset = 0; - unsigned int allocated_clusters = 0; - struct ext4_allocation_request ar; -@@ -4189,9 +4189,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, - if (!ext4_ext_is_uninitialized(ex)) - goto out; - -- allocated = ext4_ext_handle_uninitialized_extents( -+ ret = ext4_ext_handle_uninitialized_extents( - handle, inode, map, path, flags, - allocated, newblock); -+ if (ret < 0) -+ err = ret; -+ else -+ allocated = ret; - goto out3; - } - } --- -1.8.3.2 - - diff --git a/fix-partial-cluster-handling-for-bigalloc-filesystems b/fix-partial-cluster-handling-for-bigalloc-filesystems deleted file mode 100644 index 4ac3cc4a..00000000 --- a/fix-partial-cluster-handling-for-bigalloc-filesystems +++ /dev/null @@ -1,60 +0,0 @@ -ext4: fix partial cluster handling for bigalloc file systems - -From: Eric Whitney - -Commit 9cb00419fa, which enables hole punching for bigalloc file -systems, exposed a bug introduced by commit 6ae06ff51e in an earlier -release. When run on a bigalloc file system, xfstests generic/013, 068, -075, 083, 091, 100, 112, 127, 263, 269, and 270 fail with e2fsck errors -or cause kernel error messages indicating that previously freed blocks -are being freed again. - -The latter commit optimizes the selection of the starting extent in -ext4_ext_rm_leaf() when hole punching by beginning with the extent -supplied in the path argument rather than with the last extent in the -leaf node (as is still done when truncating). However, the code in -rm_leaf that initially sets partial_cluster to track cluster sharing on -extent boundaries is only guaranteed to run if rm_leaf starts with the -last node in the leaf. Consequently, partial_cluster is not correctly -initialized when hole punching, and a cluster on the boundary of a -punched region that should be retained may instead be deallocated. - -Signed-off-by: Eric Whitney -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org ---- - fs/ext4/extents.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index f5def95..12bf3cc 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -2602,6 +2602,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = ext4_ext_get_actual_len(ex); - -+ /* -+ * If we're starting with an extent other than the last one in the -+ * node, we need to see if it shares a cluster with the extent to -+ * the right (towards the end of the file). If its leftmost cluster -+ * is this extent's rightmost cluster and it is not cluster aligned, -+ * we'll mark it as a partial that is not to be deallocated. -+ */ -+ -+ if (ex != EXT_LAST_EXTENT(eh)) { -+ ext4_fsblk_t current_pblk, right_pblk; -+ long long current_cluster, right_cluster; -+ -+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; -+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk); -+ right_pblk = ext4_ext_pblock(ex + 1); -+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk); -+ if (current_cluster == right_cluster && -+ EXT4_PBLK_COFF(sbi, right_pblk)) -+ *partial_cluster = -right_cluster; -+ } -+ - trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); - - while (ex >= EXT_FIRST_EXTENT(eh) && diff --git a/fix-premature-freeing-of-partial-clusters b/fix-premature-freeing-of-partial-clusters deleted file mode 100644 index 66518753..00000000 --- a/fix-premature-freeing-of-partial-clusters +++ /dev/null @@ -1,58 +0,0 @@ -ext4: fix premature freeing of partial clusters split across leaf blocks - -From: Eric Whitney - -Xfstests generic/311 and shared/298 fail when run on a bigalloc file -system. Kernel error messages produced during the tests report that -blocks to be freed are already on the to-be-freed list. When e2fsck -is run at the end of the tests, it typically reports bad i_blocks and -bad free blocks counts. - -The bug that causes these failures is located in ext4_ext_rm_leaf(). -Code at the end of the function frees a partial cluster if it's not -shared with an extent remaining in the leaf. However, if all the -extents in the leaf have been removed, the code dereferences an -invalid extent pointer (off the front of the leaf) when the check for -sharing is made. This generally has the effect of unconditionally -freeing the partial cluster, which leads to the observed failures -when the partial cluster is shared with the last extent in the next -leaf. - -Fix this by attempting to free the cluster only if extents remain in -the leaf. Any remaining partial cluster will be freed if possible -when the next leaf is processed or when leaf removal is complete. - -Signed-off-by: Eric Whitney -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org ---- - fs/ext4/extents.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 243a02e..340fadd 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -2743,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - err = ext4_ext_correct_indexes(handle, inode, path); - - /* -- * Free the partial cluster only if the current extent does not -- * reference it. Otherwise we might free used cluster. -+ * If there's a partial cluster and at least one extent remains in -+ * the leaf, free the partial cluster if it isn't shared with the -+ * current extent. If there's a partial cluster and no extents -+ * remain in the leaf, it can't be freed here. It can only be -+ * freed when it's possible to determine if it's not shared with -+ * any other extent - when the next leaf is processed or when space -+ * removal is complete. - */ -- if (*partial_cluster > 0 && -+ if (*partial_cluster > 0 && eh->eh_entries && - (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != - *partial_cluster)) { - int flags = get_default_free_blocks_flags(inode); --- -1.8.3.2 - - diff --git a/fix-resize-large-itable b/fix-resize-large-itable deleted file mode 100644 index e410f03d..00000000 --- a/fix-resize-large-itable +++ /dev/null @@ -1,127 +0,0 @@ -ext4: fix online resize with very large inode tables - -If a file system has a large number of inodes per block group, all of -the metadata blocks in a flex_bg may be larger than what can fit in a -single block group. Unfortunately, ext4_alloc_group_tables() in -resize.c was never tested to see if it would handle this case -correctly, and there were a large number of bugs which caused the -following sequence to result in a BUG_ON: - -kernel bug at fs/ext4/resize.c:409! - ... -call trace: - [] ext4_flex_group_add+0x1448/0x1830 - [] ext4_resize_fs+0x7b2/0xe80 - [] ext4_ioctl+0xbf0/0xf00 - [] do_vfs_ioctl+0x2dd/0x4b0 - [] ? final_putname+0x22/0x50 - [] sys_ioctl+0x81/0xa0 - [] system_call_fastpath+0x16/0x1b -code: c8 4c 89 df e8 41 96 f8 ff 44 89 e8 49 01 c4 44 29 6d d4 0 -rip [] set_flexbg_block_bitmap+0x171/0x180 - - -This can be reproduced with the following command sequence: - - mke2fs -t ext4 -i 4096 /dev/vdd 1G - mount -t ext4 /dev/vdd /vdd - resize2fs /dev/vdd 8G - -To fix this, we need to make sure the right thing happens when a block -group's inode table straddles two block groups, which means the -following bugs had to be fixed: - -1) Not clearing the BLOCK_UNINIT flag in the second block group in - ext4_alloc_group_tables --- the was proximate cause of the BUG_ON. - -2) Incorrectly determining how many block groups contained contiguous - free blocks in ext4_alloc_group_tables(). - -3) Incorrectly setting the start of the next block range to be marked - in use after a discontinuity in setup_new_flex_group_blocks(). - -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org ---- - fs/ext4/resize.c | 32 ++++++++++++++++++++------------ - 1 file changed, 20 insertions(+), 12 deletions(-) - -diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c -index c5adbb3..69a6261 100644 ---- a/fs/ext4/resize.c -+++ b/fs/ext4/resize.c -@@ -243,6 +243,7 @@ static int ext4_alloc_group_tables(struct super_block *sb, - ext4_group_t group; - ext4_group_t last_group; - unsigned overhead; -+ __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; - - BUG_ON(flex_gd->count == 0 || group_data == NULL); - -@@ -266,7 +267,7 @@ next_group: - src_group++; - for (; src_group <= last_group; src_group++) { - overhead = ext4_group_overhead_blocks(sb, src_group); -- if (overhead != 0) -+ if (overhead == 0) - last_blk += group_data[src_group - group].blocks_count; - else - break; -@@ -280,8 +281,7 @@ next_group: - group = ext4_get_group_number(sb, start_blk - 1); - group -= group_data[0].group; - group_data[group].free_blocks_count--; -- if (flexbg_size > 1) -- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; -+ flex_gd->bg_flags[group] &= uninit_mask; - } - - /* Allocate inode bitmaps */ -@@ -292,22 +292,30 @@ next_group: - group = ext4_get_group_number(sb, start_blk - 1); - group -= group_data[0].group; - group_data[group].free_blocks_count--; -- if (flexbg_size > 1) -- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; -+ flex_gd->bg_flags[group] &= uninit_mask; - } - - /* Allocate inode tables */ - for (; it_index < flex_gd->count; it_index++) { -- if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) -+ unsigned int itb = EXT4_SB(sb)->s_itb_per_group; -+ ext4_fsblk_t next_group_start; -+ -+ if (start_blk + itb > last_blk) - goto next_group; - group_data[it_index].inode_table = start_blk; -- group = ext4_get_group_number(sb, start_blk - 1); -+ group = ext4_get_group_number(sb, start_blk); -+ next_group_start = ext4_group_first_block_no(sb, group + 1); - group -= group_data[0].group; -- group_data[group].free_blocks_count -= -- EXT4_SB(sb)->s_itb_per_group; -- if (flexbg_size > 1) -- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; - -+ if (start_blk + itb > next_group_start) { -+ flex_gd->bg_flags[group + 1] &= uninit_mask; -+ overhead = start_blk + itb - next_group_start; -+ group_data[group + 1].free_blocks_count -= overhead; -+ itb -= overhead; -+ } -+ -+ group_data[group].free_blocks_count -= itb; -+ flex_gd->bg_flags[group] &= uninit_mask; - start_blk += EXT4_SB(sb)->s_itb_per_group; - } - -@@ -620,7 +628,7 @@ handle_ib: - if (err) - goto out; - count = group_table_count[j]; -- start = group_data[i].block_bitmap; -+ start = (&group_data[i].block_bitmap)[j]; - block = start; - } - diff --git a/fix-resize-nonstd-blocks-per-group b/fix-resize-nonstd-blocks-per-group deleted file mode 100644 index 4cf862f9..00000000 --- a/fix-resize-nonstd-blocks-per-group +++ /dev/null @@ -1,31 +0,0 @@ -ext4: fix online resize with a non-standard blocks per group setting - -The set_flexbg_block_bitmap() function assumed that the number of -blocks in a blockgroup was sb->blocksize * 8, which is normally true, -but not always! Use EXT4_BLOCKS_PER_GROUP(sb) instead, to fix block -bitmap corruption after: - -mke2fs -t ext4 -g 3072 -i 4096 /dev/vdd 1G -mount -t ext4 /dev/vdd /vdd -resize2fs /dev/vdd 8G - -Signed-off-by: "Theodore Ts'o" -Reported-by: Jon Bernard -Cc: stable@vger.kernel.org ---- - fs/ext4/resize.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c -index 69a6261..f3b84cd 100644 ---- a/fs/ext4/resize.c -+++ b/fs/ext4/resize.c -@@ -409,7 +409,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, - start = ext4_group_first_block_no(sb, group); - group -= flex_gd->groups[0].group; - -- count2 = sb->s_blocksize * 8 - (block - start); -+ count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start); - if (count2 > count) - count2 = count; - diff --git a/fix-swap_inode_boot_loader-cleanup b/fix-swap_inode_boot_loader-cleanup deleted file mode 100644 index 725bb5a8..00000000 --- a/fix-swap_inode_boot_loader-cleanup +++ /dev/null @@ -1,47 +0,0 @@ -ext4: fix error paths in swap_inode_boot_loader() - -From: Zheng Liu - -In swap_inode_boot_loader() we forgot to release ->i_mutex and resume -unlocked dio for inode and inode_bl if there is an error starting the -journal handle. This commit fixes this issue. - -Reported-by: Ahmed Tamrawi -Cc: Andreas Dilger -Cc: Dr. Tilmann Bubeck -Signed-off-by: Zheng Liu -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org # v3.10+ ---- - fs/ext4/ioctl.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index 6bea806..a2a837f 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -140,7 +140,7 @@ static long swap_inode_boot_loader(struct super_block *sb, - handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); - if (IS_ERR(handle)) { - err = -EINVAL; -- goto swap_boot_out; -+ goto journal_err_out; - } - - /* Protect extent tree against block allocations via delalloc */ -@@ -198,6 +198,7 @@ static long swap_inode_boot_loader(struct super_block *sb, - - ext4_double_up_write_data_sem(inode, inode_bl); - -+journal_err_out: - ext4_inode_resume_unlocked_dio(inode); - ext4_inode_resume_unlocked_dio(inode_bl); - --- -1.7.9.7 - --- -To unsubscribe from this list: send the line "unsubscribe linux-ext4" in -the body of a message to majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html - diff --git a/fix-use-after-free-in-jbd2_journal_start_reserved b/fix-use-after-free-in-jbd2_journal_start_reserved deleted file mode 100644 index f4ce5bdb..00000000 --- a/fix-use-after-free-in-jbd2_journal_start_reserved +++ /dev/null @@ -1,31 +0,0 @@ -jbd2: fix use after free in jbd2_journal_start_reserved() - -From: Dan Carpenter - -If start_this_handle() fails then it leads to a use after free of -"handle". - -Signed-off-by: Dan Carpenter -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org - -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 8360674c85bc..60bb365f54a5 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -514,11 +514,13 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, - * similarly constrained call sites - */ - ret = start_this_handle(journal, handle, GFP_NOFS); -- if (ret < 0) -+ if (ret < 0) { - jbd2_journal_free_reserved(handle); -+ return ret; -+ } - handle->h_type = type; - handle->h_line_no = line_no; -- return ret; -+ return 0; - } - EXPORT_SYMBOL(jbd2_journal_start_reserved); - diff --git a/fix-xfstest-generic-299-block-validity-failures b/fix-xfstest-generic-299-block-validity-failures deleted file mode 100644 index 092ca917..00000000 --- a/fix-xfstest-generic-299-block-validity-failures +++ /dev/null @@ -1,52 +0,0 @@ -ext4: fix xfstest generic/299 block validity failures - -From: Eric Whitney - -Commit a115f749c1 (ext4: remove wait for unwritten extent conversion from -ext4_truncate) exposed a bug in ext4_ext_handle_uninitialized_extents(). -It can be triggered by xfstest generic/299 when run on a test file -system created without a journal. This test continuously fallocates and -truncates files to which random dio/aio writes are simultaneously -performed by a separate process. The test completes successfully, but -if the test filesystem is mounted with the block_validity option, a -warning message stating that a logical block has been mapped to an -illegal physical block is posted in the kernel log. - -The bug occurs when an extent is being converted to the written state -by ext4_end_io_dio() and ext4_ext_handle_uninitialized_extents() -discovers a mapping for an existing uninitialized extent. Although it -sets EXT4_MAP_MAPPED in map->m_flags, it fails to set map->m_pblk to -the discovered physical block number. Because map->m_pblk is not -otherwise initialized or set by this function or its callers, its -uninitialized value is returned to ext4_map_blocks(), where it is -stored as a bogus mapping in the extent status tree. - -Since map->m_pblk can accidentally contain illegal values that are -larger than the physical size of the file system, calls to -check_block_validity() in ext4_map_blocks() that are enabled if the -block_validity mount option is used can fail, resulting in the logged -warning message. - -Signed-off-by: Eric Whitney -Signed-off-by: "Theodore Ts'o" -Cc: stable@vger.kernel.org # 3.11+ ---- - fs/ext4/extents.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 10cff47..74bc2d5 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -3906,6 +3906,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, - } else - err = ret; - map->m_flags |= EXT4_MAP_MAPPED; -+ map->m_pblk = newblock; - if (allocated > map->m_len) - allocated = map->m_len; - map->m_len = allocated; --- -1.8.3.2 - - diff --git a/fs-push-sync_filesystem-down-to-remount_fs b/fs-push-sync_filesystem-down-to-remount_fs deleted file mode 100644 index 0b593aa4..00000000 --- a/fs-push-sync_filesystem-down-to-remount_fs +++ /dev/null @@ -1,631 +0,0 @@ -fs: push sync_filesystem() down to the file system's remount_fs() - -Previously, the no-op "mount -o mount /dev/xxx" operation when the -file system is already mounted read-write causes an implied, -unconditional syncfs(). This seems pretty stupid, and it's certainly -documented or guaraunteed to do this, nor is it particularly useful, -except in the case where the file system was mounted rw and is getting -remounted read-only. - -However, it's possible that there might be some file systems that are -actually depending on this behavior. In most file systems, it's -probably fine to only call sync_filesystem() when transitioning from -read-write to read-only, and there are some file systems where this is -not needed at all (for example, for a pseudo-filesystem or something -like romfs). - -Signed-off-by: "Theodore Ts'o" -Cc: linux-fsdevel@vger.kernel.org -Cc: Christoph Hellwig -Cc: Artem Bityutskiy -Cc: Adrian Hunter -Cc: Evgeniy Dushistov -Cc: Jan Kara -Cc: OGAWA Hirofumi -Cc: Anders Larsen -Cc: Phillip Lougher -Cc: Kees Cook -Cc: Mikulas Patocka -Cc: Petr Vandrovec -Cc: xfs@oss.sgi.com -Cc: linux-btrfs@vger.kernel.org -Cc: linux-cifs@vger.kernel.org -Cc: samba-technical@lists.samba.org -Cc: codalist@coda.cs.cmu.edu -Cc: linux-ext4@vger.kernel.org -Cc: linux-f2fs-devel@lists.sourceforge.net -Cc: fuse-devel@lists.sourceforge.net -Cc: cluster-devel@redhat.com -Cc: linux-mtd@lists.infradead.org -Cc: jfs-discussion@lists.sourceforge.net -Cc: linux-nfs@vger.kernel.org -Cc: linux-nilfs@vger.kernel.org -Cc: linux-ntfs-dev@lists.sourceforge.net -Cc: ocfs2-devel@oss.oracle.com -Cc: reiserfs-devel@vger.kernel.org ---- - fs/adfs/super.c | 1 + - fs/affs/super.c | 1 + - fs/befs/linuxvfs.c | 1 + - fs/btrfs/super.c | 1 + - fs/cifs/cifsfs.c | 1 + - fs/coda/inode.c | 1 + - fs/cramfs/inode.c | 1 + - fs/debugfs/inode.c | 1 + - fs/devpts/inode.c | 1 + - fs/efs/super.c | 1 + - fs/ext2/super.c | 1 + - fs/ext3/super.c | 2 ++ - fs/ext4/super.c | 2 ++ - fs/f2fs/super.c | 2 ++ - fs/fat/inode.c | 2 ++ - fs/freevxfs/vxfs_super.c | 1 + - fs/fuse/inode.c | 1 + - fs/gfs2/super.c | 2 ++ - fs/hfs/super.c | 1 + - fs/hfsplus/super.c | 1 + - fs/hpfs/super.c | 2 ++ - fs/isofs/inode.c | 1 + - fs/jffs2/super.c | 1 + - fs/jfs/super.c | 1 + - fs/minix/inode.c | 1 + - fs/ncpfs/inode.c | 1 + - fs/nfs/super.c | 2 ++ - fs/nilfs2/super.c | 1 + - fs/ntfs/super.c | 2 ++ - fs/ocfs2/super.c | 2 ++ - fs/openpromfs/inode.c | 1 + - fs/proc/root.c | 2 ++ - fs/pstore/inode.c | 1 + - fs/qnx4/inode.c | 1 + - fs/qnx6/inode.c | 1 + - fs/reiserfs/super.c | 1 + - fs/romfs/super.c | 1 + - fs/squashfs/super.c | 1 + - fs/super.c | 2 -- - fs/sysv/inode.c | 1 + - fs/ubifs/super.c | 1 + - fs/udf/super.c | 1 + - fs/ufs/super.c | 1 + - fs/xfs/xfs_super.c | 1 + - 44 files changed, 53 insertions(+), 2 deletions(-) - -diff --git a/fs/adfs/super.c b/fs/adfs/super.c -index 7b3003c..952aeb0 100644 ---- a/fs/adfs/super.c -+++ b/fs/adfs/super.c -@@ -212,6 +212,7 @@ static int parse_options(struct super_block *sb, char *options) - - static int adfs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_NODIRATIME; - return parse_options(sb, data); - } -diff --git a/fs/affs/super.c b/fs/affs/super.c -index d098731..3074530 100644 ---- a/fs/affs/super.c -+++ b/fs/affs/super.c -@@ -530,6 +530,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) - - pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); - -+ sync_filesystem(sb); - *flags |= MS_NODIRATIME; - - memcpy(volume, sbi->s_volume, 32); -diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c -index 845d2d6..56d70c8 100644 ---- a/fs/befs/linuxvfs.c -+++ b/fs/befs/linuxvfs.c -@@ -913,6 +913,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) - static int - befs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - if (!(*flags & MS_RDONLY)) - return -EINVAL; - return 0; -diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c -index 97cc241..00cd0c5 100644 ---- a/fs/btrfs/super.c -+++ b/fs/btrfs/super.c -@@ -1381,6 +1381,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) - unsigned int old_metadata_ratio = fs_info->metadata_ratio; - int ret; - -+ sync_filesystem(sb); - btrfs_remount_prepare(fs_info); - - ret = btrfs_parse_options(root, data); -diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c -index 849f613..4942c94 100644 ---- a/fs/cifs/cifsfs.c -+++ b/fs/cifs/cifsfs.c -@@ -541,6 +541,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) - - static int cifs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_NODIRATIME; - return 0; - } -diff --git a/fs/coda/inode.c b/fs/coda/inode.c -index 506de34..3f48000 100644 ---- a/fs/coda/inode.c -+++ b/fs/coda/inode.c -@@ -96,6 +96,7 @@ void coda_destroy_inodecache(void) - - static int coda_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_NOATIME; - return 0; - } -diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c -index 06610cf..a275911 100644 ---- a/fs/cramfs/inode.c -+++ b/fs/cramfs/inode.c -@@ -244,6 +244,7 @@ static void cramfs_kill_sb(struct super_block *sb) - - static int cramfs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_RDONLY; - return 0; - } -diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c -index 9c0444c..02928a9 100644 ---- a/fs/debugfs/inode.c -+++ b/fs/debugfs/inode.c -@@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data) - int err; - struct debugfs_fs_info *fsi = sb->s_fs_info; - -+ sync_filesystem(sb); - err = debugfs_parse_options(data, &fsi->mount_opts); - if (err) - goto fail; -diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c -index a726b9f..c710380 100644 ---- a/fs/devpts/inode.c -+++ b/fs/devpts/inode.c -@@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) - struct pts_fs_info *fsi = DEVPTS_SB(sb); - struct pts_mount_opts *opts = &fsi->mount_opts; - -+ sync_filesystem(sb); - err = parse_mount_options(data, PARSE_REMOUNT, opts); - - /* -diff --git a/fs/efs/super.c b/fs/efs/super.c -index 50215bb..103bbd8 100644 ---- a/fs/efs/super.c -+++ b/fs/efs/super.c -@@ -114,6 +114,7 @@ static void destroy_inodecache(void) - - static int efs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_RDONLY; - return 0; - } -diff --git a/fs/ext2/super.c b/fs/ext2/super.c -index 20d6697..d260115 100644 ---- a/fs/ext2/super.c -+++ b/fs/ext2/super.c -@@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) - unsigned long old_sb_flags; - int err; - -+ sync_filesystem(sb); - spin_lock(&sbi->s_lock); - - /* Store the old options */ -diff --git a/fs/ext3/super.c b/fs/ext3/super.c -index 37fd31e..95c6c5a 100644 ---- a/fs/ext3/super.c -+++ b/fs/ext3/super.c -@@ -2649,6 +2649,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) - int i; - #endif - -+ sync_filesystem(sb); -+ - /* Store the original options */ - old_sb_flags = sb->s_flags; - old_opts.s_mount_opt = sbi->s_mount_opt; -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index f5c13b8..aa3842f 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -4767,6 +4767,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) - #endif - char *orig_data = kstrdup(data, GFP_KERNEL); - -+ sync_filesystem(sb); -+ - /* Store the original options */ - old_sb_flags = sb->s_flags; - old_opts.s_mount_opt = sbi->s_mount_opt; -diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c -index 1a85f83..856bdf9 100644 ---- a/fs/f2fs/super.c -+++ b/fs/f2fs/super.c -@@ -568,6 +568,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) - struct f2fs_mount_info org_mount_opt; - int err, active_logs; - -+ sync_filesystem(sb); -+ - /* - * Save the old mount options in case we - * need to restore them. -diff --git a/fs/fat/inode.c b/fs/fat/inode.c -index 854b578..343e477 100644 ---- a/fs/fat/inode.c -+++ b/fs/fat/inode.c -@@ -635,6 +635,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) - struct msdos_sb_info *sbi = MSDOS_SB(sb); - *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); - -+ sync_filesystem(sb); -+ - /* make sure we update state on remount. */ - new_rdonly = *flags & MS_RDONLY; - if (new_rdonly != (sb->s_flags & MS_RDONLY)) { -diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c -index e37eb27..7ca8c75 100644 ---- a/fs/freevxfs/vxfs_super.c -+++ b/fs/freevxfs/vxfs_super.c -@@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) - - static int vxfs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_RDONLY; - return 0; - } -diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c -index d468643..ecdb255d 100644 ---- a/fs/fuse/inode.c -+++ b/fs/fuse/inode.c -@@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode) - - static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - if (*flags & MS_MANDLOCK) - return -EINVAL; - -diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c -index 60f60f6..4c6dd50 100644 ---- a/fs/gfs2/super.c -+++ b/fs/gfs2/super.c -@@ -1175,6 +1175,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) - struct gfs2_tune *gt = &sdp->sd_tune; - int error; - -+ sync_filesystem(sb); -+ - spin_lock(>->gt_spin); - args.ar_commit = gt->gt_logd_secs; - args.ar_quota_quantum = gt->gt_quota_quantum; -diff --git a/fs/hfs/super.c b/fs/hfs/super.c -index 2d2039e..eee7206 100644 ---- a/fs/hfs/super.c -+++ b/fs/hfs/super.c -@@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) - - static int hfs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_NODIRATIME; - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) - return 0; -diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c -index 80875aa..8eb787b 100644 ---- a/fs/hfsplus/super.c -+++ b/fs/hfsplus/super.c -@@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) - - static int hfsplus_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) - return 0; - if (!(*flags & MS_RDONLY)) { -diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c -index 4534ff6..fe3463a 100644 ---- a/fs/hpfs/super.c -+++ b/fs/hpfs/super.c -@@ -421,6 +421,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) - struct hpfs_sb_info *sbi = hpfs_sb(s); - char *new_opts = kstrdup(data, GFP_KERNEL); - -+ sync_filesystem(s); -+ - *flags |= MS_NOATIME; - - hpfs_lock(s); -diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c -index 4a9e10e..6af66ee 100644 ---- a/fs/isofs/inode.c -+++ b/fs/isofs/inode.c -@@ -117,6 +117,7 @@ static void destroy_inodecache(void) - - static int isofs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - if (!(*flags & MS_RDONLY)) - return -EROFS; - return 0; -diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c -index 0defb1c..0918f0e 100644 ---- a/fs/jffs2/super.c -+++ b/fs/jffs2/super.c -@@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - int err; - -+ sync_filesystem(sb); - err = jffs2_parse_options(c, data); - if (err) - return -EINVAL; -diff --git a/fs/jfs/super.c b/fs/jfs/super.c -index e2b7483..97f7fda 100644 ---- a/fs/jfs/super.c -+++ b/fs/jfs/super.c -@@ -418,6 +418,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) - int flag = JFS_SBI(sb)->flag; - int ret; - -+ sync_filesystem(sb); - if (!parse_options(data, sb, &newLVSize, &flag)) { - return -EINVAL; - } -diff --git a/fs/minix/inode.c b/fs/minix/inode.c -index 0332109..dcdc298 100644 ---- a/fs/minix/inode.c -+++ b/fs/minix/inode.c -@@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) - struct minix_sb_info * sbi = minix_sb(sb); - struct minix_super_block * ms; - -+ sync_filesystem(sb); - ms = sbi->s_ms; - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) - return 0; -diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c -index 2cf2ebe..5f86e80 100644 ---- a/fs/ncpfs/inode.c -+++ b/fs/ncpfs/inode.c -@@ -99,6 +99,7 @@ static void destroy_inodecache(void) - - static int ncp_remount(struct super_block *sb, int *flags, char* data) - { -+ sync_filesystem(sb); - *flags |= MS_NODIRATIME; - return 0; - } -diff --git a/fs/nfs/super.c b/fs/nfs/super.c -index 910ed90..2cb5694 100644 ---- a/fs/nfs/super.c -+++ b/fs/nfs/super.c -@@ -2215,6 +2215,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) - struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; - u32 nfsvers = nfss->nfs_client->rpc_ops->version; - -+ sync_filesystem(sb); -+ - /* - * Userspace mount programs that send binary options generally send - * them populated with default values. We have no way to know which -diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c -index 7ac2a12..8c532b2 100644 ---- a/fs/nilfs2/super.c -+++ b/fs/nilfs2/super.c -@@ -1129,6 +1129,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) - unsigned long old_mount_opt; - int err; - -+ sync_filesystem(sb); - old_sb_flags = sb->s_flags; - old_mount_opt = nilfs->ns_mount_opt; - -diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c -index 82650d5..bd5610d 100644 ---- a/fs/ntfs/super.c -+++ b/fs/ntfs/super.c -@@ -468,6 +468,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) - - ntfs_debug("Entering with remount options string: %s", opt); - -+ sync_filesystem(sb); -+ - #ifndef NTFS_RW - /* For read-only compiled driver, enforce read-only flag. */ - *flags |= MS_RDONLY; -diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c -index 49d84f8..5f9bf8f 100644 ---- a/fs/ocfs2/super.c -+++ b/fs/ocfs2/super.c -@@ -631,6 +631,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) - struct ocfs2_super *osb = OCFS2_SB(sb); - u32 tmp; - -+ sync_filesystem(sb); -+ - if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || - !ocfs2_check_set_options(sb, &parsed_options)) { - ret = -EINVAL; -diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c -index 8c0ceb8..15e4500 100644 ---- a/fs/openpromfs/inode.c -+++ b/fs/openpromfs/inode.c -@@ -368,6 +368,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) - - static int openprom_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_NOATIME; - return 0; - } -diff --git a/fs/proc/root.c b/fs/proc/root.c -index 87dbcbe..ac823a8 100644 ---- a/fs/proc/root.c -+++ b/fs/proc/root.c -@@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) - int proc_remount(struct super_block *sb, int *flags, char *data) - { - struct pid_namespace *pid = sb->s_fs_info; -+ -+ sync_filesystem(sb); - return !proc_parse_options(data, pid); - } - -diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c -index 1282384..192297b 100644 ---- a/fs/pstore/inode.c -+++ b/fs/pstore/inode.c -@@ -249,6 +249,7 @@ static void parse_options(char *options) - - static int pstore_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - parse_options(data); - - return 0; -diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c -index 8955881..c4bcb77 100644 ---- a/fs/qnx4/inode.c -+++ b/fs/qnx4/inode.c -@@ -44,6 +44,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) - { - struct qnx4_sb_info *qs; - -+ sync_filesystem(sb); - qs = qnx4_sb(sb); - qs->Version = QNX4_VERSION; - *flags |= MS_RDONLY; -diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c -index 8d941ed..65cdaab 100644 ---- a/fs/qnx6/inode.c -+++ b/fs/qnx6/inode.c -@@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) - - static int qnx6_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_RDONLY; - return 0; - } -diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c -index 2c80335..abf2b76 100644 ---- a/fs/reiserfs/super.c -+++ b/fs/reiserfs/super.c -@@ -1319,6 +1319,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) - int i; - #endif - -+ sync_filesystem(s); - reiserfs_write_lock(s); - - #ifdef CONFIG_QUOTA -diff --git a/fs/romfs/super.c b/fs/romfs/super.c -index d841878..ef90e8b 100644 ---- a/fs/romfs/super.c -+++ b/fs/romfs/super.c -@@ -432,6 +432,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) - */ - static int romfs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_RDONLY; - return 0; - } -diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c -index 202df63..031c8d67 100644 ---- a/fs/squashfs/super.c -+++ b/fs/squashfs/super.c -@@ -371,6 +371,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) - - static int squashfs_remount(struct super_block *sb, int *flags, char *data) - { -+ sync_filesystem(sb); - *flags |= MS_RDONLY; - return 0; - } -diff --git a/fs/super.c b/fs/super.c -index 80d5cf2..e9dc3c3 100644 ---- a/fs/super.c -+++ b/fs/super.c -@@ -719,8 +719,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) - } - } - -- sync_filesystem(sb); -- - if (sb->s_op->remount_fs) { - retval = sb->s_op->remount_fs(sb, &flags, data); - if (retval) { -diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c -index c327d4e..4742e58 100644 ---- a/fs/sysv/inode.c -+++ b/fs/sysv/inode.c -@@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) - { - struct sysv_sb_info *sbi = SYSV_SB(sb); - -+ sync_filesystem(sb); - if (sbi->s_forced_ro) - *flags |= MS_RDONLY; - return 0; -diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c -index 5ded849..e1598ab 100644 ---- a/fs/ubifs/super.c -+++ b/fs/ubifs/super.c -@@ -1827,6 +1827,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) - int err; - struct ubifs_info *c = sb->s_fs_info; - -+ sync_filesystem(sb); - dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); - - err = ubifs_parse_options(c, data, 1); -diff --git a/fs/udf/super.c b/fs/udf/super.c -index 3306b9f..64f2b73 100644 ---- a/fs/udf/super.c -+++ b/fs/udf/super.c -@@ -646,6 +646,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) - int error = 0; - struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); - -+ sync_filesystem(sb); - if (lvidiu) { - int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); - if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) -diff --git a/fs/ufs/super.c b/fs/ufs/super.c -index 329f2f5..b8c6791 100644 ---- a/fs/ufs/super.c -+++ b/fs/ufs/super.c -@@ -1280,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) - unsigned new_mount_opt, ufstype; - unsigned flags; - -+ sync_filesystem(sb); - lock_ufs(sb); - mutex_lock(&UFS_SB(sb)->s_lock); - uspi = UFS_SB(sb)->s_uspi; -diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c -index f317488..aaa3eca 100644 ---- a/fs/xfs/xfs_super.c -+++ b/fs/xfs/xfs_super.c -@@ -1197,6 +1197,7 @@ xfs_fs_remount( - char *p; - int error; - -+ sync_filesystem(sb); - while ((p = strsep(&options, ",")) != NULL) { - int token; - diff --git a/initialize-fe_logical b/initialize-fe_logical deleted file mode 100644 index 63db8d79..00000000 --- a/initialize-fe_logical +++ /dev/null @@ -1,52 +0,0 @@ -ext4: make sure ex.fe_logical is initialized - -The lowest levels of mballoc set all of the fields of struct -ext4_free_extent except for fe_logical, since they are just trying to -find the requested free set of blocks, and the logical block hasn't -been set yet. This makes some static code checkers sad. Set it to -various different debug values, which would be useful when -debugging mballoc if these values were to ever show up due to the -parts of mballoc triyng to use ac->ac_b_ex.fe_logical before it is -properly upper layers of mballoc failing to properly set, usually by -ext4_mb_use_best_found(). - -Addresses-Coverity-Id: #139697 -Addresses-Coverity-Id: #139698 -Addresses-Coverity-Id: #139699 - -Signed-off-by: "Theodore Ts'o" - - ---- - fs/ext4/mballoc.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c -index 04a5c75..0d42f63 100644 ---- a/fs/ext4/mballoc.c -+++ b/fs/ext4/mballoc.c -@@ -1808,6 +1808,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, - ext4_lock_group(ac->ac_sb, group); - max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, - ac->ac_g_ex.fe_len, &ex); -+ ex.fe_logical = 0xDEADFA11; /* debug value */ - - if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { - ext4_fsblk_t start; -@@ -1936,7 +1937,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, - */ - break; - } -- -+ ex.fe_logical = 0xDEADC0DE; /* debug value */ - ext4_mb_measure_extent(ac, &ex, e4b); - - i += ex.fe_len; -@@ -1977,6 +1978,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, - max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); - if (max >= sbi->s_stripe) { - ac->ac_found++; -+ ex.fe_logical = 0xDEADF00D; /* debug value */ - ac->ac_b_ex = ex; - ext4_mb_use_best_found(ac, e4b); - break; diff --git a/jbd2-add-transaction-to-checkpoint-list-earlier b/jbd2-add-transaction-to-checkpoint-list-earlier deleted file mode 100644 index af4a16f3..00000000 --- a/jbd2-add-transaction-to-checkpoint-list-earlier +++ /dev/null @@ -1,76 +0,0 @@ -jbd2: add transaction to checkpoint list earlier - -We don't otherwise need j_list_lock during the rest of commit phase -#7, so add the transaction to the checkpoint list at the very end of -commit phase #6. This allows us to drop j_list_lock earlier, which is -a good thing since it is a super hot lock. - -Signed-off-by: "Theodore Ts'o" ---- - fs/jbd2/commit.c | 39 ++++++++++++++++++++------------------- - 1 file changed, 20 insertions(+), 19 deletions(-) - -diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c -index af36252..5f26139 100644 ---- a/fs/jbd2/commit.c -+++ b/fs/jbd2/commit.c -@@ -1065,6 +1065,25 @@ restart_loop: - goto restart_loop; - } - -+ /* Add the transaction to the checkpoint list -+ * __journal_remove_checkpoint() can not destroy transaction -+ * under us because it is not marked as T_FINISHED yet */ -+ if (journal->j_checkpoint_transactions == NULL) { -+ journal->j_checkpoint_transactions = commit_transaction; -+ commit_transaction->t_cpnext = commit_transaction; -+ commit_transaction->t_cpprev = commit_transaction; -+ } else { -+ commit_transaction->t_cpnext = -+ journal->j_checkpoint_transactions; -+ commit_transaction->t_cpprev = -+ commit_transaction->t_cpnext->t_cpprev; -+ commit_transaction->t_cpnext->t_cpprev = -+ commit_transaction; -+ commit_transaction->t_cpprev->t_cpnext = -+ commit_transaction; -+ } -+ spin_unlock(&journal->j_list_lock); -+ - /* Done with this transaction! */ - - jbd_debug(3, "JBD2: commit phase 7\n"); -@@ -1103,24 +1122,6 @@ restart_loop: - - write_unlock(&journal->j_state_lock); - -- if (journal->j_checkpoint_transactions == NULL) { -- journal->j_checkpoint_transactions = commit_transaction; -- commit_transaction->t_cpnext = commit_transaction; -- commit_transaction->t_cpprev = commit_transaction; -- } else { -- commit_transaction->t_cpnext = -- journal->j_checkpoint_transactions; -- commit_transaction->t_cpprev = -- commit_transaction->t_cpnext->t_cpprev; -- commit_transaction->t_cpnext->t_cpprev = -- commit_transaction; -- commit_transaction->t_cpprev->t_cpnext = -- commit_transaction; -- } -- spin_unlock(&journal->j_list_lock); -- /* Drop all spin_locks because commit_callback may be block. -- * __journal_remove_checkpoint() can not destroy transaction -- * under us because it is not marked as T_FINISHED yet */ - if (journal->j_commit_callback) - journal->j_commit_callback(journal, commit_transaction); - -@@ -1131,7 +1132,7 @@ restart_loop: - write_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - commit_transaction->t_state = T_FINISHED; -- /* Recheck checkpoint lists after j_list_lock was dropped */ -+ /* Check if the transaction can be dropped now that we are finished */ - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { - __jbd2_journal_drop_transaction(journal, commit_transaction); diff --git a/jbd2-calc-stats-wo-j_state_lock-and-j_list_lock b/jbd2-calc-stats-wo-j_state_lock-and-j_list_lock deleted file mode 100644 index 417e5de6..00000000 --- a/jbd2-calc-stats-wo-j_state_lock-and-j_list_lock +++ /dev/null @@ -1,66 +0,0 @@ -jbd2: calculate statistics without holding j_state_lock and j_list_lock - -The two hottest locks, and thus the biggest scalability bottlenecks, -in the jbd2 layer, are the j_list_lock and j_state_lock. This has -inspired some people to do some truly unnatural things[1]. - -[1] https://www.usenix.org/system/files/conference/fast14/fast14-paper_kang.pdf - -We don't need to be holding both j_state_lock and j_list_lock while -calculating the journal statistics, so move those calculations to the -very end of jbd2_journal_commit_transaction. - -Signed-off-by: "Theodore Ts'o" - -diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c -index 765b31d..af36252 100644 ---- a/fs/jbd2/commit.c -+++ b/fs/jbd2/commit.c -@@ -1083,24 +1083,7 @@ restart_loop: - atomic_read(&commit_transaction->t_handle_count); - trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, - commit_transaction->t_tid, &stats.run); -- -- /* -- * Calculate overall stats -- */ -- spin_lock(&journal->j_history_lock); -- journal->j_stats.ts_tid++; -- if (commit_transaction->t_requested) -- journal->j_stats.ts_requested++; -- journal->j_stats.run.rs_wait += stats.run.rs_wait; -- journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; -- journal->j_stats.run.rs_running += stats.run.rs_running; -- journal->j_stats.run.rs_locked += stats.run.rs_locked; -- journal->j_stats.run.rs_flushing += stats.run.rs_flushing; -- journal->j_stats.run.rs_logging += stats.run.rs_logging; -- journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; -- journal->j_stats.run.rs_blocks += stats.run.rs_blocks; -- journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; -- spin_unlock(&journal->j_history_lock); -+ stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0; - - commit_transaction->t_state = T_COMMIT_CALLBACK; - J_ASSERT(commit_transaction == journal->j_committing_transaction); -@@ -1157,4 +1140,21 @@ restart_loop: - spin_unlock(&journal->j_list_lock); - write_unlock(&journal->j_state_lock); - wake_up(&journal->j_wait_done_commit); -+ -+ /* -+ * Calculate overall stats -+ */ -+ spin_lock(&journal->j_history_lock); -+ journal->j_stats.ts_tid++; -+ journal->j_stats.ts_requested += stats.ts_requested; -+ journal->j_stats.run.rs_wait += stats.run.rs_wait; -+ journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; -+ journal->j_stats.run.rs_running += stats.run.rs_running; -+ journal->j_stats.run.rs_locked += stats.run.rs_locked; -+ journal->j_stats.run.rs_flushing += stats.run.rs_flushing; -+ journal->j_stats.run.rs_logging += stats.run.rs_logging; -+ journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; -+ journal->j_stats.run.rs_blocks += stats.run.rs_blocks; -+ journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; -+ spin_unlock(&journal->j_history_lock); - } diff --git a/jbd2-check-b_transaction-wo-taking-j_list_lock b/jbd2-check-b_transaction-wo-taking-j_list_lock deleted file mode 100644 index 461dba9e..00000000 --- a/jbd2-check-b_transaction-wo-taking-j_list_lock +++ /dev/null @@ -1,29 +0,0 @@ -jbd2: check jh->b_transaction without taking j_list_lock - -jh->b_transaction is adequately protected for reading by the -jbd_lock_bh_state(bh), so we don't need to take j_list_lock in -__journal_try_to_free_buffer(). - -Signed-off-by: "Theodore Ts'o" ---- - fs/jbd2/transaction.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 60bb365..78900a1 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1821,11 +1821,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) - if (buffer_locked(bh) || buffer_dirty(bh)) - goto out; - -- if (jh->b_next_transaction != NULL) -+ if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) - goto out; - - spin_lock(&journal->j_list_lock); -- if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { -+ if (jh->b_cp_transaction != NULL) { - /* written-back checkpointed metadata buffer */ - JBUFFER_TRACE(jh, "remove from checkpoint list"); - __jbd2_journal_remove_checkpoint(jh); diff --git a/jbd2-dont-unplog-after-revoke-records b/jbd2-dont-unplog-after-revoke-records deleted file mode 100644 index ac80c185..00000000 --- a/jbd2-dont-unplog-after-revoke-records +++ /dev/null @@ -1,33 +0,0 @@ -jbd2: don't unplug after writing revoke records - -During commit process, keep the block device plugged after we are done -writing the revoke records, until we are finished writing the rest of -the commit records in the journal. This will allow most of the -journal blocks to be written in a single I/O operation, instead of -separating the the revoke blocks from the rest of the journal blocks. - -Signed-off-by: "Theodore Ts'o" ---- - fs/jbd2/commit.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c -index cf2fc05..765b31d 100644 ---- a/fs/jbd2/commit.c -+++ b/fs/jbd2/commit.c -@@ -555,7 +555,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) - blk_start_plug(&plug); - jbd2_journal_write_revoke_records(journal, commit_transaction, - &log_bufs, WRITE_SYNC); -- blk_finish_plug(&plug); - - jbd_debug(3, "JBD2: commit phase 2b\n"); - -@@ -582,7 +581,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) - err = 0; - bufs = 0; - descriptor = NULL; -- blk_start_plug(&plug); - while (commit_transaction->t_buffers) { - - /* Find the next buffer to be journaled... */ diff --git a/jbd2-improve-error-messages-for-inconsistent-jh b/jbd2-improve-error-messages-for-inconsistent-jh deleted file mode 100644 index 799122f4..00000000 --- a/jbd2-improve-error-messages-for-inconsistent-jh +++ /dev/null @@ -1,92 +0,0 @@ -jbd2: improve error messages for inconsistent journal heads - -Fix up error messages printed when the transaction pointers in a -journal head are inconsistent. This improves the error messages which -are printed when running xfstests generic/068 in data=journal mode. -See the bug report at: https://bugzilla.kernel.org/show_bug.cgi?id=60786 - -Signed-off-by: "Theodore Ts'o" - ---- - fs/ext4/ext4_jbd2.c | 10 ++++++++++ - fs/jbd2/transaction.c | 33 ++++++++++++++------------------- - 2 files changed, 24 insertions(+), 19 deletions(-) - -diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c -index 3fe29de..c3fb607 100644 ---- a/fs/ext4/ext4_jbd2.c -+++ b/fs/ext4/ext4_jbd2.c -@@ -259,6 +259,16 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, - if (WARN_ON_ONCE(err)) { - ext4_journal_abort_handle(where, line, __func__, bh, - handle, err); -+ if (inode == NULL) { -+ pr_err("EXT4: jbd2_journal_dirty_metadata " -+ "failed: handle type %u started at " -+ "line %u, credits %u/%u, errcode %d", -+ handle->h_type, -+ handle->h_line_no, -+ handle->h_requested_credits, -+ handle->h_buffer_credits, err); -+ return err; -+ } - ext4_error_inode(inode, where, line, - bh->b_blocknr, - "journal_dirty_metadata failed: " -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index d999b1f..38cfcf5 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1313,7 +1313,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) - journal->j_running_transaction)) { - printk(KERN_ERR "JBD2: %s: " - "jh->b_transaction (%llu, %p, %u) != " -- "journal->j_running_transaction (%p, %u)", -+ "journal->j_running_transaction (%p, %u)\n", - journal->j_devname, - (unsigned long long) bh->b_blocknr, - jh->b_transaction, -@@ -1336,30 +1336,25 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) - */ - if (jh->b_transaction != transaction) { - JBUFFER_TRACE(jh, "already on other transaction"); -- if (unlikely(jh->b_transaction != -- journal->j_committing_transaction)) { -- printk(KERN_ERR "JBD2: %s: " -- "jh->b_transaction (%llu, %p, %u) != " -- "journal->j_committing_transaction (%p, %u)", -+ if (unlikely(((jh->b_transaction != -+ journal->j_committing_transaction)) || -+ (jh->b_next_transaction != transaction))) { -+ printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: " -+ "bad jh for block %llu: " -+ "transaction (%p, %u), " -+ "jh->b_transaction (%p, %u), " -+ "jh->b_next_transaction (%p, %u), jlist %u\n", - journal->j_devname, - (unsigned long long) bh->b_blocknr, -+ transaction, transaction->t_tid, - jh->b_transaction, -- jh->b_transaction ? jh->b_transaction->t_tid : 0, -- journal->j_committing_transaction, -- journal->j_committing_transaction ? -- journal->j_committing_transaction->t_tid : 0); -- ret = -EINVAL; -- } -- if (unlikely(jh->b_next_transaction != transaction)) { -- printk(KERN_ERR "JBD2: %s: " -- "jh->b_next_transaction (%llu, %p, %u) != " -- "transaction (%p, %u)", -- journal->j_devname, -- (unsigned long long) bh->b_blocknr, -+ jh->b_transaction ? -+ jh->b_transaction->t_tid : 0, - jh->b_next_transaction, - jh->b_next_transaction ? - jh->b_next_transaction->t_tid : 0, -- transaction, transaction->t_tid); -+ jh->b_jlist); -+ WARN_ON(1); - ret = -EINVAL; - } - /* And this case is illegal: we can't reuse another diff --git a/jbd2-mark-file-local-functions-as-static b/jbd2-mark-file-local-functions-as-static deleted file mode 100644 index 54302ebe..00000000 --- a/jbd2-mark-file-local-functions-as-static +++ /dev/null @@ -1,52 +0,0 @@ -jbd2: mark file-local functions as static - -From: Rashika Kheria - -Mark functions as static in jbd2/journal.c because they are not used -outside this file. - -This eliminates the following warning in jbd2/journal.c: -fs/jbd2/journal.c:125:5: warning: no previous prototype for ‘jbd2_verify_csum_type’ [-Wmissing-prototypes] -fs/jbd2/journal.c:146:5: warning: no previous prototype for ‘jbd2_superblock_csum_verify’ [-Wmissing-prototypes] -fs/jbd2/journal.c:154:6: warning: no previous prototype for ‘jbd2_superblock_csum_set’ [-Wmissing-prototypes] - -Signed-off-by: Rashika Kheria -Signed-off-by: "Theodore Ts'o" -Reviewed-by: Josh Triplett -Reviewed-by: Darrick J. Wong - ---- -fs/jbd2/journal.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index 5fa344a..244b6f6 100644 ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -122,7 +122,7 @@ EXPORT_SYMBOL(__jbd2_debug); - #endif - - /* Checksumming functions */ --int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) -+static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) - { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; -@@ -143,7 +143,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) - return cpu_to_be32(csum); - } - --int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) -+static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) - { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; -@@ -151,7 +151,7 @@ int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) - return sb->s_checksum == jbd2_superblock_csum(j, sb); - } - --void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) -+static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) - { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return; diff --git a/jbd2-minimize-j_list_lock-in-journal_get_create_access b/jbd2-minimize-j_list_lock-in-journal_get_create_access deleted file mode 100644 index 27c92f6d..00000000 --- a/jbd2-minimize-j_list_lock-in-journal_get_create_access +++ /dev/null @@ -1,37 +0,0 @@ -jbd2: minimize region locked by j_list_lock in journal_get_create_access() - -It's not needed until we start trying to modifying fields in the -journal_head which are protected by j_list_lock. - -Signed-off-by: "Theodore Ts'o" ---- - fs/jbd2/transaction.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 78900a1..357f3dc 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1073,7 +1073,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) - * reused here. - */ - jbd_lock_bh_state(bh); -- spin_lock(&journal->j_list_lock); - J_ASSERT_JH(jh, (jh->b_transaction == transaction || - jh->b_transaction == NULL || - (jh->b_transaction == journal->j_committing_transaction && -@@ -1096,12 +1095,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) - jh->b_modified = 0; - - JBUFFER_TRACE(jh, "file as BJ_Reserved"); -+ spin_lock(&journal->j_list_lock); - __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); - } else if (jh->b_transaction == journal->j_committing_transaction) { - /* first access by this transaction */ - jh->b_modified = 0; - - JBUFFER_TRACE(jh, "set next transaction"); -+ spin_lock(&journal->j_list_lock); - jh->b_next_transaction = transaction; - } - spin_unlock(&journal->j_list_lock); diff --git a/jbd2-minimize-j_state_lock-in-jbd2_journal_forget b/jbd2-minimize-j_state_lock-in-jbd2_journal_forget deleted file mode 100644 index 475eafc0..00000000 --- a/jbd2-minimize-j_state_lock-in-jbd2_journal_forget +++ /dev/null @@ -1,57 +0,0 @@ -jbd2: minimize region locked by j_list_lock in jbd2_journal_forget() - -It's not needed until we start trying to modifying fields in the -journal_head which are protected by j_list_lock. - -Signed-off-by: "Theodore Ts'o" - ---- - fs/jbd2/transaction.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 357f3dc..d999b1f 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1416,7 +1416,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) - BUFFER_TRACE(bh, "entry"); - - jbd_lock_bh_state(bh); -- spin_lock(&journal->j_list_lock); - - if (!buffer_jbd(bh)) - goto not_jbd; -@@ -1469,6 +1468,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) - * we know to remove the checkpoint after we commit. - */ - -+ spin_lock(&journal->j_list_lock); - if (jh->b_cp_transaction) { - __jbd2_journal_temp_unlink_buffer(jh); - __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); -@@ -1481,6 +1481,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) - goto drop; - } - } -+ spin_unlock(&journal->j_list_lock); - } else if (jh->b_transaction) { - J_ASSERT_JH(jh, (jh->b_transaction == - journal->j_committing_transaction)); -@@ -1492,7 +1493,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) - - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == transaction); -+ spin_lock(&journal->j_list_lock); - jh->b_next_transaction = NULL; -+ spin_unlock(&journal->j_list_lock); - - /* - * only drop a reference if this transaction modified -@@ -1504,7 +1507,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) - } - - not_jbd: -- spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __brelse(bh); - drop: diff --git a/kill-i_version-support-for-hurd-OS-filesystems b/kill-i_version-support-for-hurd-OS-filesystems deleted file mode 100644 index 8a263616..00000000 --- a/kill-i_version-support-for-hurd-OS-filesystems +++ /dev/null @@ -1,73 +0,0 @@ -ext4: kill i_version support for Hurd-castrated file systems - -The Hurd file system uses uses the inode field which is now used for -i_version for its translator block. This means that ext2 file systems -that are formatted for GNU Hurd can't be used to support NFSv4. Given -that Hurd file systems don't support extents, and a huge number of -modern file system features, this is no great loss. - -If we don't do this, the attempt to update the i_version field will -stomp over the translator block field, which will cause file system -corruption for Hurd file systems. This can be replicated via: - -mke2fs -t ext2 -o hurd /dev/vdc -mount -t ext4 /dev/vdc /vdc -touch /vdc/bug0000 -umount /dev/vdc -e2fsck -f /dev/vdc - -Addresses-Debian-Bug: #738758 - -Reported-By: Gabriele Giacone <1o5g4r8o@gmail.com> -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/inode.c | 29 ++++++++++++++++++----------- - 1 file changed, 18 insertions(+), 11 deletions(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 7cc2455..ed2c13a 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -4168,11 +4168,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) - EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); - EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); - -- inode->i_version = le32_to_cpu(raw_inode->i_disk_version); -- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { -- if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -- inode->i_version |= -- (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; -+ if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != -+ cpu_to_le32(EXT4_OS_HURD)) { -+ inode->i_version = le32_to_cpu(raw_inode->i_disk_version); -+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { -+ if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -+ inode->i_version |= -+ (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; -+ } - } - - ret = 0; -@@ -4388,12 +4391,16 @@ static int ext4_do_update_inode(handle_t *handle, - raw_inode->i_block[block] = ei->i_data[block]; - } - -- raw_inode->i_disk_version = cpu_to_le32(inode->i_version); -- if (ei->i_extra_isize) { -- if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -- raw_inode->i_version_hi = -- cpu_to_le32(inode->i_version >> 32); -- raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != -+ cpu_to_le32(EXT4_OS_HURD)) { -+ raw_inode->i_disk_version = cpu_to_le32(inode->i_version); -+ if (ei->i_extra_isize) { -+ if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -+ raw_inode->i_version_hi = -+ cpu_to_le32(inode->i_version >> 32); -+ raw_inode->i_extra_isize = -+ cpu_to_le16(ei->i_extra_isize); -+ } - } - - ext4_inode_csum_set(inode, raw_inode, ei); diff --git a/make-ext4_block_zero_page_range_static b/make-ext4_block_zero_page_range_static deleted file mode 100644 index 314e54b9..00000000 --- a/make-ext4_block_zero_page_range_static +++ /dev/null @@ -1,103 +0,0 @@ -ext4: make ext4_block_zero_page_range static - -From: Matthew Wilcox - -It's only called within inode.c, so make it static, remove its prototype -from ext4.h and move it above all of its callers so it doesn't need a -prototype within inode.c. - -Signed-off-by: Matthew Wilcox -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/ext4.h | 2 -- - fs/ext4/inode.c | 42 +++++++++++++++++++++--------------------- - 2 files changed, 21 insertions(+), 23 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index d3a534f..e025c29 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -2133,8 +2133,6 @@ extern int ext4_writepage_trans_blocks(struct inode *); - extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); - extern int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from); --extern int ext4_block_zero_page_range(handle_t *handle, -- struct address_space *mapping, loff_t from, loff_t length); - extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, - loff_t lstart, loff_t lend); - extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 6e39895..ce7341c 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -3312,33 +3312,13 @@ void ext4_set_aops(struct inode *inode) - } - - /* -- * ext4_block_truncate_page() zeroes out a mapping from file offset `from' -- * up to the end of the block which corresponds to `from'. -- * This required during truncate. We need to physically zero the tail end -- * of that block so it doesn't yield old data if the file is later grown. -- */ --int ext4_block_truncate_page(handle_t *handle, -- struct address_space *mapping, loff_t from) --{ -- unsigned offset = from & (PAGE_CACHE_SIZE-1); -- unsigned length; -- unsigned blocksize; -- struct inode *inode = mapping->host; -- -- blocksize = inode->i_sb->s_blocksize; -- length = blocksize - (offset & (blocksize - 1)); -- -- return ext4_block_zero_page_range(handle, mapping, from, length); --} -- --/* - * ext4_block_zero_page_range() zeros out a mapping of length 'length' - * starting from file offset 'from'. The range to be zero'd must - * be contained with in one block. If the specified range exceeds - * the end of the block it will be shortened to end of the block - * that cooresponds to 'from' - */ --int ext4_block_zero_page_range(handle_t *handle, -+static int ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length) - { - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; -@@ -3428,6 +3408,26 @@ unlock: - return err; - } - -+/* -+ * ext4_block_truncate_page() zeroes out a mapping from file offset `from' -+ * up to the end of the block which corresponds to `from'. -+ * This required during truncate. We need to physically zero the tail end -+ * of that block so it doesn't yield old data if the file is later grown. -+ */ -+int ext4_block_truncate_page(handle_t *handle, -+ struct address_space *mapping, loff_t from) -+{ -+ unsigned offset = from & (PAGE_CACHE_SIZE-1); -+ unsigned length; -+ unsigned blocksize; -+ struct inode *inode = mapping->host; -+ -+ blocksize = inode->i_sb->s_blocksize; -+ length = blocksize - (offset & (blocksize - 1)); -+ -+ return ext4_block_zero_page_range(handle, mapping, from, length); -+} -+ - int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, - loff_t lstart, loff_t length) - { --- -1.9.0 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . -Don't email: email@kvack.org - diff --git a/mbcache-decouple-locking-of-local-from-global-data b/mbcache-decouple-locking-of-local-from-global-data deleted file mode 100644 index bc4aa145..00000000 --- a/mbcache-decouple-locking-of-local-from-global-data +++ /dev/null @@ -1,688 +0,0 @@ -fs/mbcache.c: doucple the locking of local from global data - -From: T Makphaibulchoke - -The patch increases the parallelism of mbcache by using the built-in -lock in the hlist_bl_node to protect the mb_cache's local block and -index hash chains. The global data mb_cache_lru_list and -mb_cache_list continue to be protected by the global -mb_cache_spinlock. - -New block group spinlock, mb_cache_bg_lock is also added to serialize -accesses to mb_cache_entry's local data. - -A new member e_refcnt is added to the mb_cache_entry structure to help -preventing an mb_cache_entry from being deallocated by a free while it -is being referenced by either mb_cache_entry_get() or -mb_cache_entry_find(). - -Signed-off-by: T. Makphaibulchoke -Signed-off-by: "Theodore Ts'o" ---- - fs/mbcache.c | 417 ++++++++++++++++++++++++++++++++++++++++++----------------- - 1 file changed, 301 insertions(+), 116 deletions(-) - -diff --git a/fs/mbcache.c b/fs/mbcache.c -index 55db0da..786ecab 100644 ---- a/fs/mbcache.c -+++ b/fs/mbcache.c -@@ -26,6 +26,41 @@ - * back on the lru list. - */ - -+/* -+ * Lock descriptions and usage: -+ * -+ * Each hash chain of both the block and index hash tables now contains -+ * a built-in lock used to serialize accesses to the hash chain. -+ * -+ * Accesses to global data structures mb_cache_list and mb_cache_lru_list -+ * are serialized via the global spinlock mb_cache_spinlock. -+ * -+ * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize -+ * accesses to its local data, such as e_used and e_queued. -+ * -+ * Lock ordering: -+ * -+ * Each block hash chain's lock has the highest lock order, followed by an -+ * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's -+ * lock), and mb_cach_spinlock, with the lowest order. While holding -+ * either a block or index hash chain lock, a thread can acquire an -+ * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. -+ * -+ * Synchronization: -+ * -+ * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and -+ * index hash chian, it needs to lock the corresponding hash chain. For each -+ * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to -+ * prevent either any simultaneous release or free on the entry and also -+ * to serialize accesses to either the e_used or e_queued member of the entry. -+ * -+ * To avoid having a dangling reference to an already freed -+ * mb_cache_entry, an mb_cache_entry is only freed when it is not on a -+ * block hash chain and also no longer being referenced, both e_used, -+ * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is -+ * first removed from a block hash chain. -+ */ -+ - #include - #include - -@@ -37,6 +72,7 @@ - #include - #include - #include -+#include - - #ifdef MB_CACHE_DEBUG - # define mb_debug(f...) do { \ -@@ -57,8 +93,13 @@ - - #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) - -+#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) -+#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ -+ (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) -+ - static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); -- -+static struct blockgroup_lock *mb_cache_bg_lock; -+ - MODULE_AUTHOR("Andreas Gruenbacher "); - MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); - MODULE_LICENSE("GPL"); -@@ -86,6 +127,20 @@ static LIST_HEAD(mb_cache_list); - static LIST_HEAD(mb_cache_lru_list); - static DEFINE_SPINLOCK(mb_cache_spinlock); - -+static inline void -+__spin_lock_mb_cache_entry(struct mb_cache_entry *ce) -+{ -+ spin_lock(bgl_lock_ptr(mb_cache_bg_lock, -+ MB_CACHE_ENTRY_LOCK_INDEX(ce))); -+} -+ -+static inline void -+__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) -+{ -+ spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, -+ MB_CACHE_ENTRY_LOCK_INDEX(ce))); -+} -+ - static inline int - __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) - { -@@ -113,11 +168,21 @@ __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) - hlist_bl_del_init(&ce->e_index.o_list); - } - -+/* -+ * __mb_cache_entry_unhash_unlock() -+ * -+ * This function is called to unhash both the block and index hash -+ * chain. -+ * It assumes both the block and index hash chain is locked upon entry. -+ * It also unlock both hash chains both exit -+ */ - static inline void --__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) - { - __mb_cache_entry_unhash_index(ce); -+ hlist_bl_unlock(ce->e_index_hash_p); - __mb_cache_entry_unhash_block(ce); -+ hlist_bl_unlock(ce->e_block_hash_p); - } - - static void -@@ -125,36 +190,47 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) - { - struct mb_cache *cache = ce->e_cache; - -- mb_assert(!(ce->e_used || ce->e_queued)); -+ mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); - kmem_cache_free(cache->c_entry_cache, ce); - atomic_dec(&cache->c_entry_count); - } - -- - static void --__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -- __releases(mb_cache_spinlock) -+__mb_cache_entry_release(struct mb_cache_entry *ce) - { -+ /* First lock the entry to serialize access to its local data. */ -+ __spin_lock_mb_cache_entry(ce); - /* Wake up all processes queuing for this cache entry. */ - if (ce->e_queued) - wake_up_all(&mb_cache_queue); - if (ce->e_used >= MB_CACHE_WRITER) - ce->e_used -= MB_CACHE_WRITER; -+ /* -+ * Make sure that all cache entries on lru_list have -+ * both e_used and e_qued of 0s. -+ */ - ce->e_used--; -- if (!(ce->e_used || ce->e_queued)) { -- if (!__mb_cache_entry_is_block_hashed(ce)) -+ if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { -+ if (!__mb_cache_entry_is_block_hashed(ce)) { -+ __spin_unlock_mb_cache_entry(ce); - goto forget; -- mb_assert(list_empty(&ce->e_lru_list)); -- list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ } -+ /* -+ * Need access to lru list, first drop entry lock, -+ * then reacquire the lock in the proper order. -+ */ -+ spin_lock(&mb_cache_spinlock); -+ if (list_empty(&ce->e_lru_list)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); - } -- spin_unlock(&mb_cache_spinlock); -+ __spin_unlock_mb_cache_entry(ce); - return; - forget: -- spin_unlock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); - __mb_cache_entry_forget(ce, GFP_KERNEL); - } - -- - /* - * mb_cache_shrink_scan() memory pressure callback - * -@@ -177,17 +253,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) - - mb_debug("trying to free %d entries", nr_to_scan); - spin_lock(&mb_cache_spinlock); -- while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { -+ while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { - struct mb_cache_entry *ce = - list_entry(mb_cache_lru_list.next, -- struct mb_cache_entry, e_lru_list); -- list_move_tail(&ce->e_lru_list, &free_list); -- __mb_cache_entry_unhash(ce); -- freed++; -+ struct mb_cache_entry, e_lru_list); -+ list_del_init(&ce->e_lru_list); -+ if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) -+ continue; -+ spin_unlock(&mb_cache_spinlock); -+ /* Prevent any find or get operation on the entry */ -+ hlist_bl_lock(ce->e_block_hash_p); -+ hlist_bl_lock(ce->e_index_hash_p); -+ /* Ignore if it is touched by a find/get */ -+ if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || -+ !list_empty(&ce->e_lru_list)) { -+ hlist_bl_unlock(ce->e_index_hash_p); -+ hlist_bl_unlock(ce->e_block_hash_p); -+ spin_lock(&mb_cache_spinlock); -+ continue; -+ } -+ __mb_cache_entry_unhash_unlock(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ spin_lock(&mb_cache_spinlock); - } - spin_unlock(&mb_cache_spinlock); -+ - list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { - __mb_cache_entry_forget(entry, gfp_mask); -+ freed++; - } - return freed; - } -@@ -232,6 +325,14 @@ mb_cache_create(const char *name, int bucket_bits) - int n, bucket_count = 1 << bucket_bits; - struct mb_cache *cache = NULL; - -+ if (!mb_cache_bg_lock) { -+ mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), -+ GFP_KERNEL); -+ if (!mb_cache_bg_lock) -+ return NULL; -+ bgl_lock_init(mb_cache_bg_lock); -+ } -+ - cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); - if (!cache) - return NULL; -@@ -290,21 +391,47 @@ void - mb_cache_shrink(struct block_device *bdev) - { - LIST_HEAD(free_list); -- struct list_head *l, *ltmp; -+ struct list_head *l; -+ struct mb_cache_entry *ce, *tmp; - -+ l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); -- list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -- struct mb_cache_entry *ce = -- list_entry(l, struct mb_cache_entry, e_lru_list); -+ while (!list_is_last(l, &mb_cache_lru_list)) { -+ l = l->next; -+ ce = list_entry(l, struct mb_cache_entry, e_lru_list); - if (ce->e_bdev == bdev) { -- list_move_tail(&ce->e_lru_list, &free_list); -- __mb_cache_entry_unhash(ce); -+ list_del_init(&ce->e_lru_list); -+ if (ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt)) -+ continue; -+ spin_unlock(&mb_cache_spinlock); -+ /* -+ * Prevent any find or get operation on the entry. -+ */ -+ hlist_bl_lock(ce->e_block_hash_p); -+ hlist_bl_lock(ce->e_index_hash_p); -+ /* Ignore if it is touched by a find/get */ -+ if (ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt) || -+ !list_empty(&ce->e_lru_list)) { -+ hlist_bl_unlock(ce->e_index_hash_p); -+ hlist_bl_unlock(ce->e_block_hash_p); -+ l = &mb_cache_lru_list; -+ spin_lock(&mb_cache_spinlock); -+ continue; -+ } -+ __mb_cache_entry_unhash_unlock(ce); -+ mb_assert(!(ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt))); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ l = &mb_cache_lru_list; -+ spin_lock(&mb_cache_spinlock); - } - } - spin_unlock(&mb_cache_spinlock); -- list_for_each_safe(l, ltmp, &free_list) { -- __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -- e_lru_list), GFP_KERNEL); -+ -+ list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { -+ __mb_cache_entry_forget(ce, GFP_KERNEL); - } - } - -@@ -320,23 +447,27 @@ void - mb_cache_destroy(struct mb_cache *cache) - { - LIST_HEAD(free_list); -- struct list_head *l, *ltmp; -+ struct mb_cache_entry *ce, *tmp; - - spin_lock(&mb_cache_spinlock); -- list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -- struct mb_cache_entry *ce = -- list_entry(l, struct mb_cache_entry, e_lru_list); -- if (ce->e_cache == cache) { -+ list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { -+ if (ce->e_cache == cache) - list_move_tail(&ce->e_lru_list, &free_list); -- __mb_cache_entry_unhash(ce); -- } - } - list_del(&cache->c_cache_list); - spin_unlock(&mb_cache_spinlock); - -- list_for_each_safe(l, ltmp, &free_list) { -- __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -- e_lru_list), GFP_KERNEL); -+ list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { -+ list_del_init(&ce->e_lru_list); -+ /* -+ * Prevent any find or get operation on the entry. -+ */ -+ hlist_bl_lock(ce->e_block_hash_p); -+ hlist_bl_lock(ce->e_index_hash_p); -+ mb_assert(!(ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt))); -+ __mb_cache_entry_unhash_unlock(ce); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); - } - - if (atomic_read(&cache->c_entry_count) > 0) { -@@ -345,8 +476,6 @@ mb_cache_destroy(struct mb_cache *cache) - atomic_read(&cache->c_entry_count)); - } - -- kmem_cache_destroy(cache->c_entry_cache); -- - kfree(cache->c_index_hash); - kfree(cache->c_block_hash); - kfree(cache); -@@ -363,29 +492,59 @@ mb_cache_destroy(struct mb_cache *cache) - struct mb_cache_entry * - mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) - { -- struct mb_cache_entry *ce = NULL; -+ struct mb_cache_entry *ce; - - if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { -+ struct list_head *l; -+ -+ l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); -- if (!list_empty(&mb_cache_lru_list)) { -- ce = list_entry(mb_cache_lru_list.next, -- struct mb_cache_entry, e_lru_list); -- list_del_init(&ce->e_lru_list); -- __mb_cache_entry_unhash(ce); -+ while (!list_is_last(l, &mb_cache_lru_list)) { -+ l = l->next; -+ ce = list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del_init(&ce->e_lru_list); -+ if (ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt)) -+ continue; -+ spin_unlock(&mb_cache_spinlock); -+ /* -+ * Prevent any find or get operation on the -+ * entry. -+ */ -+ hlist_bl_lock(ce->e_block_hash_p); -+ hlist_bl_lock(ce->e_index_hash_p); -+ /* Ignore if it is touched by a find/get */ -+ if (ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt) || -+ !list_empty(&ce->e_lru_list)) { -+ hlist_bl_unlock(ce->e_index_hash_p); -+ hlist_bl_unlock(ce->e_block_hash_p); -+ l = &mb_cache_lru_list; -+ spin_lock(&mb_cache_spinlock); -+ continue; -+ } -+ mb_assert(list_empty(&ce->e_lru_list)); -+ mb_assert(!(ce->e_used || ce->e_queued || -+ atomic_read(&ce->e_refcnt))); -+ __mb_cache_entry_unhash_unlock(ce); -+ goto found; -+ } - } - spin_unlock(&mb_cache_spinlock); - } -- if (!ce) { -- ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); -- if (!ce) -- return NULL; -- atomic_inc(&cache->c_entry_count); -- INIT_LIST_HEAD(&ce->e_lru_list); -- INIT_HLIST_BL_NODE(&ce->e_block_list); -- INIT_HLIST_BL_NODE(&ce->e_index.o_list); -- ce->e_cache = cache; -- ce->e_queued = 0; -- } -+ -+ ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); -+ if (!ce) -+ return NULL; -+ atomic_inc(&cache->c_entry_count); -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_HLIST_BL_NODE(&ce->e_block_list); -+ INIT_HLIST_BL_NODE(&ce->e_index.o_list); -+ ce->e_cache = cache; -+ ce->e_queued = 0; -+ atomic_set(&ce->e_refcnt, 0); -+found: - ce->e_block_hash_p = &cache->c_block_hash[0]; - ce->e_index_hash_p = &cache->c_index_hash[0]; - ce->e_used = 1 + MB_CACHE_WRITER; -@@ -414,7 +573,6 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, - struct mb_cache *cache = ce->e_cache; - unsigned int bucket; - struct hlist_bl_node *l; -- int error = -EBUSY; - struct hlist_bl_head *block_hash_p; - struct hlist_bl_head *index_hash_p; - struct mb_cache_entry *lce; -@@ -423,26 +581,29 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, - bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), - cache->c_bucket_bits); - block_hash_p = &cache->c_block_hash[bucket]; -- spin_lock(&mb_cache_spinlock); -+ hlist_bl_lock(block_hash_p); - hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { -- if (lce->e_bdev == bdev && lce->e_block == block) -- goto out; -+ if (lce->e_bdev == bdev && lce->e_block == block) { -+ hlist_bl_unlock(block_hash_p); -+ return -EBUSY; -+ } - } - mb_assert(!__mb_cache_entry_is_block_hashed(ce)); -- __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_unhash_block(ce); -+ __mb_cache_entry_unhash_index(ce); - ce->e_bdev = bdev; - ce->e_block = block; - ce->e_block_hash_p = block_hash_p; - ce->e_index.o_key = key; -+ hlist_bl_add_head(&ce->e_block_list, block_hash_p); -+ hlist_bl_unlock(block_hash_p); - bucket = hash_long(key, cache->c_bucket_bits); - index_hash_p = &cache->c_index_hash[bucket]; -+ hlist_bl_lock(index_hash_p); - ce->e_index_hash_p = index_hash_p; - hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); -- hlist_bl_add_head(&ce->e_block_list, block_hash_p); -- error = 0; --out: -- spin_unlock(&mb_cache_spinlock); -- return error; -+ hlist_bl_unlock(index_hash_p); -+ return 0; - } - - -@@ -456,24 +617,26 @@ out: - void - mb_cache_entry_release(struct mb_cache_entry *ce) - { -- spin_lock(&mb_cache_spinlock); -- __mb_cache_entry_release_unlock(ce); -+ __mb_cache_entry_release(ce); - } - - - /* - * mb_cache_entry_free() - * -- * This is equivalent to the sequence mb_cache_entry_takeout() -- -- * mb_cache_entry_release(). - */ - void - mb_cache_entry_free(struct mb_cache_entry *ce) - { -- spin_lock(&mb_cache_spinlock); -+ mb_assert(ce); - mb_assert(list_empty(&ce->e_lru_list)); -- __mb_cache_entry_unhash(ce); -- __mb_cache_entry_release_unlock(ce); -+ hlist_bl_lock(ce->e_index_hash_p); -+ __mb_cache_entry_unhash_index(ce); -+ hlist_bl_unlock(ce->e_index_hash_p); -+ hlist_bl_lock(ce->e_block_hash_p); -+ __mb_cache_entry_unhash_block(ce); -+ hlist_bl_unlock(ce->e_block_hash_p); -+ __mb_cache_entry_release(ce); - } - - -@@ -497,39 +660,48 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, - bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), - cache->c_bucket_bits); - block_hash_p = &cache->c_block_hash[bucket]; -- spin_lock(&mb_cache_spinlock); -+ /* First serialize access to the block corresponding hash chain. */ -+ hlist_bl_lock(block_hash_p); - hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { - mb_assert(ce->e_block_hash_p == block_hash_p); - if (ce->e_bdev == bdev && ce->e_block == block) { -- DEFINE_WAIT(wait); -+ /* -+ * Prevent a free from removing the entry. -+ */ -+ atomic_inc(&ce->e_refcnt); -+ hlist_bl_unlock(block_hash_p); -+ __spin_lock_mb_cache_entry(ce); -+ atomic_dec(&ce->e_refcnt); -+ if (ce->e_used > 0) { -+ DEFINE_WAIT(wait); -+ while (ce->e_used > 0) { -+ ce->e_queued++; -+ prepare_to_wait(&mb_cache_queue, &wait, -+ TASK_UNINTERRUPTIBLE); -+ __spin_unlock_mb_cache_entry(ce); -+ schedule(); -+ __spin_lock_mb_cache_entry(ce); -+ ce->e_queued--; -+ } -+ finish_wait(&mb_cache_queue, &wait); -+ } -+ ce->e_used += 1 + MB_CACHE_WRITER; -+ __spin_unlock_mb_cache_entry(ce); - -- if (!list_empty(&ce->e_lru_list)) -+ if (!list_empty(&ce->e_lru_list)) { -+ spin_lock(&mb_cache_spinlock); - list_del_init(&ce->e_lru_list); -- -- while (ce->e_used > 0) { -- ce->e_queued++; -- prepare_to_wait(&mb_cache_queue, &wait, -- TASK_UNINTERRUPTIBLE); - spin_unlock(&mb_cache_spinlock); -- schedule(); -- spin_lock(&mb_cache_spinlock); -- ce->e_queued--; - } -- finish_wait(&mb_cache_queue, &wait); -- ce->e_used += 1 + MB_CACHE_WRITER; -- - if (!__mb_cache_entry_is_block_hashed(ce)) { -- __mb_cache_entry_release_unlock(ce); -+ __mb_cache_entry_release(ce); - return NULL; - } -- goto cleanup; -+ return ce; - } - } -- ce = NULL; -- --cleanup: -- spin_unlock(&mb_cache_spinlock); -- return ce; -+ hlist_bl_unlock(block_hash_p); -+ return NULL; - } - - #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -@@ -538,40 +710,53 @@ static struct mb_cache_entry * - __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, - struct block_device *bdev, unsigned int key) - { -+ -+ /* The index hash chain is alredy acquire by caller. */ - while (l != NULL) { - struct mb_cache_entry *ce = - hlist_bl_entry(l, struct mb_cache_entry, - e_index.o_list); - mb_assert(ce->e_index_hash_p == head); - if (ce->e_bdev == bdev && ce->e_index.o_key == key) { -- DEFINE_WAIT(wait); -- -- if (!list_empty(&ce->e_lru_list)) -- list_del_init(&ce->e_lru_list); -- -+ /* -+ * Prevent a free from removing the entry. -+ */ -+ atomic_inc(&ce->e_refcnt); -+ hlist_bl_unlock(head); -+ __spin_lock_mb_cache_entry(ce); -+ atomic_dec(&ce->e_refcnt); -+ ce->e_used++; - /* Incrementing before holding the lock gives readers - priority over writers. */ -- ce->e_used++; -- while (ce->e_used >= MB_CACHE_WRITER) { -- ce->e_queued++; -- prepare_to_wait(&mb_cache_queue, &wait, -- TASK_UNINTERRUPTIBLE); -- spin_unlock(&mb_cache_spinlock); -- schedule(); -+ if (ce->e_used >= MB_CACHE_WRITER) { -+ DEFINE_WAIT(wait); -+ -+ while (ce->e_used >= MB_CACHE_WRITER) { -+ ce->e_queued++; -+ prepare_to_wait(&mb_cache_queue, &wait, -+ TASK_UNINTERRUPTIBLE); -+ __spin_unlock_mb_cache_entry(ce); -+ schedule(); -+ __spin_lock_mb_cache_entry(ce); -+ ce->e_queued--; -+ } -+ finish_wait(&mb_cache_queue, &wait); -+ } -+ __spin_unlock_mb_cache_entry(ce); -+ if (!list_empty(&ce->e_lru_list)) { - spin_lock(&mb_cache_spinlock); -- ce->e_queued--; -+ list_del_init(&ce->e_lru_list); -+ spin_unlock(&mb_cache_spinlock); - } -- finish_wait(&mb_cache_queue, &wait); -- - if (!__mb_cache_entry_is_block_hashed(ce)) { -- __mb_cache_entry_release_unlock(ce); -- spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release(ce); - return ERR_PTR(-EAGAIN); - } - return ce; - } - l = l->next; - } -+ hlist_bl_unlock(head); - return NULL; - } - -@@ -598,12 +783,12 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, - struct hlist_bl_head *index_hash_p; - - index_hash_p = &cache->c_index_hash[bucket]; -- spin_lock(&mb_cache_spinlock); -+ hlist_bl_lock(index_hash_p); - if (!hlist_bl_empty(index_hash_p)) { - l = hlist_bl_first(index_hash_p); - ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); -- } -- spin_unlock(&mb_cache_spinlock); -+ } else -+ hlist_bl_unlock(index_hash_p); - return ce; - } - -@@ -638,11 +823,11 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, - - index_hash_p = &cache->c_index_hash[bucket]; - mb_assert(prev->e_index_hash_p == index_hash_p); -- spin_lock(&mb_cache_spinlock); -+ hlist_bl_lock(index_hash_p); - mb_assert(!hlist_bl_empty(index_hash_p)); - l = prev->e_index.o_list.next; - ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); -- __mb_cache_entry_release_unlock(prev); -+ __mb_cache_entry_release(prev); - return ce; - } - --- -1.7.11.3 - - diff --git a/merge-uninitialized-extents b/merge-uninitialized-extents deleted file mode 100644 index 5940625a..00000000 --- a/merge-uninitialized-extents +++ /dev/null @@ -1,95 +0,0 @@ -ext4: merge uninitialized extents - -From: Darrick J. Wong - -Allow for merging uninitialized extents. - -Signed-off-by: Darrick J. Wong -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/extents.c | 21 +++++++++++++++++---- - 1 file changed, 17 insertions(+), 4 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 3384dc4..7f0132d 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -1691,7 +1691,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, - * the extent that was written properly split out and conversion to - * initialized is trivial. - */ -- if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) -+ if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) - return 0; - - ext1_ee_len = ext4_ext_get_actual_len(ex1); -@@ -1708,6 +1708,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, - */ - if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) - return 0; -+ if (ext4_ext_is_uninitialized(ex1) && -+ (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || -+ atomic_read(&EXT4_I(inode)->i_unwritten) || -+ (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) -+ return 0; - #ifdef AGGRESSIVE_TEST - if (ext1_ee_len >= 4) - return 0; -@@ -1731,7 +1736,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, - { - struct ext4_extent_header *eh; - unsigned int depth, len; -- int merge_done = 0; -+ int merge_done = 0, uninit; - - depth = ext_depth(inode); - BUG_ON(path[depth].p_hdr == NULL); -@@ -1741,8 +1746,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, - if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) - break; - /* merge with next extent! */ -+ uninit = ext4_ext_is_uninitialized(ex); - ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) - + ext4_ext_get_actual_len(ex + 1)); -+ if (uninit) -+ ext4_ext_mark_uninitialized(ex); - - if (ex + 1 < EXT_LAST_EXTENT(eh)) { - len = (EXT_LAST_EXTENT(eh) - ex - 1) -@@ -1896,7 +1904,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path *npath = NULL; - int depth, len, err; - ext4_lblk_t next; -- int mb_flags = 0; -+ int mb_flags = 0, uninit; - - if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { - EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); -@@ -1946,9 +1954,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, - path + depth); - if (err) - return err; -- -+ uninit = ext4_ext_is_uninitialized(ex); - ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) - + ext4_ext_get_actual_len(newext)); -+ if (uninit) -+ ext4_ext_mark_uninitialized(ex); - eh = path[depth].p_hdr; - nearex = ex; - goto merge; -@@ -1971,10 +1981,13 @@ prepend: - if (err) - return err; - -+ uninit = ext4_ext_is_uninitialized(ex); - ex->ee_block = newext->ee_block; - ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); - ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) - + ext4_ext_get_actual_len(newext)); -+ if (uninit) -+ ext4_ext_mark_uninitialized(ex); - eh = path[depth].p_hdr; - nearex = ex; - goto merge; - diff --git a/only-sync-filesystem-when-remounting-ro b/only-sync-filesystem-when-remounting-ro deleted file mode 100644 index bb78b66c..00000000 --- a/only-sync-filesystem-when-remounting-ro +++ /dev/null @@ -1,32 +0,0 @@ -ext4: only call sync_filesystm() when remounting read-only - -This is the only time it is required for ext4. - -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/super.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index aa3842f..01c5088 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -4767,8 +4767,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) - #endif - char *orig_data = kstrdup(data, GFP_KERNEL); - -- sync_filesystem(sb); -- - /* Store the original options */ - old_sb_flags = sb->s_flags; - old_opts.s_mount_opt = sbi->s_mount_opt; -@@ -4839,6 +4837,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) - } - - if (*flags & MS_RDONLY) { -+ err = sync_filesystem(sb); -+ if (err < 0) -+ goto restore_opts; - err = dquot_suspend(sb, -1); - if (err < 0) - goto restore_opts; diff --git a/optimize-hurd-tests b/optimize-hurd-tests deleted file mode 100644 index 48c55f06..00000000 --- a/optimize-hurd-tests +++ /dev/null @@ -1,84 +0,0 @@ -ext4: optimize Hurd tests when reading/writing inodes - -Set a in-memory superblock flag to indicate whether the file system is -designed to support the Hurd. - -Also, add a sanity check to make sure the 64-bit feature is not set -for Hurd file systems, since i_file_acl_high conflicts with a -Hurd-specific field. - -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/ext4.h | 2 ++ - fs/ext4/inode.c | 9 +++------ - fs/ext4/super.c | 10 ++++++++++ - 3 files changed, 15 insertions(+), 6 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index f4f889e..e01135d 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1001,6 +1001,8 @@ struct ext4_inode_info { - #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group - size of blocksize * 8 - blocks */ -+#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated -+ file systems */ - - #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ - ~EXT4_MOUNT_##opt -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index ed2c13a..b5e182a 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -4168,8 +4168,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) - EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); - EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); - -- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != -- cpu_to_le32(EXT4_OS_HURD)) { -+ if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - inode->i_version = le32_to_cpu(raw_inode->i_disk_version); - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -@@ -4345,8 +4344,7 @@ static int ext4_do_update_inode(handle_t *handle, - goto out_brelse; - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); -- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != -- cpu_to_le32(EXT4_OS_HURD)) -+ if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) - raw_inode->i_file_acl_high = - cpu_to_le16(ei->i_file_acl >> 32); - raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); -@@ -4391,8 +4389,7 @@ static int ext4_do_update_inode(handle_t *handle, - raw_inode->i_block[block] = ei->i_data[block]; - } - -- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != -- cpu_to_le32(EXT4_OS_HURD)) { -+ if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - raw_inode->i_disk_version = cpu_to_le32(inode->i_version); - if (ei->i_extra_isize) { - if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 3c9eadf..6adee9a 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -3580,6 +3580,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - "feature flags set on rev 0 fs, " - "running e2fsck is recommended"); - -+ if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { -+ set_opt2(sb, HURD_COMPAT); -+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, -+ EXT4_FEATURE_INCOMPAT_64BIT)) { -+ ext4_msg(sb, KERN_ERR, -+ "The Hurd can't support 64-bit file systems"); -+ goto failed_mount; -+ } -+ } -+ - if (IS_EXT2_SB(sb)) { - if (ext2_feature_set_ok(sb)) - ext4_msg(sb, KERN_INFO, "mounting ext2 file system " diff --git a/refactor-ext4_fallocate-code b/refactor-ext4_fallocate-code deleted file mode 100644 index 41a025e4..00000000 --- a/refactor-ext4_fallocate-code +++ /dev/null @@ -1,207 +0,0 @@ -ext4: refactor ext4_fallocate code - -From: Lukas Czerner - -Move block allocation out of the ext4_fallocate into separate function -called ext4_alloc_file_blocks(). This will allow us to use the same -allocation code for other allocation operations such as zero range which -is commit in the next patch. - -Signed-off-by: Lukas Czerner -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/extents.c | 129 +++++++++++++++++++++++++++++++----------------------- - 1 file changed, 74 insertions(+), 55 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index ce3d371..1c09a09 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4546,6 +4546,64 @@ retry: - ext4_std_error(inode->i_sb, err); - } - -+static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, -+ ext4_lblk_t len, int flags, int mode) -+{ -+ struct inode *inode = file_inode(file); -+ handle_t *handle; -+ int ret = 0; -+ int ret2 = 0; -+ int retries = 0; -+ struct ext4_map_blocks map; -+ unsigned int credits; -+ -+ map.m_lblk = offset; -+ /* -+ * Don't normalize the request if it can fit in one extent so -+ * that it doesn't get unnecessarily split into multiple -+ * extents. -+ */ -+ if (len <= EXT_UNINIT_MAX_LEN) -+ flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; -+ -+ /* -+ * credits to insert 1 extent into extent tree -+ */ -+ credits = ext4_chunk_trans_blocks(inode, len); -+ -+retry: -+ while (ret >= 0 && ret < len) { -+ map.m_lblk = map.m_lblk + ret; -+ map.m_len = len = len - ret; -+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, -+ credits); -+ if (IS_ERR(handle)) { -+ ret = PTR_ERR(handle); -+ break; -+ } -+ ret = ext4_map_blocks(handle, inode, &map, flags); -+ if (ret <= 0) { -+ ext4_debug("inode #%lu: block %u: len %u: " -+ "ext4_ext_map_blocks returned %d", -+ inode->i_ino, map.m_lblk, -+ map.m_len, ret); -+ ext4_mark_inode_dirty(handle, inode); -+ ret2 = ext4_journal_stop(handle); -+ break; -+ } -+ ret2 = ext4_journal_stop(handle); -+ if (ret2) -+ break; -+ } -+ if (ret == -ENOSPC && -+ ext4_should_retry_alloc(inode->i_sb, &retries)) { -+ ret = 0; -+ goto retry; -+ } -+ -+ return ret > 0 ? ret2 : ret; -+} -+ - /* - * preallocate space for a file. This implements ext4's fallocate file - * operation, which gets called from sys_fallocate system call. -@@ -4560,12 +4618,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - loff_t new_size = 0; - unsigned int max_blocks; - int ret = 0; -- int ret2 = 0; -- int retries = 0; - int flags; -- struct ext4_map_blocks map; -+ ext4_lblk_t lblk; - struct timespec tv; -- unsigned int credits, blkbits = inode->i_blkbits; -+ unsigned int blkbits = inode->i_blkbits; - - /* Return error if mode is not supported */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | -@@ -4590,17 +4646,18 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - return -EOPNOTSUPP; - - trace_ext4_fallocate_enter(inode, offset, len, mode); -- map.m_lblk = offset >> blkbits; -+ lblk = offset >> blkbits; - /* - * We can't just convert len to max_blocks because - * If blocksize = 4096 offset = 3072 and len = 2048 - */ - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -- - map.m_lblk; -- /* -- * credits to insert 1 extent into extent tree -- */ -- credits = ext4_chunk_trans_blocks(inode, max_blocks); -+ - lblk; -+ -+ flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; -+ if (mode & FALLOC_FL_KEEP_SIZE) -+ flags |= EXT4_GET_BLOCKS_KEEP_SIZE; -+ - mutex_lock(&inode->i_mutex); - - if (!(mode & FALLOC_FL_KEEP_SIZE) && -@@ -4611,46 +4668,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - goto out; - } - -- flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; -- if (mode & FALLOC_FL_KEEP_SIZE) -- flags |= EXT4_GET_BLOCKS_KEEP_SIZE; -- /* -- * Don't normalize the request if it can fit in one extent so -- * that it doesn't get unnecessarily split into multiple -- * extents. -- */ -- if (len <= EXT_UNINIT_MAX_LEN << blkbits) -- flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; -- --retry: -- while (ret >= 0 && ret < max_blocks) { -- map.m_lblk = map.m_lblk + ret; -- map.m_len = max_blocks = max_blocks - ret; -- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, -- credits); -- if (IS_ERR(handle)) { -- ret = PTR_ERR(handle); -- break; -- } -- ret = ext4_map_blocks(handle, inode, &map, flags); -- if (ret <= 0) { -- ext4_debug("inode #%lu: block %u: len %u: " -- "ext4_ext_map_blocks returned %d", -- inode->i_ino, map.m_lblk, -- map.m_len, ret); -- ext4_mark_inode_dirty(handle, inode); -- ret2 = ext4_journal_stop(handle); -- break; -- } -- ret2 = ext4_journal_stop(handle); -- if (ret2) -- break; -- } -- if (ret == -ENOSPC && -- ext4_should_retry_alloc(inode->i_sb, &retries)) { -- ret = 0; -- goto retry; -- } -+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); -+ if (ret) -+ goto out; - - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) -@@ -4658,14 +4678,14 @@ retry: - - tv = inode->i_ctime = ext4_current_time(inode); - -- if (ret > 0 && new_size) { -+ if (!ret && new_size) { - if (new_size > i_size_read(inode)) { - i_size_write(inode, new_size); - inode->i_mtime = tv; - } - if (new_size > EXT4_I(inode)->i_disksize) - ext4_update_i_disksize(inode, new_size); -- } else if (ret > 0 && !new_size) { -+ } else if (!ret && !new_size) { - /* - * Mark that we allocate beyond EOF so the subsequent truncate - * can proceed even if the new size is the same as i_size. -@@ -4680,9 +4700,8 @@ retry: - ext4_journal_stop(handle); - out: - mutex_unlock(&inode->i_mutex); -- trace_ext4_fallocate_exit(inode, offset, max_blocks, -- ret > 0 ? ret2 : ret); -- return ret > 0 ? ret2 : ret; -+ trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); -+ return ret; - } - - /* --- -1.8.3.1 - - diff --git a/remove-an-unneeded-check-in-mext_page_mkuptodate b/remove-an-unneeded-check-in-mext_page_mkuptodate deleted file mode 100644 index adee6b72..00000000 --- a/remove-an-unneeded-check-in-mext_page_mkuptodate +++ /dev/null @@ -1,24 +0,0 @@ -ext4: remove an unneeded check in mext_page_mkuptodate() - -From: Dan Carpenter - -"err" is zero here, there is no need to check again. - -Signed-off-by: Dan Carpenter -Signed-off-by: "Theodore Ts'o" - -diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c -index 773b503bd18c..f39a88abe32c 100644 ---- a/fs/ext4/move_extent.c -+++ b/fs/ext4/move_extent.c -@@ -861,8 +861,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) - } - if (!buffer_mapped(bh)) { - zero_user(page, block_start, blocksize); -- if (!err) -- set_buffer_uptodate(bh); -+ set_buffer_uptodate(bh); - continue; - } - } - diff --git a/remove-unneeded-test-of-ret-variable b/remove-unneeded-test-of-ret-variable deleted file mode 100644 index be9e40e9..00000000 --- a/remove-unneeded-test-of-ret-variable +++ /dev/null @@ -1,59 +0,0 @@ -ext4: remove unneeded test of ret variable - -From: Lukas Czerner - -Currently in ext4_fallocate() and ext4_zero_range() we're testing ret -variable along with new_size. However in ext4_fallocate() we just tested -ret before and in ext4_zero_range() if will always be zero when we get -there so there is no need to test it in both cases. - -Signed-off-by: Lukas Czerner -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/extents.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 491208c..8c09e1d 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4816,12 +4816,12 @@ static long ext4_zero_range(struct file *file, loff_t offset, - - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - -- if (!ret && new_size) { -+ if (new_size) { - if (new_size > i_size_read(inode)) - i_size_write(inode, new_size); - if (new_size > EXT4_I(inode)->i_disksize) - ext4_update_i_disksize(inode, new_size); -- } else if (!ret && !new_size) { -+ } else { - /* - * Mark that we allocate beyond EOF so the subsequent truncate - * can proceed even if the new size is the same as i_size. -@@ -4923,14 +4923,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - - tv = inode->i_ctime = ext4_current_time(inode); - -- if (!ret && new_size) { -+ if (new_size) { - if (new_size > i_size_read(inode)) { - i_size_write(inode, new_size); - inode->i_mtime = tv; - } - if (new_size > EXT4_I(inode)->i_disksize) - ext4_update_i_disksize(inode, new_size); -- } else if (!ret && !new_size) { -+ } else { - /* - * Mark that we allocate beyond EOF so the subsequent truncate - * can proceed even if the new size is the same as i_size. --- -1.8.3.1 - --- -To unsubscribe from this list: send the line "unsubscribe linux-ext4" in -the body of a message to majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html - diff --git a/remove-unused-ac_ex_scanned b/remove-unused-ac_ex_scanned deleted file mode 100644 index d1cb5f9a..00000000 --- a/remove-unused-ac_ex_scanned +++ /dev/null @@ -1,55 +0,0 @@ -ext4: remove unused ac_ex_scanned - -From: Eric Sandeen - -When looking at a bug report with: - -> kernel: EXT4-fs: 0 scanned, 0 found - -I thought wow, 0 scanned, that's odd? But it's not odd; it's printing -a variable that is initialized to 0 and never touched again. - -It's never been used since the original merge, so I don't really even -know what the original intent was, either. - -If anyone knows how to hook it up, speak now via patch, otherwise just -yank it so it's not making a confusing situation more confusing in -kernel logs. - -Signed-off-by: Eric Sandeen -Signed-off-by: "Theodore Ts'o" ---- - -diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c -index 4bbbf13..3aee266 100644 ---- a/fs/ext4/mballoc.c -+++ b/fs/ext4/mballoc.c -@@ -3983,8 +3983,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) - (unsigned long)ac->ac_b_ex.fe_len, - (unsigned long)ac->ac_b_ex.fe_logical, - (int)ac->ac_criteria); -- ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", -- ac->ac_ex_scanned, ac->ac_found); -+ ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found); - ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); - ngroups = ext4_get_groups_count(sb); - for (i = 0; i < ngroups; i++) { -diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h -index 08481ee..72db0a0 100644 ---- a/fs/ext4/mballoc.h -+++ b/fs/ext4/mballoc.h -@@ -175,8 +175,6 @@ struct ext4_allocation_context { - /* copy of the best found extent taken before preallocation efforts */ - struct ext4_free_extent ac_f_ex; - -- /* number of iterations done. we have to track to limit searching */ -- unsigned long ac_ex_scanned; - __u16 ac_groups_scanned; - __u16 ac_found; - __u16 ac_tail; - --- -To unsubscribe from this list: send the line "unsubscribe linux-ext4" in -the body of a message to majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html - diff --git a/series b/series dissimilarity index 76% index 14f9f0c2..fcb554b6 100644 --- a/series +++ b/series @@ -1,92 +1,32 @@ -# BASE v3.14-rc2 - -fix-xfstest-generic-299-block-validity-failures -fix-swap_inode_boot_loader-cleanup -dont-try-to-set-hash-signedness-flag-if-fs-is-ro -fix-resize-large-itable -fix-resize-nonstd-blocks-per-group -dont-leave-crtime-uninitialized -fix-use-after-free-in-jbd2_journal_start_reserved - -# ^^ above pushed to Linus - -add-__init-marking-to-init_inodecache -#initialize-multi-block-allocator-before-checking-block-descriptors -cleanup-error-handling-in-swap_inode_boot_loader -remove-an-unneeded-check-in-mext_page_mkuptodate -jbd2-mark-file-local-functions-as-static -address-a-benign-compiler-warning -fix-error-return-from-ext4_ext_handle_uninitialized_extents -add-ext4_es_store_pblock_status -ext4-dont-calculate-total-xattr-size-if-unneeded -initialize-fe_logical -cap-max-length-from-ext -remove-unused-ac_ex_scanned -silence-warnings-in-extent-status-tree-debugging-code -avoid-exposure-stale-data-in-ext4_punch_hole -merge-uninitialized-extents -translate-mode-bits-to-strings -add-support-collapse-range -speedup-WB_SYNC_ALL-pass -jbd2-dont-unplog-after-revoke-records -dont-hold-j_state_lock-during-wake_up -jbd2-calc-stats-wo-j_state_lock-and-j_list_lock -jbd2-add-transaction-to-checkpoint-list-earlier -jbd2-check-b_transaction-wo-taking-j_list_lock -jbd2-minimize-j_list_lock-in-journal_get_create_access -jbd2-minimize-j_state_lock-in-jbd2_journal_forget -jbd2-improve-error-messages-for-inconsistent-jh -fs-push-sync_filesystem-down-to-remount_fs -only-sync-filesystem-when-remounting-ro - -delete-path-delalloc-code-in-ext4_ext_handle_uninitialized_extents -fix-partial-cluster-handling-for-bigalloc-filesystems - -update-i_size-after-the-preallocation -refactor-ext4_fallocate-code -add-zero-range-support - -change-block-and-index-hash-chain-to-hlist_bl_node -mbcache-decouple-locking-of-local-from-global-data -each-fs-uses-its-own-mbcache -kill-i_version-support-for-hurd-OS-filesystems -optimize-hurd-tests -atomically-set-inode-flags -make-ext4_block_zero_page_range_static -fix-comment-typo -remove-unneeded-test-of-ret-variable -fix-premature-freeing-of-partial-clusters - -########################################## -# unstable patches -#################################################### - -stable-boundary -stable-boundary-undo.patch - -use-discard-if-possible-in-blkdev_issue_zeroout -add-blkdiscard-ioctl - -mbcache-decouple-locking-from-global-data -each-fs-creates-its-own-mb_cache - -block-dio-during-truncate - -delalloc-debug - -# note: this may make things slower... -commit-as-soon-as-possible-after-log_start_commit - -# Ted's squelch series, still needs work -add-sysfs-bool-support -add-squelch-errors-support - -# Various disabled patches... -# -#auto-enable-journal_async_commit -#mballoc-allocate-larger-extents - -# various debugging/benchmarking assists -dump-in-use-buffers -akpm-jbd2-locking-fix - +# BASE d15e031 + +########################################## +# unstable patches +#################################################### + +stable-boundary +stable-boundary-undo.patch + +use-discard-if-possible-in-blkdev_issue_zeroout +add-blkdiscard-ioctl + +block-dio-during-truncate + +delalloc-debug + +# note: this may make things slower... +commit-as-soon-as-possible-after-log_start_commit + +# Ted's squelch series, still needs work +add-sysfs-bool-support +add-squelch-errors-support + +# Various disabled patches... +# +#auto-enable-journal_async_commit +#mballoc-allocate-larger-extents + +# various debugging/benchmarking assists +dump-in-use-buffers +akpm-jbd2-locking-fix + diff --git a/silence-warnings-in-extent-status-tree-debugging-code b/silence-warnings-in-extent-status-tree-debugging-code deleted file mode 100644 index c579a75b..00000000 --- a/silence-warnings-in-extent-status-tree-debugging-code +++ /dev/null @@ -1,67 +0,0 @@ -ext4: silence warnings in extent status tree debugging code - -From: Eric Whitney - -Adjust the conversion specifications in a few optionally compiled debug -messages to match the return type of ext4_es_status(). Also, make a -couple of minor grammatical message edits while we're at it. - -Signed-off-by: Eric Whitney -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/extents_status.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index 0578b6b..9366b3a 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -184,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode) - while (node) { - struct extent_status *es; - es = rb_entry(node, struct extent_status, rb_node); -- printk(KERN_DEBUG " [%u/%u) %llu %llx", -+ printk(KERN_DEBUG " [%u/%u) %llu %x", - es->es_lblk, es->es_len, - ext4_es_pblock(es), ext4_es_status(es)); - node = rb_next(node); -@@ -445,8 +445,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, - pr_warn("ES insert assertion failed for " - "inode: %lu we can find an extent " - "at block [%d/%d/%llu/%c], but we " -- "want to add an delayed/hole extent " -- "[%d/%d/%llu/%llx]\n", -+ "want to add a delayed/hole extent " -+ "[%d/%d/%llu/%x]\n", - inode->i_ino, ee_block, ee_len, - ee_start, ee_status ? 'u' : 'w', - es->es_lblk, es->es_len, -@@ -486,8 +486,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, - if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { - pr_warn("ES insert assertion failed for inode: %lu " - "can't find an extent at block %d but we want " -- "to add an written/unwritten extent " -- "[%d/%d/%llu/%llx]\n", inode->i_ino, -+ "to add a written/unwritten extent " -+ "[%d/%d/%llu/%x]\n", inode->i_ino, - es->es_lblk, es->es_lblk, es->es_len, - ext4_es_pblock(es), ext4_es_status(es)); - } -@@ -524,7 +524,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, - */ - pr_warn("ES insert assertion failed for inode: %lu " - "We can find blocks but we want to add a " -- "delayed/hole extent [%d/%d/%llu/%llx]\n", -+ "delayed/hole extent [%d/%d/%llu/%x]\n", - inode->i_ino, es->es_lblk, es->es_len, - ext4_es_pblock(es), ext4_es_status(es)); - return; -@@ -554,7 +554,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, - if (ext4_es_is_written(es)) { - pr_warn("ES insert assertion failed for inode: %lu " - "We can't find the block but we want to add " -- "an written extent [%d/%d/%llu/%llx]\n", -+ "a written extent [%d/%d/%llu/%x]\n", - inode->i_ino, es->es_lblk, es->es_len, - ext4_es_pblock(es), ext4_es_status(es)); - return; diff --git a/speedup-WB_SYNC_ALL-pass b/speedup-WB_SYNC_ALL-pass deleted file mode 100644 index 7f3a5ad7..00000000 --- a/speedup-WB_SYNC_ALL-pass +++ /dev/null @@ -1,78 +0,0 @@ -ext4: Speedup WB_SYNC_ALL pass called from sync(2) - -From: Jan Kara - -When doing filesystem wide sync, there's no need to force transaction -commit (or synchronously write inode buffer) separately for each inode -because ext4_sync_fs() takes care of forcing commit at the end (VFS -takes care of flushing buffer cache, respectively). Most of the time -this slowness doesn't manifest because previous WB_SYNC_NONE writeback -doesn't leave much to write but when there are processes aggressively -creating new files and several filesystems to sync, the sync slowness -can be noticeable. In the following test script sync(1) takes around 6 -minutes when there are two ext4 filesystems mounted on a standard SATA -drive. After this patch sync takes a couple of seconds so we have about -two orders of magnitude improvement. - - function run_writers - { - for (( i = 0; i < 10; i++ )); do - mkdir $1/dir$i - for (( j = 0; j < 40000; j++ )); do - dd if=/dev/zero of=$1/dir$i/$j bs=4k count=4 &>/dev/null - done & - done - } - - for dir in "$@"; do - run_writers $dir - done - - sleep 40 - time sync - -Signed-off-by: Jan Kara -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/inode.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 6e39895a91b8..7850584b0679 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -4443,7 +4443,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) - return -EIO; - } - -- if (wbc->sync_mode != WB_SYNC_ALL) -+ /* -+ * No need to force transaction in WB_SYNC_NONE mode. Also -+ * ext4_sync_fs() will force the commit after everything is -+ * written. -+ */ -+ if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) - return 0; - - err = ext4_force_commit(inode->i_sb); -@@ -4453,7 +4458,11 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) - err = __ext4_get_inode_loc(inode, &iloc, 0); - if (err) - return err; -- if (wbc->sync_mode == WB_SYNC_ALL) -+ /* -+ * sync(2) will flush the whole buffer cache. No need to do -+ * it here separately for each inode. -+ */ -+ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) - sync_dirty_buffer(iloc.bh); - if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, --- -1.8.1.4 - --- -To unsubscribe from this list: send the line "unsubscribe linux-ext4" in -the body of a message to majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html - diff --git a/timestamps b/timestamps dissimilarity index 78% index 1da6ae02..473397fc 100755 --- a/timestamps +++ b/timestamps @@ -1,78 +1,29 @@ -touch -d @1301938342 add-WARN_ON-with-unmapped-dirty-bh-in-writepage -touch -d @1301938342 auto-enable-journal_async_commit -touch -d @1301938342 bio-debug -touch -d @1301938342 mballoc-allocate-larger-extents -touch -d @1325258241 new -touch -d @1371093945 avoid-unnecessarily-writing-back-dirty-pages-before-hole-punching -touch -d @1376747621 save-patch -touch -d @1391388313 add-blkdiscard-ioctl -touch -d @1391388373 mbcache-decouple-locking-from-global-data -touch -d @1391388433 each-fs-creates-its-own-mb_cache -touch -d @1391388493 block-dio-during-truncate -touch -d @1391388553 delalloc-debug -touch -d @1391388613 commit-as-soon-as-possible-after-log_start_commit -touch -d @1391388673 add-sysfs-bool-support -touch -d @1391388733 add-squelch-errors-support -touch -d @1391388793 dump-in-use-buffers -touch -d @1391388853 akpm-jbd2-locking-fix -touch -d @1392219765 fix-xfstest-generic-299-block-validity-failures -touch -d @1392223711 fix-swap_inode_boot_loader-cleanup -touch -d @1392225364 dont-try-to-set-hash-signedness-flag-if-fs-is-ro -touch -d @1392226628 stable-boundary-undo.patch -touch -d @1392352047 use-discard-if-possible-in-blkdev_issue_zeroout -touch -d @1392517993 fix-resize-large-itable -touch -d @1392522145 fix-resize-nonstd-blocks-per-group -touch -d @1392596972 dont-leave-crtime-uninitialized -touch -d @1392687181 fix-use-after-free-in-jbd2_journal_start_reserved -touch -d @1392687293 add-__init-marking-to-init_inodecache -touch -d @1392687413 initialize-multi-block-allocator-before-checking-block-descriptors -touch -d @1392687876 cleanup-error-handling-in-swap_inode_boot_loader -touch -d @1392688000 remove-an-unneeded-check-in-mext_page_mkuptodate -touch -d @1392688144 jbd2-mark-file-local-functions-as-static -touch -d @1392688259 address-a-benign-compiler-warning -touch -d @1392853959 fix-error-return-from-ext4_ext_handle_uninitialized_extents -touch -d @1392858915 add-ext4_es_store_pblock_status -touch -d @1392858921 ext4-dont-calculate-total-xattr-size-if-unneeded -touch -d @1392874601 initialize-fe_logical -touch -d @1392918845 cap-max-length-from-ext -touch -d @1392921130 remove-unused-ac_ex_scanned -touch -d @1392930552 silence-warnings-in-extent-status-tree-debugging-code -touch -d @1392933485 avoid-exposure-stale-data-in-ext4_punch_hole -touch -d @1392949055 merge-uninitialized-extents -touch -d @1393067897 translate-mode-bits-to-strings -touch -d @1393186739 add-support-collapse-range -touch -d @1393948250 speedup-WB_SYNC_ALL-pass -touch -d @1394114171 jbd2-dont-write-non-commit-blocks-synchronously -touch -d @1394320432 jbd2-dont-unplog-after-revoke-records -touch -d @1394323896 dont-hold-j_state_lock-during-wake_up -touch -d @1394326276 jbd2-calc-stats-wo-j_state_lock-and-j_list_lock -touch -d @1394336050 jbd2-add-transaction-to-checkpoint-list-earlier -touch -d @1394341639 jbd2-check-b_transaction-wo-taking-j_list_lock -touch -d @1394343983 jbd2-minimize-j_list_lock-in-journal_get_create_access -touch -d @1394344618 jbd2-minimize-j_state_lock-in-jbd2_journal_forget -touch -d @1394656683 jbd2-improve-error-messages-for-inconsistent-jh -touch -d @1394656743 fs_only_call_sync_filesystem_when_remounting_read-only -touch -d @1394720073 fs-push-sync_filesystem-down-to-remount_fs -touch -d @1394765382 only-sync-filesystem-when-remounting-ro -touch -d @1394766886 delete-path-delalloc-code-in-ext4_ext_handle_uninitialized_extents -touch -d @1394768056 fix-partial-cluster-handling-for-bigalloc-filesystems -touch -d @1394854287 virtio-blk-make-queue-depth-configurable -touch -d @1395021053 v3-ext4-initialize-multi-block-allocator-before-checking-block-descriptors.new -touch -d @1395021387 v3-ext4-initialize-multi-block-allocator-before-checking-block-descriptors -touch -d @1395179075 update-i_size-after-the-preallocation -touch -d @1395180231 refactor-ext4_fallocate-code -touch -d @1395180335 add-zero-range-support -touch -d @1395184781 change-block-and-index-hash-chain-to-hlist_bl_node -touch -d @1395185000 mbcache-decouple-locking-of-local-from-global-data -touch -d @1395185089 each-fs-uses-its-own-mbcache -touch -d @1395289977 kill-i_version-support-for-hurd-OS-filesystems -touch -d @1395684546 optimize-hurd-tests -touch -d @1395686592 atomically-set-inode-flags -touch -d @1395688156 make-ext4_block_zero_page_range_static -touch -d @1395688507 fix-comment-typo -touch -d @1396328361 remove-unneeded-test-of-ret-variable -touch -d @1396328421 stable-boundary -touch -d @1396396170 fix-premature-freeing-of-partial-clusters -touch -d @1396396254 series -touch -d @1396396265 status -touch -d @1396401989 timestamps +touch -d @1301938342 add-WARN_ON-with-unmapped-dirty-bh-in-writepage +touch -d @1301938342 auto-enable-journal_async_commit +touch -d @1301938342 bio-debug +touch -d @1301938342 mballoc-allocate-larger-extents +touch -d @1325258241 new +touch -d @1371093945 avoid-unnecessarily-writing-back-dirty-pages-before-hole-punching +touch -d @1376747621 save-patch +touch -d @1391388373 mbcache-decouple-locking-from-global-data +touch -d @1391388433 each-fs-creates-its-own-mb_cache +touch -d @1392687413 initialize-multi-block-allocator-before-checking-block-descriptors +touch -d @1394114171 jbd2-dont-write-non-commit-blocks-synchronously +touch -d @1394656743 fs_only_call_sync_filesystem_when_remounting_read-only +touch -d @1394854287 virtio-blk-make-queue-depth-configurable +touch -d @1395021053 v3-ext4-initialize-multi-block-allocator-before-checking-block-descriptors.new +touch -d @1395021387 v3-ext4-initialize-multi-block-allocator-before-checking-block-descriptors +touch -d @1396651868 stable-boundary +touch -d @1396651928 stable-boundary-undo.patch +touch -d @1396651988 use-discard-if-possible-in-blkdev_issue_zeroout +touch -d @1396652048 add-blkdiscard-ioctl +touch -d @1396652108 block-dio-during-truncate +touch -d @1396652168 delalloc-debug +touch -d @1396652228 commit-as-soon-as-possible-after-log_start_commit +touch -d @1396652288 add-sysfs-bool-support +touch -d @1396652348 add-squelch-errors-support +touch -d @1396652408 dump-in-use-buffers +touch -d @1396652468 akpm-jbd2-locking-fix +touch -d @1396879898 series +touch -d @1396879908 status +touch -d @1396880144 timestamps diff --git a/translate-mode-bits-to-strings b/translate-mode-bits-to-strings deleted file mode 100644 index 0e0dd0e6..00000000 --- a/translate-mode-bits-to-strings +++ /dev/null @@ -1,69 +0,0 @@ -ext4: translate fallocate mode bits to strings - -From: Lukas Czerner - -Signed-off-by: Lukas Czerner -Signed-off-by: "Theodore Ts'o" ---- - fs/ext4/ext4.h | 1 + - fs/ext4/extents.c | 1 - - include/trace/events/ext4.h | 9 +++++++-- - 3 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index ece5556..3b9601c 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - #ifdef __KERNEL__ - #include - #endif -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 0e675bc..e5485eb 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -37,7 +37,6 @@ - #include - #include - #include --#include - #include - #include - #include "ext4_jbd2.h" -diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h -index 197d312..451e020 100644 ---- a/include/trace/events/ext4.h -+++ b/include/trace/events/ext4.h -@@ -68,6 +68,11 @@ struct extent_status; - { EXTENT_STATUS_DELAYED, "D" }, \ - { EXTENT_STATUS_HOLE, "H" }) - -+#define show_falloc_mode(mode) __print_flags(mode, "|", \ -+ { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ -+ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ -+ { FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}) -+ - - TRACE_EVENT(ext4_free_inode, - TP_PROTO(struct inode *inode), -@@ -1349,10 +1354,10 @@ TRACE_EVENT(ext4_fallocate_enter, - __entry->mode = mode; - ), - -- TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d", -+ TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->pos, -- __entry->len, __entry->mode) -+ __entry->len, show_falloc_mode(__entry->mode)) - ); - - TRACE_EVENT(ext4_fallocate_exit, --- -1.8.3.1 - - diff --git a/update-i_size-after-the-preallocation b/update-i_size-after-the-preallocation deleted file mode 100644 index fcb3d7c7..00000000 --- a/update-i_size-after-the-preallocation +++ /dev/null @@ -1,179 +0,0 @@ -ext4: Update inode i_size after the preallocation - -From: Lukas Czerner - -Currently in ext4_fallocate we would update inode size, c_time and sync -the file with every partial allocation which is entirely unnecessary. It -is true that if the crash happens in the middle of truncate we might end -up with unchanged i size, or c_time which I do not think is really a -problem - it does not mean file system corruption in any way. Note that -xfs is doing things the same way e.g. update all of the mentioned after -the allocation is done. - -This commit moves all the updates after the allocation is done. In -addition we also need to change m_time as not only inode has been change -bot also data regions might have changed (unwritten extents). However -m_time will be only updated when i_size changed. - -Also we do not need to be paranoid about changing the c_time only if the -actual allocation have happened, we can change it even if we try to -allocate only to find out that there are already block allocated. It's -not really a big deal and it will save us some additional complexity. - -Also use ext4_debug, instead of ext4_warning in #ifdef EXT4FS_DEBUG -section. - -Signed-off-by: Lukas Czerner -Signed-off-by: "Theodore Ts'o" - --- -v3: Do not remove the code to set EXT4_INODE_EOFBLOCKS flag - - fs/ext4/extents.c | 96 ++++++++++++++++++++++++------------------------------- - 1 file changed, 42 insertions(+), 54 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 202e8e3..ce3d371 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4546,36 +4546,6 @@ retry: - ext4_std_error(inode->i_sb, err); - } - --static void ext4_falloc_update_inode(struct inode *inode, -- int mode, loff_t new_size, int update_ctime) --{ -- struct timespec now; -- -- if (update_ctime) { -- now = current_fs_time(inode->i_sb); -- if (!timespec_equal(&inode->i_ctime, &now)) -- inode->i_ctime = now; -- } -- /* -- * Update only when preallocation was requested beyond -- * the file size. -- */ -- if (!(mode & FALLOC_FL_KEEP_SIZE)) { -- if (new_size > i_size_read(inode)) -- i_size_write(inode, new_size); -- if (new_size > EXT4_I(inode)->i_disksize) -- ext4_update_i_disksize(inode, new_size); -- } else { -- /* -- * Mark that we allocate beyond EOF so the subsequent truncate -- * can proceed even if the new size is the same as i_size. -- */ -- if (new_size > i_size_read(inode)) -- ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); -- } -- --} -- - /* - * preallocate space for a file. This implements ext4's fallocate file - * operation, which gets called from sys_fallocate system call. -@@ -4587,13 +4557,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - { - struct inode *inode = file_inode(file); - handle_t *handle; -- loff_t new_size; -+ loff_t new_size = 0; - unsigned int max_blocks; - int ret = 0; - int ret2 = 0; - int retries = 0; - int flags; - struct ext4_map_blocks map; -+ struct timespec tv; - unsigned int credits, blkbits = inode->i_blkbits; - - /* Return error if mode is not supported */ -@@ -4631,12 +4602,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) - */ - credits = ext4_chunk_trans_blocks(inode, max_blocks); - mutex_lock(&inode->i_mutex); -- ret = inode_newsize_ok(inode, (len + offset)); -- if (ret) { -- mutex_unlock(&inode->i_mutex); -- trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); -- return ret; -+ -+ if (!(mode & FALLOC_FL_KEEP_SIZE) && -+ offset + len > i_size_read(inode)) { -+ new_size = offset + len; -+ ret = inode_newsize_ok(inode, new_size); -+ if (ret) -+ goto out; - } -+ - flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; - if (mode & FALLOC_FL_KEEP_SIZE) - flags |= EXT4_GET_BLOCKS_KEEP_SIZE; -@@ -4660,28 +4634,14 @@ retry: - } - ret = ext4_map_blocks(handle, inode, &map, flags); - if (ret <= 0) { --#ifdef EXT4FS_DEBUG -- ext4_warning(inode->i_sb, -- "inode #%lu: block %u: len %u: " -- "ext4_ext_map_blocks returned %d", -- inode->i_ino, map.m_lblk, -- map.m_len, ret); --#endif -+ ext4_debug("inode #%lu: block %u: len %u: " -+ "ext4_ext_map_blocks returned %d", -+ inode->i_ino, map.m_lblk, -+ map.m_len, ret); - ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); - break; - } -- if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, -- blkbits) >> blkbits)) -- new_size = offset + len; -- else -- new_size = ((loff_t) map.m_lblk + ret) << blkbits; -- -- ext4_falloc_update_inode(inode, mode, new_size, -- (map.m_flags & EXT4_MAP_NEW)); -- ext4_mark_inode_dirty(handle, inode); -- if ((file->f_flags & O_SYNC) && ret >= max_blocks) -- ext4_handle_sync(handle); - ret2 = ext4_journal_stop(handle); - if (ret2) - break; -@@ -4691,6 +4651,34 @@ retry: - ret = 0; - goto retry; - } -+ -+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); -+ if (IS_ERR(handle)) -+ goto out; -+ -+ tv = inode->i_ctime = ext4_current_time(inode); -+ -+ if (ret > 0 && new_size) { -+ if (new_size > i_size_read(inode)) { -+ i_size_write(inode, new_size); -+ inode->i_mtime = tv; -+ } -+ if (new_size > EXT4_I(inode)->i_disksize) -+ ext4_update_i_disksize(inode, new_size); -+ } else if (ret > 0 && !new_size) { -+ /* -+ * Mark that we allocate beyond EOF so the subsequent truncate -+ * can proceed even if the new size is the same as i_size. -+ */ -+ if ((offset + len) > i_size_read(inode)) -+ ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); -+ } -+ ext4_mark_inode_dirty(handle, inode); -+ if (file->f_flags & O_SYNC) -+ ext4_handle_sync(handle); -+ -+ ext4_journal_stop(handle); -+out: - mutex_unlock(&inode->i_mutex); - trace_ext4_fallocate_exit(inode, offset, max_blocks, - ret > 0 ? ret2 : ret); -- 2.11.4.GIT