From 1edc5bea1d2d5e1c900b5623d02670117fab46a0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Nov 2016 21:22:35 -0500 Subject: [PATCH] patch series sync --- add-indirection-to-metadata-block-read-paths | 251 ++++++ add-support-for-log-metadata-block-tracking-in-log | 949 +++++++++++++++++++++ allow-ext4_ext_truncate-to-return-an-error | 84 ++ allow-ext4_truncate-to-return-an-error | 189 ++++ cleaner | 327 +++++++ disable-writeback | 22 + ...r-head-in-ext4_commit_super-if-holding-spinlock | 62 ++ series | 11 +- timestamps | 15 +- 9 files changed, 1905 insertions(+), 5 deletions(-) create mode 100644 add-indirection-to-metadata-block-read-paths create mode 100644 add-support-for-log-metadata-block-tracking-in-log create mode 100644 allow-ext4_ext_truncate-to-return-an-error create mode 100644 allow-ext4_truncate-to-return-an-error create mode 100644 cleaner create mode 100644 disable-writeback create mode 100644 dont-lock-buffer-head-in-ext4_commit_super-if-holding-spinlock diff --git a/add-indirection-to-metadata-block-read-paths b/add-indirection-to-metadata-block-read-paths new file mode 100644 index 00000000..03180f5d --- /dev/null +++ b/add-indirection-to-metadata-block-read-paths @@ -0,0 +1,251 @@ +Add indirection to metadata read paths + +From: Abutalib Aghayev + +Change all metadata block reads to use jmap-aware function that first looks +up the metadata block in the jmap. If lookup is successful, the function +reads the corresponding log block from the journal and copies it to the +metadata block buffer head. Otherwise, it reads the metadata block from +the file system, just like standard jmap-unaware function. + +Signed-off-by: Abutalib Aghayev + +--- + fs/ext4/extents.c | 3 ++- + fs/ext4/ialloc.c | 5 ++++- + fs/ext4/indirect.c | 3 ++- + fs/ext4/inode.c | 8 ++++++-- + fs/ext4/move_extent.c | 3 ++- + fs/ext4/namei.c | 8 +++++--- + fs/ext4/resize.c | 4 +++- + fs/jbd2/jmap.c | 17 ++++++++++------- + 8 files changed, 34 insertions(+), 17 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index c930a0110fb4..aea6e67d9037 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -517,6 +517,7 @@ __read_extent_tree_block(const char *function, unsigned int line, + { + struct buffer_head *bh; + int err; ++ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); + if (unlikely(!bh)) +@@ -524,7 +525,7 @@ __read_extent_tree_block(const char *function, unsigned int line, + + if (!bh_uptodate_or_lock(bh)) { + trace_ext4_ext_load_extent(inode, pblk, _RET_IP_); +- err = bh_submit_read(bh); ++ err = jbd2_bh_submit_read(journal, bh, __func__); + if (err < 0) + goto errout; + } +diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c +index 170421edfdfe..919c2d114fb5 100644 +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -14,6 +14,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -160,6 +161,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) + struct buffer_head *bh = NULL; + ext4_fsblk_t bitmap_blk; + int err; ++ journal_t *journal = EXT4_SB(sb)->s_journal; + + desc = ext4_get_group_desc(sb, block_group, NULL); + if (!desc) +@@ -214,7 +216,8 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) + trace_ext4_load_inode_bitmap(sb, block_group); + bh->b_end_io = ext4_end_bitmap_read; + get_bh(bh); +- submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); ++ jbd2_submit_bh(journal, REQ_OP_READ, REQ_META | REQ_PRIO, bh, __func__); ++ + 
wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + put_bh(bh); +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index bc15c2c17633..21531ef4a182 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -145,6 +145,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, + Indirect chain[4], int *err) + { + struct super_block *sb = inode->i_sb; ++ journal_t *journal = EXT4_SB(sb)->s_journal; + Indirect *p = chain; + struct buffer_head *bh; + int ret = -EIO; +@@ -162,7 +163,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, + } + + if (!bh_uptodate_or_lock(bh)) { +- if (bh_submit_read(bh) < 0) { ++ if (jbd2_bh_submit_read(journal, bh, __func__) < 0) { + put_bh(bh); + goto failure; + } +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 9c064727ed62..0e2f7c3b499e 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -989,13 +989,15 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, + ext4_lblk_t block, int map_flags) + { + struct buffer_head *bh; ++ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + + bh = ext4_getblk(handle, inode, block, map_flags); + if (IS_ERR(bh)) + return bh; + if (!bh || buffer_uptodate(bh)) + return bh; +- ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); ++ jbd2_ll_rw_block(journal, REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh, ++ __func__); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + return bh; +@@ -4201,6 +4203,7 @@ static int __ext4_get_inode_loc(struct inode *inode, + struct super_block *sb = inode->i_sb; + ext4_fsblk_t block; + int inodes_per_block, inode_offset; ++ journal_t *journal = EXT4_SB(sb)->s_journal; + + iloc->bh = NULL; + if (!ext4_valid_inum(sb, inode->i_ino)) +@@ -4316,7 +4319,8 @@ static int __ext4_get_inode_loc(struct inode *inode, + trace_ext4_load_inode(inode); + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; +- submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); ++ jbd2_submit_bh(journal, REQ_OP_READ, REQ_META | REQ_PRIO, bh, ++ __func__); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + EXT4_ERROR_INODE_BLOCK(inode, block, +diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c +index 6fc14def0c70..b6c25638f5f4 100644 +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -177,6 +177,7 @@ static int + mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) + { + struct inode *inode = page->mapping->host; ++ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + sector_t block; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + unsigned int blocksize, block_start, block_end; +@@ -225,7 +226,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) + for (i = 0; i < nr; i++) { + bh = arr[i]; + if (!bh_uptodate_or_lock(bh)) { +- err = bh_submit_read(bh); ++ err = jbd2_bh_submit_read(journal, bh, __func__); + if (err) + return err; + } +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 104f8bfba718..9c23616e7702 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1361,6 +1361,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, + struct buffer_head *bh_use[NAMEI_RA_SIZE]; + struct buffer_head *bh, *ret = NULL; + ext4_lblk_t start, block, b; ++ journal_t *journal; + const u8 *name = d_name->name; + int ra_max = 0; /* Number of bh's in the readahead + buffer, bh_use[] */ +@@ -1373,6 +1374,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, + + *res_dir = NULL; + sb = dir->i_sb; ++ journal = EXT4_SB(sb)->s_journal; + namelen = d_name->len; + if (namelen > EXT4_NAME_LEN) + 
return NULL; +@@ -1449,9 +1451,9 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, + } + bh_use[ra_max] = bh; + if (bh) +- ll_rw_block(REQ_OP_READ, +- REQ_META | REQ_PRIO, +- 1, &bh); ++ jbd2_ll_rw_block(journal, REQ_OP_READ, ++ REQ_META | REQ_PRIO, ++ 1, &bh, __func__); + } + } + if ((bh = bh_use[ra_ptr++]) == NULL) +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index cf681004b196..4e8711abc333 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1192,10 +1192,12 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, + static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) + { + struct buffer_head *bh = sb_getblk(sb, block); ++ journal_t *journal = EXT4_SB(sb)->s_journal; ++ + if (unlikely(!bh)) + return NULL; + if (!bh_uptodate_or_lock(bh)) { +- if (bh_submit_read(bh) < 0) { ++ if (jbd2_bh_submit_read(journal, bh, __func__) < 0) { + brelse(bh); + return NULL; + } +diff --git a/fs/jbd2/jmap.c b/fs/jbd2/jmap.c +index 7d7b4eb389ed..8c844f65eeaa 100644 +--- a/fs/jbd2/jmap.c ++++ b/fs/jbd2/jmap.c +@@ -87,17 +87,19 @@ static int process_existing_mappings(journal_t *journal, + mappings[nr_new++] = mappings[i]; + continue; + } ++ /* ++ * We are either deleting the entry because it was revoked, or ++ * we are moving it to the live blocks list of this transaction. ++ * In either case, we remove it from its existing list. ++ */ ++ list_del(&je->list); ++ + if (je->revoked) { + rb_erase(&je->rb_node, &journal->j_jmap); + kmem_cache_free(jbd2_jmap_cache, je); + } else { +- /* +- * Delete jmap entry from the old transaction's list +- * before adding it to the new transaction's list. +- */ +- list_del(&je->list); +- fill_entry(je, &mappings[i], t_idx, &ti->live_logblks); + trace_jbd2_jmap_replace(je, &mappings[i], t_idx); ++ fill_entry(je, &mappings[i], t_idx, &ti->live_logblks); + } + } + return nr_new; +@@ -141,12 +143,13 @@ static void add_new_mappings(journal_t *journal, struct transaction_info *ti, + int t_idx, struct blk_mapping *mappings, + struct jmap_entry **new_entries, int nr_new) + { +- struct rb_node **p = &journal->j_jmap.rb_node; ++ struct rb_node **p; + struct rb_node *parent = NULL; + struct jmap_entry *je; + int i; + + for (i = 0; i < nr_new; ++i) { ++ p = &journal->j_jmap.rb_node; + while (*p) { + parent = *p; + je = rb_entry(parent, struct jmap_entry, rb_node); diff --git a/add-support-for-log-metadata-block-tracking-in-log b/add-support-for-log-metadata-block-tracking-in-log new file mode 100644 index 00000000..1eaccde3 --- /dev/null +++ b/add-support-for-log-metadata-block-tracking-in-log @@ -0,0 +1,949 @@ +Add support for tracking metadata blocks in the log. + +From: Abutalib Aghayev + +This patch adds two important data structures, jmap and transaction_infos, +and supporting functions. Jmap is a map from a metadata block number to +the log block number. When a transaction commits, jmap is updated with new +mappings; when a block is revoked, the mapping for the block is removed +from the jmap. Transaction_infos is an array of transaction_info +structures that contain information about transactions currently present in +the log. It contains a linked list of live blocks in a transaction, and it +is updated after every commit to keep the list up-to-date. +Transaction_infos array will be used by the cleaner for identifying live +blocks and migrating them to appropriate location. 
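
In rough outline, a metadata read under this scheme first consults the jmap and is redirected to the log on a hit. The following is a minimal sketch of that path, condensed from jbd2_submit_bh() in the patch below; the helper name smr_read_metadata is illustrative only, tracing and the no-journal fallback are omitted, and as in jbd2_submit_bh() the caller is assumed to hold the buffer lock:

#include <linux/jbd2.h>
#include <linux/buffer_head.h>

static void smr_read_metadata(journal_t *journal, struct buffer_head *bh)
{
        struct jmap_entry *je;
        sector_t logblk = 0;

        /* The jmap is an RB-tree keyed by the file system block number. */
        read_lock(&journal->j_jmap_lock);
        je = jbd2_jmap_lookup(journal, bh->b_blocknr, __func__);
        if (je)
                logblk = je->mapping.logblk;
        read_unlock(&journal->j_jmap_lock);

        if (!je) {
                /* No mapping: the copy in the file system is current. */
                submit_bh(REQ_OP_READ, 0, bh);
                return;
        }
        /* Mapping found: the latest copy still lives in the journal. */
        read_block_from_log(journal, bh, 0, logblk);
}
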
+ +Signed-off-by: Abutalib Aghayev + +--- + fs/jbd2/Makefile | 3 +- + fs/jbd2/commit.c | 17 ++++ + fs/jbd2/jmap.c | 440 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + fs/jbd2/journal.c | 17 +++- + include/linux/jbd2.h | 13 +++ + include/linux/jmap.h | 129 +++++++++++++++++++++++++ + include/trace/events/jbd2.h | 169 ++++++++++++++++++++++++++++++++ + 7 files changed, 783 insertions(+), 5 deletions(-) + +diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile +index 802a3413872a..a54f50b3a06e 100644 +--- a/fs/jbd2/Makefile ++++ b/fs/jbd2/Makefile +@@ -4,4 +4,5 @@ + + obj-$(CONFIG_JBD2) += jbd2.o + +-jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o ++jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o \ ++ jmap.o +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c +index 31f8ca046639..4a249ec74b5c 100644 +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -361,6 +361,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) + int flags; + int err; + unsigned long long blocknr; ++ struct blk_mapping *mappings; ++ int nr_mappings; + ktime_t start_time; + u64 commit_time; + char *tagp = NULL; +@@ -562,8 +564,14 @@ void jbd2_journal_commit_transaction(journal_t *journal) + J_ASSERT(commit_transaction->t_nr_buffers <= + atomic_read(&commit_transaction->t_outstanding_credits)); + ++ nr_mappings = commit_transaction->t_nr_buffers; ++ mappings = kmalloc(sizeof(*mappings) * nr_mappings, GFP_NOFS); ++ if (!mappings) ++ jbd2_journal_abort(journal, -ENOMEM); ++ + err = 0; + bufs = 0; ++ nr_mappings = 0; + descriptor = NULL; + while (commit_transaction->t_buffers) { + +@@ -660,6 +668,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) + continue; + } + jbd2_file_log_bh(&io_bufs, wbuf[bufs]); ++ mappings[nr_mappings++] = (struct blk_mapping) { ++ jh2bh(jh)->b_blocknr, blocknr ++ }; + + /* Record the new block's tag in the current descriptor + buffer */ +@@ -894,6 +905,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) + transaction can be removed from any checkpoint list it was on + before. */ + ++ err = jbd2_transaction_infos_add(journal, commit_transaction, ++ mappings, nr_mappings); ++ if (err) ++ jbd2_journal_abort(journal, -ENOMEM); ++ kfree(mappings); ++ + jbd_debug(3, "JBD2: commit phase 6\n"); + + J_ASSERT(list_empty(&commit_transaction->t_inode_list)); +diff --git a/fs/jbd2/jmap.c b/fs/jbd2/jmap.c +new file mode 100644 +index 000000000000..7d7b4eb389ed +--- /dev/null ++++ b/fs/jbd2/jmap.c +@@ -0,0 +1,440 @@ ++#include ++#include ++#include ++ ++static struct kmem_cache *jbd2_jmap_cache; ++ ++int jbd2_journal_init_jmap_cache(void) ++{ ++ jbd2_jmap_cache = KMEM_CACHE(jmap_entry, SLAB_RECLAIM_ACCOUNT); ++ if (!jbd2_jmap_cache) ++ return -ENOMEM; ++ return 0; ++} ++ ++void jbd2_journal_destroy_jmap_cache(void) ++{ ++ if (jbd2_jmap_cache) ++ kmem_cache_destroy(jbd2_jmap_cache); ++ jbd2_jmap_cache = NULL; ++} ++ ++/* ++ * Allocate an array of transaction_info structures and initialize the list ++ * heads inside them. 
++ */ ++int jbd2_init_transaction_infos(journal_t *journal) ++{ ++ int i; ++ struct transaction_infos *tis = kzalloc(sizeof(*tis), GFP_KERNEL); ++ if (!tis) ++ return -ENOMEM; ++ ++ tis->buf = kzalloc(sizeof(*tis->buf) * MAX_LIVE_TRANSACTIONS, ++ GFP_KERNEL); ++ if (!tis->buf) { ++ kfree(tis); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < MAX_LIVE_TRANSACTIONS; ++i) ++ INIT_LIST_HEAD(&tis->buf[i].live_logblks); ++ ++ journal->j_transaction_infos = tis; ++ return 0; ++} ++ ++/* ++ * Free the array of transaction_info structures. ++ */ ++void jbd2_free_transaction_infos(journal_t *journal) ++{ ++ struct transaction_infos *tis = journal->j_transaction_infos; ++ if (!tis) ++ return; ++ kfree(tis->buf); ++ kfree(tis); ++} ++ ++/* ++ * Fill an entry to be stored in jmap. ++ */ ++static void fill_entry(struct jmap_entry *entry, struct blk_mapping *mapping, ++ int t_idx, struct list_head *list) ++{ ++ entry->mapping = *mapping; ++ entry->fsblk_last_modified = jiffies; ++ entry->t_idx = t_idx; ++ list_add(&entry->list, list); ++} ++ ++/* ++ * A helper function for jbd2_transaction_infos_add. Scans through the mappings ++ * array, dropping revoked entries from jmap and updating existing entries. ++ * Moves the new mappings to the beginning of the mappings array and returns the ++ * number of new mappings. Should be called with a write lock on j_jmap_lock. ++ */ ++static int process_existing_mappings(journal_t *journal, ++ struct transaction_info *ti, int t_idx, ++ struct blk_mapping *mappings, int nr_mappings) ++{ ++ struct jmap_entry *je; ++ int i, nr_new = 0; ++ ++ for (i = 0; i < nr_mappings; ++i) { ++ je = jbd2_jmap_lookup(journal, mappings[i].fsblk, __func__); ++ if (!je) { ++ mappings[nr_new++] = mappings[i]; ++ continue; ++ } ++ if (je->revoked) { ++ rb_erase(&je->rb_node, &journal->j_jmap); ++ kmem_cache_free(jbd2_jmap_cache, je); ++ } else { ++ /* ++ * Delete jmap entry from the old transaction's list ++ * before adding it to the new transaction's list. ++ */ ++ list_del(&je->list); ++ fill_entry(je, &mappings[i], t_idx, &ti->live_logblks); ++ trace_jbd2_jmap_replace(je, &mappings[i], t_idx); ++ } ++ } ++ return nr_new; ++} ++ ++/* ++ * A helper function for jbd2_transaction_infos_add. Allocates an array of ++ * jmap_entry structures and returns the pointer to array if successful. ++ * Otherwise, returns NULL. ++ */ ++static struct jmap_entry **alloc_jmap_entries(int nr_entries) ++{ ++ struct jmap_entry **jmap_entries; ++ int i; ++ ++ jmap_entries = kmalloc(sizeof(struct jmap_entry *) * nr_entries, ++ GFP_NOFS); ++ if (!jmap_entries) ++ return NULL; ++ ++ for (i = 0; i < nr_entries; i++) { ++ jmap_entries[i] = kmem_cache_zalloc(jbd2_jmap_cache, GFP_NOFS); ++ if (!jmap_entries[i]) ++ goto out_err; ++ } ++ return jmap_entries; ++ ++out_err: ++ for (i = 0; i < nr_entries && jmap_entries[i]; ++i) ++ kmem_cache_free(jbd2_jmap_cache, jmap_entries[i]); ++ kfree(jmap_entries); ++ return NULL; ++} ++ ++/* ++ * A helper function for jbd2_transaction_infos_add. Adds new mappings to jmap ++ * and updates the linked list of live logblks of the new transaction. Should ++ * be called with write lock on j_jmap_lock. 
++ */ ++static void add_new_mappings(journal_t *journal, struct transaction_info *ti, ++ int t_idx, struct blk_mapping *mappings, ++ struct jmap_entry **new_entries, int nr_new) ++{ ++ struct rb_node **p = &journal->j_jmap.rb_node; ++ struct rb_node *parent = NULL; ++ struct jmap_entry *je; ++ int i; ++ ++ for (i = 0; i < nr_new; ++i) { ++ while (*p) { ++ parent = *p; ++ je = rb_entry(parent, struct jmap_entry, rb_node); ++ ++ if (mappings[i].fsblk < je->mapping.fsblk) ++ p = &(*p)->rb_left; ++ else if (mappings[i].fsblk > je->mapping.fsblk) ++ p = &(*p)->rb_right; ++ else ++ BUG_ON(1); ++ } ++ fill_entry(new_entries[i], &mappings[i], t_idx, ++ &ti->live_logblks); ++ rb_link_node(&new_entries[i]->rb_node, parent, p); ++ rb_insert_color(&new_entries[i]->rb_node, &journal->j_jmap); ++ trace_jbd2_jmap_insert(&mappings[i], t_idx); ++ } ++} ++ ++/* ++ * This function is called after a transaction commits. It adds new ++ * transaction_info structure to transaction_infos and populates jmap map with ++ * the new mappings that are part of the committed transaction. It also adds ++ * all the mappings to the linked list that is part of the transaction_info ++ * structure. ++ */ ++int jbd2_transaction_infos_add(journal_t *journal, transaction_t *transaction, ++ struct blk_mapping *mappings, int nr_mappings) ++{ ++ struct transaction_infos *tis = journal->j_transaction_infos; ++ int t_idx = tis->head; ++ struct transaction_info *ti = &tis->buf[t_idx]; ++ struct jmap_entry **new_entries = NULL; ++ int nr_new = 0; ++ ++ /* ++ * We are possibly reusing space of an old transaction_info. The old ++ * transaction should not have any live blocks in it. ++ */ ++ BUG_ON(!list_empty(&ti->live_logblks)); ++ ++ write_lock(&journal->j_jmap_lock); ++ nr_new = process_existing_mappings(journal, ti, t_idx, mappings, ++ nr_mappings); ++ write_unlock(&journal->j_jmap_lock); ++ ++ if (nr_new == 0) ++ goto move_head; ++ ++ new_entries = alloc_jmap_entries(nr_new); ++ if (!new_entries) ++ return -ENOMEM; ++ ++ write_lock(&journal->j_jmap_lock); ++ add_new_mappings(journal, ti, t_idx, mappings, new_entries, nr_new); ++ write_unlock(&journal->j_jmap_lock); ++ ++ kfree(new_entries); ++ ++move_head: ++ write_lock(&journal->j_jmap_lock); ++ ti->tid = transaction->t_tid; ++ ti->offset = transaction->t_log_start; ++ tis->head = (tis->head + 1) & (MAX_LIVE_TRANSACTIONS - 1); ++ write_unlock(&journal->j_jmap_lock); ++ ++ trace_jbd2_transaction_infos_add(t_idx, ti, nr_mappings); ++ return 0; ++} ++ ++/* ++ * Look up fsblk in the jmap and return the corresponding jmap entry if found. ++ * Should be called with a read lock on j_jmap_lock. ++ */ ++struct jmap_entry *jbd2_jmap_lookup(journal_t *journal, sector_t fsblk, ++ const char *func) ++{ ++ struct rb_node *p; ++ ++ BUG_ON(!journal); ++ ++ for (p = journal->j_jmap.rb_node; p; ) { ++ struct jmap_entry *je = rb_entry(p, struct jmap_entry, rb_node); ++ if (je->mapping.fsblk > fsblk) ++ p = p->rb_left; ++ else if (je->mapping.fsblk < fsblk) ++ p = p->rb_right; ++ else { ++ trace_jbd2_jmap_lookup(fsblk, je->mapping.logblk, func); ++ return je; ++ } ++ } ++ trace_jbd2_jmap_lookup(fsblk, 0, func); ++ return NULL; ++} ++ ++/* ++ * Revoke a mapping for the fsblk in the jmap. A lookup for fsblk will return ++ * NULL and the mapping will be removed from the jmap during commit, unless ++ * fsblk is reallocated as a metadata block. 
++ */ ++void jbd2_jmap_revoke(journal_t *journal, sector_t fsblk) ++{ ++ struct jmap_entry *je; ++ ++ write_lock(&journal->j_jmap_lock); ++ je = jbd2_jmap_lookup(journal, fsblk, __func__); ++ /* ++ * For now, since we do not construct jmap from the journal, it is ++ * possible that a metadata block that was revoked is not in the jmap. ++ * Eventually, this should not be the case and we should have a ++ * BUG_ON(!je) here. ++ */ ++ if (je) { ++ BUG_ON(je->revoked); ++ je->revoked = true; ++ } ++ write_unlock(&journal->j_jmap_lock); ++} ++ ++/* ++ * Cancel a revoke for the fsblk in the jmap. ++ */ ++void jbd2_jmap_cancel_revoke(journal_t *journal, sector_t fsblk) ++{ ++ struct jmap_entry *je; ++ ++ write_lock(&journal->j_jmap_lock); ++ je = jbd2_jmap_lookup(journal, fsblk, __func__); ++ BUG_ON(!je); ++ BUG_ON(!je->revoked); ++ je->revoked = false; ++ write_unlock(&journal->j_jmap_lock); ++} ++ ++/* ++ * Read bh from its most up-to-date location, either from the file system or ++ * from the log. ++ * ++ * If there is no mapping for the bh in jmap, this function acts like submit_bh. ++ * Otherwise, it submits a read for the block pointed by the mapping located in ++ * the log. Upon completion, bh will be filled with the contents of the block ++ * read from the log. ++ */ ++void jbd2_submit_bh(journal_t *journal, int rw, int op_flags, ++ struct buffer_head *bh, const char *func) ++{ ++ sector_t fsblk = bh->b_blocknr; ++ sector_t logblk; ++ struct jmap_entry *je; ++ ++ BUG_ON(!buffer_locked(bh)); ++ ++ if (!journal) { ++ submit_bh(rw, op_flags, bh); ++ return; ++ } ++ ++ read_lock(&journal->j_jmap_lock); ++ je = jbd2_jmap_lookup(journal, fsblk, func); ++ if (!je) { ++ read_unlock(&journal->j_jmap_lock); ++ submit_bh(rw, op_flags, bh); ++ return; ++ } ++ logblk = je->mapping.logblk; ++ read_unlock(&journal->j_jmap_lock); ++ ++ BUG_ON(rw == WRITE); ++ read_block_from_log(journal, bh, op_flags, logblk); ++} ++ ++/* ++ * End_io handler for read_block_from_log that copies the contents of ++ * log_bh read from log to the embedded bh. ++ */ ++static void jbd2_end_log_read(struct buffer_head *log_bh, int uptodate) ++{ ++ struct buffer_head *bh = log_bh->b_private; ++ ++ if (uptodate) { ++ trace_jbd2_jmap_printf1("read from log", bh->b_blocknr); ++ memcpy(bh->b_data, log_bh->b_data, log_bh->b_size); ++ } else { ++ trace_jbd2_jmap_printf1("failed to read from log", bh->b_blocknr); ++ } ++ ++ unlock_buffer(log_bh); ++ put_bh(log_bh); ++ brelse(log_bh); ++ ++ bh->b_end_io(bh, uptodate); ++} ++ ++/* ++ * This function fills |bh| with the contents of the |blk|. Assume ++ * jmap maps metadata block 123 to log block 100123. To read the ++ * metadata block 123, we obtain a buffer head for it and call ++ * read_block_from_log passing the obtained buffer head as |bh| and ++ * 100123 as |blk|. If block 100123 is cached, then we copy the ++ * contents to |bh| and return. Otherwise, we submit a request and ++ * end_io handler copies the contents of block 100123 to |bh|. ++ * Returns -ENOMEM if getblk fails, 1 if block is not cached, 0 if ++ * block is cached. 
++ */ ++int read_block_from_log(journal_t *journal, struct buffer_head *bh, ++ int op_flags, sector_t blk) ++{ ++ struct buffer_head *log_bh; ++ ++ BUG_ON(!buffer_locked(bh)); ++ ++ log_bh = __getblk(journal->j_fs_dev, blk, bh->b_size); ++ if (unlikely(!log_bh)) { ++ bh->b_end_io(bh, 0); ++ return -ENOMEM; ++ } ++ ++ lock_buffer(log_bh); ++ if (buffer_uptodate(log_bh)) { ++ memcpy(bh->b_data, log_bh->b_data, bh->b_size); ++ unlock_buffer(log_bh); ++ brelse(log_bh); ++ bh->b_end_io(bh, 1); ++ return 0; ++ } ++ ++ log_bh->b_end_io = jbd2_end_log_read; ++ log_bh->b_private = bh; ++ get_bh(log_bh); ++ submit_bh(READ, op_flags, log_bh); ++ return 1; ++} ++ ++/* ++ * Copy of ll_rw_block that uses jbd2_submit_bh instead of submit_bh. ++ */ ++void jbd2_ll_rw_block(journal_t *journal, int rw, int op_flags, ++ int nr, struct buffer_head *bhs[], const char *func) ++{ ++ int i; ++ ++ for (i = 0; i < nr; i++) { ++ struct buffer_head *bh = bhs[i]; ++ ++ if (!trylock_buffer(bh)) ++ continue; ++ BUG_ON(rw == WRITE); ++ if (!buffer_uptodate(bh)) { ++ bh->b_end_io = end_buffer_read_sync; ++ get_bh(bh); ++ jbd2_submit_bh(journal, rw, op_flags, bh, func); ++ continue; ++ } ++ unlock_buffer(bh); ++ } ++} ++ ++/* ++ * Copy of bh_submit_read that uses jbd2_submit_bh instead of submit_bh. ++ */ ++int jbd2_bh_submit_read(journal_t *journal, struct buffer_head *bh, ++ const char *func) ++{ ++ BUG_ON(!buffer_locked(bh)); ++ ++ if (buffer_uptodate(bh)) { ++ unlock_buffer(bh); ++ return 0; ++ } ++ ++ get_bh(bh); ++ bh->b_end_io = end_buffer_read_sync; ++ jbd2_submit_bh(journal, READ, 0, bh, func); ++ wait_on_buffer(bh); ++ if (buffer_uptodate(bh)) ++ return 0; ++ return -EIO; ++} ++ ++int jbd2_smr_journal_init(journal_t *journal) ++{ ++ journal->j_jmap = RB_ROOT; ++ rwlock_init(&journal->j_jmap_lock); ++ return jbd2_init_transaction_infos(journal); ++} ++ ++void jbd2_smr_journal_exit(journal_t *journal) ++{ ++ jbd2_free_transaction_infos(journal); ++} +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index 927da4956a89..0cbfb7fdc45d 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1120,15 +1120,17 @@ static journal_t *journal_init_common(struct block_device *bdev, + journal->j_max_batch_time = 15000; /* 15ms */ + atomic_set(&journal->j_reserved_credits, 0); + ++ err = jbd2_smr_journal_init(journal); ++ if (err) ++ goto out_err; ++ + /* The journal is marked for error until we succeed with recovery! */ + journal->j_flags = JBD2_ABORT; + + /* Set up a default-sized revoke table for the new mount. 
*/ + err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); +- if (err) { +- kfree(journal); +- return NULL; +- } ++ if (err) ++ goto out_err; + + spin_lock_init(&journal->j_history_lock); + +@@ -1162,6 +1164,9 @@ static journal_t *journal_init_common(struct block_device *bdev, + journal->j_superblock = (journal_superblock_t *)bh->b_data; + + return journal; ++out_err: ++ kfree(journal); ++ return NULL; + } + + /* jbd2_journal_init_dev and jbd2_journal_init_inode: +@@ -1734,6 +1739,7 @@ int jbd2_journal_destroy(journal_t *journal) + jbd2_journal_destroy_revoke(journal); + if (journal->j_chksum_driver) + crypto_free_shash(journal->j_chksum_driver); ++ jbd2_smr_journal_exit(journal); + kfree(journal->j_wbuf); + kfree(journal); + +@@ -2634,6 +2640,8 @@ static int __init journal_init_caches(void) + ret = jbd2_journal_init_handle_cache(); + if (ret == 0) + ret = jbd2_journal_init_transaction_cache(); ++ if (ret == 0) ++ ret = jbd2_journal_init_jmap_cache(); + return ret; + } + +@@ -2643,6 +2651,7 @@ static void jbd2_journal_destroy_caches(void) + jbd2_journal_destroy_journal_head_cache(); + jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_transaction_cache(); ++ jbd2_journal_destroy_jmap_cache(); + jbd2_journal_destroy_slabs(); + } + +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index dfaa1f4dcb0c..317efb491569 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -732,6 +733,9 @@ jbd2_time_diff(unsigned long start, unsigned long end) + * prior abort)? + * @j_sb_buffer: First part of superblock buffer + * @j_superblock: Second part of superblock buffer ++ * @j_map: A map from file system blocks to log blocks ++ * @j_transaction_infos: An array of information structures per live transaction ++ * @j_map_lock: Protect j_jmap and j_transaction_infos + * @j_format_version: Version of the superblock format + * @j_state_lock: Protect the various scalars in the journal + * @j_barrier_count: Number of processes waiting to create a barrier lock +@@ -807,6 +811,15 @@ struct journal_s + struct buffer_head *j_sb_buffer; + journal_superblock_t *j_superblock; + ++ /* A map from file system blocks to journal blocks */ ++ struct rb_root j_jmap; ++ ++ /* An array of housekeeping information about live transactions */ ++ struct transaction_infos *j_transaction_infos; ++ ++ /* Protect j_jmap and j_transaction_infos */ ++ rwlock_t j_jmap_lock; ++ + /* Version of the superblock format */ + int j_format_version; + +diff --git a/include/linux/jmap.h b/include/linux/jmap.h +new file mode 100644 +index 000000000000..d068358380b0 +--- /dev/null ++++ b/include/linux/jmap.h +@@ -0,0 +1,129 @@ ++#ifndef _LINUX_JMAP_H ++#define _LINUX_JMAP_H ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * Maximum number of transactions. This guides the size of the circular buffer ++ * in which we store housekeeping information per transaction. We start ++ * cleaning either when the circular buffer is full or when we hit the free ++ * space threshold, whichever happens first. For starters, we make this ++ * constant large to make sure that we start cleaning only when we hit the free ++ * space threshold. Later we can empirically determine a sensible value. 
++ */ ++#define MAX_LIVE_TRANSACTIONS 65536 ++ ++/* ++ * Forward declaration for journal_t so that we don't get circular dependency ++ * between jbd2.h and jmap.h ++ */ ++struct journal_s; ++typedef struct journal_s journal_t; ++ ++/* ++ * A mapping from file system block to log block. ++ */ ++struct blk_mapping { ++ sector_t fsblk; ++ sector_t logblk; ++}; ++ ++/* ++ * An RB-tree entry wrapper for blk_mapping with extra housekeeping information. ++ */ ++struct jmap_entry { ++ struct rb_node rb_node; ++ ++ /* The actual mapping information. */ ++ struct blk_mapping mapping; ++ ++ /* ++ * If a block that is mapped gets deleted, the revoked bit is set. A ++ * lookup for a deleted block fails. If a deleted block gets ++ * re-allocated as a metadata block, the mapping is updated and revoked ++ * bit is cleared. ++ */ ++ bool revoked; ++ ++ /* ++ * All log blocks that are part of the same transaction in the log are ++ * chained with a linked list. The root of the list is stored in the ++ * transaction_info structure described below. ++ */ ++ struct list_head list; ++ ++ /* ++ * The last time when fsblk was written again to the journal and ++ * therefore was remapped to a different log block. ++ */ ++ unsigned long fsblk_last_modified; ++ ++ /* ++ * Index of the transaction in the transaction_info_buffer (described ++ * below) of which the log block is part of. ++ */ ++ int t_idx; ++}; ++ ++/* ++ * Housekeeping information about committed transaction. ++ */ ++struct transaction_info { ++ /* Id of the transaction */ ++ tid_t tid; ++ ++ /* Offset where the transaction starts in the log */ ++ sector_t offset; ++ ++ /* ++ * A list of live log blocks referenced in the RB-tree that belong to ++ * this transaction. It is used during cleaning to locate live blocks ++ * and migrate them to appropriate location. If this list is empty, ++ * then the transaction does not contain any live blocks and we can ++ * reuse its space. If this list is not empty, then we can quickly ++ * locate all the live blocks in this transaction. ++ */ ++ struct list_head live_logblks; ++}; ++ ++/* ++ * An array of transaction_info structures about all the transactions in the ++ * log. Since there can only be a limited number of transactions in the log, we ++ * use a circular buffer to store housekeeping information about transactions. 
++ */ ++struct transaction_infos { ++ struct transaction_info *buf; ++ int head; ++ int tail; ++}; ++ ++extern int jbd2_smr_journal_init(journal_t *journal); ++extern void jbd2_smr_journal_exit(journal_t *journal); ++ ++extern int jbd2_journal_init_jmap_cache(void); ++extern void jbd2_journal_destroy_jmap_cache(void); ++ ++extern int jbd2_init_transaction_infos(journal_t *journal); ++extern void jbd2_free_transaction_infos(journal_t *journal); ++extern int jbd2_transaction_infos_add(journal_t *journal, ++ transaction_t *transaction, ++ struct blk_mapping *mappings, ++ int nr_mappings); ++ ++extern struct jmap_entry *jbd2_jmap_lookup(journal_t *journal, sector_t fsblk, ++ const char *func); ++extern void jbd2_jmap_revoke(journal_t *journal, sector_t fsblk); ++extern void jbd2_jmap_cancel_revoke(journal_t *journal, sector_t fsblk); ++extern void jbd2_submit_bh(journal_t *journal, int rw, int op_flags, ++ struct buffer_head *bh, const char *func); ++extern int read_block_from_log(journal_t *journal, struct buffer_head *bh, ++ int op_flags, sector_t blk); ++extern void jbd2_ll_rw_block(journal_t *journal, int rw, int op_flags, int nr, ++ struct buffer_head *bhs[], const char *func); ++extern int jbd2_bh_submit_read(journal_t *journal, struct buffer_head *bh, ++ const char *func); ++ ++#endif +diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h +index c1d1f3eb242d..bc1511a425ec 100644 +--- a/include/trace/events/jbd2.h ++++ b/include/trace/events/jbd2.h +@@ -379,6 +379,175 @@ TRACE_EVENT(jbd2_lock_buffer_stall, + __entry->stall_ms) + ); + ++TRACE_EVENT(jbd2_jmap_replace, ++ ++ TP_PROTO(struct jmap_entry *jentry, struct blk_mapping *mapping, \ ++ int t_idx), ++ ++ TP_ARGS(jentry, mapping, t_idx), ++ ++ TP_STRUCT__entry( ++ __field(sector_t, fsblk ) ++ __field(sector_t, old_logblk ) ++ __field(sector_t, new_logblk ) ++ __field(int, old_t_idx ) ++ __field(int, new_t_idx ) ++ ), ++ ++ TP_fast_assign( ++ __entry->fsblk = mapping->fsblk; ++ __entry->old_logblk = jentry->mapping.logblk; ++ __entry->new_logblk = mapping->logblk; ++ __entry->old_t_idx = jentry->t_idx; ++ __entry->new_t_idx = t_idx; ++ ), ++ ++ TP_printk("remap %llu from %llu to %llu, move from transaction at index %d to transaction at index %d", ++ (unsigned long long) __entry->fsblk, ++ (unsigned long long) __entry->old_logblk, ++ (unsigned long long) __entry->new_logblk, ++ __entry->old_t_idx, ++ __entry->new_t_idx) ++); ++ ++TRACE_EVENT(jbd2_jmap_insert, ++ ++ TP_PROTO(struct blk_mapping *mapping, int t_idx), ++ ++ TP_ARGS(mapping, t_idx), ++ ++ TP_STRUCT__entry( ++ __field(sector_t, fsblk ) ++ __field(sector_t, logblk) ++ __field(int, t_idx) ++ ), ++ ++ TP_fast_assign( ++ __entry->fsblk = mapping->fsblk; ++ __entry->logblk = mapping->logblk; ++ __entry->t_idx = t_idx; ++ ), ++ ++ TP_printk("map %llu to %llu, insert to transaction %d", ++ (unsigned long long) __entry->fsblk, ++ (unsigned long long) __entry->logblk, ++ __entry->t_idx) ++); ++ ++TRACE_EVENT(jbd2_jmap_lookup, ++ ++ TP_PROTO(sector_t fsblk, sector_t logblk, const char *func), ++ ++ TP_ARGS(fsblk, logblk, func), ++ ++ TP_STRUCT__entry( ++ __field(sector_t, fsblk ) ++ __field(sector_t, logblk) ++ __string(func, func) ++ ), ++ ++ TP_fast_assign( ++ __entry->fsblk = fsblk; ++ __entry->logblk = logblk; ++ __assign_str(func, func); ++ ), ++ ++ TP_printk("%s: lookup %llu -> %llu", ++ __get_str(func), ++ (unsigned long long) __entry->fsblk, ++ (unsigned long long) __entry->logblk) ++); ++ ++TRACE_EVENT(jbd2_jmap_printf, ++ ++ TP_PROTO(const char *s), ++ ++ 
TP_ARGS(s), ++ ++ TP_STRUCT__entry( ++ __string(s, s) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(s, s); ++ ), ++ ++ TP_printk("%s", ++ __get_str(s)) ++); ++ ++TRACE_EVENT(jbd2_jmap_printf1, ++ ++ TP_PROTO(const char *s, sector_t fsblk), ++ ++ TP_ARGS(s, fsblk), ++ ++ TP_STRUCT__entry( ++ __string(s, s) ++ __field(sector_t, fsblk ) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(s, s); ++ __entry->fsblk = fsblk; ++ ), ++ ++ TP_printk("%s: %llu", ++ __get_str(s), ++ (unsigned long long) __entry->fsblk) ++); ++ ++TRACE_EVENT(jbd2_jmap_printf2, ++ ++ TP_PROTO(const char *s, sector_t fsblk, sector_t logblk), ++ ++ TP_ARGS(s, fsblk, logblk), ++ ++ TP_STRUCT__entry( ++ __string(s, s) ++ __field(sector_t, fsblk ) ++ __field(sector_t, logblk) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(s, s); ++ __entry->fsblk = fsblk; ++ __entry->logblk = logblk; ++ ), ++ ++ TP_printk("%s: %llu:%llu", ++ __get_str(s), ++ (unsigned long long) __entry->fsblk, ++ (unsigned long long) __entry->logblk) ++); ++ ++TRACE_EVENT(jbd2_transaction_infos_add, ++ ++ TP_PROTO(int t_idx, struct transaction_info *ti, int nr_mappings), ++ ++ TP_ARGS(t_idx, ti, nr_mappings), ++ ++ TP_STRUCT__entry( ++ __field(int, t_idx ) ++ __field(tid_t, tid ) ++ __field(sector_t, offset) ++ __field(int, nr_mappings) ++ ), ++ ++ TP_fast_assign( ++ __entry->t_idx = t_idx; ++ __entry->tid = ti->tid; ++ __entry->offset = ti->offset; ++ __entry->nr_mappings = nr_mappings; ++ ), ++ ++ TP_printk("inserted transaction %u (offset %llu) at index %d with %d mappings", ++ __entry->tid, ++ (unsigned long long) __entry->offset, ++ __entry->t_idx, ++ __entry->nr_mappings) ++); ++ + #endif /* _TRACE_JBD2_H */ + + /* This part must be outside protection */ diff --git a/allow-ext4_ext_truncate-to-return-an-error b/allow-ext4_ext_truncate-to-return-an-error new file mode 100644 index 00000000..72416404 --- /dev/null +++ b/allow-ext4_ext_truncate-to-return-an-error @@ -0,0 +1,84 @@ +ext4: allow ext4_ext_truncate() to return an error + +Return errors to the caller instead of declaring the file system +corrupted. + +Signed-off-by: Theodore Ts'o +--- + fs/ext4/ext4.h | 2 +- + fs/ext4/extents.c | 15 +++++++-------- + fs/ext4/inode.c | 4 +++- + 3 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index be2282dcde7d..54211c7876f8 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -3128,7 +3128,7 @@ extern int ext4_ext_writepage_trans_blocks(struct inode *, int); + extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); + extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags); +-extern void ext4_ext_truncate(handle_t *, struct inode *); ++extern int ext4_ext_truncate(handle_t *, struct inode *); + extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end); + extern void ext4_ext_init(struct super_block *); +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index c930a0110fb4..d3b119499c53 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4631,7 +4631,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + return err ? 
err : allocated; + } + +-void ext4_ext_truncate(handle_t *handle, struct inode *inode) ++int ext4_ext_truncate(handle_t *handle, struct inode *inode) + { + struct super_block *sb = inode->i_sb; + ext4_lblk_t last_block; +@@ -4645,7 +4645,9 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) + + /* we have to know where to truncate from in crash case */ + EXT4_I(inode)->i_disksize = inode->i_size; +- ext4_mark_inode_dirty(handle, inode); ++ err = ext4_mark_inode_dirty(handle, inode); ++ if (err) ++ return err; + + last_block = (inode->i_size + sb->s_blocksize - 1) + >> EXT4_BLOCK_SIZE_BITS(sb); +@@ -4657,12 +4659,9 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } +- if (err) { +- ext4_std_error(inode->i_sb, err); +- return; +- } +- err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); +- ext4_std_error(inode->i_sb, err); ++ if (err) ++ return err; ++ return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + } + + static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 40ea090d2e0e..7f32899c9701 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4167,11 +4167,13 @@ int ext4_truncate(struct inode *inode) + ext4_discard_preallocations(inode); + + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) +- ext4_ext_truncate(handle, inode); ++ err = ext4_ext_truncate(handle, inode); + else + ext4_ind_truncate(handle, inode); + + up_write(&ei->i_data_sem); ++ if (err) ++ goto out_stop; + + if (IS_SYNC(inode)) + ext4_handle_sync(handle); diff --git a/allow-ext4_truncate-to-return-an-error b/allow-ext4_truncate-to-return-an-error new file mode 100644 index 00000000..8ecf2637 --- /dev/null +++ b/allow-ext4_truncate-to-return-an-error @@ -0,0 +1,189 @@ +ext4: allow ext4_truncate() to return an error + +This allows us to properly propagate errors back up to +ext4_truncate()'s callers. This also means we no longer have to +silently ignore some errors (e.g., when trying to add the inode to the +orphan inode list). 
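
Concretely, callers now check the return value instead of relying on ext4_truncate() to report the error itself; the orphan-cleanup hunk in fs/ext4/super.c below is representative of the new calling convention (fragment shown for illustration only):

        ret = ext4_truncate(inode);
        if (ret)
                ext4_std_error(inode->i_sb, ret);
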
+ +Signed-off-by: Theodore Ts'o +--- + fs/ext4/ext4.h | 2 +- + fs/ext4/inode.c | 41 ++++++++++++++++++++++++++--------------- + fs/ext4/ioctl.c | 7 +++++-- + fs/ext4/super.c | 6 ++++-- + 4 files changed, 36 insertions(+), 20 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 282a51b07c57..be2282dcde7d 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -2491,7 +2491,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); + extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); + extern int ext4_inode_attach_jinode(struct inode *inode); + extern int ext4_can_truncate(struct inode *inode); +-extern void ext4_truncate(struct inode *); ++extern int ext4_truncate(struct inode *); + extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); + extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); + extern void ext4_set_inode_flags(struct inode *); +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 9c064727ed62..40ea090d2e0e 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode) + "couldn't mark inode dirty (err %d)", err); + goto stop_handle; + } +- if (inode->i_blocks) +- ext4_truncate(inode); ++ if (inode->i_blocks) { ++ err = ext4_truncate(inode); ++ if (err) { ++ ext4_error(inode->i_sb, ++ "couldn't truncate inode %lu (err %d)", ++ inode->i_ino, err); ++ goto stop_handle; ++ } ++ } + + /* + * ext4_ext_truncate() doesn't reserve any slop when it +@@ -4091,10 +4098,11 @@ int ext4_inode_attach_jinode(struct inode *inode) + * that's fine - as long as they are linked from the inode, the post-crash + * ext4_truncate() run will find them and release them. + */ +-void ext4_truncate(struct inode *inode) ++int ext4_truncate(struct inode *inode) + { + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int credits; ++ int err = 0; + handle_t *handle; + struct address_space *mapping = inode->i_mapping; + +@@ -4108,7 +4116,7 @@ void ext4_truncate(struct inode *inode) + trace_ext4_truncate_enter(inode); + + if (!ext4_can_truncate(inode)) +- return; ++ return 0; + + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); + +@@ -4120,13 +4128,13 @@ void ext4_truncate(struct inode *inode) + + ext4_inline_data_truncate(inode, &has_inline); + if (has_inline) +- return; ++ return 0; + } + + /* If we zero-out tail of the page, we have to create jinode for jbd2 */ + if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { + if (ext4_inode_attach_jinode(inode) < 0) +- return; ++ return 0; + } + + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) +@@ -4135,10 +4143,8 @@ void ext4_truncate(struct inode *inode) + credits = ext4_blocks_for_truncate(inode); + + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); +- if (IS_ERR(handle)) { +- ext4_std_error(inode->i_sb, PTR_ERR(handle)); +- return; +- } ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); + + if (inode->i_size & (inode->i_sb->s_blocksize - 1)) + ext4_block_truncate_page(handle, mapping, inode->i_size); +@@ -4152,7 +4158,8 @@ void ext4_truncate(struct inode *inode) + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. 
+ */ +- if (ext4_orphan_add(handle, inode)) ++ err = ext4_orphan_add(handle, inode); ++ if (err) + goto out_stop; + + down_write(&EXT4_I(inode)->i_data_sem); +@@ -4185,6 +4192,7 @@ void ext4_truncate(struct inode *inode) + ext4_journal_stop(handle); + + trace_ext4_truncate_exit(inode); ++ return err; + } + + /* +@@ -5199,12 +5207,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + * in data=journal mode to make pages freeable. + */ + truncate_pagecache(inode, inode->i_size); +- if (shrink) +- ext4_truncate(inode); ++ if (shrink) { ++ rc = ext4_truncate(inode); ++ if (rc) ++ error = rc; ++ } + up_write(&EXT4_I(inode)->i_mmap_sem); + } + +- if (!rc) { ++ if (!error) { + setattr_copy(inode, attr); + mark_inode_dirty(inode); + } +@@ -5216,7 +5227,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + if (orphan && inode->i_nlink) + ext4_orphan_del(NULL, inode); + +- if (!rc && (ia_valid & ATTR_MODE)) ++ if (!error && (ia_valid & ATTR_MODE)) + rc = posix_acl_chmod(inode, inode->i_mode); + + err_out: +diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c +index bf5ae8ebbc97..99862a3726fc 100644 +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -248,8 +248,11 @@ static int ext4_ioctl_setflags(struct inode *inode, + err = -EOPNOTSUPP; + goto flags_out; + } +- } else if (oldflags & EXT4_EOFBLOCKS_FL) +- ext4_truncate(inode); ++ } else if (oldflags & EXT4_EOFBLOCKS_FL) { ++ err = ext4_truncate(inode); ++ if (err) ++ goto flags_out; ++ } + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); + if (IS_ERR(handle)) { +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 20da99da0a34..e4f61c39328a 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2330,7 +2330,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, + struct ext4_super_block *es) + { + unsigned int s_flags = sb->s_flags; +- int nr_orphans = 0, nr_truncates = 0; ++ int ret, nr_orphans = 0, nr_truncates = 0; + #ifdef CONFIG_QUOTA + int i; + #endif +@@ -2412,7 +2412,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, + inode->i_ino, inode->i_size); + inode_lock(inode); + truncate_inode_pages(inode->i_mapping, inode->i_size); +- ext4_truncate(inode); ++ ret = ext4_truncate(inode); ++ if (ret) ++ ext4_std_error(inode->i_sb, ret); + inode_unlock(inode); + nr_truncates++; + } else { diff --git a/cleaner b/cleaner new file mode 100644 index 00000000..67e0e2e2 --- /dev/null +++ b/cleaner @@ -0,0 +1,327 @@ +Introduce cleaner + +From: Abutalib Aghayev + +An experimental cleaner. Copy the live blocks from the transaction at the +tail in batches to the transaction at the head. After a commit ends, check +if free space is below watermark and start cleaning until free space is +above high watermark. 
+ +Signed-off-by: Abutalib Aghayev + +--- + fs/jbd2/Makefile | 2 +- + fs/jbd2/jmap.c | 43 ++++++++++++++++++++++++++++++----- + fs/jbd2/journal.c | 12 +++++++++- + include/linux/jbd2.h | 6 ++++- + include/linux/jmap.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ + 5 files changed, 151 insertions(+), 23 deletions(-) + +diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile +index a54f50b3a06e..b6a2dddcc0a7 100644 +--- a/fs/jbd2/Makefile ++++ b/fs/jbd2/Makefile +@@ -5,4 +5,4 @@ + obj-$(CONFIG_JBD2) += jbd2.o + + jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o \ +- jmap.o ++ jmap.o cleaner.o +diff --git a/fs/jbd2/jmap.c b/fs/jbd2/jmap.c +index 8c844f65eeaa..693b3e8d736c 100644 +--- a/fs/jbd2/jmap.c ++++ b/fs/jbd2/jmap.c +@@ -38,7 +38,7 @@ int jbd2_init_transaction_infos(journal_t *journal) + } + + for (i = 0; i < MAX_LIVE_TRANSACTIONS; ++i) +- INIT_LIST_HEAD(&tis->buf[i].live_logblks); ++ INIT_LIST_HEAD(&tis->buf[i].live_blks); + + journal->j_transaction_infos = tis; + return 0; +@@ -91,15 +91,26 @@ static int process_existing_mappings(journal_t *journal, + * We are either deleting the entry because it was revoked, or + * we are moving it to the live blocks list of this transaction. + * In either case, we remove it from its existing list. ++ * However, before removing it we check to see if this is an ++ * entry in the live blocks list of the tail transaction a ++ * pointer to whom is cached by the cleaner and update the ++ * cached pointer if so. + */ +- list_del(&je->list); ++ spin_lock(&journal->j_cleaner_ctx->pos_lock); ++ if (je == journal->j_cleaner_ctx->pos) { ++ journal->j_cleaner_ctx->pos = list_next_entry(je, list); ++ trace_jbd2_jmap_printf1("updating pos to", ++ (unsigned long long) journal->j_cleaner_ctx->pos); ++ } ++ list_del(&je->list); ++ spin_unlock(&journal->j_cleaner_ctx->pos_lock); + + if (je->revoked) { + rb_erase(&je->rb_node, &journal->j_jmap); + kmem_cache_free(jbd2_jmap_cache, je); + } else { + trace_jbd2_jmap_replace(je, &mappings[i], t_idx); +- fill_entry(je, &mappings[i], t_idx, &ti->live_logblks); ++ fill_entry(je, &mappings[i], t_idx, &ti->live_blks); + } + } + return nr_new; +@@ -161,8 +172,7 @@ static void add_new_mappings(journal_t *journal, struct transaction_info *ti, + else + BUG_ON(1); + } +- fill_entry(new_entries[i], &mappings[i], t_idx, +- &ti->live_logblks); ++ fill_entry(new_entries[i], &mappings[i], t_idx, &ti->live_blks); + rb_link_node(&new_entries[i]->rb_node, parent, p); + rb_insert_color(&new_entries[i]->rb_node, &journal->j_jmap); + trace_jbd2_jmap_insert(&mappings[i], t_idx); +@@ -189,7 +199,9 @@ int jbd2_transaction_infos_add(journal_t *journal, transaction_t *transaction, + * We are possibly reusing space of an old transaction_info. The old + * transaction should not have any live blocks in it. 
+ */ +- BUG_ON(!list_empty(&ti->live_logblks)); ++ BUG_ON(!list_empty(&ti->live_blks)); ++ ++ atomic_inc(&journal->j_cleaner_ctx->nr_txns_committed); + + write_lock(&journal->j_jmap_lock); + nr_new = process_existing_mappings(journal, ti, t_idx, mappings, +@@ -432,12 +444,31 @@ int jbd2_bh_submit_read(journal_t *journal, struct buffer_head *bh, + + int jbd2_smr_journal_init(journal_t *journal) + { ++ journal->j_cleaner_ctx = kzalloc(sizeof(struct cleaner_ctx), ++ GFP_KERNEL); ++ if (!journal->j_cleaner_ctx) ++ return -ENOMEM; ++ ++ journal->j_cleaner_ctx->journal = journal; ++ journal->j_cleaner_ctx->pos = NULL; ++ spin_lock_init(&journal->j_cleaner_ctx->pos_lock); ++ atomic_set(&journal->j_cleaner_ctx->cleaning, 0); ++ atomic_set(&journal->j_cleaner_ctx->batch_in_progress, 0); ++ atomic_set(&journal->j_cleaner_ctx->nr_pending_reads, 0); ++ atomic_set(&journal->j_cleaner_ctx->nr_txns_committed, 0); ++ atomic_set(&journal->j_cleaner_ctx->nr_txns_cleaned, 0); ++ init_completion(&journal->j_cleaner_ctx->live_block_reads); ++ + journal->j_jmap = RB_ROOT; + rwlock_init(&journal->j_jmap_lock); ++ + return jbd2_init_transaction_infos(journal); + } + + void jbd2_smr_journal_exit(journal_t *journal) + { ++ atomic_set(&journal->j_cleaner_ctx->cleaning, 0); ++ flush_work(&journal->j_cleaner_ctx->work); ++ kfree(journal->j_cleaner_ctx); + jbd2_free_transaction_infos(journal); + } +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index 0cbfb7fdc45d..8e305aacef48 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -51,7 +51,7 @@ + #include + + #ifdef CONFIG_JBD2_DEBUG +-ushort jbd2_journal_enable_debug __read_mostly; ++ushort jbd2_journal_enable_debug __read_mostly = 1; + EXPORT_SYMBOL(jbd2_journal_enable_debug); + + module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644); +@@ -227,6 +227,14 @@ static int kjournald2(void *arg) + } + + wake_up(&journal->j_wait_done_commit); ++ ++ if (cleaning(journal) || low_on_space(journal)) { ++ if (try_to_move_tail(journal) && high_on_space(journal)) ++ stop_cleaning(journal); ++ else ++ start_cleaning(journal); ++ } ++ + if (freezing(current)) { + /* + * The simpler the better. 
Flushing journal isn't a +@@ -255,6 +263,8 @@ static int kjournald2(void *arg) + should_sleep = 0; + if (journal->j_flags & JBD2_UNMOUNT) + should_sleep = 0; ++ if (cleaning_batch_complete(journal)) ++ should_sleep = 0; + if (should_sleep) { + write_unlock(&journal->j_state_lock); + schedule(); +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index 317efb491569..350d5d229b68 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -735,7 +735,8 @@ jbd2_time_diff(unsigned long start, unsigned long end) + * @j_superblock: Second part of superblock buffer + * @j_map: A map from file system blocks to log blocks + * @j_transaction_infos: An array of information structures per live transaction +- * @j_map_lock: Protect j_jmap and j_transaction_infos ++ * @j_jmap_lock: Protect j_jmap and j_transaction_infos ++ * @j_cleaner_ctx: Cleaner state + * @j_format_version: Version of the superblock format + * @j_state_lock: Protect the various scalars in the journal + * @j_barrier_count: Number of processes waiting to create a barrier lock +@@ -820,6 +821,9 @@ struct journal_s + /* Protect j_jmap and j_transaction_infos */ + rwlock_t j_jmap_lock; + ++ /* Cleaner state */ ++ struct cleaner_ctx *j_cleaner_ctx; ++ + /* Version of the superblock format */ + int j_format_version; + +diff --git a/include/linux/jmap.h b/include/linux/jmap.h +index d068358380b0..b734551ddb67 100644 +--- a/include/linux/jmap.h ++++ b/include/linux/jmap.h +@@ -5,6 +5,14 @@ + #include + #include + #include ++#include ++ ++/* ++ * Forward declaration for journal_t so that we don't get circular dependency ++ * between jbd2.h and jmap.h ++ */ ++struct journal_s; ++typedef struct journal_s journal_t; + + /* + * Maximum number of transactions. This guides the size of the circular buffer +@@ -17,13 +25,6 @@ + #define MAX_LIVE_TRANSACTIONS 65536 + + /* +- * Forward declaration for journal_t so that we don't get circular dependency +- * between jbd2.h and jmap.h +- */ +-struct journal_s; +-typedef struct journal_s journal_t; +- +-/* + * A mapping from file system block to log block. + */ + struct blk_mapping { +@@ -79,14 +80,14 @@ struct transaction_info { + sector_t offset; + + /* +- * A list of live log blocks referenced in the RB-tree that belong to +- * this transaction. It is used during cleaning to locate live blocks +- * and migrate them to appropriate location. If this list is empty, +- * then the transaction does not contain any live blocks and we can +- * reuse its space. If this list is not empty, then we can quickly +- * locate all the live blocks in this transaction. ++ * A list of live blocks referenced in the RB-tree that belong to this ++ * transaction. It is used during cleaning to locate live blocks and ++ * migrate them to appropriate location. If this list is empty, then ++ * the transaction does not contain any live blocks and we can reuse its ++ * space. If this list is not empty, then we can quickly locate all the ++ * live blocks in this transaction. + */ +- struct list_head live_logblks; ++ struct list_head live_blks; + }; + + /* +@@ -126,4 +127,86 @@ extern void jbd2_ll_rw_block(journal_t *journal, int rw, int op_flags, int nr, + extern int jbd2_bh_submit_read(journal_t *journal, struct buffer_head *bh, + const char *func); + ++/* ++ * Cleaner stuff is below. ++ */ ++ ++/* ++ * Number of blocks to read at once, for cleaning. ++ */ ++#define CLEANER_BATCH_SIZE 16 ++ ++/* ++ * Context structure for the cleaner. 
++ */ ++struct cleaner_ctx { ++ /* ++ * We set to true once we drop below low watermark and it stays so until ++ * we rise above the high watermark. It is accessed by the commit ++ * thread and the foreground kernel threads during the journal ++ * destruction, therefore it is atomic. ++ */ ++ atomic_t cleaning; ++ ++ /* ++ * We clean in batches of blocks. This flag indicates if we are ++ * currently cleaning a batch. It is accessed by the commit thread and ++ * the cleaner thread, therefore it is atomic. ++ */ ++ atomic_t batch_in_progress; ++ ++ /* ++ * We find live blocks to clean from the live blocks list of the ++ * transaction at the tail. This list can be larger than our batch size ++ * and we may need several attempts to process it. We cache the ++ * position of the next entry to start from in |pos|. Since cleaner ++ * thread can run concurrently with the commit thread that can modify ++ * the live blocks list of the transaction at the tail (for example, if ++ * it needs to drop a revoked entry or if |pos| points to an entry that ++ * has been updated and should move from the live blocks list of the ++ * transaction at the tail to the live blocks list of current ++ * transaction) we protect |pos| with |pos_lock|. ++ */ ++ struct jmap_entry *pos; ++ spinlock_t pos_lock; ++ ++ /* ++ * Live block mappings for the blocks that we copy in a batch. ++ */ ++ struct blk_mapping mappings[CLEANER_BATCH_SIZE]; ++ ++ /* ++ * Buffer heads for the live blocks read in a batch. ++ */ ++ struct buffer_head *bhs[CLEANER_BATCH_SIZE]; ++ ++ /* ++ * Number of pending reads in a batch. Every submitted read increments ++ * it and every completed read decrements it. ++ */ ++ atomic_t nr_pending_reads; ++ ++ /* ++ * The cleaner thread sleeps on this condition variable until the last ++ * completed read wakes the up the cleaner thread. ++ */ ++ struct completion live_block_reads; ++ ++ /* TODO: temporary for debugging, remove once done. */ ++ atomic_t nr_txns_committed; ++ atomic_t nr_txns_cleaned; ++ ++ journal_t *journal; ++ struct work_struct work; ++}; ++ ++extern int low_on_space(journal_t *journal); ++extern int high_on_space(journal_t *journal); ++extern bool cleaning(journal_t *journal); ++extern void stop_cleaning(journal_t *journal); ++extern void start_cleaning(journal_t *journal); ++extern void clean_next_batch(journal_t *journal); ++extern bool cleaning_batch_complete(journal_t *journal); ++extern bool try_to_move_tail(journal_t *journal); ++ + #endif diff --git a/disable-writeback b/disable-writeback new file mode 100644 index 00000000..122e48ba --- /dev/null +++ b/disable-writeback @@ -0,0 +1,22 @@ +Disable writeback + +From: Abutalib Aghayev + +Now that we have a working cleaner, disable writeback of metadata blocks. 
+ +Signed-off-by: Abutalib Aghayev + +diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c +index 67c1038..912a516 100644 +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -1861,8 +1861,7 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) + + __blist_del_buffer(list, jh); + jh->b_jlist = BJ_None; +- if (test_clear_buffer_jbddirty(bh)) +- mark_buffer_dirty(bh); /* Expose it to the VM */ ++ clear_buffer_jbddirty(bh); + } + + /* diff --git a/dont-lock-buffer-head-in-ext4_commit_super-if-holding-spinlock b/dont-lock-buffer-head-in-ext4_commit_super-if-holding-spinlock new file mode 100644 index 00000000..08bcb6ed --- /dev/null +++ b/dont-lock-buffer-head-in-ext4_commit_super-if-holding-spinlock @@ -0,0 +1,62 @@ +ext4: don't lock buffer in ext4_commit_super if holding spinlock + +If there is an error reported in mballoc via ext4_grp_locked_error(), +the code is holding a spinlock, so ext4_commit_super() must not try to +lock the buffer head, or else it will trigger a BUG: + + BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358 + in_atomic(): 1, irqs_disabled(): 0, pid: 993, name: mount + CPU: 0 PID: 993 Comm: mount Not tainted 4.9.0-rc1-clouder1 #62 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 + ffff880006423548 ffffffff81318c89 ffffffff819ecdd0 0000000000000166 + ffff880006423558 ffffffff810810b0 ffff880006423580 ffffffff81081153 + ffff880006e5a1a0 ffff88000690e400 0000000000000000 ffff8800064235c0 + Call Trace: + [] dump_stack+0x67/0x9e + [] ___might_sleep+0xf0/0x140 + [] __might_sleep+0x53/0xb0 + [] ext4_commit_super+0x19c/0x290 + [] __ext4_grp_locked_error+0x14a/0x230 + [] ? __might_sleep+0x53/0xb0 + [] ext4_mb_generate_buddy+0x1de/0x320 + +Since ext4_grp_locked_error() calls ext4_commit_super with sync == 0 +(and it is the only caller which does so), avoid locking and unlocking +the buffer in this case. + +This can result in races with ext4_commit_super() if there are other +problems (which is what commit 4743f83990614 was trying to address), +but a Warning is better than BUG. + +Fixes: 4743f83990614 +Cc: stable@vger.kernel.org # 4.9 +Reported-by: Nikolay Borisov +Signed-off-by: Theodore Ts'o +--- + fs/ext4/super.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index e4f61c39328a..ff6f3ab09c7e 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4537,7 +4537,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) + &EXT4_SB(sb)->s_freeinodes_counter)); + BUFFER_TRACE(sbh, "marking dirty"); + ext4_superblock_csum_set(sb); +- lock_buffer(sbh); ++ if (sync) ++ lock_buffer(sbh); + if (buffer_write_io_error(sbh)) { + /* + * Oh, dear. A previous attempt to write the +@@ -4553,8 +4554,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) + set_buffer_uptodate(sbh); + } + mark_buffer_dirty(sbh); +- unlock_buffer(sbh); + if (sync) { ++ unlock_buffer(sbh); + error = __sync_dirty_buffer(sbh, + test_opt(sb, BARRIER) ? 
WRITE_FUA : WRITE_SYNC); + if (error) diff --git a/series b/series index 6607fcd3..83d4b1da 100644 --- a/series +++ b/series @@ -1,4 +1,8 @@ -# v4.9-rc2 +# 6da22013bb79 + +allow-ext4_truncate-to-return-an-error +allow-ext4_ext_truncate-to-return-an-error +dont-lock-buffer-head-in-ext4_commit_super-if-holding-spinlock #################################################### # unstable patches @@ -7,6 +11,11 @@ stable-boundary stable-boundary-undo.patch +add-support-for-log-metadata-block-tracking-in-log +add-indirection-to-metadata-block-read-paths +cleaner +disable-writeback + only-call-ext4_truncate-if-there-is-data-to-truncate migrate-to-use-vfs-crypto-engine diff --git a/timestamps b/timestamps index eef5d371..d156a105 100755 --- a/timestamps +++ b/timestamps @@ -39,7 +39,14 @@ touch -d @1470931112 fix-xattr-shifting-when-expanding-inodes-2 touch -d @1470931201 properly-align-shifted-xattrs-when-expanding-inodes touch -d @1470933535 avoid-deadlock-while-expanding-inode-size touch -d @1474558786 save-patch -touch -d @1476539911 stable-boundary -touch -d @1477774933 status -touch -d @1477775426 series -touch -d @1477775440 timestamps +touch -d @1478656656 add-support-for-log-metadata-block-tracking-in-log +touch -d @1478656658 add-indirection-to-metadata-block-read-paths +touch -d @1478656938 cleaner +touch -d @1478656998 disable-writeback +touch -d @1479092546 allow-ext4_truncate-to-return-an-error +touch -d @1479092548 allow-ext4_ext_truncate-to-return-an-error +touch -d @1479092549 dont-lock-buffer-head-in-ext4_commit_super-if-holding-spinlock +touch -d @1479092551 stable-boundary +touch -d @1479176106 status +touch -d @1479176225 series +touch -d @1479176302 timestamps -- 2.11.4.GIT