1 Add support for tracking metadata blocks in the log.
3 From: Abutalib Aghayev <agayev@cs.cmu.edu>
5 This patch adds two important data structures, jmap and transaction_infos,
6 and supporting functions. Jmap is a map from a metadata block number to
7 the log block number. When a transaction commits, jmap is updated with new
8 mappings; when a block is revoked, the mapping for the block is removed
9 from the jmap. Transaction_infos is an array of transaction_info
10 structures that contain information about transactions currently present in
11 the log. Each structure holds a linked list of the live blocks in its
12 transaction, and the list is updated after every commit to stay up-to-date.
13 The transaction_infos array will be used by the cleaner for identifying live
14 blocks and migrating them to an appropriate location.
16 [ Modified by tytso to conditionalize changes on the JBD2_LAZY journal flag ]
18 Signed-off-by: Abutalib Aghayev <agayev@cs.cmu.edu>
19 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
21 fs/jbd2/Makefile | 3 +-
22 fs/jbd2/commit.c | 25 +++++
23 fs/jbd2/jmap.c | 510 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
24 fs/jbd2/jmap.h | 128 ++++++++++++++++++++++
25 fs/jbd2/journal.c | 13 +++
26 include/linux/jbd2.h | 24 ++++
27 include/trace/events/jbd2.h | 196 +++++++++++++++++++++++++++++++++
28 7 files changed, 898 insertions(+), 1 deletion(-)
30 diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile
31 index 802a3413872a..a54f50b3a06e 100644
32 --- a/fs/jbd2/Makefile
33 +++ b/fs/jbd2/Makefile
36 obj-$(CONFIG_JBD2) += jbd2.o
38 -jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o
39 +jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o \
41 diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
42 index b6b194ec1b4f..a3228d515404 100644
43 --- a/fs/jbd2/commit.c
44 +++ b/fs/jbd2/commit.c
46 #include <linux/bitops.h>
47 #include <trace/events/jbd2.h>
52 * IO end handler for temporary buffer_heads handling writes to the journal.
54 @@ -362,6 +364,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
57 unsigned long long blocknr;
58 + struct blk_mapping *mappings = NULL;
59 + struct blk_mapping *map_ptr = NULL;
63 @@ -563,6 +567,14 @@ void jbd2_journal_commit_transaction(journal_t *journal)
64 J_ASSERT(commit_transaction->t_nr_buffers <=
65 atomic_read(&commit_transaction->t_outstanding_credits));
67 + if (journal->j_flags & JBD2_LAZY) {
68 + int nr_mappings = commit_transaction->t_nr_buffers;
70 + map_ptr = mappings = kmalloc(sizeof(*mappings) * nr_mappings, GFP_NOFS);
72 + jbd2_journal_abort(journal, -ENOMEM);
78 @@ -661,6 +673,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
81 jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
83 + map_ptr->fsblk = jh2bh(jh)->b_blocknr;
84 + map_ptr->logblk = blocknr;
88 /* Record the new block's tag in the current descriptor
90 @@ -895,6 +912,14 @@ void jbd2_journal_commit_transaction(journal_t *journal)
91 transaction can be removed from any checkpoint list it was on
95 + err = jbd2_transaction_infos_add(journal, commit_transaction,
96 + mappings, map_ptr - mappings);
98 + jbd2_journal_abort(journal, -ENOMEM);
102 jbd_debug(3, "JBD2: commit phase 6\n");
104 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
105 diff --git a/fs/jbd2/jmap.c b/fs/jbd2/jmap.c
107 index 000000000000..7de6f4a0a1dc
111 +#include <linux/blk_types.h>
112 +#include <linux/jbd2.h>
114 +#include <trace/events/jbd2.h>
116 +static struct kmem_cache *jbd2_jmap_cache;
118 +int jbd2_journal_init_jmap_cache(void)
120 + jbd2_jmap_cache = KMEM_CACHE(jmap_entry, SLAB_RECLAIM_ACCOUNT);
121 + if (!jbd2_jmap_cache)
126 +void jbd2_journal_destroy_jmap_cache(void)
128 + kmem_cache_destroy(jbd2_jmap_cache);
129 + jbd2_jmap_cache = NULL;
133 + * Allocate an array of transaction_info structures and initialize the list
134 + * heads inside them.
136 +int jbd2_init_transaction_infos(journal_t *journal)
139 + struct transaction_infos *tis = kzalloc(sizeof(*tis), GFP_KERNEL);
143 + tis->buf = kzalloc(sizeof(*tis->buf) * MAX_LIVE_TRANSACTIONS,
150 + for (i = 0; i < MAX_LIVE_TRANSACTIONS; ++i)
151 + INIT_LIST_HEAD(&tis->buf[i].live_blks);
153 + journal->j_transaction_infos = tis;
158 + * Free the array of transaction_info structures.
160 +void jbd2_free_transaction_infos(journal_t *journal)
162 + struct transaction_infos *tis = journal->j_transaction_infos;
170 + * Fill an entry to be stored in jmap.
172 +static void fill_entry(struct jmap_entry *entry, struct blk_mapping *mapping,
173 + int t_idx, struct list_head *list)
175 + entry->mapping = *mapping;
176 + entry->fsblk_last_modified = jiffies;
177 + entry->t_idx = t_idx;
178 + list_add(&entry->list, list);
182 + * A helper function for jbd2_transaction_infos_add. Scans through the mappings
183 + * array, dropping revoked entries from jmap and updating existing entries.
184 + * Moves the new mappings to the beginning of the mappings array and returns the
185 + * number of new mappings. Should be called with a write lock on j_jmap_lock.
187 +static int process_existing_mappings(journal_t *journal,
188 + struct transaction_info *ti, int t_idx,
189 + struct blk_mapping *mappings, int nr_mappings)
191 + struct jmap_entry *je;
194 + for (i = 0; i < nr_mappings; ++i) {
195 + je = jbd2_jmap_lookup(journal, mappings[i].fsblk, __func__);
197 + mappings[nr_new++] = mappings[i];
201 + * We are either deleting the entry because it was revoked, or
202 + * we are moving it to the live blocks list of this transaction.
203 + * In either case, we remove it from its existing list.
205 + list_del(&je->list);
208 + rb_erase(&je->rb_node, &journal->j_jmap);
209 + kmem_cache_free(jbd2_jmap_cache, je);
211 + trace_jbd2_jmap_replace(je, &mappings[i], t_idx);
212 + fill_entry(je, &mappings[i], t_idx, &ti->live_blks);
219 + * A helper function for jbd2_transaction_infos_add. Allocates an array of
220 + * pointers to new jmap_entry structures and returns the array if successful.
221 + * Otherwise, returns NULL.
223 +static struct jmap_entry **alloc_jmap_entries(int nr_entries)
225 + struct jmap_entry **jmap_entries;
228 + jmap_entries = kmalloc(sizeof(struct jmap_entry *) * nr_entries,
233 + for (i = 0; i < nr_entries; i++) {
234 + jmap_entries[i] = kmem_cache_zalloc(jbd2_jmap_cache, GFP_NOFS);
235 + if (!jmap_entries[i])
238 + return jmap_entries;
241 + for (i = 0; i < nr_entries && jmap_entries[i]; ++i)
242 + kmem_cache_free(jbd2_jmap_cache, jmap_entries[i]);
243 + kfree(jmap_entries);
248 + * A helper function for jbd2_transaction_infos_add. Adds new mappings to jmap
249 + * and updates the linked list of live logblks of the new transaction. Should
250 + * be called with write lock on j_jmap_lock.
252 +static void add_new_mappings(journal_t *journal, struct transaction_info *ti,
253 + int t_idx, struct blk_mapping *mappings,
254 + struct jmap_entry **new_entries, int nr_new)
256 + struct rb_node **p;
257 + struct rb_node *parent = NULL;
258 + struct jmap_entry *je;
261 + for (i = 0; i < nr_new; ++i) {
262 + p = &journal->j_jmap.rb_node;
265 + je = rb_entry(parent, struct jmap_entry, rb_node);
267 + if (mappings[i].fsblk < je->mapping.fsblk)
268 + p = &(*p)->rb_left;
269 + else if (mappings[i].fsblk > je->mapping.fsblk)
270 + p = &(*p)->rb_right;
274 + fill_entry(new_entries[i], &mappings[i], t_idx, &ti->live_blks);
275 + rb_link_node(&new_entries[i]->rb_node, parent, p);
276 + rb_insert_color(&new_entries[i]->rb_node, &journal->j_jmap);
277 + trace_jbd2_jmap_insert(&mappings[i], t_idx);
281 +void jbd2_add_new_transaction_infos(journal_t *journal, tid_t tid,
282 + unsigned long log_start)
284 + struct transaction_infos *tis = journal->j_transaction_infos;
285 + int t_idx = tis->head;
286 + struct transaction_info *ti = &tis->buf[t_idx];
289 + * We are possibly reusing space of an old transaction_info. The old
290 + * transaction should not have any live blocks in it.
292 + BUG_ON(!list_empty(&ti->live_blks));
294 + write_lock(&journal->j_jmap_lock);
296 + ti->offset = log_start;
297 + write_unlock(&journal->j_jmap_lock);
300 +int jbd2_add_mapping(journal_t *journal, struct blk_mapping *mapping)
302 + struct transaction_infos *tis = journal->j_transaction_infos;
303 + int t_idx = tis->head;
304 + struct transaction_info *ti = &tis->buf[t_idx];
305 + struct jmap_entry *new_entry;
308 + write_lock(&journal->j_jmap_lock);
309 + nr_new = process_existing_mappings(journal, ti, t_idx, mapping, 1);
310 + write_unlock(&journal->j_jmap_lock);
315 + new_entry = kmem_cache_zalloc(jbd2_jmap_cache, GFP_NOFS);
319 + write_lock(&journal->j_jmap_lock);
320 + add_new_mappings(journal, ti, t_idx, mapping, &new_entry, 1);
321 + write_unlock(&journal->j_jmap_lock);
325 +void jbd2_finish_transaction_infos(journal_t *journal)
327 + struct transaction_infos *tis = journal->j_transaction_infos;
329 + write_lock(&journal->j_jmap_lock);
330 + tis->head = (tis->head + 1) & (MAX_LIVE_TRANSACTIONS - 1);
331 + write_unlock(&journal->j_jmap_lock);
335 + * This function is called after a transaction commits. It adds a new
336 + * transaction_info structure to transaction_infos and populates the jmap with
337 + * the new mappings that are part of the committed transaction. It also adds
338 + * all the mappings to the linked list that is part of the transaction_info
341 +int jbd2_transaction_infos_add(journal_t *journal, transaction_t *transaction,
342 + struct blk_mapping *mappings, int nr_mappings)
344 + struct transaction_infos *tis = journal->j_transaction_infos;
345 + int t_idx = tis->head;
346 + struct transaction_info *ti = &tis->buf[t_idx];
347 + struct jmap_entry **new_entries = NULL;
351 + * We are possibly reusing space of an old transaction_info. The old
352 + * transaction should not have any live blocks in it.
354 + BUG_ON(!list_empty(&ti->live_blks));
356 + write_lock(&journal->j_jmap_lock);
357 + nr_new = process_existing_mappings(journal, ti, t_idx, mappings,
359 + write_unlock(&journal->j_jmap_lock);
364 + new_entries = alloc_jmap_entries(nr_new);
368 + write_lock(&journal->j_jmap_lock);
369 + add_new_mappings(journal, ti, t_idx, mappings, new_entries, nr_new);
370 + write_unlock(&journal->j_jmap_lock);
372 + kfree(new_entries);
375 + write_lock(&journal->j_jmap_lock);
376 + ti->tid = transaction->t_tid;
377 + ti->offset = transaction->t_log_start;
378 + tis->head = (tis->head + 1) & (MAX_LIVE_TRANSACTIONS - 1);
379 + write_unlock(&journal->j_jmap_lock);
381 + trace_jbd2_transaction_infos_add(t_idx, ti, nr_mappings);
386 + * Look up fsblk in the jmap and return the corresponding jmap entry if found.
387 + * Should be called with a read lock on j_jmap_lock.
389 +struct jmap_entry *jbd2_jmap_lookup(journal_t *journal, sector_t fsblk,
396 + for (p = journal->j_jmap.rb_node; p; ) {
397 + struct jmap_entry *je = rb_entry(p, struct jmap_entry, rb_node);
398 + if (je->mapping.fsblk > fsblk)
400 + else if (je->mapping.fsblk < fsblk)
403 + trace_jbd2_jmap_lookup(fsblk, je->mapping.logblk, func);
407 + trace_jbd2_jmap_lookup(fsblk, 0, func);
412 + * Revoke a mapping for the fsblk in the jmap. A lookup for fsblk will return
413 + * NULL and the mapping will be removed from the jmap during commit, unless
414 + * fsblk is reallocated as a metadata block.
416 +void jbd2_jmap_revoke(journal_t *journal, sector_t fsblk)
418 + struct jmap_entry *je;
420 + write_lock(&journal->j_jmap_lock);
421 + je = jbd2_jmap_lookup(journal, fsblk, __func__);
423 + * For now, since we do not construct jmap from the journal, it is
424 + * possible that a metadata block that was revoked is not in the jmap.
425 + * Eventually, this should not be the case and we should have a
426 + * BUG_ON(!je) here.
429 + if (WARN_ON(je->revoked))
430 + pr_err("JBD2: block %llu already revoked!\n",
431 + (unsigned long long) fsblk);
432 + je->revoked = true;
434 + write_unlock(&journal->j_jmap_lock);
438 + * Cancel a revoke for the fsblk in the jmap.
440 +void jbd2_jmap_cancel_revoke(journal_t *journal, sector_t fsblk)
442 + struct jmap_entry *je;
444 + write_lock(&journal->j_jmap_lock);
445 + je = jbd2_jmap_lookup(journal, fsblk, __func__);
447 + BUG_ON(!je->revoked);
448 + je->revoked = false;
449 + write_unlock(&journal->j_jmap_lock);
453 + * Read bh from its most up-to-date location, either from the file system or
456 + * If there is no mapping for the bh in jmap, this function acts like submit_bh.
457 + * Otherwise, it submits a read for the log block that the mapping points to.
458 + * Upon completion, bh will be filled with the contents of the block read
459 + * from the log.
461 +void jbd2_submit_bh(journal_t *journal, int rw, int op_flags,
462 + struct buffer_head *bh, const char *func)
464 + sector_t fsblk = bh->b_blocknr;
466 + struct jmap_entry *je;
468 + BUG_ON(!buffer_locked(bh));
470 + if (!journal || !(journal->j_flags & JBD2_LAZY)) {
471 + submit_bh(rw, op_flags, bh);
475 + read_lock(&journal->j_jmap_lock);
476 + je = jbd2_jmap_lookup(journal, fsblk, func);
478 + read_unlock(&journal->j_jmap_lock);
479 + submit_bh(rw, op_flags, bh);
482 + logblk = je->mapping.logblk;
483 + read_unlock(&journal->j_jmap_lock);
485 + BUG_ON(rw == WRITE);
486 + read_block_from_log(journal, bh, op_flags, logblk);
488 +EXPORT_SYMBOL(jbd2_submit_bh);
491 + * End_io handler for read_block_from_log that copies the contents of
492 + * log_bh read from log to the embedded bh.
494 +static void jbd2_end_log_read(struct buffer_head *log_bh, int uptodate)
496 + struct buffer_head *bh = log_bh->b_private;
498 + trace_jbd2_jmap_read_from_log(bh->b_blocknr, log_bh->b_blocknr,
501 + memcpy(bh->b_data, log_bh->b_data, log_bh->b_size);
503 + unlock_buffer(log_bh);
507 + bh->b_end_io(bh, uptodate);
511 + * This function fills |bh| with the contents of log block |blk|. Assume
512 + * jmap maps metadata block 123 to log block 100123. To read the
513 + * metadata block 123, we obtain a buffer head for it and call
514 + * read_block_from_log passing the obtained buffer head as |bh| and
515 + * 100123 as |blk|. If block 100123 is cached, then we copy the
516 + * contents to |bh| and return. Otherwise, we submit a request and
517 + * end_io handler copies the contents of block 100123 to |bh|.
518 + * Returns -ENOMEM if getblk fails, 1 if block is not cached, 0 if
521 +int read_block_from_log(journal_t *journal, struct buffer_head *bh,
522 + int op_flags, sector_t blk)
524 + struct buffer_head *log_bh;
526 + BUG_ON(!buffer_locked(bh));
528 + log_bh = __getblk(journal->j_fs_dev, blk, bh->b_size);
529 + if (unlikely(!log_bh)) {
530 + bh->b_end_io(bh, 0);
534 + lock_buffer(log_bh);
535 + if (buffer_uptodate(log_bh)) {
536 + memcpy(bh->b_data, log_bh->b_data, bh->b_size);
537 + unlock_buffer(log_bh);
539 + bh->b_end_io(bh, 1);
543 + log_bh->b_end_io = jbd2_end_log_read;
544 + log_bh->b_private = bh;
546 + submit_bh(READ, op_flags, log_bh);
551 + * Copy of ll_rw_block that uses jbd2_submit_bh instead of submit_bh.
553 +void jbd2_ll_rw_block(journal_t *journal, int rw, int op_flags,
554 + int nr, struct buffer_head *bhs[], const char *func)
558 + for (i = 0; i < nr; i++) {
559 + struct buffer_head *bh = bhs[i];
561 + if (!trylock_buffer(bh))
563 + BUG_ON(rw == WRITE);
564 + if (!buffer_uptodate(bh)) {
565 + bh->b_end_io = end_buffer_read_sync;
567 + jbd2_submit_bh(journal, rw, op_flags, bh, func);
573 +EXPORT_SYMBOL(jbd2_ll_rw_block);
576 + * Copy of bh_submit_read that uses jbd2_submit_bh instead of submit_bh.
578 +int jbd2_bh_submit_read(journal_t *journal, struct buffer_head *bh,
581 + BUG_ON(!buffer_locked(bh));
583 + if (buffer_uptodate(bh)) {
589 + bh->b_end_io = end_buffer_read_sync;
590 + jbd2_submit_bh(journal, READ, 0, bh, func);
591 + wait_on_buffer(bh);
592 + if (buffer_uptodate(bh))
596 +EXPORT_SYMBOL(jbd2_bh_submit_read);
598 +int jbd2_smr_journal_init(journal_t *journal)
600 + journal->j_jmap = RB_ROOT;
601 + rwlock_init(&journal->j_jmap_lock);
602 + return jbd2_init_transaction_infos(journal);
605 +void jbd2_smr_journal_exit(journal_t *journal)
607 + jbd2_free_transaction_infos(journal);
610 +void jbd2_sb_breadahead(journal_t *journal, struct super_block *sb,
613 + struct buffer_head *bh = __getblk(sb->s_bdev, block, sb->s_blocksize);
615 + jbd2_ll_rw_block(journal, REQ_OP_READ, REQ_RAHEAD, 1,
620 +EXPORT_SYMBOL(jbd2_sb_breadahead);
621 diff --git a/fs/jbd2/jmap.h b/fs/jbd2/jmap.h
623 index 000000000000..91564ce9bbda
627 +#ifndef _LINUX_JMAP_H
628 +#define _LINUX_JMAP_H
630 +#include <linux/buffer_head.h>
631 +#include <linux/journal-head.h>
632 +#include <linux/list.h>
633 +#include <linux/circ_buf.h>
634 +#include <linux/completion.h>
637 + * Forward declaration for journal_t so that we don't get circular dependency
638 + * between jbd2.h and jmap.h
641 +typedef struct journal_s journal_t;
644 + * Maximum number of transactions. This guides the size of the circular buffer
645 + * in which we store housekeeping information per transaction. We start
646 + * cleaning either when the circular buffer is full or when we hit the free
647 + * space threshold, whichever happens first. For starters, we make this
648 + * constant large to make sure that we start cleaning only when we hit the free
649 + * space threshold. Later we can empirically determine a sensible value.
651 +#define MAX_LIVE_TRANSACTIONS 65536
654 + * A mapping from file system block to log block.
656 +struct blk_mapping {
662 + * An RB-tree entry wrapper for blk_mapping with extra housekeeping information.
665 + struct rb_node rb_node;
667 + /* The actual mapping information. */
668 + struct blk_mapping mapping;
671 + * If a block that is mapped gets deleted, the revoked bit is set. A
672 + * lookup for a deleted block fails. If a deleted block gets
673 + * re-allocated as a metadata block, the mapping is updated and revoked
679 + * All log blocks that are part of the same transaction in the log are
680 + * chained with a linked list. The root of the list is stored in the
681 + * transaction_info structure described below.
683 + struct list_head list;
686 + * The last time when fsblk was written again to the journal and
687 + * therefore was remapped to a different log block.
689 + unsigned long fsblk_last_modified;
692 + * Index of the transaction in the transaction_infos buffer (described
693 + * below) that the log block is part of.
699 + * Housekeeping information about committed transaction.
701 +struct transaction_info {
702 + /* Id of the transaction */
705 + /* Offset where the transaction starts in the log */
709 + * A list of live blocks referenced in the RB-tree that belong to this
710 + * transaction. It is used during cleaning to locate live blocks and
711 + * migrate them to an appropriate location. If this list is empty, then
712 + * the transaction does not contain any live blocks and we can reuse its
713 + * space. If this list is not empty, then we can quickly locate all the
714 + * live blocks in this transaction.
716 + struct list_head live_blks;
720 + * An array of transaction_info structures about all the transactions in the
721 + * log. Since there can only be a limited number of transactions in the log, we
722 + * use a circular buffer to store housekeeping information about transactions.
724 +struct transaction_infos {
725 + struct transaction_info *buf;
730 +extern int jbd2_smr_journal_init(journal_t *journal);
731 +extern void jbd2_smr_journal_exit(journal_t *journal);
733 +extern int jbd2_journal_init_jmap_cache(void);
734 +extern void jbd2_journal_destroy_jmap_cache(void);
736 +extern int jbd2_init_transaction_infos(journal_t *journal);
737 +extern void jbd2_free_transaction_infos(journal_t *journal);
738 +extern void jbd2_add_new_transaction_infos(journal_t *journal, tid_t t_tid,
739 + unsigned long log_start);
740 +extern int jbd2_add_mapping(journal_t *journal, struct blk_mapping *mapping);
741 +extern void jbd2_finish_transaction_infos(journal_t *journal);
742 +extern int jbd2_transaction_infos_add(journal_t *journal,
743 + transaction_t *transaction,
744 + struct blk_mapping *mappings,
747 +extern struct jmap_entry *jbd2_jmap_lookup(journal_t *journal, sector_t fsblk,
749 +extern void jbd2_jmap_revoke(journal_t *journal, sector_t fsblk);
750 +extern void jbd2_jmap_cancel_revoke(journal_t *journal, sector_t fsblk);
751 +extern int read_block_from_log(journal_t *journal, struct buffer_head *bh,
752 + int op_flags, sector_t blk);
755 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
756 index b3b1d8ec1081..50ab9b5bc7f4 100644
757 --- a/fs/jbd2/journal.c
758 +++ b/fs/jbd2/journal.c
760 #include <linux/bitops.h>
761 #include <linux/ratelimit.h>
765 #define CREATE_TRACE_POINTS
766 #include <trace/events/jbd2.h>
768 @@ -1120,6 +1122,10 @@ static journal_t *journal_init_common(struct block_device *bdev,
769 journal->j_max_batch_time = 15000; /* 15ms */
770 atomic_set(&journal->j_reserved_credits, 0);
772 + err = jbd2_smr_journal_init(journal);
776 /* The journal is marked for error until we succeed with recovery! */
777 journal->j_flags = JBD2_ABORT;
779 @@ -1685,6 +1691,9 @@ int jbd2_journal_destroy(journal_t *journal)
780 if (journal->j_running_transaction)
781 jbd2_journal_commit_transaction(journal);
783 + if (journal->j_flags & JBD2_LAZY)
784 + journal->j_flags |= JBD2_NO_CLEANUP;
786 if (journal->j_flags & JBD2_NO_CLEANUP) {
787 jbd2_journal_destroy_checkpoint(journal);
788 journal->j_checkpoint_transactions = NULL;
789 @@ -1741,6 +1750,7 @@ int jbd2_journal_destroy(journal_t *journal)
790 jbd2_journal_destroy_revoke(journal);
791 if (journal->j_chksum_driver)
792 crypto_free_shash(journal->j_chksum_driver);
793 + jbd2_smr_journal_exit(journal);
794 kfree(journal->j_wbuf);
797 @@ -2641,6 +2651,8 @@ static int __init journal_init_caches(void)
798 ret = jbd2_journal_init_handle_cache();
800 ret = jbd2_journal_init_transaction_cache();
802 + ret = jbd2_journal_init_jmap_cache();
806 @@ -2650,6 +2662,7 @@ static void jbd2_journal_destroy_caches(void)
807 jbd2_journal_destroy_journal_head_cache();
808 jbd2_journal_destroy_handle_cache();
809 jbd2_journal_destroy_transaction_cache();
810 + jbd2_journal_destroy_jmap_cache();
811 jbd2_journal_destroy_slabs();
814 diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
815 index 9a07b0485784..a1d56bb9fa4f 100644
816 --- a/include/linux/jbd2.h
817 +++ b/include/linux/jbd2.h
818 @@ -732,6 +732,9 @@ jbd2_time_diff(unsigned long start, unsigned long end)
820 * @j_sb_buffer: First part of superblock buffer
821 * @j_superblock: Second part of superblock buffer
822 + * @j_jmap: A map from file system blocks to log blocks
823 + * @j_transaction_infos: An array of information structures per live transaction
824 + * @j_jmap_lock: Protect j_jmap and j_transaction_infos
825 * @j_format_version: Version of the superblock format
826 * @j_state_lock: Protect the various scalars in the journal
827 * @j_barrier_count: Number of processes waiting to create a barrier lock
828 @@ -807,6 +810,15 @@ struct journal_s
829 struct buffer_head *j_sb_buffer;
830 journal_superblock_t *j_superblock;
832 + /* A map from file system blocks to journal blocks */
833 + struct rb_root j_jmap;
835 + /* An array of housekeeping information about live transactions */
836 + struct transaction_infos *j_transaction_infos;
838 + /* Protect j_jmap and j_transaction_infos */
839 + rwlock_t j_jmap_lock;
841 /* Version of the superblock format */
842 int j_format_version;
844 @@ -1129,6 +1141,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
846 #define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */
847 #define JBD2_NO_CLEANUP 0x100 /* Don't flush empty the journal on shutdown */
848 +#define JBD2_LAZY 0x200 /* Do lazy journalling */
851 * Function declarations for the journaling transaction and buffer
852 @@ -1401,6 +1414,17 @@ static inline void jbd2_journal_abort_handle(handle_t *handle)
853 handle->h_aborted = 1;
856 +/* Lazy journalling redirection */
857 +extern void jbd2_submit_bh(journal_t *journal, int rw, int op_flags,
858 + struct buffer_head *bh, const char *func);
859 +extern void jbd2_ll_rw_block(journal_t *journal, int rw, int op_flags, int nr,
860 + struct buffer_head *bhs[], const char *func);
861 +extern int jbd2_bh_submit_read(journal_t *journal, struct buffer_head *bh,
863 +extern void jbd2_sb_breadahead(journal_t *journal, struct super_block *sb,
867 #endif /* __KERNEL__ */
869 /* Comparison functions for transaction IDs: perform comparisons using
870 diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
871 index c1d1f3eb242d..a0cbe7a03fc4 100644
872 --- a/include/trace/events/jbd2.h
873 +++ b/include/trace/events/jbd2.h
876 struct transaction_chp_stats_s;
877 struct transaction_run_stats_s;
880 +struct transaction_info;
882 TRACE_EVENT(jbd2_checkpoint,
884 @@ -379,6 +382,199 @@ TRACE_EVENT(jbd2_lock_buffer_stall,
888 +TRACE_EVENT(jbd2_jmap_replace,
890 + TP_PROTO(struct jmap_entry *jentry, struct blk_mapping *mapping, \
893 + TP_ARGS(jentry, mapping, t_idx),
896 + __field(sector_t, fsblk )
897 + __field(sector_t, old_logblk )
898 + __field(sector_t, new_logblk )
899 + __field(int, old_t_idx )
900 + __field(int, new_t_idx )
904 + __entry->fsblk = mapping->fsblk;
905 + __entry->old_logblk = jentry->mapping.logblk;
906 + __entry->new_logblk = mapping->logblk;
907 + __entry->old_t_idx = jentry->t_idx;
908 + __entry->new_t_idx = t_idx;
911 + TP_printk("remap %llu from %llu to %llu, move from transaction at index %d to transaction at index %d",
912 + (unsigned long long) __entry->fsblk,
913 + (unsigned long long) __entry->old_logblk,
914 + (unsigned long long) __entry->new_logblk,
915 + __entry->old_t_idx,
916 + __entry->new_t_idx)
919 +TRACE_EVENT(jbd2_jmap_insert,
921 + TP_PROTO(struct blk_mapping *mapping, int t_idx),
923 + TP_ARGS(mapping, t_idx),
926 + __field(sector_t, fsblk )
927 + __field(sector_t, logblk)
928 + __field(int, t_idx)
932 + __entry->fsblk = mapping->fsblk;
933 + __entry->logblk = mapping->logblk;
934 + __entry->t_idx = t_idx;
937 + TP_printk("map %llu to %llu, insert to transaction %d",
938 + (unsigned long long) __entry->fsblk,
939 + (unsigned long long) __entry->logblk,
943 +TRACE_EVENT(jbd2_jmap_lookup,
945 + TP_PROTO(sector_t fsblk, sector_t logblk, const char *func),
947 + TP_ARGS(fsblk, logblk, func),
950 + __field(sector_t, fsblk )
951 + __field(sector_t, logblk)
952 + __string(func, func)
956 + __entry->fsblk = fsblk;
957 + __entry->logblk = logblk;
958 + __assign_str(func, func);
961 + TP_printk("%s: lookup %llu -> %llu",
963 + (unsigned long long) __entry->fsblk,
964 + (unsigned long long) __entry->logblk)
967 +TRACE_EVENT(jbd2_jmap_read_from_log,
969 + TP_PROTO(sector_t fsblk, sector_t logblk, int uptodate),
971 + TP_ARGS(fsblk, logblk, uptodate),
974 + __field(sector_t, fsblk )
975 + __field(sector_t, logblk)
976 + __field(int, uptodate)
980 + __entry->fsblk = fsblk;
981 + __entry->logblk = logblk;
982 + __entry->uptodate = uptodate;
985 + TP_printk("fsblk %llu logblk %llu uptodate %d",
986 + (unsigned long long) __entry->fsblk,
987 + (unsigned long long) __entry->logblk,
991 +TRACE_EVENT(jbd2_jmap_printf,
993 + TP_PROTO(const char *s),
1002 + __assign_str(s, s);
1009 +TRACE_EVENT(jbd2_jmap_printf1,
1011 + TP_PROTO(const char *s, sector_t fsblk),
1013 + TP_ARGS(s, fsblk),
1017 + __field(sector_t, fsblk )
1021 + __assign_str(s, s);
1022 + __entry->fsblk = fsblk;
1025 + TP_printk("%s: %llu",
1027 + (unsigned long long) __entry->fsblk)
1030 +TRACE_EVENT(jbd2_jmap_printf2,
1032 + TP_PROTO(const char *s, sector_t fsblk, sector_t logblk),
1034 + TP_ARGS(s, fsblk, logblk),
1038 + __field(sector_t, fsblk )
1039 + __field(sector_t, logblk)
1043 + __assign_str(s, s);
1044 + __entry->fsblk = fsblk;
1045 + __entry->logblk = logblk;
1048 + TP_printk("%s: %llu:%llu",
1050 + (unsigned long long) __entry->fsblk,
1051 + (unsigned long long) __entry->logblk)
1054 +TRACE_EVENT(jbd2_transaction_infos_add,
1056 + TP_PROTO(int t_idx, struct transaction_info *ti, int nr_mappings),
1058 + TP_ARGS(t_idx, ti, nr_mappings),
1061 + __field(int, t_idx )
1062 + __field(tid_t, tid )
1063 + __field(sector_t, offset)
1064 + __field(int, nr_mappings)
1068 + __entry->t_idx = t_idx;
1069 + __entry->tid = ti->tid;
1070 + __entry->offset = ti->offset;
1071 + __entry->nr_mappings = nr_mappings;
1074 + TP_printk("inserted transaction %u (offset %llu) at index %d with %d mappings",
1076 + (unsigned long long) __entry->offset,
1078 + __entry->nr_mappings)
1081 #endif /* _TRACE_JBD2_H */
1083 /* This part must be outside protection */