Add back cleaner (it was accidentally dropped due to merge failures).
[ext4-patch-queue.git] / cleaner
blob7081b03a4539a96cb4e15bd7a416374276e79883
1 Introduce cleaner
3 From: Abutalib Aghayev <agayev@cs.cmu.edu>
5 An experimental cleaner.  Copy the live blocks from the transaction at the
6 tail in batches to the transaction at the head.  After a commit ends, check
7 if free space is below the low watermark and, if so, keep cleaning until
8 free space is above the high watermark.
10 Signed-off-by: Abutalib Aghayev <agayev@cs.cmu.edu>
11 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
12 ---
13  fs/jbd2/Makefile     |   2 +-
14  fs/jbd2/checkpoint.c |   3 +
15  fs/jbd2/cleaner.c    | 367 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
16  fs/jbd2/jmap.c       |  34 +++++++++
17  fs/jbd2/jmap.h       |  82 +++++++++++++++++++++
18  fs/jbd2/journal.c    |  12 +++
19  include/linux/jbd2.h |   6 +-
20  7 files changed, 504 insertions(+), 2 deletions(-)
22 diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile
23 index a54f50b3a06e..b6a2dddcc0a7 100644
24 --- a/fs/jbd2/Makefile
25 +++ b/fs/jbd2/Makefile
26 @@ -5,4 +5,4 @@
27  obj-$(CONFIG_JBD2) += jbd2.o
29  jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o \
30 -               jmap.o
31 +               jmap.o cleaner.o
32 diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
33 index 4055f51617ef..b60bbf58e8f7 100644
34 --- a/fs/jbd2/checkpoint.c
35 +++ b/fs/jbd2/checkpoint.c
36 @@ -389,6 +389,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
37         tid_t           first_tid;
38         unsigned long   blocknr;
40 +       if (journal->j_flags & JBD2_LAZY)
41 +               return 0;
43         if (is_journal_aborted(journal))
44                 return -EIO;
46 diff --git a/fs/jbd2/cleaner.c b/fs/jbd2/cleaner.c
47 new file mode 100644
48 index 000000000000..1ab9d2e50702
49 --- /dev/null
50 +++ b/fs/jbd2/cleaner.c
51 @@ -0,0 +1,367 @@
52 +#include <linux/blk_types.h>
53 +#include <linux/jbd2.h>
54 +#include "jmap.h"
55 +#include <linux/list.h>
56 +#include <linux/blkdev.h>
57 +#include <linux/completion.h>
58 +#include <linux/delay.h>
59 +#include <trace/events/jbd2.h>
61 +inline int jbd2_low_on_space(journal_t *journal)
63 +       int x = atomic_read(&journal->j_cleaner_ctx->nr_txns_committed);
64 +       if (x > 10) {
65 +               trace_jbd2_jmap_printf1("low on space", x);
66 +               return true;
67 +       }
68 +       trace_jbd2_jmap_printf1("not low on space", x);
69 +       return false;
72 +inline int jbd2_high_on_space(journal_t *journal)
74 +       if (atomic_read(&journal->j_cleaner_ctx->nr_txns_cleaned) < 2) {
75 +               trace_jbd2_jmap_printf("not enough cleaned");
76 +               return false;
77 +       }
78 +       trace_jbd2_jmap_printf("enough cleaned");
79 +       atomic_set(&journal->j_cleaner_ctx->nr_txns_cleaned, 0);
80 +       atomic_set(&journal->j_cleaner_ctx->nr_txns_committed, 0);
81 +       return true;
84 +inline bool jbd2_cleaning(journal_t *journal)
86 +       return atomic_read(&journal->j_cleaner_ctx->cleaning);
89 +inline void jbd2_stop_cleaning(journal_t *journal)
91 +       trace_jbd2_jmap_printf("stopped cleaning");
92 +       atomic_set(&journal->j_cleaner_ctx->cleaning, 0);
95 +inline void jbd2_start_cleaning(journal_t *journal)
97 +       struct cleaner_ctx *ctx = journal->j_cleaner_ctx;
99 +       trace_jbd2_jmap_printf("started cleaning");
100 +       atomic_set(&journal->j_cleaner_ctx->cleaning, 1);
102 +       /* Schedule the next batch of cleaning */
103 +       if (!jbd2_cleaning_batch_complete(journal)) {
104 +               trace_jbd2_jmap_printf("not scheduling a new batch");
105 +               return;
106 +       }
108 +       trace_jbd2_jmap_printf("scheduling a batch");
109 +       BUG_ON(atomic_read(&ctx->nr_pending_reads));
111 +       atomic_set(&ctx->batch_in_progress, 1);
112 +       schedule_work(&ctx->work);
117 +inline bool jbd2_cleaning_batch_complete(journal_t *journal)
119 +       return jbd2_cleaning(journal) &&
120 +               atomic_read(&journal->j_cleaner_ctx->batch_in_progress) == 0;
124 + * Tries to move the tail forward (and hence free up space) as long as the
125 + * transaction at the tail has only stale blocks.  Returns true if it manages
126 + * to free at least one transaction, false otherwise.
127 + */
128 +bool jbd2_try_to_move_tail(journal_t *journal)
130 +       struct transaction_infos *tis = journal->j_transaction_infos;
131 +       struct transaction_info *ti, *ti1;
133 +       /*
134 +        * Advance the tail as far as possible by skipping over transactions
135 +        * with no live blocks.
136 +        */
137 +       write_lock(&journal->j_jmap_lock);
138 +       ti = ti1 = &tis->buf[tis->tail];
140 +       for ( ; list_empty(&ti->live_blks); ti = &tis->buf[tis->tail]) {
141 +               trace_jbd2_jmap_printf2("cleaned a transaction",
142 +                                       tis->tail, ti->tid);
143 +               tis->tail = (tis->tail + 1) & (MAX_LIVE_TRANSACTIONS - 1);
144 +               atomic_inc(&journal->j_cleaner_ctx->nr_txns_cleaned);
145 +       }
146 +       write_unlock(&journal->j_jmap_lock);
148 +       if (ti == ti1)
149 +               return false;
150 +       /*
151 +        * In the worst case, this will end up updating the journal superblock
152 +        * after cleaning up every transaction.  Should we avoid it?
153 +        */
154 +       write_unlock(&journal->j_state_lock);
155 +       jbd2_update_log_tail(journal, ti->tid, ti->offset);
156 +       write_lock(&journal->j_state_lock);
158 +       return true;
162 + * Finds the live blocks at the tail transaction and copies the corresponding
163 + * mappings to |ctx->mappings|.  Returns the number of live block mappings
164 + * copied.  Should be called with a read lock on |j_jmap_lock|.
165 + */
166 +static int find_live_blocks(struct cleaner_ctx *ctx)
168 +       journal_t *journal = ctx->journal;
169 +       struct transaction_infos *tis = journal->j_transaction_infos;
170 +       struct transaction_info *ti = &tis->buf[tis->tail];
171 +       struct jmap_entry *je = NULL;
172 +       int i, nr_live = 0;
174 +       if (unlikely(list_empty(&ti->live_blks)))
175 +               goto done;
177 +       spin_lock(&ctx->pos_lock);
178 +       if (!ctx->pos)
179 +               ctx->pos = list_first_entry(&ti->live_blks, typeof(*je), list);
180 +       je = ctx->pos;
181 +       spin_unlock(&ctx->pos_lock);
183 +       list_for_each_entry_from(je, &ti->live_blks, list) {
184 +               if (je->revoked)
185 +                       continue;
186 +               ctx->mappings[nr_live++] = je->mapping;
187 +               if (nr_live == CLEANER_BATCH_SIZE)
188 +                       break;
189 +       }
191 +done:
192 +       trace_jbd2_jmap_printf1("found live blocks", nr_live);
193 +       for (i = 0; i < nr_live; ++i)
194 +               trace_jbd2_jmap_printf2("m",
195 +                                       ctx->mappings[i].fsblk,
196 +                                       ctx->mappings[i].logblk);
197 +       return nr_live;
200 +static void live_block_read_end_io(struct buffer_head *bh, int uptodate)
202 +       struct cleaner_ctx *ctx = bh->b_private;
204 +       if (uptodate) {
205 +               set_buffer_uptodate(bh);
206 +               if (atomic_dec_and_test(&ctx->nr_pending_reads))
207 +                       wake_up(&ctx->live_block_reads);
208 +       } else {
209 +               WARN_ON(1);
210 +               clear_buffer_uptodate(bh);
211 +       }
213 +       unlock_buffer(bh);
214 +       put_bh(bh);
218 + * Reads live blocks in |ctx->mappings| populated by find_live_blocks into
219 + * buffer heads in |ctx->bhs|.  Returns true if at least one of the reads goes
220 + * out to disk and false otherwise.  If this function returns true then the
221 + * caller should sleep on the wait queue |ctx->live_block_reads|.  The
222 + * caller will be woken up when all reads are complete, through the end_io
223 + * handler attached to buffer heads read from disk.
224 + */
225 +static bool read_live_blocks(struct cleaner_ctx *ctx, int nr_live)
227 +       journal_t *journal = ctx->journal;
228 +       bool slow = false;
229 +       struct blk_plug plug;
230 +       bool plugged = false;
231 +       int i, rc;
233 +       for (i = 0; i < nr_live; ++i) {
234 +               ctx->bhs[i] = __getblk(journal->j_dev, ctx->mappings[i].fsblk,
235 +                               journal->j_blocksize);
236 +               if (unlikely(!ctx->bhs[i])) {
237 +                       rc = -ENOMEM;
238 +                       goto out_err;
239 +               }
240 +               if (buffer_uptodate(ctx->bhs[i]))
241 +                       continue;
242 +               if (!plugged) {
243 +                       plugged = true;
244 +                       blk_start_plug(&plug);
245 +               }
246 +               lock_buffer(ctx->bhs[i]);
247 +               if (buffer_uptodate(ctx->bhs[i]))
248 +                       continue;
249 +               ctx->bhs[i]->b_private = ctx;
250 +               ctx->bhs[i]->b_end_io = live_block_read_end_io;
251 +               get_bh(ctx->bhs[i]);
252 +               rc = read_block_from_log(ctx->journal, ctx->bhs[i],
253 +                                        REQ_RAHEAD, ctx->mappings[i].logblk);
254 +               if (unlikely(rc < 0))
255 +                       goto out_err;
256 +               atomic_inc(&ctx->nr_pending_reads);
257 +               if (rc) {
258 +                       slow = true;
259 +                       trace_jbd2_jmap_printf2("reading from disk",
260 +                                               ctx->mappings[i].fsblk,
261 +                                               ctx->mappings[i].logblk);
262 +               } else {
263 +                       trace_jbd2_jmap_printf2("cached",
264 +                                               ctx->mappings[i].fsblk,
265 +                                               ctx->mappings[i].logblk);
266 +               }
267 +       }
268 +       if (plugged)
269 +               blk_finish_plug(&plug);
270 +       return slow;
272 +out_err:
273 +       if (plugged)
274 +               blk_finish_plug(&plug);
275 +       jbd2_journal_abort(ctx->journal, rc);
276 +       return false;
280 + * This function finds the live blocks that became stale between the call to
281 + * find_live_blocks and now, and discards them.  It returns true if there are no
282 + * more live blocks left at the tail transaction.
283 + */
284 +static bool discard_stale_blocks(struct cleaner_ctx *ctx, int nr_live)
286 +       journal_t *journal = ctx->journal;
287 +       struct transaction_infos *tis = journal->j_transaction_infos;
288 +       struct transaction_info *ti = &tis->buf[tis->tail];
289 +       struct jmap_entry *je = NULL;
290 +       int i = 0, j = 0, next = 0;
292 +       trace_jbd2_jmap_printf(__func__);
293 +       spin_lock(&ctx->pos_lock);
294 +       BUG_ON(!ctx->pos);
295 +       je = ctx->pos;
296 +       list_for_each_entry_from(je, &ti->live_blks, list) {
297 +               for (j = next; j < nr_live; ++j) {
298 +                       if (je->mapping.fsblk == ctx->mappings[j].fsblk) {
299 +                               next = j+1;
300 +                               ctx->pos = list_next_entry(je, list);
301 +                               if (je->revoked) {
302 +                                       brelse(ctx->bhs[j]);
303 +                                       ctx->bhs[j] = NULL;
304 +                                       trace_jbd2_jmap_printf2(
305 +                                               "revoked",
306 +                                               ctx->mappings[i].fsblk,
307 +                                               ctx->mappings[i].logblk);
308 +                               }
309 +                               break;
310 +                       } else {
311 +                               trace_jbd2_jmap_printf2(
312 +                                               "moved to another list",
313 +                                               ctx->mappings[i].fsblk,
314 +                                               ctx->mappings[i].logblk);
315 +                               brelse(ctx->bhs[j]);
316 +                               ctx->bhs[j] = NULL;
317 +                       }
318 +               }
319 +               if (++i == nr_live || j == nr_live)
320 +                       break;
321 +       }
322 +       spin_unlock(&ctx->pos_lock);
324 +       /*
325 +        * We have exited the loop.  If we haven't processed all the entries in
326 +        * |ctx->mappings|, that is if (j < nr_live) at the exit, and we have
327 +        * not processed |nr_live| entries from the live blocks list at the
328 +        * tail, that is if (i < nr_live) at the exit, then the live blocks list
329 +        * has shrunk and the tail transaction has no live blocks left.
330 +        */
331 +       return j < nr_live && i < nr_live;
334 +static void attach_live_blocks(struct cleaner_ctx *ctx, handle_t *handle,
335 +                              int nr_live)
337 +       int err, i;
339 +       trace_jbd2_jmap_printf(__func__);
340 +       for (i = 0; i < nr_live; ++i) {
341 +               if (!ctx->bhs[i])
342 +                       continue;
343 +               trace_jbd2_jmap_printf2("attaching",
344 +                                       ctx->mappings[i].fsblk,
345 +                                       ctx->mappings[i].logblk);
346 +               err = jbd2_journal_get_write_access(handle, ctx->bhs[i]);
347 +               if (!err)
348 +                       err = jbd2_journal_dirty_metadata(handle, ctx->bhs[i]);
349 +               if (err) {
350 +                       jbd2_journal_abort(ctx->journal, err);
351 +                       return;
352 +               }
353 +       }
357 + * Read the live blocks from the tail transaction and attach them to the current
358 + * transaction.
359 + */
360 +void jbd2_jmap_do_clean_batch(struct work_struct *work)
362 +       struct cleaner_ctx *ctx = container_of(work, struct cleaner_ctx, work);
363 +       bool wake_up_commit_thread = true;
364 +       handle_t *handle = NULL;
365 +       int nr_live, err;
367 +       read_lock(&ctx->journal->j_jmap_lock);
368 +       nr_live = find_live_blocks(ctx);
369 +       read_unlock(&ctx->journal->j_jmap_lock);
371 +       if (nr_live < CLEANER_BATCH_SIZE)
372 +               wake_up_commit_thread = false;
373 +       if (nr_live == 0)
374 +               goto done;
376 +       read_live_blocks(ctx, nr_live);
377 +       wait_event(ctx->live_block_reads,
378 +                  atomic_read(&ctx->nr_pending_reads) <= 0);
380 +       handle = jbd2_journal_start(ctx->journal, nr_live);
381 +       if (IS_ERR(handle)) {
382 +               jbd2_journal_abort(ctx->journal, PTR_ERR(handle));
383 +               return;
384 +       }
386 +       read_lock(&ctx->journal->j_jmap_lock);
387 +       if (discard_stale_blocks(ctx, nr_live))
388 +               wake_up_commit_thread = false;
389 +       read_unlock(&ctx->journal->j_jmap_lock);
390 +       /*
391 +        * I'm not sure why attach_live_blocks() was under the jmap_lock
392 +        * previously, but it can't be, since it calls functions that
393 +        * can block due to memory allocation.  I don't think it needs
394 +        * to be protected, since it appears that ctx->mappings is only
395 +        * used by the cleaner code, and so it can't be run multiple
396 +        * times at once.  -- TYT
397 +        */
398 +       attach_live_blocks(ctx, handle, nr_live);
400 +       err = jbd2_journal_stop(handle);
401 +       if (err) {
402 +               jbd2_journal_abort(ctx->journal, err);
403 +               return;
404 +       }
406 +done:
407 +       atomic_set(&ctx->batch_in_progress, 0);
408 +       atomic_inc(&ctx->nr_txns_cleaned);
409 +       if (wake_up_commit_thread) {
410 +               trace_jbd2_jmap_printf("waking up commit thread");
411 +               wake_up(&ctx->journal->j_wait_commit);
412 +       } else {
413 +               trace_jbd2_jmap_printf("not waking up commit thread");
414 +               spin_lock(&ctx->pos_lock);
415 +               ctx->pos = NULL;
416 +               spin_unlock(&ctx->pos_lock);
417 +       }
419 diff --git a/fs/jbd2/jmap.c b/fs/jbd2/jmap.c
420 index 7de6f4a0a1dc..0e759cc095f5 100644
421 --- a/fs/jbd2/jmap.c
422 +++ b/fs/jbd2/jmap.c
423 @@ -91,8 +91,17 @@ static int process_existing_mappings(journal_t *journal,
424                  * We are either deleting the entry because it was revoked, or
425                  * we are moving it to the live blocks list of this transaction.
426                  * In either case, we remove it from its existing list.
427 +                * However, before removing it we check whether the cleaner
428 +                * has cached a pointer to this entry (as its position in the
429 +                * live blocks list of the tail transaction) and, if so,
430 +                * advance the cached pointer to the next entry.
431                  */
432 +               spin_lock(&journal->j_cleaner_ctx->pos_lock);
433 +               if (je == journal->j_cleaner_ctx->pos) {
434 +                       journal->j_cleaner_ctx->pos = list_next_entry(je, list);
435 +               }
436                 list_del(&je->list);
437 +               spin_unlock(&journal->j_cleaner_ctx->pos_lock);
439                 if (je->revoked) {
440                         rb_erase(&je->rb_node, &journal->j_jmap);
441 @@ -216,6 +225,8 @@ void jbd2_finish_transaction_infos(journal_t *journal)
443         struct transaction_infos *tis = journal->j_transaction_infos;
445 +       atomic_inc(&journal->j_cleaner_ctx->nr_txns_committed);
447         write_lock(&journal->j_jmap_lock);
448         tis->head = (tis->head + 1) & (MAX_LIVE_TRANSACTIONS - 1);
449         write_unlock(&journal->j_jmap_lock);
450 @@ -243,6 +254,8 @@ int jbd2_transaction_infos_add(journal_t *journal, transaction_t *transaction,
451          */
452         BUG_ON(!list_empty(&ti->live_blks));
454 +       atomic_inc(&journal->j_cleaner_ctx->nr_txns_committed);
456         write_lock(&journal->j_jmap_lock);
457         nr_new = process_existing_mappings(journal, ti, t_idx, mappings,
458                                         nr_mappings);
459 @@ -489,11 +502,32 @@ int jbd2_smr_journal_init(journal_t *journal)
461         journal->j_jmap = RB_ROOT;
462         rwlock_init(&journal->j_jmap_lock);
463 +       journal->j_cleaner_ctx = kzalloc(sizeof(struct cleaner_ctx),
464 +                                        GFP_KERNEL);
465 +       if (!journal->j_cleaner_ctx)
466 +               return -ENOMEM;
468 +       journal->j_cleaner_ctx->journal = journal;
469 +       journal->j_cleaner_ctx->pos = NULL;
470 +       spin_lock_init(&journal->j_cleaner_ctx->pos_lock);
471 +       atomic_set(&journal->j_cleaner_ctx->cleaning, 0);
472 +       atomic_set(&journal->j_cleaner_ctx->batch_in_progress, 0);
473 +       atomic_set(&journal->j_cleaner_ctx->nr_pending_reads, 0);
474 +       atomic_set(&journal->j_cleaner_ctx->nr_txns_committed, 0);
475 +       atomic_set(&journal->j_cleaner_ctx->nr_txns_cleaned, 0);
476 +       init_waitqueue_head(&journal->j_cleaner_ctx->live_block_reads);
477 +       INIT_WORK(&journal->j_cleaner_ctx->work, jbd2_jmap_do_clean_batch);
478         return jbd2_init_transaction_infos(journal);
481  void jbd2_smr_journal_exit(journal_t *journal)
483 +       if (journal->j_cleaner_ctx) {
484 +               atomic_set(&journal->j_cleaner_ctx->cleaning, 0);
485 +               flush_work(&journal->j_cleaner_ctx->work);
486 +               kfree(journal->j_cleaner_ctx);
487 +               journal->j_cleaner_ctx = NULL;
488 +       }
489         jbd2_free_transaction_infos(journal);
492 diff --git a/fs/jbd2/jmap.h b/fs/jbd2/jmap.h
493 index 91564ce9bbda..5ae3dc52746f 100644
494 --- a/fs/jbd2/jmap.h
495 +++ b/fs/jbd2/jmap.h
496 @@ -125,4 +125,86 @@ extern void jbd2_jmap_cancel_revoke(journal_t *journal, sector_t fsblk);
497  extern int read_block_from_log(journal_t *journal, struct buffer_head *bh,
498                                int op_flags, sector_t blk);
500 +extern void jbd2_jmap_do_clean_batch(struct work_struct *work);
503 + * Cleaner stuff is below.
504 + */
507 + * Number of blocks to read at once, for cleaning.
508 + */
509 +#define CLEANER_BATCH_SIZE 16
512 + * Context structure for the cleaner.
513 + */
514 +struct cleaner_ctx {
515 +       /*
516 +        * Set to true once we drop below the low watermark; it stays set
517 +        * until we rise above the high watermark.  It is accessed by the
518 +        * commit thread and by foreground kernel threads during journal
519 +        * destruction, therefore it is atomic.
520 +        */
521 +       atomic_t cleaning;
523 +       /*
524 +        * We clean in batches of blocks.  This flag indicates if we are
525 +        * currently cleaning a batch.  It is accessed by the commit thread and
526 +        * the cleaner thread, therefore it is atomic.
527 +        */
528 +       atomic_t batch_in_progress;
530 +       /*
531 +        * We find live blocks to clean from the live blocks list of the
532 +        * transaction at the tail.  This list can be larger than our batch size
533 +        * and we may need several attempts to process it.  We cache the
534 +        * position of the next entry to start from in |pos|.  The cleaner
535 +        * thread can run concurrently with the commit thread, which can modify
536 +        * the live blocks list of the transaction at the tail (for example, if
537 +        * it needs to drop a revoked entry or if |pos| points to an entry that
538 +        * has been updated and should move from the live blocks list of the
539 +        * transaction at the tail to the live blocks list of the current
540 +        * transaction), so we protect |pos| with |pos_lock|.
541 +        */
542 +       struct jmap_entry *pos;
543 +       spinlock_t pos_lock;
545 +       /*
546 +        * Live block mappings for the blocks that we copy in a batch.
547 +        */
548 +       struct blk_mapping mappings[CLEANER_BATCH_SIZE];
550 +       /*
551 +        * Buffer heads for the live blocks read in a batch.
552 +        */
553 +       struct buffer_head *bhs[CLEANER_BATCH_SIZE];
555 +       /*
556 +        * Number of pending reads in a batch.  Every submitted read increments
557 +        * it and every completed read decrements it.
558 +        */
559 +       atomic_t nr_pending_reads;
561 +       /*
562 +        * The cleaner thread sleeps on this wait queue until the last
563 +        * completed read wakes it up.
564 +        */
565 +       wait_queue_head_t live_block_reads;
567 +       /* TODO: temporary for debugging, remove once done. */
568 +       atomic_t nr_txns_committed;
569 +       atomic_t nr_txns_cleaned;
571 +       journal_t *journal;
572 +       struct work_struct work;
575 +extern int jbd2_low_on_space(journal_t *journal);
576 +extern int jbd2_high_on_space(journal_t *journal);
577 +extern bool jbd2_cleaning(journal_t *journal);
578 +extern void jbd2_stop_cleaning(journal_t *journal);
579 +extern void jbd2_start_cleaning(journal_t *journal);
580 +extern bool jbd2_cleaning_batch_complete(journal_t *journal);
581 +extern bool jbd2_try_to_move_tail(journal_t *journal);
582  #endif
583 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
584 index 50ab9b5bc7f4..9c097ddfe63f 100644
585 --- a/fs/jbd2/journal.c
586 +++ b/fs/jbd2/journal.c
587 @@ -229,6 +229,15 @@ static int kjournald2(void *arg)
588         }
590         wake_up(&journal->j_wait_done_commit);
592 +       if ((journal->j_flags & JBD2_LAZY) &&
593 +           (jbd2_cleaning(journal) || jbd2_low_on_space(journal))) {
594 +               if (jbd2_try_to_move_tail(journal) && jbd2_high_on_space(journal))
595 +                       jbd2_stop_cleaning(journal);
596 +               else
597 +                       jbd2_start_cleaning(journal);
598 +       }
600         if (freezing(current)) {
601                 /*
602                  * The simpler the better. Flushing journal isn't a
603 @@ -257,6 +266,9 @@ static int kjournald2(void *arg)
604                         should_sleep = 0;
605                 if (journal->j_flags & JBD2_UNMOUNT)
606                         should_sleep = 0;
607 +               if ((journal->j_flags & JBD2_LAZY) &&
608 +                   jbd2_cleaning_batch_complete(journal))
609 +                       should_sleep = 0;
610                 if (should_sleep) {
611                         write_unlock(&journal->j_state_lock);
612                         schedule();
613 diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
614 index a1d56bb9fa4f..fa6094124bcb 100644
615 --- a/include/linux/jbd2.h
616 +++ b/include/linux/jbd2.h
617 @@ -734,7 +734,8 @@ jbd2_time_diff(unsigned long start, unsigned long end)
618   * @j_superblock: Second part of superblock buffer
619   * @j_map: A map from file system blocks to log blocks
620   * @j_transaction_infos: An array of information structures per live transaction
621 - * @j_map_lock: Protect j_jmap and j_transaction_infos
622 + * @j_jmap_lock: Protect j_jmap and j_transaction_infos
623 + * @j_cleaner_ctx: Cleaner state
624   * @j_format_version: Version of the superblock format
625   * @j_state_lock: Protect the various scalars in the journal
626   * @j_barrier_count:  Number of processes waiting to create a barrier lock
627 @@ -819,6 +820,9 @@ struct journal_s
628         /* Protect j_jmap and j_transaction_infos */
629         rwlock_t                j_jmap_lock;
631 +       /* Cleaner state */
632 +       struct cleaner_ctx      *j_cleaner_ctx;
634         /* Version of the superblock format */
635         int                     j_format_version;